axiom 0.24.0 → 0.25.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/bin.cjs CHANGED
@@ -477,6 +477,7 @@ var r = process.env.FORCE_TTY !== void 0 || (0, import_tty.isatty)(1);
477
477
  var u = p(r);
478
478
 
479
479
  // src/evals/run-vitest.ts
480
+ var import_node_path3 = __toESM(require("path"), 1);
480
481
  var import_node = require("vitest/node");
481
482
 
482
483
  // src/evals/context/storage.ts
@@ -594,7 +595,280 @@ var import_api5 = require("@opentelemetry/api");
594
595
 
595
596
  // src/otel/semconv/attributes.ts
596
597
  var import_semantic_conventions = require("@opentelemetry/semantic-conventions");
598
+
599
+ // src/otel/semconv/eval_proposal.ts
600
+ var ATTR_EVAL_ID = "eval.id";
601
+ var ATTR_EVAL_NAME = "eval.name";
602
+ var ATTR_EVAL_VERSION = "eval.version";
603
+ var ATTR_EVAL_TYPE = "eval.type";
604
+ var ATTR_EVAL_TAGS = "eval.tags";
605
+ var ATTR_EVAL_BASELINE_ID = "eval.baseline.id";
606
+ var ATTR_EVAL_BASELINE_NAME = "eval.baseline.name";
607
+ var ATTR_EVAL_METADATA = "eval.metadata";
608
+ var ATTR_EVAL_COLLECTION_ID = "eval.collection.id";
609
+ var ATTR_EVAL_COLLECTION_SIZE = "eval.collection.size";
610
+ var ATTR_EVAL_COLLECTION_NAME = "eval.collection.name";
611
+ var ATTR_EVAL_CONFIG_FLAGS = "eval.config.flags";
612
+ var ATTR_EVAL_CASE_INDEX = "eval.case.index";
613
+ var ATTR_EVAL_CASE_INPUT = "eval.case.input";
614
+ var ATTR_EVAL_CASE_OUTPUT = "eval.case.output";
615
+ var ATTR_EVAL_CASE_EXPECTED = "eval.case.expected";
616
+ var ATTR_EVAL_CASE_SCORES = "eval.case.scores";
617
+ var ATTR_EVAL_CASE_METADATA = "eval.case.metadata";
618
+ var ATTR_EVAL_TASK_OUTPUT = "eval.task.output";
619
+ var ATTR_EVAL_TASK_NAME = "eval.task.name";
620
+ var ATTR_EVAL_TASK_TYPE = "eval.task.type";
621
+ var ATTR_EVAL_RUN_ID = "eval.run.id";
622
+ var ATTR_EVAL_SCORE_NAME = "eval.score.name";
623
+ var ATTR_EVAL_SCORE_VALUE = "eval.score.value";
624
+ var ATTR_EVAL_SCORE_THRESHOLD = "eval.score.threshold";
625
+ var ATTR_EVAL_SCORE_PASSED = "eval.score.passed";
626
+ var ATTR_EVAL_SCORE_METADATA = "eval.score.metadata";
627
+ var ATTR_EVAL_USER_NAME = "eval.user.name";
628
+ var ATTR_EVAL_USER_EMAIL = "eval.user.email";
629
+
630
+ // src/otel/semconv/attributes.ts
597
631
  var import_incubating = require("@opentelemetry/semantic-conventions/incubating");
632
+ var ATTR_AXIOM_GEN_AI_SCHEMA_URL = "axiom.gen_ai.schema_url";
633
+ var ATTR_AXIOM_GEN_AI_SDK_NAME = "axiom.gen_ai.sdk.name";
634
+ var ATTR_AXIOM_GEN_AI_SDK_VERSION = "axiom.gen_ai.sdk.version";
635
+ var ATTR_GEN_AI_CAPABILITY_NAME = "gen_ai.capability.name";
636
+ var ATTR_GEN_AI_STEP_NAME = "gen_ai.step.name";
637
+ var ATTR_GEN_AI_TOOL_ARGUMENTS = "gen_ai.tool.arguments";
638
+ var ATTR_GEN_AI_TOOL_MESSAGE = "gen_ai.tool.message";
639
+ var GEN_AI_PROVIDER_NAME_VALUE_ASSEMBLYAI = "assemblyai";
640
+ var GEN_AI_PROVIDER_NAME_VALUE_CEREBRAS = "cerebras";
641
+ var GEN_AI_PROVIDER_NAME_VALUE_DEEPGRAM = "deepgram";
642
+ var GEN_AI_PROVIDER_NAME_VALUE_DEEPINFRA = "deepinfra";
643
+ var GEN_AI_PROVIDER_NAME_VALUE_ELEVENLABS = "elevenlabs";
644
+ var GEN_AI_PROVIDER_NAME_VALUE_FAL = "fal";
645
+ var GEN_AI_PROVIDER_NAME_VALUE_FIREWORKS = "fireworks";
646
+ var GEN_AI_PROVIDER_NAME_VALUE_GLADIA = "gladia";
647
+ var GEN_AI_PROVIDER_NAME_VALUE_HUME = "hume";
648
+ var GEN_AI_PROVIDER_NAME_VALUE_LMNT = "lmnt";
649
+ var GEN_AI_PROVIDER_NAME_VALUE_LUMA = "luma";
650
+ var GEN_AI_PROVIDER_NAME_VALUE_REPLICATE = "replicate";
651
+ var GEN_AI_PROVIDER_NAME_VALUE_REVAI = "revai";
652
+ var GEN_AI_PROVIDER_NAME_VALUE_TOGETHERAI = "togetherai";
653
+ var GEN_AI_PROVIDER_NAME_VALUE_VERCEL = "vercel";
654
+ var Attr = {
655
+ __EXPERIMENTAL_Flag: (flagName) => `flag.${flagName}`,
656
+ __EXPERIMENTAL_Fact: (factName) => `fact.${factName}`,
657
+ Axiom: {
658
+ GenAI: {
659
+ SchemaURL: ATTR_AXIOM_GEN_AI_SCHEMA_URL,
660
+ SDK: {
661
+ Name: ATTR_AXIOM_GEN_AI_SDK_NAME,
662
+ Version: ATTR_AXIOM_GEN_AI_SDK_VERSION
663
+ }
664
+ }
665
+ },
666
+ GenAI: {
667
+ PromptMetadata: {
668
+ ID: "axiom.gen_ai.prompt.id",
669
+ Name: "axiom.gen_ai.prompt.name",
670
+ Slug: "axiom.gen_ai.prompt.slug",
671
+ Version: "axiom.gen_ai.prompt.version"
672
+ },
673
+ /**
674
+ * These two are used to identify the span
675
+ */
676
+ Capability: {
677
+ Name: ATTR_GEN_AI_CAPABILITY_NAME
678
+ },
679
+ Step: {
680
+ Name: ATTR_GEN_AI_STEP_NAME
681
+ },
682
+ Provider: {
683
+ Name: import_incubating.ATTR_GEN_AI_PROVIDER_NAME,
684
+ Name_Values: {
685
+ Anthropic: import_incubating.GEN_AI_PROVIDER_NAME_VALUE_ANTHROPIC,
686
+ AssemblyAI: GEN_AI_PROVIDER_NAME_VALUE_ASSEMBLYAI,
687
+ AWSBedrock: import_incubating.GEN_AI_PROVIDER_NAME_VALUE_AWS_BEDROCK,
688
+ AzureAIInference: import_incubating.GEN_AI_PROVIDER_NAME_VALUE_AZURE_AI_INFERENCE,
689
+ AzureAIOpenAI: import_incubating.GEN_AI_PROVIDER_NAME_VALUE_AZURE_AI_OPENAI,
690
+ Cerebras: GEN_AI_PROVIDER_NAME_VALUE_CEREBRAS,
691
+ Cohere: import_incubating.GEN_AI_PROVIDER_NAME_VALUE_COHERE,
692
+ Deepgram: GEN_AI_PROVIDER_NAME_VALUE_DEEPGRAM,
693
+ DeepInfra: GEN_AI_PROVIDER_NAME_VALUE_DEEPINFRA,
694
+ Deepseek: import_incubating.GEN_AI_PROVIDER_NAME_VALUE_DEEPSEEK,
695
+ ElevenLabs: GEN_AI_PROVIDER_NAME_VALUE_ELEVENLABS,
696
+ Fal: GEN_AI_PROVIDER_NAME_VALUE_FAL,
697
+ Fireworks: GEN_AI_PROVIDER_NAME_VALUE_FIREWORKS,
698
+ GCPGemini: import_incubating.GEN_AI_PROVIDER_NAME_VALUE_GCP_GEMINI,
699
+ GCPGenAI: import_incubating.GEN_AI_PROVIDER_NAME_VALUE_GCP_GEN_AI,
700
+ GCPVertexAI: import_incubating.GEN_AI_PROVIDER_NAME_VALUE_GCP_VERTEX_AI,
701
+ Gladia: GEN_AI_PROVIDER_NAME_VALUE_GLADIA,
702
+ Groq: import_incubating.GEN_AI_PROVIDER_NAME_VALUE_GROQ,
703
+ Hume: GEN_AI_PROVIDER_NAME_VALUE_HUME,
704
+ IBMWatsonxAI: import_incubating.GEN_AI_PROVIDER_NAME_VALUE_IBM_WATSONX_AI,
705
+ Lmnt: GEN_AI_PROVIDER_NAME_VALUE_LMNT,
706
+ Luma: GEN_AI_PROVIDER_NAME_VALUE_LUMA,
707
+ MistralAI: import_incubating.GEN_AI_PROVIDER_NAME_VALUE_MISTRAL_AI,
708
+ OpenAI: import_incubating.GEN_AI_PROVIDER_NAME_VALUE_OPENAI,
709
+ Perplexity: import_incubating.GEN_AI_PROVIDER_NAME_VALUE_PERPLEXITY,
710
+ Replicate: GEN_AI_PROVIDER_NAME_VALUE_REPLICATE,
711
+ RevAI: GEN_AI_PROVIDER_NAME_VALUE_REVAI,
712
+ TogetherAI: GEN_AI_PROVIDER_NAME_VALUE_TOGETHERAI,
713
+ Vercel: GEN_AI_PROVIDER_NAME_VALUE_VERCEL,
714
+ XAI: import_incubating.GEN_AI_PROVIDER_NAME_VALUE_X_AI
715
+ }
716
+ },
717
+ /**
718
+ * Regular attributes
719
+ */
720
+ Agent: {
721
+ Description: import_incubating.ATTR_GEN_AI_AGENT_DESCRIPTION,
722
+ // not yet used by axiom-ai
723
+ ID: import_incubating.ATTR_GEN_AI_AGENT_ID,
724
+ // not yet used by axiom-ai
725
+ Name: import_incubating.ATTR_GEN_AI_AGENT_NAME
726
+ // not yet used by axiom-ai
727
+ },
728
+ Conversation: {
729
+ ID: import_incubating.ATTR_GEN_AI_CONVERSATION_ID
730
+ // not yet used by axiom-ai, anyway probably needs to be provided by user
731
+ },
732
+ Input: {
733
+ Messages: import_incubating.ATTR_GEN_AI_INPUT_MESSAGES
734
+ },
735
+ Operation: {
736
+ Name: import_incubating.ATTR_GEN_AI_OPERATION_NAME,
737
+ Name_Values: {
738
+ /**
739
+ * Note that "text_completion" is deprecated in favor of "chat" for both OpenAI and Anthropic
740
+ */
741
+ Chat: import_incubating.GEN_AI_OPERATION_NAME_VALUE_CHAT,
742
+ CreateAgent: import_incubating.GEN_AI_OPERATION_NAME_VALUE_CREATE_AGENT,
743
+ Embeddings: import_incubating.GEN_AI_OPERATION_NAME_VALUE_EMBEDDINGS,
744
+ ExecuteTool: import_incubating.GEN_AI_OPERATION_NAME_VALUE_EXECUTE_TOOL,
745
+ GenerateContent: import_incubating.GEN_AI_OPERATION_NAME_VALUE_GENERATE_CONTENT,
746
+ InvokeAgent: import_incubating.GEN_AI_OPERATION_NAME_VALUE_INVOKE_AGENT
747
+ }
748
+ },
749
+ Output: {
750
+ Messages: import_incubating.ATTR_GEN_AI_OUTPUT_MESSAGES,
751
+ Type: import_incubating.ATTR_GEN_AI_OUTPUT_TYPE,
752
+ Type_Values: {
753
+ Text: import_incubating.GEN_AI_OUTPUT_TYPE_VALUE_TEXT,
754
+ Json: import_incubating.GEN_AI_OUTPUT_TYPE_VALUE_JSON,
755
+ Image: import_incubating.GEN_AI_OUTPUT_TYPE_VALUE_IMAGE,
756
+ Speech: import_incubating.GEN_AI_OUTPUT_TYPE_VALUE_SPEECH
757
+ }
758
+ },
759
+ /**
760
+ * The provider that is hosting the model, eg AWS Bedrock
761
+ * There doesn't seem to be a semconv for this
762
+ */
763
+ Request: {
764
+ ChoiceCount: import_incubating.ATTR_GEN_AI_REQUEST_CHOICE_COUNT,
765
+ // not yet used by axiom-ai
766
+ EncodingFormats: import_incubating.ATTR_GEN_AI_REQUEST_ENCODING_FORMATS,
767
+ // not yet used by axiom-ai
768
+ FrequencyPenalty: import_incubating.ATTR_GEN_AI_REQUEST_FREQUENCY_PENALTY,
769
+ MaxTokens: import_incubating.ATTR_GEN_AI_REQUEST_MAX_TOKENS,
770
+ /**
771
+ * The model you asked for
772
+ */
773
+ Model: import_incubating.ATTR_GEN_AI_REQUEST_MODEL,
774
+ PresencePenalty: import_incubating.ATTR_GEN_AI_REQUEST_PRESENCE_PENALTY,
775
+ Seed: import_incubating.ATTR_GEN_AI_REQUEST_SEED,
776
+ StopSequences: import_incubating.ATTR_GEN_AI_REQUEST_STOP_SEQUENCES,
777
+ Temperature: import_incubating.ATTR_GEN_AI_REQUEST_TEMPERATURE,
778
+ TopK: import_incubating.ATTR_GEN_AI_REQUEST_TOP_K,
779
+ TopP: import_incubating.ATTR_GEN_AI_REQUEST_TOP_P
780
+ },
781
+ Response: {
782
+ FinishReasons: import_incubating.ATTR_GEN_AI_RESPONSE_FINISH_REASONS,
783
+ ID: import_incubating.ATTR_GEN_AI_RESPONSE_ID,
784
+ /**
785
+ * The model that was actually used (might be different bc routing) - only ever get this from the response, otherwise omit
786
+ */
787
+ Model: import_incubating.ATTR_GEN_AI_RESPONSE_MODEL
788
+ // somehow not landing on the span for google models? check up on this...
789
+ },
790
+ Tool: {
791
+ CallID: import_incubating.ATTR_GEN_AI_TOOL_CALL_ID,
792
+ Description: import_incubating.ATTR_GEN_AI_TOOL_DESCRIPTION,
793
+ Name: import_incubating.ATTR_GEN_AI_TOOL_NAME,
794
+ Type: import_incubating.ATTR_GEN_AI_TOOL_TYPE,
795
+ /**
796
+ * Note, OTel Semantic Convention suggest only putting tool inputs/outputs on the parent chat span
797
+ * But we at least want to give users THE OPTION to put them on the tool spans themselves as well
798
+ * Because it enables a lot of things with querying
799
+ * @see https://github.com/open-telemetry/semantic-conventions/releases/tag/v1.37.0
800
+ */
801
+ Arguments: ATTR_GEN_AI_TOOL_ARGUMENTS,
802
+ /**
803
+ * Note, OTel Semantic Convention suggest only putting tool inputs/outputs on the parent chat span
804
+ * But we at least want to give users THE OPTION to put them on the tool spans themselves as well
805
+ * Because it enables a lot of things with querying
806
+ * @see https://github.com/open-telemetry/semantic-conventions/releases/tag/v1.37.0
807
+ */
808
+ Message: ATTR_GEN_AI_TOOL_MESSAGE
809
+ },
810
+ Usage: {
811
+ InputTokens: import_incubating.ATTR_GEN_AI_USAGE_INPUT_TOKENS,
812
+ OutputTokens: import_incubating.ATTR_GEN_AI_USAGE_OUTPUT_TOKENS
813
+ }
814
+ },
815
+ Eval: {
816
+ ID: ATTR_EVAL_ID,
817
+ Name: ATTR_EVAL_NAME,
818
+ Version: ATTR_EVAL_VERSION,
819
+ Type: ATTR_EVAL_TYPE,
820
+ Baseline: {
821
+ ID: ATTR_EVAL_BASELINE_ID,
822
+ Name: ATTR_EVAL_BASELINE_NAME
823
+ },
824
+ Tags: ATTR_EVAL_TAGS,
825
+ Metadata: ATTR_EVAL_METADATA,
826
+ Collection: {
827
+ ID: ATTR_EVAL_COLLECTION_ID,
828
+ Name: ATTR_EVAL_COLLECTION_NAME,
829
+ Size: ATTR_EVAL_COLLECTION_SIZE
830
+ },
831
+ Config: {
832
+ Flags: ATTR_EVAL_CONFIG_FLAGS
833
+ },
834
+ Run: {
835
+ ID: ATTR_EVAL_RUN_ID
836
+ },
837
+ Case: {
838
+ Index: ATTR_EVAL_CASE_INDEX,
839
+ Input: ATTR_EVAL_CASE_INPUT,
840
+ Output: ATTR_EVAL_CASE_OUTPUT,
841
+ Expected: ATTR_EVAL_CASE_EXPECTED,
842
+ Scores: ATTR_EVAL_CASE_SCORES,
843
+ Metadata: ATTR_EVAL_CASE_METADATA
844
+ },
845
+ Task: {
846
+ Output: ATTR_EVAL_TASK_OUTPUT,
847
+ Name: ATTR_EVAL_TASK_NAME,
848
+ Type: ATTR_EVAL_TASK_TYPE
849
+ },
850
+ Score: {
851
+ Name: ATTR_EVAL_SCORE_NAME,
852
+ Value: ATTR_EVAL_SCORE_VALUE,
853
+ Threshold: ATTR_EVAL_SCORE_THRESHOLD,
854
+ Passed: ATTR_EVAL_SCORE_PASSED,
855
+ Metadata: ATTR_EVAL_SCORE_METADATA
856
+ },
857
+ User: {
858
+ Name: ATTR_EVAL_USER_NAME,
859
+ Email: ATTR_EVAL_USER_EMAIL
860
+ }
861
+ },
862
+ Error: {
863
+ Type: import_semantic_conventions.ATTR_ERROR_TYPE,
864
+ Message: import_incubating.ATTR_ERROR_MESSAGE
865
+ },
866
+ HTTP: {
867
+ Response: {
868
+ StatusCode: import_semantic_conventions.ATTR_HTTP_RESPONSE_STATUS_CODE
869
+ }
870
+ }
871
+ };
598
872
 
599
873
  // src/otel/startActiveSpan.ts
600
874
  var import_api2 = require("@opentelemetry/api");
@@ -605,7 +879,7 @@ var import_api4 = require("@opentelemetry/api");
605
879
  // package.json
606
880
  var package_default = {
607
881
  name: "axiom",
608
- version: "0.24.0",
882
+ version: "0.25.0",
609
883
  type: "module",
610
884
  author: "Axiom, Inc.",
611
885
  contributors: [
@@ -845,21 +1119,22 @@ var findEvaluationCases = async (evalId, config) => {
845
1119
  return payload.matches.length ? buildSpanTree(payload.matches) : null;
846
1120
  };
847
1121
  var mapSpanToEval = (span) => {
848
- const flagConfigRaw = span.data.attributes["eval.config.flags"] ?? span.data.attributes.custom["eval.config.flags"];
1122
+ const flagConfigRaw = span.data.attributes[Attr.Eval.Config.Flags] ?? span.data.attributes.custom[Attr.Eval.Config.Flags];
849
1123
  return {
850
- id: span.data.attributes.custom["eval.id"],
851
- name: span.data.attributes.custom["eval.name"],
852
- type: span.data.attributes.custom["eval.type"],
853
- version: span.data.attributes.custom["eval.version"],
1124
+ id: span.data.attributes.custom[Attr.Eval.ID],
1125
+ name: span.data.attributes.custom[Attr.Eval.Name],
1126
+ type: span.data.attributes.custom[Attr.Eval.Type],
1127
+ version: span.data.attributes.custom[Attr.Eval.Version],
854
1128
  collection: {
855
- name: span.data.attributes.custom["eval.collection.name"],
856
- size: span.data.attributes.custom["eval.collection.size"]
1129
+ name: span.data.attributes.custom[Attr.Eval.Collection.Name],
1130
+ size: span.data.attributes.custom[Attr.Eval.Collection.Size]
857
1131
  },
858
1132
  baseline: {
859
- id: span.data.attributes.custom["eval.baseline.id"],
860
- name: span.data.attributes.custom["eval.baseline.name"]
1133
+ id: span.data.attributes.custom[Attr.Eval.Baseline.ID],
1134
+ name: span.data.attributes.custom[Attr.Eval.Baseline.Name]
861
1135
  },
862
1136
  prompt: {
1137
+ // TODO: do we still want this?
863
1138
  model: span.data.attributes.custom["eval.prompt.model"],
864
1139
  params: span.data.attributes.custom["eval.prompt.params"]
865
1140
  },
@@ -867,10 +1142,10 @@ var mapSpanToEval = (span) => {
867
1142
  status: span.data.status.code,
868
1143
  traceId: span.data.trace_id,
869
1144
  runAt: span._time,
870
- tags: span.data.attributes.custom["eval.tags"].length ? JSON.parse(span.data.attributes.custom["eval.tags"]) : [],
1145
+ tags: span.data.attributes.custom[Attr.Eval.Tags].length ? JSON.parse(span.data.attributes.custom[Attr.Eval.Tags]) : [],
871
1146
  user: {
872
- name: span.data.attributes.custom["eval.user.name"],
873
- email: span.data.attributes.custom["eval.user.email"]
1147
+ name: span.data.attributes.custom[Attr.Eval.User.Name],
1148
+ email: span.data.attributes.custom[Attr.Eval.User.Email]
874
1149
  },
875
1150
  cases: [],
876
1151
  flagConfig: flagConfigRaw ? JSON.parse(flagConfigRaw) : void 0
@@ -885,19 +1160,17 @@ var mapSpanToCase = (item) => {
885
1160
  } else {
886
1161
  duration = d;
887
1162
  }
888
- const runtimeFlagsRaw = data.attributes.custom["eval.case.config.runtime_flags"];
889
1163
  return {
890
- index: data.attributes.custom["eval.case.index"],
891
- input: data.attributes.custom["eval.case.input"],
892
- output: data.attributes.custom["eval.case.output"],
893
- expected: data.attributes.custom["eval.case.expected"],
1164
+ index: data.attributes.custom[Attr.Eval.Case.Index],
1165
+ input: data.attributes.custom[Attr.Eval.Case.Input],
1166
+ output: data.attributes.custom[Attr.Eval.Case.Output],
1167
+ expected: data.attributes.custom[Attr.Eval.Case.Expected],
894
1168
  duration,
895
1169
  status: data.status.code,
896
- scores: data.attributes.custom["eval.case.scores"] ? JSON.parse(data.attributes.custom["eval.case.scores"]) : {},
1170
+ scores: data.attributes.custom[Attr.Eval.Case.Scores] ? JSON.parse(data.attributes.custom[Attr.Eval.Case.Scores]) : {},
897
1171
  runAt: item._time,
898
1172
  spanId: data.span_id,
899
- traceId: data.trace_id,
900
- runtimeFlags: runtimeFlagsRaw ? JSON.parse(runtimeFlagsRaw) : void 0
1173
+ traceId: data.trace_id
901
1174
  };
902
1175
  };
903
1176
  var buildSpanTree = (spans) => {
@@ -959,10 +1232,10 @@ var buildSpanTree = (spans) => {
959
1232
  );
960
1233
  caseData.scores = {};
961
1234
  scoreSpans.forEach((score) => {
962
- const name = score.data.attributes.custom["eval.score.name"];
1235
+ const name = score.data.attributes.custom[Attr.Eval.Score.Name];
963
1236
  caseData.scores[name] = {
964
1237
  name,
965
- value: score.data.attributes.custom["eval.score.value"],
1238
+ value: score.data.attributes.custom[Attr.Eval.Score.Value],
966
1239
  metadata: {
967
1240
  error: score.data.attributes.error
968
1241
  }
@@ -1725,11 +1998,11 @@ function setupEvalProvider(connection) {
1725
1998
  axiomProvider = new import_sdk_trace_node.NodeTracerProvider({
1726
1999
  resource: (0, import_resources.resourceFromAttributes)({
1727
2000
  ["service.name"]: "axiom",
1728
- ["service.version"]: "0.24.0"
2001
+ ["service.version"]: "0.25.0"
1729
2002
  }),
1730
2003
  spanProcessors: [processor]
1731
2004
  });
1732
- axiomTracer = axiomProvider.getTracer("axiom", "0.24.0");
2005
+ axiomTracer = axiomProvider.getTracer("axiom", "0.25.0");
1733
2006
  }
1734
2007
  async function initInstrumentation(config) {
1735
2008
  if (initialized) {
@@ -1741,7 +2014,7 @@ async function initInstrumentation(config) {
1741
2014
  }
1742
2015
  initializationPromise = (async () => {
1743
2016
  if (!config.enabled) {
1744
- axiomTracer = import_api10.trace.getTracer("axiom", "0.24.0");
2017
+ axiomTracer = import_api10.trace.getTracer("axiom", "0.25.0");
1745
2018
  initialized = true;
1746
2019
  return;
1747
2020
  }
@@ -1804,10 +2077,32 @@ var flush = async () => {
1804
2077
  };
1805
2078
 
1806
2079
  // src/evals/run-vitest.ts
2080
+ var printCollectedEvals = (result, rootDir) => {
2081
+ if (!result.testModules || result.testModules.length === 0) {
2082
+ console.log(u.yellow("\nNo evaluations found\n"));
2083
+ return;
2084
+ }
2085
+ console.log(u.bold("\nFound evaluations:\n"));
2086
+ let totalEvals = 0;
2087
+ let totalCases = 0;
2088
+ for (const module2 of result.testModules) {
2089
+ const relativePath = import_node_path3.default.relative(rootDir, module2.moduleId);
2090
+ for (const suite of module2.children.suites()) {
2091
+ totalEvals++;
2092
+ const caseCount = suite.children.size;
2093
+ totalCases += caseCount;
2094
+ console.log(u.green(`\u2713 ${suite.name} (${caseCount} cases)`));
2095
+ console.log(u.dim(` ${relativePath}`));
2096
+ console.log("");
2097
+ }
2098
+ }
2099
+ console.log(u.bold(`Total: ${totalEvals} evaluations, ${totalCases} test cases
2100
+ `));
2101
+ };
1807
2102
  var runVitest = async (dir, opts) => {
1808
2103
  setAxiomConfig(opts.config);
1809
2104
  await initInstrumentation({
1810
- enabled: !opts.debug,
2105
+ enabled: !opts.debug && !opts.list,
1811
2106
  config: opts.config
1812
2107
  });
1813
2108
  const providedConfig = {
@@ -1821,6 +2116,9 @@ var runVitest = async (dir, opts) => {
1821
2116
  if (opts.debug) {
1822
2117
  console.log(u.bgWhite(u.blackBright(" Debug mode enabled ")));
1823
2118
  }
2119
+ if (opts.list) {
2120
+ console.log(u.bgWhite(u.blackBright(" List mode ")));
2121
+ }
1824
2122
  const vi = await (0, import_node.createVitest)("test", {
1825
2123
  root: dir ? dir : process.cwd(),
1826
2124
  mode: "test",
@@ -1840,11 +2138,18 @@ var runVitest = async (dir, opts) => {
1840
2138
  provide: {
1841
2139
  baseline: opts.baseline,
1842
2140
  debug: opts.debug,
2141
+ list: opts.list,
1843
2142
  overrides: opts.overrides,
1844
2143
  axiomConfig: providedConfig,
1845
2144
  runId: opts.runId
1846
2145
  }
1847
2146
  });
2147
+ if (opts.list) {
2148
+ const result = await vi.collect();
2149
+ printCollectedEvals(result, dir || process.cwd());
2150
+ await vi.close();
2151
+ process.exit(0);
2152
+ }
1848
2153
  await vi.start();
1849
2154
  const dispose = (0, import_node.registerConsoleShortcuts)(vi, process.stdin, process.stdout);
1850
2155
  if (!vi.shouldKeepServer()) {
@@ -1909,7 +2214,7 @@ var loadEvalCommand = (program2, flagOverrides = {}) => {
1909
2214
  ".",
1910
2215
  "any *.eval.ts file in current directory"
1911
2216
  )
1912
- ).option("-w, --watch true", "keep server running and watch for changes", false).option("-t, --token <TOKEN>", "axiom token", process.env.AXIOM_TOKEN).option("-d, --dataset <DATASET>", "axiom dataset name", process.env.AXIOM_DATASET).option("-u, --url <AXIOM URL>", "axiom url", process.env.AXIOM_URL ?? "https://api.axiom.co").option("-b, --baseline <BASELINE ID>", "id of baseline evaluation to compare against").option("--debug", "run locally without sending to Axiom or loading baselines", false).action(async (target, options) => {
2217
+ ).option("-w, --watch true", "keep server running and watch for changes", false).option("-t, --token <TOKEN>", "axiom token", process.env.AXIOM_TOKEN).option("-d, --dataset <DATASET>", "axiom dataset name", process.env.AXIOM_DATASET).option("-u, --url <AXIOM URL>", "axiom url", process.env.AXIOM_URL ?? "https://api.axiom.co").option("-b, --baseline <BASELINE ID>", "id of baseline evaluation to compare against").option("--debug", "run locally without sending to Axiom or loading baselines", false).option("--list", "list evaluations and test cases without running them", false).action(async (target, options) => {
1913
2218
  try {
1914
2219
  if (options.debug) {
1915
2220
  process.env.AXIOM_DEBUG = "true";
@@ -1952,6 +2257,7 @@ var loadEvalCommand = (program2, flagOverrides = {}) => {
1952
2257
  exclude,
1953
2258
  testNamePattern,
1954
2259
  debug: options.debug,
2260
+ list: options.list,
1955
2261
  overrides: flagOverrides,
1956
2262
  config,
1957
2263
  runId
@@ -1973,7 +2279,7 @@ var loadEvalCommand = (program2, flagOverrides = {}) => {
1973
2279
  // src/cli/utils/parse-flag-overrides.ts
1974
2280
  var import_zod5 = require("zod");
1975
2281
  var import_node_fs2 = require("fs");
1976
- var import_node_path3 = require("path");
2282
+ var import_node_path4 = require("path");
1977
2283
  var FLAG_RE = /^--flag\.([^=]+)(?:=(.*))?$/;
1978
2284
  var CONFIG_RE = /^--flags-config(?:=(.*))?$/;
1979
2285
  function ensureNoSpaceSeparatedSyntax(flagName, value, nextToken, flagType) {
@@ -2002,8 +2308,8 @@ function coerceValue(raw) {
2002
2308
  return raw;
2003
2309
  }
2004
2310
  }
2005
- function loadConfigFile(path3) {
2006
- const abs = (0, import_node_path3.resolve)(process.cwd(), path3);
2311
+ function loadConfigFile(path4) {
2312
+ const abs = (0, import_node_path4.resolve)(process.cwd(), path4);
2007
2313
  try {
2008
2314
  const contents = (0, import_node_fs2.readFileSync)(abs, "utf8");
2009
2315
  const parsed = JSON.parse(contents);
@@ -2015,7 +2321,7 @@ function loadConfigFile(path3) {
2015
2321
  }
2016
2322
  return parsed;
2017
2323
  } catch (err) {
2018
- console.error(`\u274C Could not read or parse flags config "${path3}": ${err.message}`);
2324
+ console.error(`\u274C Could not read or parse flags config "${path4}": ${err.message}`);
2019
2325
  process.exit(1);
2020
2326
  }
2021
2327
  }
@@ -2078,7 +2384,7 @@ var import_commander4 = require("commander");
2078
2384
  var loadVersionCommand = (program2) => {
2079
2385
  return program2.addCommand(
2080
2386
  new import_commander4.Command("version").description("cli version").action(() => {
2081
- console.log("0.24.0");
2387
+ console.log("0.25.0");
2082
2388
  })
2083
2389
  );
2084
2390
  };
@@ -2088,7 +2394,7 @@ var { loadEnvConfig } = import_env.default;
2088
2394
  loadEnvConfig(process.cwd());
2089
2395
  var { cleanedArgv, overrides } = extractOverrides(process.argv.slice(2));
2090
2396
  var program = new import_commander5.Command();
2091
- program.name("axiom").description("Axiom's CLI to manage your objects and run evals").version("0.24.0");
2397
+ program.name("axiom").description("Axiom's CLI to manage your objects and run evals").version("0.25.0");
2092
2398
  loadPushCommand(program);
2093
2399
  loadPullCommand(program);
2094
2400
  loadEvalCommand(program, overrides);