axiom 0.27.0 → 0.28.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/bin.cjs CHANGED
@@ -249,290 +249,7 @@ var import_api5 = require("@opentelemetry/api");
249
249
 
250
250
  // src/otel/semconv/attributes.ts
251
251
  var import_semantic_conventions = require("@opentelemetry/semantic-conventions");
252
-
253
- // src/otel/semconv/eval_proposal.ts
254
- var ATTR_EVAL_ID = "eval.id";
255
- var ATTR_EVAL_NAME = "eval.name";
256
- var ATTR_EVAL_VERSION = "eval.version";
257
- var ATTR_EVAL_TYPE = "eval.type";
258
- var ATTR_EVAL_TAGS = "eval.tags";
259
- var ATTR_EVAL_BASELINE_ID = "eval.baseline.id";
260
- var ATTR_EVAL_BASELINE_NAME = "eval.baseline.name";
261
- var ATTR_EVAL_BASELINE_VERSION = "eval.baseline.version";
262
- var ATTR_EVAL_METADATA = "eval.metadata";
263
- var ATTR_EVAL_CAPABILITY_NAME = "eval.capability.name";
264
- var ATTR_EVAL_STEP_NAME = "eval.step.name";
265
- var ATTR_EVAL_COLLECTION_ID = "eval.collection.id";
266
- var ATTR_EVAL_COLLECTION_SIZE = "eval.collection.size";
267
- var ATTR_EVAL_COLLECTION_NAME = "eval.collection.name";
268
- var ATTR_EVAL_CONFIG_FLAGS = "eval.config.flags";
269
- var ATTR_EVAL_CASE_INDEX = "eval.case.index";
270
- var ATTR_EVAL_CASE_INPUT = "eval.case.input";
271
- var ATTR_EVAL_CASE_OUTPUT = "eval.case.output";
272
- var ATTR_EVAL_CASE_EXPECTED = "eval.case.expected";
273
- var ATTR_EVAL_CASE_SCORES = "eval.case.scores";
274
- var ATTR_EVAL_CASE_METADATA = "eval.case.metadata";
275
- var ATTR_EVAL_TASK_OUTPUT = "eval.task.output";
276
- var ATTR_EVAL_TASK_NAME = "eval.task.name";
277
- var ATTR_EVAL_TASK_TYPE = "eval.task.type";
278
- var ATTR_EVAL_RUN_ID = "eval.run.id";
279
- var ATTR_EVAL_SCORE_NAME = "eval.score.name";
280
- var ATTR_EVAL_SCORE_VALUE = "eval.score.value";
281
- var ATTR_EVAL_SCORE_THRESHOLD = "eval.score.threshold";
282
- var ATTR_EVAL_SCORE_PASSED = "eval.score.passed";
283
- var ATTR_EVAL_SCORE_METADATA = "eval.score.metadata";
284
- var ATTR_EVAL_USER_NAME = "eval.user.name";
285
- var ATTR_EVAL_USER_EMAIL = "eval.user.email";
286
-
287
- // src/otel/semconv/attributes.ts
288
252
  var import_incubating = require("@opentelemetry/semantic-conventions/incubating");
289
- var ATTR_AXIOM_GEN_AI_SCHEMA_URL = "axiom.gen_ai.schema_url";
290
- var ATTR_AXIOM_GEN_AI_SDK_NAME = "axiom.gen_ai.sdk.name";
291
- var ATTR_AXIOM_GEN_AI_SDK_VERSION = "axiom.gen_ai.sdk.version";
292
- var ATTR_GEN_AI_CAPABILITY_NAME = "gen_ai.capability.name";
293
- var ATTR_GEN_AI_STEP_NAME = "gen_ai.step.name";
294
- var ATTR_GEN_AI_TOOL_ARGUMENTS = "gen_ai.tool.arguments";
295
- var ATTR_GEN_AI_TOOL_MESSAGE = "gen_ai.tool.message";
296
- var GEN_AI_PROVIDER_NAME_VALUE_ASSEMBLYAI = "assemblyai";
297
- var GEN_AI_PROVIDER_NAME_VALUE_CEREBRAS = "cerebras";
298
- var GEN_AI_PROVIDER_NAME_VALUE_DEEPGRAM = "deepgram";
299
- var GEN_AI_PROVIDER_NAME_VALUE_DEEPINFRA = "deepinfra";
300
- var GEN_AI_PROVIDER_NAME_VALUE_ELEVENLABS = "elevenlabs";
301
- var GEN_AI_PROVIDER_NAME_VALUE_FAL = "fal";
302
- var GEN_AI_PROVIDER_NAME_VALUE_FIREWORKS = "fireworks";
303
- var GEN_AI_PROVIDER_NAME_VALUE_GLADIA = "gladia";
304
- var GEN_AI_PROVIDER_NAME_VALUE_HUME = "hume";
305
- var GEN_AI_PROVIDER_NAME_VALUE_LMNT = "lmnt";
306
- var GEN_AI_PROVIDER_NAME_VALUE_LUMA = "luma";
307
- var GEN_AI_PROVIDER_NAME_VALUE_REPLICATE = "replicate";
308
- var GEN_AI_PROVIDER_NAME_VALUE_REVAI = "revai";
309
- var GEN_AI_PROVIDER_NAME_VALUE_TOGETHERAI = "togetherai";
310
- var GEN_AI_PROVIDER_NAME_VALUE_VERCEL = "vercel";
311
- var Attr = {
312
- __EXPERIMENTAL_Flag: (flagName) => `flag.${flagName}`,
313
- __EXPERIMENTAL_Fact: (factName) => `fact.${factName}`,
314
- Axiom: {
315
- GenAI: {
316
- SchemaURL: ATTR_AXIOM_GEN_AI_SCHEMA_URL,
317
- SDK: {
318
- Name: ATTR_AXIOM_GEN_AI_SDK_NAME,
319
- Version: ATTR_AXIOM_GEN_AI_SDK_VERSION
320
- }
321
- }
322
- },
323
- GenAI: {
324
- PromptMetadata: {
325
- ID: "axiom.gen_ai.prompt.id",
326
- Name: "axiom.gen_ai.prompt.name",
327
- Slug: "axiom.gen_ai.prompt.slug",
328
- Version: "axiom.gen_ai.prompt.version"
329
- },
330
- /**
331
- * These two are used to identify the span
332
- */
333
- Capability: {
334
- Name: ATTR_GEN_AI_CAPABILITY_NAME
335
- },
336
- Step: {
337
- Name: ATTR_GEN_AI_STEP_NAME
338
- },
339
- Provider: {
340
- Name: import_incubating.ATTR_GEN_AI_PROVIDER_NAME,
341
- Name_Values: {
342
- Anthropic: import_incubating.GEN_AI_PROVIDER_NAME_VALUE_ANTHROPIC,
343
- AssemblyAI: GEN_AI_PROVIDER_NAME_VALUE_ASSEMBLYAI,
344
- AWSBedrock: import_incubating.GEN_AI_PROVIDER_NAME_VALUE_AWS_BEDROCK,
345
- AzureAIInference: import_incubating.GEN_AI_PROVIDER_NAME_VALUE_AZURE_AI_INFERENCE,
346
- AzureAIOpenAI: import_incubating.GEN_AI_PROVIDER_NAME_VALUE_AZURE_AI_OPENAI,
347
- Cerebras: GEN_AI_PROVIDER_NAME_VALUE_CEREBRAS,
348
- Cohere: import_incubating.GEN_AI_PROVIDER_NAME_VALUE_COHERE,
349
- Deepgram: GEN_AI_PROVIDER_NAME_VALUE_DEEPGRAM,
350
- DeepInfra: GEN_AI_PROVIDER_NAME_VALUE_DEEPINFRA,
351
- Deepseek: import_incubating.GEN_AI_PROVIDER_NAME_VALUE_DEEPSEEK,
352
- ElevenLabs: GEN_AI_PROVIDER_NAME_VALUE_ELEVENLABS,
353
- Fal: GEN_AI_PROVIDER_NAME_VALUE_FAL,
354
- Fireworks: GEN_AI_PROVIDER_NAME_VALUE_FIREWORKS,
355
- GCPGemini: import_incubating.GEN_AI_PROVIDER_NAME_VALUE_GCP_GEMINI,
356
- GCPGenAI: import_incubating.GEN_AI_PROVIDER_NAME_VALUE_GCP_GEN_AI,
357
- GCPVertexAI: import_incubating.GEN_AI_PROVIDER_NAME_VALUE_GCP_VERTEX_AI,
358
- Gladia: GEN_AI_PROVIDER_NAME_VALUE_GLADIA,
359
- Groq: import_incubating.GEN_AI_PROVIDER_NAME_VALUE_GROQ,
360
- Hume: GEN_AI_PROVIDER_NAME_VALUE_HUME,
361
- IBMWatsonxAI: import_incubating.GEN_AI_PROVIDER_NAME_VALUE_IBM_WATSONX_AI,
362
- Lmnt: GEN_AI_PROVIDER_NAME_VALUE_LMNT,
363
- Luma: GEN_AI_PROVIDER_NAME_VALUE_LUMA,
364
- MistralAI: import_incubating.GEN_AI_PROVIDER_NAME_VALUE_MISTRAL_AI,
365
- OpenAI: import_incubating.GEN_AI_PROVIDER_NAME_VALUE_OPENAI,
366
- Perplexity: import_incubating.GEN_AI_PROVIDER_NAME_VALUE_PERPLEXITY,
367
- Replicate: GEN_AI_PROVIDER_NAME_VALUE_REPLICATE,
368
- RevAI: GEN_AI_PROVIDER_NAME_VALUE_REVAI,
369
- TogetherAI: GEN_AI_PROVIDER_NAME_VALUE_TOGETHERAI,
370
- Vercel: GEN_AI_PROVIDER_NAME_VALUE_VERCEL,
371
- XAI: import_incubating.GEN_AI_PROVIDER_NAME_VALUE_X_AI
372
- }
373
- },
374
- /**
375
- * Regular attributes
376
- */
377
- Agent: {
378
- Description: import_incubating.ATTR_GEN_AI_AGENT_DESCRIPTION,
379
- // not yet used by axiom-ai
380
- ID: import_incubating.ATTR_GEN_AI_AGENT_ID,
381
- // not yet used by axiom-ai
382
- Name: import_incubating.ATTR_GEN_AI_AGENT_NAME
383
- // not yet used by axiom-ai
384
- },
385
- Conversation: {
386
- ID: import_incubating.ATTR_GEN_AI_CONVERSATION_ID
387
- // not yet used by axiom-ai, anyway probably needs to be provided by user
388
- },
389
- Input: {
390
- Messages: import_incubating.ATTR_GEN_AI_INPUT_MESSAGES
391
- },
392
- Operation: {
393
- Name: import_incubating.ATTR_GEN_AI_OPERATION_NAME,
394
- Name_Values: {
395
- /**
396
- * Note that "text_completion" is deprecated in favor of "chat" for both OpenAI and Anthropic
397
- */
398
- Chat: import_incubating.GEN_AI_OPERATION_NAME_VALUE_CHAT,
399
- CreateAgent: import_incubating.GEN_AI_OPERATION_NAME_VALUE_CREATE_AGENT,
400
- Embeddings: import_incubating.GEN_AI_OPERATION_NAME_VALUE_EMBEDDINGS,
401
- ExecuteTool: import_incubating.GEN_AI_OPERATION_NAME_VALUE_EXECUTE_TOOL,
402
- GenerateContent: import_incubating.GEN_AI_OPERATION_NAME_VALUE_GENERATE_CONTENT,
403
- InvokeAgent: import_incubating.GEN_AI_OPERATION_NAME_VALUE_INVOKE_AGENT
404
- }
405
- },
406
- Output: {
407
- Messages: import_incubating.ATTR_GEN_AI_OUTPUT_MESSAGES,
408
- Type: import_incubating.ATTR_GEN_AI_OUTPUT_TYPE,
409
- Type_Values: {
410
- Text: import_incubating.GEN_AI_OUTPUT_TYPE_VALUE_TEXT,
411
- Json: import_incubating.GEN_AI_OUTPUT_TYPE_VALUE_JSON,
412
- Image: import_incubating.GEN_AI_OUTPUT_TYPE_VALUE_IMAGE,
413
- Speech: import_incubating.GEN_AI_OUTPUT_TYPE_VALUE_SPEECH
414
- }
415
- },
416
- /**
417
- * The provider that is hosting the model, eg AWS Bedrock
418
- * There doesn't seem to be a semconv for this
419
- */
420
- Request: {
421
- ChoiceCount: import_incubating.ATTR_GEN_AI_REQUEST_CHOICE_COUNT,
422
- // not yet used by axiom-ai
423
- EncodingFormats: import_incubating.ATTR_GEN_AI_REQUEST_ENCODING_FORMATS,
424
- // not yet used by axiom-ai
425
- FrequencyPenalty: import_incubating.ATTR_GEN_AI_REQUEST_FREQUENCY_PENALTY,
426
- MaxTokens: import_incubating.ATTR_GEN_AI_REQUEST_MAX_TOKENS,
427
- /**
428
- * The model you asked for
429
- */
430
- Model: import_incubating.ATTR_GEN_AI_REQUEST_MODEL,
431
- PresencePenalty: import_incubating.ATTR_GEN_AI_REQUEST_PRESENCE_PENALTY,
432
- Seed: import_incubating.ATTR_GEN_AI_REQUEST_SEED,
433
- StopSequences: import_incubating.ATTR_GEN_AI_REQUEST_STOP_SEQUENCES,
434
- Temperature: import_incubating.ATTR_GEN_AI_REQUEST_TEMPERATURE,
435
- TopK: import_incubating.ATTR_GEN_AI_REQUEST_TOP_K,
436
- TopP: import_incubating.ATTR_GEN_AI_REQUEST_TOP_P
437
- },
438
- Response: {
439
- FinishReasons: import_incubating.ATTR_GEN_AI_RESPONSE_FINISH_REASONS,
440
- ID: import_incubating.ATTR_GEN_AI_RESPONSE_ID,
441
- /**
442
- * The model that was actually used (might be different bc routing) - only ever get this from the response, otherwise omit
443
- */
444
- Model: import_incubating.ATTR_GEN_AI_RESPONSE_MODEL
445
- // somehow not landing on the span for google models? check up on this...
446
- },
447
- Tool: {
448
- CallID: import_incubating.ATTR_GEN_AI_TOOL_CALL_ID,
449
- Description: import_incubating.ATTR_GEN_AI_TOOL_DESCRIPTION,
450
- Name: import_incubating.ATTR_GEN_AI_TOOL_NAME,
451
- Type: import_incubating.ATTR_GEN_AI_TOOL_TYPE,
452
- /**
453
- * Note, OTel Semantic Convention suggest only putting tool inputs/outputs on the parent chat span
454
- * But we at least want to give users THE OPTION to put them on the tool spans themselves as well
455
- * Because it enables a lot of things with querying
456
- * @see https://github.com/open-telemetry/semantic-conventions/releases/tag/v1.37.0
457
- */
458
- Arguments: ATTR_GEN_AI_TOOL_ARGUMENTS,
459
- /**
460
- * Note, OTel Semantic Convention suggest only putting tool inputs/outputs on the parent chat span
461
- * But we at least want to give users THE OPTION to put them on the tool spans themselves as well
462
- * Because it enables a lot of things with querying
463
- * @see https://github.com/open-telemetry/semantic-conventions/releases/tag/v1.37.0
464
- */
465
- Message: ATTR_GEN_AI_TOOL_MESSAGE
466
- },
467
- Usage: {
468
- InputTokens: import_incubating.ATTR_GEN_AI_USAGE_INPUT_TOKENS,
469
- OutputTokens: import_incubating.ATTR_GEN_AI_USAGE_OUTPUT_TOKENS
470
- }
471
- },
472
- Eval: {
473
- ID: ATTR_EVAL_ID,
474
- Name: ATTR_EVAL_NAME,
475
- Version: ATTR_EVAL_VERSION,
476
- Type: ATTR_EVAL_TYPE,
477
- Baseline: {
478
- ID: ATTR_EVAL_BASELINE_ID,
479
- Name: ATTR_EVAL_BASELINE_NAME,
480
- Version: ATTR_EVAL_BASELINE_VERSION
481
- },
482
- Capability: {
483
- Name: ATTR_EVAL_CAPABILITY_NAME
484
- },
485
- Step: {
486
- Name: ATTR_EVAL_STEP_NAME
487
- },
488
- Tags: ATTR_EVAL_TAGS,
489
- Metadata: ATTR_EVAL_METADATA,
490
- Collection: {
491
- ID: ATTR_EVAL_COLLECTION_ID,
492
- Name: ATTR_EVAL_COLLECTION_NAME,
493
- Size: ATTR_EVAL_COLLECTION_SIZE
494
- },
495
- Config: {
496
- Flags: ATTR_EVAL_CONFIG_FLAGS
497
- },
498
- Run: {
499
- ID: ATTR_EVAL_RUN_ID
500
- },
501
- Case: {
502
- Index: ATTR_EVAL_CASE_INDEX,
503
- Input: ATTR_EVAL_CASE_INPUT,
504
- Output: ATTR_EVAL_CASE_OUTPUT,
505
- Expected: ATTR_EVAL_CASE_EXPECTED,
506
- Scores: ATTR_EVAL_CASE_SCORES,
507
- Metadata: ATTR_EVAL_CASE_METADATA
508
- },
509
- Task: {
510
- Output: ATTR_EVAL_TASK_OUTPUT,
511
- Name: ATTR_EVAL_TASK_NAME,
512
- Type: ATTR_EVAL_TASK_TYPE
513
- },
514
- Score: {
515
- Name: ATTR_EVAL_SCORE_NAME,
516
- Value: ATTR_EVAL_SCORE_VALUE,
517
- Threshold: ATTR_EVAL_SCORE_THRESHOLD,
518
- Passed: ATTR_EVAL_SCORE_PASSED,
519
- Metadata: ATTR_EVAL_SCORE_METADATA
520
- },
521
- User: {
522
- Name: ATTR_EVAL_USER_NAME,
523
- Email: ATTR_EVAL_USER_EMAIL
524
- }
525
- },
526
- Error: {
527
- Type: import_semantic_conventions.ATTR_ERROR_TYPE,
528
- Message: import_incubating.ATTR_ERROR_MESSAGE
529
- },
530
- HTTP: {
531
- Response: {
532
- StatusCode: import_semantic_conventions.ATTR_HTTP_RESPONSE_STATUS_CODE
533
- }
534
- }
535
- };
536
253
 
537
254
  // src/otel/startActiveSpan.ts
538
255
  var import_api2 = require("@opentelemetry/api");
@@ -543,7 +260,7 @@ var import_api4 = require("@opentelemetry/api");
543
260
  // package.json
544
261
  var package_default = {
545
262
  name: "axiom",
546
- version: "0.27.0",
263
+ version: "0.28.0",
547
264
  type: "module",
548
265
  author: "Axiom, Inc.",
549
266
  contributors: [
@@ -747,191 +464,6 @@ function withEvalContext(options = {}, fn) {
747
464
  );
748
465
  }
749
466
 
750
- // src/config/resolver.ts
751
- var buildConsoleUrl = (urlString) => {
752
- const url = new URL(urlString);
753
- return `${url.protocol}//app.${url.host.split("api.").at(-1)}`;
754
- };
755
- function resolveAxiomConnection(config) {
756
- let consoleEndpointUrl = buildConsoleUrl(config.eval.url);
757
- if ("__overrideEndpointUrl" in config.eval) {
758
- consoleEndpointUrl = config.eval.__overrideEndpointUrl;
759
- }
760
- return {
761
- url: config.eval.url,
762
- consoleEndpointUrl,
763
- token: config.eval.token,
764
- dataset: config.eval.dataset,
765
- orgId: config.eval.orgId
766
- };
767
- }
768
-
769
- // src/cli/errors.ts
770
- var AxiomCLIError = class extends Error {
771
- constructor(message) {
772
- super(message);
773
- this.name = "AxiomCLIError";
774
- }
775
- };
776
- function errorToString(error) {
777
- if (typeof error === "string") {
778
- return error;
779
- }
780
- if (error instanceof Error) {
781
- return error.message;
782
- }
783
- return JSON.stringify(error);
784
- }
785
-
786
- // src/evals/eval.service.ts
787
- var findEvaluationCases = async (evalId, config) => {
788
- const { dataset, url, token, orgId } = resolveAxiomConnection(config);
789
- const apl = `['${dataset}'] | where trace_id == "${evalId}" | order by _time`;
790
- const headers = new Headers({
791
- Authorization: `Bearer ${token}`,
792
- "Content-Type": "application/json",
793
- ...orgId ? { "X-AXIOM-ORG-ID": orgId } : {}
794
- });
795
- const resp = await fetch(`${url}/v1/datasets/_apl?format=legacy`, {
796
- headers,
797
- method: "POST",
798
- body: JSON.stringify({ apl })
799
- });
800
- const payload = await resp.json();
801
- if (!resp.ok) {
802
- throw new Error(`Failed to query evaluation cases: ${payload.message || resp.statusText}`);
803
- }
804
- return payload.matches.length ? buildSpanTree(payload.matches) : null;
805
- };
806
- var mapSpanToEval = (span) => {
807
- const flagConfigRaw = span.data.attributes[Attr.Eval.Config.Flags] ?? span.data.attributes.custom[Attr.Eval.Config.Flags];
808
- return {
809
- id: span.data.attributes.custom[Attr.Eval.ID],
810
- name: span.data.attributes.custom[Attr.Eval.Name],
811
- type: span.data.attributes.custom[Attr.Eval.Type],
812
- version: span.data.attributes.custom[Attr.Eval.Version],
813
- collection: {
814
- name: span.data.attributes.custom[Attr.Eval.Collection.Name],
815
- size: span.data.attributes.custom[Attr.Eval.Collection.Size]
816
- },
817
- baseline: {
818
- id: span.data.attributes.custom[Attr.Eval.Baseline.ID],
819
- name: span.data.attributes.custom[Attr.Eval.Baseline.Name]
820
- },
821
- prompt: {
822
- // TODO: do we still want this?
823
- model: span.data.attributes.custom["eval.prompt.model"],
824
- params: span.data.attributes.custom["eval.prompt.params"]
825
- },
826
- duration: span.data.duration,
827
- status: span.data.status.code,
828
- traceId: span.data.trace_id,
829
- runAt: span._time,
830
- tags: span.data.attributes.custom[Attr.Eval.Tags].length ? JSON.parse(span.data.attributes.custom[Attr.Eval.Tags]) : [],
831
- user: {
832
- name: span.data.attributes.custom[Attr.Eval.User.Name],
833
- email: span.data.attributes.custom[Attr.Eval.User.Email]
834
- },
835
- cases: [],
836
- flagConfig: flagConfigRaw ? JSON.parse(flagConfigRaw) : void 0
837
- };
838
- };
839
- var mapSpanToCase = (item) => {
840
- const data = item.data;
841
- const d = data.duration;
842
- let duration = "-";
843
- if (d.endsWith("s")) {
844
- duration = `${Number(d.replace("s", "")).toFixed(2)}s`;
845
- } else {
846
- duration = d;
847
- }
848
- return {
849
- index: data.attributes.custom[Attr.Eval.Case.Index],
850
- input: data.attributes.custom[Attr.Eval.Case.Input],
851
- output: data.attributes.custom[Attr.Eval.Case.Output],
852
- expected: data.attributes.custom[Attr.Eval.Case.Expected],
853
- duration,
854
- status: data.status.code,
855
- scores: data.attributes.custom[Attr.Eval.Case.Scores] ? JSON.parse(data.attributes.custom[Attr.Eval.Case.Scores]) : {},
856
- runAt: item._time,
857
- spanId: data.span_id,
858
- traceId: data.trace_id
859
- };
860
- };
861
- var buildSpanTree = (spans) => {
862
- if (!spans.length) {
863
- return null;
864
- }
865
- const evalSpan = spans.find((span) => span.data.attributes.gen_ai.operation.name === "eval");
866
- if (!evalSpan) {
867
- return null;
868
- }
869
- const rootSpan = mapSpanToEval(evalSpan);
870
- const caseSpans = spans.filter((span) => span.data.name.startsWith("case"));
871
- for (const caseSpan of caseSpans) {
872
- const caseData = mapSpanToCase(caseSpan);
873
- const taskSpans = spans.filter(
874
- (span) => span.data.name.startsWith("task") && span.data.parent_span_id === caseSpan.data.span_id
875
- );
876
- if (taskSpans.length > 0) {
877
- const taskSpan = taskSpans[0];
878
- const chatSpans = spans.filter(
879
- (span) => span.data.name.startsWith("chat") && span.data.parent_span_id === taskSpan.data.span_id
880
- );
881
- const chatData = chatSpans.map((chatSpan) => ({
882
- operation: chatSpan.data.attributes.custom?.operation || "",
883
- capability: chatSpan.data.attributes.custom?.capability || "",
884
- step: chatSpan.data.attributes.custom?.step || "",
885
- request: {
886
- max_token: chatSpan.data.attributes.custom?.["request.max_token"] || "",
887
- model: chatSpan.data.attributes.custom?.["request.model"] || "",
888
- temperature: chatSpan.data.attributes.custom?.["request.temperature"] || 0
889
- },
890
- response: {
891
- finish_reasons: chatSpan.data.attributes.custom?.["response.finish_reasons"] || ""
892
- },
893
- usage: {
894
- input_tokens: chatSpan.data.attributes.gen_ai?.usage?.input_tokens || 0,
895
- output_tokens: chatSpan.data.attributes.gen_ai?.usage?.output_tokens || 0
896
- }
897
- }));
898
- const taskData = {
899
- name: taskSpan.data.name,
900
- output: taskSpan.data.attributes.custom?.output || "",
901
- trial: taskSpan.data.attributes.custom?.trial || 0,
902
- type: taskSpan.data.attributes.custom?.type || "",
903
- error: taskSpan.data.attributes.custom?.error,
904
- chat: chatData[0] || {
905
- operation: "",
906
- capability: "",
907
- step: "",
908
- request: { max_token: "", model: "", temperature: 0 },
909
- response: { finish_reasons: "" },
910
- usage: { input_tokens: 0, output_tokens: 0 }
911
- }
912
- };
913
- caseData.task = taskData;
914
- }
915
- const scoreSpans = spans.filter(
916
- (span) => span.data.attributes.gen_ai.operation.name === "eval.score" && span.data.parent_span_id === caseSpan.data.span_id
917
- );
918
- caseData.scores = {};
919
- scoreSpans.forEach((score) => {
920
- const name = score.data.attributes.custom[Attr.Eval.Score.Name];
921
- caseData.scores[name] = {
922
- name,
923
- value: score.data.attributes.custom[Attr.Eval.Score.Value],
924
- metadata: {
925
- error: score.data.attributes.error
926
- }
927
- };
928
- });
929
- rootSpan.cases.push(caseData);
930
- }
931
- rootSpan.cases.sort((a2, b) => a2.index - b.index);
932
- return rootSpan;
933
- };
934
-
935
467
  // src/util/deep-equal.ts
936
468
  function deepEqual(data, other) {
937
469
  if (data === other) {
@@ -1383,6 +915,25 @@ function printFinalReport({
1383
915
  }
1384
916
  }
1385
917
 
918
+ // src/config/resolver.ts
919
+ var buildConsoleUrl = (urlString) => {
920
+ const url = new URL(urlString);
921
+ return `${url.protocol}//app.${url.host.split("api.").at(-1)}`;
922
+ };
923
+ function resolveAxiomConnection(config) {
924
+ let consoleEndpointUrl = buildConsoleUrl(config.eval.url);
925
+ if ("__overrideEndpointUrl" in config.eval) {
926
+ consoleEndpointUrl = config.eval.__overrideEndpointUrl;
927
+ }
928
+ return {
929
+ url: config.eval.url,
930
+ consoleEndpointUrl,
931
+ token: config.eval.token,
932
+ dataset: config.eval.dataset,
933
+ orgId: config.eval.orgId
934
+ };
935
+ }
936
+
1386
937
  // src/evals/reporter.ts
1387
938
  var AxiomReporter = class {
1388
939
  constructor() {
@@ -1390,7 +941,6 @@ var AxiomReporter = class {
1390
941
  __publicField(this, "start", 0);
1391
942
  __publicField(this, "_endOfRunConfigEnd");
1392
943
  __publicField(this, "_suiteData", []);
1393
- __publicField(this, "_baselines", /* @__PURE__ */ new Map());
1394
944
  __publicField(this, "_printedFlagOverrides", false);
1395
945
  __publicField(this, "_config");
1396
946
  }
@@ -1415,17 +965,6 @@ var AxiomReporter = class {
1415
965
  }
1416
966
  this._printedFlagOverrides = true;
1417
967
  }
1418
- const baseline = meta.evaluation.baseline;
1419
- if (baseline) {
1420
- const config = getAxiomConfig();
1421
- if (!config) {
1422
- throw new AxiomCLIError("Axiom config not available in reporter");
1423
- }
1424
- const baselineData = await findEvaluationCases(baseline.id, config);
1425
- this._baselines.set(meta.evaluation.name, baselineData || null);
1426
- } else {
1427
- this._baselines.set(meta.evaluation.name, null);
1428
- }
1429
968
  if (meta.evaluation.configEnd && !this._endOfRunConfigEnd) {
1430
969
  this._endOfRunConfigEnd = meta.evaluation.configEnd;
1431
970
  }
@@ -1455,16 +994,7 @@ var AxiomReporter = class {
1455
994
  }
1456
995
  const cwd = process.cwd();
1457
996
  const relativePath = testSuite.module.moduleId.replace(cwd, "").replace(/^\//, "");
1458
- let suiteBaseline = this._baselines.get(meta.evaluation.name);
1459
- if (suiteBaseline === void 0 && meta.evaluation.baseline) {
1460
- const config = getAxiomConfig();
1461
- if (!config) {
1462
- throw new AxiomCLIError("Axiom config not available in reporter");
1463
- }
1464
- const baselineData = await findEvaluationCases(meta.evaluation.baseline.id, config);
1465
- suiteBaseline = baselineData || null;
1466
- this._baselines.set(meta.evaluation.name, suiteBaseline);
1467
- }
997
+ let suiteBaseline = meta.evaluation.baseline;
1468
998
  this._suiteData.push({
1469
999
  name: meta.evaluation.name,
1470
1000
  file: relativePath,
@@ -1533,6 +1063,23 @@ var import_resources = require("@opentelemetry/resources");
1533
1063
  var import_exporter_trace_otlp_http = require("@opentelemetry/exporter-trace-otlp-http");
1534
1064
  var import_api10 = require("@opentelemetry/api");
1535
1065
 
1066
+ // src/cli/errors.ts
1067
+ var AxiomCLIError = class extends Error {
1068
+ constructor(message) {
1069
+ super(message);
1070
+ this.name = "AxiomCLIError";
1071
+ }
1072
+ };
1073
+ function errorToString(error) {
1074
+ if (typeof error === "string") {
1075
+ return error;
1076
+ }
1077
+ if (error instanceof Error) {
1078
+ return error.message;
1079
+ }
1080
+ return JSON.stringify(error);
1081
+ }
1082
+
1536
1083
  // src/config/loader.ts
1537
1084
  var import_c12 = require("c12");
1538
1085
  var import_defu = require("defu");
@@ -1998,11 +1545,11 @@ function setupEvalProvider(connection) {
1998
1545
  axiomProvider = new import_sdk_trace_node.NodeTracerProvider({
1999
1546
  resource: (0, import_resources.resourceFromAttributes)({
2000
1547
  ["service.name"]: "axiom",
2001
- ["service.version"]: "0.27.0"
1548
+ ["service.version"]: "0.28.0"
2002
1549
  }),
2003
1550
  spanProcessors: [processor]
2004
1551
  });
2005
- axiomTracer = axiomProvider.getTracer("axiom", "0.27.0");
1552
+ axiomTracer = axiomProvider.getTracer("axiom", "0.28.0");
2006
1553
  }
2007
1554
  async function initInstrumentation(config) {
2008
1555
  if (initialized) {
@@ -2014,7 +1561,7 @@ async function initInstrumentation(config) {
2014
1561
  }
2015
1562
  initializationPromise = (async () => {
2016
1563
  if (!config.enabled) {
2017
- axiomTracer = import_api10.trace.getTracer("axiom", "0.27.0");
1564
+ axiomTracer = import_api10.trace.getTracer("axiom", "0.28.0");
2018
1565
  initialized = true;
2019
1566
  return;
2020
1567
  }
@@ -2786,7 +2333,7 @@ var import_commander2 = require("commander");
2786
2333
  var loadVersionCommand = (program2) => {
2787
2334
  return program2.addCommand(
2788
2335
  new import_commander2.Command("version").description("cli version").action(() => {
2789
- console.log("0.27.0");
2336
+ console.log("0.28.0");
2790
2337
  })
2791
2338
  );
2792
2339
  };
@@ -2796,7 +2343,7 @@ var { loadEnvConfig } = import_env.default;
2796
2343
  loadEnvConfig(process.cwd());
2797
2344
  var { cleanedArgv, overrides } = extractOverrides(process.argv.slice(2));
2798
2345
  var program = new import_commander3.Command();
2799
- program.name("axiom").description("Axiom's CLI to manage your objects and run evals").version("0.27.0");
2346
+ program.name("axiom").description("Axiom's CLI to manage your objects and run evals").version("0.28.0");
2800
2347
  program.hook("preAction", async (_, actionCommand) => {
2801
2348
  const commandName = actionCommand.name();
2802
2349
  const parentCommand = actionCommand.parent;