npm - axiom - Versions diffs - 0.27.0 → 0.28.0 - Mend

axiom 0.27.0 → 0.28.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (34) hide show

package/dist/bin.cjs +43 -496
package/dist/bin.cjs.map +1 -1
package/dist/bin.js +5 -4
package/dist/bin.js.map +1 -1
package/dist/{chunk-CZJEEQDG.js → chunk-5TVCLHTM.js} +7 -9
package/dist/chunk-5TVCLHTM.js.map +1 -0
package/dist/{chunk-YCOR62XR.js → chunk-CSMSR3XW.js} +26 -241
package/dist/chunk-CSMSR3XW.js.map +1 -0
package/dist/chunk-MM5FFQJT.js +19 -0
package/dist/chunk-MM5FFQJT.js.map +1 -0
package/dist/{chunk-3YNZM3A7.js → chunk-WMSQHW3M.js} +13 -2
package/dist/chunk-WMSQHW3M.js.map +1 -0
package/dist/evals/custom-runner.cjs +17 -5
package/dist/evals/custom-runner.cjs.map +1 -1
package/dist/evals/custom-runner.js +2 -1
package/dist/evals/custom-runner.js.map +1 -1
package/dist/evals.cjs +84 -67
package/dist/evals.cjs.map +1 -1
package/dist/evals.d.cts +13 -80
package/dist/evals.d.ts +13 -80
package/dist/evals.js +237 -6
package/dist/evals.js.map +1 -1
package/dist/index.cjs +26 -1
package/dist/index.cjs.map +1 -1
package/dist/index.d.cts +5 -1
package/dist/index.d.ts +5 -1
package/dist/index.js +2 -1
package/dist/index.js.map +1 -1
package/dist/name-validation.d-CDPeW_pV.d.cts +81 -0
package/dist/name-validation.d-CDPeW_pV.d.ts +81 -0
package/package.json +1 -1
package/dist/chunk-3YNZM3A7.js.map +0 -1
package/dist/chunk-CZJEEQDG.js.map +0 -1
package/dist/chunk-YCOR62XR.js.map +0 -1

package/dist/bin.cjs CHANGED Viewed

@@ -249,290 +249,7 @@ var import_api5 = require("@opentelemetry/api");
 // src/otel/semconv/attributes.ts
 var import_semantic_conventions = require("@opentelemetry/semantic-conventions");
-// src/otel/semconv/eval_proposal.ts
-var ATTR_EVAL_ID = "eval.id";
-var ATTR_EVAL_NAME = "eval.name";
-var ATTR_EVAL_VERSION = "eval.version";
-var ATTR_EVAL_TYPE = "eval.type";
-var ATTR_EVAL_TAGS = "eval.tags";
-var ATTR_EVAL_BASELINE_ID = "eval.baseline.id";
-var ATTR_EVAL_BASELINE_NAME = "eval.baseline.name";
-var ATTR_EVAL_BASELINE_VERSION = "eval.baseline.version";
-var ATTR_EVAL_METADATA = "eval.metadata";
-var ATTR_EVAL_CAPABILITY_NAME = "eval.capability.name";
-var ATTR_EVAL_STEP_NAME = "eval.step.name";
-var ATTR_EVAL_COLLECTION_ID = "eval.collection.id";
-var ATTR_EVAL_COLLECTION_SIZE = "eval.collection.size";
-var ATTR_EVAL_COLLECTION_NAME = "eval.collection.name";
-var ATTR_EVAL_CONFIG_FLAGS = "eval.config.flags";
-var ATTR_EVAL_CASE_INDEX = "eval.case.index";
-var ATTR_EVAL_CASE_INPUT = "eval.case.input";
-var ATTR_EVAL_CASE_OUTPUT = "eval.case.output";
-var ATTR_EVAL_CASE_EXPECTED = "eval.case.expected";
-var ATTR_EVAL_CASE_SCORES = "eval.case.scores";
-var ATTR_EVAL_CASE_METADATA = "eval.case.metadata";
-var ATTR_EVAL_TASK_OUTPUT = "eval.task.output";
-var ATTR_EVAL_TASK_NAME = "eval.task.name";
-var ATTR_EVAL_TASK_TYPE = "eval.task.type";
-var ATTR_EVAL_RUN_ID = "eval.run.id";
-var ATTR_EVAL_SCORE_NAME = "eval.score.name";
-var ATTR_EVAL_SCORE_VALUE = "eval.score.value";
-var ATTR_EVAL_SCORE_THRESHOLD = "eval.score.threshold";
-var ATTR_EVAL_SCORE_PASSED = "eval.score.passed";
-var ATTR_EVAL_SCORE_METADATA = "eval.score.metadata";
-var ATTR_EVAL_USER_NAME = "eval.user.name";
-var ATTR_EVAL_USER_EMAIL = "eval.user.email";
-// src/otel/semconv/attributes.ts
 var import_incubating = require("@opentelemetry/semantic-conventions/incubating");
-var ATTR_AXIOM_GEN_AI_SCHEMA_URL = "axiom.gen_ai.schema_url";
-var ATTR_AXIOM_GEN_AI_SDK_NAME = "axiom.gen_ai.sdk.name";
-var ATTR_AXIOM_GEN_AI_SDK_VERSION = "axiom.gen_ai.sdk.version";
-var ATTR_GEN_AI_CAPABILITY_NAME = "gen_ai.capability.name";
-var ATTR_GEN_AI_STEP_NAME = "gen_ai.step.name";
-var ATTR_GEN_AI_TOOL_ARGUMENTS = "gen_ai.tool.arguments";
-var ATTR_GEN_AI_TOOL_MESSAGE = "gen_ai.tool.message";
-var GEN_AI_PROVIDER_NAME_VALUE_ASSEMBLYAI = "assemblyai";
-var GEN_AI_PROVIDER_NAME_VALUE_CEREBRAS = "cerebras";
-var GEN_AI_PROVIDER_NAME_VALUE_DEEPGRAM = "deepgram";
-var GEN_AI_PROVIDER_NAME_VALUE_DEEPINFRA = "deepinfra";
-var GEN_AI_PROVIDER_NAME_VALUE_ELEVENLABS = "elevenlabs";
-var GEN_AI_PROVIDER_NAME_VALUE_FAL = "fal";
-var GEN_AI_PROVIDER_NAME_VALUE_FIREWORKS = "fireworks";
-var GEN_AI_PROVIDER_NAME_VALUE_GLADIA = "gladia";
-var GEN_AI_PROVIDER_NAME_VALUE_HUME = "hume";
-var GEN_AI_PROVIDER_NAME_VALUE_LMNT = "lmnt";
-var GEN_AI_PROVIDER_NAME_VALUE_LUMA = "luma";
-var GEN_AI_PROVIDER_NAME_VALUE_REPLICATE = "replicate";
-var GEN_AI_PROVIDER_NAME_VALUE_REVAI = "revai";
-var GEN_AI_PROVIDER_NAME_VALUE_TOGETHERAI = "togetherai";
-var GEN_AI_PROVIDER_NAME_VALUE_VERCEL = "vercel";
-var Attr = {
-  __EXPERIMENTAL_Flag: (flagName) => `flag.${flagName}`,
-  __EXPERIMENTAL_Fact: (factName) => `fact.${factName}`,
-  Axiom: {
-    GenAI: {
-      SchemaURL: ATTR_AXIOM_GEN_AI_SCHEMA_URL,
-      SDK: {
-        Name: ATTR_AXIOM_GEN_AI_SDK_NAME,
-        Version: ATTR_AXIOM_GEN_AI_SDK_VERSION
-      }
-    }
-  },
-  GenAI: {
-    PromptMetadata: {
-      ID: "axiom.gen_ai.prompt.id",
-      Name: "axiom.gen_ai.prompt.name",
-      Slug: "axiom.gen_ai.prompt.slug",
-      Version: "axiom.gen_ai.prompt.version"
-    },
-    /**
-     * These two are used to identify the span
-     */
-    Capability: {
-      Name: ATTR_GEN_AI_CAPABILITY_NAME
-    },
-    Step: {
-      Name: ATTR_GEN_AI_STEP_NAME
-    },
-    Provider: {
-      Name: import_incubating.ATTR_GEN_AI_PROVIDER_NAME,
-      Name_Values: {
-        Anthropic: import_incubating.GEN_AI_PROVIDER_NAME_VALUE_ANTHROPIC,
-        AssemblyAI: GEN_AI_PROVIDER_NAME_VALUE_ASSEMBLYAI,
-        AWSBedrock: import_incubating.GEN_AI_PROVIDER_NAME_VALUE_AWS_BEDROCK,
-        AzureAIInference: import_incubating.GEN_AI_PROVIDER_NAME_VALUE_AZURE_AI_INFERENCE,
-        AzureAIOpenAI: import_incubating.GEN_AI_PROVIDER_NAME_VALUE_AZURE_AI_OPENAI,
-        Cerebras: GEN_AI_PROVIDER_NAME_VALUE_CEREBRAS,
-        Cohere: import_incubating.GEN_AI_PROVIDER_NAME_VALUE_COHERE,
-        Deepgram: GEN_AI_PROVIDER_NAME_VALUE_DEEPGRAM,
-        DeepInfra: GEN_AI_PROVIDER_NAME_VALUE_DEEPINFRA,
-        Deepseek: import_incubating.GEN_AI_PROVIDER_NAME_VALUE_DEEPSEEK,
-        ElevenLabs: GEN_AI_PROVIDER_NAME_VALUE_ELEVENLABS,
-        Fal: GEN_AI_PROVIDER_NAME_VALUE_FAL,
-        Fireworks: GEN_AI_PROVIDER_NAME_VALUE_FIREWORKS,
-        GCPGemini: import_incubating.GEN_AI_PROVIDER_NAME_VALUE_GCP_GEMINI,
-        GCPGenAI: import_incubating.GEN_AI_PROVIDER_NAME_VALUE_GCP_GEN_AI,
-        GCPVertexAI: import_incubating.GEN_AI_PROVIDER_NAME_VALUE_GCP_VERTEX_AI,
-        Gladia: GEN_AI_PROVIDER_NAME_VALUE_GLADIA,
-        Groq: import_incubating.GEN_AI_PROVIDER_NAME_VALUE_GROQ,
-        Hume: GEN_AI_PROVIDER_NAME_VALUE_HUME,
-        IBMWatsonxAI: import_incubating.GEN_AI_PROVIDER_NAME_VALUE_IBM_WATSONX_AI,
-        Lmnt: GEN_AI_PROVIDER_NAME_VALUE_LMNT,
-        Luma: GEN_AI_PROVIDER_NAME_VALUE_LUMA,
-        MistralAI: import_incubating.GEN_AI_PROVIDER_NAME_VALUE_MISTRAL_AI,
-        OpenAI: import_incubating.GEN_AI_PROVIDER_NAME_VALUE_OPENAI,
-        Perplexity: import_incubating.GEN_AI_PROVIDER_NAME_VALUE_PERPLEXITY,
-        Replicate: GEN_AI_PROVIDER_NAME_VALUE_REPLICATE,
-        RevAI: GEN_AI_PROVIDER_NAME_VALUE_REVAI,
-        TogetherAI: GEN_AI_PROVIDER_NAME_VALUE_TOGETHERAI,
-        Vercel: GEN_AI_PROVIDER_NAME_VALUE_VERCEL,
-        XAI: import_incubating.GEN_AI_PROVIDER_NAME_VALUE_X_AI
-      }
-    },
-    /**
-     * Regular attributes
-     */
-    Agent: {
-      Description: import_incubating.ATTR_GEN_AI_AGENT_DESCRIPTION,
-      // not yet used by axiom-ai
-      ID: import_incubating.ATTR_GEN_AI_AGENT_ID,
-      // not yet used by axiom-ai
-      Name: import_incubating.ATTR_GEN_AI_AGENT_NAME
-      // not yet used by axiom-ai
-    },
-    Conversation: {
-      ID: import_incubating.ATTR_GEN_AI_CONVERSATION_ID
-      // not yet used by axiom-ai, anyway probably needs to be provided by user
-    },
-    Input: {
-      Messages: import_incubating.ATTR_GEN_AI_INPUT_MESSAGES
-    },
-    Operation: {
-      Name: import_incubating.ATTR_GEN_AI_OPERATION_NAME,
-      Name_Values: {
-        /**
-         * Note that "text_completion" is deprecated in favor of "chat" for both OpenAI and Anthropic
-         */
-        Chat: import_incubating.GEN_AI_OPERATION_NAME_VALUE_CHAT,
-        CreateAgent: import_incubating.GEN_AI_OPERATION_NAME_VALUE_CREATE_AGENT,
-        Embeddings: import_incubating.GEN_AI_OPERATION_NAME_VALUE_EMBEDDINGS,
-        ExecuteTool: import_incubating.GEN_AI_OPERATION_NAME_VALUE_EXECUTE_TOOL,
-        GenerateContent: import_incubating.GEN_AI_OPERATION_NAME_VALUE_GENERATE_CONTENT,
-        InvokeAgent: import_incubating.GEN_AI_OPERATION_NAME_VALUE_INVOKE_AGENT
-      }
-    },
-    Output: {
-      Messages: import_incubating.ATTR_GEN_AI_OUTPUT_MESSAGES,
-      Type: import_incubating.ATTR_GEN_AI_OUTPUT_TYPE,
-      Type_Values: {
-        Text: import_incubating.GEN_AI_OUTPUT_TYPE_VALUE_TEXT,
-        Json: import_incubating.GEN_AI_OUTPUT_TYPE_VALUE_JSON,
-        Image: import_incubating.GEN_AI_OUTPUT_TYPE_VALUE_IMAGE,
-        Speech: import_incubating.GEN_AI_OUTPUT_TYPE_VALUE_SPEECH
-      }
-    },
-    /**
-     * The provider that is hosting the model, eg AWS Bedrock
-     * There doesn't seem to be a semconv for this
-     */
-    Request: {
-      ChoiceCount: import_incubating.ATTR_GEN_AI_REQUEST_CHOICE_COUNT,
-      // not yet used by axiom-ai
-      EncodingFormats: import_incubating.ATTR_GEN_AI_REQUEST_ENCODING_FORMATS,
-      // not yet used by axiom-ai
-      FrequencyPenalty: import_incubating.ATTR_GEN_AI_REQUEST_FREQUENCY_PENALTY,
-      MaxTokens: import_incubating.ATTR_GEN_AI_REQUEST_MAX_TOKENS,
-      /**
-       * The model you asked for
-       */
-      Model: import_incubating.ATTR_GEN_AI_REQUEST_MODEL,
-      PresencePenalty: import_incubating.ATTR_GEN_AI_REQUEST_PRESENCE_PENALTY,
-      Seed: import_incubating.ATTR_GEN_AI_REQUEST_SEED,
-      StopSequences: import_incubating.ATTR_GEN_AI_REQUEST_STOP_SEQUENCES,
-      Temperature: import_incubating.ATTR_GEN_AI_REQUEST_TEMPERATURE,
-      TopK: import_incubating.ATTR_GEN_AI_REQUEST_TOP_K,
-      TopP: import_incubating.ATTR_GEN_AI_REQUEST_TOP_P
-    },
-    Response: {
-      FinishReasons: import_incubating.ATTR_GEN_AI_RESPONSE_FINISH_REASONS,
-      ID: import_incubating.ATTR_GEN_AI_RESPONSE_ID,
-      /**
-       * The model that was actually used (might be different bc routing) - only ever get this from the response, otherwise omit
-       */
-      Model: import_incubating.ATTR_GEN_AI_RESPONSE_MODEL
-      // somehow not landing on the span for google models? check up on this...
-    },
-    Tool: {
-      CallID: import_incubating.ATTR_GEN_AI_TOOL_CALL_ID,
-      Description: import_incubating.ATTR_GEN_AI_TOOL_DESCRIPTION,
-      Name: import_incubating.ATTR_GEN_AI_TOOL_NAME,
-      Type: import_incubating.ATTR_GEN_AI_TOOL_TYPE,
-      /**
-       * Note, OTel Semantic Convention suggest only putting tool inputs/outputs on the parent chat span
-       * But we at least want to give users THE OPTION to put them on the tool spans themselves as well
-       * Because it enables a lot of things with querying
-       * @see https://github.com/open-telemetry/semantic-conventions/releases/tag/v1.37.0
-       */
-      Arguments: ATTR_GEN_AI_TOOL_ARGUMENTS,
-      /**
-       * Note, OTel Semantic Convention suggest only putting tool inputs/outputs on the parent chat span
-       * But we at least want to give users THE OPTION to put them on the tool spans themselves as well
-       * Because it enables a lot of things with querying
-       * @see https://github.com/open-telemetry/semantic-conventions/releases/tag/v1.37.0
-       */
-      Message: ATTR_GEN_AI_TOOL_MESSAGE
-    },
-    Usage: {
-      InputTokens: import_incubating.ATTR_GEN_AI_USAGE_INPUT_TOKENS,
-      OutputTokens: import_incubating.ATTR_GEN_AI_USAGE_OUTPUT_TOKENS
-    }
-  },
-  Eval: {
-    ID: ATTR_EVAL_ID,
-    Name: ATTR_EVAL_NAME,
-    Version: ATTR_EVAL_VERSION,
-    Type: ATTR_EVAL_TYPE,
-    Baseline: {
-      ID: ATTR_EVAL_BASELINE_ID,
-      Name: ATTR_EVAL_BASELINE_NAME,
-      Version: ATTR_EVAL_BASELINE_VERSION
-    },
-    Capability: {
-      Name: ATTR_EVAL_CAPABILITY_NAME
-    },
-    Step: {
-      Name: ATTR_EVAL_STEP_NAME
-    },
-    Tags: ATTR_EVAL_TAGS,
-    Metadata: ATTR_EVAL_METADATA,
-    Collection: {
-      ID: ATTR_EVAL_COLLECTION_ID,
-      Name: ATTR_EVAL_COLLECTION_NAME,
-      Size: ATTR_EVAL_COLLECTION_SIZE
-    },
-    Config: {
-      Flags: ATTR_EVAL_CONFIG_FLAGS
-    },
-    Run: {
-      ID: ATTR_EVAL_RUN_ID
-    },
-    Case: {
-      Index: ATTR_EVAL_CASE_INDEX,
-      Input: ATTR_EVAL_CASE_INPUT,
-      Output: ATTR_EVAL_CASE_OUTPUT,
-      Expected: ATTR_EVAL_CASE_EXPECTED,
-      Scores: ATTR_EVAL_CASE_SCORES,
-      Metadata: ATTR_EVAL_CASE_METADATA
-    },
-    Task: {
-      Output: ATTR_EVAL_TASK_OUTPUT,
-      Name: ATTR_EVAL_TASK_NAME,
-      Type: ATTR_EVAL_TASK_TYPE
-    },
-    Score: {
-      Name: ATTR_EVAL_SCORE_NAME,
-      Value: ATTR_EVAL_SCORE_VALUE,
-      Threshold: ATTR_EVAL_SCORE_THRESHOLD,
-      Passed: ATTR_EVAL_SCORE_PASSED,
-      Metadata: ATTR_EVAL_SCORE_METADATA
-    },
-    User: {
-      Name: ATTR_EVAL_USER_NAME,
-      Email: ATTR_EVAL_USER_EMAIL
-    }
-  },
-  Error: {
-    Type: import_semantic_conventions.ATTR_ERROR_TYPE,
-    Message: import_incubating.ATTR_ERROR_MESSAGE
-  },
-  HTTP: {
-    Response: {
-      StatusCode: import_semantic_conventions.ATTR_HTTP_RESPONSE_STATUS_CODE
-    }
-  }
-};
 // src/otel/startActiveSpan.ts
 var import_api2 = require("@opentelemetry/api");
@@ -543,7 +260,7 @@ var import_api4 = require("@opentelemetry/api");
 // package.json
 var package_default = {
   name: "axiom",
-  version: "0.27.0",
+  version: "0.28.0",
   type: "module",
   author: "Axiom, Inc.",
   contributors: [
@@ -747,191 +464,6 @@ function withEvalContext(options = {}, fn) {
   );
 }
-// src/config/resolver.ts
-var buildConsoleUrl = (urlString) => {
-  const url = new URL(urlString);
-  return `${url.protocol}//app.${url.host.split("api.").at(-1)}`;
-};
-function resolveAxiomConnection(config) {
-  let consoleEndpointUrl = buildConsoleUrl(config.eval.url);
-  if ("__overrideEndpointUrl" in config.eval) {
-    consoleEndpointUrl = config.eval.__overrideEndpointUrl;
-  }
-  return {
-    url: config.eval.url,
-    consoleEndpointUrl,
-    token: config.eval.token,
-    dataset: config.eval.dataset,
-    orgId: config.eval.orgId
-  };
-}
-// src/cli/errors.ts
-var AxiomCLIError = class extends Error {
-  constructor(message) {
-    super(message);
-    this.name = "AxiomCLIError";
-  }
-};
-function errorToString(error) {
-  if (typeof error === "string") {
-    return error;
-  }
-  if (error instanceof Error) {
-    return error.message;
-  }
-  return JSON.stringify(error);
-}
-// src/evals/eval.service.ts
-var findEvaluationCases = async (evalId, config) => {
-  const { dataset, url, token, orgId } = resolveAxiomConnection(config);
-  const apl = `['${dataset}'] | where trace_id == "${evalId}" | order by _time`;
-  const headers = new Headers({
-    Authorization: `Bearer ${token}`,
-    "Content-Type": "application/json",
-    ...orgId ? { "X-AXIOM-ORG-ID": orgId } : {}
-  });
-  const resp = await fetch(`${url}/v1/datasets/_apl?format=legacy`, {
-    headers,
-    method: "POST",
-    body: JSON.stringify({ apl })
-  });
-  const payload = await resp.json();
-  if (!resp.ok) {
-    throw new Error(`Failed to query evaluation cases: ${payload.message || resp.statusText}`);
-  }
-  return payload.matches.length ? buildSpanTree(payload.matches) : null;
-};
-var mapSpanToEval = (span) => {
-  const flagConfigRaw = span.data.attributes[Attr.Eval.Config.Flags] ?? span.data.attributes.custom[Attr.Eval.Config.Flags];
-  return {
-    id: span.data.attributes.custom[Attr.Eval.ID],
-    name: span.data.attributes.custom[Attr.Eval.Name],
-    type: span.data.attributes.custom[Attr.Eval.Type],
-    version: span.data.attributes.custom[Attr.Eval.Version],
-    collection: {
-      name: span.data.attributes.custom[Attr.Eval.Collection.Name],
-      size: span.data.attributes.custom[Attr.Eval.Collection.Size]
-    },
-    baseline: {
-      id: span.data.attributes.custom[Attr.Eval.Baseline.ID],
-      name: span.data.attributes.custom[Attr.Eval.Baseline.Name]
-    },
-    prompt: {
-      // TODO: do we still want this?
-      model: span.data.attributes.custom["eval.prompt.model"],
-      params: span.data.attributes.custom["eval.prompt.params"]
-    },
-    duration: span.data.duration,
-    status: span.data.status.code,
-    traceId: span.data.trace_id,
-    runAt: span._time,
-    tags: span.data.attributes.custom[Attr.Eval.Tags].length ? JSON.parse(span.data.attributes.custom[Attr.Eval.Tags]) : [],
-    user: {
-      name: span.data.attributes.custom[Attr.Eval.User.Name],
-      email: span.data.attributes.custom[Attr.Eval.User.Email]
-    },
-    cases: [],
-    flagConfig: flagConfigRaw ? JSON.parse(flagConfigRaw) : void 0
-  };
-};
-var mapSpanToCase = (item) => {
-  const data = item.data;
-  const d = data.duration;
-  let duration = "-";
-  if (d.endsWith("s")) {
-    duration = `${Number(d.replace("s", "")).toFixed(2)}s`;
-  } else {
-    duration = d;
-  }
-  return {
-    index: data.attributes.custom[Attr.Eval.Case.Index],
-    input: data.attributes.custom[Attr.Eval.Case.Input],
-    output: data.attributes.custom[Attr.Eval.Case.Output],
-    expected: data.attributes.custom[Attr.Eval.Case.Expected],
-    duration,
-    status: data.status.code,
-    scores: data.attributes.custom[Attr.Eval.Case.Scores] ? JSON.parse(data.attributes.custom[Attr.Eval.Case.Scores]) : {},
-    runAt: item._time,
-    spanId: data.span_id,
-    traceId: data.trace_id
-  };
-};
-var buildSpanTree = (spans) => {
-  if (!spans.length) {
-    return null;
-  }
-  const evalSpan = spans.find((span) => span.data.attributes.gen_ai.operation.name === "eval");
-  if (!evalSpan) {
-    return null;
-  }
-  const rootSpan = mapSpanToEval(evalSpan);
-  const caseSpans = spans.filter((span) => span.data.name.startsWith("case"));
-  for (const caseSpan of caseSpans) {
-    const caseData = mapSpanToCase(caseSpan);
-    const taskSpans = spans.filter(
-      (span) => span.data.name.startsWith("task") && span.data.parent_span_id === caseSpan.data.span_id
-    );
-    if (taskSpans.length > 0) {
-      const taskSpan = taskSpans[0];
-      const chatSpans = spans.filter(
-        (span) => span.data.name.startsWith("chat") && span.data.parent_span_id === taskSpan.data.span_id
-      );
-      const chatData = chatSpans.map((chatSpan) => ({
-        operation: chatSpan.data.attributes.custom?.operation || "",
-        capability: chatSpan.data.attributes.custom?.capability || "",
-        step: chatSpan.data.attributes.custom?.step || "",
-        request: {
-          max_token: chatSpan.data.attributes.custom?.["request.max_token"] || "",
-          model: chatSpan.data.attributes.custom?.["request.model"] || "",
-          temperature: chatSpan.data.attributes.custom?.["request.temperature"] || 0
-        },
-        response: {
-          finish_reasons: chatSpan.data.attributes.custom?.["response.finish_reasons"] || ""
-        },
-        usage: {
-          input_tokens: chatSpan.data.attributes.gen_ai?.usage?.input_tokens || 0,
-          output_tokens: chatSpan.data.attributes.gen_ai?.usage?.output_tokens || 0
-        }
-      }));
-      const taskData = {
-        name: taskSpan.data.name,
-        output: taskSpan.data.attributes.custom?.output || "",
-        trial: taskSpan.data.attributes.custom?.trial || 0,
-        type: taskSpan.data.attributes.custom?.type || "",
-        error: taskSpan.data.attributes.custom?.error,
-        chat: chatData[0] || {
-          operation: "",
-          capability: "",
-          step: "",
-          request: { max_token: "", model: "", temperature: 0 },
-          response: { finish_reasons: "" },
-          usage: { input_tokens: 0, output_tokens: 0 }
-        }
-      };
-      caseData.task = taskData;
-    }
-    const scoreSpans = spans.filter(
-      (span) => span.data.attributes.gen_ai.operation.name === "eval.score" && span.data.parent_span_id === caseSpan.data.span_id
-    );
-    caseData.scores = {};
-    scoreSpans.forEach((score) => {
-      const name = score.data.attributes.custom[Attr.Eval.Score.Name];
-      caseData.scores[name] = {
-        name,
-        value: score.data.attributes.custom[Attr.Eval.Score.Value],
-        metadata: {
-          error: score.data.attributes.error
-        }
-      };
-    });
-    rootSpan.cases.push(caseData);
-  }
-  rootSpan.cases.sort((a2, b) => a2.index - b.index);
-  return rootSpan;
-};
 // src/util/deep-equal.ts
 function deepEqual(data, other) {
   if (data === other) {
@@ -1383,6 +915,25 @@ function printFinalReport({
   }
 }
+// src/config/resolver.ts
+var buildConsoleUrl = (urlString) => {
+  const url = new URL(urlString);
+  return `${url.protocol}//app.${url.host.split("api.").at(-1)}`;
+};
+function resolveAxiomConnection(config) {
+  let consoleEndpointUrl = buildConsoleUrl(config.eval.url);
+  if ("__overrideEndpointUrl" in config.eval) {
+    consoleEndpointUrl = config.eval.__overrideEndpointUrl;
+  }
+  return {
+    url: config.eval.url,
+    consoleEndpointUrl,
+    token: config.eval.token,
+    dataset: config.eval.dataset,
+    orgId: config.eval.orgId
+  };
+}
 // src/evals/reporter.ts
 var AxiomReporter = class {
   constructor() {
@@ -1390,7 +941,6 @@ var AxiomReporter = class {
     __publicField(this, "start", 0);
     __publicField(this, "_endOfRunConfigEnd");
     __publicField(this, "_suiteData", []);
-    __publicField(this, "_baselines", /* @__PURE__ */ new Map());
     __publicField(this, "_printedFlagOverrides", false);
     __publicField(this, "_config");
   }
@@ -1415,17 +965,6 @@ var AxiomReporter = class {
       }
       this._printedFlagOverrides = true;
     }
-    const baseline = meta.evaluation.baseline;
-    if (baseline) {
-      const config = getAxiomConfig();
-      if (!config) {
-        throw new AxiomCLIError("Axiom config not available in reporter");
-      }
-      const baselineData = await findEvaluationCases(baseline.id, config);
-      this._baselines.set(meta.evaluation.name, baselineData || null);
-    } else {
-      this._baselines.set(meta.evaluation.name, null);
-    }
     if (meta.evaluation.configEnd && !this._endOfRunConfigEnd) {
       this._endOfRunConfigEnd = meta.evaluation.configEnd;
     }
@@ -1455,16 +994,7 @@ var AxiomReporter = class {
     }
     const cwd = process.cwd();
     const relativePath = testSuite.module.moduleId.replace(cwd, "").replace(/^\//, "");
-    let suiteBaseline = this._baselines.get(meta.evaluation.name);
-    if (suiteBaseline === void 0 && meta.evaluation.baseline) {
-      const config = getAxiomConfig();
-      if (!config) {
-        throw new AxiomCLIError("Axiom config not available in reporter");
-      }
-      const baselineData = await findEvaluationCases(meta.evaluation.baseline.id, config);
-      suiteBaseline = baselineData || null;
-      this._baselines.set(meta.evaluation.name, suiteBaseline);
-    }
+    let suiteBaseline = meta.evaluation.baseline;
     this._suiteData.push({
       name: meta.evaluation.name,
       file: relativePath,
@@ -1533,6 +1063,23 @@ var import_resources = require("@opentelemetry/resources");
 var import_exporter_trace_otlp_http = require("@opentelemetry/exporter-trace-otlp-http");
 var import_api10 = require("@opentelemetry/api");
+// src/cli/errors.ts
+var AxiomCLIError = class extends Error {
+  constructor(message) {
+    super(message);
+    this.name = "AxiomCLIError";
+  }
+};
+function errorToString(error) {
+  if (typeof error === "string") {
+    return error;
+  }
+  if (error instanceof Error) {
+    return error.message;
+  }
+  return JSON.stringify(error);
+}
 // src/config/loader.ts
 var import_c12 = require("c12");
 var import_defu = require("defu");
@@ -1998,11 +1545,11 @@ function setupEvalProvider(connection) {
   axiomProvider = new import_sdk_trace_node.NodeTracerProvider({
     resource: (0, import_resources.resourceFromAttributes)({
       ["service.name"]: "axiom",
-      ["service.version"]: "0.27.0"
+      ["service.version"]: "0.28.0"
     }),
     spanProcessors: [processor]
   });
-  axiomTracer = axiomProvider.getTracer("axiom", "0.27.0");
+  axiomTracer = axiomProvider.getTracer("axiom", "0.28.0");
 }
 async function initInstrumentation(config) {
   if (initialized) {
@@ -2014,7 +1561,7 @@ async function initInstrumentation(config) {
   }
   initializationPromise = (async () => {
     if (!config.enabled) {
-      axiomTracer = import_api10.trace.getTracer("axiom", "0.27.0");
+      axiomTracer = import_api10.trace.getTracer("axiom", "0.28.0");
       initialized = true;
       return;
     }
@@ -2786,7 +2333,7 @@ var import_commander2 = require("commander");
 var loadVersionCommand = (program2) => {
   return program2.addCommand(
     new import_commander2.Command("version").description("cli version").action(() => {
-      console.log("0.27.0");
+      console.log("0.28.0");
     })
   );
 };
@@ -2796,7 +2343,7 @@ var { loadEnvConfig } = import_env.default;
 loadEnvConfig(process.cwd());
 var { cleanedArgv, overrides } = extractOverrides(process.argv.slice(2));
 var program = new import_commander3.Command();
-program.name("axiom").description("Axiom's CLI to manage your objects and run evals").version("0.27.0");
+program.name("axiom").description("Axiom's CLI to manage your objects and run evals").version("0.28.0");
 program.hook("preAction", async (_, actionCommand) => {
   const commandName = actionCommand.name();
   const parentCommand = actionCommand.parent;