@netlify/axis 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +24 -916
- package/dist/cli.js +2 -4
- package/dist/cli.js.map +1 -1
- package/dist/config/validator.d.ts.map +1 -1
- package/dist/config/validator.js +2 -20
- package/dist/config/validator.js.map +1 -1
- package/dist/index.d.ts +1 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +1 -1
- package/dist/index.js.map +1 -1
- package/dist/report-ui/index.html +51 -45
- package/dist/reports/writer.d.ts +1 -1
- package/dist/reports/writer.d.ts.map +1 -1
- package/dist/reports/writer.js +2 -1
- package/dist/reports/writer.js.map +1 -1
- package/dist/runner/runner.d.ts.map +1 -1
- package/dist/runner/runner.js +18 -29
- package/dist/runner/runner.js.map +1 -1
- package/dist/scoring/index.d.ts +1 -7
- package/dist/scoring/index.d.ts.map +1 -1
- package/dist/scoring/index.js +1 -16
- package/dist/scoring/index.js.map +1 -1
- package/dist/scoring/sparse-index.d.ts +2 -2
- package/dist/scoring/sparse-index.d.ts.map +1 -1
- package/dist/scoring/sparse-index.js +8 -9
- package/dist/scoring/sparse-index.js.map +1 -1
- package/dist/skills/resolver.d.ts +2 -4
- package/dist/skills/resolver.d.ts.map +1 -1
- package/dist/skills/resolver.js +4 -10
- package/dist/skills/resolver.js.map +1 -1
- package/dist/transcript/categorize.d.ts +1 -3
- package/dist/transcript/categorize.d.ts.map +1 -1
- package/dist/transcript/categorize.js +0 -27
- package/dist/transcript/categorize.js.map +1 -1
- package/dist/types/config.d.ts +4 -17
- package/dist/types/config.d.ts.map +1 -1
- package/dist/types/report.d.ts +2 -0
- package/dist/types/report.d.ts.map +1 -1
- package/dist/types/scenario.d.ts +1 -2
- package/dist/types/scenario.d.ts.map +1 -1
- package/dist/types/scoring.d.ts +0 -4
- package/dist/types/scoring.d.ts.map +1 -1
- package/package.json +5 -3
- package/dist/docs-site/_astro/cli.DDWZtG0-.css +0 -1
- package/dist/docs-site/cli/index.html +0 -18
- package/dist/docs-site/configuration/index.html +0 -121
- package/dist/docs-site/content-assets.mjs +0 -1
- package/dist/docs-site/content-modules.mjs +0 -1
- package/dist/docs-site/data-store.json +0 -9
- package/dist/docs-site/index.html +0 -69
- package/dist/docs-site/quickstart/index.html +0 -59
- package/dist/docs-site/running/index.html +0 -87
- package/dist/docs-site/scoring/index.html +0 -135
- package/dist/report-ui/mock-data.json +0 -298
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@netlify/axis",
|
|
3
|
-
"version": "0.2.0",
|
|
3
|
+
"version": "0.3.0",
|
|
4
4
|
"description": "Agent eXperience Index Score — synthetic testing for AI agent interaction",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./dist/index.js",
|
|
@@ -26,7 +26,7 @@
|
|
|
26
26
|
},
|
|
27
27
|
"lint-staged": {
|
|
28
28
|
"*.{ts,tsx}": [
|
|
29
|
-
"eslint --fix",
|
|
29
|
+
"eslint --fix --no-warn-ignored",
|
|
30
30
|
"prettier --write"
|
|
31
31
|
],
|
|
32
32
|
"*.{json,md,yml,yaml}": [
|
|
@@ -37,7 +37,9 @@
|
|
|
37
37
|
"node": ">=18.0.0"
|
|
38
38
|
},
|
|
39
39
|
"files": [
|
|
40
|
-
"dist"
|
|
40
|
+
"dist",
|
|
41
|
+
"!dist/docs-site",
|
|
42
|
+
"!dist/report-ui/mock-data.json"
|
|
41
43
|
],
|
|
42
44
|
"devDependencies": {
|
|
43
45
|
"@eslint/js": "^9.39.4",
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
:root{--bg: #fafbf9;--bg-card: #ffffff;--text: #1a1a2e;--text-secondary: #4a5568;--text-muted: #637083;--border: #e5e7eb;--border-bright: #d1d5db;--light-gray: #f3f4f6;--accent: #016867;--accent-hover: #015554;--accent-bright: #2dd4bf;--accent-light: rgba(1, 104, 103, .07);--color-env: #059669;--color-svc: #2563eb;--color-agent: #6b7280;--warning: #d97706;--warning-light: rgba(217, 119, 6, .06);--danger: #dc2626;--danger-light: rgba(220, 38, 38, .06);--success: #059669;--success-light: rgba(5, 150, 105, .06);--sidebar-bg: #0f2b2b;--sidebar-text: #8faca8;--sidebar-text-muted: #7a9b97;--sidebar-hover: rgba(255, 255, 255, .06);--sidebar-active-bg: rgba(45, 212, 191, .12);--sidebar-active-text: #2dd4bf;--radius: 8px;--radius-lg: 12px;--shadow: 0 1px 3px rgba(0, 0, 0, .06);--shadow-lg: 0 8px 24px rgba(0, 0, 0, .08);--glow: 0 0 40px rgba(1, 104, 103, .06);--font: "Inter", -apple-system, BlinkMacSystemFont, "Segoe UI", system-ui, sans-serif;--font-mono: "SF Mono", SFMono-Regular, Menlo, Consolas, monospace;--transition: .15s ease;--sidebar-width: 240px}*,*:before,*:after{box-sizing:border-box;margin:0;padding:0}html{font-size:15px;-webkit-font-smoothing:antialiased;-moz-osx-font-smoothing:grayscale}body{font-family:var(--font);color:var(--text);background:var(--bg);background-image:radial-gradient(ellipse at top,rgba(1,104,103,.04) 0%,transparent 60%);background-attachment:fixed;line-height:1.6;min-height:100vh}:focus-visible{outline:2px solid var(--accent);outline-offset:2px;border-radius:2px}.site-header{position:sticky;top:0;z-index:50;background:#fafbf9d9;backdrop-filter:blur(16px);-webkit-backdrop-filter:blur(16px);border-bottom:1px solid var(--border);padding:0 
24px;height:56px;display:flex;align-items:center;justify-content:space-between}.site-logo{display:flex;align-items:center;gap:10px;text-decoration:none;color:var(--text)}.site-logo-mark{font-size:2.0625rem;font-weight:800;letter-spacing:.12em;color:var(--accent)}.logo-ax{letter-spacing:-.06em}.logo-i{display:inline-block;font-style:italic;transform:skew(-20deg);margin-left:2px;margin-right:-2px}.site-header-links{display:flex;gap:16px;align-items:center}.site-header-links a{font-size:1rem;color:var(--text-secondary);text-decoration:none;font-weight:500;transition:color var(--transition)}.site-header-links a:hover{color:var(--accent)}.mobile-menu-btn{display:none;background:none;border:none;cursor:pointer;color:var(--text);font-size:1.25rem;padding:4px}.site-shell{display:flex;min-height:calc(100vh - 56px)}.sidebar{width:var(--sidebar-width);flex-shrink:0;background:radial-gradient(ellipse at 20% 15%,rgba(45,212,191,.18) 0%,transparent 50%),radial-gradient(ellipse at 80% 85%,rgba(30,80,160,.2) 0%,transparent 50%),radial-gradient(ellipse at 50% 50%,rgba(14,74,74,.4) 0%,transparent 70%),linear-gradient(180deg,#0d3d3d,#0a2424);padding:24px 0;position:sticky;top:56px;height:calc(100vh - 56px);overflow-y:auto}.sidebar-nav{display:flex;flex-direction:column;gap:2px;padding:0 12px}.sidebar-link{display:block;padding:8px 16px;font-size:.875rem;font-weight:500;color:var(--sidebar-text);text-decoration:none;border-radius:var(--radius);transition:background var(--transition),color var(--transition)}.sidebar-link:hover{background:var(--sidebar-hover);color:#c8dbd9}.sidebar-link.active{background:var(--sidebar-active-bg);color:var(--sidebar-active-text);font-weight:600}.sidebar-section-label{padding:20px 16px 6px;font-size:.6875rem;font-weight:600;text-transform:uppercase;letter-spacing:.06em;color:var(--sidebar-text-muted)}.main-content{flex:1;min-width:0;padding:40px 48px 
80px;max-width:860px}h1{font-size:2.25rem;font-weight:800;letter-spacing:-.03em;line-height:1.15;margin-bottom:12px;background:linear-gradient(135deg,var(--text) 40%,var(--accent) 100%);-webkit-background-clip:text;-webkit-text-fill-color:transparent;background-clip:text}h2{font-size:1.5rem;font-weight:700;letter-spacing:-.02em;color:var(--text);margin-top:48px;margin-bottom:16px;padding-bottom:12px;border-bottom:none;background-image:linear-gradient(90deg,rgba(1,104,103,.35),transparent 80%);background-size:100% 1px;background-position:bottom left;background-repeat:no-repeat}h3{font-size:1.125rem;font-weight:600;color:var(--text);margin-top:32px;margin-bottom:12px}h4{font-size:.9375rem;font-weight:600;color:var(--text-secondary);margin-top:24px;margin-bottom:8px}p{margin-bottom:16px;color:var(--text-secondary)}.lead{font-size:1.125rem;color:var(--text-secondary);line-height:1.7;margin-bottom:32px}a{color:var(--accent);text-decoration:underline;text-decoration-thickness:1px;text-underline-offset:2px;transition:color var(--transition)}a:hover{color:var(--accent-hover)}.sidebar-link,.site-logo,.site-header-links a{text-decoration:none}strong{font-weight:600;color:var(--text)}ul,ol{margin-bottom:16px;padding-left:24px}li{margin-bottom:6px;color:var(--text-secondary)}li>ul,li>ol{margin-top:6px;margin-bottom:0}code{font-family:var(--font-mono);font-size:.8125em;background:var(--accent-light);padding:2px 6px;border-radius:4px;color:var(--accent)}pre{background:#0f2b2b;color:#d4e8e6;padding:20px 24px;border-radius:var(--radius-lg);overflow-x:auto;margin-bottom:24px;font-size:.8125rem;line-height:1.7;border:1px solid rgba(255,255,255,.08);border-left:3px solid rgba(45,212,191,.5);box-shadow:var(--glow)}pre code{background:none;padding:0;border-radius:0;color:inherit;font-size:inherit}.card-grid{display:grid;grid-template-columns:repeat(2,1fr);gap:16px;margin-bottom:32px}.card{background:var(--bg-card);border:1px solid 
var(--border);border-radius:var(--radius-lg);padding:20px;box-shadow:var(--shadow);transition:border-color var(--transition),box-shadow var(--transition)}.card:hover{border-color:var(--border-bright);box-shadow:0 4px 24px #01686714,var(--glow)}.card-title{font-size:.9375rem;font-weight:600;margin-bottom:6px;color:var(--text)}.card-desc{font-size:.8125rem;color:var(--text-secondary);line-height:1.5}.card-accent{border-top:3px solid var(--accent)}.card-env{border-top:3px solid var(--color-env)}.card-svc{border-top:3px solid var(--color-svc)}.card-agent{border-top:3px solid var(--color-agent)}.card-weight{display:inline-block;font-size:.6875rem;font-weight:700;font-family:var(--font-mono);background:var(--accent-light);color:var(--accent);padding:2px 8px;border-radius:10px;margin-bottom:8px}table{width:100%;border-collapse:collapse;margin-bottom:24px;font-size:.875rem}thead th{text-align:left;padding:10px 12px;font-size:.75rem;text-transform:uppercase;letter-spacing:.06em;color:var(--text-muted);font-weight:600;border-bottom:2px solid var(--border)}tbody td{padding:10px 12px;border-bottom:1px solid var(--light-gray);vertical-align:top;color:var(--text-secondary)}tbody tr:hover{background:var(--light-gray)}td code{font-size:.75rem}.callout{padding:16px 20px;border-radius:var(--radius);margin-bottom:24px;font-size:.875rem;line-height:1.6}.callout p:last-child{margin-bottom:0}.callout-title{font-weight:600;font-size:.75rem;text-transform:uppercase;letter-spacing:.04em;margin-bottom:4px}.callout-info{background:var(--accent-light);border-left:3px solid var(--accent)}.callout-info .callout-title{color:var(--accent)}.callout-warn{background:var(--warning-light);border-left:3px solid var(--warning)}.callout-warn .callout-title{color:var(--warning)}.callout-success{background:var(--success-light);border-left:3px solid var(--success)}.callout-success 
.callout-title{color:var(--success)}.pipeline{display:flex;flex-wrap:wrap;gap:8px;align-items:center;margin-bottom:32px;padding:24px;background:var(--bg-card);border:1px solid var(--border);border-radius:var(--radius-lg);box-shadow:var(--shadow)}.pipeline-step{display:flex;align-items:center;gap:4px;padding:6px 14px;background:var(--light-gray);border:1px solid var(--border);border-radius:20px;font-size:.8125rem;font-weight:500;color:var(--text-secondary);white-space:nowrap}.pipeline-step.llm{background:var(--accent-light);border-color:var(--accent);color:var(--accent);font-weight:600}.pipeline-arrow{color:var(--text-muted);font-size:.875rem;flex-shrink:0}.hero{margin-bottom:48px}.hero-badge{display:inline-block;font-size:.6875rem;font-weight:600;text-transform:uppercase;letter-spacing:.06em;color:var(--accent);background:var(--accent-light);border:1px solid rgba(1,104,103,.2);padding:4px 12px;border-radius:12px;margin-bottom:16px}.cli-block{background:var(--bg-card);border:1px solid var(--border);border-radius:var(--radius-lg);padding:20px 24px;margin-bottom:16px;box-shadow:var(--shadow);transition:border-color var(--transition),box-shadow var(--transition)}.cli-block:hover{border-color:var(--border-bright);box-shadow:var(--shadow),var(--glow)}.cli-command{font-family:var(--font-mono);font-size:.9375rem;font-weight:600;color:var(--accent);margin-bottom:8px}.cli-desc{font-size:.875rem;color:var(--text-secondary);margin-bottom:12px}.cli-flags{display:flex;flex-direction:column;gap:6px}.cli-flag{display:flex;gap:12px;font-size:.8125rem}.cli-flag-name{font-family:var(--font-mono);font-weight:500;color:var(--text);flex-shrink:0;min-width:200px}.cli-flag-desc{color:var(--text-secondary)}.site-footer{margin-top:64px;padding:24px 0;border-top:1px solid var(--border);font-size:.8125rem;color:var(--text-muted)}.dimension-wheel{display:grid;grid-template-columns:1fr 200px 1fr;grid-template-rows:1fr 1fr;gap:20px 28px;align-items:center;margin-bottom:32px;padding:24px 
0}.dimension-chart{grid-column:2;grid-row:1 / 3;width:200px;height:200px}.dimension-chart circle{transition:stroke-width .2s ease,opacity .2s ease;cursor:pointer}.dimension-chart text{transition:opacity .2s ease}.dimension-label{display:block;padding:10px 14px;border-radius:var(--radius);text-decoration:none;transition:background var(--transition),box-shadow var(--transition)}.dimension-label:hover{background:var(--light-gray);box-shadow:var(--shadow)}.dimension-pct{display:block;font-size:.75rem;font-weight:700;font-family:var(--font-mono);margin-bottom:2px}.dimension-name{display:block;font-size:.9375rem;font-weight:600;color:var(--text);margin-bottom:2px}.dimension-desc{display:block;font-size:.8125rem;color:var(--text-secondary);line-height:1.4}.dimension-label-goal{grid-column:3;grid-row:1;justify-self:start}.dimension-label-env{grid-column:3;grid-row:2;justify-self:start}.dimension-label-svc{grid-column:1;grid-row:2;justify-self:end;text-align:right}.dimension-label-agent{grid-column:1;grid-row:1;justify-self:end;text-align:right}@media(max-width:768px){.mobile-menu-btn{display:block}.sidebar{position:fixed;top:56px;left:0;bottom:0;z-index:40;transform:translate(-100%);transition:transform .2s ease;box-shadow:var(--shadow-lg)}.sidebar.open{transform:translate(0)}.sidebar-backdrop{display:none;position:fixed;inset:56px 0 0;z-index:39;background:#0f2b2b80}.sidebar-backdrop.open{display:block}.main-content{padding:24px 16px 
64px}.card-grid{grid-template-columns:1fr}.dimension-wheel{grid-template-columns:1fr;grid-template-rows:auto;justify-items:center}.dimension-chart{grid-column:1;grid-row:1}.dimension-label-goal{grid-column:1;grid-row:2;justify-self:stretch;text-align:left}.dimension-label-env{grid-column:1;grid-row:3;justify-self:stretch;text-align:left}.dimension-label-svc{grid-column:1;grid-row:4;justify-self:stretch;text-align:left}.dimension-label-agent{grid-column:1;grid-row:5;justify-self:stretch;text-align:left}.pipeline{flex-direction:column;align-items:stretch}.pipeline-arrow{text-align:center;transform:rotate(90deg)}.cli-flag{flex-direction:column;gap:2px}.cli-flag-name{min-width:0}}
|
|
@@ -1,18 +0,0 @@
|
|
|
1
|
-
<!DOCTYPE html><html lang="en"> <head><meta charset="UTF-8"><meta name="viewport" content="width=device-width, initial-scale=1.0"><title>CLI - AXIS Docs</title><meta name="description" content="Documentation for AXIS, the Agent eXperience Index Score synthetic testing framework for AI agents."><link rel="stylesheet" href="/_astro/cli.DDWZtG0-.css"></head> <body> <header class="site-header"> <a href="/" class="site-logo" aria-label="AXIS home"> <span class="site-logo-mark"><span class="logo-ax">AX</span><span class="logo-i">I</span>S</span> </a> <div class="site-header-links"> <a href="https://github.com/netlify/axis">GitHub</a> <a href="https://www.npmjs.com/package/@netlify/axis">npm</a> </div> <button class="mobile-menu-btn" id="menu-btn" aria-label="Toggle navigation">☰</button> </header> <div class="site-shell"> <div class="sidebar-backdrop" id="sidebar-backdrop"></div> <aside class="sidebar" id="sidebar"> <nav class="sidebar-nav"> <div class="sidebar-section-label">Getting Started</div> <a href="/" class="sidebar-link">What is AXIS</a> <a href="/quickstart" class="sidebar-link">Quick Start</a> <div class="sidebar-section-label">How It Works</div> <a href="/scoring" class="sidebar-link">Scoring Framework</a> <a href="/running" class="sidebar-link">Running Tests</a> <div class="sidebar-section-label">Reference</div> <a href="/cli" class="sidebar-link active">CLI</a> <a href="/configuration" class="sidebar-link">Configuration</a> </nav> </aside> <main class="main-content"> <h1>CLI Reference</h1> <p class="lead">
|
|
2
|
-
Complete reference for the <code>axis</code> command-line interface. All commands can be run
|
|
3
|
-
directly or via <code>npx @netlify/axis</code>.
|
|
4
|
-
</p> <h2><code>axis run</code></h2> <div class="cli-block"> <div class="cli-command">axis run [options]</div> <div class="cli-desc">Execute scenarios against configured agents.</div> <div class="cli-flags"> <div class="cli-flag"> <span class="cli-flag-name"><code>-c, --config <path></code></span> <span class="cli-flag-desc">Config file (default: <code>axis.config.json</code>).</span> </div> <div class="cli-flag"> <span class="cli-flag-name"><code>-s, --scenario <key></code></span> <span class="cli-flag-desc">Run a specific scenario by key.</span> </div> <div class="cli-flag"> <span class="cli-flag-name"><code>-a, --agent <name></code></span> <span class="cli-flag-desc">Run with a specific agent only.</span> </div> <div class="cli-flag"> <span class="cli-flag-name"><code>--json</code></span> <span class="cli-flag-desc">JSON output to stdout (no live terminal display).</span> </div> <div class="cli-flag"> <span class="cli-flag-name"><code>-v, --verbose</code></span> <span class="cli-flag-desc">Detailed per-step logging.</span> </div> <div class="cli-flag"> <span class="cli-flag-name"><code>-o, --output-dir <dir></code></span> <span class="cli-flag-desc">Also write report files to this directory.</span> </div> <div class="cli-flag"> <span class="cli-flag-name"><code>--concurrency <n></code></span> <span class="cli-flag-desc">Max parallel jobs.</span> </div> <div class="cli-flag"> <span class="cli-flag-name"><code>--debug</code></span> <span class="cli-flag-desc">Capture raw agent stdout for debugging.</span> </div> <div class="cli-flag"> <span class="cli-flag-name"><code>--no-score</code></span> <span class="cli-flag-desc">Skip scoring (raw results only).</span> </div> <div class="cli-flag"> <span class="cli-flag-name"><code>--refresh-skills</code></span> <span class="cli-flag-desc">Force re-clone remote skills.</span> </div> <div class="cli-flag"> <span class="cli-flag-name"><code>--compare-baseline [name]</code></span> <span class="cli-flag-desc">Diff against a 
baseline after scoring.</span> </div> </div> </div> <h2><code>axis reports</code></h2> <div class="cli-block"> <div class="cli-command">axis reports [reportId] [scenarioKey] [options]</div> <div class="cli-desc">View past AXIS reports.</div> <div class="cli-flags"> <div class="cli-flag"> <span class="cli-flag-name"><code>[reportId]</code></span> <span class="cli-flag-desc">Report ID or <code>latest</code>. Omit to list all reports.</span> </div> <div class="cli-flag"> <span class="cli-flag-name"><code>[scenarioKey]</code></span> <span class="cli-flag-desc">Drill into a specific scenario detail.</span> </div> <div class="cli-flag"> <span class="cli-flag-name"><code>-a, --agent <name...></code></span> <span class="cli-flag-desc">Filter by agent(s), repeatable.</span> </div> <div class="cli-flag"> <span class="cli-flag-name"><code>--json</code></span> <span class="cli-flag-desc">Output as JSON.</span> </div> <div class="cli-flag"> <span class="cli-flag-name"><code>--html</code></span> <span class="cli-flag-desc">Open report in browser.</span> </div> <div class="cli-flag"> <span class="cli-flag-name"><code>-n, --limit <count></code></span> <span class="cli-flag-desc">Max reports to list (default: 10).</span> </div> </div> </div> <h2><code>axis baseline</code></h2> <div class="cli-block"> <div class="cli-command">axis baseline set [name]</div> <div class="cli-desc">Create or update a baseline snapshot from a report.</div> <div class="cli-flags"> <div class="cli-flag"> <span class="cli-flag-name"><code>--from <reportId></code></span> <span class="cli-flag-desc">Use a specific report (default: latest).</span> </div> </div> </div> <div class="cli-block"> <div class="cli-command">axis baseline list</div> <div class="cli-desc">List all saved baselines.</div> </div> <div class="cli-block"> <div class="cli-command">axis baseline show [name]</div> <div class="cli-desc">Display baseline contents.</div> <div class="cli-flags"> <div class="cli-flag"> <span 
class="cli-flag-name"><code>--json</code></span> <span class="cli-flag-desc">Output as JSON.</span> </div> </div> </div> <div class="cli-block"> <div class="cli-command">axis baseline diff [name]</div> <div class="cli-desc">Compare a report against a baseline. Exits with code 1 if regressions are detected.</div> <div class="cli-flags"> <div class="cli-flag"> <span class="cli-flag-name"><code>--report <reportId></code></span> <span class="cli-flag-desc">Specific report to compare (default: latest).</span> </div> <div class="cli-flag"> <span class="cli-flag-name"><code>--json</code></span> <span class="cli-flag-desc">Output as JSON.</span> </div> </div> </div> <div class="cli-block"> <div class="cli-command">axis baseline delete [name]</div> <div class="cli-desc">Delete a saved baseline.</div> </div> <footer class="site-footer">
|
|
5
|
-
AXIS is maintained by Netlify.
|
|
6
|
-
</footer> </main> </div> <script>
|
|
7
|
-
const btn = document.getElementById("menu-btn");
|
|
8
|
-
const sidebar = document.getElementById("sidebar");
|
|
9
|
-
const backdrop = document.getElementById("sidebar-backdrop");
|
|
10
|
-
|
|
11
|
-
function toggle() {
|
|
12
|
-
sidebar.classList.toggle("open");
|
|
13
|
-
backdrop.classList.toggle("open");
|
|
14
|
-
}
|
|
15
|
-
|
|
16
|
-
btn.addEventListener("click", toggle);
|
|
17
|
-
backdrop.addEventListener("click", toggle);
|
|
18
|
-
</script> </body> </html>
|
|
@@ -1,121 +0,0 @@
|
|
|
1
|
-
<!DOCTYPE html><html lang="en"> <head><meta charset="UTF-8"><meta name="viewport" content="width=device-width, initial-scale=1.0"><title>Configuration - AXIS Docs</title><meta name="description" content="Documentation for AXIS, the Agent eXperience Index Score synthetic testing framework for AI agents."><link rel="stylesheet" href="/_astro/cli.DDWZtG0-.css"></head> <body> <header class="site-header"> <a href="/" class="site-logo" aria-label="AXIS home"> <span class="site-logo-mark"><span class="logo-ax">AX</span><span class="logo-i">I</span>S</span> </a> <div class="site-header-links"> <a href="https://github.com/netlify/axis">GitHub</a> <a href="https://www.npmjs.com/package/@netlify/axis">npm</a> </div> <button class="mobile-menu-btn" id="menu-btn" aria-label="Toggle navigation">☰</button> </header> <div class="site-shell"> <div class="sidebar-backdrop" id="sidebar-backdrop"></div> <aside class="sidebar" id="sidebar"> <nav class="sidebar-nav"> <div class="sidebar-section-label">Getting Started</div> <a href="/" class="sidebar-link">What is AXIS</a> <a href="/quickstart" class="sidebar-link">Quick Start</a> <div class="sidebar-section-label">How It Works</div> <a href="/scoring" class="sidebar-link">Scoring Framework</a> <a href="/running" class="sidebar-link">Running Tests</a> <div class="sidebar-section-label">Reference</div> <a href="/cli" class="sidebar-link">CLI</a> <a href="/configuration" class="sidebar-link active">Configuration</a> </nav> </aside> <main class="main-content"> <h1>Configuration Reference</h1> <p class="lead">
|
|
2
|
-
Complete reference for <code>axis.config.json</code> and scenario files.
|
|
3
|
-
</p> <h2>axis.config.json</h2> <p>
|
|
4
|
-
AXIS is configured via an <code>axis.config.json</code> file in your project root.
|
|
5
|
-
</p> <pre><code>{
|
|
6
|
-
"scenarios": "./scenarios",
|
|
7
|
-
"agents": [
|
|
8
|
-
"claude-code",
|
|
9
|
-
{
|
|
10
|
-
"adapter": "gemini",
|
|
11
|
-
"model": "gemini-2.5-pro",
|
|
12
|
-
"scenarios": ["cms/*"],
|
|
13
|
-
"flags": { "yolo": true }
|
|
14
|
-
}
|
|
15
|
-
],
|
|
16
|
-
"defaults": {
|
|
17
|
-
"concurrency": 4,
|
|
18
|
-
"scoring_weights": {
|
|
19
|
-
"goal_achievement": 0.4,
|
|
20
|
-
"environment": 0.2,
|
|
21
|
-
"service": 0.2,
|
|
22
|
-
"agent": 0.2
|
|
23
|
-
}
|
|
24
|
-
},
|
|
25
|
-
"env": ["ANTHROPIC_API_KEY", "GEMINI_API_KEY"],
|
|
26
|
-
"mcp_servers": {
|
|
27
|
-
"filesystem": {
|
|
28
|
-
"type": "stdio",
|
|
29
|
-
"command": "npx",
|
|
30
|
-
"args": ["-y", "@modelcontextprotocol/server-filesystem", "/tmp"]
|
|
31
|
-
}
|
|
32
|
-
},
|
|
33
|
-
"skills": ["./skills/deploy"],
|
|
34
|
-
"adapters": {
|
|
35
|
-
"my-agent": "./adapters/my-agent.ts"
|
|
36
|
-
}
|
|
37
|
-
}</code></pre> <h3>Top-Level Fields</h3> <table> <thead> <tr> <th>Field</th> <th>Type</th> <th>Description</th> </tr> </thead> <tbody> <tr> <td><code>scenarios</code></td> <td><code>string</code></td> <td>Path to the scenarios directory (required).</td> </tr> <tr> <td><code>agents</code></td> <td><code>(string | AgentConfig)[]</code></td> <td>Adapter names or full agent configurations (required).</td> </tr> <tr> <td><code>defaults</code></td> <td><code>object</code></td> <td>Default concurrency and scoring weights.</td> </tr> <tr> <td><code>env</code></td> <td><code>string[]</code></td> <td>Environment variables to pass through to agent processes.</td> </tr> <tr> <td><code>mcp_servers</code></td> <td><code>object</code></td> <td>MCP servers available to all agents.</td> </tr> <tr> <td><code>skills</code></td> <td><code>string[]</code></td> <td>Skills available to all agents (local paths, GitHub shorthand, or URLs).</td> </tr> <tr> <td><code>adapters</code></td> <td><code>object</code></td> <td>Custom adapter module paths, keyed by adapter name.</td> </tr> </tbody> </table> <h3>Agent Configuration</h3> <p>
|
|
38
|
-
Agents can be specified as a simple string (adapter name) or a full configuration object.
|
|
39
|
-
</p> <table> <thead> <tr> <th>Field</th> <th>Type</th> <th>Description</th> </tr> </thead> <tbody> <tr> <td><code>adapter</code></td> <td><code>string</code></td> <td>Adapter type (required): <code>claude-code</code>, <code>codex</code>, <code>gemini</code>, etc.</td> </tr> <tr> <td><code>model</code></td> <td><code>string</code></td> <td>Model override passed to the agent CLI.</td> </tr> <tr> <td><code>scenarios</code></td> <td><code>string[]</code></td> <td>Subset of scenarios to run (supports glob patterns like <code>cms/*</code>).</td> </tr> <tr> <td><code>skills</code></td> <td><code>string[]</code></td> <td>Agent-specific skills (merged with top-level skills).</td> </tr> <tr> <td><code>flags</code></td> <td><code>object</code></td> <td>CLI flags passed to the agent (for example, <code>{"full-auto": true}</code>).</td> </tr> <tr> <td><code>command</code></td> <td><code>string</code></td> <td>Custom CLI command (for custom adapters).</td> </tr> </tbody> </table><code> <h3>Scoring Weights</h3> <p>
|
|
40
|
-
Override the default dimension weights under <code>defaults.scoring_weights</code>. Values
|
|
41
|
-
must sum to 1.0.
|
|
42
|
-
</p> <pre><code>{
|
|
43
|
-
"defaults": {
|
|
44
|
-
"scoring_weights": {
|
|
45
|
-
"goal_achievement": 0.4,
|
|
46
|
-
"environment": 0.2,
|
|
47
|
-
"service": 0.2,
|
|
48
|
-
"agent": 0.2
|
|
49
|
-
}
|
|
50
|
-
}
|
|
51
|
-
}</code></pre> <h3>MCP Servers</h3> <p>
|
|
52
|
-
Configure Model Context Protocol servers that are automatically wired into each agent
|
|
53
|
-
environment. AXIS supports both stdio (local process) and HTTP (remote endpoint) servers.
|
|
54
|
-
</p> <pre><code>{
|
|
55
|
-
"mcp_servers": {
|
|
56
|
-
"filesystem": {
|
|
57
|
-
"type": "stdio",
|
|
58
|
-
"command": "npx",
|
|
59
|
-
"args": ["-y", "@modelcontextprotocol/server-filesystem", "/tmp"],
|
|
60
|
-
"env": { "LOG_LEVEL": "info" }
|
|
61
|
-
},
|
|
62
|
-
"remote-api": {
|
|
63
|
-
"type": "http",
|
|
64
|
-
"url": "https://mcp.example.com/tools",
|
|
65
|
-
"headers": { "Authorization": "Bearer ${TOKEN}" }
|
|
66
|
-
}
|
|
67
|
-
}
|
|
68
|
-
}</code></pre> <p>
|
|
69
|
-
Each adapter writes MCP configuration in its native format before spawning the agent.
|
|
70
|
-
</p> <ul> <li><strong>Claude Code:</strong> <code>.mcp.json</code> in workspace root.</li> <li><strong>Codex:</strong> <code>config.toml</code> in <code>CODEX_HOME</code>.</li> <li><strong>Gemini:</strong> <code>settings.json</code> in <code>GEMINI_CLI_HOME</code>.</li> </ul> <h3>Skills</h3> <p>
|
|
71
|
-
Skills extend agent capabilities with reusable instruction sets. Specify them at the top level
|
|
72
|
-
(shared), per agent, or per scenario.
|
|
73
|
-
</p> <pre><code>{
|
|
74
|
-
"skills": [
|
|
75
|
-
"./skills/deploy",
|
|
76
|
-
"netlify/axis-skill-deploy",
|
|
77
|
-
"https://github.com/owner/repo"
|
|
78
|
-
]
|
|
79
|
-
}</code></pre> <ul> <li><strong>Local paths:</strong> Relative to the config file.</li> <li><strong>GitHub shorthand:</strong> <code>owner/repo</code> format.</li> <li><strong>Full URLs:</strong> GitHub repository URLs.</li> </ul> <p>
|
|
80
|
-
Remote skills are cached in <code>.axis/skills-cache/</code>. Use <code>--refresh-skills</code>
|
|
81
|
-
to force re-clone.
|
|
82
|
-
</p> <h3>Environment Variables</h3> <p>
|
|
83
|
-
The <code>env</code> field lists additional environment variables to pass through to agent
|
|
84
|
-
processes. The following are always passed through by default:
|
|
85
|
-
</p> <ul> <li><code>ANTHROPIC_API_KEY</code>, <code>CODEX_API_KEY</code>, <code>GEMINI_API_KEY</code></li> <li>System essentials: <code>PATH</code>, <code>USER</code>, <code>SHELL</code>, <code>LANG</code>, <code>TERM</code>, <code>TMPDIR</code></li> </ul> <h2>Scenarios</h2> <p>
|
|
86
|
-
Scenarios are JSON files in the configured scenarios directory. The filename (without <code>.json</code>)
|
|
87
|
-
becomes the scenario key. Nested directories create namespaced keys.
|
|
88
|
-
</p> <ul> <li><code>scenarios/hello-world.json</code> maps to key <code>hello-world</code>.</li> <li><code>scenarios/cms/create-post.json</code> maps to key <code>cms/create-post</code>.</li> </ul> <h3>Scenario Schema</h3> <pre><code>{
|
|
89
|
-
"name": "Debug and fix a broken script",
|
|
90
|
-
"prompt": "There is a JavaScript file at /tmp/app/add.js that has a bug. Find it, fix it, and verify.",
|
|
91
|
-
"rubric": [
|
|
92
|
-
{ "check": "Agent identified the bug", "weight": 0.3 },
|
|
93
|
-
{ "check": "Agent fixed the bug", "weight": 0.4 },
|
|
94
|
-
{ "check": "Agent verified the fix", "weight": 0.3 }
|
|
95
|
-
],
|
|
96
|
-
"setup": [
|
|
97
|
-
{ "action": "run_script", "command": "mkdir -p /tmp/app" },
|
|
98
|
-
{ "action": "run_script", "command": "echo 'function add(a,b) { return a-b; }' > /tmp/app/add.js" }
|
|
99
|
-
],
|
|
100
|
-
"teardown": [
|
|
101
|
-
{ "action": "run_script", "command": "rm -rf /tmp/app" }
|
|
102
|
-
],
|
|
103
|
-
"agents": ["claude-code"]
|
|
104
|
-
}</code></pre> <table> <thead> <tr> <th>Field</th> <th>Type</th> <th>Description</th> </tr> </thead> <tbody> <tr> <td><code>name</code></td> <td><code>string</code></td> <td>Human-readable scenario title (required).</td> </tr> <tr> <td><code>prompt</code></td> <td><code>string</code></td> <td>Task description sent to the agent (required).</td> </tr> <tr> <td><code>rubric</code></td> <td><code>string | object[]</code></td> <td>Success criteria: a string or array of checks with optional weights (required).</td> </tr> <tr> <td><code>setup</code></td> <td><code>object[]</code></td> <td>Lifecycle actions run before the agent.</td> </tr> <tr> <td><code>teardown</code></td> <td><code>object[]</code></td> <td>Lifecycle actions run after scoring.</td> </tr> <tr> <td><code>agents</code></td> <td><code>string[]</code></td> <td>Override which agents run this scenario.</td> </tr> <tr> <td><code>skills</code></td> <td><code>string[]</code></td> <td>Scenario-specific skills.</td> </tr> </tbody> </table> <div class="callout callout-info"> <div class="callout-title">Setup and Teardown</div> <p>
|
|
105
|
-
Lifecycle actions run sequentially with a 30-second timeout per action. Setup failures abort
|
|
106
|
-
the job. Teardown failures are logged but do not block subsequent jobs.
|
|
107
|
-
</p> </div> </code> <footer class="site-footer">
|
|
108
|
-
AXIS is maintained by Netlify.
|
|
109
|
-
</footer> </main> </div> <script>
|
|
110
|
-
const btn = document.getElementById("menu-btn");
|
|
111
|
-
const sidebar = document.getElementById("sidebar");
|
|
112
|
-
const backdrop = document.getElementById("sidebar-backdrop");
|
|
113
|
-
|
|
114
|
-
function toggle() {
|
|
115
|
-
sidebar.classList.toggle("open");
|
|
116
|
-
backdrop.classList.toggle("open");
|
|
117
|
-
}
|
|
118
|
-
|
|
119
|
-
btn.addEventListener("click", toggle);
|
|
120
|
-
backdrop.addEventListener("click", toggle);
|
|
121
|
-
</script> </body> </html>
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
export default new Map();
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
export default new Map();
|
|
@@ -1,9 +0,0 @@
|
|
|
1
|
-
[
|
|
2
|
-
["Map", 1, 2],
|
|
3
|
-
"meta::meta",
|
|
4
|
-
["Map", 3, 4, 5, 6],
|
|
5
|
-
"astro-version",
|
|
6
|
-
"5.18.1",
|
|
7
|
-
"astro-config-digest",
|
|
8
|
-
"{\"root\":{},\"srcDir\":{},\"publicDir\":{},\"outDir\":{},\"cacheDir\":{},\"compressHTML\":true,\"base\":\"/\",\"trailingSlash\":\"ignore\",\"output\":\"static\",\"scopedStyleStrategy\":\"attribute\",\"build\":{\"format\":\"directory\",\"client\":{},\"server\":{},\"assets\":\"_astro\",\"serverEntry\":\"entry.mjs\",\"redirects\":true,\"inlineStylesheets\":\"auto\",\"concurrency\":1},\"server\":{\"open\":false,\"host\":false,\"port\":4321,\"streaming\":true,\"allowedHosts\":[]},\"redirects\":{},\"image\":{\"endpoint\":{\"route\":\"/_image\"},\"service\":{\"entrypoint\":\"astro/assets/services/sharp\",\"config\":{}},\"domains\":[],\"remotePatterns\":[],\"responsiveStyles\":false},\"devToolbar\":{\"enabled\":true},\"markdown\":{\"syntaxHighlight\":{\"type\":\"shiki\",\"excludeLangs\":[\"math\"]},\"shikiConfig\":{\"langs\":[],\"langAlias\":{},\"theme\":\"github-dark\",\"themes\":{},\"wrap\":false,\"transformers\":[]},\"remarkPlugins\":[],\"rehypePlugins\":[],\"remarkRehype\":{},\"gfm\":true,\"smartypants\":true},\"security\":{\"checkOrigin\":true,\"allowedDomains\":[],\"actionBodySizeLimit\":1048576},\"env\":{\"schema\":{},\"validateSecrets\":false},\"experimental\":{\"clientPrerender\":false,\"contentIntellisense\":false,\"headingIdCompat\":false,\"preserveScriptOrder\":false,\"liveContentCollections\":false,\"csp\":false,\"staticImportMetaEnv\":false,\"chromeDevtoolsWorkspace\":false,\"failOnPrerenderConflict\":false,\"svgo\":false},\"legacy\":{\"collections\":false}}"
|
|
9
|
-
]
|
|
@@ -1,69 +0,0 @@
|
|
|
1
|
-
<!DOCTYPE html><html lang="en"> <head><meta charset="UTF-8"><meta name="viewport" content="width=device-width, initial-scale=1.0"><title>What is AXIS - AXIS Docs</title><meta name="description" content="Documentation for AXIS, the Agent eXperience Index Score synthetic testing framework for AI agents."><link rel="stylesheet" href="/_astro/cli.DDWZtG0-.css"></head> <body> <header class="site-header"> <a href="/" class="site-logo" aria-label="AXIS home"> <span class="site-logo-mark"><span class="logo-ax">AX</span><span class="logo-i">I</span>S</span> </a> <div class="site-header-links"> <a href="https://github.com/netlify/axis">GitHub</a> <a href="https://www.npmjs.com/package/@netlify/axis">npm</a> </div> <button class="mobile-menu-btn" id="menu-btn" aria-label="Toggle navigation">☰</button> </header> <div class="site-shell"> <div class="sidebar-backdrop" id="sidebar-backdrop"></div> <aside class="sidebar" id="sidebar"> <nav class="sidebar-nav"> <div class="sidebar-section-label">Getting Started</div> <a href="/" class="sidebar-link active">What is AXIS</a> <a href="/quickstart" class="sidebar-link">Quick Start</a> <div class="sidebar-section-label">How It Works</div> <a href="/scoring" class="sidebar-link">Scoring Framework</a> <a href="/running" class="sidebar-link">Running Tests</a> <div class="sidebar-section-label">Reference</div> <a href="/cli" class="sidebar-link">CLI</a> <a href="/configuration" class="sidebar-link">Configuration</a> </nav> </aside> <main class="main-content"> <div class="hero"> <h1>AXIS - Agent eXperience Index Score</h1> <p class="lead">
|
|
2
|
-
AXIS is both an open scoring framework for measuring <strong>agent experience (AX)</strong>
|
|
3
|
-
and a CLI tool that implements it. Think
|
|
4
|
-
<a href="https://developer.chrome.com/docs/lighthouse">Lighthouse</a>, but instead of scoring
|
|
5
|
-
user experience, AXIS scores <strong>agent experience</strong>.
|
|
6
|
-
</p> </div> <h2>Why AX Matters</h2> <p>
|
|
7
|
-
The web has Lighthouse. APIs have contract testing. Performance has k6. But there is no
|
|
8
|
-
standardized way to answer: <em>"How well does my system work when an AI agent tries to use it?"</em> </p> <p>
|
|
9
|
-
As agents become a primary interface for websites, APIs, and developer platforms, the systems
|
|
10
|
-
they interact with need to be measured and optimized for that interaction. Just as we optimize
|
|
11
|
-
for page load time or accessibility, AX is the agent-era equivalent of UX.
|
|
12
|
-
</p> <h2>Our Approach</h2> <p>
|
|
13
|
-
AXIS is built on two core beliefs about how agent experience should be measured.
|
|
14
|
-
</p> <p> <strong>Measure what matters, where you have leverage.</strong> Agent experience is not a single
|
|
15
|
-
number. It breaks down into distinct dimensions: how well the agent completes the task, how it
|
|
16
|
-
uses the environment, how it interacts with services, and how it reasons through problems.
|
|
17
|
-
Purpose-built tooling that scores each dimension independently gives you a clear picture of
|
|
18
|
-
where to focus. If your API responses are slowing agents down, you see it in the Service
|
|
19
|
-
dimension. If your project structure confuses agents, Environment tells you. Generic pass/fail
|
|
20
|
-
testing does not surface these signals.
|
|
21
|
-
</p> <p> <strong>Test against real agent behavior, not theoretical support.</strong> It is not enough to
|
|
22
|
-
validate that your system publishes the right config files or follows a protocol spec. What
|
|
23
|
-
matters is whether agents actually discover and use what you provide. AXIS measures this by
|
|
24
|
-
running real agents against real scenarios and observing what happens: which tools they call,
|
|
25
|
-
which files they read, which APIs they hit. This tells you what agents <em>do</em>, not what
|
|
26
|
-
they <em>could</em> do in theory.
|
|
27
|
-
</p> <h2>The Scoring Framework</h2> <p>
|
|
28
|
-
At its core, AXIS defines a standard way to measure agent experience across four independent
|
|
29
|
-
dimensions. Any tool, platform, or CI system can implement this framework to produce
|
|
30
|
-
comparable AX measurements.
|
|
31
|
-
</p> <div class="card-grid"> <div class="card card-accent"> <span class="card-weight">40%</span> <div class="card-title">Goal Achievement</div> <div class="card-desc">
|
|
32
|
-
Did the agent complete the task? Evaluated against rubric criteria you define for each
|
|
33
|
-
scenario.
|
|
34
|
-
</div> </div> <div class="card card-env"> <span class="card-weight">20%</span> <div class="card-title">Environment</div> <div class="card-desc">
|
|
35
|
-
How well did the agent use the OS, filesystem, and dev tools? Measures quality of shell
|
|
36
|
-
commands, file operations, git usage, and build tools.
|
|
37
|
-
</div> </div> <div class="card card-svc"> <span class="card-weight">20%</span> <div class="card-title">Service</div> <div class="card-desc">
|
|
38
|
-
How effectively did the agent use external services? Evaluates API calls, MCP tools,
|
|
39
|
-
network requests, and third-party integrations.
|
|
40
|
-
</div> </div> <div class="card card-agent"> <span class="card-weight">20%</span> <div class="card-title">Agent</div> <div class="card-desc">
|
|
41
|
-
How well did the agent reason and self-organize? Covers planning, task management, tool
|
|
42
|
-
discovery, and metacognitive behavior.
|
|
43
|
-
</div> </div> </div> <p>
|
|
44
|
-
These four dimensions combine into a single 0 to 100 <strong>AXIS Result</strong>. The
|
|
45
|
-
framework specifies what signals feed each dimension, how interactions are categorized, and
|
|
46
|
-
how the composite score is calculated. See <a href="/scoring">Scoring Framework</a> for full
|
|
47
|
-
details on the signals and scoring logic.
|
|
48
|
-
</p> <h2>The CLI Tool</h2> <p>
|
|
49
|
-
The <code>@netlify/axis</code> package is the reference implementation of the scoring framework.
|
|
50
|
-
It provides a CLI that runs agent scenarios, captures transcripts, scores the results, and
|
|
51
|
-
produces reports.
|
|
52
|
-
</p> <ul> <li><strong>Define scenarios</strong> as JSON files with a prompt, rubric, and optional setup/teardown steps.</li> <li><strong>Run them</strong> against any supported agent (or your own custom adapter) in isolated workspaces.</li> <li><strong>Score automatically</strong> using a multi-pass LLM evaluation pipeline that produces per-dimension and composite scores.</li> <li><strong>Track over time</strong> with persistent reports, baseline snapshots, and regression detection for CI gating.</li> </ul> <h3>Built-in Adapters</h3> <p>
|
|
53
|
-
The CLI ships with adapters for popular AI coding agents. Each adapter handles process spawning,
|
|
54
|
-
transcript capture, and output normalization for its agent.
|
|
55
|
-
</p> <table> <thead> <tr> <th>Adapter</th> <th>Agent</th> <th>Required Env</th> </tr> </thead> <tbody> <tr> <td><code>claude-code</code></td> <td>Claude Code</td> <td><code>ANTHROPIC_API_KEY</code></td> </tr> <tr> <td><code>codex</code></td> <td>OpenAI Codex</td> <td><code>CODEX_API_KEY</code></td> </tr> <tr> <td><code>gemini</code></td> <td>Google Gemini CLI</td> <td><code>GEMINI_API_KEY</code></td> </tr> <tr> <td><code>goose</code></td> <td>Goose</td> <td>None</td> </tr> <tr> <td><code>claude-sdk</code></td> <td>Claude SDK</td> <td><code>ANTHROPIC_API_KEY</code></td> </tr> <tr> <td><code>gemini-acp</code></td> <td>Gemini (ACP)</td> <td><code>GEMINI_API_KEY</code></td> </tr> <tr> <td>Custom</td> <td>Any agent via <code>createAgentAdapter()</code></td> <td>User-defined</td> </tr> </tbody> </table> <footer class="site-footer">
|
|
56
|
-
AXIS is maintained by Netlify.
|
|
57
|
-
</footer> </main> </div> <script>
|
|
58
|
-
const btn = document.getElementById("menu-btn");
|
|
59
|
-
const sidebar = document.getElementById("sidebar");
|
|
60
|
-
const backdrop = document.getElementById("sidebar-backdrop");
|
|
61
|
-
|
|
62
|
-
function toggle() {
|
|
63
|
-
sidebar.classList.toggle("open");
|
|
64
|
-
backdrop.classList.toggle("open");
|
|
65
|
-
}
|
|
66
|
-
|
|
67
|
-
btn.addEventListener("click", toggle);
|
|
68
|
-
backdrop.addEventListener("click", toggle);
|
|
69
|
-
</script> </body> </html>
|
|
@@ -1,59 +0,0 @@
|
|
|
1
|
-
<!DOCTYPE html><html lang="en"> <head><meta charset="UTF-8"><meta name="viewport" content="width=device-width, initial-scale=1.0"><title>Quick Start - AXIS Docs</title><meta name="description" content="Documentation for AXIS, the Agent eXperience Index Score synthetic testing framework for AI agents."><link rel="stylesheet" href="/_astro/cli.DDWZtG0-.css"></head> <body> <header class="site-header"> <a href="/" class="site-logo" aria-label="AXIS home"> <span class="site-logo-mark"><span class="logo-ax">AX</span><span class="logo-i">I</span>S</span> </a> <div class="site-header-links"> <a href="https://github.com/netlify/axis">GitHub</a> <a href="https://www.npmjs.com/package/@netlify/axis">npm</a> </div> <button class="mobile-menu-btn" id="menu-btn" aria-label="Toggle navigation">☰</button> </header> <div class="site-shell"> <div class="sidebar-backdrop" id="sidebar-backdrop"></div> <aside class="sidebar" id="sidebar"> <nav class="sidebar-nav"> <div class="sidebar-section-label">Getting Started</div> <a href="/" class="sidebar-link">What is AXIS</a> <a href="/quickstart" class="sidebar-link active">Quick Start</a> <div class="sidebar-section-label">How It Works</div> <a href="/scoring" class="sidebar-link">Scoring Framework</a> <a href="/running" class="sidebar-link">Running Tests</a> <div class="sidebar-section-label">Reference</div> <a href="/cli" class="sidebar-link">CLI</a> <a href="/configuration" class="sidebar-link">Configuration</a> </nav> </aside> <main class="main-content"> <h1>Quick Start</h1> <p class="lead">
|
|
2
|
-
Get AXIS running in your project in a few minutes. This guide walks through creating a config,
|
|
3
|
-
writing your first scenario, and viewing the results.
|
|
4
|
-
</p> <h2>Prerequisites</h2> <ul> <li>Node.js 18 or later.</li> <li>An API key for at least one supported agent (for example, <code>ANTHROPIC_API_KEY</code> for Claude Code).</li> </ul> <h2>1. Create a Config File</h2> <p>
|
|
5
|
-
Add an <code>axis.config.json</code> to your project root. At minimum, specify where your
|
|
6
|
-
scenarios live and which agents to run.
|
|
7
|
-
</p> <pre><code>{
|
|
8
|
-
"scenarios": "./scenarios",
|
|
9
|
-
"agents": ["claude-code"]
|
|
10
|
-
}</code></pre> <h2>2. Write a Scenario</h2> <p>
|
|
11
|
-
Create a <code>scenarios/</code> directory and add your first scenario as a JSON file. Each
|
|
12
|
-
scenario needs a name, a prompt (the task for the agent), and a rubric (your success criteria).
|
|
13
|
-
</p> <pre><code>{
|
|
14
|
-
"name": "Create a greeting file",
|
|
15
|
-
"prompt": "Create a file called hello.txt with the content 'Hello from AXIS'.",
|
|
16
|
-
"rubric": [
|
|
17
|
-
{ "check": "File hello.txt exists", "weight": 0.5 },
|
|
18
|
-
{ "check": "File contains 'Hello from AXIS'", "weight": 0.5 }
|
|
19
|
-
]
|
|
20
|
-
}</code></pre> <p>
|
|
21
|
-
Save this as <code>scenarios/hello-world.json</code>. The filename (without <code>.json</code>)
|
|
22
|
-
becomes the scenario key used in reports and CLI commands.
|
|
23
|
-
</p> <h2>3. Run It</h2> <pre><code>npx @netlify/axis run</code></pre> <p>
|
|
24
|
-
AXIS spawns the agent in an isolated workspace, captures the full interaction transcript, scores
|
|
25
|
-
the result against your rubric, and displays a report in your terminal.
|
|
26
|
-
</p> <h2>4. View the Report</h2> <p>
|
|
27
|
-
Every run saves a report to <code>.axis/reports/</code>. You can view it again at any time.
|
|
28
|
-
</p> <pre><code># View the latest report summary
|
|
29
|
-
npx @netlify/axis reports latest
|
|
30
|
-
|
|
31
|
-
# Open the HTML report in your browser
|
|
32
|
-
npx @netlify/axis reports latest --html
|
|
33
|
-
|
|
34
|
-
# Get JSON output for scripting
|
|
35
|
-
npx @netlify/axis reports latest --json</code></pre> <h2>5. Set a Baseline</h2> <p>
|
|
36
|
-
Once you have a run you are happy with, save it as a baseline. Future runs can diff against it
|
|
37
|
-
to detect regressions.
|
|
38
|
-
</p> <pre><code># Save the latest report as a baseline
|
|
39
|
-
npx @netlify/axis baseline set
|
|
40
|
-
|
|
41
|
-
# Compare future runs automatically
|
|
42
|
-
npx @netlify/axis run --compare-baseline</code></pre> <p>
|
|
43
|
-
The baseline comparison exits with code 1 if any regressions are detected, making it suitable for CI
|
|
44
|
-
gating.
|
|
45
|
-
</p> <h2>Next Steps</h2> <ul> <li><a href="/scoring">Scoring Framework</a> explains how the four dimensions are calculated and what signals drive each score.</li> <li><a href="/running">Running Tests</a> covers the full config reference, CLI commands, custom adapters, MCP servers, and CI integration.</li> </ul> <footer class="site-footer">
|
|
46
|
-
AXIS is maintained by Netlify.
|
|
47
|
-
</footer> </main> </div> <script>
|
|
48
|
-
const btn = document.getElementById("menu-btn");
|
|
49
|
-
const sidebar = document.getElementById("sidebar");
|
|
50
|
-
const backdrop = document.getElementById("sidebar-backdrop");
|
|
51
|
-
|
|
52
|
-
function toggle() {
|
|
53
|
-
sidebar.classList.toggle("open");
|
|
54
|
-
backdrop.classList.toggle("open");
|
|
55
|
-
}
|
|
56
|
-
|
|
57
|
-
btn.addEventListener("click", toggle);
|
|
58
|
-
backdrop.addEventListener("click", toggle);
|
|
59
|
-
</script> </body> </html>
|
|
@@ -1,87 +0,0 @@
|
|
|
1
|
-
<!DOCTYPE html><html lang="en"> <head><meta charset="UTF-8"><meta name="viewport" content="width=device-width, initial-scale=1.0"><title>Running Tests - AXIS Docs</title><meta name="description" content="Documentation for AXIS, the Agent eXperience Index Score synthetic testing framework for AI agents."><link rel="stylesheet" href="/_astro/cli.DDWZtG0-.css"></head> <body> <header class="site-header"> <a href="/" class="site-logo" aria-label="AXIS home"> <span class="site-logo-mark"><span class="logo-ax">AX</span><span class="logo-i">I</span>S</span> </a> <div class="site-header-links"> <a href="https://github.com/netlify/axis">GitHub</a> <a href="https://www.npmjs.com/package/@netlify/axis">npm</a> </div> <button class="mobile-menu-btn" id="menu-btn" aria-label="Toggle navigation">☰</button> </header> <div class="site-shell"> <div class="sidebar-backdrop" id="sidebar-backdrop"></div> <aside class="sidebar" id="sidebar"> <nav class="sidebar-nav"> <div class="sidebar-section-label">Getting Started</div> <a href="/" class="sidebar-link">What is AXIS</a> <a href="/quickstart" class="sidebar-link">Quick Start</a> <div class="sidebar-section-label">How It Works</div> <a href="/scoring" class="sidebar-link">Scoring Framework</a> <a href="/running" class="sidebar-link active">Running Tests</a> <div class="sidebar-section-label">Reference</div> <a href="/cli" class="sidebar-link">CLI</a> <a href="/configuration" class="sidebar-link">Configuration</a> </nav> </aside> <main class="main-content"> <h1>Running AXIS Tests</h1> <p class="lead">
|
|
2
|
-
How AXIS executes scenarios, manages agent processes, and produces reports.
|
|
3
|
-
</p> <h2>Execution Model</h2> <p>
|
|
4
|
-
When you run <code>axis run</code>, AXIS loads your config, discovers scenarios, and executes
|
|
5
|
-
each scenario/agent combination as an independent job. Jobs run in parallel up to the configured
|
|
6
|
-
concurrency limit.
|
|
7
|
-
</p> <p>
|
|
8
|
-
Each job follows the same lifecycle:
|
|
9
|
-
</p> <ol> <li>Run setup actions (if defined in the scenario).</li> <li>Spawn the agent process in an isolated workspace.</li> <li>Stream and capture the full interaction transcript.</li> <li>Score the transcript against the rubric (unless <code>--no-score</code> is set).</li> <li>Run teardown actions (if defined).</li> <li>Save the result to the report.</li> </ol> <h2>Built-in Adapters</h2> <p>
|
|
10
|
-
AXIS ships with adapters for popular AI coding agents. Each adapter handles CLI resolution,
|
|
11
|
-
process spawning, transcript capture, and output normalization.
|
|
12
|
-
</p> <table> <thead> <tr> <th>Adapter</th> <th>CLI Binary</th> <th>Required Env</th> <th>Default Flags</th> </tr> </thead> <tbody> <tr> <td><code>claude-code</code></td> <td><code>claude</code></td> <td><code>ANTHROPIC_API_KEY</code></td> <td><code>dangerously-skip-permissions</code></td> </tr> <tr> <td><code>codex</code></td> <td><code>codex</code></td> <td><code>CODEX_API_KEY</code></td> <td><code>full-auto</code>, <code>skip-git-repo-check</code></td> </tr> <tr> <td><code>gemini</code></td> <td><code>gemini</code></td> <td><code>GEMINI_API_KEY</code></td> <td><code>yolo</code></td> </tr> <tr> <td><code>goose</code></td> <td><code>goose</code></td> <td>None</td> <td>None</td> </tr> <tr> <td><code>claude-sdk</code></td> <td>SDK</td> <td><code>ANTHROPIC_API_KEY</code></td> <td>None</td> </tr> <tr> <td><code>gemini-acp</code></td> <td>ACP</td> <td><code>GEMINI_API_KEY</code></td> <td>None</td> </tr> </tbody> </table> <p>
|
|
13
|
-
CLI binaries are resolved automatically. If not found locally, AXIS falls back to
|
|
14
|
-
<code>npx --yes &lt;package&gt;</code> silently.
|
|
15
|
-
</p> <h2>Custom Adapters</h2> <p>
|
|
16
|
-
Create a custom adapter module using <code>createAgentAdapter()</code> and register it in your
|
|
17
|
-
<a href="/configuration">config</a>.
|
|
18
|
-
</p> <pre><code>// adapters/my-agent.ts
|
|
19
|
-
import { createAgentAdapter } from "@netlify/axis";
|
|
20
|
-
|
|
21
|
-
export default createAgentAdapter<{ stdout: string }>({
|
|
22
|
-
name: "my-agent",
|
|
23
|
-
resolveCommand: () => ({ command: "my-cli", prefixArgs: [] }),
|
|
24
|
-
buildArgs: (input) => [input.prompt],
|
|
25
|
-
initialState: () => ({ stdout: "" }),
|
|
26
|
-
streamConfig: {
|
|
27
|
-
mode: "aggregate",
|
|
28
|
-
onChunk: (chunk, ctx) => {
|
|
29
|
-
ctx.state.stdout += chunk;
|
|
30
|
-
},
|
|
31
|
-
},
|
|
32
|
-
getResult: (ctx) => ({
|
|
33
|
-
result: ctx.state.stdout.trim() || null,
|
|
34
|
-
}),
|
|
35
|
-
});</code></pre> <p>
|
|
36
|
-
Adapters support two stream modes: <strong>lines</strong> (NDJSON, one JSON object per stdout
|
|
37
|
-
line) and <strong>aggregate</strong> (raw chunks accumulated in state). The module must export
|
|
38
|
-
an <code>AgentAdapter</code> as its default export or as a named <code>adapter</code> export.
|
|
39
|
-
</p> <h2>Workspace Isolation</h2> <p>
|
|
40
|
-
Each agent run gets a fresh temporary directory as its workspace. AXIS isolates the following.
|
|
41
|
-
</p> <ul> <li><strong>HOME directory:</strong> Set to the workspace to prevent global config leakage.</li> <li><strong>Adapter-specific dirs:</strong> <code>CLAUDE_CONFIG_DIR</code>, <code>CODEX_HOME</code>, <code>GEMINI_CLI_HOME</code>.</li> <li><strong>Environment variables:</strong> Only explicitly listed vars and system essentials are passed through.</li> </ul> <h2>Reports</h2> <p>
|
|
42
|
-
Every run automatically saves a report to <code>.axis/reports/</code>.
|
|
43
|
-
</p> <pre><code>.axis/reports/{reportId}/
|
|
44
|
-
report.json # Manifest with summary + metadata
|
|
45
|
-
scenarios/{key}/{agent}.json # Full result with transcript
|
|
46
|
-
scenarios/{key}/{agent}.raw.ndjson # Raw stdout (--debug only)
|
|
47
|
-
scenarios/{key}/{agent}.sparse-index.txt # Scoring reference</code></pre> <p>
|
|
48
|
-
Use <code>axis reports</code> to list, view, and export reports. See the
|
|
49
|
-
<a href="/cli">CLI reference</a> for all available options.
|
|
50
|
-
</p> <h2>Baselines</h2> <p>
|
|
51
|
-
Baselines let you snapshot scores and detect regressions in future runs. They are stored in
|
|
52
|
-
<code>.axis/baselines/</code> and designed to be checked into version control.
|
|
53
|
-
</p> <pre><code># 1. Run your scenarios
|
|
54
|
-
axis run
|
|
55
|
-
|
|
56
|
-
# 2. Save the results as a baseline
|
|
57
|
-
axis baseline set
|
|
58
|
-
|
|
59
|
-
# 3. In future runs, compare against the baseline
|
|
60
|
-
axis run --compare-baseline
|
|
61
|
-
|
|
62
|
-
# 4. Or diff explicitly
|
|
63
|
-
axis baseline diff</code></pre> <p>
|
|
64
|
-
Baseline diff uses a noise tolerance of 1 point. Score deltas of 0 to 1 are reported
|
|
65
|
-
as unchanged. The diff command exits with code 1 if any regressions are detected, making it
|
|
66
|
-
suitable for CI gating.
|
|
67
|
-
</p> <h2>CI Integration</h2> <p>
|
|
68
|
-
AXIS is designed to work in CI environments. Key patterns:
|
|
69
|
-
</p> <ul> <li>Use <code>--json</code> for machine-readable output.</li> <li>Use <code>--compare-baseline</code> to gate on regressions (exit code 1).</li> <li>Set <code>--concurrency</code> to control resource usage.</li> <li>Pass API keys via environment variables.</li> </ul> <pre><code># GitHub Actions example
|
|
70
|
-
- name: Run AXIS tests
|
|
71
|
-
env:
|
|
72
|
-
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
|
|
73
|
-
run: npx @netlify/axis run --json --compare-baseline</code></pre> <footer class="site-footer">
|
|
74
|
-
AXIS is maintained by Netlify.
|
|
75
|
-
</footer> </main> </div> <script>
|
|
76
|
-
const btn = document.getElementById("menu-btn");
|
|
77
|
-
const sidebar = document.getElementById("sidebar");
|
|
78
|
-
const backdrop = document.getElementById("sidebar-backdrop");
|
|
79
|
-
|
|
80
|
-
function toggle() {
|
|
81
|
-
sidebar.classList.toggle("open");
|
|
82
|
-
backdrop.classList.toggle("open");
|
|
83
|
-
}
|
|
84
|
-
|
|
85
|
-
btn.addEventListener("click", toggle);
|
|
86
|
-
backdrop.addEventListener("click", toggle);
|
|
87
|
-
</script> </body> </html>
|