agent-regression-lab 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,147 @@
1
+ import { build } from "esbuild";
2
+ import { createServer } from "node:http";
3
+ import { readFileSync, existsSync } from "node:fs";
4
+ import { extname, resolve } from "node:path";
5
+ import { ensureDir } from "../lib/fs.js";
6
+ import { getRunErrorDetail } from "../runOutput.js";
7
+ import { Storage } from "../storage.js";
8
// TCP port the UI server binds to.
const PORT = 4173;
// Root directory for generated UI artifacts; resolved against the process working directory.
const UI_ROOT = resolve("artifacts", "ui");
// Sub-directory of UI_ROOT that holds the bundled client assets.
const ASSETS_ROOT = resolve(UI_ROOT, "assets");
11
/**
 * Builds the browser bundle and then starts the UI HTTP server on
 * `127.0.0.1:PORT`.
 *
 * @returns {Promise<void>} Resolves once the server is listening. Rejects if
 *   the asset build fails or the port cannot be bound (e.g. EADDRINUSE).
 */
export async function startUiServer() {
    await buildUiAssets();
    const server = createServer((request, response) => {
        // handleRequest is async: a rejection nobody observes becomes an
        // unhandled rejection, which terminates the process on current Node
        // versions. Contain the failure to this one request instead.
        handleRequest(request, response).catch((error) => {
            console.error("UI request failed:", error);
            if (response.writableEnded) {
                return;
            }
            if (response.headersSent) {
                // Too late to change the status; just close the response.
                response.end();
                return;
            }
            response.writeHead(500, { "Content-Type": "text/plain; charset=utf-8" });
            response.end("Internal server error");
        });
    });
    await new Promise((resolvePromise, rejectPromise) => {
        // Without an 'error' listener a bind failure is thrown as an uncaught
        // exception and this promise would never settle.
        server.once("error", rejectPromise);
        server.listen(PORT, "127.0.0.1", () => {
            server.off("error", rejectPromise);
            resolvePromise();
        });
    });
    console.log(`UI available at http://127.0.0.1:${PORT}`);
}
21
/**
 * Routes a single HTTP request.
 *
 * - `/api/*` is delegated to `handleApi`.
 * - `/assets/*` is served from disk beneath `UI_ROOT` via `serveStatic`.
 * - Every other path receives the HTML shell from `renderHtml`.
 *
 * An unparsable request target yields 400. An exception raised while
 * dispatching yields 500 (or just closes the response if headers were already
 * written) instead of rejecting the returned promise.
 *
 * @param {import("node:http").IncomingMessage} request - Incoming request.
 * @param {import("node:http").ServerResponse} response - Response to write to.
 * @returns {Promise<void>}
 */
async function handleRequest(request, response) {
    let url;
    try {
        // Only the path and query string are used below, so parse against a
        // fixed base. Building the base from the client-supplied Host header
        // makes `new URL` throw whenever that header is malformed.
        url = new URL(request.url ?? "/", `http://127.0.0.1:${PORT}`);
    }
    catch {
        response.writeHead(400, { "Content-Type": "text/plain; charset=utf-8" });
        response.end("Bad request");
        return;
    }
    try {
        if (url.pathname.startsWith("/api/")) {
            handleApi(url, response);
            return;
        }
        if (url.pathname.startsWith("/assets/")) {
            // The URL parser has already collapsed "." and ".." segments, so a
            // pathname that still starts with /assets/ resolves beneath UI_ROOT.
            serveStatic(resolve(UI_ROOT, `.${url.pathname}`), response);
            return;
        }
        response.writeHead(200, { "Content-Type": "text/html; charset=utf-8" });
        response.end(renderHtml());
    }
    catch (error) {
        console.error("UI request failed:", error);
        if (response.writableEnded) {
            return;
        }
        if (response.headersSent) {
            // Status line already committed; the best we can do is close.
            response.end();
            return;
        }
        response.writeHead(500, { "Content-Type": "text/plain; charset=utf-8" });
        response.end("Internal server error");
    }
}
34
/**
 * Answers a request under `/api/` with a JSON response.
 *
 * Routes, matched on `url.pathname`:
 * - `/api/runs` lists runs, optionally filtered by the `suite`, `status` and
 *   `provider` query parameters.
 * - `/api/runs/<id>` returns one stored run bundle plus its error detail, or
 *   404 when storage has no such run.
 * - `/api/compare?baseline=<id>&candidate=<id>` returns the comparison of two
 *   runs; both parameters are required (400 otherwise).
 *
 * Any other path yields 404. An exception raised by storage yields 500 with
 * the error message as the body's `error` field.
 *
 * @param {URL} url - Parsed request URL.
 * @param {import("node:http").ServerResponse} response - Response to write to.
 * @returns {void}
 */
function handleApi(url, response) {
    // An empty query value (e.g. `?status=`) means "no filter", exactly like
    // an absent parameter. All three filters must agree on this.
    const optionalParam = (name) => url.searchParams.get(name) || undefined;
    try {
        // Constructed inside the try so that a storage failure is reported as
        // a 500 response instead of escaping to the caller.
        const storage = new Storage();
        const { pathname } = url;
        if (pathname === "/api/runs") {
            const runs = storage.listRuns({
                suite: optionalParam("suite"),
                status: optionalParam("status"),
                provider: optionalParam("provider"),
            });
            sendJson(response, 200, { runs });
            return;
        }
        if (pathname.startsWith("/api/runs/")) {
            let runId;
            try {
                runId = decodeURIComponent(pathname.slice("/api/runs/".length));
            }
            catch {
                // Malformed percent-encoding is a client error, not a server failure.
                sendJson(response, 400, { error: "Run id is not valid percent-encoded text." });
                return;
            }
            const bundle = storage.getRun(runId);
            if (!bundle) {
                sendJson(response, 404, { error: `Run '${runId}' not found.` });
                return;
            }
            sendJson(response, 200, {
                run: bundle.run,
                agentVersion: bundle.agentVersion,
                evaluatorResults: bundle.evaluatorResults,
                toolCalls: bundle.toolCalls,
                traceEvents: bundle.traceEvents,
                errorDetail: getRunErrorDetail(bundle),
            });
            return;
        }
        if (pathname === "/api/compare") {
            const baseline = url.searchParams.get("baseline");
            const candidate = url.searchParams.get("candidate");
            if (!baseline || !candidate) {
                sendJson(response, 400, { error: "Both 'baseline' and 'candidate' query params are required." });
                return;
            }
            const comparison = storage.compareRuns(baseline, candidate);
            sendJson(response, 200, {
                baseline: {
                    ...comparison.baseline,
                    errorDetail: getRunErrorDetail(comparison.baseline),
                },
                candidate: {
                    ...comparison.candidate,
                    errorDetail: getRunErrorDetail(comparison.candidate),
                },
                notes: comparison.notes,
                deltas: comparison.deltas,
                evaluatorDiffs: comparison.evaluatorDiffs,
                toolDiffs: comparison.toolDiffs,
            });
            return;
        }
        sendJson(response, 404, { error: "Not found." });
    }
    catch (error) {
        if (response.headersSent) {
            // A response was already started; writing a second header block
            // would throw, so just close what is there.
            response.end();
            return;
        }
        sendJson(response, 500, { error: error instanceof Error ? error.message : String(error) });
    }
}
93
/**
 * Bundles the browser client into `ASSETS_ROOT` with esbuild.
 *
 * The output directory is created first via `ensureDir`. The entry point
 * `src/ui/client.tsx` is resolved against the current working directory.
 *
 * @returns {Promise<void>} Settles when esbuild's `build` call settles.
 */
async function buildUiAssets() {
    ensureDir(ASSETS_ROOT);
    // NOTE(review): the entry path is relative to the working directory, not to
    // this module. Confirm the source tree is present wherever this runs.
    const clientEntry = resolve("src", "ui", "client.tsx");
    const bundleOptions = {
        entryPoints: [clientEntry],
        outdir: ASSETS_ROOT,
        bundle: true,
        format: "esm",
        splitting: false,
        platform: "browser",
        sourcemap: false,
        logLevel: "silent",
        loader: {
            ".css": "css",
        },
    };
    await build(bundleOptions);
}
109
/**
 * Sends the file at `path` as the response body.
 *
 * The file is read before any header is written, so a failed read can still
 * produce a proper error status. A missing path or a directory yields 404;
 * any other read failure yields 500.
 *
 * @param {string} path - Absolute filesystem path of the asset.
 * @param {import("node:http").ServerResponse} response - Response to write to.
 * @returns {void}
 */
function serveStatic(path, response) {
    let body;
    try {
        // Reading directly (rather than checking existence first) also covers
        // directories, for which an existence check passes but the read throws
        // EISDIR, and files that disappear between a check and the read.
        body = readFileSync(path);
    }
    catch (error) {
        const code = error?.code;
        const isMissing = code === "ENOENT" || code === "EISDIR" || code === "ENOTDIR";
        response.writeHead(isMissing ? 404 : 500, { "Content-Type": "text/plain; charset=utf-8" });
        response.end(isMissing ? "Not found" : "Internal server error");
        return;
    }
    response.writeHead(200, { "Content-Type": getContentType(path) });
    response.end(body);
}
119
/**
 * Writes `payload` as a JSON response with the given HTTP status.
 *
 * @param {import("node:http").ServerResponse} response - Response to write to.
 * @param {number} status - HTTP status code.
 * @param {unknown} payload - Value to serialize with `JSON.stringify`.
 * @returns {void}
 * @throws {TypeError} If `payload` cannot be serialized (cycle, BigInt). In
 *   that case nothing has been written to `response` yet.
 */
function sendJson(response, status, payload) {
    // Serialize before touching the response: if this throws, no headers are
    // committed and the caller can still send an error response of its own.
    const body = JSON.stringify(payload);
    response.writeHead(status, { "Content-Type": "application/json; charset=utf-8" });
    response.end(body);
}
123
/**
 * Produces the HTML shell for the UI: an empty `#root` mount point plus the
 * stylesheet and module script served from `/assets/`.
 *
 * @returns {string} The complete HTML document.
 */
function renderHtml() {
    // NOTE(review): lines are emitted flush-left, as they appear in the
    // available source view; confirm if exact leading whitespace matters.
    const documentLines = [
        "<!doctype html>",
        '<html lang="en">',
        "<head>",
        '<meta charset="utf-8" />',
        '<meta name="viewport" content="width=device-width, initial-scale=1" />',
        "<title>Agent Regression Lab Alpha</title>",
        '<link rel="stylesheet" href="/assets/client.css" />',
        "</head>",
        "<body>",
        '<div id="root"></div>',
        '<script type="module" src="/assets/client.js"></script>',
        "</body>",
        "</html>",
    ];
    return documentLines.join("\n");
}
138
// Content-Type header values keyed by file extension (as returned by `extname`).
const CONTENT_TYPE_BY_EXTENSION = new Map([
    [".js", "application/javascript; charset=utf-8"],
    [".css", "text/css; charset=utf-8"],
]);

/**
 * Picks the Content-Type header value for a file based on its extension.
 *
 * @param {string} path - File path; only its extension is inspected.
 * @returns {string} The JavaScript or CSS content type for `.js` / `.css`
 *   (exact, case-sensitive match), otherwise plain text.
 */
function getContentType(path) {
    const extension = extname(path);
    return CONTENT_TYPE_BY_EXTENSION.get(extension) ?? "text/plain; charset=utf-8";
}
package/package.json ADDED
@@ -0,0 +1,53 @@
1
+ {
2
+ "name": "agent-regression-lab",
3
+ "version": "0.1.0",
4
+ "private": false,
5
+ "description": "Local-first scenario-based evaluation harness for AI agents.",
6
+ "license": "MIT",
7
+ "repository": {
8
+ "type": "git",
9
+ "url": "git+https://github.com/YakshithK/agent-regression-lab.git"
10
+ },
11
+ "homepage": "https://github.com/YakshithK/agent-regression-lab#readme",
12
+ "bugs": {
13
+ "url": "https://github.com/YakshithK/agent-regression-lab/issues"
14
+ },
15
+ "keywords": [
16
+ "ai",
17
+ "agent",
18
+ "evaluation",
19
+ "cli",
20
+ "regression"
21
+ ],
22
+ "type": "module",
23
+ "bin": {
24
+ "agentlab": "./dist/index.js"
25
+ },
26
+ "files": [
27
+ "dist",
28
+ "README.md"
29
+ ],
30
+ "engines": {
31
+ "node": ">=22"
32
+ },
33
+ "scripts": {
34
+ "build": "tsc -p tsconfig.json",
35
+ "check": "tsc -p tsconfig.json --noEmit",
36
+ "test": "tsx --test tests/**/*.test.ts",
37
+ "smoke:cli": "npm run build && node dist/index.js --help && node dist/index.js version",
38
+ "start": "tsx src/index.ts",
39
+ "run": "tsx src/index.ts"
40
+ },
41
+ "dependencies": {
42
+ "@types/node": "^25.5.0",
43
+ "@types/react": "^19.2.14",
44
+ "@types/react-dom": "^19.2.3",
45
+ "esbuild": "^0.27.4",
46
+ "openai": "^6.33.0",
47
+ "react": "^19.2.4",
48
+ "react-dom": "^19.2.4",
49
+ "tsx": "^4.21.0",
50
+ "typescript": "^6.0.2",
51
+ "yaml": "^2.8.3"
52
+ }
53
+ }