agent-regression-lab 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/ui/server.js CHANGED
@@ -1,12 +1,15 @@
1
1
  import { build } from "esbuild";
2
2
  import { createServer } from "node:http";
3
- import { readFileSync, existsSync } from "node:fs";
4
- import { extname, resolve } from "node:path";
3
+ import { readFileSync, existsSync, writeFileSync } from "node:fs";
4
+ import { dirname, extname, resolve } from "node:path";
5
+ import { fileURLToPath } from "node:url";
5
6
  import { ensureDir } from "../lib/fs.js";
6
7
  import { getRunErrorDetail } from "../runOutput.js";
7
8
  import { Storage } from "../storage.js";
8
9
  const UI_ROOT = resolve("artifacts", "ui");
9
10
  const ASSETS_ROOT = resolve(UI_ROOT, "assets");
11
+ const PACKAGED_ASSETS_ROOT = resolve(dirname(fileURLToPath(import.meta.url)), "..", "ui-assets");
12
+ const SOURCE_UI_ENTRY = resolve("src", "ui", "client.tsx");
10
13
  const PORT = 4173;
11
14
  export async function startUiServer() {
12
15
  await buildUiAssets();
@@ -77,6 +80,10 @@ function handleApi(url, response) {
77
80
  ...comparison.candidate,
78
81
  errorDetail: getRunErrorDetail(comparison.candidate),
79
82
  },
83
+ classification: comparison.classification,
84
+ verdictDelta: comparison.verdictDelta,
85
+ terminationDelta: comparison.terminationDelta,
86
+ outputChanged: comparison.outputChanged,
80
87
  notes: comparison.notes,
81
88
  deltas: comparison.deltas,
82
89
  evaluatorDiffs: comparison.evaluatorDiffs,
@@ -84,16 +91,38 @@ function handleApi(url, response) {
84
91
  });
85
92
  return;
86
93
  }
94
+ if (url.pathname === "/api/compare-suite") {
95
+ const baselineBatch = url.searchParams.get("baselineBatch");
96
+ const candidateBatch = url.searchParams.get("candidateBatch");
97
+ if (!baselineBatch || !candidateBatch) {
98
+ sendJson(response, 400, { error: "Both 'baselineBatch' and 'candidateBatch' query params are required." });
99
+ return;
100
+ }
101
+ const comparison = storage.compareSuites(baselineBatch, candidateBatch);
102
+ sendJson(response, 200, comparison);
103
+ return;
104
+ }
87
105
  sendJson(response, 404, { error: "Not found." });
88
106
  }
89
107
  catch (error) {
90
108
  sendJson(response, 500, { error: error instanceof Error ? error.message : String(error) });
91
109
  }
110
+ finally {
111
+ storage.close();
112
+ }
92
113
  }
93
114
  async function buildUiAssets() {
115
+ if (existsSync(PACKAGED_ASSETS_ROOT)) {
116
+ ensureDir(ASSETS_ROOT);
117
+ writePackagedAssetCopies();
118
+ return;
119
+ }
120
+ if (!existsSync(SOURCE_UI_ENTRY)) {
121
+ throw new Error("UI assets are unavailable. Install a package build that includes dist/ui-assets or run from the repo root.");
122
+ }
94
123
  ensureDir(ASSETS_ROOT);
95
124
  await build({
96
- entryPoints: [resolve("src", "ui", "client.tsx")],
125
+ entryPoints: [SOURCE_UI_ENTRY],
97
126
  outdir: ASSETS_ROOT,
98
127
  bundle: true,
99
128
  format: "esm",
@@ -106,6 +135,21 @@ async function buildUiAssets() {
106
135
  },
107
136
  });
108
137
  }
138
+ function writePackagedAssetCopies() {
139
+ for (const assetName of ["client.js", "client.css"]) {
140
+ const sourcePath = resolve(PACKAGED_ASSETS_ROOT, assetName);
141
+ const targetPath = resolve(ASSETS_ROOT, assetName);
142
+ if (!existsSync(sourcePath)) {
143
+ throw new Error(`Packaged UI asset '${assetName}' is missing.`);
144
+ }
145
+ responseSafeCopy(sourcePath, targetPath);
146
+ }
147
+ }
148
+ function responseSafeCopy(sourcePath, targetPath) {
149
+ ensureDir(dirname(targetPath));
150
+ const contents = readFileSync(sourcePath);
151
+ writeFileSync(targetPath, contents);
152
+ }
109
153
  function serveStatic(path, response) {
110
154
  if (!existsSync(path)) {
111
155
  response.writeHead(404, { "Content-Type": "text/plain; charset=utf-8" });
@@ -0,0 +1,174 @@
1
+ /* src/ui/styles.css */
2
+ :root {
3
+ color-scheme: light;
4
+ --bg: #f1ede4;
5
+ --panel: #fffdf7;
6
+ --ink: #1c1a16;
7
+ --muted: #665f54;
8
+ --line: #d6ccbc;
9
+ --accent: #9e3d22;
10
+ --pass: #1e6a42;
11
+ --fail: #9a2c1f;
12
+ --error: #5b1e72;
13
+ }
14
+ * {
15
+ box-sizing: border-box;
16
+ }
17
+ body {
18
+ margin: 0;
19
+ background:
20
+ radial-gradient(
21
+ circle at top,
22
+ #f8f3ea 0,
23
+ var(--bg) 45%,
24
+ #e4dccd 100%);
25
+ color: var(--ink);
26
+ font-family:
27
+ "IBM Plex Sans",
28
+ "Helvetica Neue",
29
+ sans-serif;
30
+ }
31
+ a {
32
+ color: var(--accent);
33
+ text-decoration: none;
34
+ }
35
+ pre {
36
+ white-space: pre-wrap;
37
+ word-break: break-word;
38
+ background: #f7f1e6;
39
+ border: 1px solid var(--line);
40
+ padding: 0.8rem;
41
+ border-radius: 10px;
42
+ }
43
+ .shell {
44
+ min-height: 100vh;
45
+ }
46
+ .topbar {
47
+ position: sticky;
48
+ top: 0;
49
+ backdrop-filter: blur(10px);
50
+ background: rgba(241, 237, 228, 0.92);
51
+ border-bottom: 1px solid var(--line);
52
+ padding: 1rem 1.25rem;
53
+ }
54
+ .brand {
55
+ font-family: "IBM Plex Mono", monospace;
56
+ font-size: 0.95rem;
57
+ text-transform: uppercase;
58
+ letter-spacing: 0.08em;
59
+ color: var(--ink);
60
+ }
61
+ .page {
62
+ max-width: 1200px;
63
+ margin: 0 auto;
64
+ padding: 1.25rem;
65
+ }
66
+ .hero {
67
+ margin-bottom: 1rem;
68
+ }
69
+ .hero h1 {
70
+ margin: 0 0 0.35rem;
71
+ font-size: 2rem;
72
+ }
73
+ .hero p,
74
+ .muted {
75
+ color: var(--muted);
76
+ }
77
+ .filters,
78
+ .stats,
79
+ .panel-grid,
80
+ .compare-grid {
81
+ display: grid;
82
+ gap: 1rem;
83
+ }
84
+ .filters {
85
+ grid-template-columns: repeat(auto-fit, minmax(180px, 1fr));
86
+ margin-bottom: 1rem;
87
+ }
88
+ input,
89
+ select {
90
+ width: 100%;
91
+ padding: 0.75rem 0.85rem;
92
+ border: 1px solid var(--line);
93
+ border-radius: 10px;
94
+ background: var(--panel);
95
+ }
96
+ .stats {
97
+ grid-template-columns: repeat(auto-fit, minmax(160px, 1fr));
98
+ margin-bottom: 1rem;
99
+ }
100
+ .stat,
101
+ .panel,
102
+ .empty {
103
+ background: var(--panel);
104
+ border: 1px solid var(--line);
105
+ border-radius: 16px;
106
+ padding: 1rem;
107
+ }
108
+ .stat-value {
109
+ font-size: 1.4rem;
110
+ margin-top: 0.25rem;
111
+ }
112
+ .panel-grid,
113
+ .compare-grid {
114
+ grid-template-columns: repeat(auto-fit, minmax(320px, 1fr));
115
+ margin-bottom: 1rem;
116
+ }
117
+ .table {
118
+ width: 100%;
119
+ border-collapse: collapse;
120
+ background: var(--panel);
121
+ border: 1px solid var(--line);
122
+ border-radius: 16px;
123
+ overflow: hidden;
124
+ }
125
+ .table th,
126
+ .table td {
127
+ text-align: left;
128
+ padding: 0.85rem;
129
+ border-bottom: 1px solid var(--line);
130
+ vertical-align: top;
131
+ }
132
+ .table th {
133
+ font-family: "IBM Plex Mono", monospace;
134
+ font-size: 0.8rem;
135
+ text-transform: uppercase;
136
+ letter-spacing: 0.04em;
137
+ color: var(--muted);
138
+ }
139
+ .pill {
140
+ display: inline-block;
141
+ padding: 0.2rem 0.55rem;
142
+ border-radius: 999px;
143
+ font-size: 0.8rem;
144
+ font-weight: 700;
145
+ text-transform: uppercase;
146
+ letter-spacing: 0.04em;
147
+ }
148
+ .pill.pass {
149
+ background: rgba(30, 106, 66, 0.12);
150
+ color: var(--pass);
151
+ }
152
+ .pill.fail {
153
+ background: rgba(154, 44, 31, 0.12);
154
+ color: var(--fail);
155
+ }
156
+ .pill.error {
157
+ background: rgba(91, 30, 114, 0.12);
158
+ color: var(--error);
159
+ }
160
+ .stack,
161
+ .timeline {
162
+ display: grid;
163
+ gap: 0.75rem;
164
+ padding-left: 1rem;
165
+ }
166
+ .timeline.compact {
167
+ gap: 0.35rem;
168
+ }
169
+ @media (max-width: 720px) {
170
+ .table {
171
+ display: block;
172
+ overflow-x: auto;
173
+ }
174
+ }