@pellux/goodvibes-agent 0.1.53 → 0.1.55

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,399 +0,0 @@
1
- /**
2
- * Eval Panel — renders evaluation harness results in list and detail modes.
3
- *
4
- * Displays suite run summaries, per-scenario scorecards, and regression
5
- * indicators. Wired with an EvalRegistry that holds the latest run results.
6
- */
7
-
8
- import { BasePanel } from './base-panel.ts';
9
- import type { Line } from '../types/grid.ts';
10
- import { createEmptyLine } from '../types/grid.ts';
11
- import {
12
- buildEmptyState,
13
- buildPanelLine,
14
- buildPanelWorkspace,
15
- resolveScrollablePanelSection,
16
- DEFAULT_PANEL_PALETTE,
17
- } from './polish.ts';
18
-
19
- // ── EvalRegistry ─────────────────────────────────────────────────────────────
20
-
21
- import type {
22
- EvalSuiteResult,
23
- EvalResult,
24
- EvalGateResult,
25
- EvalDimension,
26
- } from '@/runtime/index.ts';
27
-
28
/**
 * In-memory store for the eval run state that EvalPanel displays.
 *
 * Keeps at most one suite result and one gate result per `suite` value,
 * together with a running flag and the timestamp of the latest suite push.
 * The registry is constructed outside the panel and injected into it.
 */
export class EvalRegistry {
  private readonly _subscribers = new Set<() => void>();
  private _suiteResults: EvalSuiteResult[] = [];
  private _gateResults: EvalGateResult[] = [];
  private _running = false;
  private _lastRunAt: number | null = null;

  /**
   * Replaces the entry whose `suite` matches `incoming.suite`, keeping its
   * position, or appends `incoming` when no entry matches.
   */
  private static _upsert<T extends { suite: unknown }>(entries: T[], incoming: T): void {
    const at = entries.findIndex((existing) => existing.suite === incoming.suite);
    if (at < 0) {
      entries.push(incoming);
      return;
    }
    entries[at] = incoming;
  }

  /** Stores a suite result, stamps the last-run time, and notifies subscribers. */
  push(result: EvalSuiteResult): void {
    EvalRegistry._upsert(this._suiteResults, result);
    this._lastRunAt = Date.now();
    this._notify();
  }

  /** Stores a gate result and notifies subscribers; the last-run time is untouched. */
  pushGate(gate: EvalGateResult): void {
    EvalRegistry._upsert(this._gateResults, gate);
    this._notify();
  }

  /** Records the running flag and notifies subscribers on every call. */
  setRunning(running: boolean): void {
    this._running = running;
    this._notify();
  }

  /** Current value of the running flag. */
  isRunning(): boolean {
    return this._running;
  }

  /** `Date.now()` value captured by the most recent `push`, or `null` before any push. */
  getLastRunAt(): number | null {
    return this._lastRunAt;
  }

  /** The internal suite result array itself (not a copy). */
  getSuiteResults(): EvalSuiteResult[] {
    return this._suiteResults;
  }

  /** The internal gate result array itself (not a copy). */
  getGateResults(): EvalGateResult[] {
    return this._gateResults;
  }

  /**
   * Registers a change callback.
   *
   * @returns a function that removes the callback again.
   */
  subscribe(cb: () => void): () => void {
    this._subscribers.add(cb);
    const unsubscribe = (): boolean => this._subscribers.delete(cb);
    return unsubscribe;
  }

  /** Invokes every registered callback synchronously. */
  private _notify(): void {
    this._subscribers.forEach((listener) => listener());
  }
}
79
-
80
- // ── Colour palette (hex fg colours for createStyledCell) ─────────────────────
81
-
82
// Panel-local palette: starts from DEFAULT_PANEL_PALETTE and then sets the
// keys below, so any key listed here wins over a same-named default.
const C = {
  ...DEFAULT_PANEL_PALETTE,

  // Column headers
  header: '#94a3b8',
  headerBg: '#1e293b',

  // Accent and score/status colours
  cyan: '#38bdf8',
  green: '#22c55e',
  yellow: '#eab308',
  red: '#ef4444',

  // Text tones
  dim: '#4b5563',
  label: '#64748b',
  value: '#e2e8f0',
  selected: '#f1f5f9',
  sep: '#1e293b',
  white: '#cbd5e1',

  // Background of the currently selected row
  selectBg: '#0f172a',
} as const;
98
-
99
- // ── Helpers ───────────────────────────────────────────────────────────────────
100
-
101
/**
 * Picks the palette colour for a score.
 *
 * @param score - score to classify
 * @returns green at 80 and above, yellow from 60 up to (not including) 80,
 *          red for everything else (including NaN, which fails both comparisons)
 */
function scoreColor(score: number): string {
  return score >= 80 ? C.green : score >= 60 ? C.yellow : C.red;
}
106
-
107
/**
 * Formats a millisecond duration as a short label.
 *
 * @param ms - duration in milliseconds
 * @returns whole milliseconds (`"250ms"`) when the rounded value is below one
 *          second, seconds with one decimal (`"1.5s"`) otherwise, and `"n/a"`
 *          when `ms` is NaN or infinite
 */
function fmtTime(ms: number): string {
  // A NaN/Infinity duration (e.g. a missing timestamp) has no sensible label.
  if (!Number.isFinite(ms)) return 'n/a';
  // toFixed(0) rounds 999.5 and above up to "1000", so switch to seconds at
  // that point instead of printing "1000ms".
  if (ms < 999.5) return `${ms.toFixed(0)}ms`;
  return `${(ms / 1000).toFixed(1)}s`;
}
111
-
112
/** Order in which scorecard dimensions are listed for the selected scenario. */
const DIMENSION_ORDER: Array<EvalDimension> = [
  'safety',
  'quality',
  'latency',
  'cost',
  'recovery',
];
113
-
114
- // ── EvalPanel ─────────────────────────────────────────────────────────────────
115
-
116
/**
 * Panel that renders the contents of an injected EvalRegistry.
 *
 * Two modes are supported:
 * - `list`: one row per suite (score, pass/fail, gate status, duration);
 * - `detail`: the scenarios of the selected suite, with the selected
 *   scenario expanded into per-dimension bars and notes.
 */
export class EvalPanel extends BasePanel {
  private readonly _registry: EvalRegistry;
  private _mode: 'list' | 'detail' = 'list';
  private _selectedSuiteIdx = 0;
  private _selectedScenarioIdx = 0;
  private _scrollOffset = 0;
  private _unsub: (() => void) | null = null;

  /**
   * @param registry - source of suite results, gate results and run state
   */
  public constructor(registry: EvalRegistry) {
    // NOTE(review): the meaning of these four arguments is defined by
    // BasePanel, which is not visible here — presumably id, title, shortcut
    // key and category; confirm against base-panel.ts.
    super('eval', 'Eval', 'V', 'monitoring');
    this._registry = registry;
  }

  /** Subscribes to registry changes and marks the panel dirty. */
  public override onActivate(): void {
    // Release a subscription left over from an earlier activation; otherwise
    // every activation would add another callback to the registry.
    this._unsub?.();
    this._unsub = this._registry.subscribe(() => this.markDirty());
    this.markDirty();
  }

  /** Drops the registry subscription, if any. */
  public override onDestroy(): void {
    this._unsub?.();
    this._unsub = null;
  }

  /**
   * Handles a key press for the current mode.
   *
   * List mode: ArrowUp/k and ArrowDown/j move the suite selection;
   * Enter/Return/l opens the detail view when at least one suite exists.
   * Detail mode: Escape/q/h returns to the list; ArrowUp/k and ArrowDown/j
   * move the scenario selection; PageUp/PageDown scroll by five rows.
   *
   * @param key - key name as delivered by the host
   * @returns `true` when the key was consumed, `false` otherwise
   */
  public handleInput(key: string): boolean {
    const suites = this._registry.getSuiteResults();

    if (this._mode === 'list') {
      if (key === 'ArrowUp' || key === 'k') {
        this._selectedSuiteIdx = EvalPanel._clampIndex(this._selectedSuiteIdx - 1, suites.length);
        this.markDirty();
        return true;
      }
      if (key === 'ArrowDown' || key === 'j') {
        // Clamped so an empty suite list leaves the index at 0 rather than -1.
        this._selectedSuiteIdx = EvalPanel._clampIndex(this._selectedSuiteIdx + 1, suites.length);
        this.markDirty();
        return true;
      }
      if ((key === 'Enter' || key === 'Return' || key === 'l') && suites.length > 0) {
        this._mode = 'detail';
        this._selectedScenarioIdx = 0;
        this._scrollOffset = 0;
        this.markDirty();
        return true;
      }
      return false;
    }

    // detail mode
    if (key === 'Escape' || key === 'q' || key === 'h') {
      this._mode = 'list';
      this.markDirty();
      return true;
    }
    if (key === 'ArrowUp' || key === 'k') {
      const suite = suites[this._selectedSuiteIdx];
      if (suite) {
        this._selectedScenarioIdx = EvalPanel._clampIndex(
          this._selectedScenarioIdx - 1,
          suite.results.length,
        );
        this._scrollOffset = 0;
        this.markDirty();
      }
      return true;
    }
    if (key === 'ArrowDown' || key === 'j') {
      const suite = suites[this._selectedSuiteIdx];
      if (suite) {
        // Clamped so a suite without results leaves the index at 0 rather than -1.
        this._selectedScenarioIdx = EvalPanel._clampIndex(
          this._selectedScenarioIdx + 1,
          suite.results.length,
        );
        this._scrollOffset = 0;
        this.markDirty();
      }
      return true;
    }
    if (key === 'PageUp') {
      this._scrollOffset = Math.max(0, this._scrollOffset - 5);
      this.markDirty();
      return true;
    }
    if (key === 'PageDown') {
      // No upper bound here; the offset is overwritten in _renderDetail with
      // the value returned by resolveScrollablePanelSection.
      this._scrollOffset += 5;
      this.markDirty();
      return true;
    }
    return false;
  }

  /**
   * Builds the panel content for the given size.
   *
   * @param width - available columns
   * @param height - available rows
   * @returns the lines to draw: an empty-state workspace when the registry
   *          has no suite results, otherwise the list or detail view
   */
  public render(width: number, height: number): Line[] {
    this.needsRender = false;
    const suites = this._registry.getSuiteResults();
    const gates = this._registry.getGateResults();
    const intro = 'Evaluation harness runs, gates, scenario scorecards, and regression indicators for model and product validation.';

    const running = this._registry.isRunning();
    const lastRun = this._registry.getLastRunAt();
    const summaryLine = buildPanelLine(width, [
      [' state: ', C.label],
      [running ? 'running' : 'idle', running ? C.yellow : C.dim],
      [' last: ', C.label],
      [lastRun ? new Date(lastRun).toLocaleTimeString() : 'n/a', C.dim],
    ]);

    if (suites.length === 0) {
      const workspace = buildPanelWorkspace(width, height, {
        title: 'Eval Harness',
        intro,
        sections: [{
          title: 'Status',
          lines: [
            summaryLine,
            ...buildEmptyState(
              width,
              ' No results yet.',
              'Run an eval suite to populate this workspace with suite scores, gate results, and per-scenario detail.',
              [{ command: '/eval run <suite>', summary: 'start a suite such as core-performance, safety-baseline, or cost-tokens' }],
              C,
            ),
          ],
        }],
        palette: C,
      });
      while (workspace.length < height) workspace.push(createEmptyLine(width));
      return workspace;
    }

    // Keep the selection inside the current suite list so a stale or
    // out-of-range index cannot leave the view without a suite to show.
    this._selectedSuiteIdx = EvalPanel._clampIndex(this._selectedSuiteIdx, suites.length);
    const suite = suites[this._selectedSuiteIdx];

    const lines: Line[] = [];
    if (this._mode === 'detail' && suite) {
      this._renderDetail(lines, suite, width, height, intro, summaryLine);
    } else {
      // Detail mode without a suite would draw nothing; fall back to the list.
      this._mode = 'list';
      this._renderList(lines, suites, gates, width, height, intro, summaryLine);
    }

    return lines;
  }

  /**
   * Restricts `idx` to the valid positions of a list with `length` entries.
   *
   * @returns 0 when the list is empty, otherwise `idx` limited to `[0, length - 1]`
   */
  private static _clampIndex(idx: number, length: number): number {
    if (length <= 0) return 0;
    return Math.min(length - 1, Math.max(0, idx));
  }

  // ── List view ────────────────────────────────────────────────────────────────

  /**
   * Appends the suite table (header, one row per suite, key hints) to `lines`.
   * Gate status is looked up by suite; suites without a gate result show `-`.
   */
  private _renderList(
    lines: Line[],
    suites: EvalSuiteResult[],
    gates: EvalGateResult[],
    width: number,
    _height: number,
    intro: string,
    summaryLine: Line,
  ): void {
    const gateMap = new Map(gates.map((g) => [g.suite, g]));
    const sectionLines: Line[] = [
      summaryLine,
      buildPanelLine(width, [
        ['Suite'.padEnd(28), C.header],
        ['Score'.padEnd(8), C.header],
        ['Pass'.padEnd(6), C.header],
        ['Gate'.padEnd(6), C.header],
        ['Duration', C.header],
      ]),
    ];

    suites.forEach((suite, idx) => {
      const selected = idx === this._selectedSuiteIdx;
      const gate = gateMap.get(suite.suite);
      const gateStr = gate ? (gate.passed ? 'ok' : 'FAIL') : '-';
      const gateColor = gate ? (gate.passed ? C.green : C.red) : C.dim;
      const durationMs = suite.finishedAt - suite.startedAt;
      const scoreC = scoreColor(suite.meanScore);
      const passC = suite.passed ? C.green : C.red;
      const nameColor = selected ? C.selected : C.white;
      const bg = selected ? C.selectBg : undefined;
      // Both prefixes are two characters wide so prefix + 26-character name
      // fills the 28-column 'Suite' header for selected and unselected rows.
      const prefix = selected ? '▸ ' : '  ';
      const name = suite.suite.slice(0, 24).padEnd(26);

      sectionLines.push(buildPanelLine(width, [
        [prefix + name, nameColor, bg],
        [suite.meanScore.toFixed(1).padEnd(8), scoreC, bg],
        [(suite.passed ? 'PASS' : 'FAIL').padEnd(6), passC, bg],
        [gateStr.padEnd(6), gateColor, bg],
        [fmtTime(durationMs), C.dim, bg],
      ]));
    });

    sectionLines.push(buildPanelLine(width, [[' Enter/l: detail j/k: navigate', C.dim]]));
    lines.push(...buildPanelWorkspace(width, _height, {
      title: 'Eval Harness',
      intro,
      sections: [{ title: 'Suites', lines: sectionLines }],
      palette: C,
    }));
  }

  // ── Detail view ──────────────────────────────────────────────────────────────

  /**
   * Appends the detail view for `suite` to `lines`: a suite summary row, the
   * scrollable scenario blocks, and key hints. Stores the scroll offset
   * returned by resolveScrollablePanelSection back into the panel state.
   */
  private _renderDetail(
    lines: Line[],
    suite: EvalSuiteResult,
    width: number,
    height: number,
    intro: string,
    summaryLine: Line,
  ): void {
    // The suite may have been replaced with fewer scenarios since the last
    // key press; keep the selection on an existing scenario.
    this._selectedScenarioIdx = EvalPanel._clampIndex(
      this._selectedScenarioIdx,
      suite.results.length,
    );

    const sectionLines: Line[] = [
      summaryLine,
      buildPanelLine(width, [
        [`Suite: ${suite.suite}`, C.cyan],
        [' mean=', C.label],
        [suite.meanScore.toFixed(1), scoreColor(suite.meanScore)],
        [' ', C.label],
        [suite.passed ? 'PASS' : 'FAIL', suite.passed ? C.green : C.red],
      ]),
    ];

    const allDetailLines: Line[] = [];
    suite.results.forEach((result, idx) => {
      const selected = idx === this._selectedScenarioIdx;
      this._renderScenarioBlock(allDetailLines, result, selected, width);
    });

    const detailSection = resolveScrollablePanelSection(width, height, {
      intro,
      palette: C,
      beforeSections: [{ title: 'Scenario Detail', lines: sectionLines }],
      section: {
        scrollableLines: allDetailLines,
        scrollOffset: this._scrollOffset,
        minRows: 1,
      },
    });
    this._scrollOffset = detailSection.scrollOffset;
    sectionLines.push(...detailSection.section.lines);
    sectionLines.push(buildPanelLine(width, [[' Esc/q: back j/k: scenario PgUp/PgDn: scroll', C.dim]]));
    lines.push(...buildPanelWorkspace(width, height, {
      title: 'Eval Harness',
      intro,
      sections: [{ title: 'Scenario Detail', lines: sectionLines }],
      palette: C,
    }));
  }

  /**
   * Appends one scenario to `lines`: a header row (name, composite score,
   * PASS/FAIL) and, for the selected scenario only, one bar per dimension in
   * DIMENSION_ORDER followed by the scorecard notes.
   */
  private _renderScenarioBlock(
    lines: Line[],
    result: EvalResult,
    selected: boolean,
    width: number,
  ): void {
    const sc = result.scorecard;
    // Same width for both prefixes so the score column lines up across rows.
    const prefix = selected ? '▸ ' : '  ';
    const nameColor = selected ? C.selected : C.white;
    const scoreC = scoreColor(sc.compositeScore);
    const passC = sc.passed ? C.green : C.red;
    const nameLen = Math.max(1, width - 22);
    const rowBg = selected ? C.selectBg : undefined;

    lines.push(buildPanelLine(width, [
      [prefix + result.scenario.name.slice(0, nameLen).padEnd(nameLen + 2), nameColor, rowBg],
      [sc.compositeScore.toFixed(1).padStart(5), scoreC, rowBg],
      [' ', C.label, rowBg],
      [sc.passed ? 'PASS' : 'FAIL', passC, rowBg],
    ]));

    if (selected) {
      for (const dim of DIMENSION_ORDER) {
        const d = sc.dimensions.find((x) => x.dimension === dim);
        if (!d) continue;
        // One bar cell per 10 points. The cell count is limited to 0..10
        // because String.prototype.repeat throws a RangeError on a negative
        // count, which a score outside 0..100 would otherwise produce.
        const tenths = Number.isFinite(d.score) ? Math.round(d.score / 10) : 0;
        const filled = Math.min(10, Math.max(0, tenths));
        const bar = '#'.repeat(filled) + '.'.repeat(10 - filled);
        lines.push(buildPanelLine(width, [
          [' ' + dim.padEnd(10) + ' ', C.label],
          [bar, scoreColor(d.score)],
          [` ${d.score.toFixed(0).padStart(3)}/100`, C.value],
        ]));
      }

      if (sc.notes && sc.notes.length > 0) {
        // A negative slice end would count from the end of the note, so the
        // limit is floored at 0 for very narrow panels.
        const noteWidth = Math.max(0, width - 6);
        for (const note of sc.notes) {
          lines.push(buildPanelLine(width, [
            [' ! ', C.yellow],
            [note.slice(0, noteWidth), C.yellow],
          ]));
        }
      }
    }
  }
}