npm - @pellux/goodvibes-agent - Versions diffs - 0.1.54 → 0.1.55 - Mend

@pellux/goodvibes-agent 0.1.54 → 0.1.55

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12)

package/CHANGELOG.md +4 -0
package/package.json +2 -5
package/src/input/command-registry.ts +0 -1
package/src/input/commands/local-runtime.ts +6 -7
package/src/input/commands/operator-runtime.ts +0 -50
package/src/input/commands/product-runtime.ts +3 -129
package/src/input/commands.ts +0 -4
package/src/panels/builtin/operations.ts +0 -12
package/src/panels/builtin/shared.ts +0 -2
package/src/version.ts +1 -1
package/src/input/commands/eval.ts +0 -217
package/src/panels/eval-panel.ts +0 -399

package/CHANGELOG.md CHANGED Viewed

@@ -2,6 +2,10 @@
 All notable changes to GoodVibes Agent will be recorded here.
+## 0.1.55 - 2026-05-31
+- d8f4eee Remove copied developer audit surfaces
 ## 0.1.54 - 2026-05-31
 - dc1a290 Keep release docs version-neutral

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@pellux/goodvibes-agent",
-  "version": "0.1.54",
+  "version": "0.1.55",
   "private": false,
   "description": "Near-fork GoodVibes operator assistant with the GoodVibes TUI shell, renderer, input, fullscreen workspace, and daemon-connected Agent product brain.",
   "type": "module",
@@ -65,10 +65,7 @@
     "architecture:check": "bun run scripts/check-architecture.ts",
     "foundation:artifacts": "bun run scripts/export-foundation-artifacts.ts",
     "verification:ledger": "bun run scripts/verification-ledger.ts",
-    "verification:live": "bun run scripts/verify-live.ts",
-    "eval:gate": "bun run scripts/eval-gate.ts",
-    "eval:gate:verbose": "bun run scripts/eval-gate.ts --verbose",
-    "eval:baseline": "bun run scripts/eval-gate.ts --save-baseline"
+    "verification:live": "bun run scripts/verify-live.ts"
   },
   "license": "MIT",
   "repository": {

package/src/input/command-registry.ts CHANGED Viewed

@@ -177,7 +177,6 @@ export interface CommandOpsServices
 export interface CommandExtensionRegistryServices {
   readonly toolRegistry: ToolRegistry;
   readonly mcpRegistry: McpRegistry;
-  readonly evalRegistry?: import('../panels/eval-panel.ts').EvalRegistry;
 }
 export interface CommandExtensionServices

package/src/input/commands/local-runtime.ts CHANGED Viewed

@@ -67,7 +67,7 @@ export function registerLocalRuntimeCommands(registry: CommandRegistry): void {
   registry.register({
     name: 'tools',
     aliases: ['t'],
-    description: 'List available tools and review compact native tool capability surfaces',
+    description: 'List available tools and review tool safety/status',
     usage: '[review|panel]',
     handler(args, ctx) {
       const sub = (args[0] ?? '').toLowerCase();
@@ -79,12 +79,11 @@ export function registerLocalRuntimeCommands(registry: CommandRegistry): void {
         }
         if (sub === 'review') {
           ctx.print([
-            'Tool Surface Review',
-            '  Native file tools stay compact by default.',
-            '  Read/write/edit/notebook capabilities are available through the native tool stack, with detail routed to the tools panel and approval surfaces instead of transcript bloat.',
-            '  Shell and native tool approvals classify work into read, mutation, destructive, dependency, config, notebook, network, remote, and lifecycle risk families.',
-            '  Use /tools panel to inspect risk class, output-policy actions, spill posture, compact summaries, and approval posture for recent calls.',
-            '  Use /approval review shell or /approval review file when you need the action-specific why-prompted posture.',
+            'Tool Status',
+            '  Tools are available for the main Agent conversation.',
+            '  Read-only actions can run directly; writes, destructive changes, network effects, service changes, and external side effects require explicit user intent or approval.',
+            '  Recent tool activity and approval posture are available in the tools and approvals views.',
+            '  Build/fix/review work should be delegated explicitly with /delegate.',
           ].join('\n'));
         }
         return;

package/src/input/commands/operator-runtime.ts CHANGED Viewed

@@ -1,6 +1,5 @@
 import type { CommandRegistry } from '../command-registry.ts';
 import type { ProfileData } from '@pellux/goodvibes-sdk/platform/profiles';
-import { ToolContractVerifier } from '@/runtime/index.ts';
 import type { ReplaySnapshotInput } from '@/runtime/index.ts';
 import { logger } from '@pellux/goodvibes-sdk/platform/utils';
 import { registerOperatorPanelCommand } from './operator-panel-runtime.ts';
@@ -283,55 +282,6 @@ export function registerOperatorRuntimeCommands(registry: CommandRegistry): void
     },
   });
-  registry.register({
-    name: 'tool',
-    description: 'Tool contract verification — verify registered tool contracts',
-    usage: 'verify <name> | verify-all | contract show <name>',
-    argsHint: 'verify <name> | verify-all | contract show <name>',
-    handler(args, ctx) {
-      const sub = args[0];
-      if (sub === 'verify' && args[1]) {
-        const result = ctx.extensions.toolRegistry.verifyContract(args[1]);
-        if (!result) {
-          ctx.print(`[tool verify] Tool '${args[1]}' is not registered.`);
-          return;
-        }
-        ctx.print(ToolContractVerifier.formatResult(result));
-        return;
-      }
-      if (sub === 'verify-all') {
-        ctx.print(ToolContractVerifier.formatAllResults(ctx.extensions.toolRegistry.verifyAllContracts()));
-        return;
-      }
-      if (sub === 'contract' && args[1] === 'show' && args[2]) {
-        const toolName = args[2];
-        const result = ctx.extensions.toolRegistry.verifyContract(toolName);
-        if (!result) {
-          ctx.print(`[tool contract show] Tool '${toolName}' is not registered.`);
-          return;
-        }
-        const lines: string[] = [ToolContractVerifier.formatResult(result)];
-        const tool = ctx.extensions.toolRegistry.list().find((t) => t.definition.name === toolName);
-        if (tool) {
-          lines.push('');
-          lines.push('Tool Definition:');
-          lines.push(`  Name:        ${tool.definition.name}`);
-          lines.push(`  Description: ${tool.definition.description}`);
-          lines.push(`  Parameters:  ${JSON.stringify(tool.definition.parameters, null, 2).replace(/\n/g, '\n               ')}`);
-        }
-        ctx.print(lines.join('\n'));
-        return;
-      }
-      ctx.print(
-        'Usage: /tool <subcommand>\n'
-        + '  /tool verify <name>             — verify contract for a specific registered tool\n'
-        + '  /tool verify-all                — verify contracts for all registered tools\n'
-        + '  /tool contract show <name>      — show full contract details for a tool'
-      );
-    },
-  });
   registry.register({
     name: 'forensics',
     aliases: ['foren'],

package/src/input/commands/product-runtime.ts CHANGED Viewed

@@ -1,9 +1,7 @@
-import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs';
-import { dirname, join, resolve } from 'node:path';
+import { mkdirSync, readFileSync, writeFileSync } from 'node:fs';
+import { dirname } from 'node:path';
 import type { CommandContext, CommandRegistry } from '../command-registry.ts';
-import { listInstalledEcosystemEntries, loadEcosystemCatalog } from '@/runtime/index.ts';
-import { BUILTIN_SUITES } from '@/runtime/index.ts';
-import { requireEcosystemCatalogPaths, requireReadModels, requireSecretsManager, requireServiceRegistry, requireShellPaths } from './runtime-services.ts';
+import { requireReadModels, requireSecretsManager, requireServiceRegistry, requireShellPaths } from './runtime-services.ts';
 import { requireYesFlag, stripYesFlag } from './confirmation.ts';
 interface TrustReviewBundle {
@@ -29,29 +27,6 @@ interface TrustReviewBundle {
   };
 }
-interface ReleaseBundle {
-  readonly version: 1;
-  readonly capturedAt: number;
-  readonly runtime: {
-    readonly provider: string;
-    readonly model: string;
-    readonly sessionId: string;
-  };
-  readonly evalSuites: readonly string[];
-  readonly incidentCount: number;
-  readonly remote: {
-    readonly pools: number;
-    readonly contracts: number;
-    readonly artifacts: number;
-  };
-  readonly ecosystem: {
-    readonly pluginCatalog: number;
-    readonly skillCatalog: number;
-    readonly installedPlugins: number;
-    readonly installedSkills: number;
-  };
-}
 function countByMode<T extends string>(values: readonly T[], mode: T): number {
   return values.filter((value) => value === mode).length;
 }
@@ -112,46 +87,6 @@ function inspectTrustBundle(path: string): string {
   ].join('\n');
 }
-function buildReleaseBundle(ctx: Parameters<NonNullable<CommandRegistry['register']>>[0]['handler'] extends (args: string[], context: infer C) => unknown ? C : never): ReleaseBundle {
-  const remoteRuntime = ctx.ops.remoteRuntime;
-  const incidents = ctx.extensions.forensicsRegistry?.getAll() ?? [];
-  const ecosystemPaths = requireEcosystemCatalogPaths(ctx);
-  return {
-    version: 1,
-    capturedAt: Date.now(),
-    runtime: {
-      provider: ctx.session.runtime.provider,
-      model: ctx.session.runtime.model,
-      sessionId: ctx.session.runtime.sessionId,
-    },
-    evalSuites: Object.keys(BUILTIN_SUITES),
-    incidentCount: incidents.length,
-    remote: {
-      pools: remoteRuntime?.listPools().length ?? 0,
-      contracts: remoteRuntime?.listContracts().length ?? 0,
-      artifacts: remoteRuntime?.listArtifacts().length ?? 0,
-    },
-    ecosystem: {
-      pluginCatalog: loadEcosystemCatalog('plugin', ecosystemPaths).length,
-      skillCatalog: loadEcosystemCatalog('skill', ecosystemPaths).length,
-      installedPlugins: listInstalledEcosystemEntries('plugin', ecosystemPaths).length,
-      installedSkills: listInstalledEcosystemEntries('skill', ecosystemPaths).length,
-    },
-  };
-}
-function inspectReleaseBundle(path: string): string {
-  const parsed = JSON.parse(readFileSync(path, 'utf-8')) as ReleaseBundle;
-  return [
-    'Release Bundle Review',
-    `  provider/model: ${parsed.runtime.provider || '(unset)'}/${parsed.runtime.model || '(unset)'}`,
-    `  eval suites: ${parsed.evalSuites.length}`,
-    `  incidents: ${parsed.incidentCount}`,
-    `  remote pools/contracts/artifacts: ${parsed.remote.pools}/${parsed.remote.contracts}/${parsed.remote.artifacts}`,
-    `  ecosystem catalog plugins/skills: ${parsed.ecosystem.pluginCatalog}/${parsed.ecosystem.skillCatalog}`,
-  ].join('\n');
-}
 export function registerProductRuntimeCommands(registry: CommandRegistry): void {
   registry.register({
     name: 'trust',
@@ -314,65 +249,4 @@ export function registerProductRuntimeCommands(registry: CommandRegistry): void
     },
   });
-  registry.register({
-    name: 'release',
-    description: 'Package certification and release-readiness operations',
-    usage: '[review|checklist|bundle export <path> --yes|bundle inspect <path>]',
-    handler(args, ctx) {
-      const parsed = stripYesFlag(args);
-      const commandArgs = [...parsed.rest];
-      const shellPaths = requireShellPaths(ctx);
-      const sub = commandArgs[0] ?? 'review';
-      if (sub === 'review') {
-        const bundle = buildReleaseBundle(ctx);
-        ctx.print([
-          'Release Review',
-          `  provider/model: ${bundle.runtime.provider || '(unset)'}/${bundle.runtime.model || '(unset)'}`,
-          `  eval suites: ${bundle.evalSuites.length}`,
-          `  incidents: ${bundle.incidentCount}`,
-          `  remote pools/contracts/artifacts: ${bundle.remote.pools}/${bundle.remote.contracts}/${bundle.remote.artifacts}`,
-          `  ecosystem catalog plugins/skills: ${bundle.ecosystem.pluginCatalog}/${bundle.ecosystem.skillCatalog}`,
-          `  installed plugins/skills: ${bundle.ecosystem.installedPlugins}/${bundle.ecosystem.installedSkills}`,
-        ].join('\n'));
-        return;
-      }
-      if (sub === 'checklist') {
-        ctx.print([
-          'Release Checklist',
-          '  1. Run /setup review and /setup doctor',
-          '  2. Run /security review and /trust review',
-          '  3. Run /policy preflight and /policy simulate',
-          '  4. Run /eval gate <suite> --yes for required certification suites',
-          '  5. Review /incident latest and /bridge status',
-          '  6. Export /release bundle export <path> --yes for release evidence',
-        ].join('\n'));
-        return;
-      }
-      if (sub === 'bundle') {
-        const mode = commandArgs[1];
-        const pathArg = commandArgs[2];
-        if ((mode === 'export' || mode === 'inspect') && !pathArg) {
-          ctx.print(`Usage: /release bundle ${mode} <path>${mode === 'export' ? ' --yes' : ''}`);
-          return;
-        }
-        if (mode === 'export') {
-          if (!parsed.yes) {
-            requireYesFlag(ctx, `export release bundle to ${pathArg}`, '/release bundle export <path> --yes');
-            return;
-          }
-          const bundle = buildReleaseBundle(ctx);
-          const targetPath = shellPaths.resolveWorkspacePath(pathArg!);
-          mkdirSync(dirname(targetPath), { recursive: true });
-          writeFileSync(targetPath, JSON.stringify(bundle, null, 2) + '\n', 'utf-8');
-          ctx.print(`Release bundle exported to ${targetPath}`);
-          return;
-        }
-        if (mode === 'inspect') {
-          ctx.print(inspectReleaseBundle(shellPaths.resolveWorkspacePath(pathArg!)));
-          return;
-        }
-      }
-      ctx.print('Usage: /release [review|checklist|bundle export <path> --yes|bundle inspect <path>]');
-    },
-  });
 }

package/src/input/commands.ts CHANGED Viewed

@@ -1,7 +1,6 @@
 import type { CommandRegistry } from './command-registry.ts';
 import { policyCommand } from './commands/policy.ts';
 import { providerCommand } from './commands/provider.ts';
-import { evalCommand } from './commands/eval.ts';
 import { sessionCommand } from './commands/session.ts';
 import { recallCommand } from './commands/memory.ts';
 import { knowledgeCommand } from './commands/knowledge.ts';
@@ -126,9 +125,6 @@ export function registerBuiltinCommands(registry: CommandRegistry): void {
   // ── /provider ─────────────────────────────────────────────────────────────
   registry.register(providerCommand);
-  // ── /eval ─────────────────────────────────────────────────────────────────
-  registry.register(evalCommand);
   // ── /session ─────────────────────────────────────────────────────────────
   registry.register(sessionCommand);

package/src/panels/builtin/operations.ts CHANGED Viewed

@@ -27,7 +27,6 @@ import { DebugPanel } from '../debug-panel.ts';
 import { IncidentReviewPanel } from '../incident-review-panel.ts';
 import { ForensicsPanel } from '../forensics-panel.ts';
 import { PolicyPanel } from '../policy-panel.ts';
-import { EvalPanel } from '../eval-panel.ts';
 import { createProviderAccountSnapshotQuery } from '../provider-account-snapshot.ts';
 import {
   createEnvironmentVariableQuery,
@@ -335,15 +334,4 @@ export function registerOperationsPanels(manager: PanelManager, deps: ResolvedBu
     factory: () => new PolicyPanel(deps.policyRuntimeState),
   });
-  if (deps.evalRegistry) {
-    const { evalRegistry } = deps;
-    manager.registerType({
-      id: 'eval',
-      name: 'Eval',
-      icon: 'Y',
-      category: 'monitoring',
-      description: 'Evaluation harness: benchmark suite results, scorecards, and regression gates',
-      factory: () => new EvalPanel(evalRegistry),
-    });
-  }
 }

package/src/panels/builtin/shared.ts CHANGED Viewed

@@ -62,8 +62,6 @@ export interface BuiltinPanelDeps {
   dismissPlanning?: () => void;
   /** ForensicsRegistry for the Forensics panel. */
   forensicsRegistry?: import('@/runtime/index.ts').ForensicsRegistry;
-  /** EvalRegistry for the Eval panel. */
-  evalRegistry?: import('../eval-panel.ts').EvalRegistry;
   /** MemoryRegistry for the Memory panel. */
   memoryRegistry?: MemoryRegistry;
   /** Isolated Agent Knowledge service for the Agent Knowledge panel. */

package/src/version.ts CHANGED Viewed

@@ -6,7 +6,7 @@ import { join } from 'node:path';
 // The prebuild script updates the fallback value before compilation.
 // Uses import.meta.dir (Bun) to locate package.json relative to this file,
 // which is correct regardless of the process working directory.
-let _version = '0.1.54';
+let _version = '0.1.55';
 let _sdkVersion = '0.33.35';
 try {
   const pkg = JSON.parse(readFileSync(join(import.meta.dir, '..', 'package.json'), 'utf-8')) as {

package/src/input/commands/eval.ts DELETED Viewed

@@ -1,217 +0,0 @@
-/**
- * /eval command handler.
- *
- * Implements the Evaluation Harness commands:
- *
- *   /eval list                    — List all available eval suites
- *   /eval run <suite> --yes       — Run a named suite (or 'all')
- *   /eval compare <baseline-file> — Compare last run against a baseline file
- *   /eval gate <suite> --yes      — Run suite and apply CI gate (exits 1 on regression)
- */
-import type { SlashCommand, CommandContext } from '../command-registry.ts';
-import { EvalRunner } from '@/runtime/index.ts';
-import { BUILTIN_SUITES } from '@/runtime/index.ts';
-import { formatScorecard } from '@/runtime/index.ts';
-import { loadBaseline, captureBaseline, formatBaselineComparison, writeBaseline } from '@/runtime/index.ts';
-import type { EvalRegistry } from '../../panels/eval-panel.ts';
-import { formatSuiteResult, formatGateResult } from '@/runtime/index.ts';
-import { requireShellPaths } from './runtime-services.ts';
-import { summarizeError } from '@pellux/goodvibes-sdk/platform/utils';
-import { requireYesFlag, stripYesFlag } from './confirmation.ts';
-// ── Subcommand helpers ────────────────────────────────────────────────────────
-function printSuiteList(context: CommandContext): void {
-  context.print('[eval] Available suites:');
-  for (const [name, scenarios] of Object.entries(BUILTIN_SUITES)) {
-    context.print(`  ${name}  (${scenarios.length} scenarios)`);
-    for (const s of scenarios) {
-      context.print(`    - ${s.id}: ${s.name}`);
-    }
-  }
-  context.print('[eval] Usage: /eval run <suite> --yes  or  /eval run all --yes');
-}
-function getRegistry(context: CommandContext): EvalRegistry | undefined {
-  return context.extensions.evalRegistry;
-}
-// ── /eval list ────────────────────────────────────────────────────────────────
-function handleList(_args: string[], context: CommandContext): void {
-  printSuiteList(context);
-}
-// ── /eval run ────────────────────────────────────────────────────────────────
-async function handleRun(args: string[], context: CommandContext): Promise<void> {
-  const { rest, yes } = stripYesFlag(args);
-  const suiteName = rest[0] ?? 'all';
-  const registry = getRegistry(context);
-  const suitesToRun =
-    suiteName === 'all'
-      ? Object.keys(BUILTIN_SUITES)
-      : BUILTIN_SUITES[suiteName]
-        ? [suiteName]
-        : null;
-  if (!suitesToRun) {
-    context.print(`[eval] Unknown suite: "${suiteName}". Run /eval list to see available suites.`);
-    return;
-  }
-  if (!yes) {
-    requireYesFlag(context, `run eval suite ${suiteName}`, '/eval run <suite|all> --yes');
-    return;
-  }
-  const runner = new EvalRunner();
-  registry?.setRunning(true);
-  for (const name of suitesToRun) {
-    const scenarios = BUILTIN_SUITES[name];
-    if (!scenarios) continue;
-    context.print(`[eval] Running suite: ${name} (${scenarios.length} scenarios)...`);
-    const result = await runner.runSuite(name, scenarios);
-    registry?.push(result);
-    context.print(formatSuiteResult(result));
-    for (const r of result.results) {
-      context.print(formatScorecard(r.scorecard));
-    }
-  }
-  registry?.setRunning(false);
-}
-// ── /eval compare ─────────────────────────────────────────────────────────────
-async function handleCompare(args: string[], context: CommandContext): Promise<void> {
-  const baselineFile = args[0] ?? '.goodvibes/eval/baseline.json';
-  const registry = getRegistry(context);
-  const projectRoot = requireShellPaths(context).workingDirectory;
-  const suiteResults = registry?.getSuiteResults() ?? [];
-  if (suiteResults.length === 0) {
-    context.print('[eval] No suite results to compare. Run /eval run <suite> --yes first.');
-    return;
-  }
-  const baseline = await loadBaseline(baselineFile, projectRoot);
-  if (!baseline) {
-    context.print(`[eval] Baseline file not found: ${baselineFile}`);
-    context.print('[eval] Tip: run /eval gate <suite> [baseline-file] --save-baseline --yes to create a baseline.');
-    return;
-  }
-  for (const result of suiteResults) {
-    context.print(formatBaselineComparison(baseline, result));
-  }
-}
-// ── /eval gate ────────────────────────────────────────────────────────────────
-async function handleGate(args: string[], context: CommandContext): Promise<void> {
-  const { rest, yes } = stripYesFlag(args);
-  const positional = rest.filter((arg) => arg !== '--save-baseline');
-  const suiteName = positional[0];
-  const baselineFile = positional[1] ?? '.goodvibes/eval/baseline.json';
-  const saveFlag = rest.includes('--save-baseline');
-  const projectRoot = requireShellPaths(context).workingDirectory;
-  if (!suiteName) {
-    context.print('[eval] Usage: /eval gate <suite> [baseline-file] [--save-baseline] --yes');
-    return;
-  }
-  const scenarios = BUILTIN_SUITES[suiteName];
-  if (!scenarios) {
-    context.print(`[eval] Unknown suite: "${suiteName}". Run /eval list to see available suites.`);
-    return;
-  }
-  if (!yes) {
-    requireYesFlag(context, `run eval gate ${suiteName}`, '/eval gate <suite> [baseline-file] [--save-baseline] --yes');
-    return;
-  }
-  const registry = getRegistry(context);
-  const runner = new EvalRunner();
-  context.print(`[eval] Gate: running suite "${suiteName}"...`);
-  registry?.setRunning(true);
-  const fresh = await runner.runSuite(suiteName, scenarios);
-  registry?.push(fresh);
-  registry?.setRunning(false);
-  const baseline = await loadBaseline(baselineFile, projectRoot);
-  const gate = runner.evaluateGate(fresh, baseline);
-  registry?.pushGate(gate);
-  context.print(formatGateResult(gate));
-  if (saveFlag || !baseline) {
-    const label = suiteName ?? 'latest';
-    const newBaseline = captureBaseline(label, [fresh]);
-    try {
-      await writeBaseline(baselineFile, newBaseline, projectRoot);
-      context.print(`[eval] Baseline saved to ${baselineFile}`);
-    } catch (err) {
-      context.print(`[eval] Warning: could not save baseline: ${summarizeError(err)}`);
-    }
-  }
-  if (!gate.passed) {
-    context.print(`[eval] Gate FAILED: ${gate.regressions.length} regression(s) detected.`);
-  } else {
-    context.print('[eval] Gate PASSED.');
-  }
-}
-// ── Top-level command ─────────────────────────────────────────────────────────
-export const evalCommand: SlashCommand = {
-  name: 'eval',
-  description: 'Evaluation harness: run benchmark suites, compare baselines, and gate regressions.',
-  usage: '<subcommand> [args]',
-  argsHint: 'list|run <suite> --yes|compare <baseline>|gate <suite> --yes',
-  handler: async (args: string[], context: CommandContext): Promise<void> => {
-    const [sub, ...rest] = args;
-    switch (sub) {
-      case 'list':
-      case 'ls':
-        handleList(rest, context);
-        break;
-      case 'run':
-        await handleRun(rest, context);
-        break;
-      case 'compare':
-      case 'cmp':
-        await handleCompare(rest, context);
-        break;
-      case 'gate':
-        await handleGate(rest, context);
-        break;
-      default: {
-        const usage = [
-          'Usage: /eval <subcommand>',
-          '  list                           — List all available eval suites',
-          '  run <suite|all> --yes          — Run a named suite (or all suites)',
-          '  compare [baseline-file]        — Compare last results against baseline',
-          '  gate <suite> [baseline-file] --yes',
-          '                                 — Run suite and apply regression gate',
-          '    --save-baseline              — Save fresh run as new baseline',
-        ].join('\n');
-        context.print(usage);
-        break;
-      }
-    }
-  },
-};

package/src/panels/eval-panel.ts DELETED Viewed

@@ -1,399 +0,0 @@
-/**
- * Eval Panel — renders evaluation harness results in list and detail modes.
- *
- * Displays suite run summaries, per-scenario scorecards, and regression
- * indicators. Wired with an EvalRegistry that holds the latest run results.
- */
-import { BasePanel } from './base-panel.ts';
-import type { Line } from '../types/grid.ts';
-import { createEmptyLine } from '../types/grid.ts';
-import {
-  buildEmptyState,
-  buildPanelLine,
-  buildPanelWorkspace,
-  resolveScrollablePanelSection,
-  DEFAULT_PANEL_PALETTE,
-} from './polish.ts';
-// ── EvalRegistry ─────────────────────────────────────────────────────────────
-import type {
-  EvalSuiteResult,
-  EvalResult,
-  EvalGateResult,
-  EvalDimension,
-} from '@/runtime/index.ts';
-/**
- * Holds the latest eval run state for display in EvalPanel.
- * Created externally, injected into the panel.
- */
-export class EvalRegistry {
-  private _suiteResults: EvalSuiteResult[] = [];
-  private _gateResults: EvalGateResult[] = [];
-  private _running = false;
-  private _lastRunAt: number | null = null;
-  private readonly _subscribers = new Set<() => void>();
-  push(result: EvalSuiteResult): void {
-    const idx = this._suiteResults.findIndex((r) => r.suite === result.suite);
-    if (idx >= 0) {
-      this._suiteResults[idx] = result;
-    } else {
-      this._suiteResults.push(result);
-    }
-    this._lastRunAt = Date.now();
-    this._notify();
-  }
-  pushGate(gate: EvalGateResult): void {
-    const idx = this._gateResults.findIndex((g) => g.suite === gate.suite);
-    if (idx >= 0) {
-      this._gateResults[idx] = gate;
-    } else {
-      this._gateResults.push(gate);
-    }
-    this._notify();
-  }
-  setRunning(running: boolean): void {
-    this._running = running;
-    this._notify();
-  }
-  isRunning(): boolean { return this._running; }
-  getLastRunAt(): number | null { return this._lastRunAt; }
-  getSuiteResults(): EvalSuiteResult[] { return this._suiteResults; }
-  getGateResults(): EvalGateResult[] { return this._gateResults; }
-  subscribe(cb: () => void): () => void {
-    this._subscribers.add(cb);
-    return () => this._subscribers.delete(cb);
-  }
-  private _notify(): void {
-    for (const cb of this._subscribers) cb();
-  }
-}
-// ── Colour palette (hex fg colours for createStyledCell) ─────────────────────
-const C = {
-  ...DEFAULT_PANEL_PALETTE,
-  header:   '#94a3b8',
-  headerBg: '#1e293b',
-  cyan:     '#38bdf8',
-  green:    '#22c55e',
-  yellow:   '#eab308',
-  red:      '#ef4444',
-  dim:      '#4b5563',
-  label:    '#64748b',
-  value:    '#e2e8f0',
-  selected: '#f1f5f9',
-  sep:      '#1e293b',
-  white:    '#cbd5e1',
-  selectBg: '#0f172a',
-} as const;
-// ── Helpers ───────────────────────────────────────────────────────────────────
-function scoreColor(score: number): string {
-  if (score >= 80) return C.green;
-  if (score >= 60) return C.yellow;
-  return C.red;
-}
-function fmtTime(ms: number): string {
-  if (ms < 1000) return `${ms.toFixed(0)}ms`;
-  return `${(ms / 1000).toFixed(1)}s`;
-}
-const DIMENSION_ORDER: EvalDimension[] = ['safety', 'quality', 'latency', 'cost', 'recovery'];
-// ── EvalPanel ─────────────────────────────────────────────────────────────────
-export class EvalPanel extends BasePanel {
-  private readonly _registry: EvalRegistry;
-  private _mode: 'list' | 'detail' = 'list';
-  private _selectedSuiteIdx = 0;
-  private _selectedScenarioIdx = 0;
-  private _scrollOffset = 0;
-  private _unsub: (() => void) | null = null;
-  public constructor(registry: EvalRegistry) {
-    super('eval', 'Eval', 'V', 'monitoring');
-    this._registry = registry;
-  }
-  public override onActivate(): void {
-    this._unsub = this._registry.subscribe(() => this.markDirty());
-    this.markDirty();
-  }
-  public override onDestroy(): void {
-    this._unsub?.();
-    this._unsub = null;
-  }
-  public handleInput(key: string): boolean {
-    const suites = this._registry.getSuiteResults();
-    if (this._mode === 'list') {
-      if (key === 'ArrowUp' || key === 'k') {
-        this._selectedSuiteIdx = Math.max(0, this._selectedSuiteIdx - 1);
-        this.markDirty();
-        return true;
-      }
-      if (key === 'ArrowDown' || key === 'j') {
-        this._selectedSuiteIdx = Math.min(suites.length - 1, this._selectedSuiteIdx + 1);
-        this.markDirty();
-        return true;
-      }
-      if ((key === 'Enter' || key === 'Return' || key === 'l') && suites.length > 0) {
-        this._mode = 'detail';
-        this._selectedScenarioIdx = 0;
-        this._scrollOffset = 0;
-        this.markDirty();
-        return true;
-      }
-      return false;
-    }
-    // detail mode
-    if (key === 'Escape' || key === 'q' || key === 'h') {
-      this._mode = 'list';
-      this.markDirty();
-      return true;
-    }
-    if (key === 'ArrowUp' || key === 'k') {
-      const suite = suites[this._selectedSuiteIdx];
-      if (suite) {
-        this._selectedScenarioIdx = Math.max(0, this._selectedScenarioIdx - 1);
-        this._scrollOffset = 0;
-        this.markDirty();
-      }
-      return true;
-    }
-    if (key === 'ArrowDown' || key === 'j') {
-      const suite = suites[this._selectedSuiteIdx];
-      if (suite) {
-        this._selectedScenarioIdx = Math.min(
-          suite.results.length - 1,
-          this._selectedScenarioIdx + 1,
-        );
-        this._scrollOffset = 0;
-        this.markDirty();
-      }
-      return true;
-    }
-    if (key === 'PageUp') {
-      this._scrollOffset = Math.max(0, this._scrollOffset - 5);
-      this.markDirty();
-      return true;
-    }
-    if (key === 'PageDown') {
-      this._scrollOffset += 5;
-      this.markDirty();
-      return true;
-    }
-    return false;
-  }
-  /**
-   * Build the panel's lines for a viewport of `width` x `height`.
-   *
-   * Clears `needsRender`, reads suite/gate results and run state from the
-   * registry, and then renders one of three views: an empty-state workspace
-   * when there are no suite results, the suite list in 'list' mode, or the
-   * detail view of the currently selected suite otherwise.
-   *
-   * @param width - Width handed to every line/workspace builder.
-   * @param height - Height handed to the workspace builders; the empty-state
-   *   workspace is padded with empty lines up to this many rows.
-   * @returns The rendered lines. Outside 'list' mode the result is empty when
-   *   no suite exists at the selected index.
-   */
-  public render(width: number, height: number): Line[] {
-    this.needsRender = false;
-    const suiteResults = this._registry.getSuiteResults();
-    const gateResults = this._registry.getGateResults();
-    const intro = 'Evaluation harness runs, gates, scenario scorecards, and regression indicators for model and product validation.';
-    const isRunning = this._registry.isRunning();
-    const lastRunAt = this._registry.getLastRunAt();
-    const stateText = isRunning ? 'running' : 'idle';
-    const stateColor = isRunning ? C.yellow : C.dim;
-    const lastRunText = lastRunAt ? new Date(lastRunAt).toLocaleTimeString() : 'n/a';
-    const summaryLine = buildPanelLine(width, [
-      ['  state: ', C.label],
-      [stateText, stateColor],
-      ['  last: ', C.label],
-      [lastRunText, C.dim],
-    ]);
-    // Nothing has been run yet: show the status line plus a hint on how to start.
-    if (suiteResults.length === 0) {
-      const emptyState = buildEmptyState(
-        width,
-        ' No results yet.',
-        'Run an eval suite to populate this workspace with suite scores, gate results, and per-scenario detail.',
-        [{ command: '/eval run <suite>', summary: 'start a suite such as core-performance, safety-baseline, or cost-tokens' }],
-        C,
-      );
-      const workspace = buildPanelWorkspace(width, height, {
-        title: 'Eval Harness',
-        intro,
-        sections: [{ title: 'Status', lines: [summaryLine, ...emptyState] }],
-        palette: C,
-      });
-      // Pad with empty lines until the workspace has `height` rows.
-      for (let row = workspace.length; row < height; row++) {
-        workspace.push(createEmptyLine(width));
-      }
-      return workspace;
-    }
-    const rendered: Line[] = [];
-    if (this._mode !== 'list') {
-      const selectedSuite = suiteResults[this._selectedSuiteIdx];
-      if (selectedSuite) {
-        this._renderDetail(rendered, selectedSuite, width, height, intro, summaryLine);
-      }
-      return rendered;
-    }
-    this._renderList(rendered, suiteResults, gateResults, width, height, intro, summaryLine);
-    return rendered;
-  }
-  // ── List view ────────────────────────────────────────────────────────────────
-  /**
-   * Append the suite list view to `lines`.
-   *
-   * Emits the summary line, a column header, one row per suite (name, mean
-   * score, pass/fail, gate status, duration) and a key-hint footer, wrapped in
-   * a single 'Suites' section of the panel workspace.
-   *
-   * @param lines - Output buffer the workspace lines are appended to.
-   * @param suites - Suite results, one row each, in the given order.
-   * @param gates - Gate results, matched to suites by their `suite` field.
-   * @param width - Width handed to the line/workspace builders.
-   * @param _height - Height handed to the workspace builder.
-   * @param intro - Intro text for the workspace.
-   * @param summaryLine - Pre-built status line shown first in the section.
-   */
-  private _renderList(
-    lines: Line[],
-    suites: EvalSuiteResult[],
-    gates: EvalGateResult[],
-    width: number,
-    _height: number,
-    intro: string,
-    summaryLine: Line,
-  ): void {
-    const gateBySuite = new Map(gates.map((gate) => [gate.suite, gate]));
-    const header = buildPanelLine(width, [
-      ['Suite'.padEnd(28), C.header],
-      ['Score'.padEnd(8), C.header],
-      ['Pass'.padEnd(6), C.header],
-      ['Gate'.padEnd(6), C.header],
-      ['Duration', C.header],
-    ]);
-    const sectionLines: Line[] = [summaryLine, header];
-    // Builds the row for one suite; the selected row gets a marker and background.
-    const buildRow = (suite: EvalSuiteResult, idx: number): Line => {
-      const isSelected = idx === this._selectedSuiteIdx;
-      const gate = gateBySuite.get(suite.suite);
-      // A suite without a gate result shows a dim '-' placeholder.
-      const gateText = !gate ? '-' : gate.passed ? 'ok' : 'FAIL';
-      const gateColor = !gate ? C.dim : gate.passed ? C.green : C.red;
-      const elapsedMs = suite.finishedAt - suite.startedAt;
-      const meanColor = scoreColor(suite.meanScore);
-      const verdictColor = suite.passed ? C.green : C.red;
-      const labelColor = isSelected ? C.selected : C.white;
-      const rowBg = isSelected ? C.selectBg : undefined;
-      const marker = isSelected ? '▸ ' : '  ';
-      // Name is cut to 24 characters and padded to 26.
-      const label = suite.suite.slice(0, 24).padEnd(26);
-      return buildPanelLine(width, [
-        [`${marker}${label}`, labelColor, rowBg],
-        [suite.meanScore.toFixed(1).padEnd(8), meanColor, rowBg],
-        [(suite.passed ? 'PASS' : 'FAIL').padEnd(6), verdictColor, rowBg],
-        [gateText.padEnd(6), gateColor, rowBg],
-        [fmtTime(elapsedMs), C.dim, rowBg],
-      ]);
-    };
-    suites.forEach((suite, idx) => {
-      sectionLines.push(buildRow(suite, idx));
-    });
-    sectionLines.push(buildPanelLine(width, [[' Enter/l: detail  j/k: navigate', C.dim]]));
-    const workspace = buildPanelWorkspace(width, _height, {
-      title: 'Eval Harness',
-      intro,
-      sections: [{ title: 'Suites', lines: sectionLines }],
-      palette: C,
-    });
-    lines.push(...workspace);
-  }
-  // ── Detail view ──────────────────────────────────────────────────────────────
-  /**
-   * Append the detail view for one suite to `lines`.
-   *
-   * Renders a block per scenario, passes those blocks through
-   * `resolveScrollablePanelSection` with the current scroll offset, stores the
-   * offset it returns back on the panel, and emits everything as a single
-   * 'Scenario Detail' section followed by a key-hint footer.
-   *
-   * @param lines - Output buffer the workspace lines are appended to.
-   * @param suite - Suite whose scenarios are shown.
-   * @param width - Width handed to the line/section/workspace builders.
-   * @param height - Height handed to the section resolver and workspace builder.
-   * @param intro - Intro text for the workspace.
-   * @param summaryLine - Pre-built status line shown first in the section.
-   */
-  private _renderDetail(
-    lines: Line[],
-    suite: EvalSuiteResult,
-    width: number,
-    height: number,
-    intro: string,
-    summaryLine: Line,
-  ): void {
-    const verdictText = suite.passed ? 'PASS' : 'FAIL';
-    const verdictColor = suite.passed ? C.green : C.red;
-    const suiteHeader = buildPanelLine(width, [
-      [`Suite: ${suite.suite}`, C.cyan],
-      ['  mean=', C.label],
-      [suite.meanScore.toFixed(1), scoreColor(suite.meanScore)],
-      ['  ', C.label],
-      [verdictText, verdictColor],
-    ]);
-    const body: Line[] = [summaryLine, suiteHeader];
-    // Render every scenario; only the selected one is expanded by the block renderer.
-    const scenarioLines: Line[] = [];
-    suite.results.forEach((scenarioResult, scenarioIdx) => {
-      const isSelected = scenarioIdx === this._selectedScenarioIdx;
-      this._renderScenarioBlock(scenarioLines, scenarioResult, isSelected, width);
-    });
-    const resolved = resolveScrollablePanelSection(width, height, {
-      intro,
-      palette: C,
-      beforeSections: [{ title: 'Scenario Detail', lines: body }],
-      section: {
-        scrollableLines: scenarioLines,
-        scrollOffset: this._scrollOffset,
-        minRows: 1,
-      },
-    });
-    // Keep whatever offset the resolver reports so later key handling starts from it.
-    this._scrollOffset = resolved.scrollOffset;
-    body.push(
-      ...resolved.section.lines,
-      buildPanelLine(width, [[' Esc/q: back  j/k: scenario  PgUp/PgDn: scroll', C.dim]]),
-    );
-    const workspace = buildPanelWorkspace(width, height, {
-      title: 'Eval Harness',
-      intro,
-      sections: [{ title: 'Scenario Detail', lines: body }],
-      palette: C,
-    });
-    lines.push(...workspace);
-  }
-  /**
-   * Append the lines for one scenario to `lines`.
-   *
-   * Always emits a header row (name, composite score, PASS/FAIL). When the
-   * scenario is selected it also emits one 10-cell bar per dimension found in
-   * the scorecard, in `DIMENSION_ORDER`, followed by one line per scorecard note.
-   *
-   * @param lines - Output buffer the scenario lines are appended to.
-   * @param result - Scenario result; its `scorecard` and `scenario.name` are read.
-   * @param selected - Whether this scenario is the highlighted, expanded one.
-   * @param width - Width handed to the line builder and used to size the name
-   *   column and truncate notes.
-   */
-  private _renderScenarioBlock(
-    lines: Line[],
-    result: EvalResult,
-    selected: boolean,
-    width: number,
-  ): void {
-    const sc = result.scorecard;
-    const prefix = selected ? '▸ ' : '  ';
-    const nameColor = selected ? C.selected : C.white;
-    const rowBg = selected ? C.selectBg : undefined;
-    const scoreC = scoreColor(sc.compositeScore);
-    const passC = sc.passed ? C.green : C.red;
-    // The name column takes the width minus 22 characters, but never less than 1.
-    const nameLen = Math.max(1, width - 22);
-    lines.push(buildPanelLine(width, [
-      [prefix + result.scenario.name.slice(0, nameLen).padEnd(nameLen + 2), nameColor, rowBg],
-      [sc.compositeScore.toFixed(1).padStart(5), scoreC, rowBg],
-      ['  ', C.label, rowBg],
-      [sc.passed ? 'PASS' : 'FAIL', passC, rowBg],
-    ]));
-    if (!selected) return;
-    for (const dim of DIMENSION_ORDER) {
-      const d = sc.dimensions.find((x) => x.dimension === dim);
-      if (!d) continue;
-      // Clamp the fill to 0..10 cells: String.prototype.repeat throws a
-      // RangeError on a negative count, which an out-of-range score (below 0
-      // or above 100) would otherwise produce. A NaN score draws an empty bar.
-      const tenths = Math.round(d.score / 10);
-      const filled = Number.isNaN(tenths) ? 0 : Math.min(10, Math.max(0, tenths));
-      const bar = '#'.repeat(filled) + '.'.repeat(10 - filled);
-      lines.push(buildPanelLine(width, [
-        ['    ' + dim.padEnd(10) + ' ', C.label],
-        [bar, scoreColor(d.score)],
-        [` ${d.score.toFixed(0).padStart(3)}/100`, C.value],
-      ]));
-    }
-    if (sc.notes) {
-      // A negative slice end would trim from the end of the note rather than
-      // truncate it, so the limit is clamped at zero for very narrow widths.
-      const noteLimit = Math.max(0, width - 6);
-      for (const note of sc.notes) {
-        lines.push(buildPanelLine(width, [
-          ['    ! ', C.yellow],
-          [note.slice(0, noteLimit), C.yellow],
-        ]));
-      }
-    }
-  }
-}