selftune 0.2.22 → 0.2.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (270)
  1. package/CHANGELOG.md +6 -0
  2. package/README.md +95 -15
  3. package/apps/local-dashboard/dist/assets/index-DgY2KGP-.css +1 -0
  4. package/apps/local-dashboard/dist/assets/index-Dmx7LPVX.js +15 -0
  5. package/apps/local-dashboard/dist/assets/vendor-react-C5oyHiV1.js +11 -0
  6. package/apps/local-dashboard/dist/assets/{vendor-table-BIiI3YhS.js → vendor-table-Bc_bbKd8.js} +1 -1
  7. package/apps/local-dashboard/dist/assets/vendor-ui-B3BPIYy7.js +1 -0
  8. package/apps/local-dashboard/dist/index.html +5 -5
  9. package/cli/selftune/adapters/codex/install.ts +310 -78
  10. package/cli/selftune/adapters/opencode/install.ts +3 -4
  11. package/cli/selftune/adapters/pi/hook.ts +273 -0
  12. package/cli/selftune/adapters/pi/install.ts +207 -0
  13. package/cli/selftune/alpha-upload/build-payloads.ts +3 -3
  14. package/cli/selftune/alpha-upload/stage-canonical.ts +17 -11
  15. package/cli/selftune/auto-update.ts +200 -8
  16. package/cli/selftune/canonical-export.ts +55 -25
  17. package/cli/selftune/command-surface.ts +397 -0
  18. package/cli/selftune/constants.ts +10 -1
  19. package/cli/selftune/contribute/contribute.ts +64 -13
  20. package/cli/selftune/contribution-config.ts +57 -3
  21. package/cli/selftune/contribution-preferences.ts +117 -0
  22. package/cli/selftune/contribution-signals.ts +8 -4
  23. package/cli/selftune/contribution-staging.ts +13 -2
  24. package/cli/selftune/contributions.ts +55 -121
  25. package/cli/selftune/creator-contributions.ts +29 -10
  26. package/cli/selftune/cron/setup.ts +7 -3
  27. package/cli/selftune/dashboard-contract.ts +87 -0
  28. package/cli/selftune/dashboard-server.ts +168 -17
  29. package/cli/selftune/dashboard.ts +350 -17
  30. package/cli/selftune/eval/baseline.ts +21 -5
  31. package/cli/selftune/eval/execution-eval.ts +170 -0
  32. package/cli/selftune/eval/family-overlap.ts +2 -2
  33. package/cli/selftune/eval/hooks-to-evals.ts +228 -82
  34. package/cli/selftune/eval/import-skillsbench.ts +2 -2
  35. package/cli/selftune/eval/invocation-classifier.ts +56 -0
  36. package/cli/selftune/eval/synthetic-evals.ts +5 -3
  37. package/cli/selftune/eval/unit-test-cli.ts +7 -4
  38. package/cli/selftune/evolution/apply-proposal.ts +295 -0
  39. package/cli/selftune/evolution/engines/judge-engine.ts +96 -0
  40. package/cli/selftune/evolution/engines/replay-engine.ts +180 -0
  41. package/cli/selftune/evolution/evidence.ts +2 -6
  42. package/cli/selftune/evolution/evolve-body.ts +152 -38
  43. package/cli/selftune/evolution/evolve.ts +244 -52
  44. package/cli/selftune/evolution/rollback.ts +0 -1
  45. package/cli/selftune/evolution/validate-body.ts +111 -49
  46. package/cli/selftune/evolution/validate-host-replay.ts +510 -60
  47. package/cli/selftune/evolution/validate-proposal.ts +11 -150
  48. package/cli/selftune/evolution/validate-routing.ts +51 -108
  49. package/cli/selftune/evolution/validation-contract.ts +91 -0
  50. package/cli/selftune/grading/auto-grade.ts +11 -7
  51. package/cli/selftune/grading/grade-session.ts +10 -16
  52. package/cli/selftune/hooks/skill-eval.ts +2 -1
  53. package/cli/selftune/hooks-shared/types.ts +1 -0
  54. package/cli/selftune/index.ts +58 -15
  55. package/cli/selftune/ingestors/claude-replay.ts +15 -10
  56. package/cli/selftune/ingestors/codex-wrapper.ts +3 -3
  57. package/cli/selftune/ingestors/opencode-ingest.ts +2 -2
  58. package/cli/selftune/ingestors/pi-ingest.ts +727 -0
  59. package/cli/selftune/init.ts +38 -4
  60. package/cli/selftune/localdb/direct-write.ts +120 -1
  61. package/cli/selftune/localdb/materialize.ts +6 -7
  62. package/cli/selftune/localdb/queries/cron.ts +34 -0
  63. package/cli/selftune/localdb/queries/dashboard.ts +834 -0
  64. package/cli/selftune/localdb/queries/evolution.ts +158 -0
  65. package/cli/selftune/localdb/queries/execution.ts +133 -0
  66. package/cli/selftune/localdb/queries/json.ts +18 -0
  67. package/cli/selftune/localdb/queries/monitoring.ts +263 -0
  68. package/cli/selftune/localdb/queries/raw.ts +95 -0
  69. package/cli/selftune/localdb/queries/staging.ts +270 -0
  70. package/cli/selftune/localdb/queries/trust.ts +392 -0
  71. package/cli/selftune/localdb/queries.ts +60 -2162
  72. package/cli/selftune/localdb/schema.ts +59 -0
  73. package/cli/selftune/monitoring/watch.ts +96 -29
  74. package/cli/selftune/normalization.ts +3 -0
  75. package/cli/selftune/observability.ts +12 -3
  76. package/cli/selftune/orchestrate/cli.ts +161 -0
  77. package/cli/selftune/orchestrate/execute.ts +295 -0
  78. package/cli/selftune/orchestrate/finalize.ts +157 -0
  79. package/cli/selftune/orchestrate/locks.ts +40 -0
  80. package/cli/selftune/orchestrate/plan.ts +131 -0
  81. package/cli/selftune/orchestrate/post-run.ts +59 -0
  82. package/cli/selftune/orchestrate/prepare.ts +334 -0
  83. package/cli/selftune/orchestrate/report.ts +182 -0
  84. package/cli/selftune/orchestrate/runtime.ts +120 -0
  85. package/cli/selftune/orchestrate/signals.ts +48 -0
  86. package/cli/selftune/orchestrate.ts +162 -1142
  87. package/cli/selftune/registry/client.ts +74 -0
  88. package/cli/selftune/registry/history.ts +54 -0
  89. package/cli/selftune/registry/index.ts +90 -0
  90. package/cli/selftune/registry/install.ts +141 -0
  91. package/cli/selftune/registry/list.ts +44 -0
  92. package/cli/selftune/registry/push.ts +171 -0
  93. package/cli/selftune/registry/rollback.ts +49 -0
  94. package/cli/selftune/registry/status.ts +62 -0
  95. package/cli/selftune/registry/sync.ts +125 -0
  96. package/cli/selftune/repair/skill-usage.ts +9 -3
  97. package/cli/selftune/routes/overview.ts +5 -2
  98. package/cli/selftune/routes/skill-report.ts +15 -2
  99. package/cli/selftune/schedule.ts +5 -5
  100. package/cli/selftune/status.ts +70 -2
  101. package/cli/selftune/sync.ts +127 -23
  102. package/cli/selftune/testing-readiness.ts +597 -0
  103. package/cli/selftune/types.ts +46 -5
  104. package/cli/selftune/uninstall.ts +2 -1
  105. package/cli/selftune/utils/canonical-log.ts +1 -9
  106. package/cli/selftune/utils/cli-error.ts +9 -0
  107. package/cli/selftune/utils/jsonl.ts +1 -30
  108. package/cli/selftune/utils/llm-call.ts +126 -6
  109. package/cli/selftune/utils/skill-discovery.ts +24 -0
  110. package/cli/selftune/workflows/proposals.ts +184 -0
  111. package/cli/selftune/workflows/skill-scaffold.ts +241 -0
  112. package/cli/selftune/workflows/workflows.ts +100 -26
  113. package/node_modules/@selftune/telemetry-contract/fixtures/complete-push.ts +1 -1
  114. package/node_modules/@selftune/telemetry-contract/fixtures/evidence-only-push.ts +2 -2
  115. package/node_modules/@selftune/telemetry-contract/fixtures/golden.test.ts +0 -1
  116. package/node_modules/@selftune/telemetry-contract/fixtures/partial-push-no-sessions.ts +1 -1
  117. package/node_modules/@selftune/telemetry-contract/fixtures/partial-push-unresolved-parents.ts +2 -2
  118. package/node_modules/@selftune/telemetry-contract/package.json +1 -1
  119. package/node_modules/@selftune/telemetry-contract/src/index.ts +1 -0
  120. package/node_modules/@selftune/telemetry-contract/src/schemas.ts +63 -5
  121. package/node_modules/@selftune/telemetry-contract/src/types.ts +97 -7
  122. package/node_modules/@selftune/telemetry-contract/tests/compatibility.test.ts +0 -1
  123. package/package.json +25 -9
  124. package/packages/dashboard-core/AGENTS.md +18 -0
  125. package/packages/dashboard-core/README.md +30 -0
  126. package/packages/dashboard-core/index.ts +3 -0
  127. package/packages/dashboard-core/package.json +39 -0
  128. package/packages/dashboard-core/src/chrome/DashboardChrome.tsx +74 -0
  129. package/packages/dashboard-core/src/chrome/DashboardHeader.tsx +200 -0
  130. package/packages/dashboard-core/src/chrome/DashboardSidebar.tsx +219 -0
  131. package/packages/dashboard-core/src/chrome/RuntimeBadge.tsx +46 -0
  132. package/packages/dashboard-core/src/chrome/index.ts +14 -0
  133. package/packages/dashboard-core/src/chrome/types.ts +81 -0
  134. package/packages/dashboard-core/src/chrome/utils.ts +23 -0
  135. package/packages/dashboard-core/src/gates/FeatureGate.tsx +11 -0
  136. package/packages/dashboard-core/src/gates/LockedRoute.tsx +29 -0
  137. package/packages/dashboard-core/src/gates/UpgradeCard.tsx +89 -0
  138. package/packages/dashboard-core/src/gates/index.ts +3 -0
  139. package/packages/dashboard-core/src/host/DashboardHostProvider.tsx +62 -0
  140. package/packages/dashboard-core/src/host/adapter.ts +47 -0
  141. package/packages/dashboard-core/src/host/capabilities.ts +55 -0
  142. package/packages/dashboard-core/src/host/index.ts +3 -0
  143. package/packages/dashboard-core/src/models/analytics.ts +39 -0
  144. package/packages/dashboard-core/src/models/index.ts +4 -0
  145. package/packages/dashboard-core/src/models/overview.ts +98 -0
  146. package/packages/dashboard-core/src/models/runtime.ts +7 -0
  147. package/packages/dashboard-core/src/models/skills.ts +34 -0
  148. package/packages/dashboard-core/src/routes/index.ts +2 -0
  149. package/packages/dashboard-core/src/routes/manifest.test.ts +70 -0
  150. package/packages/dashboard-core/src/routes/manifest.ts +451 -0
  151. package/packages/dashboard-core/src/routes/types.ts +39 -0
  152. package/packages/dashboard-core/src/screens/analytics/AnalyticsScreen.tsx +278 -0
  153. package/packages/dashboard-core/src/screens/analytics/index.ts +1 -0
  154. package/packages/dashboard-core/src/screens/index.ts +37 -0
  155. package/packages/dashboard-core/src/screens/overview/OverviewComparisonSurface.test.ts +101 -0
  156. package/packages/dashboard-core/src/screens/overview/OverviewComparisonSurface.tsx +393 -0
  157. package/packages/dashboard-core/src/screens/overview/OverviewCompositionSurface.test.tsx +113 -0
  158. package/packages/dashboard-core/src/screens/overview/OverviewCompositionSurface.tsx +72 -0
  159. package/packages/dashboard-core/src/screens/overview/OverviewCoreSurface.tsx +71 -0
  160. package/packages/dashboard-core/src/screens/overview/OverviewOnboardingBanner.tsx +90 -0
  161. package/packages/dashboard-core/src/screens/overview/OverviewRunSummary.tsx +40 -0
  162. package/packages/dashboard-core/src/screens/overview/index.ts +16 -0
  163. package/packages/dashboard-core/src/screens/overview/types.ts +13 -0
  164. package/packages/dashboard-core/src/screens/skill-report/SkillReportDailyBreakdownSection.tsx +99 -0
  165. package/packages/dashboard-core/src/screens/skill-report/SkillReportDataQualityTabContent.tsx +35 -0
  166. package/packages/dashboard-core/src/screens/skill-report/SkillReportEvidenceRail.tsx +71 -0
  167. package/packages/dashboard-core/src/screens/skill-report/SkillReportEvidenceSection.tsx +63 -0
  168. package/packages/dashboard-core/src/screens/skill-report/SkillReportEvidenceTabContent.tsx +25 -0
  169. package/packages/dashboard-core/src/screens/skill-report/SkillReportInvocationsSection.tsx +24 -0
  170. package/packages/dashboard-core/src/screens/skill-report/SkillReportMissedQueriesSection.tsx +79 -0
  171. package/packages/dashboard-core/src/screens/skill-report/SkillReportScaffold.tsx +150 -0
  172. package/packages/dashboard-core/src/screens/skill-report/SkillReportSections.test.tsx +224 -0
  173. package/packages/dashboard-core/src/screens/skill-report/SkillReportTabs.test.tsx +76 -0
  174. package/packages/dashboard-core/src/screens/skill-report/SkillReportTabs.tsx +88 -0
  175. package/packages/dashboard-core/src/screens/skill-report/SkillReportTrendSection.tsx +33 -0
  176. package/packages/dashboard-core/src/screens/skill-report/SkillReportTrustBadge.tsx +67 -0
  177. package/packages/dashboard-core/src/screens/skill-report/index.ts +45 -0
  178. package/packages/dashboard-core/src/screens/skills/SkillsLibraryScreen.tsx +162 -0
  179. package/packages/dashboard-core/src/screens/skills/index.ts +6 -0
  180. package/packages/telemetry-contract/fixtures/complete-push.ts +1 -1
  181. package/packages/telemetry-contract/fixtures/evidence-only-push.ts +2 -2
  182. package/packages/telemetry-contract/fixtures/golden.test.ts +0 -1
  183. package/packages/telemetry-contract/fixtures/partial-push-no-sessions.ts +1 -1
  184. package/packages/telemetry-contract/fixtures/partial-push-unresolved-parents.ts +2 -2
  185. package/packages/telemetry-contract/package.json +1 -1
  186. package/packages/telemetry-contract/src/index.ts +1 -0
  187. package/packages/telemetry-contract/src/schemas.ts +63 -5
  188. package/packages/telemetry-contract/src/types.ts +97 -7
  189. package/packages/telemetry-contract/tests/compatibility.test.ts +0 -1
  190. package/packages/ui/AGENTS.md +16 -0
  191. package/packages/ui/README.md +1 -1
  192. package/packages/ui/package.json +1 -1
  193. package/packages/ui/src/components/ActivityTimeline.tsx +152 -168
  194. package/packages/ui/src/components/AnalyticsCharts.tsx +344 -0
  195. package/packages/ui/src/components/EvidenceViewer.tsx +229 -464
  196. package/packages/ui/src/components/EvolutionTimeline.tsx +34 -87
  197. package/packages/ui/src/components/InfoTip.tsx +1 -2
  198. package/packages/ui/src/components/InvocationsPanel.tsx +413 -0
  199. package/packages/ui/src/components/JobHistoryTimeline.tsx +156 -0
  200. package/packages/ui/src/components/OrchestrateRunsPanel.tsx +18 -36
  201. package/packages/ui/src/components/OverviewPanels.tsx +693 -0
  202. package/packages/ui/src/components/PipelineStatusBar.tsx +65 -0
  203. package/packages/ui/src/components/SkillReportGuide.tsx +215 -0
  204. package/packages/ui/src/components/SkillReportPanels.tsx +919 -0
  205. package/packages/ui/src/components/SkillsLibrary.tsx +437 -0
  206. package/packages/ui/src/components/index.ts +56 -1
  207. package/packages/ui/src/components/section-cards.tsx +18 -35
  208. package/packages/ui/src/components/skill-health-grid.tsx +47 -37
  209. package/packages/ui/src/lib/constants.tsx +0 -1
  210. package/packages/ui/src/primitives/card.tsx +1 -1
  211. package/packages/ui/src/primitives/checkbox.tsx +1 -1
  212. package/packages/ui/src/primitives/dropdown-menu.tsx +2 -2
  213. package/packages/ui/src/primitives/select.tsx +2 -2
  214. package/packages/ui/src/primitives/tabs.tsx +7 -6
  215. package/packages/ui/src/types.ts +182 -4
  216. package/skill/SKILL.md +130 -318
  217. package/skill/agents/diagnosis-analyst.md +3 -3
  218. package/skill/agents/evolution-reviewer.md +3 -3
  219. package/skill/agents/integration-guide.md +3 -3
  220. package/skill/agents/pattern-analyst.md +2 -2
  221. package/skill/references/cli-quick-reference.md +89 -0
  222. package/skill/references/creator-playbook.md +131 -0
  223. package/skill/references/examples.md +48 -0
  224. package/skill/references/troubleshooting.md +47 -0
  225. package/skill/references/version-history.md +1 -1
  226. package/skill/selftune.contribute.json +11 -0
  227. package/skill/{Workflows → workflows}/Baseline.md +20 -1
  228. package/skill/{Workflows → workflows}/Contribute.md +23 -10
  229. package/skill/{Workflows → workflows}/Contributions.md +13 -5
  230. package/skill/workflows/CreateTestDeploy.md +170 -0
  231. package/skill/{Workflows → workflows}/CreatorContributions.md +18 -6
  232. package/skill/{Workflows → workflows}/Cron.md +1 -1
  233. package/skill/{Workflows → workflows}/Dashboard.md +20 -0
  234. package/skill/{Workflows → workflows}/Doctor.md +1 -1
  235. package/skill/{Workflows → workflows}/Evals.md +67 -2
  236. package/skill/{Workflows → workflows}/Evolve.md +119 -30
  237. package/skill/{Workflows → workflows}/EvolveBody.md +41 -1
  238. package/skill/{Workflows → workflows}/Grade.md +1 -1
  239. package/skill/{Workflows → workflows}/Ingest.md +60 -2
  240. package/skill/{Workflows → workflows}/Initialize.md +16 -9
  241. package/skill/{Workflows → workflows}/Orchestrate.md +13 -3
  242. package/skill/{Workflows → workflows}/PlatformHooks.md +19 -3
  243. package/skill/workflows/Registry.md +99 -0
  244. package/skill/{Workflows → workflows}/Schedule.md +3 -3
  245. package/skill/workflows/SignalsDashboard.md +87 -0
  246. package/skill/{Workflows → workflows}/Sync.md +3 -1
  247. package/skill/{Workflows → workflows}/UnitTest.md +19 -0
  248. package/skill/{Workflows → workflows}/Watch.md +42 -2
  249. package/skill/{Workflows → workflows}/Workflows.md +39 -2
  250. package/apps/local-dashboard/dist/assets/index-D8O-RG1I.js +0 -60
  251. package/apps/local-dashboard/dist/assets/index-_EcLywDg.css +0 -1
  252. package/apps/local-dashboard/dist/assets/vendor-react-CKkiCskZ.js +0 -11
  253. package/apps/local-dashboard/dist/assets/vendor-ui-CGEmUayx.js +0 -12
  254. package/cli/selftune/utils/html.ts +0 -27
  255. package/packages/ui/src/components/RecentActivityFeed.tsx +0 -117
  256. /package/skill/{Workflows → workflows}/AlphaUpload.md +0 -0
  257. /package/skill/{Workflows → workflows}/AutoActivation.md +0 -0
  258. /package/skill/{Workflows → workflows}/Badge.md +0 -0
  259. /package/skill/{Workflows → workflows}/Composability.md +0 -0
  260. /package/skill/{Workflows → workflows}/EvolutionMemory.md +0 -0
  261. /package/skill/{Workflows → workflows}/ExportCanonical.md +0 -0
  262. /package/skill/{Workflows → workflows}/Hook.md +0 -0
  263. /package/skill/{Workflows → workflows}/ImportSkillsBench.md +0 -0
  264. /package/skill/{Workflows → workflows}/Quickstart.md +0 -0
  265. /package/skill/{Workflows → workflows}/Recover.md +0 -0
  266. /package/skill/{Workflows → workflows}/RepairSkillUsage.md +0 -0
  267. /package/skill/{Workflows → workflows}/Replay.md +0 -0
  268. /package/skill/{Workflows → workflows}/Rollback.md +0 -0
  269. /package/skill/{Workflows → workflows}/Telemetry.md +0 -0
  270. /package/skill/{Workflows → workflows}/Uninstall.md +0 -0
@@ -1,3 +1,8 @@
1
+ import { useMemo, useState } from "react";
2
+ import { Badge } from "../primitives/badge";
3
+ import { Card, CardContent, CardHeader, CardTitle } from "../primitives/card";
4
+ import type { EvidenceEntry, EvolutionEntry } from "../types";
5
+ import { formatRate, timeAgo } from "../lib/format";
1
6
  import {
2
7
  CheckCircleIcon,
3
8
  ChevronDownIcon,
@@ -5,20 +10,25 @@ import {
5
10
  CircleDotIcon,
6
11
  FileTextIcon,
7
12
  InfoIcon,
13
+ RocketIcon,
14
+ ShieldCheckIcon,
8
15
  ShieldAlertIcon,
9
16
  XCircleIcon,
17
+ UndoIcon,
18
+ ArrowRightIcon,
10
19
  TrendingUpIcon,
11
20
  TrendingDownIcon,
12
21
  ListChecksIcon,
13
22
  } from "lucide-react";
14
- import { useMemo, useState } from "react";
15
- import type { ReactNode } from "react";
16
23
  import Markdown from "react-markdown";
17
24
 
18
- import { formatRate, timeAgo } from "../lib/format";
19
- import { Badge } from "../primitives/badge";
20
- import { Card, CardContent, CardHeader, CardTitle } from "../primitives/card";
21
- import type { EvidenceEntry, EvolutionEntry } from "../types";
25
+ const ACTION_ICON: Record<string, React.ReactNode> = {
26
+ created: <CircleDotIcon className="size-3.5" />,
27
+ validated: <ShieldCheckIcon className="size-3.5" />,
28
+ deployed: <RocketIcon className="size-3.5" />,
29
+ rejected: <XCircleIcon className="size-3.5" />,
30
+ rolled_back: <UndoIcon className="size-3.5" />,
31
+ };
22
32
 
23
33
  const ACTION_VARIANT: Record<string, "default" | "secondary" | "destructive" | "outline"> = {
24
34
  created: "outline",
@@ -32,117 +42,15 @@ interface Props {
32
42
  proposalId: string;
33
43
  evolution: EvolutionEntry[];
34
44
  evidence: EvidenceEntry[];
35
- showContextBanner?: boolean;
36
- }
37
-
38
- function getValidationModeMeta(mode?: string | null): {
39
- label: string;
40
- variant: "default" | "secondary" | "destructive" | "outline";
41
- description: string;
42
- } | null {
43
- switch (mode) {
44
- case "host_replay":
45
- return {
46
- label: "Replay-backed validation",
47
- variant: "default",
48
- description:
49
- "Validated against a controlled replay fixture instead of a free-form judge prompt.",
50
- };
51
- case "llm_judge":
52
- return {
53
- label: "Model judgment",
54
- variant: "secondary",
55
- description: "Validated by an LLM trigger check rather than a replay fixture.",
56
- };
57
- case "structural_guard":
58
- return {
59
- label: "Structural guard",
60
- variant: "outline",
61
- description:
62
- "Only deterministic structural checks ran; no replay or judge validation was needed.",
63
- };
64
- default:
65
- return null;
66
- }
67
- }
68
-
69
- function sentenceCase(value: string): string {
70
- return value.replace(/_/g, " ");
71
- }
72
-
73
- function getOutcomePresentation(action?: string | null): {
74
- title: string;
75
- summary: string;
76
- tone: string;
77
- icon: ReactNode;
78
- liveSkillNote: string;
79
- } {
80
- switch (action) {
81
- case "rejected":
82
- return {
83
- title: "Proposal rejected",
84
- summary: "Selftune proposed a change, but blocked it before your live skill was updated.",
85
- tone: "border-red-500/20 bg-red-500/8 text-red-700 dark:text-red-50",
86
- icon: <XCircleIcon className="size-4 text-red-400" />,
87
- liveSkillNote: "Your live skill is unchanged.",
88
- };
89
- case "validated":
90
- return {
91
- title: "Proposal validated",
92
- summary: "The proposed change improved the eval signal and is ready for review or deploy.",
93
- tone: "border-emerald-500/20 bg-emerald-500/8 text-emerald-700 dark:text-emerald-50",
94
- icon: <CheckCircleIcon className="size-4 text-emerald-400" />,
95
- liveSkillNote: "Your live skill has not changed until this proposal is deployed.",
96
- };
97
- case "deployed":
98
- return {
99
- title: "Proposal deployed",
100
- summary: "The proposed change passed validation and was applied to the live skill.",
101
- tone: "border-primary/25 bg-primary/8 text-foreground",
102
- icon: <TrendingUpIcon className="size-4 text-primary" />,
103
- liveSkillNote: "Your live skill now includes this change.",
104
- };
105
- case "rolled_back":
106
- return {
107
- title: "Proposal rolled back",
108
- summary: "A deployed change was later reversed because follow-up evidence showed risk.",
109
- tone: "border-amber-500/20 bg-amber-500/8 text-amber-800 dark:text-amber-50",
110
- icon: <TrendingDownIcon className="size-4 text-amber-400" />,
111
- liveSkillNote: "Your live skill no longer uses this proposal.",
112
- };
113
- case "created":
114
- default:
115
- return {
116
- title: "Proposal under review",
117
- summary: "Selftune found a possible improvement and recorded the proposed change.",
118
- tone: "border-border/30 bg-muted/25 text-foreground",
119
- icon: <CircleDotIcon className="size-4 text-muted-foreground" />,
120
- liveSkillNote: "Your live skill is unchanged until a proposal is validated and deployed.",
121
- };
122
- }
123
45
  }
124
46
 
125
47
  /** Parse YAML-ish frontmatter from text, returns { meta, body } */
126
48
  function parseFrontmatter(text: string): { meta: Record<string, string>; body: string } {
127
- const lines = text.split("\n");
128
- if (lines.length < 3 || lines[0].trim() !== "---") {
129
- return { meta: {}, body: text };
130
- }
131
-
132
- let closingIndex = -1;
133
- for (let i = 1; i < lines.length; i++) {
134
- if (lines[i].trim() === "---") {
135
- closingIndex = i;
136
- break;
137
- }
138
- }
139
-
140
- if (closingIndex === -1) {
141
- return { meta: {}, body: text };
142
- }
49
+ const match = text.match(/^---\s*\n([\s\S]*?)\n---\s*\n([\s\S]*)$/);
50
+ if (!match) return { meta: {}, body: text };
143
51
 
144
52
  const meta: Record<string, string> = {};
145
- for (const line of lines.slice(1, closingIndex)) {
53
+ for (const line of match[1].split("\n")) {
146
54
  const idx = line.indexOf(":");
147
55
  if (idx > 0) {
148
56
  const key = line.slice(0, idx).trim();
@@ -150,8 +58,7 @@ function parseFrontmatter(text: string): { meta: Record<string, string>; body: s
150
58
  if (key && val) meta[key] = val;
151
59
  }
152
60
  }
153
-
154
- return { meta, body: lines.slice(closingIndex + 1).join("\n") };
61
+ return { meta, body: match[2] };
155
62
  }
156
63
 
157
64
  function FrontmatterTable({ meta }: { meta: Record<string, string> }) {
@@ -212,7 +119,7 @@ function SkillContentBlock({
212
119
  }
213
120
 
214
121
  /** Smart formatting for a single validation value */
215
- function formatValidationValue(key: string, val: unknown): ReactNode {
122
+ function formatValidationValue(key: string, val: unknown): React.ReactNode {
216
123
  // Booleans
217
124
  if (typeof val === "boolean") {
218
125
  return val ? (
@@ -251,6 +158,44 @@ function formatValidationValue(key: string, val: unknown): ReactNode {
251
158
  return <span>{String(val)}</span>;
252
159
  }
253
160
 
161
+ function getPerEntryPassStatus(entry: unknown): boolean | null {
162
+ if (typeof entry !== "object" || entry === null) return null;
163
+ const obj = entry as Record<string, unknown>;
164
+ const afterPass = obj.after_pass ?? obj.after ?? obj.triggered ?? obj.result;
165
+ const passed = obj.passed ?? obj.matched;
166
+ return typeof afterPass === "boolean" ? afterPass : typeof passed === "boolean" ? passed : null;
167
+ }
168
+
169
+ function getEvidenceListKey(prefix: string, value: unknown): string {
170
+ if (typeof value !== "object" || value === null) {
171
+ return `${prefix}:${JSON.stringify(value)}`;
172
+ }
173
+
174
+ const record = value as Record<string, unknown>;
175
+ const nested =
176
+ typeof record.entry === "object" && record.entry !== null
177
+ ? (record.entry as Record<string, unknown>)
178
+ : null;
179
+ const query =
180
+ typeof nested?.query === "string"
181
+ ? nested.query
182
+ : typeof record.query === "string"
183
+ ? record.query
184
+ : typeof record.prompt === "string"
185
+ ? record.prompt
186
+ : typeof record.input === "string"
187
+ ? record.input
188
+ : null;
189
+
190
+ if (query) return `${prefix}:${query}`;
191
+
192
+ const action = typeof record.action === "string" ? record.action : null;
193
+ const timestamp = typeof record.timestamp === "string" ? record.timestamp : null;
194
+ if (action && timestamp) return `${prefix}:${action}:${timestamp}`;
195
+
196
+ return `${prefix}:${JSON.stringify(record)}`;
197
+ }
198
+
254
199
  /** Render a per_entry_result row — handles both flat EvalEntry and nested { entry, before_pass, after_pass } */
255
200
  function PerEntryResult({ entry }: { entry: Record<string, unknown> }) {
256
201
  // Handle nested shape: { entry: { query, should_trigger }, before_pass, after_pass }
@@ -261,11 +206,7 @@ function PerEntryResult({ entry }: { entry: Record<string, unknown> }) {
261
206
  const beforePass =
262
207
  entry.before_pass ?? entry.before ?? entry.original_triggered ?? entry.baseline;
263
208
  const afterPass = entry.after_pass ?? entry.after ?? entry.triggered ?? entry.result;
264
- const passed = entry.passed ?? entry.matched;
265
-
266
- // Determine icon: use after_pass for per_entry_results, passed for others
267
- const isPass =
268
- typeof afterPass === "boolean" ? afterPass : typeof passed === "boolean" ? passed : null;
209
+ const isPass = getPerEntryPassStatus(entry);
269
210
 
270
211
  return (
271
212
  <div className="flex items-start gap-2 text-xs py-1.5 border-b border-border/50 last:border-0">
@@ -314,15 +255,19 @@ function ValidationResults({ validation }: { validation: Record<string, unknown>
314
255
  validation_mode,
315
256
  validation_agent,
316
257
  validation_fixture_id,
317
- validation_evidence_ref,
258
+ validation_fallback_reason,
318
259
  ...rest
319
260
  } = validation;
320
261
 
321
262
  const regressionsArr = Array.isArray(regressions) ? regressions : [];
322
263
  const newPassesArr = Array.isArray(new_passes) ? new_passes : [];
323
264
  const perEntryArr = Array.isArray(per_entry_results) ? per_entry_results : [];
324
- const validationMeta =
325
- typeof validation_mode === "string" ? getValidationModeMeta(validation_mode) : null;
265
+ const validationMode = typeof validation_mode === "string" ? validation_mode : null;
266
+ const validationAgent = typeof validation_agent === "string" ? validation_agent : null;
267
+ const validationFixtureId =
268
+ typeof validation_fixture_id === "string" ? validation_fixture_id : null;
269
+ const validationFallbackReason =
270
+ typeof validation_fallback_reason === "string" ? validation_fallback_reason : null;
326
271
 
327
272
  return (
328
273
  <div className="rounded-md border bg-muted/30 p-3 space-y-3">
@@ -333,34 +278,6 @@ function ValidationResults({ validation }: { validation: Record<string, unknown>
333
278
  </span>
334
279
  </p>
335
280
 
336
- {validationMeta && (
337
- <div className="rounded-md border bg-card px-3 py-2">
338
- <div className="flex flex-wrap items-center gap-2">
339
- <Badge variant={validationMeta.variant} className="text-[10px]">
340
- {validationMeta.label}
341
- </Badge>
342
- {typeof validation_agent === "string" && validation_agent.trim() && (
343
- <Badge variant="outline" className="text-[10px]">
344
- agent: {validation_agent}
345
- </Badge>
346
- )}
347
- {typeof validation_fixture_id === "string" && validation_fixture_id.trim() && (
348
- <Badge variant="outline" className="text-[10px]">
349
- fixture: {validation_fixture_id}
350
- </Badge>
351
- )}
352
- </div>
353
- <p className="mt-1 text-[11px] leading-relaxed text-muted-foreground">
354
- {validationMeta.description}
355
- </p>
356
- {typeof validation_evidence_ref === "string" && validation_evidence_ref.trim() && (
357
- <p className="mt-1 text-[10px] font-mono text-muted-foreground/70">
358
- {validation_evidence_ref}
359
- </p>
360
- )}
361
- </div>
362
- )}
363
-
364
281
  {/* Summary bar */}
365
282
  <div className="flex items-center gap-3 flex-wrap">
366
283
  {improved !== undefined && (
@@ -368,6 +285,21 @@ function ValidationResults({ validation }: { validation: Record<string, unknown>
368
285
  {improved ? "Improved" : "Regressed"}
369
286
  </Badge>
370
287
  )}
288
+ {validationMode && (
289
+ <Badge variant="outline" className="text-[10px] capitalize">
290
+ {validationMode.replace(/_/g, " ")}
291
+ </Badge>
292
+ )}
293
+ {validationAgent && (
294
+ <Badge variant="secondary" className="text-[10px]">
295
+ {validationAgent}
296
+ </Badge>
297
+ )}
298
+ {validationFixtureId && (
299
+ <Badge variant="secondary" className="text-[10px] font-mono" title={validationFixtureId}>
300
+ fixture #{validationFixtureId.slice(0, 8)}
301
+ </Badge>
302
+ )}
371
303
  {typeof before_pass_rate === "number" && typeof after_pass_rate === "number" && (
372
304
  <span className="text-xs font-mono text-muted-foreground">
373
305
  {(before_pass_rate * 100).toFixed(1)}% &rarr; {(after_pass_rate * 100).toFixed(1)}%
@@ -375,13 +307,7 @@ function ValidationResults({ validation }: { validation: Record<string, unknown>
375
307
  )}
376
308
  {typeof net_change === "number" && (
377
309
  <span
378
- className={`text-xs font-mono font-semibold ${
379
- net_change > 0
380
- ? "text-emerald-600 dark:text-emerald-400"
381
- : net_change < 0
382
- ? "text-red-500"
383
- : "text-muted-foreground"
384
- }`}
310
+ className={`text-xs font-mono font-semibold ${net_change > 0 ? "text-emerald-600 dark:text-emerald-400" : "text-red-500"}`}
385
311
  >
386
312
  {net_change > 0 ? "+" : ""}
387
313
  {(net_change * 100).toFixed(1)}%
@@ -389,6 +315,12 @@ function ValidationResults({ validation }: { validation: Record<string, unknown>
389
315
  )}
390
316
  </div>
391
317
 
318
+ {validationFallbackReason && (
319
+ <div className="rounded border border-amber-300/60 bg-amber-50 px-2.5 py-2 text-[11px] text-amber-900 dark:border-amber-900/60 dark:bg-amber-950/30 dark:text-amber-200">
320
+ Replay fallback: {validationFallbackReason}
321
+ </div>
322
+ )}
323
+
392
324
  {/* New passes */}
393
325
  {newPassesArr.length > 0 && (
394
326
  <div>
@@ -396,9 +328,9 @@ function ValidationResults({ validation }: { validation: Record<string, unknown>
396
328
  New Passes ({newPassesArr.length})
397
329
  </p>
398
330
  <div className="rounded border bg-card p-2">
399
- {newPassesArr.map((entry, j) => (
331
+ {newPassesArr.map((entry) => (
400
332
  <PerEntryResult
401
- key={j}
333
+ key={getEvidenceListKey("new-pass", entry)}
402
334
  entry={
403
335
  typeof entry === "object" && entry !== null
404
336
  ? (entry as Record<string, unknown>)
@@ -417,9 +349,9 @@ function ValidationResults({ validation }: { validation: Record<string, unknown>
417
349
  Regressions ({regressionsArr.length})
418
350
  </p>
419
351
  <div className="rounded border border-red-200 dark:border-red-900/50 bg-card p-2">
420
- {regressionsArr.map((entry, j) => (
352
+ {regressionsArr.map((entry) => (
421
353
  <PerEntryResult
422
- key={j}
354
+ key={getEvidenceListKey("regression", entry)}
423
355
  entry={
424
356
  typeof entry === "object" && entry !== null
425
357
  ? (entry as Record<string, unknown>)
@@ -451,17 +383,7 @@ function ValidationResults({ validation }: { validation: Record<string, unknown>
451
383
 
452
384
  function PerEntryResultsSection({ entries }: { entries: unknown[] }) {
453
385
  const [expanded, setExpanded] = useState(false);
454
- const passCount = entries.filter((e) => {
455
- if (typeof e !== "object" || e === null) return false;
456
- const obj = e as Record<string, unknown>;
457
- return (
458
- obj.passed === true ||
459
- obj.matched === true ||
460
- obj.triggered === true ||
461
- obj.after === true ||
462
- obj.result === true
463
- );
464
- }).length;
386
+ const passCount = entries.filter((entry) => getPerEntryPassStatus(entry) === true).length;
465
387
 
466
388
  const display = expanded ? entries : entries.slice(0, 5);
467
389
 
@@ -489,9 +411,9 @@ function PerEntryResultsSection({ entries }: { entries: unknown[] }) {
489
411
  />
490
412
  </div>
491
413
  <div className="rounded border bg-card p-2 max-h-[300px] overflow-y-auto">
492
- {display.map((entry, j) => (
414
+ {display.map((entry) => (
493
415
  <PerEntryResult
494
- key={j}
416
+ key={getEvidenceListKey("per-entry", entry)}
495
417
  entry={
496
418
  typeof entry === "object" && entry !== null
497
419
  ? (entry as Record<string, unknown>)
@@ -554,13 +476,13 @@ function EvalSetSection({ evalSet }: { evalSet: Array<Record<string, unknown>> }
554
476
  </button>
555
477
  {expanded && (
556
478
  <div className="space-y-1">
557
- {evalSet.map((evalEntry, j) => {
479
+ {evalSet.map((evalEntry) => {
558
480
  const query = evalEntry.query ?? evalEntry.prompt ?? evalEntry.input;
559
481
  const expected = evalEntry.expected ?? evalEntry.should_trigger;
560
482
  const passed = evalEntry.passed ?? evalEntry.result;
561
483
  return (
562
484
  <div
563
- key={j}
485
+ key={getEvidenceListKey("eval-set", evalEntry)}
564
486
  className="flex items-start gap-2 text-xs py-1 border-b border-border/50 last:border-0"
565
487
  >
566
488
  {typeof passed === "boolean" ? (
@@ -726,12 +648,7 @@ function CollapsedEvidenceCard({
726
648
  );
727
649
  }
728
650
 
729
- export function EvidenceViewer({
730
- proposalId,
731
- evolution,
732
- evidence,
733
- showContextBanner = true,
734
- }: Props) {
651
+ export function EvidenceViewer({ proposalId, evolution, evidence }: Props) {
735
652
  const steps = useMemo(
736
653
  () =>
737
654
  evolution
@@ -750,7 +667,6 @@ export function EvidenceViewer({
750
667
 
751
668
  // Track which earlier rounds are manually expanded
752
669
  const [expandedRounds, setExpandedRounds] = useState<Set<string>>(new Set());
753
- const [expandedProposalTargets, setExpandedProposalTargets] = useState<Set<string>>(new Set());
754
670
 
755
671
  const toggleRound = (key: string) => {
756
672
  setExpandedRounds((prev) => {
@@ -761,15 +677,6 @@ export function EvidenceViewer({
761
677
  });
762
678
  };
763
679
 
764
- const toggleProposalHistory = (target: string) => {
765
- setExpandedProposalTargets((prev) => {
766
- const next = new Set(prev);
767
- if (next.has(target)) next.delete(target);
768
- else next.add(target);
769
- return next;
770
- });
771
- };
772
-
773
680
  const snapshot = useMemo(() => {
774
681
  for (let i = steps.length - 1; i >= 0; i--) {
775
682
  if (steps[i].eval_snapshot) return steps[i].eval_snapshot as Record<string, unknown>;
@@ -793,318 +700,176 @@ export function EvidenceViewer({
793
700
  return { proposalEntries: proposals, validationsByTarget: validationMap };
794
701
  }, [entries]);
795
702
 
796
- const latestStep = steps[steps.length - 1] ?? null;
797
- const lifecycleLabel = steps.map((step) => step.action.replace("_", " ")).join(" -> ");
798
- const outcome = getOutcomePresentation(latestStep?.action);
799
- const validationMeta = getValidationModeMeta(latestStep?.validation_mode);
800
- const latestProposalConfidence = useMemo(() => {
801
- for (let i = proposalEntries.length - 1; i >= 0; i--) {
802
- if (proposalEntries[i].confidence !== null) {
803
- return proposalEntries[i].confidence;
804
- }
805
- }
806
- return null;
807
- }, [proposalEntries]);
808
- const proposalCards = useMemo(() => {
809
- const grouped = new Map<string, EvidenceEntry[]>();
810
- for (const entry of proposalEntries) {
811
- const key = entry.target || "proposal";
812
- const group = grouped.get(key) ?? [];
813
- group.push(entry);
814
- grouped.set(key, group);
815
- }
816
-
817
- return Array.from(grouped.entries()).map(([target, group]) => {
818
- let richest = group[group.length - 1];
819
- for (let i = group.length - 1; i >= 0; i--) {
820
- if (group[i].original_text || group[i].proposed_text || group[i].rationale) {
821
- richest = group[i];
822
- break;
823
- }
824
- }
825
- const primaryIndex = group.findIndex((entry) => entry === richest);
826
- return {
827
- target,
828
- primaryEntry: richest,
829
- historyEntries: group.filter((_, index) => index !== primaryIndex),
830
- entries: group,
831
- };
832
- });
833
- }, [proposalEntries]);
834
-
835
703
  return (
836
704
  <div className="space-y-4">
837
705
  {/* Context banner */}
838
- {showContextBanner && (
839
- <div className="flex items-start gap-2.5 rounded-lg border border-primary/20 bg-primary/5 px-3.5 py-2.5">
840
- <InfoIcon className="mt-0.5 size-4 shrink-0 text-primary/60" />
841
- <p className="text-xs leading-relaxed text-muted-foreground">
842
- This view shows the complete evidence trail for a skill evolution proposal &mdash; how
843
- the skill was changed, the eval test results before and after, and whether the change
844
- improved performance.
845
- </p>
846
- </div>
847
- )}
706
+ <div className="flex items-start gap-2.5 rounded-lg border border-primary/20 bg-primary/5 px-3.5 py-2.5">
707
+ <InfoIcon className="size-4 text-primary/60 shrink-0 mt-0.5" />
708
+ <p className="text-xs text-muted-foreground leading-relaxed">
709
+ This view shows the complete evidence trail for a skill evolution proposal &mdash; how the
710
+ skill was changed, the eval test results before and after, and whether the change improved
711
+ performance.
712
+ </p>
713
+ </div>
848
714
 
849
- <Card className="border-border/15 bg-muted/10">
715
+ {/* Proposal journey */}
716
+ <Card>
850
717
  <CardHeader className="pb-3">
851
- <CardTitle className="flex flex-wrap items-center gap-2 text-sm">
852
- <span>Proposal Summary</span>
718
+ <CardTitle className="text-sm flex items-center gap-2">
719
+ <span>Proposal Journey</span>
853
720
  <span className="font-mono text-xs text-muted-foreground">
854
721
  #{proposalId.slice(0, 12)}
855
722
  </span>
856
723
  </CardTitle>
857
724
  </CardHeader>
858
725
  <CardContent className="space-y-3">
859
- <div className={`rounded-lg border px-4 py-3 ${outcome.tone}`}>
860
- <div className="flex items-start gap-3">
861
- <div className="mt-0.5 shrink-0">{outcome.icon}</div>
862
- <div className="min-w-0 space-y-1.5">
863
- <div className="flex flex-wrap items-center gap-2">
864
- <p className="text-sm font-semibold">{outcome.title}</p>
865
- {latestStep && (
866
- <Badge
867
- variant={ACTION_VARIANT[latestStep.action] ?? "secondary"}
868
- className="text-[10px] capitalize"
869
- >
870
- {sentenceCase(latestStep.action)}
871
- </Badge>
872
- )}
726
+ <div className="flex items-center gap-2 flex-wrap">
727
+ {steps.map((step, i) => (
728
+ <div key={`${step.action}-${step.timestamp}`} className="contents">
729
+ {i > 0 && <ArrowRightIcon className="size-3 text-muted-foreground/50 shrink-0" />}
730
+ <div className="flex items-center gap-1.5 rounded-md border px-2.5 py-1.5 bg-card">
731
+ {ACTION_ICON[step.action]}
732
+ <Badge
733
+ variant={ACTION_VARIANT[step.action] ?? "secondary"}
734
+ className="text-[10px] capitalize"
735
+ >
736
+ {step.action.replace("_", " ")}
737
+ </Badge>
738
+ <span className="text-[10px] text-muted-foreground">
739
+ {timeAgo(step.timestamp)}
740
+ </span>
873
741
  </div>
874
- <p className="text-sm leading-6 text-current/90">{outcome.summary}</p>
875
- {latestStep?.details && (
876
- <div className="rounded-md bg-black/10 px-3 py-2 text-sm leading-6 text-current/90 dark:bg-black/20">
877
- {latestStep.details}
878
- </div>
879
- )}
880
- <p className="text-xs font-medium text-current/75">{outcome.liveSkillNote}</p>
881
742
  </div>
882
- </div>
883
- </div>
884
-
885
- <div className="flex flex-wrap items-center gap-2">
886
- {latestStep?.timestamp && (
887
- <span className="text-[10px] font-mono text-muted-foreground">
888
- {timeAgo(latestStep.timestamp)}
889
- </span>
890
- )}
891
- <Badge variant="outline" className="text-[10px]">
892
- {entries.length} evidence {entries.length === 1 ? "row" : "rows"}
893
- </Badge>
894
- {validationMeta && (
895
- <Badge variant={validationMeta.variant} className="text-[10px]">
896
- {validationMeta.label}
897
- </Badge>
898
- )}
899
- {latestStep?.validation_fixture_id && (
900
- <Badge variant="outline" className="text-[10px]">
901
- fixture: {latestStep.validation_fixture_id}
902
- </Badge>
903
- )}
904
- {latestProposalConfidence != null && (
905
- <Badge variant="secondary" className="text-[10px]">
906
- {Math.round(latestProposalConfidence * 100)}% confidence
907
- </Badge>
908
- )}
909
- </div>
910
-
911
- {validationMeta && (
912
- <p className="text-[11px] leading-relaxed text-muted-foreground">
913
- {validationMeta.description}
914
- </p>
915
- )}
916
-
917
- <div className="flex flex-wrap items-center gap-2 text-[11px] text-muted-foreground">
918
- <span className="font-headline uppercase tracking-[0.16em] text-muted-foreground/80">
919
- Lifecycle
920
- </span>
921
- <span>{lifecycleLabel ? sentenceCase(lifecycleLabel) : "No lifecycle recorded"}</span>
743
+ ))}
922
744
  </div>
923
745
 
924
- {typeof snapshot?.net_change === "number" &&
925
- typeof snapshot.before_pass_rate === "number" &&
926
- typeof snapshot.after_pass_rate === "number" && (
927
- <div className="flex flex-wrap items-center gap-3 rounded-md border bg-muted/20 px-3 py-2">
746
+ {/* Eval snapshot pass rate change */}
747
+ {snapshot && (
748
+ <div className="flex items-center gap-3 rounded-md border bg-muted/20 px-3 py-2">
749
+ {typeof snapshot.net_change === "number" && (
928
750
  <div className="flex items-center gap-1">
929
- {snapshot.net_change > 0 ? (
751
+ {(snapshot.net_change as number) > 0 ? (
930
752
  <TrendingUpIcon className="size-3.5 text-emerald-500" />
931
- ) : snapshot.net_change < 0 ? (
932
- <TrendingDownIcon className="size-3.5 text-red-500" />
933
753
  ) : (
934
- <CircleDotIcon className="size-3.5 text-muted-foreground" />
754
+ <TrendingDownIcon className="size-3.5 text-red-500" />
935
755
  )}
936
756
  <span
937
- className={`text-sm font-mono font-semibold ${
938
- snapshot.net_change > 0
939
- ? "text-emerald-600 dark:text-emerald-400"
940
- : snapshot.net_change < 0
941
- ? "text-red-500"
942
- : "text-muted-foreground"
943
- }`}
757
+ className={`text-sm font-semibold font-mono ${(snapshot.net_change as number) > 0 ? "text-emerald-600 dark:text-emerald-400" : "text-red-500"}`}
944
758
  >
945
- {snapshot.net_change > 0 ? "+" : ""}
946
- {Math.round(snapshot.net_change * 100)}%
759
+ {(snapshot.net_change as number) > 0 ? "+" : ""}
760
+ {Math.round((snapshot.net_change as number) * 100)}%
947
761
  </span>
948
762
  </div>
949
- <span className="text-xs font-mono text-muted-foreground">
950
- {Math.round(snapshot.before_pass_rate * 100)}% &rarr;{" "}
951
- {Math.round(snapshot.after_pass_rate * 100)}%
952
- </span>
953
- {snapshot.net_change > 0 ? (
954
- <Badge variant="default" className="text-[10px]">
955
- Improved
956
- </Badge>
957
- ) : snapshot.net_change < 0 ? (
958
- <Badge variant="destructive" className="text-[10px]">
959
- Regressed
960
- </Badge>
961
- ) : (
962
- <Badge variant="outline" className="text-[10px]">
963
- No change
964
- </Badge>
763
+ )}
764
+ {typeof snapshot.before_pass_rate === "number" &&
765
+ typeof snapshot.after_pass_rate === "number" && (
766
+ <span className="text-xs text-muted-foreground font-mono">
767
+ {Math.round((snapshot.before_pass_rate as number) * 100)}% &rarr;{" "}
768
+ {Math.round((snapshot.after_pass_rate as number) * 100)}%
769
+ </span>
965
770
  )}
966
- </div>
967
- )}
771
+ {snapshot.improved !== undefined && (
772
+ <Badge
773
+ variant={snapshot.improved ? "default" : "destructive"}
774
+ className="text-[10px]"
775
+ >
776
+ {snapshot.improved ? "Improved" : "Regressed"}
777
+ </Badge>
778
+ )}
779
+ </div>
780
+ )}
781
+
782
+ {/* Details from last step */}
783
+ {steps.length > 0 && steps[steps.length - 1].details && (
784
+ <p className="text-xs text-muted-foreground leading-relaxed">
785
+ {steps[steps.length - 1].details}
786
+ </p>
787
+ )}
968
788
  </CardContent>
969
789
  </Card>
970
790
 
971
791
  {/* Proposal-stage evidence — standalone cards showing original/proposed text */}
972
- {proposalCards.length > 0 && (
973
- <div className="space-y-2">
974
- <div className="space-y-1">
975
- <p className="text-xs font-semibold uppercase tracking-[0.18em] text-muted-foreground/80">
976
- What changed
977
- </p>
978
- <p className="text-sm text-muted-foreground">
979
- This is the actual skill text selftune proposed changing.
980
- </p>
981
- </div>
982
- {proposalCards.map((group) => {
983
- const hasHistory = group.historyEntries.length > 0;
984
- const isExpanded = expandedProposalTargets.has(group.target);
792
+ {proposalEntries.map((entry) => (
793
+ <EvidenceCard
794
+ key={`proposal-${entry.target}-${entry.timestamp}`}
795
+ entry={entry}
796
+ roundLabel={null}
797
+ roundStatus="single"
798
+ prevPassRate={null}
799
+ currPassRate={null}
800
+ />
801
+ ))}
985
802
 
986
- return (
987
- <div key={`proposal-${group.target}`} className="space-y-2">
988
- <EvidenceCard
989
- entry={group.primaryEntry}
990
- roundLabel={hasHistory ? `Latest draft of ${group.entries.length}` : null}
991
- roundStatus={hasHistory ? "final" : "single"}
992
- prevPassRate={null}
993
- currPassRate={null}
994
- />
995
- {hasHistory && (
996
- <div className="space-y-2">
803
+ {/* Validation-stage evidence — grouped by target with iteration rounds */}
804
+ {Array.from(validationsByTarget.entries()).map(([target, targetEntries]) => {
805
+ const hasMultipleRounds = targetEntries.length > 1;
806
+
807
+ return (
808
+ <div key={target} className="space-y-2">
809
+ {targetEntries.map((entry, i) => {
810
+ const isLast = i === targetEntries.length - 1;
811
+ const roundLabel = hasMultipleRounds
812
+ ? `Round ${i + 1} of ${targetEntries.length}`
813
+ : null;
814
+ const prevPassRate = i > 0 ? getAfterPassRate(targetEntries[i - 1]) : null;
815
+ const currPassRate = getAfterPassRate(entry);
816
+ const roundKey = `${target}-${entry.timestamp}`;
817
+ const roundStatus: RoundStatus = !hasMultipleRounds
818
+ ? "single"
819
+ : isLast
820
+ ? "final"
821
+ : "intermediate";
822
+
823
+ // Earlier rounds: collapsed by default
824
+ if (roundStatus === "intermediate" && !expandedRounds.has(roundKey)) {
825
+ return (
826
+ <CollapsedEvidenceCard
827
+ key={roundKey}
828
+ entry={entry}
829
+ roundLabel={roundLabel!}
830
+ onExpand={() => toggleRound(roundKey)}
831
+ />
832
+ );
833
+ }
834
+
835
+ // Expanded earlier round — show with collapse toggle
836
+ if (roundStatus === "intermediate" && expandedRounds.has(roundKey)) {
837
+ return (
838
+ <div key={roundKey} className="space-y-1">
997
839
  <button
998
840
  type="button"
999
- onClick={() => toggleProposalHistory(group.target)}
1000
- className="flex items-center gap-1 px-1 text-[10px] text-muted-foreground transition-colors hover:text-foreground"
841
+ onClick={() => toggleRound(roundKey)}
842
+ className="flex items-center gap-1 text-[10px] text-muted-foreground hover:text-foreground transition-colors px-1"
1001
843
  >
1002
- {isExpanded ? (
1003
- <ChevronDownIcon className="size-3" />
1004
- ) : (
1005
- <ChevronRightIcon className="size-3" />
1006
- )}
1007
- {isExpanded ? "Hide" : "Show"} {group.historyEntries.length} earlier{" "}
1008
- {group.historyEntries.length === 1 ? "draft" : "drafts"}
844
+ <ChevronDownIcon className="size-3" />
845
+ Collapse {roundLabel}
1009
846
  </button>
1010
- {isExpanded &&
1011
- group.historyEntries.map((entry, index) => (
1012
- <EvidenceCard
1013
- key={`proposal-history-${group.target}-${entry.timestamp}-${index}`}
1014
- entry={entry}
1015
- roundLabel={`Draft ${index + 1} of ${group.historyEntries.length}`}
1016
- roundStatus="intermediate"
1017
- prevPassRate={null}
1018
- currPassRate={null}
1019
- />
1020
- ))}
1021
- </div>
1022
- )}
1023
- </div>
1024
- );
1025
- })}
1026
- </div>
1027
- )}
1028
-
1029
- {/* Validation-stage evidence — grouped by target with iteration rounds */}
1030
- {Array.from(validationsByTarget.entries()).length > 0 && (
1031
- <div className="space-y-2">
1032
- <div className="space-y-1">
1033
- <p className="text-xs font-semibold uppercase tracking-[0.18em] text-muted-foreground/80">
1034
- How it was tested
1035
- </p>
1036
- <p className="text-sm text-muted-foreground">
1037
- Validation evidence shows whether the proposal improved the eval signal.
1038
- </p>
1039
- </div>
1040
- {Array.from(validationsByTarget.entries()).map(([target, targetEntries]) => {
1041
- const hasMultipleRounds = targetEntries.length > 1;
1042
-
1043
- return (
1044
- <div key={target} className="space-y-2">
1045
- {targetEntries.map((entry, i) => {
1046
- const isLast = i === targetEntries.length - 1;
1047
- const roundLabel = hasMultipleRounds
1048
- ? `Round ${i + 1} of ${targetEntries.length}`
1049
- : null;
1050
- const prevPassRate = i > 0 ? getAfterPassRate(targetEntries[i - 1]) : null;
1051
- const currPassRate = getAfterPassRate(entry);
1052
- const roundKey = `${target}-${entry.timestamp}`;
1053
- const roundStatus: RoundStatus = !hasMultipleRounds
1054
- ? "single"
1055
- : isLast
1056
- ? "final"
1057
- : "intermediate";
1058
-
1059
- if (roundStatus === "intermediate" && !expandedRounds.has(roundKey)) {
1060
- return (
1061
- <CollapsedEvidenceCard
1062
- key={roundKey}
1063
- entry={entry}
1064
- roundLabel={roundLabel!}
1065
- onExpand={() => toggleRound(roundKey)}
1066
- />
1067
- );
1068
- }
1069
-
1070
- if (roundStatus === "intermediate" && expandedRounds.has(roundKey)) {
1071
- return (
1072
- <div key={roundKey} className="space-y-1">
1073
- <button
1074
- type="button"
1075
- onClick={() => toggleRound(roundKey)}
1076
- className="flex items-center gap-1 px-1 text-[10px] text-muted-foreground transition-colors hover:text-foreground"
1077
- >
1078
- <ChevronDownIcon className="size-3" />
1079
- Collapse {roundLabel}
1080
- </button>
1081
- <EvidenceCard
1082
- entry={entry}
1083
- roundLabel={roundLabel}
1084
- roundStatus={roundStatus}
1085
- prevPassRate={prevPassRate}
1086
- currPassRate={currPassRate}
1087
- />
1088
- </div>
1089
- );
1090
- }
1091
-
1092
- return (
1093
847
  <EvidenceCard
1094
- key={roundKey}
1095
848
  entry={entry}
1096
849
  roundLabel={roundLabel}
1097
850
  roundStatus={roundStatus}
1098
851
  prevPassRate={prevPassRate}
1099
852
  currPassRate={currPassRate}
1100
853
  />
1101
- );
1102
- })}
1103
- </div>
1104
- );
1105
- })}
1106
- </div>
1107
- )}
854
+ </div>
855
+ );
856
+ }
857
+
858
+ // Final round (or single entry) — always expanded
859
+ return (
860
+ <EvidenceCard
861
+ key={roundKey}
862
+ entry={entry}
863
+ roundLabel={roundLabel}
864
+ roundStatus={roundStatus}
865
+ prevPassRate={prevPassRate}
866
+ currPassRate={currPassRate}
867
+ />
868
+ );
869
+ })}
870
+ </div>
871
+ );
872
+ })}
1108
873
 
1109
874
  {entries.length === 0 && (
1110
875
  <div className="flex items-center justify-center rounded-lg border border-dashed py-8">