selftune 0.2.18 → 0.2.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. package/README.md +9 -4
  2. package/apps/local-dashboard/dist/assets/index-D8O-RG1I.js +60 -0
  3. package/apps/local-dashboard/dist/assets/index-_EcLywDg.css +1 -0
  4. package/apps/local-dashboard/dist/assets/vendor-table-BIiI3YhS.js +1 -0
  5. package/apps/local-dashboard/dist/assets/vendor-ui-CGEmUayx.js +12 -0
  6. package/apps/local-dashboard/dist/index.html +5 -5
  7. package/cli/selftune/alpha-upload/stage-canonical.ts +7 -6
  8. package/cli/selftune/constants.ts +10 -0
  9. package/cli/selftune/contribute/contribute.ts +30 -2
  10. package/cli/selftune/contribution-config.ts +249 -0
  11. package/cli/selftune/contribution-relay.ts +177 -0
  12. package/cli/selftune/contribution-signals.ts +219 -0
  13. package/cli/selftune/contribution-staging.ts +147 -0
  14. package/cli/selftune/contributions.ts +532 -0
  15. package/cli/selftune/creator-contributions.ts +333 -0
  16. package/cli/selftune/dashboard-contract.ts +209 -1
  17. package/cli/selftune/dashboard-server.ts +45 -11
  18. package/cli/selftune/eval/family-overlap.ts +714 -0
  19. package/cli/selftune/eval/hooks-to-evals.ts +182 -28
  20. package/cli/selftune/eval/synthetic-evals.ts +298 -11
  21. package/cli/selftune/evolution/evidence.ts +5 -0
  22. package/cli/selftune/evolution/evolve-body.ts +62 -2
  23. package/cli/selftune/evolution/evolve.ts +58 -1
  24. package/cli/selftune/evolution/validate-body.ts +10 -0
  25. package/cli/selftune/evolution/validate-host-replay.ts +236 -0
  26. package/cli/selftune/evolution/validate-proposal.ts +10 -0
  27. package/cli/selftune/evolution/validate-routing.ts +112 -5
  28. package/cli/selftune/export.ts +2 -2
  29. package/cli/selftune/index.ts +41 -5
  30. package/cli/selftune/ingestors/codex-rollout.ts +31 -35
  31. package/cli/selftune/ingestors/codex-wrapper.ts +32 -24
  32. package/cli/selftune/localdb/db.ts +2 -2
  33. package/cli/selftune/localdb/direct-write.ts +8 -3
  34. package/cli/selftune/localdb/materialize.ts +7 -2
  35. package/cli/selftune/localdb/queries.ts +712 -31
  36. package/cli/selftune/localdb/schema.ts +30 -1
  37. package/cli/selftune/recover.ts +153 -0
  38. package/cli/selftune/repair/skill-usage.ts +363 -4
  39. package/cli/selftune/routes/actions.ts +35 -1
  40. package/cli/selftune/routes/analytics.ts +14 -0
  41. package/cli/selftune/routes/index.ts +1 -0
  42. package/cli/selftune/routes/overview.ts +112 -4
  43. package/cli/selftune/routes/skill-report.ts +575 -11
  44. package/cli/selftune/status.ts +81 -2
  45. package/cli/selftune/sync.ts +56 -2
  46. package/cli/selftune/trust-model.ts +66 -0
  47. package/cli/selftune/types.ts +103 -0
  48. package/cli/selftune/utils/skill-detection.ts +43 -0
  49. package/cli/selftune/utils/text-similarity.ts +73 -0
  50. package/cli/selftune/watchlist.ts +65 -0
  51. package/package.json +1 -1
  52. package/packages/ui/src/components/ActivityTimeline.tsx +165 -150
  53. package/packages/ui/src/components/EvidenceViewer.tsx +419 -145
  54. package/packages/ui/src/components/EvolutionTimeline.tsx +81 -29
  55. package/packages/ui/src/components/OrchestrateRunsPanel.tsx +33 -16
  56. package/packages/ui/src/components/RecentActivityFeed.tsx +72 -41
  57. package/packages/ui/src/components/section-cards.tsx +12 -9
  58. package/packages/ui/src/primitives/card.tsx +1 -1
  59. package/packages/ui/src/types.ts +4 -0
  60. package/skill/SKILL.md +11 -1
  61. package/skill/Workflows/AlphaUpload.md +4 -0
  62. package/skill/Workflows/Composability.md +78 -0
  63. package/skill/Workflows/Contribute.md +6 -3
  64. package/skill/Workflows/Contributions.md +97 -0
  65. package/skill/Workflows/CreatorContributions.md +74 -0
  66. package/skill/Workflows/Dashboard.md +31 -0
  67. package/skill/Workflows/Evals.md +57 -8
  68. package/skill/Workflows/Evolve.md +23 -0
  69. package/skill/Workflows/Ingest.md +7 -0
  70. package/skill/Workflows/Initialize.md +20 -1
  71. package/skill/Workflows/Recover.md +84 -0
  72. package/skill/Workflows/RepairSkillUsage.md +12 -4
  73. package/skill/Workflows/Sync.md +18 -12
  74. package/apps/local-dashboard/dist/assets/index-BMIS6uUh.css +0 -2
  75. package/apps/local-dashboard/dist/assets/index-DOu3iLD9.js +0 -16
  76. package/apps/local-dashboard/dist/assets/vendor-table-pHbDxq36.js +0 -8
  77. package/apps/local-dashboard/dist/assets/vendor-ui-DIwlrGlb.js +0 -12
@@ -5,17 +5,14 @@ import {
5
5
  CircleDotIcon,
6
6
  FileTextIcon,
7
7
  InfoIcon,
8
- RocketIcon,
9
- ShieldCheckIcon,
10
8
  ShieldAlertIcon,
11
9
  XCircleIcon,
12
- UndoIcon,
13
- ArrowRightIcon,
14
10
  TrendingUpIcon,
15
11
  TrendingDownIcon,
16
12
  ListChecksIcon,
17
13
  } from "lucide-react";
18
14
  import { useMemo, useState } from "react";
15
+ import type { ReactNode } from "react";
19
16
  import Markdown from "react-markdown";
20
17
 
21
18
  import { formatRate, timeAgo } from "../lib/format";
@@ -23,14 +20,6 @@ import { Badge } from "../primitives/badge";
23
20
  import { Card, CardContent, CardHeader, CardTitle } from "../primitives/card";
24
21
  import type { EvidenceEntry, EvolutionEntry } from "../types";
25
22
 
26
- const ACTION_ICON: Record<string, React.ReactNode> = {
27
- created: <CircleDotIcon className="size-3.5" />,
28
- validated: <ShieldCheckIcon className="size-3.5" />,
29
- deployed: <RocketIcon className="size-3.5" />,
30
- rejected: <XCircleIcon className="size-3.5" />,
31
- rolled_back: <UndoIcon className="size-3.5" />,
32
- };
33
-
34
23
  const ACTION_VARIANT: Record<string, "default" | "secondary" | "destructive" | "outline"> = {
35
24
  created: "outline",
36
25
  validated: "secondary",
@@ -43,6 +32,94 @@ interface Props {
43
32
  proposalId: string;
44
33
  evolution: EvolutionEntry[];
45
34
  evidence: EvidenceEntry[];
35
+ showContextBanner?: boolean;
36
+ }
37
+
38
/**
 * Maps a validation-mode identifier to the metadata used to present it.
 *
 * The recognised modes are "host_replay", "llm_judge" and "structural_guard". Each one
 * yields a display label, a badge variant and a one-sentence description.
 *
 * @param mode - Validation mode string; may be omitted or null.
 * @returns The presentation metadata, or null for any other value (including undefined and null).
 */
+ function getValidationModeMeta(mode?: string | null): {
39
+ label: string;
40
+ variant: "default" | "secondary" | "destructive" | "outline";
41
+ description: string;
42
+ } | null {
43
+ switch (mode) {
44
+ case "host_replay":
45
+ return {
46
+ label: "Replay-backed validation",
47
+ variant: "default",
48
+ description:
49
+ "Validated against a controlled replay fixture instead of a free-form judge prompt.",
50
+ };
51
+ case "llm_judge":
52
+ return {
53
+ label: "Model judgment",
54
+ variant: "secondary",
55
+ description: "Validated by an LLM trigger check rather than a replay fixture.",
56
+ };
57
+ case "structural_guard":
58
+ return {
59
+ label: "Structural guard",
60
+ variant: "outline",
61
+ description:
62
+ "Only deterministic structural checks ran; no replay or judge validation was needed.",
63
+ };
64
// Unrecognised, missing or null modes have no metadata.
+ default:
65
+ return null;
66
+ }
67
+ }
68
+
69
/**
 * Replaces every underscore in `value` with a space.
 *
 * NOTE(review): despite the name, this function does not change letter casing.
 *
 * @param value - Text that may contain underscores.
 * @returns The text with each underscore replaced by a single space.
 */
+ function sentenceCase(value: string): string {
70
+ return value.replace(/_/g, " ");
71
+ }
72
+
73
/**
 * Builds the presentation content for a proposal action.
 *
 * "rejected", "validated", "deployed" and "rolled_back" each have their own branch.
 * "created" and every other value (including undefined and null) produce the
 * "Proposal under review" result.
 *
 * @param action - Action string; may be omitted or null.
 * @returns A title, a summary sentence, `tone` class names, an icon element and a note
 *   about the state of the live skill.
 */
+ function getOutcomePresentation(action?: string | null): {
74
+ title: string;
75
+ summary: string;
76
+ tone: string;
77
+ icon: ReactNode;
78
+ liveSkillNote: string;
79
+ } {
80
+ switch (action) {
81
+ case "rejected":
82
+ return {
83
+ title: "Proposal rejected",
84
+ summary: "Selftune proposed a change, but blocked it before your live skill was updated.",
85
+ tone: "border-red-500/20 bg-red-500/8 text-red-700 dark:text-red-50",
86
+ icon: <XCircleIcon className="size-4 text-red-400" />,
87
+ liveSkillNote: "Your live skill is unchanged.",
88
+ };
89
+ case "validated":
90
+ return {
91
+ title: "Proposal validated",
92
+ summary: "The proposed change improved the eval signal and is ready for review or deploy.",
93
+ tone: "border-emerald-500/20 bg-emerald-500/8 text-emerald-700 dark:text-emerald-50",
94
+ icon: <CheckCircleIcon className="size-4 text-emerald-400" />,
95
+ liveSkillNote: "Your live skill has not changed until this proposal is deployed.",
96
+ };
97
+ case "deployed":
98
+ return {
99
+ title: "Proposal deployed",
100
+ summary: "The proposed change passed validation and was applied to the live skill.",
101
+ tone: "border-primary/25 bg-primary/8 text-foreground",
102
+ icon: <TrendingUpIcon className="size-4 text-primary" />,
103
+ liveSkillNote: "Your live skill now includes this change.",
104
+ };
105
+ case "rolled_back":
106
+ return {
107
+ title: "Proposal rolled back",
108
+ summary: "A deployed change was later reversed because follow-up evidence showed risk.",
109
+ tone: "border-amber-500/20 bg-amber-500/8 text-amber-800 dark:text-amber-50",
110
+ icon: <TrendingDownIcon className="size-4 text-amber-400" />,
111
+ liveSkillNote: "Your live skill no longer uses this proposal.",
112
+ };
113
// "created" shares the fallback branch with unknown or missing actions.
+ case "created":
114
+ default:
115
+ return {
116
+ title: "Proposal under review",
117
+ summary: "Selftune found a possible improvement and recorded the proposed change.",
118
+ tone: "border-border/30 bg-muted/25 text-foreground",
119
+ icon: <CircleDotIcon className="size-4 text-muted-foreground" />,
120
+ liveSkillNote: "Your live skill is unchanged until a proposal is validated and deployed.",
121
+ };
122
+ }
46
123
  }
47
124
 
48
125
  /** Parse YAML-ish frontmatter from text, returns { meta, body } */
@@ -135,7 +212,7 @@ function SkillContentBlock({
135
212
  }
136
213
 
137
214
  /** Smart formatting for a single validation value */
138
- function formatValidationValue(key: string, val: unknown): React.ReactNode {
215
+ function formatValidationValue(key: string, val: unknown): ReactNode {
139
216
  // Booleans
140
217
  if (typeof val === "boolean") {
141
218
  return val ? (
@@ -234,12 +311,18 @@ function ValidationResults({ validation }: { validation: Record<string, unknown>
234
311
  regressions,
235
312
  new_passes,
236
313
  per_entry_results,
314
+ validation_mode,
315
+ validation_agent,
316
+ validation_fixture_id,
317
+ validation_evidence_ref,
237
318
  ...rest
238
319
  } = validation;
239
320
 
240
321
  const regressionsArr = Array.isArray(regressions) ? regressions : [];
241
322
  const newPassesArr = Array.isArray(new_passes) ? new_passes : [];
242
323
  const perEntryArr = Array.isArray(per_entry_results) ? per_entry_results : [];
324
+ const validationMeta =
325
+ typeof validation_mode === "string" ? getValidationModeMeta(validation_mode) : null;
243
326
 
244
327
  return (
245
328
  <div className="rounded-md border bg-muted/30 p-3 space-y-3">
@@ -250,6 +333,34 @@ function ValidationResults({ validation }: { validation: Record<string, unknown>
250
333
  </span>
251
334
  </p>
252
335
 
336
+ {validationMeta && (
337
+ <div className="rounded-md border bg-card px-3 py-2">
338
+ <div className="flex flex-wrap items-center gap-2">
339
+ <Badge variant={validationMeta.variant} className="text-[10px]">
340
+ {validationMeta.label}
341
+ </Badge>
342
+ {typeof validation_agent === "string" && validation_agent.trim() && (
343
+ <Badge variant="outline" className="text-[10px]">
344
+ agent: {validation_agent}
345
+ </Badge>
346
+ )}
347
+ {typeof validation_fixture_id === "string" && validation_fixture_id.trim() && (
348
+ <Badge variant="outline" className="text-[10px]">
349
+ fixture: {validation_fixture_id}
350
+ </Badge>
351
+ )}
352
+ </div>
353
+ <p className="mt-1 text-[11px] leading-relaxed text-muted-foreground">
354
+ {validationMeta.description}
355
+ </p>
356
+ {typeof validation_evidence_ref === "string" && validation_evidence_ref.trim() && (
357
+ <p className="mt-1 text-[10px] font-mono text-muted-foreground/70">
358
+ {validation_evidence_ref}
359
+ </p>
360
+ )}
361
+ </div>
362
+ )}
363
+
253
364
  {/* Summary bar */}
254
365
  <div className="flex items-center gap-3 flex-wrap">
255
366
  {improved !== undefined && (
@@ -264,7 +375,13 @@ function ValidationResults({ validation }: { validation: Record<string, unknown>
264
375
  )}
265
376
  {typeof net_change === "number" && (
266
377
  <span
267
- className={`text-xs font-mono font-semibold ${net_change > 0 ? "text-emerald-600 dark:text-emerald-400" : "text-red-500"}`}
378
+ className={`text-xs font-mono font-semibold ${
379
+ net_change > 0
380
+ ? "text-emerald-600 dark:text-emerald-400"
381
+ : net_change < 0
382
+ ? "text-red-500"
383
+ : "text-muted-foreground"
384
+ }`}
268
385
  >
269
386
  {net_change > 0 ? "+" : ""}
270
387
  {(net_change * 100).toFixed(1)}%
@@ -609,7 +726,12 @@ function CollapsedEvidenceCard({
609
726
  );
610
727
  }
611
728
 
612
- export function EvidenceViewer({ proposalId, evolution, evidence }: Props) {
729
+ export function EvidenceViewer({
730
+ proposalId,
731
+ evolution,
732
+ evidence,
733
+ showContextBanner = true,
734
+ }: Props) {
613
735
  const steps = useMemo(
614
736
  () =>
615
737
  evolution
@@ -628,6 +750,7 @@ export function EvidenceViewer({ proposalId, evolution, evidence }: Props) {
628
750
 
629
751
  // Track which earlier rounds are manually expanded
630
752
  const [expandedRounds, setExpandedRounds] = useState<Set<string>>(new Set());
753
+ const [expandedProposalTargets, setExpandedProposalTargets] = useState<Set<string>>(new Set());
631
754
 
632
755
  const toggleRound = (key: string) => {
633
756
  setExpandedRounds((prev) => {
@@ -638,6 +761,15 @@ export function EvidenceViewer({ proposalId, evolution, evidence }: Props) {
638
761
  });
639
762
  };
640
763
 
764
+ const toggleProposalHistory = (target: string) => {
765
+ setExpandedProposalTargets((prev) => {
766
+ const next = new Set(prev);
767
+ if (next.has(target)) next.delete(target);
768
+ else next.add(target);
769
+ return next;
770
+ });
771
+ };
772
+
641
773
  const snapshot = useMemo(() => {
642
774
  for (let i = steps.length - 1; i >= 0; i--) {
643
775
  if (steps[i].eval_snapshot) return steps[i].eval_snapshot as Record<string, unknown>;
@@ -661,176 +793,318 @@ export function EvidenceViewer({ proposalId, evolution, evidence }: Props) {
661
793
  return { proposalEntries: proposals, validationsByTarget: validationMap };
662
794
  }, [entries]);
663
795
 
796
+ const latestStep = steps[steps.length - 1] ?? null;
797
+ const lifecycleLabel = steps.map((step) => step.action.replace("_", " ")).join(" -> ");
798
+ const outcome = getOutcomePresentation(latestStep?.action);
799
+ const validationMeta = getValidationModeMeta(latestStep?.validation_mode);
800
+ const latestProposalConfidence = useMemo(() => {
801
+ for (let i = proposalEntries.length - 1; i >= 0; i--) {
802
+ if (proposalEntries[i].confidence !== null) {
803
+ return proposalEntries[i].confidence;
804
+ }
805
+ }
806
+ return null;
807
+ }, [proposalEntries]);
808
+ const proposalCards = useMemo(() => {
809
+ const grouped = new Map<string, EvidenceEntry[]>();
810
+ for (const entry of proposalEntries) {
811
+ const key = entry.target || "proposal";
812
+ const group = grouped.get(key) ?? [];
813
+ group.push(entry);
814
+ grouped.set(key, group);
815
+ }
816
+
817
+ return Array.from(grouped.entries()).map(([target, group]) => {
818
+ let richest = group[group.length - 1];
819
+ for (let i = group.length - 1; i >= 0; i--) {
820
+ if (group[i].original_text || group[i].proposed_text || group[i].rationale) {
821
+ richest = group[i];
822
+ break;
823
+ }
824
+ }
825
+ const primaryIndex = group.findIndex((entry) => entry === richest);
826
+ return {
827
+ target,
828
+ primaryEntry: richest,
829
+ historyEntries: group.filter((_, index) => index !== primaryIndex),
830
+ entries: group,
831
+ };
832
+ });
833
+ }, [proposalEntries]);
834
+
664
835
  return (
665
836
  <div className="space-y-4">
666
837
  {/* Context banner */}
667
- <div className="flex items-start gap-2.5 rounded-lg border border-primary/20 bg-primary/5 px-3.5 py-2.5">
668
- <InfoIcon className="size-4 text-primary/60 shrink-0 mt-0.5" />
669
- <p className="text-xs text-muted-foreground leading-relaxed">
670
- This view shows the complete evidence trail for a skill evolution proposal &mdash; how the
671
- skill was changed, the eval test results before and after, and whether the change improved
672
- performance.
673
- </p>
674
- </div>
838
+ {showContextBanner && (
839
+ <div className="flex items-start gap-2.5 rounded-lg border border-primary/20 bg-primary/5 px-3.5 py-2.5">
840
+ <InfoIcon className="mt-0.5 size-4 shrink-0 text-primary/60" />
841
+ <p className="text-xs leading-relaxed text-muted-foreground">
842
+ This view shows the complete evidence trail for a skill evolution proposal &mdash; how
843
+ the skill was changed, the eval test results before and after, and whether the change
844
+ improved performance.
845
+ </p>
846
+ </div>
847
+ )}
675
848
 
676
- {/* Proposal journey */}
677
- <Card>
849
+ <Card className="border-border/15 bg-muted/10">
678
850
  <CardHeader className="pb-3">
679
- <CardTitle className="text-sm flex items-center gap-2">
680
- <span>Proposal Journey</span>
851
+ <CardTitle className="flex flex-wrap items-center gap-2 text-sm">
852
+ <span>Proposal Summary</span>
681
853
  <span className="font-mono text-xs text-muted-foreground">
682
854
  #{proposalId.slice(0, 12)}
683
855
  </span>
684
856
  </CardTitle>
685
857
  </CardHeader>
686
858
  <CardContent className="space-y-3">
687
- <div className="flex items-center gap-2 flex-wrap">
688
- {steps.map((step, i) => (
689
- <div key={`${step.action}-${i}`} className="contents">
690
- {i > 0 && <ArrowRightIcon className="size-3 text-muted-foreground/50 shrink-0" />}
691
- <div className="flex items-center gap-1.5 rounded-md border px-2.5 py-1.5 bg-card">
692
- {ACTION_ICON[step.action]}
693
- <Badge
694
- variant={ACTION_VARIANT[step.action] ?? "secondary"}
695
- className="text-[10px] capitalize"
696
- >
697
- {step.action.replace("_", " ")}
698
- </Badge>
699
- <span className="text-[10px] text-muted-foreground">
700
- {timeAgo(step.timestamp)}
701
- </span>
859
+ <div className={`rounded-lg border px-4 py-3 ${outcome.tone}`}>
860
+ <div className="flex items-start gap-3">
861
+ <div className="mt-0.5 shrink-0">{outcome.icon}</div>
862
+ <div className="min-w-0 space-y-1.5">
863
+ <div className="flex flex-wrap items-center gap-2">
864
+ <p className="text-sm font-semibold">{outcome.title}</p>
865
+ {latestStep && (
866
+ <Badge
867
+ variant={ACTION_VARIANT[latestStep.action] ?? "secondary"}
868
+ className="text-[10px] capitalize"
869
+ >
870
+ {sentenceCase(latestStep.action)}
871
+ </Badge>
872
+ )}
702
873
  </div>
874
+ <p className="text-sm leading-6 text-current/90">{outcome.summary}</p>
875
+ {latestStep?.details && (
876
+ <div className="rounded-md bg-black/10 px-3 py-2 text-sm leading-6 text-current/90 dark:bg-black/20">
877
+ {latestStep.details}
878
+ </div>
879
+ )}
880
+ <p className="text-xs font-medium text-current/75">{outcome.liveSkillNote}</p>
703
881
  </div>
704
- ))}
882
+ </div>
883
+ </div>
884
+
885
+ <div className="flex flex-wrap items-center gap-2">
886
+ {latestStep?.timestamp && (
887
+ <span className="text-[10px] font-mono text-muted-foreground">
888
+ {timeAgo(latestStep.timestamp)}
889
+ </span>
890
+ )}
891
+ <Badge variant="outline" className="text-[10px]">
892
+ {entries.length} evidence {entries.length === 1 ? "row" : "rows"}
893
+ </Badge>
894
+ {validationMeta && (
895
+ <Badge variant={validationMeta.variant} className="text-[10px]">
896
+ {validationMeta.label}
897
+ </Badge>
898
+ )}
899
+ {latestStep?.validation_fixture_id && (
900
+ <Badge variant="outline" className="text-[10px]">
901
+ fixture: {latestStep.validation_fixture_id}
902
+ </Badge>
903
+ )}
904
+ {latestProposalConfidence != null && (
905
+ <Badge variant="secondary" className="text-[10px]">
906
+ {Math.round(latestProposalConfidence * 100)}% confidence
907
+ </Badge>
908
+ )}
909
+ </div>
910
+
911
+ {validationMeta && (
912
+ <p className="text-[11px] leading-relaxed text-muted-foreground">
913
+ {validationMeta.description}
914
+ </p>
915
+ )}
916
+
917
+ <div className="flex flex-wrap items-center gap-2 text-[11px] text-muted-foreground">
918
+ <span className="font-headline uppercase tracking-[0.16em] text-muted-foreground/80">
919
+ Lifecycle
920
+ </span>
921
+ <span>{lifecycleLabel ? sentenceCase(lifecycleLabel) : "No lifecycle recorded"}</span>
705
922
  </div>
706
923
 
707
- {/* Eval snapshot pass rate change */}
708
- {snapshot && (
709
- <div className="flex items-center gap-3 rounded-md border bg-muted/20 px-3 py-2">
710
- {typeof snapshot.net_change === "number" && (
924
+ {typeof snapshot?.net_change === "number" &&
925
+ typeof snapshot.before_pass_rate === "number" &&
926
+ typeof snapshot.after_pass_rate === "number" && (
927
+ <div className="flex flex-wrap items-center gap-3 rounded-md border bg-muted/20 px-3 py-2">
711
928
  <div className="flex items-center gap-1">
712
- {(snapshot.net_change as number) > 0 ? (
929
+ {snapshot.net_change > 0 ? (
713
930
  <TrendingUpIcon className="size-3.5 text-emerald-500" />
714
- ) : (
931
+ ) : snapshot.net_change < 0 ? (
715
932
  <TrendingDownIcon className="size-3.5 text-red-500" />
933
+ ) : (
934
+ <CircleDotIcon className="size-3.5 text-muted-foreground" />
716
935
  )}
717
936
  <span
718
- className={`text-sm font-semibold font-mono ${(snapshot.net_change as number) > 0 ? "text-emerald-600 dark:text-emerald-400" : "text-red-500"}`}
937
+ className={`text-sm font-mono font-semibold ${
938
+ snapshot.net_change > 0
939
+ ? "text-emerald-600 dark:text-emerald-400"
940
+ : snapshot.net_change < 0
941
+ ? "text-red-500"
942
+ : "text-muted-foreground"
943
+ }`}
719
944
  >
720
- {(snapshot.net_change as number) > 0 ? "+" : ""}
721
- {Math.round((snapshot.net_change as number) * 100)}%
945
+ {snapshot.net_change > 0 ? "+" : ""}
946
+ {Math.round(snapshot.net_change * 100)}%
722
947
  </span>
723
948
  </div>
724
- )}
725
- {typeof snapshot.before_pass_rate === "number" &&
726
- typeof snapshot.after_pass_rate === "number" && (
727
- <span className="text-xs text-muted-foreground font-mono">
728
- {Math.round((snapshot.before_pass_rate as number) * 100)}% &rarr;{" "}
729
- {Math.round((snapshot.after_pass_rate as number) * 100)}%
730
- </span>
949
+ <span className="text-xs font-mono text-muted-foreground">
950
+ {Math.round(snapshot.before_pass_rate * 100)}% &rarr;{" "}
951
+ {Math.round(snapshot.after_pass_rate * 100)}%
952
+ </span>
953
+ {snapshot.net_change > 0 ? (
954
+ <Badge variant="default" className="text-[10px]">
955
+ Improved
956
+ </Badge>
957
+ ) : snapshot.net_change < 0 ? (
958
+ <Badge variant="destructive" className="text-[10px]">
959
+ Regressed
960
+ </Badge>
961
+ ) : (
962
+ <Badge variant="outline" className="text-[10px]">
963
+ No change
964
+ </Badge>
731
965
  )}
732
- {snapshot.improved !== undefined && (
733
- <Badge
734
- variant={snapshot.improved ? "default" : "destructive"}
735
- className="text-[10px]"
736
- >
737
- {snapshot.improved ? "Improved" : "Regressed"}
738
- </Badge>
739
- )}
740
- </div>
741
- )}
742
-
743
- {/* Details from last step */}
744
- {steps.length > 0 && steps[steps.length - 1].details && (
745
- <p className="text-xs text-muted-foreground leading-relaxed">
746
- {steps[steps.length - 1].details}
747
- </p>
748
- )}
966
+ </div>
967
+ )}
749
968
  </CardContent>
750
969
  </Card>
751
970
 
752
971
  {/* Proposal-stage evidence — standalone cards showing original/proposed text */}
753
- {proposalEntries.map((entry) => (
754
- <EvidenceCard
755
- key={`proposal-${entry.target}-${entry.timestamp}`}
756
- entry={entry}
757
- roundLabel={null}
758
- roundStatus="single"
759
- prevPassRate={null}
760
- currPassRate={null}
761
- />
762
- ))}
972
+ {proposalCards.length > 0 && (
973
+ <div className="space-y-2">
974
+ <div className="space-y-1">
975
+ <p className="text-xs font-semibold uppercase tracking-[0.18em] text-muted-foreground/80">
976
+ What changed
977
+ </p>
978
+ <p className="text-sm text-muted-foreground">
979
+ This is the actual skill text selftune proposed changing.
980
+ </p>
981
+ </div>
982
+ {proposalCards.map((group) => {
983
+ const hasHistory = group.historyEntries.length > 0;
984
+ const isExpanded = expandedProposalTargets.has(group.target);
763
985
 
764
- {/* Validation-stage evidence — grouped by target with iteration rounds */}
765
- {Array.from(validationsByTarget.entries()).map(([target, targetEntries]) => {
766
- const hasMultipleRounds = targetEntries.length > 1;
767
-
768
- return (
769
- <div key={target} className="space-y-2">
770
- {targetEntries.map((entry, i) => {
771
- const isLast = i === targetEntries.length - 1;
772
- const roundLabel = hasMultipleRounds
773
- ? `Round ${i + 1} of ${targetEntries.length}`
774
- : null;
775
- const prevPassRate = i > 0 ? getAfterPassRate(targetEntries[i - 1]) : null;
776
- const currPassRate = getAfterPassRate(entry);
777
- const roundKey = `${target}-${entry.timestamp}`;
778
- const roundStatus: RoundStatus = !hasMultipleRounds
779
- ? "single"
780
- : isLast
781
- ? "final"
782
- : "intermediate";
783
-
784
- // Earlier rounds: collapsed by default
785
- if (roundStatus === "intermediate" && !expandedRounds.has(roundKey)) {
786
- return (
787
- <CollapsedEvidenceCard
788
- key={roundKey}
789
- entry={entry}
790
- roundLabel={roundLabel!}
791
- onExpand={() => toggleRound(roundKey)}
792
- />
793
- );
794
- }
795
-
796
- // Expanded earlier round — show with collapse toggle
797
- if (roundStatus === "intermediate" && expandedRounds.has(roundKey)) {
798
- return (
799
- <div key={roundKey} className="space-y-1">
986
+ return (
987
+ <div key={`proposal-${group.target}`} className="space-y-2">
988
+ <EvidenceCard
989
+ entry={group.primaryEntry}
990
+ roundLabel={hasHistory ? `Latest draft of ${group.entries.length}` : null}
991
+ roundStatus={hasHistory ? "final" : "single"}
992
+ prevPassRate={null}
993
+ currPassRate={null}
994
+ />
995
+ {hasHistory && (
996
+ <div className="space-y-2">
800
997
  <button
801
998
  type="button"
802
- onClick={() => toggleRound(roundKey)}
803
- className="flex items-center gap-1 text-[10px] text-muted-foreground hover:text-foreground transition-colors px-1"
999
+ onClick={() => toggleProposalHistory(group.target)}
1000
+ className="flex items-center gap-1 px-1 text-[10px] text-muted-foreground transition-colors hover:text-foreground"
804
1001
  >
805
- <ChevronDownIcon className="size-3" />
806
- Collapse {roundLabel}
1002
+ {isExpanded ? (
1003
+ <ChevronDownIcon className="size-3" />
1004
+ ) : (
1005
+ <ChevronRightIcon className="size-3" />
1006
+ )}
1007
+ {isExpanded ? "Hide" : "Show"} {group.historyEntries.length} earlier{" "}
1008
+ {group.historyEntries.length === 1 ? "draft" : "drafts"}
807
1009
  </button>
1010
+ {isExpanded &&
1011
+ group.historyEntries.map((entry, index) => (
1012
+ <EvidenceCard
1013
+ key={`proposal-history-${group.target}-${entry.timestamp}-${index}`}
1014
+ entry={entry}
1015
+ roundLabel={`Draft ${index + 1} of ${group.historyEntries.length}`}
1016
+ roundStatus="intermediate"
1017
+ prevPassRate={null}
1018
+ currPassRate={null}
1019
+ />
1020
+ ))}
1021
+ </div>
1022
+ )}
1023
+ </div>
1024
+ );
1025
+ })}
1026
+ </div>
1027
+ )}
1028
+
1029
+ {/* Validation-stage evidence — grouped by target with iteration rounds */}
1030
+ {Array.from(validationsByTarget.entries()).length > 0 && (
1031
+ <div className="space-y-2">
1032
+ <div className="space-y-1">
1033
+ <p className="text-xs font-semibold uppercase tracking-[0.18em] text-muted-foreground/80">
1034
+ How it was tested
1035
+ </p>
1036
+ <p className="text-sm text-muted-foreground">
1037
+ Validation evidence shows whether the proposal improved the eval signal.
1038
+ </p>
1039
+ </div>
1040
+ {Array.from(validationsByTarget.entries()).map(([target, targetEntries]) => {
1041
+ const hasMultipleRounds = targetEntries.length > 1;
1042
+
1043
+ return (
1044
+ <div key={target} className="space-y-2">
1045
+ {targetEntries.map((entry, i) => {
1046
+ const isLast = i === targetEntries.length - 1;
1047
+ const roundLabel = hasMultipleRounds
1048
+ ? `Round ${i + 1} of ${targetEntries.length}`
1049
+ : null;
1050
+ const prevPassRate = i > 0 ? getAfterPassRate(targetEntries[i - 1]) : null;
1051
+ const currPassRate = getAfterPassRate(entry);
1052
+ const roundKey = `${target}-${entry.timestamp}`;
1053
+ const roundStatus: RoundStatus = !hasMultipleRounds
1054
+ ? "single"
1055
+ : isLast
1056
+ ? "final"
1057
+ : "intermediate";
1058
+
1059
+ if (roundStatus === "intermediate" && !expandedRounds.has(roundKey)) {
1060
+ return (
1061
+ <CollapsedEvidenceCard
1062
+ key={roundKey}
1063
+ entry={entry}
1064
+ roundLabel={roundLabel!}
1065
+ onExpand={() => toggleRound(roundKey)}
1066
+ />
1067
+ );
1068
+ }
1069
+
1070
+ if (roundStatus === "intermediate" && expandedRounds.has(roundKey)) {
1071
+ return (
1072
+ <div key={roundKey} className="space-y-1">
1073
+ <button
1074
+ type="button"
1075
+ onClick={() => toggleRound(roundKey)}
1076
+ className="flex items-center gap-1 px-1 text-[10px] text-muted-foreground transition-colors hover:text-foreground"
1077
+ >
1078
+ <ChevronDownIcon className="size-3" />
1079
+ Collapse {roundLabel}
1080
+ </button>
1081
+ <EvidenceCard
1082
+ entry={entry}
1083
+ roundLabel={roundLabel}
1084
+ roundStatus={roundStatus}
1085
+ prevPassRate={prevPassRate}
1086
+ currPassRate={currPassRate}
1087
+ />
1088
+ </div>
1089
+ );
1090
+ }
1091
+
1092
+ return (
808
1093
  <EvidenceCard
1094
+ key={roundKey}
809
1095
  entry={entry}
810
1096
  roundLabel={roundLabel}
811
1097
  roundStatus={roundStatus}
812
1098
  prevPassRate={prevPassRate}
813
1099
  currPassRate={currPassRate}
814
1100
  />
815
- </div>
816
- );
817
- }
818
-
819
- // Final round (or single entry) — always expanded
820
- return (
821
- <EvidenceCard
822
- key={roundKey}
823
- entry={entry}
824
- roundLabel={roundLabel}
825
- roundStatus={roundStatus}
826
- prevPassRate={prevPassRate}
827
- currPassRate={currPassRate}
828
- />
829
- );
830
- })}
831
- </div>
832
- );
833
- })}
1101
+ );
1102
+ })}
1103
+ </div>
1104
+ );
1105
+ })}
1106
+ </div>
1107
+ )}
834
1108
 
835
1109
  {entries.length === 0 && (
836
1110
  <div className="flex items-center justify-center rounded-lg border border-dashed py-8">