selftune 0.2.18 → 0.2.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. package/README.md +9 -4
  2. package/apps/local-dashboard/dist/assets/index-DnhnXQm6.js +60 -0
  3. package/apps/local-dashboard/dist/assets/index-_EcLywDg.css +1 -0
  4. package/apps/local-dashboard/dist/assets/vendor-table-BIiI3YhS.js +1 -0
  5. package/apps/local-dashboard/dist/assets/vendor-ui-CGEmUayx.js +12 -0
  6. package/apps/local-dashboard/dist/index.html +5 -5
  7. package/cli/selftune/alpha-upload/stage-canonical.ts +7 -6
  8. package/cli/selftune/constants.ts +10 -0
  9. package/cli/selftune/contribute/contribute.ts +30 -2
  10. package/cli/selftune/contribution-config.ts +249 -0
  11. package/cli/selftune/contribution-relay.ts +177 -0
  12. package/cli/selftune/contribution-signals.ts +219 -0
  13. package/cli/selftune/contribution-staging.ts +147 -0
  14. package/cli/selftune/contributions.ts +532 -0
  15. package/cli/selftune/creator-contributions.ts +333 -0
  16. package/cli/selftune/dashboard-contract.ts +205 -1
  17. package/cli/selftune/dashboard-server.ts +45 -11
  18. package/cli/selftune/eval/family-overlap.ts +395 -0
  19. package/cli/selftune/eval/hooks-to-evals.ts +182 -28
  20. package/cli/selftune/eval/synthetic-evals.ts +298 -11
  21. package/cli/selftune/export.ts +2 -2
  22. package/cli/selftune/index.ts +41 -5
  23. package/cli/selftune/ingestors/codex-rollout.ts +31 -35
  24. package/cli/selftune/ingestors/codex-wrapper.ts +32 -24
  25. package/cli/selftune/localdb/db.ts +2 -2
  26. package/cli/selftune/localdb/queries.ts +701 -30
  27. package/cli/selftune/localdb/schema.ts +20 -0
  28. package/cli/selftune/recover.ts +153 -0
  29. package/cli/selftune/repair/skill-usage.ts +363 -4
  30. package/cli/selftune/routes/actions.ts +35 -1
  31. package/cli/selftune/routes/analytics.ts +14 -0
  32. package/cli/selftune/routes/index.ts +1 -0
  33. package/cli/selftune/routes/overview.ts +112 -4
  34. package/cli/selftune/routes/skill-report.ts +569 -10
  35. package/cli/selftune/status.ts +81 -2
  36. package/cli/selftune/sync.ts +56 -2
  37. package/cli/selftune/trust-model.ts +66 -0
  38. package/cli/selftune/types.ts +49 -0
  39. package/cli/selftune/utils/skill-detection.ts +43 -0
  40. package/cli/selftune/watchlist.ts +65 -0
  41. package/package.json +1 -1
  42. package/packages/ui/src/components/ActivityTimeline.tsx +165 -150
  43. package/packages/ui/src/components/EvidenceViewer.tsx +335 -144
  44. package/packages/ui/src/components/EvolutionTimeline.tsx +58 -28
  45. package/packages/ui/src/components/OrchestrateRunsPanel.tsx +33 -16
  46. package/packages/ui/src/components/RecentActivityFeed.tsx +72 -41
  47. package/packages/ui/src/components/section-cards.tsx +12 -9
  48. package/packages/ui/src/primitives/card.tsx +1 -1
  49. package/skill/SKILL.md +11 -1
  50. package/skill/Workflows/AlphaUpload.md +4 -0
  51. package/skill/Workflows/Composability.md +64 -0
  52. package/skill/Workflows/Contribute.md +6 -3
  53. package/skill/Workflows/Contributions.md +97 -0
  54. package/skill/Workflows/CreatorContributions.md +74 -0
  55. package/skill/Workflows/Dashboard.md +31 -0
  56. package/skill/Workflows/Evals.md +57 -8
  57. package/skill/Workflows/Ingest.md +7 -0
  58. package/skill/Workflows/Initialize.md +20 -1
  59. package/skill/Workflows/Recover.md +84 -0
  60. package/skill/Workflows/RepairSkillUsage.md +12 -4
  61. package/skill/Workflows/Sync.md +18 -12
  62. package/apps/local-dashboard/dist/assets/index-BMIS6uUh.css +0 -2
  63. package/apps/local-dashboard/dist/assets/index-DOu3iLD9.js +0 -16
  64. package/apps/local-dashboard/dist/assets/vendor-table-pHbDxq36.js +0 -8
  65. package/apps/local-dashboard/dist/assets/vendor-ui-DIwlrGlb.js +0 -12
@@ -5,12 +5,8 @@ import {
5
5
  CircleDotIcon,
6
6
  FileTextIcon,
7
7
  InfoIcon,
8
- RocketIcon,
9
- ShieldCheckIcon,
10
8
  ShieldAlertIcon,
11
9
  XCircleIcon,
12
- UndoIcon,
13
- ArrowRightIcon,
14
10
  TrendingUpIcon,
15
11
  TrendingDownIcon,
16
12
  ListChecksIcon,
@@ -23,14 +19,6 @@ import { Badge } from "../primitives/badge";
23
19
  import { Card, CardContent, CardHeader, CardTitle } from "../primitives/card";
24
20
  import type { EvidenceEntry, EvolutionEntry } from "../types";
25
21
 
26
- const ACTION_ICON: Record<string, React.ReactNode> = {
27
- created: <CircleDotIcon className="size-3.5" />,
28
- validated: <ShieldCheckIcon className="size-3.5" />,
29
- deployed: <RocketIcon className="size-3.5" />,
30
- rejected: <XCircleIcon className="size-3.5" />,
31
- rolled_back: <UndoIcon className="size-3.5" />,
32
- };
33
-
34
22
  const ACTION_VARIANT: Record<string, "default" | "secondary" | "destructive" | "outline"> = {
35
23
  created: "outline",
36
24
  validated: "secondary",
@@ -43,6 +31,63 @@ interface Props {
43
31
  proposalId: string;
44
32
  evolution: EvolutionEntry[];
45
33
  evidence: EvidenceEntry[];
34
+ showContextBanner?: boolean;
35
+ }
36
+
37
+ function sentenceCase(value: string): string {
38
+ return value.replace(/_/g, " ");
39
+ }
40
+
41
+ function getOutcomePresentation(action?: string | null): {
42
+ title: string;
43
+ summary: string;
44
+ tone: string;
45
+ icon: React.ReactNode;
46
+ liveSkillNote: string;
47
+ } {
48
+ switch (action) {
49
+ case "rejected":
50
+ return {
51
+ title: "Proposal rejected",
52
+ summary: "Selftune proposed a change, but blocked it before your live skill was updated.",
53
+ tone: "border-red-500/20 bg-red-500/8 text-red-700 dark:text-red-50",
54
+ icon: <XCircleIcon className="size-4 text-red-400" />,
55
+ liveSkillNote: "Your live skill is unchanged.",
56
+ };
57
+ case "validated":
58
+ return {
59
+ title: "Proposal validated",
60
+ summary: "The proposed change improved the eval signal and is ready for review or deploy.",
61
+ tone: "border-emerald-500/20 bg-emerald-500/8 text-emerald-700 dark:text-emerald-50",
62
+ icon: <CheckCircleIcon className="size-4 text-emerald-400" />,
63
+ liveSkillNote: "Your live skill has not changed until this proposal is deployed.",
64
+ };
65
+ case "deployed":
66
+ return {
67
+ title: "Proposal deployed",
68
+ summary: "The proposed change passed validation and was applied to the live skill.",
69
+ tone: "border-primary/25 bg-primary/8 text-foreground",
70
+ icon: <TrendingUpIcon className="size-4 text-primary" />,
71
+ liveSkillNote: "Your live skill now includes this change.",
72
+ };
73
+ case "rolled_back":
74
+ return {
75
+ title: "Proposal rolled back",
76
+ summary: "A deployed change was later reversed because follow-up evidence showed risk.",
77
+ tone: "border-amber-500/20 bg-amber-500/8 text-amber-800 dark:text-amber-50",
78
+ icon: <TrendingDownIcon className="size-4 text-amber-400" />,
79
+ liveSkillNote: "Your live skill no longer uses this proposal.",
80
+ };
81
+ case "created":
82
+ default:
83
+ return {
84
+ title: "Proposal under review",
85
+ summary: "Selftune found a possible improvement and recorded the proposed change.",
86
+ tone: "border-border/30 bg-muted/25 text-foreground",
87
+ icon: <CircleDotIcon className="size-4 text-muted-foreground" />,
88
+ liveSkillNote: "Your live skill is unchanged until a proposal is validated and deployed.",
89
+ };
90
+ }
46
91
  }
47
92
 
48
93
  /** Parse YAML-ish frontmatter from text, returns { meta, body } */
@@ -264,7 +309,13 @@ function ValidationResults({ validation }: { validation: Record<string, unknown>
264
309
  )}
265
310
  {typeof net_change === "number" && (
266
311
  <span
267
- className={`text-xs font-mono font-semibold ${net_change > 0 ? "text-emerald-600 dark:text-emerald-400" : "text-red-500"}`}
312
+ className={`text-xs font-mono font-semibold ${
313
+ net_change > 0
314
+ ? "text-emerald-600 dark:text-emerald-400"
315
+ : net_change < 0
316
+ ? "text-red-500"
317
+ : "text-muted-foreground"
318
+ }`}
268
319
  >
269
320
  {net_change > 0 ? "+" : ""}
270
321
  {(net_change * 100).toFixed(1)}%
@@ -609,7 +660,12 @@ function CollapsedEvidenceCard({
609
660
  );
610
661
  }
611
662
 
612
- export function EvidenceViewer({ proposalId, evolution, evidence }: Props) {
663
+ export function EvidenceViewer({
664
+ proposalId,
665
+ evolution,
666
+ evidence,
667
+ showContextBanner = true,
668
+ }: Props) {
613
669
  const steps = useMemo(
614
670
  () =>
615
671
  evolution
@@ -628,6 +684,7 @@ export function EvidenceViewer({ proposalId, evolution, evidence }: Props) {
628
684
 
629
685
  // Track which earlier rounds are manually expanded
630
686
  const [expandedRounds, setExpandedRounds] = useState<Set<string>>(new Set());
687
+ const [expandedProposalTargets, setExpandedProposalTargets] = useState<Set<string>>(new Set());
631
688
 
632
689
  const toggleRound = (key: string) => {
633
690
  setExpandedRounds((prev) => {
@@ -638,6 +695,15 @@ export function EvidenceViewer({ proposalId, evolution, evidence }: Props) {
638
695
  });
639
696
  };
640
697
 
698
+ const toggleProposalHistory = (target: string) => {
699
+ setExpandedProposalTargets((prev) => {
700
+ const next = new Set(prev);
701
+ if (next.has(target)) next.delete(target);
702
+ else next.add(target);
703
+ return next;
704
+ });
705
+ };
706
+
641
707
  const snapshot = useMemo(() => {
642
708
  for (let i = steps.length - 1; i >= 0; i--) {
643
709
  if (steps[i].eval_snapshot) return steps[i].eval_snapshot as Record<string, unknown>;
@@ -661,176 +727,301 @@ export function EvidenceViewer({ proposalId, evolution, evidence }: Props) {
661
727
  return { proposalEntries: proposals, validationsByTarget: validationMap };
662
728
  }, [entries]);
663
729
 
730
+ const latestStep = steps[steps.length - 1] ?? null;
731
+ const lifecycleLabel = steps.map((step) => step.action.replace("_", " ")).join(" -> ");
732
+ const outcome = getOutcomePresentation(latestStep?.action);
733
+ const latestProposalConfidence = useMemo(() => {
734
+ for (let i = proposalEntries.length - 1; i >= 0; i--) {
735
+ if (proposalEntries[i].confidence !== null) {
736
+ return proposalEntries[i].confidence;
737
+ }
738
+ }
739
+ return null;
740
+ }, [proposalEntries]);
741
+ const proposalCards = useMemo(() => {
742
+ const grouped = new Map<string, EvidenceEntry[]>();
743
+ for (const entry of proposalEntries) {
744
+ const key = entry.target || "proposal";
745
+ const group = grouped.get(key) ?? [];
746
+ group.push(entry);
747
+ grouped.set(key, group);
748
+ }
749
+
750
+ return Array.from(grouped.entries()).map(([target, group]) => {
751
+ let richest = group[group.length - 1];
752
+ for (let i = group.length - 1; i >= 0; i--) {
753
+ if (group[i].original_text || group[i].proposed_text || group[i].rationale) {
754
+ richest = group[i];
755
+ break;
756
+ }
757
+ }
758
+ const primaryIndex = group.findIndex((entry) => entry === richest);
759
+ return {
760
+ target,
761
+ primaryEntry: richest,
762
+ historyEntries: group.filter((_, index) => index !== primaryIndex),
763
+ entries: group,
764
+ };
765
+ });
766
+ }, [proposalEntries]);
767
+
664
768
  return (
665
769
  <div className="space-y-4">
666
770
  {/* Context banner */}
667
- <div className="flex items-start gap-2.5 rounded-lg border border-primary/20 bg-primary/5 px-3.5 py-2.5">
668
- <InfoIcon className="size-4 text-primary/60 shrink-0 mt-0.5" />
669
- <p className="text-xs text-muted-foreground leading-relaxed">
670
- This view shows the complete evidence trail for a skill evolution proposal &mdash; how the
671
- skill was changed, the eval test results before and after, and whether the change improved
672
- performance.
673
- </p>
674
- </div>
771
+ {showContextBanner && (
772
+ <div className="flex items-start gap-2.5 rounded-lg border border-primary/20 bg-primary/5 px-3.5 py-2.5">
773
+ <InfoIcon className="mt-0.5 size-4 shrink-0 text-primary/60" />
774
+ <p className="text-xs leading-relaxed text-muted-foreground">
775
+ This view shows the complete evidence trail for a skill evolution proposal &mdash; how
776
+ the skill was changed, the eval test results before and after, and whether the change
777
+ improved performance.
778
+ </p>
779
+ </div>
780
+ )}
675
781
 
676
- {/* Proposal journey */}
677
- <Card>
782
+ <Card className="border-border/15 bg-muted/10">
678
783
  <CardHeader className="pb-3">
679
- <CardTitle className="text-sm flex items-center gap-2">
680
- <span>Proposal Journey</span>
784
+ <CardTitle className="flex flex-wrap items-center gap-2 text-sm">
785
+ <span>Proposal Summary</span>
681
786
  <span className="font-mono text-xs text-muted-foreground">
682
787
  #{proposalId.slice(0, 12)}
683
788
  </span>
684
789
  </CardTitle>
685
790
  </CardHeader>
686
791
  <CardContent className="space-y-3">
687
- <div className="flex items-center gap-2 flex-wrap">
688
- {steps.map((step, i) => (
689
- <div key={`${step.action}-${i}`} className="contents">
690
- {i > 0 && <ArrowRightIcon className="size-3 text-muted-foreground/50 shrink-0" />}
691
- <div className="flex items-center gap-1.5 rounded-md border px-2.5 py-1.5 bg-card">
692
- {ACTION_ICON[step.action]}
693
- <Badge
694
- variant={ACTION_VARIANT[step.action] ?? "secondary"}
695
- className="text-[10px] capitalize"
696
- >
697
- {step.action.replace("_", " ")}
698
- </Badge>
699
- <span className="text-[10px] text-muted-foreground">
700
- {timeAgo(step.timestamp)}
701
- </span>
792
+ <div className={`rounded-lg border px-4 py-3 ${outcome.tone}`}>
793
+ <div className="flex items-start gap-3">
794
+ <div className="mt-0.5 shrink-0">{outcome.icon}</div>
795
+ <div className="min-w-0 space-y-1.5">
796
+ <div className="flex flex-wrap items-center gap-2">
797
+ <p className="text-sm font-semibold">{outcome.title}</p>
798
+ {latestStep && (
799
+ <Badge
800
+ variant={ACTION_VARIANT[latestStep.action] ?? "secondary"}
801
+ className="text-[10px] capitalize"
802
+ >
803
+ {sentenceCase(latestStep.action)}
804
+ </Badge>
805
+ )}
702
806
  </div>
807
+ <p className="text-sm leading-6 text-current/90">{outcome.summary}</p>
808
+ {latestStep?.details && (
809
+ <div className="rounded-md bg-black/10 px-3 py-2 text-sm leading-6 text-current/90 dark:bg-black/20">
810
+ {latestStep.details}
811
+ </div>
812
+ )}
813
+ <p className="text-xs font-medium text-current/75">{outcome.liveSkillNote}</p>
703
814
  </div>
704
- ))}
815
+ </div>
705
816
  </div>
706
817
 
707
- {/* Eval snapshot pass rate change */}
708
- {snapshot && (
709
- <div className="flex items-center gap-3 rounded-md border bg-muted/20 px-3 py-2">
710
- {typeof snapshot.net_change === "number" && (
818
+ <div className="flex flex-wrap items-center gap-2">
819
+ {latestStep?.timestamp && (
820
+ <span className="text-[10px] font-mono text-muted-foreground">
821
+ {timeAgo(latestStep.timestamp)}
822
+ </span>
823
+ )}
824
+ <Badge variant="outline" className="text-[10px]">
825
+ {entries.length} evidence {entries.length === 1 ? "row" : "rows"}
826
+ </Badge>
827
+ {latestProposalConfidence != null && (
828
+ <Badge variant="secondary" className="text-[10px]">
829
+ {Math.round(latestProposalConfidence * 100)}% confidence
830
+ </Badge>
831
+ )}
832
+ </div>
833
+
834
+ <div className="flex flex-wrap items-center gap-2 text-[11px] text-muted-foreground">
835
+ <span className="font-headline uppercase tracking-[0.16em] text-muted-foreground/80">
836
+ Lifecycle
837
+ </span>
838
+ <span>{lifecycleLabel ? sentenceCase(lifecycleLabel) : "No lifecycle recorded"}</span>
839
+ </div>
840
+
841
+ {typeof snapshot?.net_change === "number" &&
842
+ typeof snapshot.before_pass_rate === "number" &&
843
+ typeof snapshot.after_pass_rate === "number" && (
844
+ <div className="flex flex-wrap items-center gap-3 rounded-md border bg-muted/20 px-3 py-2">
711
845
  <div className="flex items-center gap-1">
712
- {(snapshot.net_change as number) > 0 ? (
846
+ {snapshot.net_change > 0 ? (
713
847
  <TrendingUpIcon className="size-3.5 text-emerald-500" />
714
- ) : (
848
+ ) : snapshot.net_change < 0 ? (
715
849
  <TrendingDownIcon className="size-3.5 text-red-500" />
850
+ ) : (
851
+ <CircleDotIcon className="size-3.5 text-muted-foreground" />
716
852
  )}
717
853
  <span
718
- className={`text-sm font-semibold font-mono ${(snapshot.net_change as number) > 0 ? "text-emerald-600 dark:text-emerald-400" : "text-red-500"}`}
854
+ className={`text-sm font-mono font-semibold ${
855
+ snapshot.net_change > 0
856
+ ? "text-emerald-600 dark:text-emerald-400"
857
+ : snapshot.net_change < 0
858
+ ? "text-red-500"
859
+ : "text-muted-foreground"
860
+ }`}
719
861
  >
720
- {(snapshot.net_change as number) > 0 ? "+" : ""}
721
- {Math.round((snapshot.net_change as number) * 100)}%
862
+ {snapshot.net_change > 0 ? "+" : ""}
863
+ {Math.round(snapshot.net_change * 100)}%
722
864
  </span>
723
865
  </div>
724
- )}
725
- {typeof snapshot.before_pass_rate === "number" &&
726
- typeof snapshot.after_pass_rate === "number" && (
727
- <span className="text-xs text-muted-foreground font-mono">
728
- {Math.round((snapshot.before_pass_rate as number) * 100)}% &rarr;{" "}
729
- {Math.round((snapshot.after_pass_rate as number) * 100)}%
730
- </span>
866
+ <span className="text-xs font-mono text-muted-foreground">
867
+ {Math.round(snapshot.before_pass_rate * 100)}% &rarr;{" "}
868
+ {Math.round(snapshot.after_pass_rate * 100)}%
869
+ </span>
870
+ {snapshot.net_change > 0 ? (
871
+ <Badge variant="default" className="text-[10px]">
872
+ Improved
873
+ </Badge>
874
+ ) : snapshot.net_change < 0 ? (
875
+ <Badge variant="destructive" className="text-[10px]">
876
+ Regressed
877
+ </Badge>
878
+ ) : (
879
+ <Badge variant="outline" className="text-[10px]">
880
+ No change
881
+ </Badge>
731
882
  )}
732
- {snapshot.improved !== undefined && (
733
- <Badge
734
- variant={snapshot.improved ? "default" : "destructive"}
735
- className="text-[10px]"
736
- >
737
- {snapshot.improved ? "Improved" : "Regressed"}
738
- </Badge>
739
- )}
740
- </div>
741
- )}
742
-
743
- {/* Details from last step */}
744
- {steps.length > 0 && steps[steps.length - 1].details && (
745
- <p className="text-xs text-muted-foreground leading-relaxed">
746
- {steps[steps.length - 1].details}
747
- </p>
748
- )}
883
+ </div>
884
+ )}
749
885
  </CardContent>
750
886
  </Card>
751
887
 
752
888
  {/* Proposal-stage evidence — standalone cards showing original/proposed text */}
753
- {proposalEntries.map((entry) => (
754
- <EvidenceCard
755
- key={`proposal-${entry.target}-${entry.timestamp}`}
756
- entry={entry}
757
- roundLabel={null}
758
- roundStatus="single"
759
- prevPassRate={null}
760
- currPassRate={null}
761
- />
762
- ))}
889
+ {proposalCards.length > 0 && (
890
+ <div className="space-y-2">
891
+ <div className="space-y-1">
892
+ <p className="text-xs font-semibold uppercase tracking-[0.18em] text-muted-foreground/80">
893
+ What changed
894
+ </p>
895
+ <p className="text-sm text-muted-foreground">
896
+ This is the actual skill text selftune proposed changing.
897
+ </p>
898
+ </div>
899
+ {proposalCards.map((group) => {
900
+ const hasHistory = group.historyEntries.length > 0;
901
+ const isExpanded = expandedProposalTargets.has(group.target);
763
902
 
764
- {/* Validation-stage evidence — grouped by target with iteration rounds */}
765
- {Array.from(validationsByTarget.entries()).map(([target, targetEntries]) => {
766
- const hasMultipleRounds = targetEntries.length > 1;
767
-
768
- return (
769
- <div key={target} className="space-y-2">
770
- {targetEntries.map((entry, i) => {
771
- const isLast = i === targetEntries.length - 1;
772
- const roundLabel = hasMultipleRounds
773
- ? `Round ${i + 1} of ${targetEntries.length}`
774
- : null;
775
- const prevPassRate = i > 0 ? getAfterPassRate(targetEntries[i - 1]) : null;
776
- const currPassRate = getAfterPassRate(entry);
777
- const roundKey = `${target}-${entry.timestamp}`;
778
- const roundStatus: RoundStatus = !hasMultipleRounds
779
- ? "single"
780
- : isLast
781
- ? "final"
782
- : "intermediate";
783
-
784
- // Earlier rounds: collapsed by default
785
- if (roundStatus === "intermediate" && !expandedRounds.has(roundKey)) {
786
- return (
787
- <CollapsedEvidenceCard
788
- key={roundKey}
789
- entry={entry}
790
- roundLabel={roundLabel!}
791
- onExpand={() => toggleRound(roundKey)}
792
- />
793
- );
794
- }
795
-
796
- // Expanded earlier round — show with collapse toggle
797
- if (roundStatus === "intermediate" && expandedRounds.has(roundKey)) {
798
- return (
799
- <div key={roundKey} className="space-y-1">
903
+ return (
904
+ <div key={`proposal-${group.target}`} className="space-y-2">
905
+ <EvidenceCard
906
+ entry={group.primaryEntry}
907
+ roundLabel={hasHistory ? `Latest draft of ${group.entries.length}` : null}
908
+ roundStatus={hasHistory ? "final" : "single"}
909
+ prevPassRate={null}
910
+ currPassRate={null}
911
+ />
912
+ {hasHistory && (
913
+ <div className="space-y-2">
800
914
  <button
801
915
  type="button"
802
- onClick={() => toggleRound(roundKey)}
803
- className="flex items-center gap-1 text-[10px] text-muted-foreground hover:text-foreground transition-colors px-1"
916
+ onClick={() => toggleProposalHistory(group.target)}
917
+ className="flex items-center gap-1 px-1 text-[10px] text-muted-foreground transition-colors hover:text-foreground"
804
918
  >
805
- <ChevronDownIcon className="size-3" />
806
- Collapse {roundLabel}
919
+ {isExpanded ? (
920
+ <ChevronDownIcon className="size-3" />
921
+ ) : (
922
+ <ChevronRightIcon className="size-3" />
923
+ )}
924
+ {isExpanded ? "Hide" : "Show"} {group.historyEntries.length} earlier{" "}
925
+ {group.historyEntries.length === 1 ? "draft" : "drafts"}
807
926
  </button>
927
+ {isExpanded &&
928
+ group.historyEntries.map((entry, index) => (
929
+ <EvidenceCard
930
+ key={`proposal-history-${group.target}-${entry.timestamp}-${index}`}
931
+ entry={entry}
932
+ roundLabel={`Draft ${index + 1} of ${group.historyEntries.length}`}
933
+ roundStatus="intermediate"
934
+ prevPassRate={null}
935
+ currPassRate={null}
936
+ />
937
+ ))}
938
+ </div>
939
+ )}
940
+ </div>
941
+ );
942
+ })}
943
+ </div>
944
+ )}
945
+
946
+ {/* Validation-stage evidence — grouped by target with iteration rounds */}
947
+ {Array.from(validationsByTarget.entries()).length > 0 && (
948
+ <div className="space-y-2">
949
+ <div className="space-y-1">
950
+ <p className="text-xs font-semibold uppercase tracking-[0.18em] text-muted-foreground/80">
951
+ How it was tested
952
+ </p>
953
+ <p className="text-sm text-muted-foreground">
954
+ Validation evidence shows whether the proposal improved the eval signal.
955
+ </p>
956
+ </div>
957
+ {Array.from(validationsByTarget.entries()).map(([target, targetEntries]) => {
958
+ const hasMultipleRounds = targetEntries.length > 1;
959
+
960
+ return (
961
+ <div key={target} className="space-y-2">
962
+ {targetEntries.map((entry, i) => {
963
+ const isLast = i === targetEntries.length - 1;
964
+ const roundLabel = hasMultipleRounds
965
+ ? `Round ${i + 1} of ${targetEntries.length}`
966
+ : null;
967
+ const prevPassRate = i > 0 ? getAfterPassRate(targetEntries[i - 1]) : null;
968
+ const currPassRate = getAfterPassRate(entry);
969
+ const roundKey = `${target}-${entry.timestamp}`;
970
+ const roundStatus: RoundStatus = !hasMultipleRounds
971
+ ? "single"
972
+ : isLast
973
+ ? "final"
974
+ : "intermediate";
975
+
976
+ if (roundStatus === "intermediate" && !expandedRounds.has(roundKey)) {
977
+ return (
978
+ <CollapsedEvidenceCard
979
+ key={roundKey}
980
+ entry={entry}
981
+ roundLabel={roundLabel!}
982
+ onExpand={() => toggleRound(roundKey)}
983
+ />
984
+ );
985
+ }
986
+
987
+ if (roundStatus === "intermediate" && expandedRounds.has(roundKey)) {
988
+ return (
989
+ <div key={roundKey} className="space-y-1">
990
+ <button
991
+ type="button"
992
+ onClick={() => toggleRound(roundKey)}
993
+ className="flex items-center gap-1 px-1 text-[10px] text-muted-foreground transition-colors hover:text-foreground"
994
+ >
995
+ <ChevronDownIcon className="size-3" />
996
+ Collapse {roundLabel}
997
+ </button>
998
+ <EvidenceCard
999
+ entry={entry}
1000
+ roundLabel={roundLabel}
1001
+ roundStatus={roundStatus}
1002
+ prevPassRate={prevPassRate}
1003
+ currPassRate={currPassRate}
1004
+ />
1005
+ </div>
1006
+ );
1007
+ }
1008
+
1009
+ return (
808
1010
  <EvidenceCard
1011
+ key={roundKey}
809
1012
  entry={entry}
810
1013
  roundLabel={roundLabel}
811
1014
  roundStatus={roundStatus}
812
1015
  prevPassRate={prevPassRate}
813
1016
  currPassRate={currPassRate}
814
1017
  />
815
- </div>
816
- );
817
- }
818
-
819
- // Final round (or single entry) — always expanded
820
- return (
821
- <EvidenceCard
822
- key={roundKey}
823
- entry={entry}
824
- roundLabel={roundLabel}
825
- roundStatus={roundStatus}
826
- prevPassRate={prevPassRate}
827
- currPassRate={currPassRate}
828
- />
829
- );
830
- })}
831
- </div>
832
- );
833
- })}
1018
+ );
1019
+ })}
1020
+ </div>
1021
+ );
1022
+ })}
1023
+ </div>
1024
+ )}
834
1025
 
835
1026
  {entries.length === 0 && (
836
1027
  <div className="flex items-center justify-center rounded-lg border border-dashed py-8">