selftune 0.2.18 → 0.2.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +9 -4
- package/apps/local-dashboard/dist/assets/index-DnhnXQm6.js +60 -0
- package/apps/local-dashboard/dist/assets/index-_EcLywDg.css +1 -0
- package/apps/local-dashboard/dist/assets/vendor-table-BIiI3YhS.js +1 -0
- package/apps/local-dashboard/dist/assets/vendor-ui-CGEmUayx.js +12 -0
- package/apps/local-dashboard/dist/index.html +5 -5
- package/cli/selftune/alpha-upload/stage-canonical.ts +7 -6
- package/cli/selftune/constants.ts +10 -0
- package/cli/selftune/contribute/contribute.ts +30 -2
- package/cli/selftune/contribution-config.ts +249 -0
- package/cli/selftune/contribution-relay.ts +177 -0
- package/cli/selftune/contribution-signals.ts +219 -0
- package/cli/selftune/contribution-staging.ts +147 -0
- package/cli/selftune/contributions.ts +532 -0
- package/cli/selftune/creator-contributions.ts +333 -0
- package/cli/selftune/dashboard-contract.ts +205 -1
- package/cli/selftune/dashboard-server.ts +45 -11
- package/cli/selftune/eval/family-overlap.ts +395 -0
- package/cli/selftune/eval/hooks-to-evals.ts +182 -28
- package/cli/selftune/eval/synthetic-evals.ts +298 -11
- package/cli/selftune/export.ts +2 -2
- package/cli/selftune/index.ts +41 -5
- package/cli/selftune/ingestors/codex-rollout.ts +31 -35
- package/cli/selftune/ingestors/codex-wrapper.ts +32 -24
- package/cli/selftune/localdb/db.ts +2 -2
- package/cli/selftune/localdb/queries.ts +701 -30
- package/cli/selftune/localdb/schema.ts +20 -0
- package/cli/selftune/recover.ts +153 -0
- package/cli/selftune/repair/skill-usage.ts +363 -4
- package/cli/selftune/routes/actions.ts +35 -1
- package/cli/selftune/routes/analytics.ts +14 -0
- package/cli/selftune/routes/index.ts +1 -0
- package/cli/selftune/routes/overview.ts +112 -4
- package/cli/selftune/routes/skill-report.ts +569 -10
- package/cli/selftune/status.ts +81 -2
- package/cli/selftune/sync.ts +56 -2
- package/cli/selftune/trust-model.ts +66 -0
- package/cli/selftune/types.ts +49 -0
- package/cli/selftune/utils/skill-detection.ts +43 -0
- package/cli/selftune/watchlist.ts +65 -0
- package/package.json +1 -1
- package/packages/ui/src/components/ActivityTimeline.tsx +165 -150
- package/packages/ui/src/components/EvidenceViewer.tsx +335 -144
- package/packages/ui/src/components/EvolutionTimeline.tsx +58 -28
- package/packages/ui/src/components/OrchestrateRunsPanel.tsx +33 -16
- package/packages/ui/src/components/RecentActivityFeed.tsx +72 -41
- package/packages/ui/src/components/section-cards.tsx +12 -9
- package/packages/ui/src/primitives/card.tsx +1 -1
- package/skill/SKILL.md +11 -1
- package/skill/Workflows/AlphaUpload.md +4 -0
- package/skill/Workflows/Composability.md +64 -0
- package/skill/Workflows/Contribute.md +6 -3
- package/skill/Workflows/Contributions.md +97 -0
- package/skill/Workflows/CreatorContributions.md +74 -0
- package/skill/Workflows/Dashboard.md +31 -0
- package/skill/Workflows/Evals.md +57 -8
- package/skill/Workflows/Ingest.md +7 -0
- package/skill/Workflows/Initialize.md +20 -1
- package/skill/Workflows/Recover.md +84 -0
- package/skill/Workflows/RepairSkillUsage.md +12 -4
- package/skill/Workflows/Sync.md +18 -12
- package/apps/local-dashboard/dist/assets/index-BMIS6uUh.css +0 -2
- package/apps/local-dashboard/dist/assets/index-DOu3iLD9.js +0 -16
- package/apps/local-dashboard/dist/assets/vendor-table-pHbDxq36.js +0 -8
- package/apps/local-dashboard/dist/assets/vendor-ui-DIwlrGlb.js +0 -12
|
@@ -5,12 +5,8 @@ import {
|
|
|
5
5
|
CircleDotIcon,
|
|
6
6
|
FileTextIcon,
|
|
7
7
|
InfoIcon,
|
|
8
|
-
RocketIcon,
|
|
9
|
-
ShieldCheckIcon,
|
|
10
8
|
ShieldAlertIcon,
|
|
11
9
|
XCircleIcon,
|
|
12
|
-
UndoIcon,
|
|
13
|
-
ArrowRightIcon,
|
|
14
10
|
TrendingUpIcon,
|
|
15
11
|
TrendingDownIcon,
|
|
16
12
|
ListChecksIcon,
|
|
@@ -23,14 +19,6 @@ import { Badge } from "../primitives/badge";
|
|
|
23
19
|
import { Card, CardContent, CardHeader, CardTitle } from "../primitives/card";
|
|
24
20
|
import type { EvidenceEntry, EvolutionEntry } from "../types";
|
|
25
21
|
|
|
26
|
-
const ACTION_ICON: Record<string, React.ReactNode> = {
|
|
27
|
-
created: <CircleDotIcon className="size-3.5" />,
|
|
28
|
-
validated: <ShieldCheckIcon className="size-3.5" />,
|
|
29
|
-
deployed: <RocketIcon className="size-3.5" />,
|
|
30
|
-
rejected: <XCircleIcon className="size-3.5" />,
|
|
31
|
-
rolled_back: <UndoIcon className="size-3.5" />,
|
|
32
|
-
};
|
|
33
|
-
|
|
34
22
|
const ACTION_VARIANT: Record<string, "default" | "secondary" | "destructive" | "outline"> = {
|
|
35
23
|
created: "outline",
|
|
36
24
|
validated: "secondary",
|
|
@@ -43,6 +31,63 @@ interface Props {
|
|
|
43
31
|
proposalId: string;
|
|
44
32
|
evolution: EvolutionEntry[];
|
|
45
33
|
evidence: EvidenceEntry[];
|
|
34
|
+
showContextBanner?: boolean;
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
function sentenceCase(value: string): string {
|
|
38
|
+
return value.replace(/_/g, " ");
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
function getOutcomePresentation(action?: string | null): {
|
|
42
|
+
title: string;
|
|
43
|
+
summary: string;
|
|
44
|
+
tone: string;
|
|
45
|
+
icon: React.ReactNode;
|
|
46
|
+
liveSkillNote: string;
|
|
47
|
+
} {
|
|
48
|
+
switch (action) {
|
|
49
|
+
case "rejected":
|
|
50
|
+
return {
|
|
51
|
+
title: "Proposal rejected",
|
|
52
|
+
summary: "Selftune proposed a change, but blocked it before your live skill was updated.",
|
|
53
|
+
tone: "border-red-500/20 bg-red-500/8 text-red-700 dark:text-red-50",
|
|
54
|
+
icon: <XCircleIcon className="size-4 text-red-400" />,
|
|
55
|
+
liveSkillNote: "Your live skill is unchanged.",
|
|
56
|
+
};
|
|
57
|
+
case "validated":
|
|
58
|
+
return {
|
|
59
|
+
title: "Proposal validated",
|
|
60
|
+
summary: "The proposed change improved the eval signal and is ready for review or deploy.",
|
|
61
|
+
tone: "border-emerald-500/20 bg-emerald-500/8 text-emerald-700 dark:text-emerald-50",
|
|
62
|
+
icon: <CheckCircleIcon className="size-4 text-emerald-400" />,
|
|
63
|
+
liveSkillNote: "Your live skill has not changed until this proposal is deployed.",
|
|
64
|
+
};
|
|
65
|
+
case "deployed":
|
|
66
|
+
return {
|
|
67
|
+
title: "Proposal deployed",
|
|
68
|
+
summary: "The proposed change passed validation and was applied to the live skill.",
|
|
69
|
+
tone: "border-primary/25 bg-primary/8 text-foreground",
|
|
70
|
+
icon: <TrendingUpIcon className="size-4 text-primary" />,
|
|
71
|
+
liveSkillNote: "Your live skill now includes this change.",
|
|
72
|
+
};
|
|
73
|
+
case "rolled_back":
|
|
74
|
+
return {
|
|
75
|
+
title: "Proposal rolled back",
|
|
76
|
+
summary: "A deployed change was later reversed because follow-up evidence showed risk.",
|
|
77
|
+
tone: "border-amber-500/20 bg-amber-500/8 text-amber-800 dark:text-amber-50",
|
|
78
|
+
icon: <TrendingDownIcon className="size-4 text-amber-400" />,
|
|
79
|
+
liveSkillNote: "Your live skill no longer uses this proposal.",
|
|
80
|
+
};
|
|
81
|
+
case "created":
|
|
82
|
+
default:
|
|
83
|
+
return {
|
|
84
|
+
title: "Proposal under review",
|
|
85
|
+
summary: "Selftune found a possible improvement and recorded the proposed change.",
|
|
86
|
+
tone: "border-border/30 bg-muted/25 text-foreground",
|
|
87
|
+
icon: <CircleDotIcon className="size-4 text-muted-foreground" />,
|
|
88
|
+
liveSkillNote: "Your live skill is unchanged until a proposal is validated and deployed.",
|
|
89
|
+
};
|
|
90
|
+
}
|
|
46
91
|
}
|
|
47
92
|
|
|
48
93
|
/** Parse YAML-ish frontmatter from text, returns { meta, body } */
|
|
@@ -264,7 +309,13 @@ function ValidationResults({ validation }: { validation: Record<string, unknown>
|
|
|
264
309
|
)}
|
|
265
310
|
{typeof net_change === "number" && (
|
|
266
311
|
<span
|
|
267
|
-
className={`text-xs font-mono font-semibold ${
|
|
312
|
+
className={`text-xs font-mono font-semibold ${
|
|
313
|
+
net_change > 0
|
|
314
|
+
? "text-emerald-600 dark:text-emerald-400"
|
|
315
|
+
: net_change < 0
|
|
316
|
+
? "text-red-500"
|
|
317
|
+
: "text-muted-foreground"
|
|
318
|
+
}`}
|
|
268
319
|
>
|
|
269
320
|
{net_change > 0 ? "+" : ""}
|
|
270
321
|
{(net_change * 100).toFixed(1)}%
|
|
@@ -609,7 +660,12 @@ function CollapsedEvidenceCard({
|
|
|
609
660
|
);
|
|
610
661
|
}
|
|
611
662
|
|
|
612
|
-
export function EvidenceViewer({
|
|
663
|
+
export function EvidenceViewer({
|
|
664
|
+
proposalId,
|
|
665
|
+
evolution,
|
|
666
|
+
evidence,
|
|
667
|
+
showContextBanner = true,
|
|
668
|
+
}: Props) {
|
|
613
669
|
const steps = useMemo(
|
|
614
670
|
() =>
|
|
615
671
|
evolution
|
|
@@ -628,6 +684,7 @@ export function EvidenceViewer({ proposalId, evolution, evidence }: Props) {
|
|
|
628
684
|
|
|
629
685
|
// Track which earlier rounds are manually expanded
|
|
630
686
|
const [expandedRounds, setExpandedRounds] = useState<Set<string>>(new Set());
|
|
687
|
+
const [expandedProposalTargets, setExpandedProposalTargets] = useState<Set<string>>(new Set());
|
|
631
688
|
|
|
632
689
|
const toggleRound = (key: string) => {
|
|
633
690
|
setExpandedRounds((prev) => {
|
|
@@ -638,6 +695,15 @@ export function EvidenceViewer({ proposalId, evolution, evidence }: Props) {
|
|
|
638
695
|
});
|
|
639
696
|
};
|
|
640
697
|
|
|
698
|
+
const toggleProposalHistory = (target: string) => {
|
|
699
|
+
setExpandedProposalTargets((prev) => {
|
|
700
|
+
const next = new Set(prev);
|
|
701
|
+
if (next.has(target)) next.delete(target);
|
|
702
|
+
else next.add(target);
|
|
703
|
+
return next;
|
|
704
|
+
});
|
|
705
|
+
};
|
|
706
|
+
|
|
641
707
|
const snapshot = useMemo(() => {
|
|
642
708
|
for (let i = steps.length - 1; i >= 0; i--) {
|
|
643
709
|
if (steps[i].eval_snapshot) return steps[i].eval_snapshot as Record<string, unknown>;
|
|
@@ -661,176 +727,301 @@ export function EvidenceViewer({ proposalId, evolution, evidence }: Props) {
|
|
|
661
727
|
return { proposalEntries: proposals, validationsByTarget: validationMap };
|
|
662
728
|
}, [entries]);
|
|
663
729
|
|
|
730
|
+
const latestStep = steps[steps.length - 1] ?? null;
|
|
731
|
+
const lifecycleLabel = steps.map((step) => step.action.replace("_", " ")).join(" -> ");
|
|
732
|
+
const outcome = getOutcomePresentation(latestStep?.action);
|
|
733
|
+
const latestProposalConfidence = useMemo(() => {
|
|
734
|
+
for (let i = proposalEntries.length - 1; i >= 0; i--) {
|
|
735
|
+
if (proposalEntries[i].confidence !== null) {
|
|
736
|
+
return proposalEntries[i].confidence;
|
|
737
|
+
}
|
|
738
|
+
}
|
|
739
|
+
return null;
|
|
740
|
+
}, [proposalEntries]);
|
|
741
|
+
const proposalCards = useMemo(() => {
|
|
742
|
+
const grouped = new Map<string, EvidenceEntry[]>();
|
|
743
|
+
for (const entry of proposalEntries) {
|
|
744
|
+
const key = entry.target || "proposal";
|
|
745
|
+
const group = grouped.get(key) ?? [];
|
|
746
|
+
group.push(entry);
|
|
747
|
+
grouped.set(key, group);
|
|
748
|
+
}
|
|
749
|
+
|
|
750
|
+
return Array.from(grouped.entries()).map(([target, group]) => {
|
|
751
|
+
let richest = group[group.length - 1];
|
|
752
|
+
for (let i = group.length - 1; i >= 0; i--) {
|
|
753
|
+
if (group[i].original_text || group[i].proposed_text || group[i].rationale) {
|
|
754
|
+
richest = group[i];
|
|
755
|
+
break;
|
|
756
|
+
}
|
|
757
|
+
}
|
|
758
|
+
const primaryIndex = group.findIndex((entry) => entry === richest);
|
|
759
|
+
return {
|
|
760
|
+
target,
|
|
761
|
+
primaryEntry: richest,
|
|
762
|
+
historyEntries: group.filter((_, index) => index !== primaryIndex),
|
|
763
|
+
entries: group,
|
|
764
|
+
};
|
|
765
|
+
});
|
|
766
|
+
}, [proposalEntries]);
|
|
767
|
+
|
|
664
768
|
return (
|
|
665
769
|
<div className="space-y-4">
|
|
666
770
|
{/* Context banner */}
|
|
667
|
-
|
|
668
|
-
<
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
|
|
771
|
+
{showContextBanner && (
|
|
772
|
+
<div className="flex items-start gap-2.5 rounded-lg border border-primary/20 bg-primary/5 px-3.5 py-2.5">
|
|
773
|
+
<InfoIcon className="mt-0.5 size-4 shrink-0 text-primary/60" />
|
|
774
|
+
<p className="text-xs leading-relaxed text-muted-foreground">
|
|
775
|
+
This view shows the complete evidence trail for a skill evolution proposal — how
|
|
776
|
+
the skill was changed, the eval test results before and after, and whether the change
|
|
777
|
+
improved performance.
|
|
778
|
+
</p>
|
|
779
|
+
</div>
|
|
780
|
+
)}
|
|
675
781
|
|
|
676
|
-
|
|
677
|
-
<Card>
|
|
782
|
+
<Card className="border-border/15 bg-muted/10">
|
|
678
783
|
<CardHeader className="pb-3">
|
|
679
|
-
<CardTitle className="
|
|
680
|
-
<span>Proposal
|
|
784
|
+
<CardTitle className="flex flex-wrap items-center gap-2 text-sm">
|
|
785
|
+
<span>Proposal Summary</span>
|
|
681
786
|
<span className="font-mono text-xs text-muted-foreground">
|
|
682
787
|
#{proposalId.slice(0, 12)}
|
|
683
788
|
</span>
|
|
684
789
|
</CardTitle>
|
|
685
790
|
</CardHeader>
|
|
686
791
|
<CardContent className="space-y-3">
|
|
687
|
-
<div className=
|
|
688
|
-
|
|
689
|
-
<div
|
|
690
|
-
|
|
691
|
-
<div className="flex items-center gap-
|
|
692
|
-
{
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
</span>
|
|
792
|
+
<div className={`rounded-lg border px-4 py-3 ${outcome.tone}`}>
|
|
793
|
+
<div className="flex items-start gap-3">
|
|
794
|
+
<div className="mt-0.5 shrink-0">{outcome.icon}</div>
|
|
795
|
+
<div className="min-w-0 space-y-1.5">
|
|
796
|
+
<div className="flex flex-wrap items-center gap-2">
|
|
797
|
+
<p className="text-sm font-semibold">{outcome.title}</p>
|
|
798
|
+
{latestStep && (
|
|
799
|
+
<Badge
|
|
800
|
+
variant={ACTION_VARIANT[latestStep.action] ?? "secondary"}
|
|
801
|
+
className="text-[10px] capitalize"
|
|
802
|
+
>
|
|
803
|
+
{sentenceCase(latestStep.action)}
|
|
804
|
+
</Badge>
|
|
805
|
+
)}
|
|
702
806
|
</div>
|
|
807
|
+
<p className="text-sm leading-6 text-current/90">{outcome.summary}</p>
|
|
808
|
+
{latestStep?.details && (
|
|
809
|
+
<div className="rounded-md bg-black/10 px-3 py-2 text-sm leading-6 text-current/90 dark:bg-black/20">
|
|
810
|
+
{latestStep.details}
|
|
811
|
+
</div>
|
|
812
|
+
)}
|
|
813
|
+
<p className="text-xs font-medium text-current/75">{outcome.liveSkillNote}</p>
|
|
703
814
|
</div>
|
|
704
|
-
|
|
815
|
+
</div>
|
|
705
816
|
</div>
|
|
706
817
|
|
|
707
|
-
|
|
708
|
-
|
|
709
|
-
|
|
710
|
-
|
|
818
|
+
<div className="flex flex-wrap items-center gap-2">
|
|
819
|
+
{latestStep?.timestamp && (
|
|
820
|
+
<span className="text-[10px] font-mono text-muted-foreground">
|
|
821
|
+
{timeAgo(latestStep.timestamp)}
|
|
822
|
+
</span>
|
|
823
|
+
)}
|
|
824
|
+
<Badge variant="outline" className="text-[10px]">
|
|
825
|
+
{entries.length} evidence {entries.length === 1 ? "row" : "rows"}
|
|
826
|
+
</Badge>
|
|
827
|
+
{latestProposalConfidence != null && (
|
|
828
|
+
<Badge variant="secondary" className="text-[10px]">
|
|
829
|
+
{Math.round(latestProposalConfidence * 100)}% confidence
|
|
830
|
+
</Badge>
|
|
831
|
+
)}
|
|
832
|
+
</div>
|
|
833
|
+
|
|
834
|
+
<div className="flex flex-wrap items-center gap-2 text-[11px] text-muted-foreground">
|
|
835
|
+
<span className="font-headline uppercase tracking-[0.16em] text-muted-foreground/80">
|
|
836
|
+
Lifecycle
|
|
837
|
+
</span>
|
|
838
|
+
<span>{lifecycleLabel ? sentenceCase(lifecycleLabel) : "No lifecycle recorded"}</span>
|
|
839
|
+
</div>
|
|
840
|
+
|
|
841
|
+
{typeof snapshot?.net_change === "number" &&
|
|
842
|
+
typeof snapshot.before_pass_rate === "number" &&
|
|
843
|
+
typeof snapshot.after_pass_rate === "number" && (
|
|
844
|
+
<div className="flex flex-wrap items-center gap-3 rounded-md border bg-muted/20 px-3 py-2">
|
|
711
845
|
<div className="flex items-center gap-1">
|
|
712
|
-
{
|
|
846
|
+
{snapshot.net_change > 0 ? (
|
|
713
847
|
<TrendingUpIcon className="size-3.5 text-emerald-500" />
|
|
714
|
-
) : (
|
|
848
|
+
) : snapshot.net_change < 0 ? (
|
|
715
849
|
<TrendingDownIcon className="size-3.5 text-red-500" />
|
|
850
|
+
) : (
|
|
851
|
+
<CircleDotIcon className="size-3.5 text-muted-foreground" />
|
|
716
852
|
)}
|
|
717
853
|
<span
|
|
718
|
-
className={`text-sm font-
|
|
854
|
+
className={`text-sm font-mono font-semibold ${
|
|
855
|
+
snapshot.net_change > 0
|
|
856
|
+
? "text-emerald-600 dark:text-emerald-400"
|
|
857
|
+
: snapshot.net_change < 0
|
|
858
|
+
? "text-red-500"
|
|
859
|
+
: "text-muted-foreground"
|
|
860
|
+
}`}
|
|
719
861
|
>
|
|
720
|
-
{
|
|
721
|
-
{Math.round(
|
|
862
|
+
{snapshot.net_change > 0 ? "+" : ""}
|
|
863
|
+
{Math.round(snapshot.net_change * 100)}%
|
|
722
864
|
</span>
|
|
723
865
|
</div>
|
|
724
|
-
|
|
725
|
-
|
|
726
|
-
|
|
727
|
-
|
|
728
|
-
|
|
729
|
-
|
|
730
|
-
|
|
866
|
+
<span className="text-xs font-mono text-muted-foreground">
|
|
867
|
+
{Math.round(snapshot.before_pass_rate * 100)}% →{" "}
|
|
868
|
+
{Math.round(snapshot.after_pass_rate * 100)}%
|
|
869
|
+
</span>
|
|
870
|
+
{snapshot.net_change > 0 ? (
|
|
871
|
+
<Badge variant="default" className="text-[10px]">
|
|
872
|
+
Improved
|
|
873
|
+
</Badge>
|
|
874
|
+
) : snapshot.net_change < 0 ? (
|
|
875
|
+
<Badge variant="destructive" className="text-[10px]">
|
|
876
|
+
Regressed
|
|
877
|
+
</Badge>
|
|
878
|
+
) : (
|
|
879
|
+
<Badge variant="outline" className="text-[10px]">
|
|
880
|
+
No change
|
|
881
|
+
</Badge>
|
|
731
882
|
)}
|
|
732
|
-
|
|
733
|
-
|
|
734
|
-
variant={snapshot.improved ? "default" : "destructive"}
|
|
735
|
-
className="text-[10px]"
|
|
736
|
-
>
|
|
737
|
-
{snapshot.improved ? "Improved" : "Regressed"}
|
|
738
|
-
</Badge>
|
|
739
|
-
)}
|
|
740
|
-
</div>
|
|
741
|
-
)}
|
|
742
|
-
|
|
743
|
-
{/* Details from last step */}
|
|
744
|
-
{steps.length > 0 && steps[steps.length - 1].details && (
|
|
745
|
-
<p className="text-xs text-muted-foreground leading-relaxed">
|
|
746
|
-
{steps[steps.length - 1].details}
|
|
747
|
-
</p>
|
|
748
|
-
)}
|
|
883
|
+
</div>
|
|
884
|
+
)}
|
|
749
885
|
</CardContent>
|
|
750
886
|
</Card>
|
|
751
887
|
|
|
752
888
|
{/* Proposal-stage evidence — standalone cards showing original/proposed text */}
|
|
753
|
-
{
|
|
754
|
-
<
|
|
755
|
-
|
|
756
|
-
|
|
757
|
-
|
|
758
|
-
|
|
759
|
-
|
|
760
|
-
|
|
761
|
-
|
|
762
|
-
|
|
889
|
+
{proposalCards.length > 0 && (
|
|
890
|
+
<div className="space-y-2">
|
|
891
|
+
<div className="space-y-1">
|
|
892
|
+
<p className="text-xs font-semibold uppercase tracking-[0.18em] text-muted-foreground/80">
|
|
893
|
+
What changed
|
|
894
|
+
</p>
|
|
895
|
+
<p className="text-sm text-muted-foreground">
|
|
896
|
+
This is the actual skill text selftune proposed changing.
|
|
897
|
+
</p>
|
|
898
|
+
</div>
|
|
899
|
+
{proposalCards.map((group) => {
|
|
900
|
+
const hasHistory = group.historyEntries.length > 0;
|
|
901
|
+
const isExpanded = expandedProposalTargets.has(group.target);
|
|
763
902
|
|
|
764
|
-
|
|
765
|
-
|
|
766
|
-
|
|
767
|
-
|
|
768
|
-
|
|
769
|
-
|
|
770
|
-
|
|
771
|
-
|
|
772
|
-
|
|
773
|
-
|
|
774
|
-
|
|
775
|
-
const prevPassRate = i > 0 ? getAfterPassRate(targetEntries[i - 1]) : null;
|
|
776
|
-
const currPassRate = getAfterPassRate(entry);
|
|
777
|
-
const roundKey = `${target}-${entry.timestamp}`;
|
|
778
|
-
const roundStatus: RoundStatus = !hasMultipleRounds
|
|
779
|
-
? "single"
|
|
780
|
-
: isLast
|
|
781
|
-
? "final"
|
|
782
|
-
: "intermediate";
|
|
783
|
-
|
|
784
|
-
// Earlier rounds: collapsed by default
|
|
785
|
-
if (roundStatus === "intermediate" && !expandedRounds.has(roundKey)) {
|
|
786
|
-
return (
|
|
787
|
-
<CollapsedEvidenceCard
|
|
788
|
-
key={roundKey}
|
|
789
|
-
entry={entry}
|
|
790
|
-
roundLabel={roundLabel!}
|
|
791
|
-
onExpand={() => toggleRound(roundKey)}
|
|
792
|
-
/>
|
|
793
|
-
);
|
|
794
|
-
}
|
|
795
|
-
|
|
796
|
-
// Expanded earlier round — show with collapse toggle
|
|
797
|
-
if (roundStatus === "intermediate" && expandedRounds.has(roundKey)) {
|
|
798
|
-
return (
|
|
799
|
-
<div key={roundKey} className="space-y-1">
|
|
903
|
+
return (
|
|
904
|
+
<div key={`proposal-${group.target}`} className="space-y-2">
|
|
905
|
+
<EvidenceCard
|
|
906
|
+
entry={group.primaryEntry}
|
|
907
|
+
roundLabel={hasHistory ? `Latest draft of ${group.entries.length}` : null}
|
|
908
|
+
roundStatus={hasHistory ? "final" : "single"}
|
|
909
|
+
prevPassRate={null}
|
|
910
|
+
currPassRate={null}
|
|
911
|
+
/>
|
|
912
|
+
{hasHistory && (
|
|
913
|
+
<div className="space-y-2">
|
|
800
914
|
<button
|
|
801
915
|
type="button"
|
|
802
|
-
onClick={() =>
|
|
803
|
-
className="flex items-center gap-1 text-[10px] text-muted-foreground hover:text-foreground
|
|
916
|
+
onClick={() => toggleProposalHistory(group.target)}
|
|
917
|
+
className="flex items-center gap-1 px-1 text-[10px] text-muted-foreground transition-colors hover:text-foreground"
|
|
804
918
|
>
|
|
805
|
-
|
|
806
|
-
|
|
919
|
+
{isExpanded ? (
|
|
920
|
+
<ChevronDownIcon className="size-3" />
|
|
921
|
+
) : (
|
|
922
|
+
<ChevronRightIcon className="size-3" />
|
|
923
|
+
)}
|
|
924
|
+
{isExpanded ? "Hide" : "Show"} {group.historyEntries.length} earlier{" "}
|
|
925
|
+
{group.historyEntries.length === 1 ? "draft" : "drafts"}
|
|
807
926
|
</button>
|
|
927
|
+
{isExpanded &&
|
|
928
|
+
group.historyEntries.map((entry, index) => (
|
|
929
|
+
<EvidenceCard
|
|
930
|
+
key={`proposal-history-${group.target}-${entry.timestamp}-${index}`}
|
|
931
|
+
entry={entry}
|
|
932
|
+
roundLabel={`Draft ${index + 1} of ${group.historyEntries.length}`}
|
|
933
|
+
roundStatus="intermediate"
|
|
934
|
+
prevPassRate={null}
|
|
935
|
+
currPassRate={null}
|
|
936
|
+
/>
|
|
937
|
+
))}
|
|
938
|
+
</div>
|
|
939
|
+
)}
|
|
940
|
+
</div>
|
|
941
|
+
);
|
|
942
|
+
})}
|
|
943
|
+
</div>
|
|
944
|
+
)}
|
|
945
|
+
|
|
946
|
+
{/* Validation-stage evidence — grouped by target with iteration rounds */}
|
|
947
|
+
{Array.from(validationsByTarget.entries()).length > 0 && (
|
|
948
|
+
<div className="space-y-2">
|
|
949
|
+
<div className="space-y-1">
|
|
950
|
+
<p className="text-xs font-semibold uppercase tracking-[0.18em] text-muted-foreground/80">
|
|
951
|
+
How it was tested
|
|
952
|
+
</p>
|
|
953
|
+
<p className="text-sm text-muted-foreground">
|
|
954
|
+
Validation evidence shows whether the proposal improved the eval signal.
|
|
955
|
+
</p>
|
|
956
|
+
</div>
|
|
957
|
+
{Array.from(validationsByTarget.entries()).map(([target, targetEntries]) => {
|
|
958
|
+
const hasMultipleRounds = targetEntries.length > 1;
|
|
959
|
+
|
|
960
|
+
return (
|
|
961
|
+
<div key={target} className="space-y-2">
|
|
962
|
+
{targetEntries.map((entry, i) => {
|
|
963
|
+
const isLast = i === targetEntries.length - 1;
|
|
964
|
+
const roundLabel = hasMultipleRounds
|
|
965
|
+
? `Round ${i + 1} of ${targetEntries.length}`
|
|
966
|
+
: null;
|
|
967
|
+
const prevPassRate = i > 0 ? getAfterPassRate(targetEntries[i - 1]) : null;
|
|
968
|
+
const currPassRate = getAfterPassRate(entry);
|
|
969
|
+
const roundKey = `${target}-${entry.timestamp}`;
|
|
970
|
+
const roundStatus: RoundStatus = !hasMultipleRounds
|
|
971
|
+
? "single"
|
|
972
|
+
: isLast
|
|
973
|
+
? "final"
|
|
974
|
+
: "intermediate";
|
|
975
|
+
|
|
976
|
+
if (roundStatus === "intermediate" && !expandedRounds.has(roundKey)) {
|
|
977
|
+
return (
|
|
978
|
+
<CollapsedEvidenceCard
|
|
979
|
+
key={roundKey}
|
|
980
|
+
entry={entry}
|
|
981
|
+
roundLabel={roundLabel!}
|
|
982
|
+
onExpand={() => toggleRound(roundKey)}
|
|
983
|
+
/>
|
|
984
|
+
);
|
|
985
|
+
}
|
|
986
|
+
|
|
987
|
+
if (roundStatus === "intermediate" && expandedRounds.has(roundKey)) {
|
|
988
|
+
return (
|
|
989
|
+
<div key={roundKey} className="space-y-1">
|
|
990
|
+
<button
|
|
991
|
+
type="button"
|
|
992
|
+
onClick={() => toggleRound(roundKey)}
|
|
993
|
+
className="flex items-center gap-1 px-1 text-[10px] text-muted-foreground transition-colors hover:text-foreground"
|
|
994
|
+
>
|
|
995
|
+
<ChevronDownIcon className="size-3" />
|
|
996
|
+
Collapse {roundLabel}
|
|
997
|
+
</button>
|
|
998
|
+
<EvidenceCard
|
|
999
|
+
entry={entry}
|
|
1000
|
+
roundLabel={roundLabel}
|
|
1001
|
+
roundStatus={roundStatus}
|
|
1002
|
+
prevPassRate={prevPassRate}
|
|
1003
|
+
currPassRate={currPassRate}
|
|
1004
|
+
/>
|
|
1005
|
+
</div>
|
|
1006
|
+
);
|
|
1007
|
+
}
|
|
1008
|
+
|
|
1009
|
+
return (
|
|
808
1010
|
<EvidenceCard
|
|
1011
|
+
key={roundKey}
|
|
809
1012
|
entry={entry}
|
|
810
1013
|
roundLabel={roundLabel}
|
|
811
1014
|
roundStatus={roundStatus}
|
|
812
1015
|
prevPassRate={prevPassRate}
|
|
813
1016
|
currPassRate={currPassRate}
|
|
814
1017
|
/>
|
|
815
|
-
|
|
816
|
-
)
|
|
817
|
-
|
|
818
|
-
|
|
819
|
-
|
|
820
|
-
|
|
821
|
-
|
|
822
|
-
key={roundKey}
|
|
823
|
-
entry={entry}
|
|
824
|
-
roundLabel={roundLabel}
|
|
825
|
-
roundStatus={roundStatus}
|
|
826
|
-
prevPassRate={prevPassRate}
|
|
827
|
-
currPassRate={currPassRate}
|
|
828
|
-
/>
|
|
829
|
-
);
|
|
830
|
-
})}
|
|
831
|
-
</div>
|
|
832
|
-
);
|
|
833
|
-
})}
|
|
1018
|
+
);
|
|
1019
|
+
})}
|
|
1020
|
+
</div>
|
|
1021
|
+
);
|
|
1022
|
+
})}
|
|
1023
|
+
</div>
|
|
1024
|
+
)}
|
|
834
1025
|
|
|
835
1026
|
{entries.length === 0 && (
|
|
836
1027
|
<div className="flex items-center justify-center rounded-lg border border-dashed py-8">
|