selftune 0.2.18 → 0.2.20
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +9 -4
- package/apps/local-dashboard/dist/assets/index-D8O-RG1I.js +60 -0
- package/apps/local-dashboard/dist/assets/index-_EcLywDg.css +1 -0
- package/apps/local-dashboard/dist/assets/vendor-table-BIiI3YhS.js +1 -0
- package/apps/local-dashboard/dist/assets/vendor-ui-CGEmUayx.js +12 -0
- package/apps/local-dashboard/dist/index.html +5 -5
- package/cli/selftune/alpha-upload/stage-canonical.ts +7 -6
- package/cli/selftune/constants.ts +10 -0
- package/cli/selftune/contribute/contribute.ts +30 -2
- package/cli/selftune/contribution-config.ts +249 -0
- package/cli/selftune/contribution-relay.ts +177 -0
- package/cli/selftune/contribution-signals.ts +219 -0
- package/cli/selftune/contribution-staging.ts +147 -0
- package/cli/selftune/contributions.ts +532 -0
- package/cli/selftune/creator-contributions.ts +333 -0
- package/cli/selftune/dashboard-contract.ts +209 -1
- package/cli/selftune/dashboard-server.ts +45 -11
- package/cli/selftune/eval/family-overlap.ts +714 -0
- package/cli/selftune/eval/hooks-to-evals.ts +182 -28
- package/cli/selftune/eval/synthetic-evals.ts +298 -11
- package/cli/selftune/evolution/evidence.ts +5 -0
- package/cli/selftune/evolution/evolve-body.ts +62 -2
- package/cli/selftune/evolution/evolve.ts +58 -1
- package/cli/selftune/evolution/validate-body.ts +10 -0
- package/cli/selftune/evolution/validate-host-replay.ts +236 -0
- package/cli/selftune/evolution/validate-proposal.ts +10 -0
- package/cli/selftune/evolution/validate-routing.ts +112 -5
- package/cli/selftune/export.ts +2 -2
- package/cli/selftune/index.ts +41 -5
- package/cli/selftune/ingestors/codex-rollout.ts +31 -35
- package/cli/selftune/ingestors/codex-wrapper.ts +32 -24
- package/cli/selftune/localdb/db.ts +2 -2
- package/cli/selftune/localdb/direct-write.ts +8 -3
- package/cli/selftune/localdb/materialize.ts +7 -2
- package/cli/selftune/localdb/queries.ts +712 -31
- package/cli/selftune/localdb/schema.ts +30 -1
- package/cli/selftune/recover.ts +153 -0
- package/cli/selftune/repair/skill-usage.ts +363 -4
- package/cli/selftune/routes/actions.ts +35 -1
- package/cli/selftune/routes/analytics.ts +14 -0
- package/cli/selftune/routes/index.ts +1 -0
- package/cli/selftune/routes/overview.ts +112 -4
- package/cli/selftune/routes/skill-report.ts +575 -11
- package/cli/selftune/status.ts +81 -2
- package/cli/selftune/sync.ts +56 -2
- package/cli/selftune/trust-model.ts +66 -0
- package/cli/selftune/types.ts +103 -0
- package/cli/selftune/utils/skill-detection.ts +43 -0
- package/cli/selftune/utils/text-similarity.ts +73 -0
- package/cli/selftune/watchlist.ts +65 -0
- package/package.json +1 -1
- package/packages/ui/src/components/ActivityTimeline.tsx +165 -150
- package/packages/ui/src/components/EvidenceViewer.tsx +419 -145
- package/packages/ui/src/components/EvolutionTimeline.tsx +81 -29
- package/packages/ui/src/components/OrchestrateRunsPanel.tsx +33 -16
- package/packages/ui/src/components/RecentActivityFeed.tsx +72 -41
- package/packages/ui/src/components/section-cards.tsx +12 -9
- package/packages/ui/src/primitives/card.tsx +1 -1
- package/packages/ui/src/types.ts +4 -0
- package/skill/SKILL.md +11 -1
- package/skill/Workflows/AlphaUpload.md +4 -0
- package/skill/Workflows/Composability.md +78 -0
- package/skill/Workflows/Contribute.md +6 -3
- package/skill/Workflows/Contributions.md +97 -0
- package/skill/Workflows/CreatorContributions.md +74 -0
- package/skill/Workflows/Dashboard.md +31 -0
- package/skill/Workflows/Evals.md +57 -8
- package/skill/Workflows/Evolve.md +23 -0
- package/skill/Workflows/Ingest.md +7 -0
- package/skill/Workflows/Initialize.md +20 -1
- package/skill/Workflows/Recover.md +84 -0
- package/skill/Workflows/RepairSkillUsage.md +12 -4
- package/skill/Workflows/Sync.md +18 -12
- package/apps/local-dashboard/dist/assets/index-BMIS6uUh.css +0 -2
- package/apps/local-dashboard/dist/assets/index-DOu3iLD9.js +0 -16
- package/apps/local-dashboard/dist/assets/vendor-table-pHbDxq36.js +0 -8
- package/apps/local-dashboard/dist/assets/vendor-ui-DIwlrGlb.js +0 -12
|
@@ -5,17 +5,14 @@ import {
|
|
|
5
5
|
CircleDotIcon,
|
|
6
6
|
FileTextIcon,
|
|
7
7
|
InfoIcon,
|
|
8
|
-
RocketIcon,
|
|
9
|
-
ShieldCheckIcon,
|
|
10
8
|
ShieldAlertIcon,
|
|
11
9
|
XCircleIcon,
|
|
12
|
-
UndoIcon,
|
|
13
|
-
ArrowRightIcon,
|
|
14
10
|
TrendingUpIcon,
|
|
15
11
|
TrendingDownIcon,
|
|
16
12
|
ListChecksIcon,
|
|
17
13
|
} from "lucide-react";
|
|
18
14
|
import { useMemo, useState } from "react";
|
|
15
|
+
import type { ReactNode } from "react";
|
|
19
16
|
import Markdown from "react-markdown";
|
|
20
17
|
|
|
21
18
|
import { formatRate, timeAgo } from "../lib/format";
|
|
@@ -23,14 +20,6 @@ import { Badge } from "../primitives/badge";
|
|
|
23
20
|
import { Card, CardContent, CardHeader, CardTitle } from "../primitives/card";
|
|
24
21
|
import type { EvidenceEntry, EvolutionEntry } from "../types";
|
|
25
22
|
|
|
26
|
-
const ACTION_ICON: Record<string, React.ReactNode> = {
|
|
27
|
-
created: <CircleDotIcon className="size-3.5" />,
|
|
28
|
-
validated: <ShieldCheckIcon className="size-3.5" />,
|
|
29
|
-
deployed: <RocketIcon className="size-3.5" />,
|
|
30
|
-
rejected: <XCircleIcon className="size-3.5" />,
|
|
31
|
-
rolled_back: <UndoIcon className="size-3.5" />,
|
|
32
|
-
};
|
|
33
|
-
|
|
34
23
|
const ACTION_VARIANT: Record<string, "default" | "secondary" | "destructive" | "outline"> = {
|
|
35
24
|
created: "outline",
|
|
36
25
|
validated: "secondary",
|
|
@@ -43,6 +32,94 @@ interface Props {
|
|
|
43
32
|
proposalId: string;
|
|
44
33
|
evolution: EvolutionEntry[];
|
|
45
34
|
evidence: EvidenceEntry[];
|
|
35
|
+
showContextBanner?: boolean;
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
function getValidationModeMeta(mode?: string | null): {
|
|
39
|
+
label: string;
|
|
40
|
+
variant: "default" | "secondary" | "destructive" | "outline";
|
|
41
|
+
description: string;
|
|
42
|
+
} | null {
|
|
43
|
+
switch (mode) {
|
|
44
|
+
case "host_replay":
|
|
45
|
+
return {
|
|
46
|
+
label: "Replay-backed validation",
|
|
47
|
+
variant: "default",
|
|
48
|
+
description:
|
|
49
|
+
"Validated against a controlled replay fixture instead of a free-form judge prompt.",
|
|
50
|
+
};
|
|
51
|
+
case "llm_judge":
|
|
52
|
+
return {
|
|
53
|
+
label: "Model judgment",
|
|
54
|
+
variant: "secondary",
|
|
55
|
+
description: "Validated by an LLM trigger check rather than a replay fixture.",
|
|
56
|
+
};
|
|
57
|
+
case "structural_guard":
|
|
58
|
+
return {
|
|
59
|
+
label: "Structural guard",
|
|
60
|
+
variant: "outline",
|
|
61
|
+
description:
|
|
62
|
+
"Only deterministic structural checks ran; no replay or judge validation was needed.",
|
|
63
|
+
};
|
|
64
|
+
default:
|
|
65
|
+
return null;
|
|
66
|
+
}
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
function sentenceCase(value: string): string {
|
|
70
|
+
return value.replace(/_/g, " ");
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
function getOutcomePresentation(action?: string | null): {
|
|
74
|
+
title: string;
|
|
75
|
+
summary: string;
|
|
76
|
+
tone: string;
|
|
77
|
+
icon: ReactNode;
|
|
78
|
+
liveSkillNote: string;
|
|
79
|
+
} {
|
|
80
|
+
switch (action) {
|
|
81
|
+
case "rejected":
|
|
82
|
+
return {
|
|
83
|
+
title: "Proposal rejected",
|
|
84
|
+
summary: "Selftune proposed a change, but blocked it before your live skill was updated.",
|
|
85
|
+
tone: "border-red-500/20 bg-red-500/8 text-red-700 dark:text-red-50",
|
|
86
|
+
icon: <XCircleIcon className="size-4 text-red-400" />,
|
|
87
|
+
liveSkillNote: "Your live skill is unchanged.",
|
|
88
|
+
};
|
|
89
|
+
case "validated":
|
|
90
|
+
return {
|
|
91
|
+
title: "Proposal validated",
|
|
92
|
+
summary: "The proposed change improved the eval signal and is ready for review or deploy.",
|
|
93
|
+
tone: "border-emerald-500/20 bg-emerald-500/8 text-emerald-700 dark:text-emerald-50",
|
|
94
|
+
icon: <CheckCircleIcon className="size-4 text-emerald-400" />,
|
|
95
|
+
liveSkillNote: "Your live skill has not changed until this proposal is deployed.",
|
|
96
|
+
};
|
|
97
|
+
case "deployed":
|
|
98
|
+
return {
|
|
99
|
+
title: "Proposal deployed",
|
|
100
|
+
summary: "The proposed change passed validation and was applied to the live skill.",
|
|
101
|
+
tone: "border-primary/25 bg-primary/8 text-foreground",
|
|
102
|
+
icon: <TrendingUpIcon className="size-4 text-primary" />,
|
|
103
|
+
liveSkillNote: "Your live skill now includes this change.",
|
|
104
|
+
};
|
|
105
|
+
case "rolled_back":
|
|
106
|
+
return {
|
|
107
|
+
title: "Proposal rolled back",
|
|
108
|
+
summary: "A deployed change was later reversed because follow-up evidence showed risk.",
|
|
109
|
+
tone: "border-amber-500/20 bg-amber-500/8 text-amber-800 dark:text-amber-50",
|
|
110
|
+
icon: <TrendingDownIcon className="size-4 text-amber-400" />,
|
|
111
|
+
liveSkillNote: "Your live skill no longer uses this proposal.",
|
|
112
|
+
};
|
|
113
|
+
case "created":
|
|
114
|
+
default:
|
|
115
|
+
return {
|
|
116
|
+
title: "Proposal under review",
|
|
117
|
+
summary: "Selftune found a possible improvement and recorded the proposed change.",
|
|
118
|
+
tone: "border-border/30 bg-muted/25 text-foreground",
|
|
119
|
+
icon: <CircleDotIcon className="size-4 text-muted-foreground" />,
|
|
120
|
+
liveSkillNote: "Your live skill is unchanged until a proposal is validated and deployed.",
|
|
121
|
+
};
|
|
122
|
+
}
|
|
46
123
|
}
|
|
47
124
|
|
|
48
125
|
/** Parse YAML-ish frontmatter from text, returns { meta, body } */
|
|
@@ -135,7 +212,7 @@ function SkillContentBlock({
|
|
|
135
212
|
}
|
|
136
213
|
|
|
137
214
|
/** Smart formatting for a single validation value */
|
|
138
|
-
function formatValidationValue(key: string, val: unknown):
|
|
215
|
+
function formatValidationValue(key: string, val: unknown): ReactNode {
|
|
139
216
|
// Booleans
|
|
140
217
|
if (typeof val === "boolean") {
|
|
141
218
|
return val ? (
|
|
@@ -234,12 +311,18 @@ function ValidationResults({ validation }: { validation: Record<string, unknown>
|
|
|
234
311
|
regressions,
|
|
235
312
|
new_passes,
|
|
236
313
|
per_entry_results,
|
|
314
|
+
validation_mode,
|
|
315
|
+
validation_agent,
|
|
316
|
+
validation_fixture_id,
|
|
317
|
+
validation_evidence_ref,
|
|
237
318
|
...rest
|
|
238
319
|
} = validation;
|
|
239
320
|
|
|
240
321
|
const regressionsArr = Array.isArray(regressions) ? regressions : [];
|
|
241
322
|
const newPassesArr = Array.isArray(new_passes) ? new_passes : [];
|
|
242
323
|
const perEntryArr = Array.isArray(per_entry_results) ? per_entry_results : [];
|
|
324
|
+
const validationMeta =
|
|
325
|
+
typeof validation_mode === "string" ? getValidationModeMeta(validation_mode) : null;
|
|
243
326
|
|
|
244
327
|
return (
|
|
245
328
|
<div className="rounded-md border bg-muted/30 p-3 space-y-3">
|
|
@@ -250,6 +333,34 @@ function ValidationResults({ validation }: { validation: Record<string, unknown>
|
|
|
250
333
|
</span>
|
|
251
334
|
</p>
|
|
252
335
|
|
|
336
|
+
{validationMeta && (
|
|
337
|
+
<div className="rounded-md border bg-card px-3 py-2">
|
|
338
|
+
<div className="flex flex-wrap items-center gap-2">
|
|
339
|
+
<Badge variant={validationMeta.variant} className="text-[10px]">
|
|
340
|
+
{validationMeta.label}
|
|
341
|
+
</Badge>
|
|
342
|
+
{typeof validation_agent === "string" && validation_agent.trim() && (
|
|
343
|
+
<Badge variant="outline" className="text-[10px]">
|
|
344
|
+
agent: {validation_agent}
|
|
345
|
+
</Badge>
|
|
346
|
+
)}
|
|
347
|
+
{typeof validation_fixture_id === "string" && validation_fixture_id.trim() && (
|
|
348
|
+
<Badge variant="outline" className="text-[10px]">
|
|
349
|
+
fixture: {validation_fixture_id}
|
|
350
|
+
</Badge>
|
|
351
|
+
)}
|
|
352
|
+
</div>
|
|
353
|
+
<p className="mt-1 text-[11px] leading-relaxed text-muted-foreground">
|
|
354
|
+
{validationMeta.description}
|
|
355
|
+
</p>
|
|
356
|
+
{typeof validation_evidence_ref === "string" && validation_evidence_ref.trim() && (
|
|
357
|
+
<p className="mt-1 text-[10px] font-mono text-muted-foreground/70">
|
|
358
|
+
{validation_evidence_ref}
|
|
359
|
+
</p>
|
|
360
|
+
)}
|
|
361
|
+
</div>
|
|
362
|
+
)}
|
|
363
|
+
|
|
253
364
|
{/* Summary bar */}
|
|
254
365
|
<div className="flex items-center gap-3 flex-wrap">
|
|
255
366
|
{improved !== undefined && (
|
|
@@ -264,7 +375,13 @@ function ValidationResults({ validation }: { validation: Record<string, unknown>
|
|
|
264
375
|
)}
|
|
265
376
|
{typeof net_change === "number" && (
|
|
266
377
|
<span
|
|
267
|
-
className={`text-xs font-mono font-semibold ${
|
|
378
|
+
className={`text-xs font-mono font-semibold ${
|
|
379
|
+
net_change > 0
|
|
380
|
+
? "text-emerald-600 dark:text-emerald-400"
|
|
381
|
+
: net_change < 0
|
|
382
|
+
? "text-red-500"
|
|
383
|
+
: "text-muted-foreground"
|
|
384
|
+
}`}
|
|
268
385
|
>
|
|
269
386
|
{net_change > 0 ? "+" : ""}
|
|
270
387
|
{(net_change * 100).toFixed(1)}%
|
|
@@ -609,7 +726,12 @@ function CollapsedEvidenceCard({
|
|
|
609
726
|
);
|
|
610
727
|
}
|
|
611
728
|
|
|
612
|
-
export function EvidenceViewer({
|
|
729
|
+
export function EvidenceViewer({
|
|
730
|
+
proposalId,
|
|
731
|
+
evolution,
|
|
732
|
+
evidence,
|
|
733
|
+
showContextBanner = true,
|
|
734
|
+
}: Props) {
|
|
613
735
|
const steps = useMemo(
|
|
614
736
|
() =>
|
|
615
737
|
evolution
|
|
@@ -628,6 +750,7 @@ export function EvidenceViewer({ proposalId, evolution, evidence }: Props) {
|
|
|
628
750
|
|
|
629
751
|
// Track which earlier rounds are manually expanded
|
|
630
752
|
const [expandedRounds, setExpandedRounds] = useState<Set<string>>(new Set());
|
|
753
|
+
const [expandedProposalTargets, setExpandedProposalTargets] = useState<Set<string>>(new Set());
|
|
631
754
|
|
|
632
755
|
const toggleRound = (key: string) => {
|
|
633
756
|
setExpandedRounds((prev) => {
|
|
@@ -638,6 +761,15 @@ export function EvidenceViewer({ proposalId, evolution, evidence }: Props) {
|
|
|
638
761
|
});
|
|
639
762
|
};
|
|
640
763
|
|
|
764
|
+
const toggleProposalHistory = (target: string) => {
|
|
765
|
+
setExpandedProposalTargets((prev) => {
|
|
766
|
+
const next = new Set(prev);
|
|
767
|
+
if (next.has(target)) next.delete(target);
|
|
768
|
+
else next.add(target);
|
|
769
|
+
return next;
|
|
770
|
+
});
|
|
771
|
+
};
|
|
772
|
+
|
|
641
773
|
const snapshot = useMemo(() => {
|
|
642
774
|
for (let i = steps.length - 1; i >= 0; i--) {
|
|
643
775
|
if (steps[i].eval_snapshot) return steps[i].eval_snapshot as Record<string, unknown>;
|
|
@@ -661,176 +793,318 @@ export function EvidenceViewer({ proposalId, evolution, evidence }: Props) {
|
|
|
661
793
|
return { proposalEntries: proposals, validationsByTarget: validationMap };
|
|
662
794
|
}, [entries]);
|
|
663
795
|
|
|
796
|
+
const latestStep = steps[steps.length - 1] ?? null;
|
|
797
|
+
const lifecycleLabel = steps.map((step) => step.action.replace("_", " ")).join(" -> ");
|
|
798
|
+
const outcome = getOutcomePresentation(latestStep?.action);
|
|
799
|
+
const validationMeta = getValidationModeMeta(latestStep?.validation_mode);
|
|
800
|
+
const latestProposalConfidence = useMemo(() => {
|
|
801
|
+
for (let i = proposalEntries.length - 1; i >= 0; i--) {
|
|
802
|
+
if (proposalEntries[i].confidence !== null) {
|
|
803
|
+
return proposalEntries[i].confidence;
|
|
804
|
+
}
|
|
805
|
+
}
|
|
806
|
+
return null;
|
|
807
|
+
}, [proposalEntries]);
|
|
808
|
+
const proposalCards = useMemo(() => {
|
|
809
|
+
const grouped = new Map<string, EvidenceEntry[]>();
|
|
810
|
+
for (const entry of proposalEntries) {
|
|
811
|
+
const key = entry.target || "proposal";
|
|
812
|
+
const group = grouped.get(key) ?? [];
|
|
813
|
+
group.push(entry);
|
|
814
|
+
grouped.set(key, group);
|
|
815
|
+
}
|
|
816
|
+
|
|
817
|
+
return Array.from(grouped.entries()).map(([target, group]) => {
|
|
818
|
+
let richest = group[group.length - 1];
|
|
819
|
+
for (let i = group.length - 1; i >= 0; i--) {
|
|
820
|
+
if (group[i].original_text || group[i].proposed_text || group[i].rationale) {
|
|
821
|
+
richest = group[i];
|
|
822
|
+
break;
|
|
823
|
+
}
|
|
824
|
+
}
|
|
825
|
+
const primaryIndex = group.findIndex((entry) => entry === richest);
|
|
826
|
+
return {
|
|
827
|
+
target,
|
|
828
|
+
primaryEntry: richest,
|
|
829
|
+
historyEntries: group.filter((_, index) => index !== primaryIndex),
|
|
830
|
+
entries: group,
|
|
831
|
+
};
|
|
832
|
+
});
|
|
833
|
+
}, [proposalEntries]);
|
|
834
|
+
|
|
664
835
|
return (
|
|
665
836
|
<div className="space-y-4">
|
|
666
837
|
{/* Context banner */}
|
|
667
|
-
|
|
668
|
-
<
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
|
|
838
|
+
{showContextBanner && (
|
|
839
|
+
<div className="flex items-start gap-2.5 rounded-lg border border-primary/20 bg-primary/5 px-3.5 py-2.5">
|
|
840
|
+
<InfoIcon className="mt-0.5 size-4 shrink-0 text-primary/60" />
|
|
841
|
+
<p className="text-xs leading-relaxed text-muted-foreground">
|
|
842
|
+
This view shows the complete evidence trail for a skill evolution proposal — how
|
|
843
|
+
the skill was changed, the eval test results before and after, and whether the change
|
|
844
|
+
improved performance.
|
|
845
|
+
</p>
|
|
846
|
+
</div>
|
|
847
|
+
)}
|
|
675
848
|
|
|
676
|
-
|
|
677
|
-
<Card>
|
|
849
|
+
<Card className="border-border/15 bg-muted/10">
|
|
678
850
|
<CardHeader className="pb-3">
|
|
679
|
-
<CardTitle className="
|
|
680
|
-
<span>Proposal
|
|
851
|
+
<CardTitle className="flex flex-wrap items-center gap-2 text-sm">
|
|
852
|
+
<span>Proposal Summary</span>
|
|
681
853
|
<span className="font-mono text-xs text-muted-foreground">
|
|
682
854
|
#{proposalId.slice(0, 12)}
|
|
683
855
|
</span>
|
|
684
856
|
</CardTitle>
|
|
685
857
|
</CardHeader>
|
|
686
858
|
<CardContent className="space-y-3">
|
|
687
|
-
<div className=
|
|
688
|
-
|
|
689
|
-
<div
|
|
690
|
-
|
|
691
|
-
<div className="flex items-center gap-
|
|
692
|
-
{
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
</span>
|
|
859
|
+
<div className={`rounded-lg border px-4 py-3 ${outcome.tone}`}>
|
|
860
|
+
<div className="flex items-start gap-3">
|
|
861
|
+
<div className="mt-0.5 shrink-0">{outcome.icon}</div>
|
|
862
|
+
<div className="min-w-0 space-y-1.5">
|
|
863
|
+
<div className="flex flex-wrap items-center gap-2">
|
|
864
|
+
<p className="text-sm font-semibold">{outcome.title}</p>
|
|
865
|
+
{latestStep && (
|
|
866
|
+
<Badge
|
|
867
|
+
variant={ACTION_VARIANT[latestStep.action] ?? "secondary"}
|
|
868
|
+
className="text-[10px] capitalize"
|
|
869
|
+
>
|
|
870
|
+
{sentenceCase(latestStep.action)}
|
|
871
|
+
</Badge>
|
|
872
|
+
)}
|
|
702
873
|
</div>
|
|
874
|
+
<p className="text-sm leading-6 text-current/90">{outcome.summary}</p>
|
|
875
|
+
{latestStep?.details && (
|
|
876
|
+
<div className="rounded-md bg-black/10 px-3 py-2 text-sm leading-6 text-current/90 dark:bg-black/20">
|
|
877
|
+
{latestStep.details}
|
|
878
|
+
</div>
|
|
879
|
+
)}
|
|
880
|
+
<p className="text-xs font-medium text-current/75">{outcome.liveSkillNote}</p>
|
|
703
881
|
</div>
|
|
704
|
-
|
|
882
|
+
</div>
|
|
883
|
+
</div>
|
|
884
|
+
|
|
885
|
+
<div className="flex flex-wrap items-center gap-2">
|
|
886
|
+
{latestStep?.timestamp && (
|
|
887
|
+
<span className="text-[10px] font-mono text-muted-foreground">
|
|
888
|
+
{timeAgo(latestStep.timestamp)}
|
|
889
|
+
</span>
|
|
890
|
+
)}
|
|
891
|
+
<Badge variant="outline" className="text-[10px]">
|
|
892
|
+
{entries.length} evidence {entries.length === 1 ? "row" : "rows"}
|
|
893
|
+
</Badge>
|
|
894
|
+
{validationMeta && (
|
|
895
|
+
<Badge variant={validationMeta.variant} className="text-[10px]">
|
|
896
|
+
{validationMeta.label}
|
|
897
|
+
</Badge>
|
|
898
|
+
)}
|
|
899
|
+
{latestStep?.validation_fixture_id && (
|
|
900
|
+
<Badge variant="outline" className="text-[10px]">
|
|
901
|
+
fixture: {latestStep.validation_fixture_id}
|
|
902
|
+
</Badge>
|
|
903
|
+
)}
|
|
904
|
+
{latestProposalConfidence != null && (
|
|
905
|
+
<Badge variant="secondary" className="text-[10px]">
|
|
906
|
+
{Math.round(latestProposalConfidence * 100)}% confidence
|
|
907
|
+
</Badge>
|
|
908
|
+
)}
|
|
909
|
+
</div>
|
|
910
|
+
|
|
911
|
+
{validationMeta && (
|
|
912
|
+
<p className="text-[11px] leading-relaxed text-muted-foreground">
|
|
913
|
+
{validationMeta.description}
|
|
914
|
+
</p>
|
|
915
|
+
)}
|
|
916
|
+
|
|
917
|
+
<div className="flex flex-wrap items-center gap-2 text-[11px] text-muted-foreground">
|
|
918
|
+
<span className="font-headline uppercase tracking-[0.16em] text-muted-foreground/80">
|
|
919
|
+
Lifecycle
|
|
920
|
+
</span>
|
|
921
|
+
<span>{lifecycleLabel ? sentenceCase(lifecycleLabel) : "No lifecycle recorded"}</span>
|
|
705
922
|
</div>
|
|
706
923
|
|
|
707
|
-
{
|
|
708
|
-
|
|
709
|
-
|
|
710
|
-
|
|
924
|
+
{typeof snapshot?.net_change === "number" &&
|
|
925
|
+
typeof snapshot.before_pass_rate === "number" &&
|
|
926
|
+
typeof snapshot.after_pass_rate === "number" && (
|
|
927
|
+
<div className="flex flex-wrap items-center gap-3 rounded-md border bg-muted/20 px-3 py-2">
|
|
711
928
|
<div className="flex items-center gap-1">
|
|
712
|
-
{
|
|
929
|
+
{snapshot.net_change > 0 ? (
|
|
713
930
|
<TrendingUpIcon className="size-3.5 text-emerald-500" />
|
|
714
|
-
) : (
|
|
931
|
+
) : snapshot.net_change < 0 ? (
|
|
715
932
|
<TrendingDownIcon className="size-3.5 text-red-500" />
|
|
933
|
+
) : (
|
|
934
|
+
<CircleDotIcon className="size-3.5 text-muted-foreground" />
|
|
716
935
|
)}
|
|
717
936
|
<span
|
|
718
|
-
className={`text-sm font-
|
|
937
|
+
className={`text-sm font-mono font-semibold ${
|
|
938
|
+
snapshot.net_change > 0
|
|
939
|
+
? "text-emerald-600 dark:text-emerald-400"
|
|
940
|
+
: snapshot.net_change < 0
|
|
941
|
+
? "text-red-500"
|
|
942
|
+
: "text-muted-foreground"
|
|
943
|
+
}`}
|
|
719
944
|
>
|
|
720
|
-
{
|
|
721
|
-
{Math.round(
|
|
945
|
+
{snapshot.net_change > 0 ? "+" : ""}
|
|
946
|
+
{Math.round(snapshot.net_change * 100)}%
|
|
722
947
|
</span>
|
|
723
948
|
</div>
|
|
724
|
-
|
|
725
|
-
|
|
726
|
-
|
|
727
|
-
|
|
728
|
-
|
|
729
|
-
|
|
730
|
-
|
|
949
|
+
<span className="text-xs font-mono text-muted-foreground">
|
|
950
|
+
{Math.round(snapshot.before_pass_rate * 100)}% →{" "}
|
|
951
|
+
{Math.round(snapshot.after_pass_rate * 100)}%
|
|
952
|
+
</span>
|
|
953
|
+
{snapshot.net_change > 0 ? (
|
|
954
|
+
<Badge variant="default" className="text-[10px]">
|
|
955
|
+
Improved
|
|
956
|
+
</Badge>
|
|
957
|
+
) : snapshot.net_change < 0 ? (
|
|
958
|
+
<Badge variant="destructive" className="text-[10px]">
|
|
959
|
+
Regressed
|
|
960
|
+
</Badge>
|
|
961
|
+
) : (
|
|
962
|
+
<Badge variant="outline" className="text-[10px]">
|
|
963
|
+
No change
|
|
964
|
+
</Badge>
|
|
731
965
|
)}
|
|
732
|
-
|
|
733
|
-
|
|
734
|
-
variant={snapshot.improved ? "default" : "destructive"}
|
|
735
|
-
className="text-[10px]"
|
|
736
|
-
>
|
|
737
|
-
{snapshot.improved ? "Improved" : "Regressed"}
|
|
738
|
-
</Badge>
|
|
739
|
-
)}
|
|
740
|
-
</div>
|
|
741
|
-
)}
|
|
742
|
-
|
|
743
|
-
{/* Details from last step */}
|
|
744
|
-
{steps.length > 0 && steps[steps.length - 1].details && (
|
|
745
|
-
<p className="text-xs text-muted-foreground leading-relaxed">
|
|
746
|
-
{steps[steps.length - 1].details}
|
|
747
|
-
</p>
|
|
748
|
-
)}
|
|
966
|
+
</div>
|
|
967
|
+
)}
|
|
749
968
|
</CardContent>
|
|
750
969
|
</Card>
|
|
751
970
|
|
|
752
971
|
{/* Proposal-stage evidence — standalone cards showing original/proposed text */}
|
|
753
|
-
{
|
|
754
|
-
<
|
|
755
|
-
|
|
756
|
-
|
|
757
|
-
|
|
758
|
-
|
|
759
|
-
|
|
760
|
-
|
|
761
|
-
|
|
762
|
-
|
|
972
|
+
{proposalCards.length > 0 && (
|
|
973
|
+
<div className="space-y-2">
|
|
974
|
+
<div className="space-y-1">
|
|
975
|
+
<p className="text-xs font-semibold uppercase tracking-[0.18em] text-muted-foreground/80">
|
|
976
|
+
What changed
|
|
977
|
+
</p>
|
|
978
|
+
<p className="text-sm text-muted-foreground">
|
|
979
|
+
This is the actual skill text selftune proposed changing.
|
|
980
|
+
</p>
|
|
981
|
+
</div>
|
|
982
|
+
{proposalCards.map((group) => {
|
|
983
|
+
const hasHistory = group.historyEntries.length > 0;
|
|
984
|
+
const isExpanded = expandedProposalTargets.has(group.target);
|
|
763
985
|
|
|
764
|
-
|
|
765
|
-
|
|
766
|
-
|
|
767
|
-
|
|
768
|
-
|
|
769
|
-
|
|
770
|
-
|
|
771
|
-
|
|
772
|
-
|
|
773
|
-
|
|
774
|
-
|
|
775
|
-
const prevPassRate = i > 0 ? getAfterPassRate(targetEntries[i - 1]) : null;
|
|
776
|
-
const currPassRate = getAfterPassRate(entry);
|
|
777
|
-
const roundKey = `${target}-${entry.timestamp}`;
|
|
778
|
-
const roundStatus: RoundStatus = !hasMultipleRounds
|
|
779
|
-
? "single"
|
|
780
|
-
: isLast
|
|
781
|
-
? "final"
|
|
782
|
-
: "intermediate";
|
|
783
|
-
|
|
784
|
-
// Earlier rounds: collapsed by default
|
|
785
|
-
if (roundStatus === "intermediate" && !expandedRounds.has(roundKey)) {
|
|
786
|
-
return (
|
|
787
|
-
<CollapsedEvidenceCard
|
|
788
|
-
key={roundKey}
|
|
789
|
-
entry={entry}
|
|
790
|
-
roundLabel={roundLabel!}
|
|
791
|
-
onExpand={() => toggleRound(roundKey)}
|
|
792
|
-
/>
|
|
793
|
-
);
|
|
794
|
-
}
|
|
795
|
-
|
|
796
|
-
// Expanded earlier round — show with collapse toggle
|
|
797
|
-
if (roundStatus === "intermediate" && expandedRounds.has(roundKey)) {
|
|
798
|
-
return (
|
|
799
|
-
<div key={roundKey} className="space-y-1">
|
|
986
|
+
return (
|
|
987
|
+
<div key={`proposal-${group.target}`} className="space-y-2">
|
|
988
|
+
<EvidenceCard
|
|
989
|
+
entry={group.primaryEntry}
|
|
990
|
+
roundLabel={hasHistory ? `Latest draft of ${group.entries.length}` : null}
|
|
991
|
+
roundStatus={hasHistory ? "final" : "single"}
|
|
992
|
+
prevPassRate={null}
|
|
993
|
+
currPassRate={null}
|
|
994
|
+
/>
|
|
995
|
+
{hasHistory && (
|
|
996
|
+
<div className="space-y-2">
|
|
800
997
|
<button
|
|
801
998
|
type="button"
|
|
802
|
-
onClick={() =>
|
|
803
|
-
className="flex items-center gap-1 text-[10px] text-muted-foreground hover:text-foreground
|
|
999
|
+
onClick={() => toggleProposalHistory(group.target)}
|
|
1000
|
+
className="flex items-center gap-1 px-1 text-[10px] text-muted-foreground transition-colors hover:text-foreground"
|
|
804
1001
|
>
|
|
805
|
-
|
|
806
|
-
|
|
1002
|
+
{isExpanded ? (
|
|
1003
|
+
<ChevronDownIcon className="size-3" />
|
|
1004
|
+
) : (
|
|
1005
|
+
<ChevronRightIcon className="size-3" />
|
|
1006
|
+
)}
|
|
1007
|
+
{isExpanded ? "Hide" : "Show"} {group.historyEntries.length} earlier{" "}
|
|
1008
|
+
{group.historyEntries.length === 1 ? "draft" : "drafts"}
|
|
807
1009
|
</button>
|
|
1010
|
+
{isExpanded &&
|
|
1011
|
+
group.historyEntries.map((entry, index) => (
|
|
1012
|
+
<EvidenceCard
|
|
1013
|
+
key={`proposal-history-${group.target}-${entry.timestamp}-${index}`}
|
|
1014
|
+
entry={entry}
|
|
1015
|
+
roundLabel={`Draft ${index + 1} of ${group.historyEntries.length}`}
|
|
1016
|
+
roundStatus="intermediate"
|
|
1017
|
+
prevPassRate={null}
|
|
1018
|
+
currPassRate={null}
|
|
1019
|
+
/>
|
|
1020
|
+
))}
|
|
1021
|
+
</div>
|
|
1022
|
+
)}
|
|
1023
|
+
</div>
|
|
1024
|
+
);
|
|
1025
|
+
})}
|
|
1026
|
+
</div>
|
|
1027
|
+
)}
|
|
1028
|
+
|
|
1029
|
+
{/* Validation-stage evidence — grouped by target with iteration rounds */}
|
|
1030
|
+
{Array.from(validationsByTarget.entries()).length > 0 && (
|
|
1031
|
+
<div className="space-y-2">
|
|
1032
|
+
<div className="space-y-1">
|
|
1033
|
+
<p className="text-xs font-semibold uppercase tracking-[0.18em] text-muted-foreground/80">
|
|
1034
|
+
How it was tested
|
|
1035
|
+
</p>
|
|
1036
|
+
<p className="text-sm text-muted-foreground">
|
|
1037
|
+
Validation evidence shows whether the proposal improved the eval signal.
|
|
1038
|
+
</p>
|
|
1039
|
+
</div>
|
|
1040
|
+
{Array.from(validationsByTarget.entries()).map(([target, targetEntries]) => {
|
|
1041
|
+
const hasMultipleRounds = targetEntries.length > 1;
|
|
1042
|
+
|
|
1043
|
+
return (
|
|
1044
|
+
<div key={target} className="space-y-2">
|
|
1045
|
+
{targetEntries.map((entry, i) => {
|
|
1046
|
+
const isLast = i === targetEntries.length - 1;
|
|
1047
|
+
const roundLabel = hasMultipleRounds
|
|
1048
|
+
? `Round ${i + 1} of ${targetEntries.length}`
|
|
1049
|
+
: null;
|
|
1050
|
+
const prevPassRate = i > 0 ? getAfterPassRate(targetEntries[i - 1]) : null;
|
|
1051
|
+
const currPassRate = getAfterPassRate(entry);
|
|
1052
|
+
const roundKey = `${target}-${entry.timestamp}`;
|
|
1053
|
+
const roundStatus: RoundStatus = !hasMultipleRounds
|
|
1054
|
+
? "single"
|
|
1055
|
+
: isLast
|
|
1056
|
+
? "final"
|
|
1057
|
+
: "intermediate";
|
|
1058
|
+
|
|
1059
|
+
if (roundStatus === "intermediate" && !expandedRounds.has(roundKey)) {
|
|
1060
|
+
return (
|
|
1061
|
+
<CollapsedEvidenceCard
|
|
1062
|
+
key={roundKey}
|
|
1063
|
+
entry={entry}
|
|
1064
|
+
roundLabel={roundLabel!}
|
|
1065
|
+
onExpand={() => toggleRound(roundKey)}
|
|
1066
|
+
/>
|
|
1067
|
+
);
|
|
1068
|
+
}
|
|
1069
|
+
|
|
1070
|
+
if (roundStatus === "intermediate" && expandedRounds.has(roundKey)) {
|
|
1071
|
+
return (
|
|
1072
|
+
<div key={roundKey} className="space-y-1">
|
|
1073
|
+
<button
|
|
1074
|
+
type="button"
|
|
1075
|
+
onClick={() => toggleRound(roundKey)}
|
|
1076
|
+
className="flex items-center gap-1 px-1 text-[10px] text-muted-foreground transition-colors hover:text-foreground"
|
|
1077
|
+
>
|
|
1078
|
+
<ChevronDownIcon className="size-3" />
|
|
1079
|
+
Collapse {roundLabel}
|
|
1080
|
+
</button>
|
|
1081
|
+
<EvidenceCard
|
|
1082
|
+
entry={entry}
|
|
1083
|
+
roundLabel={roundLabel}
|
|
1084
|
+
roundStatus={roundStatus}
|
|
1085
|
+
prevPassRate={prevPassRate}
|
|
1086
|
+
currPassRate={currPassRate}
|
|
1087
|
+
/>
|
|
1088
|
+
</div>
|
|
1089
|
+
);
|
|
1090
|
+
}
|
|
1091
|
+
|
|
1092
|
+
return (
|
|
808
1093
|
<EvidenceCard
|
|
1094
|
+
key={roundKey}
|
|
809
1095
|
entry={entry}
|
|
810
1096
|
roundLabel={roundLabel}
|
|
811
1097
|
roundStatus={roundStatus}
|
|
812
1098
|
prevPassRate={prevPassRate}
|
|
813
1099
|
currPassRate={currPassRate}
|
|
814
1100
|
/>
|
|
815
|
-
|
|
816
|
-
)
|
|
817
|
-
|
|
818
|
-
|
|
819
|
-
|
|
820
|
-
|
|
821
|
-
|
|
822
|
-
key={roundKey}
|
|
823
|
-
entry={entry}
|
|
824
|
-
roundLabel={roundLabel}
|
|
825
|
-
roundStatus={roundStatus}
|
|
826
|
-
prevPassRate={prevPassRate}
|
|
827
|
-
currPassRate={currPassRate}
|
|
828
|
-
/>
|
|
829
|
-
);
|
|
830
|
-
})}
|
|
831
|
-
</div>
|
|
832
|
-
);
|
|
833
|
-
})}
|
|
1101
|
+
);
|
|
1102
|
+
})}
|
|
1103
|
+
</div>
|
|
1104
|
+
);
|
|
1105
|
+
})}
|
|
1106
|
+
</div>
|
|
1107
|
+
)}
|
|
834
1108
|
|
|
835
1109
|
{entries.length === 0 && (
|
|
836
1110
|
<div className="flex items-center justify-center rounded-lg border border-dashed py-8">
|