selftune 0.2.22 → 0.2.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94) hide show
  1. package/README.md +4 -2
  2. package/apps/local-dashboard/dist/assets/index-CwOtTrUS.css +1 -0
  3. package/apps/local-dashboard/dist/assets/index-f1HQpbeH.js +59 -0
  4. package/apps/local-dashboard/dist/assets/vendor-ui-jVSaIZey.js +12 -0
  5. package/apps/local-dashboard/dist/index.html +3 -3
  6. package/cli/selftune/adapters/pi/hook.ts +273 -0
  7. package/cli/selftune/adapters/pi/install.ts +207 -0
  8. package/cli/selftune/constants.ts +10 -1
  9. package/cli/selftune/dashboard-contract.ts +14 -0
  10. package/cli/selftune/evolution/engines/judge-engine.ts +96 -0
  11. package/cli/selftune/evolution/engines/replay-engine.ts +158 -0
  12. package/cli/selftune/evolution/evidence.ts +2 -6
  13. package/cli/selftune/evolution/evolve-body.ts +73 -20
  14. package/cli/selftune/evolution/validate-body.ts +78 -42
  15. package/cli/selftune/evolution/validate-routing.ts +45 -104
  16. package/cli/selftune/hooks/skill-eval.ts +2 -1
  17. package/cli/selftune/hooks-shared/types.ts +1 -0
  18. package/cli/selftune/index.ts +23 -5
  19. package/cli/selftune/ingestors/pi-ingest.ts +726 -0
  20. package/cli/selftune/init.ts +11 -1
  21. package/cli/selftune/localdb/direct-write.ts +85 -0
  22. package/cli/selftune/localdb/materialize.ts +6 -7
  23. package/cli/selftune/localdb/queries.ts +126 -0
  24. package/cli/selftune/localdb/schema.ts +38 -0
  25. package/cli/selftune/observability.ts +8 -1
  26. package/cli/selftune/orchestrate.ts +43 -0
  27. package/cli/selftune/registry/client.ts +74 -0
  28. package/cli/selftune/registry/history.ts +54 -0
  29. package/cli/selftune/registry/index.ts +90 -0
  30. package/cli/selftune/registry/install.ts +141 -0
  31. package/cli/selftune/registry/list.ts +44 -0
  32. package/cli/selftune/registry/push.ts +171 -0
  33. package/cli/selftune/registry/rollback.ts +49 -0
  34. package/cli/selftune/registry/status.ts +62 -0
  35. package/cli/selftune/registry/sync.ts +125 -0
  36. package/cli/selftune/repair/skill-usage.ts +4 -1
  37. package/cli/selftune/status.ts +31 -0
  38. package/cli/selftune/sync.ts +127 -23
  39. package/cli/selftune/types.ts +2 -1
  40. package/cli/selftune/utils/jsonl.ts +1 -30
  41. package/cli/selftune/utils/skill-discovery.ts +22 -0
  42. package/node_modules/@selftune/telemetry-contract/fixtures/evidence-only-push.ts +1 -1
  43. package/node_modules/@selftune/telemetry-contract/fixtures/golden.test.ts +0 -1
  44. package/node_modules/@selftune/telemetry-contract/fixtures/partial-push-unresolved-parents.ts +1 -1
  45. package/node_modules/@selftune/telemetry-contract/package.json +1 -1
  46. package/node_modules/@selftune/telemetry-contract/src/index.ts +1 -0
  47. package/node_modules/@selftune/telemetry-contract/src/schemas.ts +22 -4
  48. package/node_modules/@selftune/telemetry-contract/src/types.ts +1 -12
  49. package/node_modules/@selftune/telemetry-contract/tests/compatibility.test.ts +0 -1
  50. package/package.json +1 -1
  51. package/packages/telemetry-contract/fixtures/evidence-only-push.ts +1 -1
  52. package/packages/telemetry-contract/fixtures/golden.test.ts +0 -1
  53. package/packages/telemetry-contract/fixtures/partial-push-unresolved-parents.ts +1 -1
  54. package/packages/telemetry-contract/package.json +1 -1
  55. package/packages/telemetry-contract/src/index.ts +1 -0
  56. package/packages/telemetry-contract/src/schemas.ts +22 -4
  57. package/packages/telemetry-contract/src/types.ts +1 -12
  58. package/packages/telemetry-contract/tests/compatibility.test.ts +0 -1
  59. package/packages/ui/AGENTS.md +16 -0
  60. package/packages/ui/README.md +1 -1
  61. package/packages/ui/package.json +1 -1
  62. package/packages/ui/src/components/ActivityTimeline.tsx +152 -168
  63. package/packages/ui/src/components/AnalyticsCharts.tsx +344 -0
  64. package/packages/ui/src/components/EvidenceViewer.tsx +153 -443
  65. package/packages/ui/src/components/EvolutionTimeline.tsx +34 -87
  66. package/packages/ui/src/components/InfoTip.tsx +1 -2
  67. package/packages/ui/src/components/InvocationsPanel.tsx +413 -0
  68. package/packages/ui/src/components/JobHistoryTimeline.tsx +156 -0
  69. package/packages/ui/src/components/OrchestrateRunsPanel.tsx +18 -36
  70. package/packages/ui/src/components/OverviewPanels.tsx +652 -0
  71. package/packages/ui/src/components/PipelineStatusBar.tsx +65 -0
  72. package/packages/ui/src/components/SkillReportGuide.tsx +215 -0
  73. package/packages/ui/src/components/SkillReportPanels.tsx +919 -0
  74. package/packages/ui/src/components/SkillsLibrary.tsx +437 -0
  75. package/packages/ui/src/components/index.ts +56 -1
  76. package/packages/ui/src/components/section-cards.tsx +18 -35
  77. package/packages/ui/src/components/skill-health-grid.tsx +47 -37
  78. package/packages/ui/src/lib/constants.tsx +0 -1
  79. package/packages/ui/src/primitives/card.tsx +1 -1
  80. package/packages/ui/src/primitives/checkbox.tsx +1 -1
  81. package/packages/ui/src/primitives/dropdown-menu.tsx +2 -2
  82. package/packages/ui/src/primitives/select.tsx +2 -2
  83. package/packages/ui/src/types.ts +172 -4
  84. package/skill/SKILL.md +18 -4
  85. package/skill/Workflows/Ingest.md +60 -2
  86. package/skill/Workflows/Initialize.md +8 -5
  87. package/skill/Workflows/PlatformHooks.md +19 -3
  88. package/skill/Workflows/Registry.md +99 -0
  89. package/skill/Workflows/Sync.md +3 -1
  90. package/apps/local-dashboard/dist/assets/index-D8O-RG1I.js +0 -60
  91. package/apps/local-dashboard/dist/assets/index-_EcLywDg.css +0 -1
  92. package/apps/local-dashboard/dist/assets/vendor-ui-CGEmUayx.js +0 -12
  93. package/cli/selftune/utils/html.ts +0 -27
  94. package/packages/ui/src/components/RecentActivityFeed.tsx +0 -117
@@ -1,3 +1,8 @@
1
+ import { useMemo, useState } from "react";
2
+ import { Badge } from "../primitives/badge";
3
+ import { Card, CardContent, CardHeader, CardTitle } from "../primitives/card";
4
+ import type { EvidenceEntry, EvolutionEntry } from "../types";
5
+ import { formatRate, timeAgo } from "../lib/format";
1
6
  import {
2
7
  CheckCircleIcon,
3
8
  ChevronDownIcon,
@@ -5,20 +10,25 @@ import {
5
10
  CircleDotIcon,
6
11
  FileTextIcon,
7
12
  InfoIcon,
13
+ RocketIcon,
14
+ ShieldCheckIcon,
8
15
  ShieldAlertIcon,
9
16
  XCircleIcon,
17
+ UndoIcon,
18
+ ArrowRightIcon,
10
19
  TrendingUpIcon,
11
20
  TrendingDownIcon,
12
21
  ListChecksIcon,
13
22
  } from "lucide-react";
14
- import { useMemo, useState } from "react";
15
- import type { ReactNode } from "react";
16
23
  import Markdown from "react-markdown";
17
24
 
18
- import { formatRate, timeAgo } from "../lib/format";
19
- import { Badge } from "../primitives/badge";
20
- import { Card, CardContent, CardHeader, CardTitle } from "../primitives/card";
21
- import type { EvidenceEntry, EvolutionEntry } from "../types";
25
+ const ACTION_ICON: Record<string, React.ReactNode> = {
26
+ created: <CircleDotIcon className="size-3.5" />,
27
+ validated: <ShieldCheckIcon className="size-3.5" />,
28
+ deployed: <RocketIcon className="size-3.5" />,
29
+ rejected: <XCircleIcon className="size-3.5" />,
30
+ rolled_back: <UndoIcon className="size-3.5" />,
31
+ };
22
32
 
23
33
  const ACTION_VARIANT: Record<string, "default" | "secondary" | "destructive" | "outline"> = {
24
34
  created: "outline",
@@ -32,117 +42,15 @@ interface Props {
32
42
  proposalId: string;
33
43
  evolution: EvolutionEntry[];
34
44
  evidence: EvidenceEntry[];
35
- showContextBanner?: boolean;
36
- }
37
-
38
- function getValidationModeMeta(mode?: string | null): {
39
- label: string;
40
- variant: "default" | "secondary" | "destructive" | "outline";
41
- description: string;
42
- } | null {
43
- switch (mode) {
44
- case "host_replay":
45
- return {
46
- label: "Replay-backed validation",
47
- variant: "default",
48
- description:
49
- "Validated against a controlled replay fixture instead of a free-form judge prompt.",
50
- };
51
- case "llm_judge":
52
- return {
53
- label: "Model judgment",
54
- variant: "secondary",
55
- description: "Validated by an LLM trigger check rather than a replay fixture.",
56
- };
57
- case "structural_guard":
58
- return {
59
- label: "Structural guard",
60
- variant: "outline",
61
- description:
62
- "Only deterministic structural checks ran; no replay or judge validation was needed.",
63
- };
64
- default:
65
- return null;
66
- }
67
- }
68
-
69
- function sentenceCase(value: string): string {
70
- return value.replace(/_/g, " ");
71
- }
72
-
73
- function getOutcomePresentation(action?: string | null): {
74
- title: string;
75
- summary: string;
76
- tone: string;
77
- icon: ReactNode;
78
- liveSkillNote: string;
79
- } {
80
- switch (action) {
81
- case "rejected":
82
- return {
83
- title: "Proposal rejected",
84
- summary: "Selftune proposed a change, but blocked it before your live skill was updated.",
85
- tone: "border-red-500/20 bg-red-500/8 text-red-700 dark:text-red-50",
86
- icon: <XCircleIcon className="size-4 text-red-400" />,
87
- liveSkillNote: "Your live skill is unchanged.",
88
- };
89
- case "validated":
90
- return {
91
- title: "Proposal validated",
92
- summary: "The proposed change improved the eval signal and is ready for review or deploy.",
93
- tone: "border-emerald-500/20 bg-emerald-500/8 text-emerald-700 dark:text-emerald-50",
94
- icon: <CheckCircleIcon className="size-4 text-emerald-400" />,
95
- liveSkillNote: "Your live skill has not changed until this proposal is deployed.",
96
- };
97
- case "deployed":
98
- return {
99
- title: "Proposal deployed",
100
- summary: "The proposed change passed validation and was applied to the live skill.",
101
- tone: "border-primary/25 bg-primary/8 text-foreground",
102
- icon: <TrendingUpIcon className="size-4 text-primary" />,
103
- liveSkillNote: "Your live skill now includes this change.",
104
- };
105
- case "rolled_back":
106
- return {
107
- title: "Proposal rolled back",
108
- summary: "A deployed change was later reversed because follow-up evidence showed risk.",
109
- tone: "border-amber-500/20 bg-amber-500/8 text-amber-800 dark:text-amber-50",
110
- icon: <TrendingDownIcon className="size-4 text-amber-400" />,
111
- liveSkillNote: "Your live skill no longer uses this proposal.",
112
- };
113
- case "created":
114
- default:
115
- return {
116
- title: "Proposal under review",
117
- summary: "Selftune found a possible improvement and recorded the proposed change.",
118
- tone: "border-border/30 bg-muted/25 text-foreground",
119
- icon: <CircleDotIcon className="size-4 text-muted-foreground" />,
120
- liveSkillNote: "Your live skill is unchanged until a proposal is validated and deployed.",
121
- };
122
- }
123
45
  }
124
46
 
125
47
  /** Parse YAML-ish frontmatter from text, returns { meta, body } */
126
48
  function parseFrontmatter(text: string): { meta: Record<string, string>; body: string } {
127
- const lines = text.split("\n");
128
- if (lines.length < 3 || lines[0].trim() !== "---") {
129
- return { meta: {}, body: text };
130
- }
131
-
132
- let closingIndex = -1;
133
- for (let i = 1; i < lines.length; i++) {
134
- if (lines[i].trim() === "---") {
135
- closingIndex = i;
136
- break;
137
- }
138
- }
139
-
140
- if (closingIndex === -1) {
141
- return { meta: {}, body: text };
142
- }
49
+ const match = text.match(/^---\s*\n([\s\S]*?)\n---\s*\n([\s\S]*)$/);
50
+ if (!match) return { meta: {}, body: text };
143
51
 
144
52
  const meta: Record<string, string> = {};
145
- for (const line of lines.slice(1, closingIndex)) {
53
+ for (const line of match[1].split("\n")) {
146
54
  const idx = line.indexOf(":");
147
55
  if (idx > 0) {
148
56
  const key = line.slice(0, idx).trim();
@@ -150,8 +58,7 @@ function parseFrontmatter(text: string): { meta: Record<string, string>; body: s
150
58
  if (key && val) meta[key] = val;
151
59
  }
152
60
  }
153
-
154
- return { meta, body: lines.slice(closingIndex + 1).join("\n") };
61
+ return { meta, body: match[2] };
155
62
  }
156
63
 
157
64
  function FrontmatterTable({ meta }: { meta: Record<string, string> }) {
@@ -212,7 +119,7 @@ function SkillContentBlock({
212
119
  }
213
120
 
214
121
  /** Smart formatting for a single validation value */
215
- function formatValidationValue(key: string, val: unknown): ReactNode {
122
+ function formatValidationValue(key: string, val: unknown): React.ReactNode {
216
123
  // Booleans
217
124
  if (typeof val === "boolean") {
218
125
  return val ? (
@@ -311,18 +218,12 @@ function ValidationResults({ validation }: { validation: Record<string, unknown>
311
218
  regressions,
312
219
  new_passes,
313
220
  per_entry_results,
314
- validation_mode,
315
- validation_agent,
316
- validation_fixture_id,
317
- validation_evidence_ref,
318
221
  ...rest
319
222
  } = validation;
320
223
 
321
224
  const regressionsArr = Array.isArray(regressions) ? regressions : [];
322
225
  const newPassesArr = Array.isArray(new_passes) ? new_passes : [];
323
226
  const perEntryArr = Array.isArray(per_entry_results) ? per_entry_results : [];
324
- const validationMeta =
325
- typeof validation_mode === "string" ? getValidationModeMeta(validation_mode) : null;
326
227
 
327
228
  return (
328
229
  <div className="rounded-md border bg-muted/30 p-3 space-y-3">
@@ -333,34 +234,6 @@ function ValidationResults({ validation }: { validation: Record<string, unknown>
333
234
  </span>
334
235
  </p>
335
236
 
336
- {validationMeta && (
337
- <div className="rounded-md border bg-card px-3 py-2">
338
- <div className="flex flex-wrap items-center gap-2">
339
- <Badge variant={validationMeta.variant} className="text-[10px]">
340
- {validationMeta.label}
341
- </Badge>
342
- {typeof validation_agent === "string" && validation_agent.trim() && (
343
- <Badge variant="outline" className="text-[10px]">
344
- agent: {validation_agent}
345
- </Badge>
346
- )}
347
- {typeof validation_fixture_id === "string" && validation_fixture_id.trim() && (
348
- <Badge variant="outline" className="text-[10px]">
349
- fixture: {validation_fixture_id}
350
- </Badge>
351
- )}
352
- </div>
353
- <p className="mt-1 text-[11px] leading-relaxed text-muted-foreground">
354
- {validationMeta.description}
355
- </p>
356
- {typeof validation_evidence_ref === "string" && validation_evidence_ref.trim() && (
357
- <p className="mt-1 text-[10px] font-mono text-muted-foreground/70">
358
- {validation_evidence_ref}
359
- </p>
360
- )}
361
- </div>
362
- )}
363
-
364
237
  {/* Summary bar */}
365
238
  <div className="flex items-center gap-3 flex-wrap">
366
239
  {improved !== undefined && (
@@ -375,13 +248,7 @@ function ValidationResults({ validation }: { validation: Record<string, unknown>
375
248
  )}
376
249
  {typeof net_change === "number" && (
377
250
  <span
378
- className={`text-xs font-mono font-semibold ${
379
- net_change > 0
380
- ? "text-emerald-600 dark:text-emerald-400"
381
- : net_change < 0
382
- ? "text-red-500"
383
- : "text-muted-foreground"
384
- }`}
251
+ className={`text-xs font-mono font-semibold ${net_change > 0 ? "text-emerald-600 dark:text-emerald-400" : "text-red-500"}`}
385
252
  >
386
253
  {net_change > 0 ? "+" : ""}
387
254
  {(net_change * 100).toFixed(1)}%
@@ -726,12 +593,7 @@ function CollapsedEvidenceCard({
726
593
  );
727
594
  }
728
595
 
729
- export function EvidenceViewer({
730
- proposalId,
731
- evolution,
732
- evidence,
733
- showContextBanner = true,
734
- }: Props) {
596
+ export function EvidenceViewer({ proposalId, evolution, evidence }: Props) {
735
597
  const steps = useMemo(
736
598
  () =>
737
599
  evolution
@@ -750,7 +612,6 @@ export function EvidenceViewer({
750
612
 
751
613
  // Track which earlier rounds are manually expanded
752
614
  const [expandedRounds, setExpandedRounds] = useState<Set<string>>(new Set());
753
- const [expandedProposalTargets, setExpandedProposalTargets] = useState<Set<string>>(new Set());
754
615
 
755
616
  const toggleRound = (key: string) => {
756
617
  setExpandedRounds((prev) => {
@@ -761,15 +622,6 @@ export function EvidenceViewer({
761
622
  });
762
623
  };
763
624
 
764
- const toggleProposalHistory = (target: string) => {
765
- setExpandedProposalTargets((prev) => {
766
- const next = new Set(prev);
767
- if (next.has(target)) next.delete(target);
768
- else next.add(target);
769
- return next;
770
- });
771
- };
772
-
773
625
  const snapshot = useMemo(() => {
774
626
  for (let i = steps.length - 1; i >= 0; i--) {
775
627
  if (steps[i].eval_snapshot) return steps[i].eval_snapshot as Record<string, unknown>;
@@ -793,318 +645,176 @@ export function EvidenceViewer({
793
645
  return { proposalEntries: proposals, validationsByTarget: validationMap };
794
646
  }, [entries]);
795
647
 
796
- const latestStep = steps[steps.length - 1] ?? null;
797
- const lifecycleLabel = steps.map((step) => step.action.replace("_", " ")).join(" -> ");
798
- const outcome = getOutcomePresentation(latestStep?.action);
799
- const validationMeta = getValidationModeMeta(latestStep?.validation_mode);
800
- const latestProposalConfidence = useMemo(() => {
801
- for (let i = proposalEntries.length - 1; i >= 0; i--) {
802
- if (proposalEntries[i].confidence !== null) {
803
- return proposalEntries[i].confidence;
804
- }
805
- }
806
- return null;
807
- }, [proposalEntries]);
808
- const proposalCards = useMemo(() => {
809
- const grouped = new Map<string, EvidenceEntry[]>();
810
- for (const entry of proposalEntries) {
811
- const key = entry.target || "proposal";
812
- const group = grouped.get(key) ?? [];
813
- group.push(entry);
814
- grouped.set(key, group);
815
- }
816
-
817
- return Array.from(grouped.entries()).map(([target, group]) => {
818
- let richest = group[group.length - 1];
819
- for (let i = group.length - 1; i >= 0; i--) {
820
- if (group[i].original_text || group[i].proposed_text || group[i].rationale) {
821
- richest = group[i];
822
- break;
823
- }
824
- }
825
- const primaryIndex = group.findIndex((entry) => entry === richest);
826
- return {
827
- target,
828
- primaryEntry: richest,
829
- historyEntries: group.filter((_, index) => index !== primaryIndex),
830
- entries: group,
831
- };
832
- });
833
- }, [proposalEntries]);
834
-
835
648
  return (
836
649
  <div className="space-y-4">
837
650
  {/* Context banner */}
838
- {showContextBanner && (
839
- <div className="flex items-start gap-2.5 rounded-lg border border-primary/20 bg-primary/5 px-3.5 py-2.5">
840
- <InfoIcon className="mt-0.5 size-4 shrink-0 text-primary/60" />
841
- <p className="text-xs leading-relaxed text-muted-foreground">
842
- This view shows the complete evidence trail for a skill evolution proposal &mdash; how
843
- the skill was changed, the eval test results before and after, and whether the change
844
- improved performance.
845
- </p>
846
- </div>
847
- )}
651
+ <div className="flex items-start gap-2.5 rounded-lg border border-primary/20 bg-primary/5 px-3.5 py-2.5">
652
+ <InfoIcon className="size-4 text-primary/60 shrink-0 mt-0.5" />
653
+ <p className="text-xs text-muted-foreground leading-relaxed">
654
+ This view shows the complete evidence trail for a skill evolution proposal &mdash; how the
655
+ skill was changed, the eval test results before and after, and whether the change improved
656
+ performance.
657
+ </p>
658
+ </div>
848
659
 
849
- <Card className="border-border/15 bg-muted/10">
660
+ {/* Proposal journey */}
661
+ <Card>
850
662
  <CardHeader className="pb-3">
851
- <CardTitle className="flex flex-wrap items-center gap-2 text-sm">
852
- <span>Proposal Summary</span>
663
+ <CardTitle className="text-sm flex items-center gap-2">
664
+ <span>Proposal Journey</span>
853
665
  <span className="font-mono text-xs text-muted-foreground">
854
666
  #{proposalId.slice(0, 12)}
855
667
  </span>
856
668
  </CardTitle>
857
669
  </CardHeader>
858
670
  <CardContent className="space-y-3">
859
- <div className={`rounded-lg border px-4 py-3 ${outcome.tone}`}>
860
- <div className="flex items-start gap-3">
861
- <div className="mt-0.5 shrink-0">{outcome.icon}</div>
862
- <div className="min-w-0 space-y-1.5">
863
- <div className="flex flex-wrap items-center gap-2">
864
- <p className="text-sm font-semibold">{outcome.title}</p>
865
- {latestStep && (
866
- <Badge
867
- variant={ACTION_VARIANT[latestStep.action] ?? "secondary"}
868
- className="text-[10px] capitalize"
869
- >
870
- {sentenceCase(latestStep.action)}
871
- </Badge>
872
- )}
671
+ <div className="flex items-center gap-2 flex-wrap">
672
+ {steps.map((step, i) => (
673
+ <div key={`${step.action}-${i}`} className="contents">
674
+ {i > 0 && <ArrowRightIcon className="size-3 text-muted-foreground/50 shrink-0" />}
675
+ <div className="flex items-center gap-1.5 rounded-md border px-2.5 py-1.5 bg-card">
676
+ {ACTION_ICON[step.action]}
677
+ <Badge
678
+ variant={ACTION_VARIANT[step.action] ?? "secondary"}
679
+ className="text-[10px] capitalize"
680
+ >
681
+ {step.action.replace("_", " ")}
682
+ </Badge>
683
+ <span className="text-[10px] text-muted-foreground">
684
+ {timeAgo(step.timestamp)}
685
+ </span>
873
686
  </div>
874
- <p className="text-sm leading-6 text-current/90">{outcome.summary}</p>
875
- {latestStep?.details && (
876
- <div className="rounded-md bg-black/10 px-3 py-2 text-sm leading-6 text-current/90 dark:bg-black/20">
877
- {latestStep.details}
878
- </div>
879
- )}
880
- <p className="text-xs font-medium text-current/75">{outcome.liveSkillNote}</p>
881
687
  </div>
882
- </div>
883
- </div>
884
-
885
- <div className="flex flex-wrap items-center gap-2">
886
- {latestStep?.timestamp && (
887
- <span className="text-[10px] font-mono text-muted-foreground">
888
- {timeAgo(latestStep.timestamp)}
889
- </span>
890
- )}
891
- <Badge variant="outline" className="text-[10px]">
892
- {entries.length} evidence {entries.length === 1 ? "row" : "rows"}
893
- </Badge>
894
- {validationMeta && (
895
- <Badge variant={validationMeta.variant} className="text-[10px]">
896
- {validationMeta.label}
897
- </Badge>
898
- )}
899
- {latestStep?.validation_fixture_id && (
900
- <Badge variant="outline" className="text-[10px]">
901
- fixture: {latestStep.validation_fixture_id}
902
- </Badge>
903
- )}
904
- {latestProposalConfidence != null && (
905
- <Badge variant="secondary" className="text-[10px]">
906
- {Math.round(latestProposalConfidence * 100)}% confidence
907
- </Badge>
908
- )}
909
- </div>
910
-
911
- {validationMeta && (
912
- <p className="text-[11px] leading-relaxed text-muted-foreground">
913
- {validationMeta.description}
914
- </p>
915
- )}
916
-
917
- <div className="flex flex-wrap items-center gap-2 text-[11px] text-muted-foreground">
918
- <span className="font-headline uppercase tracking-[0.16em] text-muted-foreground/80">
919
- Lifecycle
920
- </span>
921
- <span>{lifecycleLabel ? sentenceCase(lifecycleLabel) : "No lifecycle recorded"}</span>
688
+ ))}
922
689
  </div>
923
690
 
924
- {typeof snapshot?.net_change === "number" &&
925
- typeof snapshot.before_pass_rate === "number" &&
926
- typeof snapshot.after_pass_rate === "number" && (
927
- <div className="flex flex-wrap items-center gap-3 rounded-md border bg-muted/20 px-3 py-2">
691
+ {/* Eval snapshot pass rate change */}
692
+ {snapshot && (
693
+ <div className="flex items-center gap-3 rounded-md border bg-muted/20 px-3 py-2">
694
+ {typeof snapshot.net_change === "number" && (
928
695
  <div className="flex items-center gap-1">
929
- {snapshot.net_change > 0 ? (
696
+ {(snapshot.net_change as number) > 0 ? (
930
697
  <TrendingUpIcon className="size-3.5 text-emerald-500" />
931
- ) : snapshot.net_change < 0 ? (
932
- <TrendingDownIcon className="size-3.5 text-red-500" />
933
698
  ) : (
934
- <CircleDotIcon className="size-3.5 text-muted-foreground" />
699
+ <TrendingDownIcon className="size-3.5 text-red-500" />
935
700
  )}
936
701
  <span
937
- className={`text-sm font-mono font-semibold ${
938
- snapshot.net_change > 0
939
- ? "text-emerald-600 dark:text-emerald-400"
940
- : snapshot.net_change < 0
941
- ? "text-red-500"
942
- : "text-muted-foreground"
943
- }`}
702
+ className={`text-sm font-semibold font-mono ${(snapshot.net_change as number) > 0 ? "text-emerald-600 dark:text-emerald-400" : "text-red-500"}`}
944
703
  >
945
- {snapshot.net_change > 0 ? "+" : ""}
946
- {Math.round(snapshot.net_change * 100)}%
704
+ {(snapshot.net_change as number) > 0 ? "+" : ""}
705
+ {Math.round((snapshot.net_change as number) * 100)}%
947
706
  </span>
948
707
  </div>
949
- <span className="text-xs font-mono text-muted-foreground">
950
- {Math.round(snapshot.before_pass_rate * 100)}% &rarr;{" "}
951
- {Math.round(snapshot.after_pass_rate * 100)}%
952
- </span>
953
- {snapshot.net_change > 0 ? (
954
- <Badge variant="default" className="text-[10px]">
955
- Improved
956
- </Badge>
957
- ) : snapshot.net_change < 0 ? (
958
- <Badge variant="destructive" className="text-[10px]">
959
- Regressed
960
- </Badge>
961
- ) : (
962
- <Badge variant="outline" className="text-[10px]">
963
- No change
964
- </Badge>
708
+ )}
709
+ {typeof snapshot.before_pass_rate === "number" &&
710
+ typeof snapshot.after_pass_rate === "number" && (
711
+ <span className="text-xs text-muted-foreground font-mono">
712
+ {Math.round((snapshot.before_pass_rate as number) * 100)}% &rarr;{" "}
713
+ {Math.round((snapshot.after_pass_rate as number) * 100)}%
714
+ </span>
965
715
  )}
966
- </div>
967
- )}
716
+ {snapshot.improved !== undefined && (
717
+ <Badge
718
+ variant={snapshot.improved ? "default" : "destructive"}
719
+ className="text-[10px]"
720
+ >
721
+ {snapshot.improved ? "Improved" : "Regressed"}
722
+ </Badge>
723
+ )}
724
+ </div>
725
+ )}
726
+
727
+ {/* Details from last step */}
728
+ {steps.length > 0 && steps[steps.length - 1].details && (
729
+ <p className="text-xs text-muted-foreground leading-relaxed">
730
+ {steps[steps.length - 1].details}
731
+ </p>
732
+ )}
968
733
  </CardContent>
969
734
  </Card>
970
735
 
971
736
  {/* Proposal-stage evidence — standalone cards showing original/proposed text */}
972
- {proposalCards.length > 0 && (
973
- <div className="space-y-2">
974
- <div className="space-y-1">
975
- <p className="text-xs font-semibold uppercase tracking-[0.18em] text-muted-foreground/80">
976
- What changed
977
- </p>
978
- <p className="text-sm text-muted-foreground">
979
- This is the actual skill text selftune proposed changing.
980
- </p>
981
- </div>
982
- {proposalCards.map((group) => {
983
- const hasHistory = group.historyEntries.length > 0;
984
- const isExpanded = expandedProposalTargets.has(group.target);
737
+ {proposalEntries.map((entry) => (
738
+ <EvidenceCard
739
+ key={`proposal-${entry.target}-${entry.timestamp}`}
740
+ entry={entry}
741
+ roundLabel={null}
742
+ roundStatus="single"
743
+ prevPassRate={null}
744
+ currPassRate={null}
745
+ />
746
+ ))}
985
747
 
986
- return (
987
- <div key={`proposal-${group.target}`} className="space-y-2">
988
- <EvidenceCard
989
- entry={group.primaryEntry}
990
- roundLabel={hasHistory ? `Latest draft of ${group.entries.length}` : null}
991
- roundStatus={hasHistory ? "final" : "single"}
992
- prevPassRate={null}
993
- currPassRate={null}
994
- />
995
- {hasHistory && (
996
- <div className="space-y-2">
748
+ {/* Validation-stage evidence — grouped by target with iteration rounds */}
749
+ {Array.from(validationsByTarget.entries()).map(([target, targetEntries]) => {
750
+ const hasMultipleRounds = targetEntries.length > 1;
751
+
752
+ return (
753
+ <div key={target} className="space-y-2">
754
+ {targetEntries.map((entry, i) => {
755
+ const isLast = i === targetEntries.length - 1;
756
+ const roundLabel = hasMultipleRounds
757
+ ? `Round ${i + 1} of ${targetEntries.length}`
758
+ : null;
759
+ const prevPassRate = i > 0 ? getAfterPassRate(targetEntries[i - 1]) : null;
760
+ const currPassRate = getAfterPassRate(entry);
761
+ const roundKey = `${target}-${entry.timestamp}`;
762
+ const roundStatus: RoundStatus = !hasMultipleRounds
763
+ ? "single"
764
+ : isLast
765
+ ? "final"
766
+ : "intermediate";
767
+
768
+ // Earlier rounds: collapsed by default
769
+ if (roundStatus === "intermediate" && !expandedRounds.has(roundKey)) {
770
+ return (
771
+ <CollapsedEvidenceCard
772
+ key={roundKey}
773
+ entry={entry}
774
+ roundLabel={roundLabel!}
775
+ onExpand={() => toggleRound(roundKey)}
776
+ />
777
+ );
778
+ }
779
+
780
+ // Expanded earlier round — show with collapse toggle
781
+ if (roundStatus === "intermediate" && expandedRounds.has(roundKey)) {
782
+ return (
783
+ <div key={roundKey} className="space-y-1">
997
784
  <button
998
785
  type="button"
999
- onClick={() => toggleProposalHistory(group.target)}
1000
- className="flex items-center gap-1 px-1 text-[10px] text-muted-foreground transition-colors hover:text-foreground"
786
+ onClick={() => toggleRound(roundKey)}
787
+ className="flex items-center gap-1 text-[10px] text-muted-foreground hover:text-foreground transition-colors px-1"
1001
788
  >
1002
- {isExpanded ? (
1003
- <ChevronDownIcon className="size-3" />
1004
- ) : (
1005
- <ChevronRightIcon className="size-3" />
1006
- )}
1007
- {isExpanded ? "Hide" : "Show"} {group.historyEntries.length} earlier{" "}
1008
- {group.historyEntries.length === 1 ? "draft" : "drafts"}
789
+ <ChevronDownIcon className="size-3" />
790
+ Collapse {roundLabel}
1009
791
  </button>
1010
- {isExpanded &&
1011
- group.historyEntries.map((entry, index) => (
1012
- <EvidenceCard
1013
- key={`proposal-history-${group.target}-${entry.timestamp}-${index}`}
1014
- entry={entry}
1015
- roundLabel={`Draft ${index + 1} of ${group.historyEntries.length}`}
1016
- roundStatus="intermediate"
1017
- prevPassRate={null}
1018
- currPassRate={null}
1019
- />
1020
- ))}
1021
- </div>
1022
- )}
1023
- </div>
1024
- );
1025
- })}
1026
- </div>
1027
- )}
1028
-
1029
- {/* Validation-stage evidence — grouped by target with iteration rounds */}
1030
- {Array.from(validationsByTarget.entries()).length > 0 && (
1031
- <div className="space-y-2">
1032
- <div className="space-y-1">
1033
- <p className="text-xs font-semibold uppercase tracking-[0.18em] text-muted-foreground/80">
1034
- How it was tested
1035
- </p>
1036
- <p className="text-sm text-muted-foreground">
1037
- Validation evidence shows whether the proposal improved the eval signal.
1038
- </p>
1039
- </div>
1040
- {Array.from(validationsByTarget.entries()).map(([target, targetEntries]) => {
1041
- const hasMultipleRounds = targetEntries.length > 1;
1042
-
1043
- return (
1044
- <div key={target} className="space-y-2">
1045
- {targetEntries.map((entry, i) => {
1046
- const isLast = i === targetEntries.length - 1;
1047
- const roundLabel = hasMultipleRounds
1048
- ? `Round ${i + 1} of ${targetEntries.length}`
1049
- : null;
1050
- const prevPassRate = i > 0 ? getAfterPassRate(targetEntries[i - 1]) : null;
1051
- const currPassRate = getAfterPassRate(entry);
1052
- const roundKey = `${target}-${entry.timestamp}`;
1053
- const roundStatus: RoundStatus = !hasMultipleRounds
1054
- ? "single"
1055
- : isLast
1056
- ? "final"
1057
- : "intermediate";
1058
-
1059
- if (roundStatus === "intermediate" && !expandedRounds.has(roundKey)) {
1060
- return (
1061
- <CollapsedEvidenceCard
1062
- key={roundKey}
1063
- entry={entry}
1064
- roundLabel={roundLabel!}
1065
- onExpand={() => toggleRound(roundKey)}
1066
- />
1067
- );
1068
- }
1069
-
1070
- if (roundStatus === "intermediate" && expandedRounds.has(roundKey)) {
1071
- return (
1072
- <div key={roundKey} className="space-y-1">
1073
- <button
1074
- type="button"
1075
- onClick={() => toggleRound(roundKey)}
1076
- className="flex items-center gap-1 px-1 text-[10px] text-muted-foreground transition-colors hover:text-foreground"
1077
- >
1078
- <ChevronDownIcon className="size-3" />
1079
- Collapse {roundLabel}
1080
- </button>
1081
- <EvidenceCard
1082
- entry={entry}
1083
- roundLabel={roundLabel}
1084
- roundStatus={roundStatus}
1085
- prevPassRate={prevPassRate}
1086
- currPassRate={currPassRate}
1087
- />
1088
- </div>
1089
- );
1090
- }
1091
-
1092
- return (
1093
792
  <EvidenceCard
1094
- key={roundKey}
1095
793
  entry={entry}
1096
794
  roundLabel={roundLabel}
1097
795
  roundStatus={roundStatus}
1098
796
  prevPassRate={prevPassRate}
1099
797
  currPassRate={currPassRate}
1100
798
  />
1101
- );
1102
- })}
1103
- </div>
1104
- );
1105
- })}
1106
- </div>
1107
- )}
799
+ </div>
800
+ );
801
+ }
802
+
803
+ // Final round (or single entry) — always expanded
804
+ return (
805
+ <EvidenceCard
806
+ key={roundKey}
807
+ entry={entry}
808
+ roundLabel={roundLabel}
809
+ roundStatus={roundStatus}
810
+ prevPassRate={prevPassRate}
811
+ currPassRate={currPassRate}
812
+ />
813
+ );
814
+ })}
815
+ </div>
816
+ );
817
+ })}
1108
818
 
1109
819
  {entries.length === 0 && (
1110
820
  <div className="flex items-center justify-center rounded-lg border border-dashed py-8">