selftune 0.2.9 → 0.2.12

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (140)
  1. package/README.md +35 -35
  2. package/apps/local-dashboard/dist/assets/index-4_dAY17K.js +16 -0
  3. package/apps/local-dashboard/dist/assets/index-BxV5WZHc.css +2 -0
  4. package/apps/local-dashboard/dist/assets/rolldown-runtime-Dw2cE7zH.js +1 -0
  5. package/apps/local-dashboard/dist/assets/vendor-react-CKkiCskZ.js +11 -0
  6. package/apps/local-dashboard/dist/assets/vendor-table-pHbDxq36.js +8 -0
  7. package/apps/local-dashboard/dist/assets/vendor-ui-7xD7fNEU.js +12 -0
  8. package/apps/local-dashboard/dist/index.html +16 -15
  9. package/bin/selftune.cjs +1 -1
  10. package/cli/selftune/activation-rules.ts +1 -0
  11. package/cli/selftune/alpha-upload/build-payloads.ts +18 -2
  12. package/cli/selftune/alpha-upload/stage-canonical.ts +94 -0
  13. package/cli/selftune/auth/device-code.ts +32 -0
  14. package/cli/selftune/auto-update.ts +12 -0
  15. package/cli/selftune/badge/badge.ts +1 -0
  16. package/cli/selftune/canonical-export.ts +5 -0
  17. package/cli/selftune/claude-agents.ts +154 -0
  18. package/cli/selftune/contribute/bundle.ts +1 -0
  19. package/cli/selftune/contribute/contribute.ts +1 -0
  20. package/cli/selftune/cron/setup.ts +2 -2
  21. package/cli/selftune/dashboard-server.ts +1 -0
  22. package/cli/selftune/eval/hooks-to-evals.ts +1 -0
  23. package/cli/selftune/eval/import-skillsbench.ts +1 -0
  24. package/cli/selftune/eval/synthetic-evals.ts +2 -3
  25. package/cli/selftune/eval/unit-test.ts +1 -0
  26. package/cli/selftune/evolution/deploy-proposal.ts +9 -238
  27. package/cli/selftune/evolution/evolve-body.ts +93 -6
  28. package/cli/selftune/evolution/evolve.ts +3 -7
  29. package/cli/selftune/evolution/propose-body.ts +3 -2
  30. package/cli/selftune/evolution/propose-routing.ts +3 -2
  31. package/cli/selftune/evolution/refine-body.ts +3 -2
  32. package/cli/selftune/evolution/rollback.ts +1 -1
  33. package/cli/selftune/export.ts +1 -0
  34. package/cli/selftune/grading/grade-session.ts +8 -0
  35. package/cli/selftune/hooks/auto-activate.ts +1 -0
  36. package/cli/selftune/hooks/evolution-guard.ts +1 -1
  37. package/cli/selftune/hooks/prompt-log.ts +1 -0
  38. package/cli/selftune/hooks/session-stop.ts +34 -40
  39. package/cli/selftune/hooks/skill-change-guard.ts +1 -0
  40. package/cli/selftune/hooks/skill-eval.ts +1 -1
  41. package/cli/selftune/index.ts +23 -14
  42. package/cli/selftune/ingestors/claude-replay.ts +1 -0
  43. package/cli/selftune/ingestors/codex-rollout.ts +1 -0
  44. package/cli/selftune/ingestors/codex-wrapper.ts +1 -0
  45. package/cli/selftune/ingestors/openclaw-ingest.ts +1 -0
  46. package/cli/selftune/ingestors/opencode-ingest.ts +1 -0
  47. package/cli/selftune/init.ts +121 -29
  48. package/cli/selftune/localdb/db.ts +1 -0
  49. package/cli/selftune/localdb/direct-write.ts +39 -0
  50. package/cli/selftune/localdb/materialize.ts +2 -0
  51. package/cli/selftune/localdb/queries.ts +53 -0
  52. package/cli/selftune/localdb/schema.ts +28 -0
  53. package/cli/selftune/normalization.ts +1 -0
  54. package/cli/selftune/observability.ts +1 -0
  55. package/cli/selftune/repair/skill-usage.ts +1 -0
  56. package/cli/selftune/routes/orchestrate-runs.ts +1 -0
  57. package/cli/selftune/routes/overview.ts +1 -0
  58. package/cli/selftune/routes/report.ts +1 -1
  59. package/cli/selftune/routes/skill-report.ts +2 -1
  60. package/cli/selftune/status.ts +1 -1
  61. package/cli/selftune/sync.ts +30 -1
  62. package/cli/selftune/uninstall.ts +412 -0
  63. package/cli/selftune/utils/canonical-log.ts +2 -0
  64. package/cli/selftune/utils/frontmatter.ts +50 -7
  65. package/cli/selftune/utils/jsonl.ts +1 -0
  66. package/cli/selftune/utils/llm-call.ts +131 -3
  67. package/cli/selftune/utils/skill-log.ts +1 -0
  68. package/cli/selftune/utils/transcript.ts +1 -0
  69. package/cli/selftune/utils/trigger-check.ts +1 -1
  70. package/cli/selftune/workflows/skill-md-writer.ts +5 -5
  71. package/cli/selftune/workflows/workflows.ts +1 -0
  72. package/package.json +37 -33
  73. package/packages/telemetry-contract/fixtures/golden.test.ts +1 -0
  74. package/packages/telemetry-contract/package.json +1 -1
  75. package/packages/telemetry-contract/src/schemas.ts +1 -0
  76. package/packages/telemetry-contract/tests/compatibility.test.ts +1 -0
  77. package/packages/ui/README.md +35 -34
  78. package/packages/ui/package.json +3 -3
  79. package/packages/ui/src/components/ActivityTimeline.tsx +50 -43
  80. package/packages/ui/src/components/EvidenceViewer.tsx +306 -182
  81. package/packages/ui/src/components/EvolutionTimeline.tsx +83 -72
  82. package/packages/ui/src/components/InfoTip.tsx +4 -3
  83. package/packages/ui/src/components/OrchestrateRunsPanel.tsx +60 -53
  84. package/packages/ui/src/components/section-cards.tsx +20 -25
  85. package/packages/ui/src/components/skill-health-grid.tsx +213 -193
  86. package/packages/ui/src/lib/constants.tsx +1 -0
  87. package/packages/ui/src/primitives/badge.tsx +12 -15
  88. package/packages/ui/src/primitives/button.tsx +7 -7
  89. package/packages/ui/src/primitives/card.tsx +15 -26
  90. package/packages/ui/src/primitives/checkbox.tsx +7 -8
  91. package/packages/ui/src/primitives/collapsible.tsx +5 -5
  92. package/packages/ui/src/primitives/dropdown-menu.tsx +45 -55
  93. package/packages/ui/src/primitives/label.tsx +6 -6
  94. package/packages/ui/src/primitives/select.tsx +28 -37
  95. package/packages/ui/src/primitives/table.tsx +17 -44
  96. package/packages/ui/src/primitives/tabs.tsx +14 -21
  97. package/packages/ui/src/primitives/tooltip.tsx +10 -22
  98. package/skill/SKILL.md +70 -57
  99. package/skill/Workflows/AlphaUpload.md +4 -4
  100. package/skill/Workflows/AutoActivation.md +11 -6
  101. package/skill/Workflows/Badge.md +22 -16
  102. package/skill/Workflows/Baseline.md +34 -36
  103. package/skill/Workflows/Composability.md +16 -11
  104. package/skill/Workflows/Contribute.md +26 -21
  105. package/skill/Workflows/Cron.md +23 -22
  106. package/skill/Workflows/Dashboard.md +32 -27
  107. package/skill/Workflows/Doctor.md +33 -27
  108. package/skill/Workflows/Evals.md +48 -47
  109. package/skill/Workflows/EvolutionMemory.md +31 -21
  110. package/skill/Workflows/Evolve.md +84 -82
  111. package/skill/Workflows/EvolveBody.md +58 -47
  112. package/skill/Workflows/Grade.md +16 -13
  113. package/skill/Workflows/ImportSkillsBench.md +9 -6
  114. package/skill/Workflows/Ingest.md +36 -21
  115. package/skill/Workflows/Initialize.md +108 -40
  116. package/skill/Workflows/Orchestrate.md +22 -16
  117. package/skill/Workflows/Replay.md +12 -7
  118. package/skill/Workflows/Rollback.md +13 -6
  119. package/skill/Workflows/Schedule.md +6 -6
  120. package/skill/Workflows/Sync.md +18 -11
  121. package/skill/Workflows/UnitTest.md +28 -17
  122. package/skill/Workflows/Watch.md +28 -21
  123. package/skill/agents/diagnosis-analyst.md +11 -0
  124. package/skill/agents/evolution-reviewer.md +15 -1
  125. package/skill/agents/integration-guide.md +10 -0
  126. package/skill/agents/pattern-analyst.md +12 -1
  127. package/skill/references/grading-methodology.md +23 -24
  128. package/skill/references/interactive-config.md +7 -7
  129. package/skill/references/invocation-taxonomy.md +22 -20
  130. package/skill/references/logs.md +14 -6
  131. package/skill/references/setup-patterns.md +4 -2
  132. package/.claude/agents/diagnosis-analyst.md +0 -156
  133. package/.claude/agents/evolution-reviewer.md +0 -180
  134. package/.claude/agents/integration-guide.md +0 -212
  135. package/.claude/agents/pattern-analyst.md +0 -160
  136. package/apps/local-dashboard/dist/assets/index-Bs3Y4ixf.css +0 -1
  137. package/apps/local-dashboard/dist/assets/index-C4UYGWKr.js +0 -15
  138. package/apps/local-dashboard/dist/assets/vendor-react-BQH_6WrG.js +0 -60
  139. package/apps/local-dashboard/dist/assets/vendor-table-dK1QMLq9.js +0 -26
  140. package/apps/local-dashboard/dist/assets/vendor-ui-CO2mrx6e.js +0 -341
@@ -1,8 +1,3 @@
1
- import { useMemo, useState } from "react"
2
- import { Badge } from "../primitives/badge"
3
- import { Card, CardContent, CardHeader, CardTitle } from "../primitives/card"
4
- import type { EvidenceEntry, EvolutionEntry } from "../types"
5
- import { formatRate, timeAgo } from "../lib/format"
6
1
  import {
7
2
  CheckCircleIcon,
8
3
  ChevronDownIcon,
@@ -19,8 +14,14 @@ import {
19
14
  TrendingUpIcon,
20
15
  TrendingDownIcon,
21
16
  ListChecksIcon,
22
- } from "lucide-react"
23
- import Markdown from "react-markdown"
17
+ } from "lucide-react";
18
+ import { useMemo, useState } from "react";
19
+ import Markdown from "react-markdown";
20
+
21
+ import { formatRate, timeAgo } from "../lib/format";
22
+ import { Badge } from "../primitives/badge";
23
+ import { Card, CardContent, CardHeader, CardTitle } from "../primitives/card";
24
+ import type { EvidenceEntry, EvolutionEntry } from "../types";
24
25
 
25
26
  const ACTION_ICON: Record<string, React.ReactNode> = {
26
27
  created: <CircleDotIcon className="size-3.5" />,
@@ -28,7 +29,7 @@ const ACTION_ICON: Record<string, React.ReactNode> = {
28
29
  deployed: <RocketIcon className="size-3.5" />,
29
30
  rejected: <XCircleIcon className="size-3.5" />,
30
31
  rolled_back: <UndoIcon className="size-3.5" />,
31
- }
32
+ };
32
33
 
33
34
  const ACTION_VARIANT: Record<string, "default" | "secondary" | "destructive" | "outline"> = {
34
35
  created: "outline",
@@ -36,34 +37,49 @@ const ACTION_VARIANT: Record<string, "default" | "secondary" | "destructive" | "
36
37
  deployed: "default",
37
38
  rejected: "destructive",
38
39
  rolled_back: "destructive",
39
- }
40
+ };
40
41
 
41
42
  interface Props {
42
- proposalId: string
43
- evolution: EvolutionEntry[]
44
- evidence: EvidenceEntry[]
43
+ proposalId: string;
44
+ evolution: EvolutionEntry[];
45
+ evidence: EvidenceEntry[];
45
46
  }
46
47
 
47
48
  /** Parse YAML-ish frontmatter from text, returns { meta, body } */
48
49
  function parseFrontmatter(text: string): { meta: Record<string, string>; body: string } {
49
- const match = text.match(/^---\s*\n([\s\S]*?)\n---\s*\n([\s\S]*)$/)
50
- if (!match) return { meta: {}, body: text }
50
+ const lines = text.split("\n");
51
+ if (lines.length < 3 || lines[0].trim() !== "---") {
52
+ return { meta: {}, body: text };
53
+ }
54
+
55
+ let closingIndex = -1;
56
+ for (let i = 1; i < lines.length; i++) {
57
+ if (lines[i].trim() === "---") {
58
+ closingIndex = i;
59
+ break;
60
+ }
61
+ }
62
+
63
+ if (closingIndex === -1) {
64
+ return { meta: {}, body: text };
65
+ }
51
66
 
52
- const meta: Record<string, string> = {}
53
- for (const line of match[1].split("\n")) {
54
- const idx = line.indexOf(":")
67
+ const meta: Record<string, string> = {};
68
+ for (const line of lines.slice(1, closingIndex)) {
69
+ const idx = line.indexOf(":");
55
70
  if (idx > 0) {
56
- const key = line.slice(0, idx).trim()
57
- const val = line.slice(idx + 1).trim()
58
- if (key && val) meta[key] = val
71
+ const key = line.slice(0, idx).trim();
72
+ const val = line.slice(idx + 1).trim();
73
+ if (key && val) meta[key] = val;
59
74
  }
60
75
  }
61
- return { meta, body: match[2] }
76
+
77
+ return { meta, body: lines.slice(closingIndex + 1).join("\n") };
62
78
  }
63
79
 
64
80
  function FrontmatterTable({ meta }: { meta: Record<string, string> }) {
65
- const entries = Object.entries(meta)
66
- if (entries.length === 0) return null
81
+ const entries = Object.entries(meta);
82
+ if (entries.length === 0) return null;
67
83
 
68
84
  return (
69
85
  <div className="grid grid-cols-[auto_1fr] gap-x-3 gap-y-1 text-xs">
@@ -74,20 +90,32 @@ function FrontmatterTable({ meta }: { meta: Record<string, string> }) {
74
90
  </div>
75
91
  ))}
76
92
  </div>
77
- )
93
+ );
78
94
  }
79
95
 
80
- function SkillContentBlock({ label, text, variant }: { label: string; text: string; variant: "original" | "proposed" }) {
81
- const { meta, body } = parseFrontmatter(text)
82
- const hasMeta = Object.keys(meta).length > 0
96
+ function SkillContentBlock({
97
+ label,
98
+ text,
99
+ variant,
100
+ }: {
101
+ label: string;
102
+ text: string;
103
+ variant: "original" | "proposed";
104
+ }) {
105
+ const { meta, body } = parseFrontmatter(text);
106
+ const hasMeta = Object.keys(meta).length > 0;
83
107
 
84
108
  return (
85
109
  <div className="flex-1 min-w-0 space-y-3">
86
110
  <div className="flex items-center gap-2">
87
111
  <FileTextIcon className="size-3.5 text-muted-foreground" />
88
- <span className="text-xs font-medium text-muted-foreground uppercase tracking-wider">{label}</span>
112
+ <span className="text-xs font-medium text-muted-foreground uppercase tracking-wider">
113
+ {label}
114
+ </span>
89
115
  {variant === "proposed" && (
90
- <Badge variant="secondary" className="text-[10px]">New</Badge>
116
+ <Badge variant="secondary" className="text-[10px]">
117
+ New
118
+ </Badge>
91
119
  )}
92
120
  </div>
93
121
 
@@ -103,61 +131,73 @@ function SkillContentBlock({ label, text, variant }: { label: string; text: stri
103
131
  <Markdown>{body}</Markdown>
104
132
  </div>
105
133
  </div>
106
- )
134
+ );
107
135
  }
108
136
 
109
137
  /** Smart formatting for a single validation value */
110
138
  function formatValidationValue(key: string, val: unknown): React.ReactNode {
111
139
  // Booleans
112
140
  if (typeof val === "boolean") {
113
- return val
114
- ? <CheckCircleIcon className="size-3.5 text-emerald-500 inline" />
115
- : <XCircleIcon className="size-3.5 text-red-500 inline" />
141
+ return val ? (
142
+ <CheckCircleIcon className="size-3.5 text-emerald-500 inline" />
143
+ ) : (
144
+ <XCircleIcon className="size-3.5 text-red-500 inline" />
145
+ );
116
146
  }
117
147
  // Numbers that look like rates (0-1 range, or key contains "rate"/"change")
118
148
  if (typeof val === "number") {
119
- const isRate = key.includes("rate") || key.includes("change") || (val >= -1 && val <= 1 && key !== "count")
149
+ const isRate =
150
+ key.includes("rate") || key.includes("change") || (val >= -1 && val <= 1 && key !== "count");
120
151
  if (isRate) {
121
- const pct = (val * 100).toFixed(1)
122
- const prefix = val > 0 && key.includes("change") ? "+" : ""
123
- return <span className="font-mono">{prefix}{pct}%</span>
152
+ const pct = (val * 100).toFixed(1);
153
+ const prefix = val > 0 && key.includes("change") ? "+" : "";
154
+ return (
155
+ <span className="font-mono">
156
+ {prefix}
157
+ {pct}%
158
+ </span>
159
+ );
124
160
  }
125
- return <span className="font-mono">{val}</span>
161
+ return <span className="font-mono">{val}</span>;
126
162
  }
127
163
  // null/undefined
128
- if (val === null || val === undefined) return <span className="text-muted-foreground">--</span>
164
+ if (val === null || val === undefined) return <span className="text-muted-foreground">--</span>;
129
165
  // Strings
130
- if (typeof val === "string") return <span>{val}</span>
166
+ if (typeof val === "string") return <span>{val}</span>;
131
167
  // Arrays — render as list of items
132
168
  if (Array.isArray(val)) {
133
- if (val.length === 0) return <span className="text-muted-foreground italic">none</span>
134
- return <span className="font-mono">{val.length} entries</span>
169
+ if (val.length === 0) return <span className="text-muted-foreground italic">none</span>;
170
+ return <span className="font-mono">{val.length} entries</span>;
135
171
  }
136
172
  // Objects
137
- if (typeof val === "object") return <span className="font-mono">1 entry</span>
138
- return <span>{String(val)}</span>
173
+ if (typeof val === "object") return <span className="font-mono">1 entry</span>;
174
+ return <span>{String(val)}</span>;
139
175
  }
140
176
 
141
177
  /** Render a per_entry_result row — handles both flat EvalEntry and nested { entry, before_pass, after_pass } */
142
178
  function PerEntryResult({ entry }: { entry: Record<string, unknown> }) {
143
179
  // Handle nested shape: { entry: { query, should_trigger }, before_pass, after_pass }
144
- const nested = entry.entry as Record<string, unknown> | undefined
145
- const query = nested?.query ?? entry.query ?? entry.prompt ?? entry.input ?? entry.text
146
- const shouldTrigger = nested?.should_trigger ?? entry.should_trigger
147
- const invocationType = nested?.invocation_type ?? entry.invocation_type
148
- const beforePass = entry.before_pass ?? entry.before ?? entry.original_triggered ?? entry.baseline
149
- const afterPass = entry.after_pass ?? entry.after ?? entry.triggered ?? entry.result
150
- const passed = entry.passed ?? entry.matched
180
+ const nested = entry.entry as Record<string, unknown> | undefined;
181
+ const query = nested?.query ?? entry.query ?? entry.prompt ?? entry.input ?? entry.text;
182
+ const shouldTrigger = nested?.should_trigger ?? entry.should_trigger;
183
+ const invocationType = nested?.invocation_type ?? entry.invocation_type;
184
+ const beforePass =
185
+ entry.before_pass ?? entry.before ?? entry.original_triggered ?? entry.baseline;
186
+ const afterPass = entry.after_pass ?? entry.after ?? entry.triggered ?? entry.result;
187
+ const passed = entry.passed ?? entry.matched;
151
188
 
152
189
  // Determine icon: use after_pass for per_entry_results, passed for others
153
- const isPass = typeof afterPass === "boolean" ? afterPass : typeof passed === "boolean" ? passed : null
190
+ const isPass =
191
+ typeof afterPass === "boolean" ? afterPass : typeof passed === "boolean" ? passed : null;
154
192
 
155
193
  return (
156
194
  <div className="flex items-start gap-2 text-xs py-1.5 border-b border-border/50 last:border-0">
157
195
  {isPass !== null ? (
158
- isPass
159
- ? <CheckCircleIcon className="size-3.5 text-emerald-500 shrink-0 mt-0.5" />
160
- : <XCircleIcon className="size-3.5 text-red-500 shrink-0 mt-0.5" />
196
+ isPass ? (
197
+ <CheckCircleIcon className="size-3.5 text-emerald-500 shrink-0 mt-0.5" />
198
+ ) : (
199
+ <XCircleIcon className="size-3.5 text-red-500 shrink-0 mt-0.5" />
200
+ )
161
201
  ) : (
162
202
  <CircleDotIcon className="size-3.5 text-muted-foreground shrink-0 mt-0.5" />
163
203
  )}
@@ -182,21 +222,32 @@ function PerEntryResult({ entry }: { entry: Record<string, unknown> }) {
182
222
  )}
183
223
  </div>
184
224
  </div>
185
- )
225
+ );
186
226
  }
187
227
 
188
228
  function ValidationResults({ validation }: { validation: Record<string, unknown> }) {
189
- const { improved, before_pass_rate, after_pass_rate, net_change, regressions, new_passes, per_entry_results, ...rest } = validation
190
-
191
- const regressionsArr = Array.isArray(regressions) ? regressions : []
192
- const newPassesArr = Array.isArray(new_passes) ? new_passes : []
193
- const perEntryArr = Array.isArray(per_entry_results) ? per_entry_results : []
229
+ const {
230
+ improved,
231
+ before_pass_rate,
232
+ after_pass_rate,
233
+ net_change,
234
+ regressions,
235
+ new_passes,
236
+ per_entry_results,
237
+ ...rest
238
+ } = validation;
239
+
240
+ const regressionsArr = Array.isArray(regressions) ? regressions : [];
241
+ const newPassesArr = Array.isArray(new_passes) ? new_passes : [];
242
+ const perEntryArr = Array.isArray(per_entry_results) ? per_entry_results : [];
194
243
 
195
244
  return (
196
245
  <div className="rounded-md border bg-muted/30 p-3 space-y-3">
197
246
  <p className="text-xs font-medium text-muted-foreground">
198
247
  Validation Results
199
- <span className="font-normal text-muted-foreground/60 ml-1.5">&mdash; Before/after comparison from eval tests</span>
248
+ <span className="font-normal text-muted-foreground/60 ml-1.5">
249
+ &mdash; Before/after comparison from eval tests
250
+ </span>
200
251
  </p>
201
252
 
202
253
  {/* Summary bar */}
@@ -212,8 +263,11 @@ function ValidationResults({ validation }: { validation: Record<string, unknown>
212
263
  </span>
213
264
  )}
214
265
  {typeof net_change === "number" && (
215
- <span className={`text-xs font-mono font-semibold ${net_change > 0 ? "text-emerald-600 dark:text-emerald-400" : "text-red-500"}`}>
216
- {net_change > 0 ? "+" : ""}{(net_change * 100).toFixed(1)}%
266
+ <span
267
+ className={`text-xs font-mono font-semibold ${net_change > 0 ? "text-emerald-600 dark:text-emerald-400" : "text-red-500"}`}
268
+ >
269
+ {net_change > 0 ? "+" : ""}
270
+ {(net_change * 100).toFixed(1)}%
217
271
  </span>
218
272
  )}
219
273
  </div>
@@ -226,7 +280,14 @@ function ValidationResults({ validation }: { validation: Record<string, unknown>
226
280
  </p>
227
281
  <div className="rounded border bg-card p-2">
228
282
  {newPassesArr.map((entry, j) => (
229
- <PerEntryResult key={j} entry={typeof entry === "object" && entry !== null ? entry as Record<string, unknown> : { value: entry }} />
283
+ <PerEntryResult
284
+ key={j}
285
+ entry={
286
+ typeof entry === "object" && entry !== null
287
+ ? (entry as Record<string, unknown>)
288
+ : { value: entry }
289
+ }
290
+ />
230
291
  ))}
231
292
  </div>
232
293
  </div>
@@ -240,16 +301,21 @@ function ValidationResults({ validation }: { validation: Record<string, unknown>
240
301
  </p>
241
302
  <div className="rounded border border-red-200 dark:border-red-900/50 bg-card p-2">
242
303
  {regressionsArr.map((entry, j) => (
243
- <PerEntryResult key={j} entry={typeof entry === "object" && entry !== null ? entry as Record<string, unknown> : { value: entry }} />
304
+ <PerEntryResult
305
+ key={j}
306
+ entry={
307
+ typeof entry === "object" && entry !== null
308
+ ? (entry as Record<string, unknown>)
309
+ : { value: entry }
310
+ }
311
+ />
244
312
  ))}
245
313
  </div>
246
314
  </div>
247
315
  )}
248
316
 
249
317
  {/* Per-entry results (collapsible if many) */}
250
- {perEntryArr.length > 0 && (
251
- <PerEntryResultsSection entries={perEntryArr} />
252
- )}
318
+ {perEntryArr.length > 0 && <PerEntryResultsSection entries={perEntryArr} />}
253
319
 
254
320
  {/* Any remaining keys */}
255
321
  {Object.keys(rest).length > 0 && (
@@ -263,18 +329,24 @@ function ValidationResults({ validation }: { validation: Record<string, unknown>
263
329
  </div>
264
330
  )}
265
331
  </div>
266
- )
332
+ );
267
333
  }
268
334
 
269
335
  function PerEntryResultsSection({ entries }: { entries: unknown[] }) {
270
- const [expanded, setExpanded] = useState(false)
336
+ const [expanded, setExpanded] = useState(false);
271
337
  const passCount = entries.filter((e) => {
272
- if (typeof e !== "object" || e === null) return false
273
- const obj = e as Record<string, unknown>
274
- return obj.passed === true || obj.matched === true || obj.triggered === true || obj.after === true || obj.result === true
275
- }).length
276
-
277
- const display = expanded ? entries : entries.slice(0, 5)
338
+ if (typeof e !== "object" || e === null) return false;
339
+ const obj = e as Record<string, unknown>;
340
+ return (
341
+ obj.passed === true ||
342
+ obj.matched === true ||
343
+ obj.triggered === true ||
344
+ obj.after === true ||
345
+ obj.result === true
346
+ );
347
+ }).length;
348
+
349
+ const display = expanded ? entries : entries.slice(0, 5);
278
350
 
279
351
  return (
280
352
  <div>
@@ -303,41 +375,48 @@ function PerEntryResultsSection({ entries }: { entries: unknown[] }) {
303
375
  {display.map((entry, j) => (
304
376
  <PerEntryResult
305
377
  key={j}
306
- entry={typeof entry === "object" && entry !== null ? entry as Record<string, unknown> : { value: entry }}
378
+ entry={
379
+ typeof entry === "object" && entry !== null
380
+ ? (entry as Record<string, unknown>)
381
+ : { value: entry }
382
+ }
307
383
  />
308
384
  ))}
309
385
  </div>
310
386
  </div>
311
- )
387
+ );
312
388
  }
313
389
 
314
390
  /** Extract after_pass_rate from an evidence entry's validation data */
315
391
  function getAfterPassRate(entry: EvidenceEntry): number | null {
316
- if (!entry.validation) return null
317
- const rate = entry.validation.after_pass_rate
318
- return typeof rate === "number" ? rate : null
392
+ if (!entry.validation) return null;
393
+ const rate = entry.validation.after_pass_rate;
394
+ return typeof rate === "number" ? rate : null;
319
395
  }
320
396
 
321
397
  /** Render a delta badge between two pass rates, returns null if not computable */
322
398
  function DeltaBadge({ prev, curr }: { prev: number | null; curr: number | null }) {
323
- if (prev === null || curr === null) return null
324
- const delta = curr - prev
325
- if (delta === 0) return null
326
- const pct = (delta * 100).toFixed(1)
327
- const positive = delta > 0
399
+ if (prev === null || curr === null) return null;
400
+ const delta = curr - prev;
401
+ if (delta === 0) return null;
402
+ const pct = (delta * 100).toFixed(1);
403
+ const positive = delta > 0;
328
404
  return (
329
- <span className={`text-[10px] font-mono font-semibold ${positive ? "text-emerald-600 dark:text-emerald-400" : "text-red-500"}`}>
330
- {positive ? "+" : ""}{pct}% vs previous
405
+ <span
406
+ className={`text-[10px] font-mono font-semibold ${positive ? "text-emerald-600 dark:text-emerald-400" : "text-red-500"}`}
407
+ >
408
+ {positive ? "+" : ""}
409
+ {pct}% vs previous
331
410
  </span>
332
- )
411
+ );
333
412
  }
334
413
 
335
414
  function EvalSetSection({ evalSet }: { evalSet: Array<Record<string, unknown>> }) {
336
- const [expanded, setExpanded] = useState(false)
415
+ const [expanded, setExpanded] = useState(false);
337
416
  const passCount = evalSet.filter((e) => {
338
- const passed = e.passed ?? e.result
339
- return passed === true
340
- }).length
417
+ const passed = e.passed ?? e.result;
418
+ return passed === true;
419
+ }).length;
341
420
 
342
421
  return (
343
422
  <div className="rounded-md border bg-muted/30 p-3 space-y-2">
@@ -346,9 +425,11 @@ function EvalSetSection({ evalSet }: { evalSet: Array<Record<string, unknown>> }
346
425
  onClick={() => setExpanded(!expanded)}
347
426
  className="flex items-center gap-1.5 w-full text-left"
348
427
  >
349
- {expanded
350
- ? <ChevronDownIcon className="size-3.5 text-muted-foreground shrink-0" />
351
- : <ChevronRightIcon className="size-3.5 text-muted-foreground shrink-0" />}
428
+ {expanded ? (
429
+ <ChevronDownIcon className="size-3.5 text-muted-foreground shrink-0" />
430
+ ) : (
431
+ <ChevronRightIcon className="size-3.5 text-muted-foreground shrink-0" />
432
+ )}
352
433
  <ListChecksIcon className="size-3.5 text-muted-foreground" />
353
434
  <span className="text-xs font-medium text-muted-foreground">
354
435
  Eval Set ({passCount}/{evalSet.length} passed)
@@ -357,34 +438,41 @@ function EvalSetSection({ evalSet }: { evalSet: Array<Record<string, unknown>> }
357
438
  {expanded && (
358
439
  <div className="space-y-1">
359
440
  {evalSet.map((evalEntry, j) => {
360
- const query = evalEntry.query ?? evalEntry.prompt ?? evalEntry.input
361
- const expected = evalEntry.expected ?? evalEntry.should_trigger
362
- const passed = evalEntry.passed ?? evalEntry.result
441
+ const query = evalEntry.query ?? evalEntry.prompt ?? evalEntry.input;
442
+ const expected = evalEntry.expected ?? evalEntry.should_trigger;
443
+ const passed = evalEntry.passed ?? evalEntry.result;
363
444
  return (
364
- <div key={j} className="flex items-start gap-2 text-xs py-1 border-b border-border/50 last:border-0">
445
+ <div
446
+ key={j}
447
+ className="flex items-start gap-2 text-xs py-1 border-b border-border/50 last:border-0"
448
+ >
365
449
  {typeof passed === "boolean" ? (
366
- passed
367
- ? <CheckCircleIcon className="size-3.5 text-emerald-500 shrink-0 mt-0.5" />
368
- : <XCircleIcon className="size-3.5 text-red-500 shrink-0 mt-0.5" />
450
+ passed ? (
451
+ <CheckCircleIcon className="size-3.5 text-emerald-500 shrink-0 mt-0.5" />
452
+ ) : (
453
+ <XCircleIcon className="size-3.5 text-red-500 shrink-0 mt-0.5" />
454
+ )
369
455
  ) : (
370
456
  <CircleDotIcon className="size-3.5 text-muted-foreground shrink-0 mt-0.5" />
371
457
  )}
372
- <span className="flex-1 min-w-0 line-clamp-2">{String(query ?? JSON.stringify(evalEntry))}</span>
458
+ <span className="flex-1 min-w-0 line-clamp-2">
459
+ {String(query ?? JSON.stringify(evalEntry))}
460
+ </span>
373
461
  {expected !== undefined && (
374
462
  <Badge variant="secondary" className="text-[9px] shrink-0">
375
463
  expect: {String(expected)}
376
464
  </Badge>
377
465
  )}
378
466
  </div>
379
- )
467
+ );
380
468
  })}
381
469
  </div>
382
470
  )}
383
471
  </div>
384
- )
472
+ );
385
473
  }
386
474
 
387
- type RoundStatus = "single" | "intermediate" | "final"
475
+ type RoundStatus = "single" | "intermediate" | "final";
388
476
 
389
477
  /** Render a single evidence card — used for both expanded and collapsed states */
390
478
  function EvidenceCard({
@@ -394,13 +482,13 @@ function EvidenceCard({
394
482
  prevPassRate,
395
483
  currPassRate,
396
484
  }: {
397
- entry: EvidenceEntry
398
- roundLabel: string | null
399
- roundStatus: RoundStatus
400
- prevPassRate: number | null
401
- currPassRate: number | null
485
+ entry: EvidenceEntry;
486
+ roundLabel: string | null;
487
+ roundStatus: RoundStatus;
488
+ prevPassRate: number | null;
489
+ currPassRate: number | null;
402
490
  }) {
403
- const showRound = roundStatus !== "single"
491
+ const showRound = roundStatus !== "single";
404
492
  return (
405
493
  <Card className={roundStatus === "final" ? "border-primary/50 shadow-sm" : undefined}>
406
494
  <CardHeader className="pb-3">
@@ -412,15 +500,25 @@ function EvidenceCard({
412
500
  <span className="text-[10px] font-mono text-muted-foreground">{roundLabel}</span>
413
501
  )}
414
502
  {roundStatus === "final" && (
415
- <Badge variant="default" className="text-[10px]">Final</Badge>
503
+ <Badge variant="default" className="text-[10px]">
504
+ Final
505
+ </Badge>
416
506
  )}
417
507
  </CardTitle>
418
508
  <div className="flex items-center gap-2">
419
509
  {showRound && <DeltaBadge prev={prevPassRate} curr={currPassRate} />}
420
- <Badge variant="secondary" className="text-[10px]">{entry.stage}</Badge>
510
+ <Badge variant="secondary" className="text-[10px]">
511
+ {entry.stage}
512
+ </Badge>
421
513
  {entry.confidence !== null && (
422
514
  <Badge
423
- variant={entry.confidence >= 0.8 ? "default" : entry.confidence >= 0.5 ? "secondary" : "destructive"}
515
+ variant={
516
+ entry.confidence >= 0.8
517
+ ? "default"
518
+ : entry.confidence >= 0.5
519
+ ? "secondary"
520
+ : "destructive"
521
+ }
424
522
  className="text-[10px] font-mono"
425
523
  >
426
524
  {formatRate(entry.confidence)} confidence
@@ -457,9 +555,7 @@ function EvidenceCard({
457
555
  )}
458
556
 
459
557
  {/* Eval set — test cases used for validation (collapsible) */}
460
- {entry.eval_set && entry.eval_set.length > 0 && (
461
- <EvalSetSection evalSet={entry.eval_set} />
462
- )}
558
+ {entry.eval_set && entry.eval_set.length > 0 && <EvalSetSection evalSet={entry.eval_set} />}
463
559
 
464
560
  {/* Validation details */}
465
561
  {entry.validation && Object.keys(entry.validation).length > 0 && (
@@ -467,7 +563,7 @@ function EvidenceCard({
467
563
  )}
468
564
  </CardContent>
469
565
  </Card>
470
- )
566
+ );
471
567
  }
472
568
 
473
569
  /** Collapsed summary for earlier iteration rounds */
@@ -476,12 +572,12 @@ function CollapsedEvidenceCard({
476
572
  roundLabel,
477
573
  onExpand,
478
574
  }: {
479
- entry: EvidenceEntry
480
- roundLabel: string
481
- onExpand: () => void
575
+ entry: EvidenceEntry;
576
+ roundLabel: string;
577
+ onExpand: () => void;
482
578
  }) {
483
- const passRate = getAfterPassRate(entry)
484
- const improved = entry.validation?.improved
579
+ const passRate = getAfterPassRate(entry);
580
+ const improved = entry.validation?.improved;
485
581
 
486
582
  return (
487
583
  <button
@@ -504,62 +600,66 @@ function CollapsedEvidenceCard({
504
600
  {improved ? "Improved" : "Regressed"}
505
601
  </Badge>
506
602
  )}
507
- <Badge variant="secondary" className="text-[10px]">{entry.stage}</Badge>
603
+ <Badge variant="secondary" className="text-[10px]">
604
+ {entry.stage}
605
+ </Badge>
508
606
  <span className="text-[10px] text-muted-foreground">{timeAgo(entry.timestamp)}</span>
509
607
  </div>
510
608
  </button>
511
- )
609
+ );
512
610
  }
513
611
 
514
612
  export function EvidenceViewer({ proposalId, evolution, evidence }: Props) {
515
613
  const steps = useMemo(
516
- () => evolution
517
- .filter((e) => e.proposal_id === proposalId)
518
- .sort((a, b) => new Date(a.timestamp).getTime() - new Date(b.timestamp).getTime()),
614
+ () =>
615
+ evolution
616
+ .filter((e) => e.proposal_id === proposalId)
617
+ .sort((a, b) => new Date(a.timestamp).getTime() - new Date(b.timestamp).getTime()),
519
618
  [evolution, proposalId],
520
- )
619
+ );
521
620
 
522
621
  const entries = useMemo(
523
- () => evidence
524
- .filter((e) => e.proposal_id === proposalId)
525
- .sort((a, b) => new Date(a.timestamp).getTime() - new Date(b.timestamp).getTime()),
622
+ () =>
623
+ evidence
624
+ .filter((e) => e.proposal_id === proposalId)
625
+ .sort((a, b) => new Date(a.timestamp).getTime() - new Date(b.timestamp).getTime()),
526
626
  [evidence, proposalId],
527
- )
627
+ );
528
628
 
529
629
  // Track which earlier rounds are manually expanded
530
- const [expandedRounds, setExpandedRounds] = useState<Set<string>>(new Set())
630
+ const [expandedRounds, setExpandedRounds] = useState<Set<string>>(new Set());
531
631
 
532
632
  const toggleRound = (key: string) => {
533
633
  setExpandedRounds((prev) => {
534
- const next = new Set(prev)
535
- if (next.has(key)) next.delete(key)
536
- else next.add(key)
537
- return next
538
- })
539
- }
634
+ const next = new Set(prev);
635
+ if (next.has(key)) next.delete(key);
636
+ else next.add(key);
637
+ return next;
638
+ });
639
+ };
540
640
 
541
641
  const snapshot = useMemo(() => {
542
642
  for (let i = steps.length - 1; i >= 0; i--) {
543
- if (steps[i].eval_snapshot) return steps[i].eval_snapshot as Record<string, unknown>
643
+ if (steps[i].eval_snapshot) return steps[i].eval_snapshot as Record<string, unknown>;
544
644
  }
545
- return null
546
- }, [steps])
645
+ return null;
646
+ }, [steps]);
547
647
 
548
648
  // Separate proposal-stage entries from validation-stage entries, then group validations by target
549
649
  const { proposalEntries, validationsByTarget } = useMemo(() => {
550
- const proposals: EvidenceEntry[] = []
551
- const validationMap = new Map<string, EvidenceEntry[]>()
650
+ const proposals: EvidenceEntry[] = [];
651
+ const validationMap = new Map<string, EvidenceEntry[]>();
552
652
  for (const entry of entries) {
553
653
  if (entry.stage !== "validated") {
554
- proposals.push(entry)
654
+ proposals.push(entry);
555
655
  } else {
556
- const key = entry.target
557
- if (!validationMap.has(key)) validationMap.set(key, [])
558
- validationMap.get(key)!.push(entry)
656
+ const key = entry.target;
657
+ if (!validationMap.has(key)) validationMap.set(key, []);
658
+ validationMap.get(key)!.push(entry);
559
659
  }
560
660
  }
561
- return { proposalEntries: proposals, validationsByTarget: validationMap }
562
- }, [entries])
661
+ return { proposalEntries: proposals, validationsByTarget: validationMap };
662
+ }, [entries]);
563
663
 
564
664
  return (
565
665
  <div className="space-y-4">
@@ -567,8 +667,9 @@ export function EvidenceViewer({ proposalId, evolution, evidence }: Props) {
567
667
  <div className="flex items-start gap-2.5 rounded-lg border border-primary/20 bg-primary/5 px-3.5 py-2.5">
568
668
  <InfoIcon className="size-4 text-primary/60 shrink-0 mt-0.5" />
569
669
  <p className="text-xs text-muted-foreground leading-relaxed">
570
- This view shows the complete evidence trail for a skill evolution proposal &mdash; how the skill was changed,
571
- the eval test results before and after, and whether the change improved performance.
670
+ This view shows the complete evidence trail for a skill evolution proposal &mdash; how the
671
+ skill was changed, the eval test results before and after, and whether the change improved
672
+ performance.
572
673
  </p>
573
674
  </div>
574
675
 
@@ -577,7 +678,9 @@ export function EvidenceViewer({ proposalId, evolution, evidence }: Props) {
577
678
  <CardHeader className="pb-3">
578
679
  <CardTitle className="text-sm flex items-center gap-2">
579
680
  <span>Proposal Journey</span>
580
- <span className="font-mono text-xs text-muted-foreground">#{proposalId.slice(0, 12)}</span>
681
+ <span className="font-mono text-xs text-muted-foreground">
682
+ #{proposalId.slice(0, 12)}
683
+ </span>
581
684
  </CardTitle>
582
685
  </CardHeader>
583
686
  <CardContent className="space-y-3">
@@ -587,10 +690,15 @@ export function EvidenceViewer({ proposalId, evolution, evidence }: Props) {
587
690
  {i > 0 && <ArrowRightIcon className="size-3 text-muted-foreground/50 shrink-0" />}
588
691
  <div className="flex items-center gap-1.5 rounded-md border px-2.5 py-1.5 bg-card">
589
692
  {ACTION_ICON[step.action]}
590
- <Badge variant={ACTION_VARIANT[step.action] ?? "secondary"} className="text-[10px] capitalize">
693
+ <Badge
694
+ variant={ACTION_VARIANT[step.action] ?? "secondary"}
695
+ className="text-[10px] capitalize"
696
+ >
591
697
  {step.action.replace("_", " ")}
592
698
  </Badge>
593
- <span className="text-[10px] text-muted-foreground">{timeAgo(step.timestamp)}</span>
699
+ <span className="text-[10px] text-muted-foreground">
700
+ {timeAgo(step.timestamp)}
701
+ </span>
594
702
  </div>
595
703
  </div>
596
704
  ))}
@@ -601,21 +709,31 @@ export function EvidenceViewer({ proposalId, evolution, evidence }: Props) {
601
709
  <div className="flex items-center gap-3 rounded-md border bg-muted/20 px-3 py-2">
602
710
  {typeof snapshot.net_change === "number" && (
603
711
  <div className="flex items-center gap-1">
604
- {(snapshot.net_change as number) > 0
605
- ? <TrendingUpIcon className="size-3.5 text-emerald-500" />
606
- : <TrendingDownIcon className="size-3.5 text-red-500" />}
607
- <span className={`text-sm font-semibold font-mono ${(snapshot.net_change as number) > 0 ? "text-emerald-600 dark:text-emerald-400" : "text-red-500"}`}>
608
- {(snapshot.net_change as number) > 0 ? "+" : ""}{Math.round((snapshot.net_change as number) * 100)}%
712
+ {(snapshot.net_change as number) > 0 ? (
713
+ <TrendingUpIcon className="size-3.5 text-emerald-500" />
714
+ ) : (
715
+ <TrendingDownIcon className="size-3.5 text-red-500" />
716
+ )}
717
+ <span
718
+ className={`text-sm font-semibold font-mono ${(snapshot.net_change as number) > 0 ? "text-emerald-600 dark:text-emerald-400" : "text-red-500"}`}
719
+ >
720
+ {(snapshot.net_change as number) > 0 ? "+" : ""}
721
+ {Math.round((snapshot.net_change as number) * 100)}%
609
722
  </span>
610
723
  </div>
611
724
  )}
612
- {typeof snapshot.before_pass_rate === "number" && typeof snapshot.after_pass_rate === "number" && (
613
- <span className="text-xs text-muted-foreground font-mono">
614
- {Math.round((snapshot.before_pass_rate as number) * 100)}% &rarr; {Math.round((snapshot.after_pass_rate as number) * 100)}%
615
- </span>
616
- )}
725
+ {typeof snapshot.before_pass_rate === "number" &&
726
+ typeof snapshot.after_pass_rate === "number" && (
727
+ <span className="text-xs text-muted-foreground font-mono">
728
+ {Math.round((snapshot.before_pass_rate as number) * 100)}% &rarr;{" "}
729
+ {Math.round((snapshot.after_pass_rate as number) * 100)}%
730
+ </span>
731
+ )}
617
732
  {snapshot.improved !== undefined && (
618
- <Badge variant={snapshot.improved ? "default" : "destructive"} className="text-[10px]">
733
+ <Badge
734
+ variant={snapshot.improved ? "default" : "destructive"}
735
+ className="text-[10px]"
736
+ >
619
737
  {snapshot.improved ? "Improved" : "Regressed"}
620
738
  </Badge>
621
739
  )}
@@ -645,17 +763,23 @@ export function EvidenceViewer({ proposalId, evolution, evidence }: Props) {
645
763
 
646
764
  {/* Validation-stage evidence — grouped by target with iteration rounds */}
647
765
  {Array.from(validationsByTarget.entries()).map(([target, targetEntries]) => {
648
- const hasMultipleRounds = targetEntries.length > 1
766
+ const hasMultipleRounds = targetEntries.length > 1;
649
767
 
650
768
  return (
651
769
  <div key={target} className="space-y-2">
652
770
  {targetEntries.map((entry, i) => {
653
- const isLast = i === targetEntries.length - 1
654
- const roundLabel = hasMultipleRounds ? `Round ${i + 1} of ${targetEntries.length}` : null
655
- const prevPassRate = i > 0 ? getAfterPassRate(targetEntries[i - 1]) : null
656
- const currPassRate = getAfterPassRate(entry)
657
- const roundKey = `${target}-${entry.timestamp}`
658
- const roundStatus: RoundStatus = !hasMultipleRounds ? "single" : isLast ? "final" : "intermediate"
771
+ const isLast = i === targetEntries.length - 1;
772
+ const roundLabel = hasMultipleRounds
773
+ ? `Round ${i + 1} of ${targetEntries.length}`
774
+ : null;
775
+ const prevPassRate = i > 0 ? getAfterPassRate(targetEntries[i - 1]) : null;
776
+ const currPassRate = getAfterPassRate(entry);
777
+ const roundKey = `${target}-${entry.timestamp}`;
778
+ const roundStatus: RoundStatus = !hasMultipleRounds
779
+ ? "single"
780
+ : isLast
781
+ ? "final"
782
+ : "intermediate";
659
783
 
660
784
  // Earlier rounds: collapsed by default
661
785
  if (roundStatus === "intermediate" && !expandedRounds.has(roundKey)) {
@@ -666,7 +790,7 @@ export function EvidenceViewer({ proposalId, evolution, evidence }: Props) {
666
790
  roundLabel={roundLabel!}
667
791
  onExpand={() => toggleRound(roundKey)}
668
792
  />
669
- )
793
+ );
670
794
  }
671
795
 
672
796
  // Expanded earlier round — show with collapse toggle
@@ -689,7 +813,7 @@ export function EvidenceViewer({ proposalId, evolution, evidence }: Props) {
689
813
  currPassRate={currPassRate}
690
814
  />
691
815
  </div>
692
- )
816
+ );
693
817
  }
694
818
 
695
819
  // Final round (or single entry) — always expanded
@@ -702,10 +826,10 @@ export function EvidenceViewer({ proposalId, evolution, evidence }: Props) {
702
826
  prevPassRate={prevPassRate}
703
827
  currPassRate={currPassRate}
704
828
  />
705
- )
829
+ );
706
830
  })}
707
831
  </div>
708
- )
832
+ );
709
833
  })}
710
834
 
711
835
  {entries.length === 0 && (
@@ -714,5 +838,5 @@ export function EvidenceViewer({ proposalId, evolution, evidence }: Props) {
714
838
  </div>
715
839
  )}
716
840
  </div>
717
- )
841
+ );
718
842
  }