selftune 0.2.2 → 0.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. package/README.md +11 -0
  2. package/apps/local-dashboard/dist/assets/index-C75H1Q3n.css +1 -0
  3. package/apps/local-dashboard/dist/assets/index-axE4kz3Q.js +15 -0
  4. package/apps/local-dashboard/dist/assets/vendor-ui-r2k_Ku_V.js +346 -0
  5. package/apps/local-dashboard/dist/index.html +3 -3
  6. package/cli/selftune/analytics.ts +354 -0
  7. package/cli/selftune/badge/badge.ts +2 -2
  8. package/cli/selftune/dashboard-server.ts +3 -3
  9. package/cli/selftune/evolution/evolve-body.ts +1 -1
  10. package/cli/selftune/evolution/evolve.ts +1 -1
  11. package/cli/selftune/index.ts +15 -1
  12. package/cli/selftune/init.ts +5 -1
  13. package/cli/selftune/observability.ts +63 -2
  14. package/cli/selftune/orchestrate.ts +1 -1
  15. package/cli/selftune/quickstart.ts +1 -1
  16. package/cli/selftune/status.ts +2 -2
  17. package/cli/selftune/types.ts +1 -0
  18. package/cli/selftune/utils/llm-call.ts +2 -1
  19. package/package.json +6 -4
  20. package/packages/ui/README.md +113 -0
  21. package/packages/ui/index.ts +10 -0
  22. package/packages/ui/package.json +62 -0
  23. package/packages/ui/src/components/ActivityTimeline.tsx +171 -0
  24. package/packages/ui/src/components/EvidenceViewer.tsx +718 -0
  25. package/packages/ui/src/components/EvolutionTimeline.tsx +252 -0
  26. package/packages/ui/src/components/InfoTip.tsx +19 -0
  27. package/packages/ui/src/components/OrchestrateRunsPanel.tsx +164 -0
  28. package/packages/ui/src/components/index.ts +7 -0
  29. package/packages/ui/src/components/section-cards.tsx +155 -0
  30. package/packages/ui/src/components/skill-health-grid.tsx +686 -0
  31. package/packages/ui/src/lib/constants.tsx +43 -0
  32. package/packages/ui/src/lib/format.ts +37 -0
  33. package/packages/ui/src/lib/index.ts +3 -0
  34. package/packages/ui/src/lib/utils.ts +6 -0
  35. package/packages/ui/src/primitives/badge.tsx +52 -0
  36. package/packages/ui/src/primitives/button.tsx +58 -0
  37. package/packages/ui/src/primitives/card.tsx +103 -0
  38. package/packages/ui/src/primitives/checkbox.tsx +27 -0
  39. package/packages/ui/src/primitives/collapsible.tsx +7 -0
  40. package/packages/ui/src/primitives/dropdown-menu.tsx +266 -0
  41. package/packages/ui/src/primitives/index.ts +55 -0
  42. package/packages/ui/src/primitives/label.tsx +20 -0
  43. package/packages/ui/src/primitives/select.tsx +197 -0
  44. package/packages/ui/src/primitives/table.tsx +114 -0
  45. package/packages/ui/src/primitives/tabs.tsx +82 -0
  46. package/packages/ui/src/primitives/tooltip.tsx +64 -0
  47. package/packages/ui/src/types.ts +87 -0
  48. package/packages/ui/tsconfig.json +17 -0
  49. package/skill/SKILL.md +3 -0
  50. package/skill/Workflows/Telemetry.md +59 -0
  51. package/apps/local-dashboard/dist/assets/index-C4EOTFZ2.js +0 -15
  52. package/apps/local-dashboard/dist/assets/index-bl-Webyd.css +0 -1
  53. package/apps/local-dashboard/dist/assets/vendor-ui-D7_zX_qy.js +0 -346
@@ -0,0 +1,718 @@
1
+ import { useMemo, useState } from "react"
2
+ import { Badge } from "../primitives/badge"
3
+ import { Card, CardContent, CardHeader, CardTitle } from "../primitives/card"
4
+ import type { EvidenceEntry, EvolutionEntry } from "../types"
5
+ import { formatRate, timeAgo } from "../lib/format"
6
+ import {
7
+ CheckCircleIcon,
8
+ ChevronDownIcon,
9
+ ChevronRightIcon,
10
+ CircleDotIcon,
11
+ FileTextIcon,
12
+ InfoIcon,
13
+ RocketIcon,
14
+ ShieldCheckIcon,
15
+ ShieldAlertIcon,
16
+ XCircleIcon,
17
+ UndoIcon,
18
+ ArrowRightIcon,
19
+ TrendingUpIcon,
20
+ TrendingDownIcon,
21
+ ListChecksIcon,
22
+ } from "lucide-react"
23
+ import Markdown from "react-markdown"
24
+
25
/** Tailwind size class shared by every action icon. */
const ACTION_ICON_CLASS = "size-3.5"

/**
 * Icon rendered beside each evolution action in the proposal journey.
 * Actions without an entry here render no icon.
 */
const ACTION_ICON: Record<string, React.ReactNode> = {
  created: <CircleDotIcon className={ACTION_ICON_CLASS} />,
  validated: <ShieldCheckIcon className={ACTION_ICON_CLASS} />,
  deployed: <RocketIcon className={ACTION_ICON_CLASS} />,
  rejected: <XCircleIcon className={ACTION_ICON_CLASS} />,
  rolled_back: <UndoIcon className={ACTION_ICON_CLASS} />,
}
32
+
33
/** Badge variants an evolution action can be mapped to. */
type ActionBadgeVariant = "default" | "secondary" | "destructive" | "outline"

/**
 * Badge variant used for each evolution action in the proposal journey.
 * Callers supply their own fallback for actions that are not listed.
 */
const ACTION_VARIANT: Record<string, ActionBadgeVariant> = {
  created: "outline",
  validated: "secondary",
  deployed: "default",
  rejected: "destructive",
  rolled_back: "destructive",
}
40
+
41
/** Inputs of the `EvidenceViewer` component. */
type Props = {
  /** Proposal whose records are shown; compared against each record's `proposal_id`. */
  proposalId: string
  /** Evolution audit entries; only those matching `proposalId` are used. */
  evolution: EvolutionEntry[]
  /** Evidence entries; only those matching `proposalId` are used. */
  evidence: EvidenceEntry[]
}
46
+
47
/**
 * Split a document into its leading `---` fenced frontmatter and the remaining body.
 *
 * Only flat `key: value` lines are understood (this is not a YAML parser). The
 * first colon on a line separates key from value; lines without a colon, with
 * an empty key, or with an empty value are skipped.
 *
 * @param text - Raw document text, optionally starting with a `---` fenced block.
 * @returns `meta` holding the parsed pairs and `body` holding everything after
 *   the closing fence (an empty string when nothing follows it). When no
 *   frontmatter fence is found, `meta` is empty and `body` is `text` unchanged.
 */
function parseFrontmatter(text: string): { meta: Record<string, string>; body: string } {
  // The trailing "\n<body>" group is optional so that a document ending right at
  // the closing fence (frontmatter only) is still recognised instead of being
  // handed to the markdown renderer verbatim.
  const match = text.match(/^---\s*\n([\s\S]*?)\n---\s*(?:\n([\s\S]*))?$/)
  if (!match) return { meta: {}, body: text }

  const meta: Record<string, string> = {}
  for (const line of (match[1] ?? "").split("\n")) {
    const idx = line.indexOf(":")
    // idx === 0 means an empty key, idx === -1 means no separator at all.
    if (idx <= 0) continue
    const key = line.slice(0, idx).trim()
    const val = line.slice(idx + 1).trim()
    if (key && val) meta[key] = val
  }
  return { meta, body: match[2] ?? "" }
}
63
+
64
/**
 * Two-column key/value grid for parsed frontmatter.
 * Renders nothing when `meta` has no keys.
 */
function FrontmatterTable({ meta }: { meta: Record<string, string> }) {
  const names = Object.keys(meta)
  if (names.length === 0) return null

  // Each row uses `display: contents` so its two spans sit directly in the grid.
  const rows = names.map((name) => (
    <div key={name} className="contents">
      <span className="font-medium text-muted-foreground capitalize">{name}</span>
      <span className="text-foreground truncate">{meta[name]}</span>
    </div>
  ))

  return (
    <div className="grid grid-cols-[auto_1fr] gap-x-3 gap-y-1 text-xs">
      {rows}
    </div>
  )
}
79
+
80
/**
 * One side of the original/proposed comparison: a labelled header, the
 * frontmatter table (when any keys were parsed) and the markdown-rendered body.
 * The "proposed" variant additionally shows a "New" badge next to the label.
 */
function SkillContentBlock({ label, text, variant }: { label: string; text: string; variant: "original" | "proposed" }) {
  const parsed = parseFrontmatter(text)
  const showFrontmatter = Object.keys(parsed.meta).length > 0
  const isProposed = variant === "proposed"

  return (
    <div className="flex-1 min-w-0 space-y-3">
      {/* Header row */}
      <div className="flex items-center gap-2">
        <FileTextIcon className="size-3.5 text-muted-foreground" />
        <span className="text-xs font-medium text-muted-foreground uppercase tracking-wider">{label}</span>
        {isProposed && (
          <Badge variant="secondary" className="text-[10px]">New</Badge>
        )}
      </div>

      {/* Parsed frontmatter, if present */}
      {showFrontmatter && (
        <div className="rounded-md border bg-muted/30 p-3">
          <FrontmatterTable meta={parsed.meta} />
        </div>
      )}

      {/* Body rendered as markdown */}
      <div className="skill-markdown rounded-md border bg-card p-4">
        <Markdown>{parsed.body}</Markdown>
      </div>
    </div>
  )
}
108
+
109
/**
 * Format a single validation value for display, choosing a presentation from
 * its runtime type.
 *
 * - booleans become a check / cross icon
 * - numbers become a percentage when they look like a rate, otherwise the raw number
 * - `null` / `undefined` become a muted `--`
 * - strings are shown as-is
 * - arrays are summarised as `none` or `<n> entries`; other objects as `1 entry`
 *
 * @param key - Name of the validation field; used to guess whether a number is a rate.
 * @param val - The value to format.
 * @returns A React node representing the value.
 */
function formatValidationValue(key: string, val: unknown): React.ReactNode {
  // Booleans
  if (typeof val === "boolean") {
    return val
      ? <CheckCircleIcon className="size-3.5 text-emerald-500 inline" />
      : <XCircleIcon className="size-3.5 text-red-500 inline" />
  }

  // Numbers: percentage when the key names a rate/change, or when the value
  // sits in [-1, 1] and the key does not look like a counter.
  if (typeof val === "number") {
    // Count-like keys (e.g. "regression_count") hold integers, so a value of
    // 0 or 1 must not be mistaken for a 0-1 rate and shown as 0.0% / 100.0%.
    const inUnitRange = val >= -1 && val <= 1 && !key.includes("count")
    const isRate = key.includes("rate") || key.includes("change") || inUnitRange
    if (!isRate) return <span className="font-mono">{val}</span>

    const pct = (val * 100).toFixed(1)
    // Only "change" style values carry an explicit plus sign.
    const prefix = val > 0 && key.includes("change") ? "+" : ""
    return <span className="font-mono">{prefix}{pct}%</span>
  }

  // null/undefined
  if (val === null || val === undefined) return <span className="text-muted-foreground">--</span>

  // Strings
  if (typeof val === "string") return <span>{val}</span>

  // Arrays are summarised by length rather than expanded.
  if (Array.isArray(val)) {
    if (val.length === 0) return <span className="text-muted-foreground italic">none</span>
    return <span className="font-mono">{val.length} entries</span>
  }

  // Remaining objects
  if (typeof val === "object") return <span className="font-mono">1 entry</span>

  return <span>{String(val)}</span>
}
140
+
141
/**
 * One row of a per-entry result list.
 *
 * Accepts both a flat record and the nested `{ entry, before_pass, after_pass }`
 * shape, probing several alternative field names for the query text and for the
 * before/after outcome. The leading icon reflects the "after" outcome when it is
 * a boolean, otherwise the `passed`/`matched` flag, otherwise a neutral dot.
 */
function PerEntryResult({ entry }: { entry: Record<string, unknown> }) {
  // Nested shape keeps the query and expectation under `entry.entry`.
  const inner = entry.entry as Record<string, unknown> | undefined

  const query = inner?.query ?? entry.query ?? entry.prompt ?? entry.input ?? entry.text
  const shouldTrigger = inner?.should_trigger ?? entry.should_trigger
  const invocationType = inner?.invocation_type ?? entry.invocation_type
  const beforePass = entry.before_pass ?? entry.before ?? entry.original_triggered ?? entry.baseline
  const afterPass = entry.after_pass ?? entry.after ?? entry.triggered ?? entry.result
  const passedFlag = entry.passed ?? entry.matched

  // Resolve the outcome: "after" result first, explicit pass flag second.
  let outcome: boolean | null = null
  if (typeof afterPass === "boolean") {
    outcome = afterPass
  } else if (typeof passedFlag === "boolean") {
    outcome = passedFlag
  }

  let statusIcon: React.ReactNode
  if (outcome === null) {
    statusIcon = <CircleDotIcon className="size-3.5 text-muted-foreground shrink-0 mt-0.5" />
  } else if (outcome) {
    statusIcon = <CheckCircleIcon className="size-3.5 text-emerald-500 shrink-0 mt-0.5" />
  } else {
    statusIcon = <XCircleIcon className="size-3.5 text-red-500 shrink-0 mt-0.5" />
  }

  const hasTransition = typeof beforePass === "boolean" && typeof afterPass === "boolean"

  return (
    <div className="flex items-start gap-2 text-xs py-1.5 border-b border-border/50 last:border-0">
      {statusIcon}
      <span className="flex-1 min-w-0 line-clamp-2">
        {query ? String(query) : JSON.stringify(entry)}
      </span>
      <div className="flex items-center gap-1.5 shrink-0">
        {hasTransition && (
          <span className="text-[10px] text-muted-foreground font-mono">
            {beforePass ? "pass" : "fail"} &rarr; {afterPass ? "pass" : "fail"}
          </span>
        )}
        {shouldTrigger !== undefined && (
          <Badge variant="secondary" className="text-[9px]">
            expect: {String(shouldTrigger)}
          </Badge>
        )}
        {invocationType != null && (
          <Badge variant="secondary" className="text-[9px]">
            {String(invocationType)}
          </Badge>
        )}
      </div>
    </div>
  )
}
187
+
188
/**
 * Panel summarising a validation record.
 *
 * Well-known keys (`improved`, `before_pass_rate`, `after_pass_rate`,
 * `net_change`, `regressions`, `new_passes`, `per_entry_results`) get dedicated
 * presentation; every other key is listed in a generic key/value grid.
 * Each well-known field is only rendered when it has the expected runtime type,
 * because the record is untyped (`Record<string, unknown>`).
 */
function ValidationResults({ validation }: { validation: Record<string, unknown> }) {
  const { improved, before_pass_rate, after_pass_rate, net_change, regressions, new_passes, per_entry_results, ...rest } = validation

  const regressionsArr: unknown[] = Array.isArray(regressions) ? regressions : []
  const newPassesArr: unknown[] = Array.isArray(new_passes) ? new_passes : []
  const perEntryArr: unknown[] = Array.isArray(per_entry_results) ? per_entry_results : []

  // Rows expect an object; wrap primitives so they can still be displayed.
  const toRecord = (item: unknown): Record<string, unknown> =>
    typeof item === "object" && item !== null ? (item as Record<string, unknown>) : { value: item }

  // A net change of exactly zero is neither an improvement nor a regression,
  // so it gets a neutral colour instead of the regression red.
  let netChangeTone = "text-muted-foreground"
  if (typeof net_change === "number") {
    if (net_change > 0) netChangeTone = "text-emerald-600 dark:text-emerald-400"
    else if (net_change < 0) netChangeTone = "text-red-500"
  }

  return (
    <div className="rounded-md border bg-muted/30 p-3 space-y-3">
      <p className="text-xs font-medium text-muted-foreground">
        Validation Results
        <span className="font-normal text-muted-foreground/60 ml-1.5">&mdash; Before/after comparison from eval tests</span>
      </p>

      {/* Summary bar */}
      <div className="flex items-center gap-3 flex-wrap">
        {/* Only a real boolean is a verdict; null or other values must not read as "Regressed". */}
        {typeof improved === "boolean" && (
          <Badge variant={improved ? "default" : "destructive"} className="text-[10px]">
            {improved ? "Improved" : "Regressed"}
          </Badge>
        )}
        {typeof before_pass_rate === "number" && typeof after_pass_rate === "number" && (
          <span className="text-xs font-mono text-muted-foreground">
            {(before_pass_rate * 100).toFixed(1)}% &rarr; {(after_pass_rate * 100).toFixed(1)}%
          </span>
        )}
        {typeof net_change === "number" && (
          <span className={`text-xs font-mono font-semibold ${netChangeTone}`}>
            {net_change > 0 ? "+" : ""}{(net_change * 100).toFixed(1)}%
          </span>
        )}
      </div>

      {/* New passes */}
      {newPassesArr.length > 0 && (
        <div>
          <p className="text-[11px] font-medium text-emerald-600 dark:text-emerald-400 mb-1">
            New Passes ({newPassesArr.length})
          </p>
          <div className="rounded border bg-card p-2">
            {newPassesArr.map((item, j) => (
              <PerEntryResult key={j} entry={toRecord(item)} />
            ))}
          </div>
        </div>
      )}

      {/* Regressions */}
      {regressionsArr.length > 0 && (
        <div>
          <p className="text-[11px] font-medium text-red-500 mb-1">
            Regressions ({regressionsArr.length})
          </p>
          <div className="rounded border border-red-200 dark:border-red-900/50 bg-card p-2">
            {regressionsArr.map((item, j) => (
              <PerEntryResult key={j} entry={toRecord(item)} />
            ))}
          </div>
        </div>
      )}

      {/* Per-entry results (collapsible if many) */}
      {perEntryArr.length > 0 && (
        <PerEntryResultsSection entries={perEntryArr} />
      )}

      {/* Any remaining keys */}
      {Object.keys(rest).length > 0 && (
        <div className="grid grid-cols-[auto_1fr] gap-x-3 gap-y-1 text-xs">
          {Object.entries(rest).map(([key, val]) => (
            <div key={key} className="contents">
              <span className="font-mono text-muted-foreground">{key}</span>
              <span className="text-foreground">{formatValidationValue(key, val)}</span>
            </div>
          ))}
        </div>
      )}
    </div>
  )
}
268
+
269
/**
 * List of individual test-case results with a pass counter, a pass-rate bar and
 * a "show all" toggle. Only the first five rows are shown until expanded.
 *
 * An entry counts as passed using the same precedence as the row icon in
 * `PerEntryResult`: the "after" outcome (`after_pass`, `after`, `triggered`,
 * `result`) when it is a boolean, otherwise the `passed` / `matched` flag.
 * Keeping both in step prevents a header such as "0/N passed" above rows that
 * all show a green tick.
 */
function PerEntryResultsSection({ entries }: { entries: unknown[] }) {
  const [expanded, setExpanded] = useState(false)

  const passCount = entries.filter((e) => {
    if (typeof e !== "object" || e === null) return false
    const obj = e as Record<string, unknown>
    // `after_pass` is the field used by the nested { entry, before_pass, after_pass } shape.
    const afterPass = obj.after_pass ?? obj.after ?? obj.triggered ?? obj.result
    if (typeof afterPass === "boolean") return afterPass
    const passed = obj.passed ?? obj.matched
    return passed === true
  }).length

  const display = expanded ? entries : entries.slice(0, 5)
  // Guard against division by zero for an empty list.
  const passPercent = entries.length > 0 ? (passCount / entries.length) * 100 : 0

  return (
    <div>
      <div className="flex items-center justify-between mb-1">
        <p className="text-[11px] font-medium text-muted-foreground">
          Individual Test Cases ({passCount}/{entries.length} passed)
        </p>
        {entries.length > 5 && (
          <button
            type="button"
            onClick={() => setExpanded((open) => !open)}
            className="text-[10px] text-primary hover:underline"
          >
            {expanded ? "Show less" : `Show all ${entries.length}`}
          </button>
        )}
      </div>
      {/* Pass rate bar */}
      <div className="h-1.5 rounded-full bg-muted overflow-hidden mb-2">
        <div
          className="h-full rounded-full bg-emerald-500 transition-all"
          style={{ width: `${passPercent}%` }}
        />
      </div>
      <div className="rounded border bg-card p-2 max-h-[300px] overflow-y-auto">
        {display.map((entry, j) => (
          <PerEntryResult
            key={j}
            entry={typeof entry === "object" && entry !== null ? entry as Record<string, unknown> : { value: entry }}
          />
        ))}
      </div>
    </div>
  )
}
313
+
314
/**
 * Read `after_pass_rate` from an evidence entry's validation record.
 *
 * @returns The rate when the entry has validation data and the field is a
 *   number; `null` otherwise.
 */
function getAfterPassRate(entry: EvidenceEntry): number | null {
  const validation = entry.validation
  if (!validation) return null

  const candidate = validation.after_pass_rate
  if (typeof candidate !== "number") return null
  return candidate
}
320
+
321
/**
 * Signed percentage-point difference between two pass rates ("+x.x% vs previous").
 * Renders nothing when either rate is missing or the two rates are equal.
 */
function DeltaBadge({ prev, curr }: { prev: number | null; curr: number | null }) {
  if (prev === null || curr === null) return null

  const delta = curr - prev
  if (delta === 0) return null

  const isGain = delta > 0
  const tone = isGain ? "text-emerald-600 dark:text-emerald-400" : "text-red-500"
  // Negative numbers already carry their own minus sign.
  const sign = isGain ? "+" : ""
  const pct = (delta * 100).toFixed(1)

  return (
    <span className={`text-[10px] font-mono font-semibold ${tone}`}>
      {sign}{pct}% vs previous
    </span>
  )
}
334
+
335
/**
 * Collapsible list of the eval-set test cases attached to an evidence entry.
 * The header shows how many cases have `passed` (or, failing that, `result`)
 * strictly equal to `true`; rows are only rendered while expanded.
 */
function EvalSetSection({ evalSet }: { evalSet: Array<Record<string, unknown>> }) {
  const [expanded, setExpanded] = useState(false)

  let passCount = 0
  for (const item of evalSet) {
    if ((item.passed ?? item.result) === true) passCount += 1
  }

  const ToggleIcon = expanded ? ChevronDownIcon : ChevronRightIcon

  return (
    <div className="rounded-md border bg-muted/30 p-3 space-y-2">
      <button
        type="button"
        onClick={() => setExpanded(!expanded)}
        className="flex items-center gap-1.5 w-full text-left"
      >
        <ToggleIcon className="size-3.5 text-muted-foreground shrink-0" />
        <ListChecksIcon className="size-3.5 text-muted-foreground" />
        <span className="text-xs font-medium text-muted-foreground">
          Eval Set ({passCount}/{evalSet.length} passed)
        </span>
      </button>
      {expanded && (
        <div className="space-y-1">
          {evalSet.map((evalEntry, j) => {
            const outcome = evalEntry.passed ?? evalEntry.result
            const expected = evalEntry.expected ?? evalEntry.should_trigger
            // Fall back to the serialised entry when no query-like field exists.
            const label = String(
              evalEntry.query ?? evalEntry.prompt ?? evalEntry.input ?? JSON.stringify(evalEntry),
            )

            let statusIcon: React.ReactNode
            if (typeof outcome !== "boolean") {
              statusIcon = <CircleDotIcon className="size-3.5 text-muted-foreground shrink-0 mt-0.5" />
            } else if (outcome) {
              statusIcon = <CheckCircleIcon className="size-3.5 text-emerald-500 shrink-0 mt-0.5" />
            } else {
              statusIcon = <XCircleIcon className="size-3.5 text-red-500 shrink-0 mt-0.5" />
            }

            return (
              <div key={j} className="flex items-start gap-2 text-xs py-1 border-b border-border/50 last:border-0">
                {statusIcon}
                <span className="flex-1 min-w-0 line-clamp-2">{label}</span>
                {expected !== undefined && (
                  <Badge variant="secondary" className="text-[9px] shrink-0">
                    expect: {String(expected)}
                  </Badge>
                )}
              </div>
            )
          })}
        </div>
      )}
    </div>
  )
}
386
+
387
/**
 * Where an evidence card sits within a target's sequence of rounds:
 * "single" when there is no multi-round sequence, "final" for the last of
 * several rounds, and "intermediate" for the earlier ones.
 */
type RoundStatus =
  | "single"
  | "intermediate"
  | "final"
388
+
389
/**
 * Full evidence card for one entry: header (target, round label, stage,
 * confidence, age) followed by rationale, details, the original/proposed
 * content comparison, the eval set and the validation results. Each section is
 * rendered only when the entry carries the corresponding data.
 *
 * Round label and pass-rate delta are shown only when `roundStatus` is not
 * "single"; the "final" round is additionally highlighted and badged.
 */
function EvidenceCard({
  entry,
  roundLabel,
  roundStatus,
  prevPassRate,
  currPassRate,
}: {
  entry: EvidenceEntry
  roundLabel: string | null
  roundStatus: RoundStatus
  prevPassRate: number | null
  currPassRate: number | null
}) {
  const isFinal = roundStatus === "final"
  const showRound = roundStatus !== "single"
  const cardClass = isFinal ? "border-primary/50 shadow-sm" : undefined

  // Confidence badge colour: >= 0.8 default, >= 0.5 secondary, otherwise destructive.
  let confidenceVariant: "default" | "secondary" | "destructive" = "destructive"
  if (entry.confidence !== null) {
    if (entry.confidence >= 0.8) confidenceVariant = "default"
    else if (entry.confidence >= 0.5) confidenceVariant = "secondary"
  }

  return (
    <Card className={cardClass}>
      <CardHeader className="pb-3">
        <div className="flex items-center justify-between">
          <CardTitle className="text-sm flex items-center gap-2">
            <ShieldAlertIcon className="size-4 text-muted-foreground" />
            Evidence: {entry.target}
            {showRound && roundLabel && (
              <span className="text-[10px] font-mono text-muted-foreground">{roundLabel}</span>
            )}
            {isFinal && (
              <Badge variant="default" className="text-[10px]">Final</Badge>
            )}
          </CardTitle>
          <div className="flex items-center gap-2">
            {showRound && <DeltaBadge prev={prevPassRate} curr={currPassRate} />}
            <Badge variant="secondary" className="text-[10px]">{entry.stage}</Badge>
            {entry.confidence !== null && (
              <Badge variant={confidenceVariant} className="text-[10px] font-mono">
                {formatRate(entry.confidence)} confidence
              </Badge>
            )}
            <span className="text-[10px] text-muted-foreground">{timeAgo(entry.timestamp)}</span>
          </div>
        </div>
      </CardHeader>
      <CardContent className="space-y-4">
        {/* Why the change was proposed */}
        {entry.rationale && (
          <div className="rounded-md border-l-2 border-primary/40 bg-primary/5 px-3 py-2">
            <p className="text-xs font-medium text-muted-foreground mb-1">Rationale</p>
            <p className="text-sm leading-relaxed">{entry.rationale}</p>
          </div>
        )}

        {/* Free-form details */}
        {entry.details && (
          <p className="text-xs text-muted-foreground leading-relaxed">{entry.details}</p>
        )}

        {/* Original and proposed content, side by side on large screens */}
        {(entry.original_text || entry.proposed_text) && (
          <div className="grid grid-cols-1 gap-4 lg:grid-cols-2">
            {entry.original_text && (
              <SkillContentBlock label="Original" text={entry.original_text} variant="original" />
            )}
            {entry.proposed_text && (
              <SkillContentBlock label="Proposed" text={entry.proposed_text} variant="proposed" />
            )}
          </div>
        )}

        {/* Test cases used for validation (collapsible) */}
        {entry.eval_set && entry.eval_set.length > 0 && (
          <EvalSetSection evalSet={entry.eval_set} />
        )}

        {/* Validation outcome */}
        {entry.validation && Object.keys(entry.validation).length > 0 && (
          <ValidationResults validation={entry.validation} />
        )}
      </CardContent>
    </Card>
  )
}
472
+
473
/**
 * One-line, clickable summary of an evidence entry: target, round label,
 * pass rate (when available), improved/regressed verdict (when it is a
 * boolean), stage and age. Clicking anywhere on the row calls `onExpand`.
 */
function CollapsedEvidenceCard({
  entry,
  roundLabel,
  onExpand,
}: {
  entry: EvidenceEntry
  roundLabel: string
  onExpand: () => void
}) {
  const afterRate = getAfterPassRate(entry)
  const verdict = entry.validation?.improved
  const hasVerdict = typeof verdict === "boolean"

  return (
    <button
      type="button"
      onClick={onExpand}
      className="flex items-center gap-3 w-full rounded-lg border border-dashed px-4 py-3 text-left hover:bg-accent/50 transition-colors"
    >
      <ChevronRightIcon className="size-4 text-muted-foreground shrink-0" />
      <ShieldAlertIcon className="size-3.5 text-muted-foreground shrink-0" />
      <span className="text-xs text-muted-foreground">{entry.target}</span>
      <span className="text-[10px] font-mono text-muted-foreground">{roundLabel}</span>
      <div className="flex items-center gap-2 ml-auto shrink-0">
        {afterRate !== null && (
          <span className="text-[10px] font-mono text-muted-foreground">
            {(afterRate * 100).toFixed(1)}% pass rate
          </span>
        )}
        {hasVerdict && (
          <Badge variant={verdict ? "default" : "destructive"} className="text-[9px]">
            {verdict ? "Improved" : "Regressed"}
          </Badge>
        )}
        <Badge variant="secondary" className="text-[10px]">{entry.stage}</Badge>
        <span className="text-[10px] text-muted-foreground">{timeAgo(entry.timestamp)}</span>
      </div>
    </button>
  )
}
513
+
514
/**
 * Evidence trail for a single evolution proposal.
 *
 * Shows, top to bottom:
 * 1. a context banner,
 * 2. the "proposal journey": the proposal's evolution steps in chronological
 *    order, the most recent eval snapshot found on any step, and the last
 *    step's details,
 * 3. one card per evidence entry whose stage is not "validated",
 * 4. "validated" evidence grouped by target; when a target has several rounds
 *    the earlier ones start collapsed and the last one is marked final,
 * 5. an empty-state message when the proposal has no evidence entries.
 *
 * @param proposalId - Proposal to display; records are matched on `proposal_id`.
 * @param evolution - Evolution entries, possibly spanning many proposals.
 * @param evidence - Evidence entries, possibly spanning many proposals.
 */
export function EvidenceViewer({ proposalId, evolution, evidence }: Props) {
  // `filter` returns a fresh array, so the in-place `sort` never touches the props.
  const steps = useMemo(
    () => evolution
      .filter((e) => e.proposal_id === proposalId)
      .sort((a, b) => new Date(a.timestamp).getTime() - new Date(b.timestamp).getTime()),
    [evolution, proposalId],
  )

  const entries = useMemo(
    () => evidence
      .filter((e) => e.proposal_id === proposalId)
      .sort((a, b) => new Date(a.timestamp).getTime() - new Date(b.timestamp).getTime()),
    [evidence, proposalId],
  )

  // Keys of the earlier rounds the user has manually expanded.
  const [expandedRounds, setExpandedRounds] = useState<Set<string>>(new Set())

  const toggleRound = (key: string) => {
    setExpandedRounds((prev) => {
      // Copy before mutating so React sees a new Set instance.
      const next = new Set(prev)
      if (next.has(key)) next.delete(key)
      else next.add(key)
      return next
    })
  }

  // Most recent step that carries an eval snapshot, if any.
  const snapshot = useMemo(() => {
    for (let i = steps.length - 1; i >= 0; i--) {
      if (steps[i].eval_snapshot) return steps[i].eval_snapshot as Record<string, unknown>
    }
    return null
  }, [steps])

  // Split evidence into non-validated entries and validated entries grouped by target.
  const { proposalEntries, validationsByTarget } = useMemo(() => {
    const proposals: EvidenceEntry[] = []
    const validationMap = new Map<string, EvidenceEntry[]>()
    for (const entry of entries) {
      if (entry.stage !== "validated") {
        proposals.push(entry)
        continue
      }
      const bucket = validationMap.get(entry.target)
      if (bucket) bucket.push(entry)
      else validationMap.set(entry.target, [entry])
    }
    return { proposalEntries: proposals, validationsByTarget: validationMap }
  }, [entries])

  // The snapshot is an untyped record, so narrow each field once before rendering.
  const netChange = snapshot !== null && typeof snapshot.net_change === "number" ? snapshot.net_change : null
  const beforeRate = snapshot !== null && typeof snapshot.before_pass_rate === "number" ? snapshot.before_pass_rate : null
  const afterRate = snapshot !== null && typeof snapshot.after_pass_rate === "number" ? snapshot.after_pass_rate : null
  // Only a real boolean is a verdict; null or other values must not read as "Regressed".
  const snapshotImproved = snapshot !== null && typeof snapshot.improved === "boolean" ? snapshot.improved : null

  const lastStep = steps.length > 0 ? steps[steps.length - 1] : undefined

  return (
    <div className="space-y-4">
      {/* Context banner */}
      <div className="flex items-start gap-2.5 rounded-lg border border-primary/20 bg-primary/5 px-3.5 py-2.5">
        <InfoIcon className="size-4 text-primary/60 shrink-0 mt-0.5" />
        <p className="text-xs text-muted-foreground leading-relaxed">
          This view shows the complete evidence trail for a skill evolution proposal &mdash; how the skill was changed,
          the eval test results before and after, and whether the change improved performance.
        </p>
      </div>

      {/* Proposal journey */}
      <Card>
        <CardHeader className="pb-3">
          <CardTitle className="text-sm flex items-center gap-2">
            <span>Proposal Journey</span>
            <span className="font-mono text-xs text-muted-foreground">#{proposalId.slice(0, 12)}</span>
          </CardTitle>
        </CardHeader>
        <CardContent className="space-y-3">
          <div className="flex items-center gap-2 flex-wrap">
            {steps.map((step, i) => (
              <div key={`${step.action}-${i}`} className="contents">
                {i > 0 && <ArrowRightIcon className="size-3 text-muted-foreground/50 shrink-0" />}
                <div className="flex items-center gap-1.5 rounded-md border px-2.5 py-1.5 bg-card">
                  {ACTION_ICON[step.action]}
                  <Badge variant={ACTION_VARIANT[step.action] ?? "secondary"} className="text-[10px] capitalize">
                    {/* A string pattern would only replace the first underscore. */}
                    {step.action.replace(/_/g, " ")}
                  </Badge>
                  <span className="text-[10px] text-muted-foreground">{timeAgo(step.timestamp)}</span>
                </div>
              </div>
            ))}
          </div>

          {/* Eval snapshot: pass rate change */}
          {snapshot && (
            <div className="flex items-center gap-3 rounded-md border bg-muted/20 px-3 py-2">
              {netChange !== null && (
                <div className="flex items-center gap-1">
                  {netChange > 0
                    ? <TrendingUpIcon className="size-3.5 text-emerald-500" />
                    : <TrendingDownIcon className="size-3.5 text-red-500" />}
                  <span className={`text-sm font-semibold font-mono ${netChange > 0 ? "text-emerald-600 dark:text-emerald-400" : "text-red-500"}`}>
                    {netChange > 0 ? "+" : ""}{Math.round(netChange * 100)}%
                  </span>
                </div>
              )}
              {beforeRate !== null && afterRate !== null && (
                <span className="text-xs text-muted-foreground font-mono">
                  {Math.round(beforeRate * 100)}% &rarr; {Math.round(afterRate * 100)}%
                </span>
              )}
              {snapshotImproved !== null && (
                <Badge variant={snapshotImproved ? "default" : "destructive"} className="text-[10px]">
                  {snapshotImproved ? "Improved" : "Regressed"}
                </Badge>
              )}
            </div>
          )}

          {/* Details from last step */}
          {lastStep?.details && (
            <p className="text-xs text-muted-foreground leading-relaxed">
              {lastStep.details}
            </p>
          )}
        </CardContent>
      </Card>

      {/* Non-validated evidence: standalone cards showing original/proposed text */}
      {proposalEntries.map((entry) => (
        <EvidenceCard
          key={`proposal-${entry.target}-${entry.timestamp}`}
          entry={entry}
          roundLabel={null}
          roundStatus="single"
          prevPassRate={null}
          currPassRate={null}
        />
      ))}

      {/* Validated evidence: grouped by target with iteration rounds */}
      {Array.from(validationsByTarget.entries()).map(([target, targetEntries]) => {
        const hasMultipleRounds = targetEntries.length > 1

        return (
          <div key={target} className="space-y-2">
            {targetEntries.map((entry, i) => {
              const isLast = i === targetEntries.length - 1
              const roundLabel = hasMultipleRounds ? `Round ${i + 1} of ${targetEntries.length}` : null
              // The delta is measured against the previous round of the same target.
              const prevPassRate = i > 0 ? getAfterPassRate(targetEntries[i - 1]) : null
              const currPassRate = getAfterPassRate(entry)
              const roundKey = `${target}-${entry.timestamp}`
              const roundStatus: RoundStatus = !hasMultipleRounds ? "single" : isLast ? "final" : "intermediate"

              if (roundStatus === "intermediate") {
                // Earlier rounds: collapsed by default
                if (!expandedRounds.has(roundKey)) {
                  return (
                    <CollapsedEvidenceCard
                      key={roundKey}
                      entry={entry}
                      // Intermediate rounds only exist when there are multiple rounds, so a label is always set.
                      roundLabel={roundLabel ?? ""}
                      onExpand={() => toggleRound(roundKey)}
                    />
                  )
                }

                // Expanded earlier round: full card plus a collapse toggle
                return (
                  <div key={roundKey} className="space-y-1">
                    <button
                      type="button"
                      onClick={() => toggleRound(roundKey)}
                      className="flex items-center gap-1 text-[10px] text-muted-foreground hover:text-foreground transition-colors px-1"
                    >
                      <ChevronDownIcon className="size-3" />
                      Collapse {roundLabel}
                    </button>
                    <EvidenceCard
                      entry={entry}
                      roundLabel={roundLabel}
                      roundStatus={roundStatus}
                      prevPassRate={prevPassRate}
                      currPassRate={currPassRate}
                    />
                  </div>
                )
              }

              // Final round (or single entry): always expanded
              return (
                <EvidenceCard
                  key={roundKey}
                  entry={entry}
                  roundLabel={roundLabel}
                  roundStatus={roundStatus}
                  prevPassRate={prevPassRate}
                  currPassRate={currPassRate}
                />
              )
            })}
          </div>
        )
      })}

      {entries.length === 0 && (
        <div className="flex items-center justify-center rounded-lg border border-dashed py-8">
          <p className="text-sm text-muted-foreground">No evidence entries for this proposal</p>
        </div>
      )}
    </div>
  )
}