selftune 0.2.2 → 0.2.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +11 -0
- package/apps/local-dashboard/dist/assets/index-C75H1Q3n.css +1 -0
- package/apps/local-dashboard/dist/assets/index-axE4kz3Q.js +15 -0
- package/apps/local-dashboard/dist/assets/vendor-ui-r2k_Ku_V.js +346 -0
- package/apps/local-dashboard/dist/index.html +3 -3
- package/cli/selftune/analytics.ts +354 -0
- package/cli/selftune/badge/badge.ts +2 -2
- package/cli/selftune/dashboard-server.ts +3 -3
- package/cli/selftune/evolution/evolve-body.ts +1 -1
- package/cli/selftune/evolution/evolve.ts +1 -1
- package/cli/selftune/index.ts +15 -1
- package/cli/selftune/init.ts +5 -1
- package/cli/selftune/observability.ts +63 -2
- package/cli/selftune/orchestrate.ts +1 -1
- package/cli/selftune/quickstart.ts +1 -1
- package/cli/selftune/status.ts +2 -2
- package/cli/selftune/types.ts +1 -0
- package/cli/selftune/utils/llm-call.ts +2 -1
- package/package.json +6 -4
- package/packages/ui/README.md +113 -0
- package/packages/ui/index.ts +10 -0
- package/packages/ui/package.json +62 -0
- package/packages/ui/src/components/ActivityTimeline.tsx +171 -0
- package/packages/ui/src/components/EvidenceViewer.tsx +718 -0
- package/packages/ui/src/components/EvolutionTimeline.tsx +252 -0
- package/packages/ui/src/components/InfoTip.tsx +19 -0
- package/packages/ui/src/components/OrchestrateRunsPanel.tsx +164 -0
- package/packages/ui/src/components/index.ts +7 -0
- package/packages/ui/src/components/section-cards.tsx +155 -0
- package/packages/ui/src/components/skill-health-grid.tsx +686 -0
- package/packages/ui/src/lib/constants.tsx +43 -0
- package/packages/ui/src/lib/format.ts +37 -0
- package/packages/ui/src/lib/index.ts +3 -0
- package/packages/ui/src/lib/utils.ts +6 -0
- package/packages/ui/src/primitives/badge.tsx +52 -0
- package/packages/ui/src/primitives/button.tsx +58 -0
- package/packages/ui/src/primitives/card.tsx +103 -0
- package/packages/ui/src/primitives/checkbox.tsx +27 -0
- package/packages/ui/src/primitives/collapsible.tsx +7 -0
- package/packages/ui/src/primitives/dropdown-menu.tsx +266 -0
- package/packages/ui/src/primitives/index.ts +55 -0
- package/packages/ui/src/primitives/label.tsx +20 -0
- package/packages/ui/src/primitives/select.tsx +197 -0
- package/packages/ui/src/primitives/table.tsx +114 -0
- package/packages/ui/src/primitives/tabs.tsx +82 -0
- package/packages/ui/src/primitives/tooltip.tsx +64 -0
- package/packages/ui/src/types.ts +87 -0
- package/packages/ui/tsconfig.json +17 -0
- package/skill/SKILL.md +3 -0
- package/skill/Workflows/Telemetry.md +59 -0
- package/apps/local-dashboard/dist/assets/index-C4EOTFZ2.js +0 -15
- package/apps/local-dashboard/dist/assets/index-bl-Webyd.css +0 -1
- package/apps/local-dashboard/dist/assets/vendor-ui-D7_zX_qy.js +0 -346
|
@@ -0,0 +1,718 @@
|
|
|
1
|
+
import { useMemo, useState } from "react"
|
|
2
|
+
import { Badge } from "../primitives/badge"
|
|
3
|
+
import { Card, CardContent, CardHeader, CardTitle } from "../primitives/card"
|
|
4
|
+
import type { EvidenceEntry, EvolutionEntry } from "../types"
|
|
5
|
+
import { formatRate, timeAgo } from "../lib/format"
|
|
6
|
+
import {
|
|
7
|
+
CheckCircleIcon,
|
|
8
|
+
ChevronDownIcon,
|
|
9
|
+
ChevronRightIcon,
|
|
10
|
+
CircleDotIcon,
|
|
11
|
+
FileTextIcon,
|
|
12
|
+
InfoIcon,
|
|
13
|
+
RocketIcon,
|
|
14
|
+
ShieldCheckIcon,
|
|
15
|
+
ShieldAlertIcon,
|
|
16
|
+
XCircleIcon,
|
|
17
|
+
UndoIcon,
|
|
18
|
+
ArrowRightIcon,
|
|
19
|
+
TrendingUpIcon,
|
|
20
|
+
TrendingDownIcon,
|
|
21
|
+
ListChecksIcon,
|
|
22
|
+
} from "lucide-react"
|
|
23
|
+
import Markdown from "react-markdown"
|
|
24
|
+
|
|
25
|
+
/** Tailwind sizing class shared by every proposal-journey step icon. */
const ACTION_ICON_CLASS = "size-3.5"

/**
 * Icon rendered for each known evolution action in the proposal journey.
 * Actions without an entry resolve to `undefined`, which renders no icon.
 */
const ACTION_ICON: Record<string, React.ReactNode> = {
  created: <CircleDotIcon className={ACTION_ICON_CLASS} />,
  validated: <ShieldCheckIcon className={ACTION_ICON_CLASS} />,
  deployed: <RocketIcon className={ACTION_ICON_CLASS} />,
  rejected: <XCircleIcon className={ACTION_ICON_CLASS} />,
  rolled_back: <UndoIcon className={ACTION_ICON_CLASS} />,
}
|
|
32
|
+
|
|
33
|
+
/** Badge variants an evolution action can map to. */
type ActionBadgeVariant = "default" | "secondary" | "destructive" | "outline"

/**
 * Badge variant for each known evolution action.
 * The lookup is keyed by arbitrary strings, so callers supply their own
 * fallback for actions that are not listed here.
 */
const ACTION_VARIANT: Record<string, ActionBadgeVariant> = {
  created: "outline",
  validated: "secondary",
  deployed: "default",
  rejected: "destructive",
  rolled_back: "destructive",
}
|
|
40
|
+
|
|
41
|
+
/** Inputs of the `EvidenceViewer` component. */
type Props = {
  /** Proposal whose records are shown; both lists are filtered by this id. */
  proposalId: string
  /** Evolution records; only those whose `proposal_id` matches are used. */
  evolution: EvolutionEntry[]
  /** Evidence records; only those whose `proposal_id` matches are used. */
  evidence: EvidenceEntry[]
}
|
|
46
|
+
|
|
47
|
+
/**
 * Split a document into leading YAML-ish frontmatter and the remaining body.
 *
 * The frontmatter must start on the very first line with `---` and be closed
 * by a later `---` line. Each `key: value` line inside becomes one `meta`
 * entry, split at the first colon; lines without a key or without a value are
 * ignored. No real YAML parsing is performed (quotes, lists and nesting are
 * kept as raw text).
 *
 * @param text Raw document text.
 * @returns `meta` with the parsed pairs and `body` with everything after the
 *   closing fence. When no frontmatter is found, `meta` is empty and `body`
 *   is the unchanged input.
 */
function parseFrontmatter(text: string): { meta: Record<string, string>; body: string } {
  // Usual case: the closing fence is followed by a newline and a body.
  const fencedWithBody = /^---\s*\n([\s\S]*?)\n---\s*\n([\s\S]*)$/.exec(text)
  // Frontmatter-only document: the closing fence is the last thing in the
  // text, so there is no trailing newline for the first pattern to consume.
  const match = fencedWithBody ?? /^---\s*\n([\s\S]*?)\n---\s*$/.exec(text)
  if (!match) return { meta: {}, body: text }

  const meta: Record<string, string> = {}
  for (const line of (match[1] ?? "").split("\n")) {
    const idx = line.indexOf(":")
    // idx === 0 would mean an empty key; -1 means the line has no colon.
    if (idx <= 0) continue
    const key = line.slice(0, idx).trim()
    const val = line.slice(idx + 1).trim()
    if (key && val) meta[key] = val
  }
  // Group 2 is absent when only the frontmatter-only pattern matched.
  return { meta, body: match[2] ?? "" }
}
|
|
63
|
+
|
|
64
|
+
/**
 * Two-column key/value grid for parsed frontmatter.
 * Renders nothing when `meta` has no entries.
 */
function FrontmatterTable({ meta }: { meta: Record<string, string> }) {
  const fieldNames = Object.keys(meta)
  if (!fieldNames.length) return null

  return (
    <div className="grid grid-cols-[auto_1fr] gap-x-3 gap-y-1 text-xs">
      {fieldNames.map((name) => (
        // `contents` lets both spans participate directly in the parent grid.
        <div key={name} className="contents">
          <span className="font-medium text-muted-foreground capitalize">{name}</span>
          <span className="text-foreground truncate">{meta[name]}</span>
        </div>
      ))}
    </div>
  )
}
|
|
79
|
+
|
|
80
|
+
/**
 * One side of the original/proposed comparison.
 *
 * Shows a labelled header (with a "New" badge for the proposed variant), the
 * parsed frontmatter when there is any, and the rest of the text rendered as
 * Markdown.
 */
function SkillContentBlock({ label, text, variant }: { label: string; text: string; variant: "original" | "proposed" }) {
  const parsed = parseFrontmatter(text)
  const showFrontmatter = Object.keys(parsed.meta).length > 0
  const isProposed = variant === "proposed"

  return (
    <div className="flex-1 min-w-0 space-y-3">
      <div className="flex items-center gap-2">
        <FileTextIcon className="size-3.5 text-muted-foreground" />
        <span className="text-xs font-medium text-muted-foreground uppercase tracking-wider">{label}</span>
        {isProposed && (
          <Badge variant="secondary" className="text-[10px]">New</Badge>
        )}
      </div>

      {/* Frontmatter key/value pairs, only when the text had a frontmatter block */}
      {showFrontmatter && (
        <div className="rounded-md border bg-muted/30 p-3">
          <FrontmatterTable meta={parsed.meta} />
        </div>
      )}

      {/* Remaining text rendered as Markdown */}
      <div className="skill-markdown rounded-md border bg-card p-4">
        <Markdown>{parsed.body}</Markdown>
      </div>
    </div>
  )
}
|
|
108
|
+
|
|
109
|
+
/**
 * Format one validation value for display, choosing the rendering by type.
 *
 * - `null` / `undefined`: a muted `--` placeholder.
 * - booleans: a green check or red cross icon.
 * - numbers: a percentage when the key contains "rate" or "change", or when
 *   the value lies in [-1, 1] and the key is not exactly "count"; otherwise
 *   the raw number. Positive "change" values get a leading `+`.
 * - strings: shown verbatim.
 * - arrays: summarised as an entry count (`none` when empty), not listed.
 * - other objects: summarised as `1 entry`.
 * - anything else: its `String()` form.
 */
function formatValidationValue(key: string, val: unknown): React.ReactNode {
  if (val === null || val === undefined) {
    return <span className="text-muted-foreground">--</span>
  }

  if (Array.isArray(val)) {
    return val.length === 0
      ? <span className="text-muted-foreground italic">none</span>
      : <span className="font-mono">{val.length} entries</span>
  }

  switch (typeof val) {
    case "boolean":
      if (val) return <CheckCircleIcon className="size-3.5 text-emerald-500 inline" />
      return <XCircleIcon className="size-3.5 text-red-500 inline" />

    case "number": {
      const namedLikeChange = key.includes("change")
      const inUnitRange = val >= -1 && val <= 1
      const looksLikeRate = key.includes("rate") || namedLikeChange || (inUnitRange && key !== "count")
      if (!looksLikeRate) return <span className="font-mono">{val}</span>

      const pct = (val * 100).toFixed(1)
      const prefix = val > 0 && namedLikeChange ? "+" : ""
      return <span className="font-mono">{prefix}{pct}%</span>
    }

    case "string":
      return <span>{val}</span>

    case "object":
      return <span className="font-mono">1 entry</span>

    default:
      return <span>{String(val)}</span>
  }
}
|
|
140
|
+
|
|
141
|
+
/**
 * One row of a per-entry result list.
 *
 * Accepts both a flat entry and the nested shape
 * `{ entry: { query, should_trigger, ... }, before_pass, after_pass }`, and
 * tolerates several alternative key names for the query text and outcomes.
 * The status icon prefers the "after" outcome and falls back to
 * `passed` / `matched`; a neutral dot is shown when neither is a boolean.
 */
function PerEntryResult({ entry }: { entry: Record<string, unknown> }) {
  // Nested shape keeps the test case itself under `entry`.
  const inner = entry.entry as Record<string, unknown> | undefined

  const queryText = inner?.query ?? entry.query ?? entry.prompt ?? entry.input ?? entry.text
  const expectation = inner?.should_trigger ?? entry.should_trigger
  const invocation = inner?.invocation_type ?? entry.invocation_type
  const before = entry.before_pass ?? entry.before ?? entry.original_triggered ?? entry.baseline
  const after = entry.after_pass ?? entry.after ?? entry.triggered ?? entry.result
  const explicitPass = entry.passed ?? entry.matched

  // The "after" outcome wins; `passed`/`matched` is only a fallback.
  let verdict: boolean | null = null
  if (typeof after === "boolean") verdict = after
  else if (typeof explicitPass === "boolean") verdict = explicitPass

  let statusIcon = <CircleDotIcon className="size-3.5 text-muted-foreground shrink-0 mt-0.5" />
  if (verdict === true) {
    statusIcon = <CheckCircleIcon className="size-3.5 text-emerald-500 shrink-0 mt-0.5" />
  } else if (verdict === false) {
    statusIcon = <XCircleIcon className="size-3.5 text-red-500 shrink-0 mt-0.5" />
  }

  // Without any recognisable query text, show the raw entry as JSON.
  const rowLabel = queryText ? String(queryText) : JSON.stringify(entry)
  const hasTransition = typeof before === "boolean" && typeof after === "boolean"

  return (
    <div className="flex items-start gap-2 text-xs py-1.5 border-b border-border/50 last:border-0">
      {statusIcon}
      <span className="flex-1 min-w-0 line-clamp-2">
        {rowLabel}
      </span>
      <div className="flex items-center gap-1.5 shrink-0">
        {hasTransition && (
          <span className="text-[10px] text-muted-foreground font-mono">
            {before ? "pass" : "fail"} → {after ? "pass" : "fail"}
          </span>
        )}
        {expectation !== undefined && (
          <Badge variant="secondary" className="text-[9px]">
            expect: {String(expectation)}
          </Badge>
        )}
        {invocation != null && (
          <Badge variant="secondary" className="text-[9px]">
            {String(invocation)}
          </Badge>
        )}
      </div>
    </div>
  )
}
|
|
187
|
+
|
|
188
|
+
/**
 * Summary panel for one validation record.
 *
 * Known keys get dedicated rendering: an improved/regressed badge, the
 * before → after pass rates, the net change, and lists of new passes,
 * regressions and per-entry results. Every other key is shown in a generic
 * key/value grid via `formatValidationValue`.
 *
 * The record is untyped, so each known key is checked at runtime before it
 * is rendered; values of an unexpected type are skipped rather than guessed.
 */
function ValidationResults({ validation }: { validation: Record<string, unknown> }) {
  const { improved, before_pass_rate, after_pass_rate, net_change, regressions, new_passes, per_entry_results, ...rest } = validation

  const regressionsArr: unknown[] = Array.isArray(regressions) ? regressions : []
  const newPassesArr: unknown[] = Array.isArray(new_passes) ? new_passes : []
  const perEntryArr: unknown[] = Array.isArray(per_entry_results) ? per_entry_results : []

  // PerEntryResult expects an object; primitive list items are wrapped.
  const asRecord = (item: unknown): Record<string, unknown> =>
    typeof item === "object" && item !== null ? (item as Record<string, unknown>) : { value: item }

  const netChange = typeof net_change === "number" ? net_change : null
  // An unchanged pass rate is neither a gain nor a loss, so it stays neutral.
  const netChangeTone =
    netChange === null || netChange === 0
      ? "text-muted-foreground"
      : netChange > 0
        ? "text-emerald-600 dark:text-emerald-400"
        : "text-red-500"

  const extraKeys = Object.entries(rest)

  return (
    <div className="rounded-md border bg-muted/30 p-3 space-y-3">
      <p className="text-xs font-medium text-muted-foreground">
        Validation Results
        <span className="font-normal text-muted-foreground/60 ml-1.5">— Before/after comparison from eval tests</span>
      </p>

      {/* Summary bar */}
      <div className="flex items-center gap-3 flex-wrap">
        {/* Only a real boolean is a verdict; null or other values must not read as "Regressed". */}
        {typeof improved === "boolean" && (
          <Badge variant={improved ? "default" : "destructive"} className="text-[10px]">
            {improved ? "Improved" : "Regressed"}
          </Badge>
        )}
        {typeof before_pass_rate === "number" && typeof after_pass_rate === "number" && (
          <span className="text-xs font-mono text-muted-foreground">
            {(before_pass_rate * 100).toFixed(1)}% → {(after_pass_rate * 100).toFixed(1)}%
          </span>
        )}
        {netChange !== null && (
          <span className={`text-xs font-mono font-semibold ${netChangeTone}`}>
            {netChange > 0 ? "+" : ""}{(netChange * 100).toFixed(1)}%
          </span>
        )}
      </div>

      {/* New passes */}
      {newPassesArr.length > 0 && (
        <div>
          <p className="text-[11px] font-medium text-emerald-600 dark:text-emerald-400 mb-1">
            New Passes ({newPassesArr.length})
          </p>
          <div className="rounded border bg-card p-2">
            {newPassesArr.map((entry, j) => (
              <PerEntryResult key={j} entry={asRecord(entry)} />
            ))}
          </div>
        </div>
      )}

      {/* Regressions */}
      {regressionsArr.length > 0 && (
        <div>
          <p className="text-[11px] font-medium text-red-500 mb-1">
            Regressions ({regressionsArr.length})
          </p>
          <div className="rounded border border-red-200 dark:border-red-900/50 bg-card p-2">
            {regressionsArr.map((entry, j) => (
              <PerEntryResult key={j} entry={asRecord(entry)} />
            ))}
          </div>
        </div>
      )}

      {/* Per-entry results (collapsible if many) */}
      {perEntryArr.length > 0 && (
        <PerEntryResultsSection entries={perEntryArr} />
      )}

      {/* Any remaining keys */}
      {extraKeys.length > 0 && (
        <div className="grid grid-cols-[auto_1fr] gap-x-3 gap-y-1 text-xs">
          {extraKeys.map(([key, val]) => (
            <div key={key} className="contents">
              <span className="font-mono text-muted-foreground">{key}</span>
              <span className="text-foreground">{formatValidationValue(key, val)}</span>
            </div>
          ))}
        </div>
      )}
    </div>
  )
}
|
|
268
|
+
|
|
269
|
+
/**
 * List of individual test-case results with a pass counter and pass-rate bar.
 *
 * Only the first five rows are shown until the user expands the list.
 *
 * The pass count uses the same precedence as the row icon drawn by
 * `PerEntryResult`: the "after" outcome (`after_pass`, `after`, `triggered`,
 * `result`) decides when it is a boolean, otherwise `passed` / `matched`.
 * This keeps the header and bar in agreement with the icons, including for
 * the nested `{ entry, before_pass, after_pass }` shape.
 */
function PerEntryResultsSection({ entries }: { entries: unknown[] }) {
  const [expanded, setExpanded] = useState(false)

  const passCount = entries.filter((e) => {
    if (typeof e !== "object" || e === null) return false
    const obj = e as Record<string, unknown>
    const afterPass = obj.after_pass ?? obj.after ?? obj.triggered ?? obj.result
    if (typeof afterPass === "boolean") return afterPass
    return (obj.passed ?? obj.matched) === true
  }).length

  const display = expanded ? entries : entries.slice(0, 5)
  // Guard against dividing by zero when the list is empty.
  const passPercent = entries.length > 0 ? (passCount / entries.length) * 100 : 0

  return (
    <div>
      <div className="flex items-center justify-between mb-1">
        <p className="text-[11px] font-medium text-muted-foreground">
          Individual Test Cases ({passCount}/{entries.length} passed)
        </p>
        {entries.length > 5 && (
          <button
            type="button"
            onClick={() => setExpanded(!expanded)}
            className="text-[10px] text-primary hover:underline"
          >
            {expanded ? "Show less" : `Show all ${entries.length}`}
          </button>
        )}
      </div>
      {/* Pass rate bar */}
      <div className="h-1.5 rounded-full bg-muted overflow-hidden mb-2">
        <div
          className="h-full rounded-full bg-emerald-500 transition-all"
          style={{ width: `${passPercent}%` }}
        />
      </div>
      <div className="rounded border bg-card p-2 max-h-[300px] overflow-y-auto">
        {display.map((entry, j) => (
          <PerEntryResult
            key={j}
            // PerEntryResult expects an object; primitive items are wrapped.
            entry={typeof entry === "object" && entry !== null ? entry as Record<string, unknown> : { value: entry }}
          />
        ))}
      </div>
    </div>
  )
}
|
|
313
|
+
|
|
314
|
+
/**
 * Read `after_pass_rate` from an evidence entry's validation data.
 *
 * @returns The rate when validation data exists and the value is a number,
 *   otherwise `null`.
 */
function getAfterPassRate(entry: EvidenceEntry): number | null {
  const afterRate = entry.validation ? entry.validation.after_pass_rate : null
  return typeof afterRate === "number" ? afterRate : null
}
|
|
320
|
+
|
|
321
|
+
/**
 * Signed percentage-point difference between two pass rates.
 *
 * Renders nothing when either rate is missing, when the difference is not a
 * finite number, or when it rounds to 0.0 at the displayed precision. The
 * last case stops float noise from showing a meaningless "+0.0%" or "-0.0%".
 */
function DeltaBadge({ prev, curr }: { prev: number | null; curr: number | null }) {
  if (prev === null || curr === null) return null

  const delta = curr - prev
  if (!Number.isFinite(delta)) return null

  const pct = (delta * 100).toFixed(1)
  // Compare the rounded text, not the raw delta: "-0.0" parses to -0, which equals 0.
  if (Number(pct) === 0) return null

  const positive = delta > 0
  return (
    <span className={`text-[10px] font-mono font-semibold ${positive ? "text-emerald-600 dark:text-emerald-400" : "text-red-500"}`}>
      {positive ? "+" : ""}{pct}% vs previous
    </span>
  )
}
|
|
334
|
+
|
|
335
|
+
/**
 * Collapsible list of the eval-set test cases attached to an evidence entry.
 *
 * The header always shows how many cases passed (`passed`, falling back to
 * `result`, must be exactly `true`); the rows appear only once expanded.
 */
function EvalSetSection({ evalSet }: { evalSet: Array<Record<string, unknown>> }) {
  const [open, setOpen] = useState(false)

  let passing = 0
  for (const item of evalSet) {
    if ((item.passed ?? item.result) === true) passing += 1
  }

  const Chevron = open ? ChevronDownIcon : ChevronRightIcon

  return (
    <div className="rounded-md border bg-muted/30 p-3 space-y-2">
      <button
        type="button"
        onClick={() => setOpen(!open)}
        className="flex items-center gap-1.5 w-full text-left"
      >
        <Chevron className="size-3.5 text-muted-foreground shrink-0" />
        <ListChecksIcon className="size-3.5 text-muted-foreground" />
        <span className="text-xs font-medium text-muted-foreground">
          Eval Set ({passing}/{evalSet.length} passed)
        </span>
      </button>
      {open && (
        <div className="space-y-1">
          {evalSet.map((item, idx) => {
            const outcome = item.passed ?? item.result
            const expectation = item.expected ?? item.should_trigger
            // Without any recognisable query text, show the raw case as JSON.
            const rowLabel = String(item.query ?? item.prompt ?? item.input ?? JSON.stringify(item))

            let statusIcon = <CircleDotIcon className="size-3.5 text-muted-foreground shrink-0 mt-0.5" />
            if (typeof outcome === "boolean") {
              statusIcon = outcome
                ? <CheckCircleIcon className="size-3.5 text-emerald-500 shrink-0 mt-0.5" />
                : <XCircleIcon className="size-3.5 text-red-500 shrink-0 mt-0.5" />
            }

            return (
              <div key={idx} className="flex items-start gap-2 text-xs py-1 border-b border-border/50 last:border-0">
                {statusIcon}
                <span className="flex-1 min-w-0 line-clamp-2">{rowLabel}</span>
                {expectation !== undefined && (
                  <Badge variant="secondary" className="text-[9px] shrink-0">
                    expect: {String(expectation)}
                  </Badge>
                )}
              </div>
            )
          })}
        </div>
      )}
    </div>
  )
}
|
|
386
|
+
|
|
387
|
+
/**
 * Position of an evidence entry within the validation rounds of its target:
 * - "single": shown without any round information,
 * - "intermediate": an earlier round of several,
 * - "final": the last round of several.
 */
type RoundStatus = "final" | "intermediate" | "single"
|
|
388
|
+
|
|
389
|
+
/**
 * Full evidence card for one entry.
 *
 * The header shows the target, stage, confidence and age. For entries that
 * belong to a multi-round sequence (any status other than "single") it also
 * shows the round label and the pass-rate delta against the previous round;
 * the final round gets a highlighted border and a "Final" badge. The body
 * shows, when present: rationale, details, original/proposed text, eval set
 * and validation results.
 */
function EvidenceCard({
  entry,
  roundLabel,
  roundStatus,
  prevPassRate,
  currPassRate,
}: {
  entry: EvidenceEntry
  roundLabel: string | null
  roundStatus: RoundStatus
  prevPassRate: number | null
  currPassRate: number | null
}) {
  const isFinal = roundStatus === "final"
  const inRoundSequence = roundStatus !== "single"
  const hasTextDiff = entry.original_text || entry.proposed_text

  // Confidence badge: >= 0.8 default, >= 0.5 secondary, anything lower destructive.
  let confidenceBadge: React.ReactNode = null
  if (entry.confidence !== null) {
    const tone = entry.confidence >= 0.8 ? "default" : entry.confidence >= 0.5 ? "secondary" : "destructive"
    confidenceBadge = (
      <Badge variant={tone} className="text-[10px] font-mono">
        {formatRate(entry.confidence)} confidence
      </Badge>
    )
  }

  return (
    <Card className={isFinal ? "border-primary/50 shadow-sm" : undefined}>
      <CardHeader className="pb-3">
        <div className="flex items-center justify-between">
          <CardTitle className="text-sm flex items-center gap-2">
            <ShieldAlertIcon className="size-4 text-muted-foreground" />
            Evidence: {entry.target}
            {inRoundSequence && roundLabel && (
              <span className="text-[10px] font-mono text-muted-foreground">{roundLabel}</span>
            )}
            {isFinal && (
              <Badge variant="default" className="text-[10px]">Final</Badge>
            )}
          </CardTitle>
          <div className="flex items-center gap-2">
            {inRoundSequence && <DeltaBadge prev={prevPassRate} curr={currPassRate} />}
            <Badge variant="secondary" className="text-[10px]">{entry.stage}</Badge>
            {confidenceBadge}
            <span className="text-[10px] text-muted-foreground">{timeAgo(entry.timestamp)}</span>
          </div>
        </div>
      </CardHeader>
      <CardContent className="space-y-4">
        {/* Why the change was proposed */}
        {entry.rationale && (
          <div className="rounded-md border-l-2 border-primary/40 bg-primary/5 px-3 py-2">
            <p className="text-xs font-medium text-muted-foreground mb-1">Rationale</p>
            <p className="text-sm leading-relaxed">{entry.rationale}</p>
          </div>
        )}

        {/* Free-form details */}
        {entry.details && (
          <p className="text-xs text-muted-foreground leading-relaxed">{entry.details}</p>
        )}

        {/* Original and proposed text, side by side on large screens */}
        {hasTextDiff && (
          <div className="grid grid-cols-1 gap-4 lg:grid-cols-2">
            {entry.original_text && (
              <SkillContentBlock label="Original" text={entry.original_text} variant="original" />
            )}
            {entry.proposed_text && (
              <SkillContentBlock label="Proposed" text={entry.proposed_text} variant="proposed" />
            )}
          </div>
        )}

        {/* Test cases used for validation (collapsible) */}
        {entry.eval_set && entry.eval_set.length > 0 && (
          <EvalSetSection evalSet={entry.eval_set} />
        )}

        {/* Validation outcome, skipped for an empty record */}
        {entry.validation && Object.keys(entry.validation).length > 0 && (
          <ValidationResults validation={entry.validation} />
        )}
      </CardContent>
    </Card>
  )
}
|
|
472
|
+
|
|
473
|
+
/**
 * One-line summary of an earlier validation round.
 *
 * The whole row is a button that calls `onExpand`. It shows the target and
 * round label plus, when available, the after-pass rate and an
 * improved/regressed badge (only for a boolean `improved` value).
 */
function CollapsedEvidenceCard({
  entry,
  roundLabel,
  onExpand,
}: {
  entry: EvidenceEntry
  roundLabel: string
  onExpand: () => void
}) {
  const afterRate = getAfterPassRate(entry)
  const verdict = entry.validation?.improved
  const hasVerdict = typeof verdict === "boolean"

  return (
    <button
      type="button"
      onClick={onExpand}
      className="flex items-center gap-3 w-full rounded-lg border border-dashed px-4 py-3 text-left hover:bg-accent/50 transition-colors"
    >
      <ChevronRightIcon className="size-4 text-muted-foreground shrink-0" />
      <ShieldAlertIcon className="size-3.5 text-muted-foreground shrink-0" />
      <span className="text-xs text-muted-foreground">{entry.target}</span>
      <span className="text-[10px] font-mono text-muted-foreground">{roundLabel}</span>
      <div className="flex items-center gap-2 ml-auto shrink-0">
        {afterRate !== null && (
          <span className="text-[10px] font-mono text-muted-foreground">
            {(afterRate * 100).toFixed(1)}% pass rate
          </span>
        )}
        {hasVerdict && (
          <Badge variant={verdict ? "default" : "destructive"} className="text-[9px]">
            {verdict ? "Improved" : "Regressed"}
          </Badge>
        )}
        <Badge variant="secondary" className="text-[10px]">{entry.stage}</Badge>
        <span className="text-[10px] text-muted-foreground">{timeAgo(entry.timestamp)}</span>
      </div>
    </button>
  )
}
|
|
513
|
+
|
|
514
|
+
/**
 * Evidence trail for a single evolution proposal.
 *
 * Renders, top to bottom:
 * 1. an explanatory banner,
 * 2. the "proposal journey": the proposal's evolution steps in timestamp
 *    order, the most recent eval snapshot, and the last step's details,
 * 3. one card per non-validation evidence entry,
 * 4. validation evidence grouped by target. When a target has several
 *    entries, earlier rounds start collapsed and the last one is always
 *    expanded and marked as final.
 *
 * Both input lists are filtered by `proposalId`; the filtered copies are
 * sorted, so the caller's arrays are not mutated.
 */
export function EvidenceViewer({ proposalId, evolution, evidence }: Props) {
  const steps = useMemo(
    () => evolution
      .filter((e) => e.proposal_id === proposalId)
      .sort((a, b) => new Date(a.timestamp).getTime() - new Date(b.timestamp).getTime()),
    [evolution, proposalId],
  )

  const entries = useMemo(
    () => evidence
      .filter((e) => e.proposal_id === proposalId)
      .sort((a, b) => new Date(a.timestamp).getTime() - new Date(b.timestamp).getTime()),
    [evidence, proposalId],
  )

  // Keys of the earlier rounds the user has manually expanded.
  const [expandedRounds, setExpandedRounds] = useState<Set<string>>(new Set())

  const toggleRound = (key: string) => {
    setExpandedRounds((prev) => {
      // Copy first: mutating the previous Set would not trigger a re-render.
      const next = new Set(prev)
      if (next.has(key)) next.delete(key)
      else next.add(key)
      return next
    })
  }

  // Most recent step that carries an eval snapshot, if any.
  const snapshot = useMemo(() => {
    for (let i = steps.length - 1; i >= 0; i--) {
      const candidate = steps[i]?.eval_snapshot
      if (candidate) return candidate as Record<string, unknown>
    }
    return null
  }, [steps])

  // Separate validation-stage entries (grouped by target) from all other entries.
  const { proposalEntries, validationsByTarget } = useMemo(() => {
    const proposals: EvidenceEntry[] = []
    const validationMap = new Map<string, EvidenceEntry[]>()
    for (const entry of entries) {
      if (entry.stage !== "validated") {
        proposals.push(entry)
        continue
      }
      const bucket = validationMap.get(entry.target)
      if (bucket) bucket.push(entry)
      else validationMap.set(entry.target, [entry])
    }
    return { proposalEntries: proposals, validationsByTarget: validationMap }
  }, [entries])

  // The snapshot is untyped, so each field is narrowed before it is rendered.
  const rawNetChange = snapshot?.net_change
  const netChange = typeof rawNetChange === "number" ? rawNetChange : null
  const rawBefore = snapshot?.before_pass_rate
  const rawAfter = snapshot?.after_pass_rate
  const snapshotImproved = snapshot?.improved
  // An unchanged pass rate is neither a gain nor a loss, so it stays neutral.
  const netChangeTone =
    netChange === null || netChange === 0
      ? "text-muted-foreground"
      : netChange > 0
        ? "text-emerald-600 dark:text-emerald-400"
        : "text-red-500"

  const lastStep = steps.length > 0 ? steps[steps.length - 1] : undefined

  return (
    <div className="space-y-4">
      {/* Context banner */}
      <div className="flex items-start gap-2.5 rounded-lg border border-primary/20 bg-primary/5 px-3.5 py-2.5">
        <InfoIcon className="size-4 text-primary/60 shrink-0 mt-0.5" />
        <p className="text-xs text-muted-foreground leading-relaxed">
          This view shows the complete evidence trail for a skill evolution proposal — how the skill was changed,
          the eval test results before and after, and whether the change improved performance.
        </p>
      </div>

      {/* Proposal journey */}
      <Card>
        <CardHeader className="pb-3">
          <CardTitle className="text-sm flex items-center gap-2">
            <span>Proposal Journey</span>
            <span className="font-mono text-xs text-muted-foreground">#{proposalId.slice(0, 12)}</span>
          </CardTitle>
        </CardHeader>
        <CardContent className="space-y-3">
          <div className="flex items-center gap-2 flex-wrap">
            {steps.map((step, i) => (
              <div key={`${step.action}-${i}`} className="contents">
                {i > 0 && <ArrowRightIcon className="size-3 text-muted-foreground/50 shrink-0" />}
                <div className="flex items-center gap-1.5 rounded-md border px-2.5 py-1.5 bg-card">
                  {ACTION_ICON[step.action]}
                  <Badge variant={ACTION_VARIANT[step.action] ?? "secondary"} className="text-[10px] capitalize">
                    {/* Global pattern: a string pattern would replace only the first underscore. */}
                    {step.action.replace(/_/g, " ")}
                  </Badge>
                  <span className="text-[10px] text-muted-foreground">{timeAgo(step.timestamp)}</span>
                </div>
              </div>
            ))}
          </div>

          {/* Eval snapshot — pass rate change */}
          {snapshot && (
            <div className="flex items-center gap-3 rounded-md border bg-muted/20 px-3 py-2">
              {netChange !== null && (
                <div className="flex items-center gap-1">
                  {netChange > 0 && <TrendingUpIcon className="size-3.5 text-emerald-500" />}
                  {netChange < 0 && <TrendingDownIcon className="size-3.5 text-red-500" />}
                  <span className={`text-sm font-semibold font-mono ${netChangeTone}`}>
                    {netChange > 0 ? "+" : ""}{Math.round(netChange * 100)}%
                  </span>
                </div>
              )}
              {typeof rawBefore === "number" && typeof rawAfter === "number" && (
                <span className="text-xs text-muted-foreground font-mono">
                  {Math.round(rawBefore * 100)}% → {Math.round(rawAfter * 100)}%
                </span>
              )}
              {/* Only a real boolean is a verdict; null or other values must not read as "Regressed". */}
              {typeof snapshotImproved === "boolean" && (
                <Badge variant={snapshotImproved ? "default" : "destructive"} className="text-[10px]">
                  {snapshotImproved ? "Improved" : "Regressed"}
                </Badge>
              )}
            </div>
          )}

          {/* Details from last step */}
          {lastStep && lastStep.details && (
            <p className="text-xs text-muted-foreground leading-relaxed">
              {lastStep.details}
            </p>
          )}
        </CardContent>
      </Card>

      {/* Non-validation evidence — standalone cards without round information */}
      {proposalEntries.map((entry) => (
        <EvidenceCard
          key={`proposal-${entry.target}-${entry.timestamp}`}
          entry={entry}
          roundLabel={null}
          roundStatus="single"
          prevPassRate={null}
          currPassRate={null}
        />
      ))}

      {/* Validation-stage evidence — grouped by target with iteration rounds */}
      {Array.from(validationsByTarget.entries()).map(([target, targetEntries]) => {
        const hasMultipleRounds = targetEntries.length > 1

        return (
          <div key={target} className="space-y-2">
            {targetEntries.map((entry, i) => {
              const isLast = i === targetEntries.length - 1
              const roundText = `Round ${i + 1} of ${targetEntries.length}`
              const roundLabel = hasMultipleRounds ? roundText : null
              const previous = i > 0 ? targetEntries[i - 1] : undefined
              const prevPassRate = previous ? getAfterPassRate(previous) : null
              const currPassRate = getAfterPassRate(entry)
              const roundKey = `${target}-${entry.timestamp}`
              const roundStatus: RoundStatus = !hasMultipleRounds ? "single" : isLast ? "final" : "intermediate"

              if (roundStatus === "intermediate") {
                // Earlier rounds are collapsed until the user expands them.
                if (!expandedRounds.has(roundKey)) {
                  return (
                    <CollapsedEvidenceCard
                      key={roundKey}
                      entry={entry}
                      roundLabel={roundText}
                      onExpand={() => toggleRound(roundKey)}
                    />
                  )
                }

                // Expanded earlier round — full card plus a collapse toggle.
                return (
                  <div key={roundKey} className="space-y-1">
                    <button
                      type="button"
                      onClick={() => toggleRound(roundKey)}
                      className="flex items-center gap-1 text-[10px] text-muted-foreground hover:text-foreground transition-colors px-1"
                    >
                      <ChevronDownIcon className="size-3" />
                      Collapse {roundLabel}
                    </button>
                    <EvidenceCard
                      entry={entry}
                      roundLabel={roundLabel}
                      roundStatus={roundStatus}
                      prevPassRate={prevPassRate}
                      currPassRate={currPassRate}
                    />
                  </div>
                )
              }

              // Final round (or single entry) — always expanded.
              return (
                <EvidenceCard
                  key={roundKey}
                  entry={entry}
                  roundLabel={roundLabel}
                  roundStatus={roundStatus}
                  prevPassRate={prevPassRate}
                  currPassRate={currPassRate}
                />
              )
            })}
          </div>
        )
      })}

      {entries.length === 0 && (
        <div className="flex items-center justify-center rounded-lg border border-dashed py-8">
          <p className="text-sm text-muted-foreground">No evidence entries for this proposal</p>
        </div>
      )}
    </div>
  )
}
|