selftune 0.2.28 → 0.2.30
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/apps/local-dashboard/dist/assets/index-BcXquWFB.css +1 -0
- package/apps/local-dashboard/dist/assets/index-Coq42hE4.js +15 -0
- package/apps/local-dashboard/dist/assets/vendor-ui-B0H8s1mP.js +1 -0
- package/apps/local-dashboard/dist/index.html +3 -3
- package/cli/selftune/auto-update.ts +40 -8
- package/cli/selftune/command-surface.ts +1 -1
- package/cli/selftune/constants.ts +5 -0
- package/cli/selftune/dashboard-action-events.ts +117 -0
- package/cli/selftune/dashboard-action-instrumentation.ts +103 -0
- package/cli/selftune/dashboard-action-result.ts +90 -0
- package/cli/selftune/dashboard-action-stream.ts +252 -0
- package/cli/selftune/dashboard-contract.ts +81 -1
- package/cli/selftune/dashboard-server.ts +133 -16
- package/cli/selftune/eval/hooks-to-evals.ts +157 -0
- package/cli/selftune/eval/synthetic-evals.ts +33 -2
- package/cli/selftune/eval/unit-test-cli.ts +53 -5
- package/cli/selftune/evolution/validate-host-replay.ts +191 -14
- package/cli/selftune/index.ts +4 -0
- package/cli/selftune/ingestors/opencode-ingest.ts +117 -8
- package/cli/selftune/localdb/schema.ts +34 -0
- package/cli/selftune/routes/actions.ts +273 -42
- package/cli/selftune/testing-readiness.ts +203 -10
- package/cli/selftune/utils/llm-call.ts +90 -1
- package/package.json +1 -1
- package/packages/dashboard-core/src/screens/skill-report/SkillReportDailyBreakdownSection.tsx +1 -1
- package/packages/dashboard-core/src/screens/skill-report/SkillReportScaffold.tsx +1 -5
- package/packages/dashboard-core/src/screens/skill-report/SkillReportTrustBadge.tsx +10 -18
- package/packages/ui/src/components/EvidenceViewer.tsx +15 -142
- package/packages/ui/src/components/EvolutionTimeline.tsx +20 -44
- package/packages/ui/src/components/SkillReportPanels.tsx +1 -4
- package/skill/SKILL.md +1 -1
- package/skill/workflows/Dashboard.md +50 -23
- package/apps/local-dashboard/dist/assets/index-DgY2KGP-.css +0 -1
- package/apps/local-dashboard/dist/assets/index-MMLFlnVn.js +0 -15
- package/apps/local-dashboard/dist/assets/vendor-ui-B3BPIYy7.js +0 -1
|
@@ -17,6 +17,23 @@ const logger = createLogger("llm-call");
|
|
|
17
17
|
export const LLM_BACKED_AGENT_CANDIDATES = ["claude", "codex", "opencode", "pi"] as const;
|
|
18
18
|
export type LlmBackedAgent = (typeof LLM_BACKED_AGENT_CANDIDATES)[number];
|
|
19
19
|
|
|
20
|
+
export interface LlmInvocationIdentity {
|
|
21
|
+
platform: string;
|
|
22
|
+
model: string | null;
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
export interface LlmCallLifecycleEvent extends LlmInvocationIdentity {
|
|
26
|
+
agent: string;
|
|
27
|
+
durationMs: number | null;
|
|
28
|
+
success: boolean | null;
|
|
29
|
+
error: string | null;
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
export interface LlmCallObserver {
|
|
33
|
+
onStart?: (event: LlmCallLifecycleEvent) => void;
|
|
34
|
+
onFinish?: (event: LlmCallLifecycleEvent) => void;
|
|
35
|
+
}
|
|
36
|
+
|
|
20
37
|
// ---------------------------------------------------------------------------
|
|
21
38
|
// Model alias resolution
|
|
22
39
|
// ---------------------------------------------------------------------------
|
|
@@ -61,6 +78,41 @@ function resolvePiThinking(effort: EffortLevel): string {
|
|
|
61
78
|
return PI_THINKING_MAP[effort];
|
|
62
79
|
}
|
|
63
80
|
|
|
81
|
+
export function describeLlmInvocation(agent: string, modelFlag?: string): LlmInvocationIdentity {
|
|
82
|
+
if (agent === "claude") {
|
|
83
|
+
return {
|
|
84
|
+
platform: "claude_code",
|
|
85
|
+
model: modelFlag ? resolveModelFlag(modelFlag) : null,
|
|
86
|
+
};
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
if (agent === "opencode") {
|
|
90
|
+
return {
|
|
91
|
+
platform: "opencode",
|
|
92
|
+
model: modelFlag ? resolveOpenCodeModel(modelFlag) : null,
|
|
93
|
+
};
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
if (agent === "codex") {
|
|
97
|
+
return {
|
|
98
|
+
platform: "codex",
|
|
99
|
+
model: modelFlag ?? null,
|
|
100
|
+
};
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
if (agent === "pi") {
|
|
104
|
+
return {
|
|
105
|
+
platform: "pi",
|
|
106
|
+
model: modelFlag ?? null,
|
|
107
|
+
};
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
return {
|
|
111
|
+
platform: agent,
|
|
112
|
+
model: modelFlag ?? null,
|
|
113
|
+
};
|
|
114
|
+
}
|
|
115
|
+
|
|
64
116
|
// ---------------------------------------------------------------------------
|
|
65
117
|
// Bundled agent file loading (for codex inline prompt injection)
|
|
66
118
|
// ---------------------------------------------------------------------------
|
|
@@ -208,6 +260,7 @@ export async function callViaAgent(
|
|
|
208
260
|
modelFlag?: string,
|
|
209
261
|
retryOpts?: RetryOptions,
|
|
210
262
|
effort?: EffortLevel,
|
|
263
|
+
observer?: LlmCallObserver,
|
|
211
264
|
): Promise<string> {
|
|
212
265
|
// Write prompt to temp file to avoid shell quoting issues
|
|
213
266
|
const promptFile = join(tmpdir(), `selftune-llm-${Date.now()}.txt`);
|
|
@@ -216,6 +269,7 @@ export async function callViaAgent(
|
|
|
216
269
|
try {
|
|
217
270
|
const promptContent = readFileSync(promptFile, "utf-8");
|
|
218
271
|
let cmd: string[];
|
|
272
|
+
const identity = describeLlmInvocation(agent, modelFlag);
|
|
219
273
|
|
|
220
274
|
if (agent === "claude") {
|
|
221
275
|
cmd = ["claude", "-p", promptContent];
|
|
@@ -264,6 +318,18 @@ export async function callViaAgent(
|
|
|
264
318
|
const maxRetries = retryOpts?.maxRetries ?? DEFAULT_MAX_RETRIES;
|
|
265
319
|
const initialBackoffMs = retryOpts?.initialBackoffMs ?? DEFAULT_INITIAL_BACKOFF_MS;
|
|
266
320
|
let lastError: Error | undefined;
|
|
321
|
+
const startedAt = Date.now();
|
|
322
|
+
try {
|
|
323
|
+
observer?.onStart?.({
|
|
324
|
+
agent,
|
|
325
|
+
...identity,
|
|
326
|
+
durationMs: null,
|
|
327
|
+
success: null,
|
|
328
|
+
error: null,
|
|
329
|
+
});
|
|
330
|
+
} catch {
|
|
331
|
+
// fail-open: instrumentation must never block the real LLM call
|
|
332
|
+
}
|
|
267
333
|
for (let attempt = 0; attempt <= maxRetries; attempt++) {
|
|
268
334
|
if (attempt > 0) {
|
|
269
335
|
const backoffMs = initialBackoffMs * 2 ** (attempt - 1);
|
|
@@ -296,10 +362,32 @@ export async function callViaAgent(
|
|
|
296
362
|
}
|
|
297
363
|
|
|
298
364
|
const raw = await new Response(proc.stdout).text();
|
|
365
|
+
try {
|
|
366
|
+
observer?.onFinish?.({
|
|
367
|
+
agent,
|
|
368
|
+
...identity,
|
|
369
|
+
durationMs: Date.now() - startedAt,
|
|
370
|
+
success: true,
|
|
371
|
+
error: null,
|
|
372
|
+
});
|
|
373
|
+
} catch {
|
|
374
|
+
// fail-open: instrumentation must never block the real LLM call
|
|
375
|
+
}
|
|
299
376
|
return raw;
|
|
300
377
|
} catch (err) {
|
|
301
378
|
lastError = err instanceof Error ? err : new Error(String(err));
|
|
302
379
|
if (!isTransientError(lastError) || attempt === maxRetries) {
|
|
380
|
+
try {
|
|
381
|
+
observer?.onFinish?.({
|
|
382
|
+
agent,
|
|
383
|
+
...identity,
|
|
384
|
+
durationMs: Date.now() - startedAt,
|
|
385
|
+
success: false,
|
|
386
|
+
error: lastError.message,
|
|
387
|
+
});
|
|
388
|
+
} catch {
|
|
389
|
+
// fail-open: instrumentation must never block the real LLM call
|
|
390
|
+
}
|
|
303
391
|
throw lastError;
|
|
304
392
|
}
|
|
305
393
|
logger.warn(`Transient failure on attempt ${attempt + 1}: ${lastError.message}`);
|
|
@@ -533,9 +621,10 @@ export async function callLlm(
|
|
|
533
621
|
agent: string,
|
|
534
622
|
modelFlag?: string,
|
|
535
623
|
effort?: EffortLevel,
|
|
624
|
+
observer?: LlmCallObserver,
|
|
536
625
|
): Promise<string> {
|
|
537
626
|
if (!agent) {
|
|
538
627
|
throw new Error("Agent must be specified for callLlm");
|
|
539
628
|
}
|
|
540
|
-
return callViaAgent(systemPrompt, userPrompt, agent, modelFlag, undefined, effort);
|
|
629
|
+
return callViaAgent(systemPrompt, userPrompt, agent, modelFlag, undefined, effort, observer);
|
|
541
630
|
}
|
package/package.json
CHANGED
|
@@ -28,7 +28,6 @@ export interface SkillReportScaffoldProps {
|
|
|
28
28
|
summary?: ReactNode;
|
|
29
29
|
showOnboardingBanner?: boolean;
|
|
30
30
|
guideButtonLabel?: string;
|
|
31
|
-
prioritizeChildren?: boolean;
|
|
32
31
|
nextAction: SkillReportNextAction;
|
|
33
32
|
trustState: TrustState;
|
|
34
33
|
coverage?: TrustFields["coverage"];
|
|
@@ -53,7 +52,6 @@ export function SkillReportScaffold({
|
|
|
53
52
|
summary,
|
|
54
53
|
showOnboardingBanner = false,
|
|
55
54
|
guideButtonLabel = "How to read this page",
|
|
56
|
-
prioritizeChildren = false,
|
|
57
55
|
nextAction,
|
|
58
56
|
trustState,
|
|
59
57
|
coverage,
|
|
@@ -117,8 +115,6 @@ export function SkillReportScaffold({
|
|
|
117
115
|
}
|
|
118
116
|
/>
|
|
119
117
|
|
|
120
|
-
{prioritizeChildren && children ? <div className="space-y-4">{children}</div> : null}
|
|
121
|
-
|
|
122
118
|
<SkillTrustNarrativePanel
|
|
123
119
|
trustState={trustState}
|
|
124
120
|
coverage={coverage}
|
|
@@ -145,7 +141,7 @@ export function SkillReportScaffold({
|
|
|
145
141
|
/>
|
|
146
142
|
</div>
|
|
147
143
|
|
|
148
|
-
{children
|
|
144
|
+
{children ? (
|
|
149
145
|
<div className="space-y-4 border-t border-border/10 pt-4">{children}</div>
|
|
150
146
|
) : null}
|
|
151
147
|
</div>
|
|
@@ -1,12 +1,4 @@
|
|
|
1
|
-
import
|
|
2
|
-
import {
|
|
3
|
-
EyeIcon,
|
|
4
|
-
ShieldAlertIcon,
|
|
5
|
-
ShieldCheckIcon,
|
|
6
|
-
ShieldIcon,
|
|
7
|
-
ShieldQuestionIcon,
|
|
8
|
-
} from "lucide-react";
|
|
9
|
-
|
|
1
|
+
import { cn } from "@selftune/ui/lib";
|
|
10
2
|
import { Badge } from "@selftune/ui/primitives";
|
|
11
3
|
import type { TrustState } from "@selftune/ui/types";
|
|
12
4
|
|
|
@@ -14,8 +6,8 @@ export function SkillReportTrustBadge({ state }: { state: TrustState }) {
|
|
|
14
6
|
const config = getSkillReportTrustBadgeConfig(state);
|
|
15
7
|
|
|
16
8
|
return (
|
|
17
|
-
<Badge variant={config.variant} className="gap-1 shrink-0 text-[10px]">
|
|
18
|
-
{config.
|
|
9
|
+
<Badge variant={config.variant} className="gap-1.5 shrink-0 text-[10px]">
|
|
10
|
+
<span className={cn("size-1.5 shrink-0 rounded-full", config.dotClassName)} />
|
|
19
11
|
{config.label}
|
|
20
12
|
</Badge>
|
|
21
13
|
);
|
|
@@ -24,44 +16,44 @@ export function SkillReportTrustBadge({ state }: { state: TrustState }) {
|
|
|
24
16
|
export function getSkillReportTrustBadgeConfig(state: TrustState): {
|
|
25
17
|
label: string;
|
|
26
18
|
variant: "default" | "secondary" | "destructive" | "outline";
|
|
27
|
-
|
|
19
|
+
dotClassName: string;
|
|
28
20
|
} {
|
|
29
21
|
switch (state) {
|
|
30
22
|
case "low_sample":
|
|
31
23
|
return {
|
|
32
24
|
label: "Low Sample",
|
|
33
25
|
variant: "secondary",
|
|
34
|
-
|
|
26
|
+
dotClassName: "bg-muted-foreground/60",
|
|
35
27
|
};
|
|
36
28
|
case "observed":
|
|
37
29
|
return {
|
|
38
30
|
label: "Observed",
|
|
39
31
|
variant: "outline",
|
|
40
|
-
|
|
32
|
+
dotClassName: "bg-muted-foreground",
|
|
41
33
|
};
|
|
42
34
|
case "watch":
|
|
43
35
|
return {
|
|
44
36
|
label: "Watch",
|
|
45
37
|
variant: "secondary",
|
|
46
|
-
|
|
38
|
+
dotClassName: "bg-amber-400",
|
|
47
39
|
};
|
|
48
40
|
case "validated":
|
|
49
41
|
return {
|
|
50
42
|
label: "Validated",
|
|
51
43
|
variant: "default",
|
|
52
|
-
|
|
44
|
+
dotClassName: "bg-primary",
|
|
53
45
|
};
|
|
54
46
|
case "deployed":
|
|
55
47
|
return {
|
|
56
48
|
label: "Deployed",
|
|
57
49
|
variant: "default",
|
|
58
|
-
|
|
50
|
+
dotClassName: "bg-primary",
|
|
59
51
|
};
|
|
60
52
|
case "rolled_back":
|
|
61
53
|
return {
|
|
62
54
|
label: "Rolled Back",
|
|
63
55
|
variant: "destructive",
|
|
64
|
-
|
|
56
|
+
dotClassName: "bg-destructive",
|
|
65
57
|
};
|
|
66
58
|
}
|
|
67
59
|
}
|
|
@@ -4,40 +4,14 @@ import { Card, CardContent, CardHeader, CardTitle } from "../primitives/card";
|
|
|
4
4
|
import type { EvidenceEntry, EvolutionEntry } from "../types";
|
|
5
5
|
import { formatRate, timeAgo } from "../lib/format";
|
|
6
6
|
import {
|
|
7
|
-
CheckCircleIcon,
|
|
8
7
|
ChevronDownIcon,
|
|
9
8
|
ChevronRightIcon,
|
|
10
|
-
CircleDotIcon,
|
|
11
9
|
FileTextIcon,
|
|
12
|
-
InfoIcon,
|
|
13
|
-
RocketIcon,
|
|
14
|
-
ShieldCheckIcon,
|
|
15
10
|
ShieldAlertIcon,
|
|
16
|
-
XCircleIcon,
|
|
17
|
-
UndoIcon,
|
|
18
|
-
ArrowRightIcon,
|
|
19
|
-
TrendingUpIcon,
|
|
20
|
-
TrendingDownIcon,
|
|
21
11
|
ListChecksIcon,
|
|
22
12
|
} from "lucide-react";
|
|
23
13
|
import Markdown from "react-markdown";
|
|
24
14
|
|
|
25
|
-
const ACTION_ICON: Record<string, React.ReactNode> = {
|
|
26
|
-
created: <CircleDotIcon className="size-3.5" />,
|
|
27
|
-
validated: <ShieldCheckIcon className="size-3.5" />,
|
|
28
|
-
deployed: <RocketIcon className="size-3.5" />,
|
|
29
|
-
rejected: <XCircleIcon className="size-3.5" />,
|
|
30
|
-
rolled_back: <UndoIcon className="size-3.5" />,
|
|
31
|
-
};
|
|
32
|
-
|
|
33
|
-
const ACTION_VARIANT: Record<string, "default" | "secondary" | "destructive" | "outline"> = {
|
|
34
|
-
created: "outline",
|
|
35
|
-
validated: "secondary",
|
|
36
|
-
deployed: "default",
|
|
37
|
-
rejected: "destructive",
|
|
38
|
-
rolled_back: "destructive",
|
|
39
|
-
};
|
|
40
|
-
|
|
41
15
|
interface Props {
|
|
42
16
|
proposalId: string;
|
|
43
17
|
evolution: EvolutionEntry[];
|
|
@@ -123,9 +97,9 @@ function formatValidationValue(key: string, val: unknown): React.ReactNode {
|
|
|
123
97
|
// Booleans
|
|
124
98
|
if (typeof val === "boolean") {
|
|
125
99
|
return val ? (
|
|
126
|
-
<
|
|
100
|
+
<span className="inline-block size-2 rounded-full bg-primary align-middle" />
|
|
127
101
|
) : (
|
|
128
|
-
<
|
|
102
|
+
<span className="inline-block size-2 rounded-full bg-destructive align-middle" />
|
|
129
103
|
);
|
|
130
104
|
}
|
|
131
105
|
// Numbers that look like rates (0-1 range, or key contains "rate"/"change")
|
|
@@ -212,12 +186,12 @@ function PerEntryResult({ entry }: { entry: Record<string, unknown> }) {
|
|
|
212
186
|
<div className="flex items-start gap-2 text-xs py-1.5 border-b border-border/50 last:border-0">
|
|
213
187
|
{isPass !== null ? (
|
|
214
188
|
isPass ? (
|
|
215
|
-
<
|
|
189
|
+
<span className="mt-1 size-2 shrink-0 rounded-full bg-primary" />
|
|
216
190
|
) : (
|
|
217
|
-
<
|
|
191
|
+
<span className="mt-1 size-2 shrink-0 rounded-full bg-destructive" />
|
|
218
192
|
)
|
|
219
193
|
) : (
|
|
220
|
-
<
|
|
194
|
+
<span className="mt-1 size-2 shrink-0 rounded-full bg-muted-foreground/60" />
|
|
221
195
|
)}
|
|
222
196
|
<span className="flex-1 min-w-0 line-clamp-2">
|
|
223
197
|
{query ? String(query) : JSON.stringify(entry)}
|
|
@@ -307,7 +281,7 @@ function ValidationResults({ validation }: { validation: Record<string, unknown>
|
|
|
307
281
|
)}
|
|
308
282
|
{typeof net_change === "number" && (
|
|
309
283
|
<span
|
|
310
|
-
className={`text-xs font-mono font-semibold ${net_change > 0 ? "text-
|
|
284
|
+
className={`text-xs font-mono font-semibold ${net_change > 0 ? "text-primary" : "text-destructive"}`}
|
|
311
285
|
>
|
|
312
286
|
{net_change > 0 ? "+" : ""}
|
|
313
287
|
{(net_change * 100).toFixed(1)}%
|
|
@@ -324,7 +298,7 @@ function ValidationResults({ validation }: { validation: Record<string, unknown>
|
|
|
324
298
|
{/* New passes */}
|
|
325
299
|
{newPassesArr.length > 0 && (
|
|
326
300
|
<div>
|
|
327
|
-
<p className="text-[11px] font-medium text-
|
|
301
|
+
<p className="mb-1 text-[11px] font-medium text-primary">
|
|
328
302
|
New Passes ({newPassesArr.length})
|
|
329
303
|
</p>
|
|
330
304
|
<div className="rounded border bg-card p-2">
|
|
@@ -345,10 +319,10 @@ function ValidationResults({ validation }: { validation: Record<string, unknown>
|
|
|
345
319
|
{/* Regressions */}
|
|
346
320
|
{regressionsArr.length > 0 && (
|
|
347
321
|
<div>
|
|
348
|
-
<p className="text-[11px] font-medium text-
|
|
322
|
+
<p className="text-[11px] font-medium text-destructive mb-1">
|
|
349
323
|
Regressions ({regressionsArr.length})
|
|
350
324
|
</p>
|
|
351
|
-
<div className="rounded border border-
|
|
325
|
+
<div className="rounded border border-destructive/20 bg-card p-2">
|
|
352
326
|
{regressionsArr.map((entry) => (
|
|
353
327
|
<PerEntryResult
|
|
354
328
|
key={getEvidenceListKey("regression", entry)}
|
|
@@ -406,7 +380,7 @@ function PerEntryResultsSection({ entries }: { entries: unknown[] }) {
|
|
|
406
380
|
{/* Pass rate bar */}
|
|
407
381
|
<div className="h-1.5 rounded-full bg-muted overflow-hidden mb-2">
|
|
408
382
|
<div
|
|
409
|
-
className="h-full rounded-full bg-
|
|
383
|
+
className="h-full rounded-full bg-primary transition-all"
|
|
410
384
|
style={{ width: `${entries.length > 0 ? (passCount / entries.length) * 100 : 0}%` }}
|
|
411
385
|
/>
|
|
412
386
|
</div>
|
|
@@ -442,7 +416,7 @@ function DeltaBadge({ prev, curr }: { prev: number | null; curr: number | null }
|
|
|
442
416
|
const positive = delta > 0;
|
|
443
417
|
return (
|
|
444
418
|
<span
|
|
445
|
-
className={`text-[10px] font-mono font-semibold ${positive ? "text-
|
|
419
|
+
className={`text-[10px] font-mono font-semibold ${positive ? "text-primary" : "text-destructive"}`}
|
|
446
420
|
>
|
|
447
421
|
{positive ? "+" : ""}
|
|
448
422
|
{pct}% vs previous
|
|
@@ -487,12 +461,12 @@ function EvalSetSection({ evalSet }: { evalSet: Array<Record<string, unknown>> }
|
|
|
487
461
|
>
|
|
488
462
|
{typeof passed === "boolean" ? (
|
|
489
463
|
passed ? (
|
|
490
|
-
<
|
|
464
|
+
<span className="mt-1 size-2 shrink-0 rounded-full bg-primary" />
|
|
491
465
|
) : (
|
|
492
|
-
<
|
|
466
|
+
<span className="mt-1 size-2 shrink-0 rounded-full bg-destructive" />
|
|
493
467
|
)
|
|
494
468
|
) : (
|
|
495
|
-
<
|
|
469
|
+
<span className="mt-1 size-2 shrink-0 rounded-full bg-muted-foreground/60" />
|
|
496
470
|
)}
|
|
497
471
|
<span className="flex-1 min-w-0 line-clamp-2">
|
|
498
472
|
{String(query ?? JSON.stringify(evalEntry))}
|
|
@@ -648,15 +622,7 @@ function CollapsedEvidenceCard({
|
|
|
648
622
|
);
|
|
649
623
|
}
|
|
650
624
|
|
|
651
|
-
export function EvidenceViewer({ proposalId,
|
|
652
|
-
const steps = useMemo(
|
|
653
|
-
() =>
|
|
654
|
-
evolution
|
|
655
|
-
.filter((e) => e.proposal_id === proposalId)
|
|
656
|
-
.sort((a, b) => new Date(a.timestamp).getTime() - new Date(b.timestamp).getTime()),
|
|
657
|
-
[evolution, proposalId],
|
|
658
|
-
);
|
|
659
|
-
|
|
625
|
+
export function EvidenceViewer({ proposalId, evidence }: Props) {
|
|
660
626
|
const entries = useMemo(
|
|
661
627
|
() =>
|
|
662
628
|
evidence
|
|
@@ -677,13 +643,6 @@ export function EvidenceViewer({ proposalId, evolution, evidence }: Props) {
|
|
|
677
643
|
});
|
|
678
644
|
};
|
|
679
645
|
|
|
680
|
-
const snapshot = useMemo(() => {
|
|
681
|
-
for (let i = steps.length - 1; i >= 0; i--) {
|
|
682
|
-
if (steps[i].eval_snapshot) return steps[i].eval_snapshot as Record<string, unknown>;
|
|
683
|
-
}
|
|
684
|
-
return null;
|
|
685
|
-
}, [steps]);
|
|
686
|
-
|
|
687
646
|
// Separate proposal-stage entries from validation-stage entries, then group validations by target
|
|
688
647
|
const { proposalEntries, validationsByTarget } = useMemo(() => {
|
|
689
648
|
const proposals: EvidenceEntry[] = [];
|
|
@@ -702,92 +661,6 @@ export function EvidenceViewer({ proposalId, evolution, evidence }: Props) {
|
|
|
702
661
|
|
|
703
662
|
return (
|
|
704
663
|
<div className="space-y-4">
|
|
705
|
-
{/* Context banner */}
|
|
706
|
-
<div className="flex items-start gap-2.5 rounded-lg border border-primary/20 bg-primary/5 px-3.5 py-2.5">
|
|
707
|
-
<InfoIcon className="size-4 text-primary/60 shrink-0 mt-0.5" />
|
|
708
|
-
<p className="text-xs text-muted-foreground leading-relaxed">
|
|
709
|
-
This view shows the complete evidence trail for a skill evolution proposal — how the
|
|
710
|
-
skill was changed, the eval test results before and after, and whether the change improved
|
|
711
|
-
performance.
|
|
712
|
-
</p>
|
|
713
|
-
</div>
|
|
714
|
-
|
|
715
|
-
{/* Proposal journey */}
|
|
716
|
-
<Card>
|
|
717
|
-
<CardHeader className="pb-3">
|
|
718
|
-
<CardTitle className="text-sm flex items-center gap-2">
|
|
719
|
-
<span>Proposal Journey</span>
|
|
720
|
-
<span className="font-mono text-xs text-muted-foreground">
|
|
721
|
-
#{proposalId.slice(0, 12)}
|
|
722
|
-
</span>
|
|
723
|
-
</CardTitle>
|
|
724
|
-
</CardHeader>
|
|
725
|
-
<CardContent className="space-y-3">
|
|
726
|
-
<div className="flex items-center gap-2 flex-wrap">
|
|
727
|
-
{steps.map((step, i) => (
|
|
728
|
-
<div key={`${step.action}-${step.timestamp}`} className="contents">
|
|
729
|
-
{i > 0 && <ArrowRightIcon className="size-3 text-muted-foreground/50 shrink-0" />}
|
|
730
|
-
<div className="flex items-center gap-1.5 rounded-md border px-2.5 py-1.5 bg-card">
|
|
731
|
-
{ACTION_ICON[step.action]}
|
|
732
|
-
<Badge
|
|
733
|
-
variant={ACTION_VARIANT[step.action] ?? "secondary"}
|
|
734
|
-
className="text-[10px] capitalize"
|
|
735
|
-
>
|
|
736
|
-
{step.action.replace("_", " ")}
|
|
737
|
-
</Badge>
|
|
738
|
-
<span className="text-[10px] text-muted-foreground">
|
|
739
|
-
{timeAgo(step.timestamp)}
|
|
740
|
-
</span>
|
|
741
|
-
</div>
|
|
742
|
-
</div>
|
|
743
|
-
))}
|
|
744
|
-
</div>
|
|
745
|
-
|
|
746
|
-
{/* Eval snapshot — pass rate change */}
|
|
747
|
-
{snapshot && (
|
|
748
|
-
<div className="flex items-center gap-3 rounded-md border bg-muted/20 px-3 py-2">
|
|
749
|
-
{typeof snapshot.net_change === "number" && (
|
|
750
|
-
<div className="flex items-center gap-1">
|
|
751
|
-
{(snapshot.net_change as number) > 0 ? (
|
|
752
|
-
<TrendingUpIcon className="size-3.5 text-emerald-500" />
|
|
753
|
-
) : (
|
|
754
|
-
<TrendingDownIcon className="size-3.5 text-red-500" />
|
|
755
|
-
)}
|
|
756
|
-
<span
|
|
757
|
-
className={`text-sm font-semibold font-mono ${(snapshot.net_change as number) > 0 ? "text-emerald-600 dark:text-emerald-400" : "text-red-500"}`}
|
|
758
|
-
>
|
|
759
|
-
{(snapshot.net_change as number) > 0 ? "+" : ""}
|
|
760
|
-
{Math.round((snapshot.net_change as number) * 100)}%
|
|
761
|
-
</span>
|
|
762
|
-
</div>
|
|
763
|
-
)}
|
|
764
|
-
{typeof snapshot.before_pass_rate === "number" &&
|
|
765
|
-
typeof snapshot.after_pass_rate === "number" && (
|
|
766
|
-
<span className="text-xs text-muted-foreground font-mono">
|
|
767
|
-
{Math.round((snapshot.before_pass_rate as number) * 100)}% →{" "}
|
|
768
|
-
{Math.round((snapshot.after_pass_rate as number) * 100)}%
|
|
769
|
-
</span>
|
|
770
|
-
)}
|
|
771
|
-
{snapshot.improved !== undefined && (
|
|
772
|
-
<Badge
|
|
773
|
-
variant={snapshot.improved ? "default" : "destructive"}
|
|
774
|
-
className="text-[10px]"
|
|
775
|
-
>
|
|
776
|
-
{snapshot.improved ? "Improved" : "Regressed"}
|
|
777
|
-
</Badge>
|
|
778
|
-
)}
|
|
779
|
-
</div>
|
|
780
|
-
)}
|
|
781
|
-
|
|
782
|
-
{/* Details from last step */}
|
|
783
|
-
{steps.length > 0 && steps[steps.length - 1].details && (
|
|
784
|
-
<p className="text-xs text-muted-foreground leading-relaxed">
|
|
785
|
-
{steps[steps.length - 1].details}
|
|
786
|
-
</p>
|
|
787
|
-
)}
|
|
788
|
-
</CardContent>
|
|
789
|
-
</Card>
|
|
790
|
-
|
|
791
664
|
{/* Proposal-stage evidence — standalone cards showing original/proposed text */}
|
|
792
665
|
{proposalEntries.map((entry) => (
|
|
793
666
|
<EvidenceCard
|
|
@@ -3,48 +3,30 @@ import { Badge } from "../primitives/badge";
|
|
|
3
3
|
import { cn } from "../lib/utils";
|
|
4
4
|
import type { EvalSnapshot, EvolutionEntry } from "../types";
|
|
5
5
|
import { timeAgo } from "../lib/format";
|
|
6
|
-
import {
|
|
7
|
-
CircleDotIcon,
|
|
8
|
-
RocketIcon,
|
|
9
|
-
ShieldCheckIcon,
|
|
10
|
-
XCircleIcon,
|
|
11
|
-
UndoIcon,
|
|
12
|
-
TrendingUpIcon,
|
|
13
|
-
TrendingDownIcon,
|
|
14
|
-
ChevronDownIcon,
|
|
15
|
-
ChevronRightIcon,
|
|
16
|
-
} from "lucide-react";
|
|
17
|
-
|
|
18
|
-
const ACTION_ICON: Record<string, React.ReactNode> = {
|
|
19
|
-
created: <CircleDotIcon className="size-3.5" />,
|
|
20
|
-
validated: <ShieldCheckIcon className="size-3.5" />,
|
|
21
|
-
deployed: <RocketIcon className="size-3.5" />,
|
|
22
|
-
rejected: <XCircleIcon className="size-3.5" />,
|
|
23
|
-
rolled_back: <UndoIcon className="size-3.5" />,
|
|
24
|
-
};
|
|
6
|
+
import { TrendingUpIcon, TrendingDownIcon, ChevronDownIcon, ChevronRightIcon } from "lucide-react";
|
|
25
7
|
|
|
26
8
|
const ACTION_COLOR: Record<string, string> = {
|
|
27
|
-
created: "bg-
|
|
28
|
-
validated: "bg-
|
|
29
|
-
deployed: "bg-
|
|
30
|
-
rejected: "bg-
|
|
31
|
-
rolled_back: "bg-
|
|
9
|
+
created: "bg-primary/35",
|
|
10
|
+
validated: "bg-primary/65",
|
|
11
|
+
deployed: "bg-primary",
|
|
12
|
+
rejected: "bg-destructive/85",
|
|
13
|
+
rolled_back: "bg-destructive/45",
|
|
32
14
|
};
|
|
33
15
|
|
|
34
16
|
const ACTION_RING: Record<string, string> = {
|
|
35
|
-
created: "ring-
|
|
36
|
-
validated: "ring-
|
|
37
|
-
deployed: "ring-
|
|
38
|
-
rejected: "ring-
|
|
39
|
-
rolled_back: "ring-
|
|
17
|
+
created: "ring-primary/15",
|
|
18
|
+
validated: "ring-primary/20",
|
|
19
|
+
deployed: "ring-primary/30",
|
|
20
|
+
rejected: "ring-destructive/20",
|
|
21
|
+
rolled_back: "ring-destructive/15",
|
|
40
22
|
};
|
|
41
23
|
|
|
42
24
|
const ACTION_LINE: Record<string, string> = {
|
|
43
|
-
created: "bg-
|
|
44
|
-
validated: "bg-
|
|
45
|
-
deployed: "bg-
|
|
46
|
-
rejected: "bg-
|
|
47
|
-
rolled_back: "bg-
|
|
25
|
+
created: "bg-primary/12",
|
|
26
|
+
validated: "bg-primary/18",
|
|
27
|
+
deployed: "bg-primary/30",
|
|
28
|
+
rejected: "bg-destructive/18",
|
|
29
|
+
rolled_back: "bg-destructive/12",
|
|
48
30
|
};
|
|
49
31
|
|
|
50
32
|
interface Props {
|
|
@@ -92,7 +74,7 @@ function PassRateDelta({ snapshot }: { snapshot: EvalSnapshot }) {
|
|
|
92
74
|
<span
|
|
93
75
|
className={cn(
|
|
94
76
|
"inline-flex items-center gap-0.5 text-[10px] font-mono font-medium",
|
|
95
|
-
isPositive ? "text-
|
|
77
|
+
isPositive ? "text-primary" : "text-destructive",
|
|
96
78
|
)}
|
|
97
79
|
>
|
|
98
80
|
{isPositive ? (
|
|
@@ -157,7 +139,7 @@ export function EvolutionTimeline({ entries, selectedProposalId, onSelect }: Pro
|
|
|
157
139
|
|
|
158
140
|
return (
|
|
159
141
|
<div className="flex flex-col gap-0">
|
|
160
|
-
<h2 className="
|
|
142
|
+
<h2 className="sticky top-0 z-10 px-2 pb-2 text-xs font-semibold uppercase tracking-wider text-muted-foreground">
|
|
161
143
|
Evolution
|
|
162
144
|
</h2>
|
|
163
145
|
<LifecycleLegend />
|
|
@@ -177,14 +159,8 @@ export function EvolutionTimeline({ entries, selectedProposalId, onSelect }: Pro
|
|
|
177
159
|
{/* Vertical connector line */}
|
|
178
160
|
<div className="flex flex-col items-center">
|
|
179
161
|
<div
|
|
180
|
-
className={cn(
|
|
181
|
-
|
|
182
|
-
dotColor,
|
|
183
|
-
ringColor,
|
|
184
|
-
)}
|
|
185
|
-
>
|
|
186
|
-
{ACTION_ICON[terminal] ?? <CircleDotIcon className="size-3.5" />}
|
|
187
|
-
</div>
|
|
162
|
+
className={cn("size-3 shrink-0 rounded-full ring-2 z-10", dotColor, ringColor)}
|
|
163
|
+
/>
|
|
188
164
|
{!isLast && <div className={cn("w-0.5 flex-1 min-h-[16px]", lineColor)} />}
|
|
189
165
|
</div>
|
|
190
166
|
|
|
@@ -84,10 +84,7 @@ function ExampleRowItem({ row }: { row: ExampleRow }) {
|
|
|
84
84
|
<TableCell className="py-2">
|
|
85
85
|
<div className="flex items-center gap-1.5">
|
|
86
86
|
{row.triggered ? (
|
|
87
|
-
<Badge
|
|
88
|
-
variant="outline"
|
|
89
|
-
className="border-green-600/30 text-[10px] font-normal text-green-600"
|
|
90
|
-
>
|
|
87
|
+
<Badge variant="outline" className="text-[10px] font-normal">
|
|
91
88
|
triggered
|
|
92
89
|
</Badge>
|
|
93
90
|
) : (
|