selftune 0.2.22 → 0.2.24
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +6 -0
- package/README.md +95 -15
- package/apps/local-dashboard/dist/assets/index-DgY2KGP-.css +1 -0
- package/apps/local-dashboard/dist/assets/index-Dmx7LPVX.js +15 -0
- package/apps/local-dashboard/dist/assets/vendor-react-C5oyHiV1.js +11 -0
- package/apps/local-dashboard/dist/assets/{vendor-table-BIiI3YhS.js → vendor-table-Bc_bbKd8.js} +1 -1
- package/apps/local-dashboard/dist/assets/vendor-ui-B3BPIYy7.js +1 -0
- package/apps/local-dashboard/dist/index.html +5 -5
- package/cli/selftune/adapters/codex/install.ts +310 -78
- package/cli/selftune/adapters/opencode/install.ts +3 -4
- package/cli/selftune/adapters/pi/hook.ts +273 -0
- package/cli/selftune/adapters/pi/install.ts +207 -0
- package/cli/selftune/alpha-upload/build-payloads.ts +3 -3
- package/cli/selftune/alpha-upload/stage-canonical.ts +17 -11
- package/cli/selftune/auto-update.ts +200 -8
- package/cli/selftune/canonical-export.ts +55 -25
- package/cli/selftune/command-surface.ts +397 -0
- package/cli/selftune/constants.ts +10 -1
- package/cli/selftune/contribute/contribute.ts +64 -13
- package/cli/selftune/contribution-config.ts +57 -3
- package/cli/selftune/contribution-preferences.ts +117 -0
- package/cli/selftune/contribution-signals.ts +8 -4
- package/cli/selftune/contribution-staging.ts +13 -2
- package/cli/selftune/contributions.ts +55 -121
- package/cli/selftune/creator-contributions.ts +29 -10
- package/cli/selftune/cron/setup.ts +7 -3
- package/cli/selftune/dashboard-contract.ts +87 -0
- package/cli/selftune/dashboard-server.ts +168 -17
- package/cli/selftune/dashboard.ts +350 -17
- package/cli/selftune/eval/baseline.ts +21 -5
- package/cli/selftune/eval/execution-eval.ts +170 -0
- package/cli/selftune/eval/family-overlap.ts +2 -2
- package/cli/selftune/eval/hooks-to-evals.ts +228 -82
- package/cli/selftune/eval/import-skillsbench.ts +2 -2
- package/cli/selftune/eval/invocation-classifier.ts +56 -0
- package/cli/selftune/eval/synthetic-evals.ts +5 -3
- package/cli/selftune/eval/unit-test-cli.ts +7 -4
- package/cli/selftune/evolution/apply-proposal.ts +295 -0
- package/cli/selftune/evolution/engines/judge-engine.ts +96 -0
- package/cli/selftune/evolution/engines/replay-engine.ts +180 -0
- package/cli/selftune/evolution/evidence.ts +2 -6
- package/cli/selftune/evolution/evolve-body.ts +152 -38
- package/cli/selftune/evolution/evolve.ts +244 -52
- package/cli/selftune/evolution/rollback.ts +0 -1
- package/cli/selftune/evolution/validate-body.ts +111 -49
- package/cli/selftune/evolution/validate-host-replay.ts +510 -60
- package/cli/selftune/evolution/validate-proposal.ts +11 -150
- package/cli/selftune/evolution/validate-routing.ts +51 -108
- package/cli/selftune/evolution/validation-contract.ts +91 -0
- package/cli/selftune/grading/auto-grade.ts +11 -7
- package/cli/selftune/grading/grade-session.ts +10 -16
- package/cli/selftune/hooks/skill-eval.ts +2 -1
- package/cli/selftune/hooks-shared/types.ts +1 -0
- package/cli/selftune/index.ts +58 -15
- package/cli/selftune/ingestors/claude-replay.ts +15 -10
- package/cli/selftune/ingestors/codex-wrapper.ts +3 -3
- package/cli/selftune/ingestors/opencode-ingest.ts +2 -2
- package/cli/selftune/ingestors/pi-ingest.ts +727 -0
- package/cli/selftune/init.ts +38 -4
- package/cli/selftune/localdb/direct-write.ts +120 -1
- package/cli/selftune/localdb/materialize.ts +6 -7
- package/cli/selftune/localdb/queries/cron.ts +34 -0
- package/cli/selftune/localdb/queries/dashboard.ts +834 -0
- package/cli/selftune/localdb/queries/evolution.ts +158 -0
- package/cli/selftune/localdb/queries/execution.ts +133 -0
- package/cli/selftune/localdb/queries/json.ts +18 -0
- package/cli/selftune/localdb/queries/monitoring.ts +263 -0
- package/cli/selftune/localdb/queries/raw.ts +95 -0
- package/cli/selftune/localdb/queries/staging.ts +270 -0
- package/cli/selftune/localdb/queries/trust.ts +392 -0
- package/cli/selftune/localdb/queries.ts +60 -2162
- package/cli/selftune/localdb/schema.ts +59 -0
- package/cli/selftune/monitoring/watch.ts +96 -29
- package/cli/selftune/normalization.ts +3 -0
- package/cli/selftune/observability.ts +12 -3
- package/cli/selftune/orchestrate/cli.ts +161 -0
- package/cli/selftune/orchestrate/execute.ts +295 -0
- package/cli/selftune/orchestrate/finalize.ts +157 -0
- package/cli/selftune/orchestrate/locks.ts +40 -0
- package/cli/selftune/orchestrate/plan.ts +131 -0
- package/cli/selftune/orchestrate/post-run.ts +59 -0
- package/cli/selftune/orchestrate/prepare.ts +334 -0
- package/cli/selftune/orchestrate/report.ts +182 -0
- package/cli/selftune/orchestrate/runtime.ts +120 -0
- package/cli/selftune/orchestrate/signals.ts +48 -0
- package/cli/selftune/orchestrate.ts +162 -1142
- package/cli/selftune/registry/client.ts +74 -0
- package/cli/selftune/registry/history.ts +54 -0
- package/cli/selftune/registry/index.ts +90 -0
- package/cli/selftune/registry/install.ts +141 -0
- package/cli/selftune/registry/list.ts +44 -0
- package/cli/selftune/registry/push.ts +171 -0
- package/cli/selftune/registry/rollback.ts +49 -0
- package/cli/selftune/registry/status.ts +62 -0
- package/cli/selftune/registry/sync.ts +125 -0
- package/cli/selftune/repair/skill-usage.ts +9 -3
- package/cli/selftune/routes/overview.ts +5 -2
- package/cli/selftune/routes/skill-report.ts +15 -2
- package/cli/selftune/schedule.ts +5 -5
- package/cli/selftune/status.ts +70 -2
- package/cli/selftune/sync.ts +127 -23
- package/cli/selftune/testing-readiness.ts +597 -0
- package/cli/selftune/types.ts +46 -5
- package/cli/selftune/uninstall.ts +2 -1
- package/cli/selftune/utils/canonical-log.ts +1 -9
- package/cli/selftune/utils/cli-error.ts +9 -0
- package/cli/selftune/utils/jsonl.ts +1 -30
- package/cli/selftune/utils/llm-call.ts +126 -6
- package/cli/selftune/utils/skill-discovery.ts +24 -0
- package/cli/selftune/workflows/proposals.ts +184 -0
- package/cli/selftune/workflows/skill-scaffold.ts +241 -0
- package/cli/selftune/workflows/workflows.ts +100 -26
- package/node_modules/@selftune/telemetry-contract/fixtures/complete-push.ts +1 -1
- package/node_modules/@selftune/telemetry-contract/fixtures/evidence-only-push.ts +2 -2
- package/node_modules/@selftune/telemetry-contract/fixtures/golden.test.ts +0 -1
- package/node_modules/@selftune/telemetry-contract/fixtures/partial-push-no-sessions.ts +1 -1
- package/node_modules/@selftune/telemetry-contract/fixtures/partial-push-unresolved-parents.ts +2 -2
- package/node_modules/@selftune/telemetry-contract/package.json +1 -1
- package/node_modules/@selftune/telemetry-contract/src/index.ts +1 -0
- package/node_modules/@selftune/telemetry-contract/src/schemas.ts +63 -5
- package/node_modules/@selftune/telemetry-contract/src/types.ts +97 -7
- package/node_modules/@selftune/telemetry-contract/tests/compatibility.test.ts +0 -1
- package/package.json +25 -9
- package/packages/dashboard-core/AGENTS.md +18 -0
- package/packages/dashboard-core/README.md +30 -0
- package/packages/dashboard-core/index.ts +3 -0
- package/packages/dashboard-core/package.json +39 -0
- package/packages/dashboard-core/src/chrome/DashboardChrome.tsx +74 -0
- package/packages/dashboard-core/src/chrome/DashboardHeader.tsx +200 -0
- package/packages/dashboard-core/src/chrome/DashboardSidebar.tsx +219 -0
- package/packages/dashboard-core/src/chrome/RuntimeBadge.tsx +46 -0
- package/packages/dashboard-core/src/chrome/index.ts +14 -0
- package/packages/dashboard-core/src/chrome/types.ts +81 -0
- package/packages/dashboard-core/src/chrome/utils.ts +23 -0
- package/packages/dashboard-core/src/gates/FeatureGate.tsx +11 -0
- package/packages/dashboard-core/src/gates/LockedRoute.tsx +29 -0
- package/packages/dashboard-core/src/gates/UpgradeCard.tsx +89 -0
- package/packages/dashboard-core/src/gates/index.ts +3 -0
- package/packages/dashboard-core/src/host/DashboardHostProvider.tsx +62 -0
- package/packages/dashboard-core/src/host/adapter.ts +47 -0
- package/packages/dashboard-core/src/host/capabilities.ts +55 -0
- package/packages/dashboard-core/src/host/index.ts +3 -0
- package/packages/dashboard-core/src/models/analytics.ts +39 -0
- package/packages/dashboard-core/src/models/index.ts +4 -0
- package/packages/dashboard-core/src/models/overview.ts +98 -0
- package/packages/dashboard-core/src/models/runtime.ts +7 -0
- package/packages/dashboard-core/src/models/skills.ts +34 -0
- package/packages/dashboard-core/src/routes/index.ts +2 -0
- package/packages/dashboard-core/src/routes/manifest.test.ts +70 -0
- package/packages/dashboard-core/src/routes/manifest.ts +451 -0
- package/packages/dashboard-core/src/routes/types.ts +39 -0
- package/packages/dashboard-core/src/screens/analytics/AnalyticsScreen.tsx +278 -0
- package/packages/dashboard-core/src/screens/analytics/index.ts +1 -0
- package/packages/dashboard-core/src/screens/index.ts +37 -0
- package/packages/dashboard-core/src/screens/overview/OverviewComparisonSurface.test.ts +101 -0
- package/packages/dashboard-core/src/screens/overview/OverviewComparisonSurface.tsx +393 -0
- package/packages/dashboard-core/src/screens/overview/OverviewCompositionSurface.test.tsx +113 -0
- package/packages/dashboard-core/src/screens/overview/OverviewCompositionSurface.tsx +72 -0
- package/packages/dashboard-core/src/screens/overview/OverviewCoreSurface.tsx +71 -0
- package/packages/dashboard-core/src/screens/overview/OverviewOnboardingBanner.tsx +90 -0
- package/packages/dashboard-core/src/screens/overview/OverviewRunSummary.tsx +40 -0
- package/packages/dashboard-core/src/screens/overview/index.ts +16 -0
- package/packages/dashboard-core/src/screens/overview/types.ts +13 -0
- package/packages/dashboard-core/src/screens/skill-report/SkillReportDailyBreakdownSection.tsx +99 -0
- package/packages/dashboard-core/src/screens/skill-report/SkillReportDataQualityTabContent.tsx +35 -0
- package/packages/dashboard-core/src/screens/skill-report/SkillReportEvidenceRail.tsx +71 -0
- package/packages/dashboard-core/src/screens/skill-report/SkillReportEvidenceSection.tsx +63 -0
- package/packages/dashboard-core/src/screens/skill-report/SkillReportEvidenceTabContent.tsx +25 -0
- package/packages/dashboard-core/src/screens/skill-report/SkillReportInvocationsSection.tsx +24 -0
- package/packages/dashboard-core/src/screens/skill-report/SkillReportMissedQueriesSection.tsx +79 -0
- package/packages/dashboard-core/src/screens/skill-report/SkillReportScaffold.tsx +150 -0
- package/packages/dashboard-core/src/screens/skill-report/SkillReportSections.test.tsx +224 -0
- package/packages/dashboard-core/src/screens/skill-report/SkillReportTabs.test.tsx +76 -0
- package/packages/dashboard-core/src/screens/skill-report/SkillReportTabs.tsx +88 -0
- package/packages/dashboard-core/src/screens/skill-report/SkillReportTrendSection.tsx +33 -0
- package/packages/dashboard-core/src/screens/skill-report/SkillReportTrustBadge.tsx +67 -0
- package/packages/dashboard-core/src/screens/skill-report/index.ts +45 -0
- package/packages/dashboard-core/src/screens/skills/SkillsLibraryScreen.tsx +162 -0
- package/packages/dashboard-core/src/screens/skills/index.ts +6 -0
- package/packages/telemetry-contract/fixtures/complete-push.ts +1 -1
- package/packages/telemetry-contract/fixtures/evidence-only-push.ts +2 -2
- package/packages/telemetry-contract/fixtures/golden.test.ts +0 -1
- package/packages/telemetry-contract/fixtures/partial-push-no-sessions.ts +1 -1
- package/packages/telemetry-contract/fixtures/partial-push-unresolved-parents.ts +2 -2
- package/packages/telemetry-contract/package.json +1 -1
- package/packages/telemetry-contract/src/index.ts +1 -0
- package/packages/telemetry-contract/src/schemas.ts +63 -5
- package/packages/telemetry-contract/src/types.ts +97 -7
- package/packages/telemetry-contract/tests/compatibility.test.ts +0 -1
- package/packages/ui/AGENTS.md +16 -0
- package/packages/ui/README.md +1 -1
- package/packages/ui/package.json +1 -1
- package/packages/ui/src/components/ActivityTimeline.tsx +152 -168
- package/packages/ui/src/components/AnalyticsCharts.tsx +344 -0
- package/packages/ui/src/components/EvidenceViewer.tsx +229 -464
- package/packages/ui/src/components/EvolutionTimeline.tsx +34 -87
- package/packages/ui/src/components/InfoTip.tsx +1 -2
- package/packages/ui/src/components/InvocationsPanel.tsx +413 -0
- package/packages/ui/src/components/JobHistoryTimeline.tsx +156 -0
- package/packages/ui/src/components/OrchestrateRunsPanel.tsx +18 -36
- package/packages/ui/src/components/OverviewPanels.tsx +693 -0
- package/packages/ui/src/components/PipelineStatusBar.tsx +65 -0
- package/packages/ui/src/components/SkillReportGuide.tsx +215 -0
- package/packages/ui/src/components/SkillReportPanels.tsx +919 -0
- package/packages/ui/src/components/SkillsLibrary.tsx +437 -0
- package/packages/ui/src/components/index.ts +56 -1
- package/packages/ui/src/components/section-cards.tsx +18 -35
- package/packages/ui/src/components/skill-health-grid.tsx +47 -37
- package/packages/ui/src/lib/constants.tsx +0 -1
- package/packages/ui/src/primitives/card.tsx +1 -1
- package/packages/ui/src/primitives/checkbox.tsx +1 -1
- package/packages/ui/src/primitives/dropdown-menu.tsx +2 -2
- package/packages/ui/src/primitives/select.tsx +2 -2
- package/packages/ui/src/primitives/tabs.tsx +7 -6
- package/packages/ui/src/types.ts +182 -4
- package/skill/SKILL.md +130 -318
- package/skill/agents/diagnosis-analyst.md +3 -3
- package/skill/agents/evolution-reviewer.md +3 -3
- package/skill/agents/integration-guide.md +3 -3
- package/skill/agents/pattern-analyst.md +2 -2
- package/skill/references/cli-quick-reference.md +89 -0
- package/skill/references/creator-playbook.md +131 -0
- package/skill/references/examples.md +48 -0
- package/skill/references/troubleshooting.md +47 -0
- package/skill/references/version-history.md +1 -1
- package/skill/selftune.contribute.json +11 -0
- package/skill/{Workflows → workflows}/Baseline.md +20 -1
- package/skill/{Workflows → workflows}/Contribute.md +23 -10
- package/skill/{Workflows → workflows}/Contributions.md +13 -5
- package/skill/workflows/CreateTestDeploy.md +170 -0
- package/skill/{Workflows → workflows}/CreatorContributions.md +18 -6
- package/skill/{Workflows → workflows}/Cron.md +1 -1
- package/skill/{Workflows → workflows}/Dashboard.md +20 -0
- package/skill/{Workflows → workflows}/Doctor.md +1 -1
- package/skill/{Workflows → workflows}/Evals.md +67 -2
- package/skill/{Workflows → workflows}/Evolve.md +119 -30
- package/skill/{Workflows → workflows}/EvolveBody.md +41 -1
- package/skill/{Workflows → workflows}/Grade.md +1 -1
- package/skill/{Workflows → workflows}/Ingest.md +60 -2
- package/skill/{Workflows → workflows}/Initialize.md +16 -9
- package/skill/{Workflows → workflows}/Orchestrate.md +13 -3
- package/skill/{Workflows → workflows}/PlatformHooks.md +19 -3
- package/skill/workflows/Registry.md +99 -0
- package/skill/{Workflows → workflows}/Schedule.md +3 -3
- package/skill/workflows/SignalsDashboard.md +87 -0
- package/skill/{Workflows → workflows}/Sync.md +3 -1
- package/skill/{Workflows → workflows}/UnitTest.md +19 -0
- package/skill/{Workflows → workflows}/Watch.md +42 -2
- package/skill/{Workflows → workflows}/Workflows.md +39 -2
- package/apps/local-dashboard/dist/assets/index-D8O-RG1I.js +0 -60
- package/apps/local-dashboard/dist/assets/index-_EcLywDg.css +0 -1
- package/apps/local-dashboard/dist/assets/vendor-react-CKkiCskZ.js +0 -11
- package/apps/local-dashboard/dist/assets/vendor-ui-CGEmUayx.js +0 -12
- package/cli/selftune/utils/html.ts +0 -27
- package/packages/ui/src/components/RecentActivityFeed.tsx +0 -117
- /package/skill/{Workflows → workflows}/AlphaUpload.md +0 -0
- /package/skill/{Workflows → workflows}/AutoActivation.md +0 -0
- /package/skill/{Workflows → workflows}/Badge.md +0 -0
- /package/skill/{Workflows → workflows}/Composability.md +0 -0
- /package/skill/{Workflows → workflows}/EvolutionMemory.md +0 -0
- /package/skill/{Workflows → workflows}/ExportCanonical.md +0 -0
- /package/skill/{Workflows → workflows}/Hook.md +0 -0
- /package/skill/{Workflows → workflows}/ImportSkillsBench.md +0 -0
- /package/skill/{Workflows → workflows}/Quickstart.md +0 -0
- /package/skill/{Workflows → workflows}/Recover.md +0 -0
- /package/skill/{Workflows → workflows}/RepairSkillUsage.md +0 -0
- /package/skill/{Workflows → workflows}/Replay.md +0 -0
- /package/skill/{Workflows → workflows}/Rollback.md +0 -0
- /package/skill/{Workflows → workflows}/Telemetry.md +0 -0
- /package/skill/{Workflows → workflows}/Uninstall.md +0 -0
|
@@ -1,3 +1,8 @@
|
|
|
1
|
+
import { useMemo, useState } from "react";
|
|
2
|
+
import { Badge } from "../primitives/badge";
|
|
3
|
+
import { Card, CardContent, CardHeader, CardTitle } from "../primitives/card";
|
|
4
|
+
import type { EvidenceEntry, EvolutionEntry } from "../types";
|
|
5
|
+
import { formatRate, timeAgo } from "../lib/format";
|
|
1
6
|
import {
|
|
2
7
|
CheckCircleIcon,
|
|
3
8
|
ChevronDownIcon,
|
|
@@ -5,20 +10,25 @@ import {
|
|
|
5
10
|
CircleDotIcon,
|
|
6
11
|
FileTextIcon,
|
|
7
12
|
InfoIcon,
|
|
13
|
+
RocketIcon,
|
|
14
|
+
ShieldCheckIcon,
|
|
8
15
|
ShieldAlertIcon,
|
|
9
16
|
XCircleIcon,
|
|
17
|
+
UndoIcon,
|
|
18
|
+
ArrowRightIcon,
|
|
10
19
|
TrendingUpIcon,
|
|
11
20
|
TrendingDownIcon,
|
|
12
21
|
ListChecksIcon,
|
|
13
22
|
} from "lucide-react";
|
|
14
|
-
import { useMemo, useState } from "react";
|
|
15
|
-
import type { ReactNode } from "react";
|
|
16
23
|
import Markdown from "react-markdown";
|
|
17
24
|
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
25
|
+
const ACTION_ICON: Record<string, React.ReactNode> = {
|
|
26
|
+
created: <CircleDotIcon className="size-3.5" />,
|
|
27
|
+
validated: <ShieldCheckIcon className="size-3.5" />,
|
|
28
|
+
deployed: <RocketIcon className="size-3.5" />,
|
|
29
|
+
rejected: <XCircleIcon className="size-3.5" />,
|
|
30
|
+
rolled_back: <UndoIcon className="size-3.5" />,
|
|
31
|
+
};
|
|
22
32
|
|
|
23
33
|
const ACTION_VARIANT: Record<string, "default" | "secondary" | "destructive" | "outline"> = {
|
|
24
34
|
created: "outline",
|
|
@@ -32,117 +42,15 @@ interface Props {
|
|
|
32
42
|
proposalId: string;
|
|
33
43
|
evolution: EvolutionEntry[];
|
|
34
44
|
evidence: EvidenceEntry[];
|
|
35
|
-
showContextBanner?: boolean;
|
|
36
|
-
}
|
|
37
|
-
|
|
38
|
-
function getValidationModeMeta(mode?: string | null): {
|
|
39
|
-
label: string;
|
|
40
|
-
variant: "default" | "secondary" | "destructive" | "outline";
|
|
41
|
-
description: string;
|
|
42
|
-
} | null {
|
|
43
|
-
switch (mode) {
|
|
44
|
-
case "host_replay":
|
|
45
|
-
return {
|
|
46
|
-
label: "Replay-backed validation",
|
|
47
|
-
variant: "default",
|
|
48
|
-
description:
|
|
49
|
-
"Validated against a controlled replay fixture instead of a free-form judge prompt.",
|
|
50
|
-
};
|
|
51
|
-
case "llm_judge":
|
|
52
|
-
return {
|
|
53
|
-
label: "Model judgment",
|
|
54
|
-
variant: "secondary",
|
|
55
|
-
description: "Validated by an LLM trigger check rather than a replay fixture.",
|
|
56
|
-
};
|
|
57
|
-
case "structural_guard":
|
|
58
|
-
return {
|
|
59
|
-
label: "Structural guard",
|
|
60
|
-
variant: "outline",
|
|
61
|
-
description:
|
|
62
|
-
"Only deterministic structural checks ran; no replay or judge validation was needed.",
|
|
63
|
-
};
|
|
64
|
-
default:
|
|
65
|
-
return null;
|
|
66
|
-
}
|
|
67
|
-
}
|
|
68
|
-
|
|
69
|
-
function sentenceCase(value: string): string {
|
|
70
|
-
return value.replace(/_/g, " ");
|
|
71
|
-
}
|
|
72
|
-
|
|
73
|
-
function getOutcomePresentation(action?: string | null): {
|
|
74
|
-
title: string;
|
|
75
|
-
summary: string;
|
|
76
|
-
tone: string;
|
|
77
|
-
icon: ReactNode;
|
|
78
|
-
liveSkillNote: string;
|
|
79
|
-
} {
|
|
80
|
-
switch (action) {
|
|
81
|
-
case "rejected":
|
|
82
|
-
return {
|
|
83
|
-
title: "Proposal rejected",
|
|
84
|
-
summary: "Selftune proposed a change, but blocked it before your live skill was updated.",
|
|
85
|
-
tone: "border-red-500/20 bg-red-500/8 text-red-700 dark:text-red-50",
|
|
86
|
-
icon: <XCircleIcon className="size-4 text-red-400" />,
|
|
87
|
-
liveSkillNote: "Your live skill is unchanged.",
|
|
88
|
-
};
|
|
89
|
-
case "validated":
|
|
90
|
-
return {
|
|
91
|
-
title: "Proposal validated",
|
|
92
|
-
summary: "The proposed change improved the eval signal and is ready for review or deploy.",
|
|
93
|
-
tone: "border-emerald-500/20 bg-emerald-500/8 text-emerald-700 dark:text-emerald-50",
|
|
94
|
-
icon: <CheckCircleIcon className="size-4 text-emerald-400" />,
|
|
95
|
-
liveSkillNote: "Your live skill has not changed until this proposal is deployed.",
|
|
96
|
-
};
|
|
97
|
-
case "deployed":
|
|
98
|
-
return {
|
|
99
|
-
title: "Proposal deployed",
|
|
100
|
-
summary: "The proposed change passed validation and was applied to the live skill.",
|
|
101
|
-
tone: "border-primary/25 bg-primary/8 text-foreground",
|
|
102
|
-
icon: <TrendingUpIcon className="size-4 text-primary" />,
|
|
103
|
-
liveSkillNote: "Your live skill now includes this change.",
|
|
104
|
-
};
|
|
105
|
-
case "rolled_back":
|
|
106
|
-
return {
|
|
107
|
-
title: "Proposal rolled back",
|
|
108
|
-
summary: "A deployed change was later reversed because follow-up evidence showed risk.",
|
|
109
|
-
tone: "border-amber-500/20 bg-amber-500/8 text-amber-800 dark:text-amber-50",
|
|
110
|
-
icon: <TrendingDownIcon className="size-4 text-amber-400" />,
|
|
111
|
-
liveSkillNote: "Your live skill no longer uses this proposal.",
|
|
112
|
-
};
|
|
113
|
-
case "created":
|
|
114
|
-
default:
|
|
115
|
-
return {
|
|
116
|
-
title: "Proposal under review",
|
|
117
|
-
summary: "Selftune found a possible improvement and recorded the proposed change.",
|
|
118
|
-
tone: "border-border/30 bg-muted/25 text-foreground",
|
|
119
|
-
icon: <CircleDotIcon className="size-4 text-muted-foreground" />,
|
|
120
|
-
liveSkillNote: "Your live skill is unchanged until a proposal is validated and deployed.",
|
|
121
|
-
};
|
|
122
|
-
}
|
|
123
45
|
}
|
|
124
46
|
|
|
125
47
|
/** Parse YAML-ish frontmatter from text, returns { meta, body } */
|
|
126
48
|
function parseFrontmatter(text: string): { meta: Record<string, string>; body: string } {
|
|
127
|
-
const
|
|
128
|
-
if (
|
|
129
|
-
return { meta: {}, body: text };
|
|
130
|
-
}
|
|
131
|
-
|
|
132
|
-
let closingIndex = -1;
|
|
133
|
-
for (let i = 1; i < lines.length; i++) {
|
|
134
|
-
if (lines[i].trim() === "---") {
|
|
135
|
-
closingIndex = i;
|
|
136
|
-
break;
|
|
137
|
-
}
|
|
138
|
-
}
|
|
139
|
-
|
|
140
|
-
if (closingIndex === -1) {
|
|
141
|
-
return { meta: {}, body: text };
|
|
142
|
-
}
|
|
49
|
+
const match = text.match(/^---\s*\n([\s\S]*?)\n---\s*\n([\s\S]*)$/);
|
|
50
|
+
if (!match) return { meta: {}, body: text };
|
|
143
51
|
|
|
144
52
|
const meta: Record<string, string> = {};
|
|
145
|
-
for (const line of
|
|
53
|
+
for (const line of match[1].split("\n")) {
|
|
146
54
|
const idx = line.indexOf(":");
|
|
147
55
|
if (idx > 0) {
|
|
148
56
|
const key = line.slice(0, idx).trim();
|
|
@@ -150,8 +58,7 @@ function parseFrontmatter(text: string): { meta: Record<string, string>; body: s
|
|
|
150
58
|
if (key && val) meta[key] = val;
|
|
151
59
|
}
|
|
152
60
|
}
|
|
153
|
-
|
|
154
|
-
return { meta, body: lines.slice(closingIndex + 1).join("\n") };
|
|
61
|
+
return { meta, body: match[2] };
|
|
155
62
|
}
|
|
156
63
|
|
|
157
64
|
function FrontmatterTable({ meta }: { meta: Record<string, string> }) {
|
|
@@ -212,7 +119,7 @@ function SkillContentBlock({
|
|
|
212
119
|
}
|
|
213
120
|
|
|
214
121
|
/** Smart formatting for a single validation value */
|
|
215
|
-
function formatValidationValue(key: string, val: unknown): ReactNode {
|
|
122
|
+
function formatValidationValue(key: string, val: unknown): React.ReactNode {
|
|
216
123
|
// Booleans
|
|
217
124
|
if (typeof val === "boolean") {
|
|
218
125
|
return val ? (
|
|
@@ -251,6 +158,44 @@ function formatValidationValue(key: string, val: unknown): ReactNode {
|
|
|
251
158
|
return <span>{String(val)}</span>;
|
|
252
159
|
}
|
|
253
160
|
|
|
161
|
+
function getPerEntryPassStatus(entry: unknown): boolean | null {
|
|
162
|
+
if (typeof entry !== "object" || entry === null) return null;
|
|
163
|
+
const obj = entry as Record<string, unknown>;
|
|
164
|
+
const afterPass = obj.after_pass ?? obj.after ?? obj.triggered ?? obj.result;
|
|
165
|
+
const passed = obj.passed ?? obj.matched;
|
|
166
|
+
return typeof afterPass === "boolean" ? afterPass : typeof passed === "boolean" ? passed : null;
|
|
167
|
+
}
|
|
168
|
+
|
|
169
|
+
function getEvidenceListKey(prefix: string, value: unknown): string {
|
|
170
|
+
if (typeof value !== "object" || value === null) {
|
|
171
|
+
return `${prefix}:${JSON.stringify(value)}`;
|
|
172
|
+
}
|
|
173
|
+
|
|
174
|
+
const record = value as Record<string, unknown>;
|
|
175
|
+
const nested =
|
|
176
|
+
typeof record.entry === "object" && record.entry !== null
|
|
177
|
+
? (record.entry as Record<string, unknown>)
|
|
178
|
+
: null;
|
|
179
|
+
const query =
|
|
180
|
+
typeof nested?.query === "string"
|
|
181
|
+
? nested.query
|
|
182
|
+
: typeof record.query === "string"
|
|
183
|
+
? record.query
|
|
184
|
+
: typeof record.prompt === "string"
|
|
185
|
+
? record.prompt
|
|
186
|
+
: typeof record.input === "string"
|
|
187
|
+
? record.input
|
|
188
|
+
: null;
|
|
189
|
+
|
|
190
|
+
if (query) return `${prefix}:${query}`;
|
|
191
|
+
|
|
192
|
+
const action = typeof record.action === "string" ? record.action : null;
|
|
193
|
+
const timestamp = typeof record.timestamp === "string" ? record.timestamp : null;
|
|
194
|
+
if (action && timestamp) return `${prefix}:${action}:${timestamp}`;
|
|
195
|
+
|
|
196
|
+
return `${prefix}:${JSON.stringify(record)}`;
|
|
197
|
+
}
|
|
198
|
+
|
|
254
199
|
/** Render a per_entry_result row — handles both flat EvalEntry and nested { entry, before_pass, after_pass } */
|
|
255
200
|
function PerEntryResult({ entry }: { entry: Record<string, unknown> }) {
|
|
256
201
|
// Handle nested shape: { entry: { query, should_trigger }, before_pass, after_pass }
|
|
@@ -261,11 +206,7 @@ function PerEntryResult({ entry }: { entry: Record<string, unknown> }) {
|
|
|
261
206
|
const beforePass =
|
|
262
207
|
entry.before_pass ?? entry.before ?? entry.original_triggered ?? entry.baseline;
|
|
263
208
|
const afterPass = entry.after_pass ?? entry.after ?? entry.triggered ?? entry.result;
|
|
264
|
-
const
|
|
265
|
-
|
|
266
|
-
// Determine icon: use after_pass for per_entry_results, passed for others
|
|
267
|
-
const isPass =
|
|
268
|
-
typeof afterPass === "boolean" ? afterPass : typeof passed === "boolean" ? passed : null;
|
|
209
|
+
const isPass = getPerEntryPassStatus(entry);
|
|
269
210
|
|
|
270
211
|
return (
|
|
271
212
|
<div className="flex items-start gap-2 text-xs py-1.5 border-b border-border/50 last:border-0">
|
|
@@ -314,15 +255,19 @@ function ValidationResults({ validation }: { validation: Record<string, unknown>
|
|
|
314
255
|
validation_mode,
|
|
315
256
|
validation_agent,
|
|
316
257
|
validation_fixture_id,
|
|
317
|
-
|
|
258
|
+
validation_fallback_reason,
|
|
318
259
|
...rest
|
|
319
260
|
} = validation;
|
|
320
261
|
|
|
321
262
|
const regressionsArr = Array.isArray(regressions) ? regressions : [];
|
|
322
263
|
const newPassesArr = Array.isArray(new_passes) ? new_passes : [];
|
|
323
264
|
const perEntryArr = Array.isArray(per_entry_results) ? per_entry_results : [];
|
|
324
|
-
const
|
|
325
|
-
|
|
265
|
+
const validationMode = typeof validation_mode === "string" ? validation_mode : null;
|
|
266
|
+
const validationAgent = typeof validation_agent === "string" ? validation_agent : null;
|
|
267
|
+
const validationFixtureId =
|
|
268
|
+
typeof validation_fixture_id === "string" ? validation_fixture_id : null;
|
|
269
|
+
const validationFallbackReason =
|
|
270
|
+
typeof validation_fallback_reason === "string" ? validation_fallback_reason : null;
|
|
326
271
|
|
|
327
272
|
return (
|
|
328
273
|
<div className="rounded-md border bg-muted/30 p-3 space-y-3">
|
|
@@ -333,34 +278,6 @@ function ValidationResults({ validation }: { validation: Record<string, unknown>
|
|
|
333
278
|
</span>
|
|
334
279
|
</p>
|
|
335
280
|
|
|
336
|
-
{validationMeta && (
|
|
337
|
-
<div className="rounded-md border bg-card px-3 py-2">
|
|
338
|
-
<div className="flex flex-wrap items-center gap-2">
|
|
339
|
-
<Badge variant={validationMeta.variant} className="text-[10px]">
|
|
340
|
-
{validationMeta.label}
|
|
341
|
-
</Badge>
|
|
342
|
-
{typeof validation_agent === "string" && validation_agent.trim() && (
|
|
343
|
-
<Badge variant="outline" className="text-[10px]">
|
|
344
|
-
agent: {validation_agent}
|
|
345
|
-
</Badge>
|
|
346
|
-
)}
|
|
347
|
-
{typeof validation_fixture_id === "string" && validation_fixture_id.trim() && (
|
|
348
|
-
<Badge variant="outline" className="text-[10px]">
|
|
349
|
-
fixture: {validation_fixture_id}
|
|
350
|
-
</Badge>
|
|
351
|
-
)}
|
|
352
|
-
</div>
|
|
353
|
-
<p className="mt-1 text-[11px] leading-relaxed text-muted-foreground">
|
|
354
|
-
{validationMeta.description}
|
|
355
|
-
</p>
|
|
356
|
-
{typeof validation_evidence_ref === "string" && validation_evidence_ref.trim() && (
|
|
357
|
-
<p className="mt-1 text-[10px] font-mono text-muted-foreground/70">
|
|
358
|
-
{validation_evidence_ref}
|
|
359
|
-
</p>
|
|
360
|
-
)}
|
|
361
|
-
</div>
|
|
362
|
-
)}
|
|
363
|
-
|
|
364
281
|
{/* Summary bar */}
|
|
365
282
|
<div className="flex items-center gap-3 flex-wrap">
|
|
366
283
|
{improved !== undefined && (
|
|
@@ -368,6 +285,21 @@ function ValidationResults({ validation }: { validation: Record<string, unknown>
|
|
|
368
285
|
{improved ? "Improved" : "Regressed"}
|
|
369
286
|
</Badge>
|
|
370
287
|
)}
|
|
288
|
+
{validationMode && (
|
|
289
|
+
<Badge variant="outline" className="text-[10px] capitalize">
|
|
290
|
+
{validationMode.replace(/_/g, " ")}
|
|
291
|
+
</Badge>
|
|
292
|
+
)}
|
|
293
|
+
{validationAgent && (
|
|
294
|
+
<Badge variant="secondary" className="text-[10px]">
|
|
295
|
+
{validationAgent}
|
|
296
|
+
</Badge>
|
|
297
|
+
)}
|
|
298
|
+
{validationFixtureId && (
|
|
299
|
+
<Badge variant="secondary" className="text-[10px] font-mono" title={validationFixtureId}>
|
|
300
|
+
fixture #{validationFixtureId.slice(0, 8)}
|
|
301
|
+
</Badge>
|
|
302
|
+
)}
|
|
371
303
|
{typeof before_pass_rate === "number" && typeof after_pass_rate === "number" && (
|
|
372
304
|
<span className="text-xs font-mono text-muted-foreground">
|
|
373
305
|
{(before_pass_rate * 100).toFixed(1)}% → {(after_pass_rate * 100).toFixed(1)}%
|
|
@@ -375,13 +307,7 @@ function ValidationResults({ validation }: { validation: Record<string, unknown>
|
|
|
375
307
|
)}
|
|
376
308
|
{typeof net_change === "number" && (
|
|
377
309
|
<span
|
|
378
|
-
className={`text-xs font-mono font-semibold ${
|
|
379
|
-
net_change > 0
|
|
380
|
-
? "text-emerald-600 dark:text-emerald-400"
|
|
381
|
-
: net_change < 0
|
|
382
|
-
? "text-red-500"
|
|
383
|
-
: "text-muted-foreground"
|
|
384
|
-
}`}
|
|
310
|
+
className={`text-xs font-mono font-semibold ${net_change > 0 ? "text-emerald-600 dark:text-emerald-400" : "text-red-500"}`}
|
|
385
311
|
>
|
|
386
312
|
{net_change > 0 ? "+" : ""}
|
|
387
313
|
{(net_change * 100).toFixed(1)}%
|
|
@@ -389,6 +315,12 @@ function ValidationResults({ validation }: { validation: Record<string, unknown>
|
|
|
389
315
|
)}
|
|
390
316
|
</div>
|
|
391
317
|
|
|
318
|
+
{validationFallbackReason && (
|
|
319
|
+
<div className="rounded border border-amber-300/60 bg-amber-50 px-2.5 py-2 text-[11px] text-amber-900 dark:border-amber-900/60 dark:bg-amber-950/30 dark:text-amber-200">
|
|
320
|
+
Replay fallback: {validationFallbackReason}
|
|
321
|
+
</div>
|
|
322
|
+
)}
|
|
323
|
+
|
|
392
324
|
{/* New passes */}
|
|
393
325
|
{newPassesArr.length > 0 && (
|
|
394
326
|
<div>
|
|
@@ -396,9 +328,9 @@ function ValidationResults({ validation }: { validation: Record<string, unknown>
|
|
|
396
328
|
New Passes ({newPassesArr.length})
|
|
397
329
|
</p>
|
|
398
330
|
<div className="rounded border bg-card p-2">
|
|
399
|
-
{newPassesArr.map((entry
|
|
331
|
+
{newPassesArr.map((entry) => (
|
|
400
332
|
<PerEntryResult
|
|
401
|
-
key={
|
|
333
|
+
key={getEvidenceListKey("new-pass", entry)}
|
|
402
334
|
entry={
|
|
403
335
|
typeof entry === "object" && entry !== null
|
|
404
336
|
? (entry as Record<string, unknown>)
|
|
@@ -417,9 +349,9 @@ function ValidationResults({ validation }: { validation: Record<string, unknown>
|
|
|
417
349
|
Regressions ({regressionsArr.length})
|
|
418
350
|
</p>
|
|
419
351
|
<div className="rounded border border-red-200 dark:border-red-900/50 bg-card p-2">
|
|
420
|
-
{regressionsArr.map((entry
|
|
352
|
+
{regressionsArr.map((entry) => (
|
|
421
353
|
<PerEntryResult
|
|
422
|
-
key={
|
|
354
|
+
key={getEvidenceListKey("regression", entry)}
|
|
423
355
|
entry={
|
|
424
356
|
typeof entry === "object" && entry !== null
|
|
425
357
|
? (entry as Record<string, unknown>)
|
|
@@ -451,17 +383,7 @@ function ValidationResults({ validation }: { validation: Record<string, unknown>
|
|
|
451
383
|
|
|
452
384
|
function PerEntryResultsSection({ entries }: { entries: unknown[] }) {
|
|
453
385
|
const [expanded, setExpanded] = useState(false);
|
|
454
|
-
const passCount = entries.filter((
|
|
455
|
-
if (typeof e !== "object" || e === null) return false;
|
|
456
|
-
const obj = e as Record<string, unknown>;
|
|
457
|
-
return (
|
|
458
|
-
obj.passed === true ||
|
|
459
|
-
obj.matched === true ||
|
|
460
|
-
obj.triggered === true ||
|
|
461
|
-
obj.after === true ||
|
|
462
|
-
obj.result === true
|
|
463
|
-
);
|
|
464
|
-
}).length;
|
|
386
|
+
const passCount = entries.filter((entry) => getPerEntryPassStatus(entry) === true).length;
|
|
465
387
|
|
|
466
388
|
const display = expanded ? entries : entries.slice(0, 5);
|
|
467
389
|
|
|
@@ -489,9 +411,9 @@ function PerEntryResultsSection({ entries }: { entries: unknown[] }) {
|
|
|
489
411
|
/>
|
|
490
412
|
</div>
|
|
491
413
|
<div className="rounded border bg-card p-2 max-h-[300px] overflow-y-auto">
|
|
492
|
-
{display.map((entry
|
|
414
|
+
{display.map((entry) => (
|
|
493
415
|
<PerEntryResult
|
|
494
|
-
key={
|
|
416
|
+
key={getEvidenceListKey("per-entry", entry)}
|
|
495
417
|
entry={
|
|
496
418
|
typeof entry === "object" && entry !== null
|
|
497
419
|
? (entry as Record<string, unknown>)
|
|
@@ -554,13 +476,13 @@ function EvalSetSection({ evalSet }: { evalSet: Array<Record<string, unknown>> }
|
|
|
554
476
|
</button>
|
|
555
477
|
{expanded && (
|
|
556
478
|
<div className="space-y-1">
|
|
557
|
-
{evalSet.map((evalEntry
|
|
479
|
+
{evalSet.map((evalEntry) => {
|
|
558
480
|
const query = evalEntry.query ?? evalEntry.prompt ?? evalEntry.input;
|
|
559
481
|
const expected = evalEntry.expected ?? evalEntry.should_trigger;
|
|
560
482
|
const passed = evalEntry.passed ?? evalEntry.result;
|
|
561
483
|
return (
|
|
562
484
|
<div
|
|
563
|
-
key={
|
|
485
|
+
key={getEvidenceListKey("eval-set", evalEntry)}
|
|
564
486
|
className="flex items-start gap-2 text-xs py-1 border-b border-border/50 last:border-0"
|
|
565
487
|
>
|
|
566
488
|
{typeof passed === "boolean" ? (
|
|
@@ -726,12 +648,7 @@ function CollapsedEvidenceCard({
|
|
|
726
648
|
);
|
|
727
649
|
}
|
|
728
650
|
|
|
729
|
-
export function EvidenceViewer({
|
|
730
|
-
proposalId,
|
|
731
|
-
evolution,
|
|
732
|
-
evidence,
|
|
733
|
-
showContextBanner = true,
|
|
734
|
-
}: Props) {
|
|
651
|
+
export function EvidenceViewer({ proposalId, evolution, evidence }: Props) {
|
|
735
652
|
const steps = useMemo(
|
|
736
653
|
() =>
|
|
737
654
|
evolution
|
|
@@ -750,7 +667,6 @@ export function EvidenceViewer({
|
|
|
750
667
|
|
|
751
668
|
// Track which earlier rounds are manually expanded
|
|
752
669
|
const [expandedRounds, setExpandedRounds] = useState<Set<string>>(new Set());
|
|
753
|
-
const [expandedProposalTargets, setExpandedProposalTargets] = useState<Set<string>>(new Set());
|
|
754
670
|
|
|
755
671
|
const toggleRound = (key: string) => {
|
|
756
672
|
setExpandedRounds((prev) => {
|
|
@@ -761,15 +677,6 @@ export function EvidenceViewer({
|
|
|
761
677
|
});
|
|
762
678
|
};
|
|
763
679
|
|
|
764
|
-
const toggleProposalHistory = (target: string) => {
|
|
765
|
-
setExpandedProposalTargets((prev) => {
|
|
766
|
-
const next = new Set(prev);
|
|
767
|
-
if (next.has(target)) next.delete(target);
|
|
768
|
-
else next.add(target);
|
|
769
|
-
return next;
|
|
770
|
-
});
|
|
771
|
-
};
|
|
772
|
-
|
|
773
680
|
const snapshot = useMemo(() => {
|
|
774
681
|
for (let i = steps.length - 1; i >= 0; i--) {
|
|
775
682
|
if (steps[i].eval_snapshot) return steps[i].eval_snapshot as Record<string, unknown>;
|
|
@@ -793,318 +700,176 @@ export function EvidenceViewer({
|
|
|
793
700
|
return { proposalEntries: proposals, validationsByTarget: validationMap };
|
|
794
701
|
}, [entries]);
|
|
795
702
|
|
|
796
|
-
const latestStep = steps[steps.length - 1] ?? null;
|
|
797
|
-
const lifecycleLabel = steps.map((step) => step.action.replace("_", " ")).join(" -> ");
|
|
798
|
-
const outcome = getOutcomePresentation(latestStep?.action);
|
|
799
|
-
const validationMeta = getValidationModeMeta(latestStep?.validation_mode);
|
|
800
|
-
const latestProposalConfidence = useMemo(() => {
|
|
801
|
-
for (let i = proposalEntries.length - 1; i >= 0; i--) {
|
|
802
|
-
if (proposalEntries[i].confidence !== null) {
|
|
803
|
-
return proposalEntries[i].confidence;
|
|
804
|
-
}
|
|
805
|
-
}
|
|
806
|
-
return null;
|
|
807
|
-
}, [proposalEntries]);
|
|
808
|
-
const proposalCards = useMemo(() => {
|
|
809
|
-
const grouped = new Map<string, EvidenceEntry[]>();
|
|
810
|
-
for (const entry of proposalEntries) {
|
|
811
|
-
const key = entry.target || "proposal";
|
|
812
|
-
const group = grouped.get(key) ?? [];
|
|
813
|
-
group.push(entry);
|
|
814
|
-
grouped.set(key, group);
|
|
815
|
-
}
|
|
816
|
-
|
|
817
|
-
return Array.from(grouped.entries()).map(([target, group]) => {
|
|
818
|
-
let richest = group[group.length - 1];
|
|
819
|
-
for (let i = group.length - 1; i >= 0; i--) {
|
|
820
|
-
if (group[i].original_text || group[i].proposed_text || group[i].rationale) {
|
|
821
|
-
richest = group[i];
|
|
822
|
-
break;
|
|
823
|
-
}
|
|
824
|
-
}
|
|
825
|
-
const primaryIndex = group.findIndex((entry) => entry === richest);
|
|
826
|
-
return {
|
|
827
|
-
target,
|
|
828
|
-
primaryEntry: richest,
|
|
829
|
-
historyEntries: group.filter((_, index) => index !== primaryIndex),
|
|
830
|
-
entries: group,
|
|
831
|
-
};
|
|
832
|
-
});
|
|
833
|
-
}, [proposalEntries]);
|
|
834
|
-
|
|
835
703
|
return (
|
|
836
704
|
<div className="space-y-4">
|
|
837
705
|
{/* Context banner */}
|
|
838
|
-
|
|
839
|
-
<
|
|
840
|
-
|
|
841
|
-
|
|
842
|
-
|
|
843
|
-
|
|
844
|
-
|
|
845
|
-
|
|
846
|
-
</div>
|
|
847
|
-
)}
|
|
706
|
+
<div className="flex items-start gap-2.5 rounded-lg border border-primary/20 bg-primary/5 px-3.5 py-2.5">
|
|
707
|
+
<InfoIcon className="size-4 text-primary/60 shrink-0 mt-0.5" />
|
|
708
|
+
<p className="text-xs text-muted-foreground leading-relaxed">
|
|
709
|
+
This view shows the complete evidence trail for a skill evolution proposal — how the
|
|
710
|
+
skill was changed, the eval test results before and after, and whether the change improved
|
|
711
|
+
performance.
|
|
712
|
+
</p>
|
|
713
|
+
</div>
|
|
848
714
|
|
|
849
|
-
|
|
715
|
+
{/* Proposal journey */}
|
|
716
|
+
<Card>
|
|
850
717
|
<CardHeader className="pb-3">
|
|
851
|
-
<CardTitle className="
|
|
852
|
-
<span>Proposal
|
|
718
|
+
<CardTitle className="text-sm flex items-center gap-2">
|
|
719
|
+
<span>Proposal Journey</span>
|
|
853
720
|
<span className="font-mono text-xs text-muted-foreground">
|
|
854
721
|
#{proposalId.slice(0, 12)}
|
|
855
722
|
</span>
|
|
856
723
|
</CardTitle>
|
|
857
724
|
</CardHeader>
|
|
858
725
|
<CardContent className="space-y-3">
|
|
859
|
-
<div className=
|
|
860
|
-
|
|
861
|
-
<div
|
|
862
|
-
|
|
863
|
-
<div className="flex
|
|
864
|
-
|
|
865
|
-
|
|
866
|
-
|
|
867
|
-
|
|
868
|
-
|
|
869
|
-
|
|
870
|
-
|
|
871
|
-
|
|
872
|
-
|
|
726
|
+
<div className="flex items-center gap-2 flex-wrap">
|
|
727
|
+
{steps.map((step, i) => (
|
|
728
|
+
<div key={`${step.action}-${step.timestamp}`} className="contents">
|
|
729
|
+
{i > 0 && <ArrowRightIcon className="size-3 text-muted-foreground/50 shrink-0" />}
|
|
730
|
+
<div className="flex items-center gap-1.5 rounded-md border px-2.5 py-1.5 bg-card">
|
|
731
|
+
{ACTION_ICON[step.action]}
|
|
732
|
+
<Badge
|
|
733
|
+
variant={ACTION_VARIANT[step.action] ?? "secondary"}
|
|
734
|
+
className="text-[10px] capitalize"
|
|
735
|
+
>
|
|
736
|
+
{step.action.replace("_", " ")}
|
|
737
|
+
</Badge>
|
|
738
|
+
<span className="text-[10px] text-muted-foreground">
|
|
739
|
+
{timeAgo(step.timestamp)}
|
|
740
|
+
</span>
|
|
873
741
|
</div>
|
|
874
|
-
<p className="text-sm leading-6 text-current/90">{outcome.summary}</p>
|
|
875
|
-
{latestStep?.details && (
|
|
876
|
-
<div className="rounded-md bg-black/10 px-3 py-2 text-sm leading-6 text-current/90 dark:bg-black/20">
|
|
877
|
-
{latestStep.details}
|
|
878
|
-
</div>
|
|
879
|
-
)}
|
|
880
|
-
<p className="text-xs font-medium text-current/75">{outcome.liveSkillNote}</p>
|
|
881
742
|
</div>
|
|
882
|
-
|
|
883
|
-
</div>
|
|
884
|
-
|
|
885
|
-
<div className="flex flex-wrap items-center gap-2">
|
|
886
|
-
{latestStep?.timestamp && (
|
|
887
|
-
<span className="text-[10px] font-mono text-muted-foreground">
|
|
888
|
-
{timeAgo(latestStep.timestamp)}
|
|
889
|
-
</span>
|
|
890
|
-
)}
|
|
891
|
-
<Badge variant="outline" className="text-[10px]">
|
|
892
|
-
{entries.length} evidence {entries.length === 1 ? "row" : "rows"}
|
|
893
|
-
</Badge>
|
|
894
|
-
{validationMeta && (
|
|
895
|
-
<Badge variant={validationMeta.variant} className="text-[10px]">
|
|
896
|
-
{validationMeta.label}
|
|
897
|
-
</Badge>
|
|
898
|
-
)}
|
|
899
|
-
{latestStep?.validation_fixture_id && (
|
|
900
|
-
<Badge variant="outline" className="text-[10px]">
|
|
901
|
-
fixture: {latestStep.validation_fixture_id}
|
|
902
|
-
</Badge>
|
|
903
|
-
)}
|
|
904
|
-
{latestProposalConfidence != null && (
|
|
905
|
-
<Badge variant="secondary" className="text-[10px]">
|
|
906
|
-
{Math.round(latestProposalConfidence * 100)}% confidence
|
|
907
|
-
</Badge>
|
|
908
|
-
)}
|
|
909
|
-
</div>
|
|
910
|
-
|
|
911
|
-
{validationMeta && (
|
|
912
|
-
<p className="text-[11px] leading-relaxed text-muted-foreground">
|
|
913
|
-
{validationMeta.description}
|
|
914
|
-
</p>
|
|
915
|
-
)}
|
|
916
|
-
|
|
917
|
-
<div className="flex flex-wrap items-center gap-2 text-[11px] text-muted-foreground">
|
|
918
|
-
<span className="font-headline uppercase tracking-[0.16em] text-muted-foreground/80">
|
|
919
|
-
Lifecycle
|
|
920
|
-
</span>
|
|
921
|
-
<span>{lifecycleLabel ? sentenceCase(lifecycleLabel) : "No lifecycle recorded"}</span>
|
|
743
|
+
))}
|
|
922
744
|
</div>
|
|
923
745
|
|
|
924
|
-
{
|
|
925
|
-
|
|
926
|
-
|
|
927
|
-
|
|
746
|
+
{/* Eval snapshot — pass rate change */}
|
|
747
|
+
{snapshot && (
|
|
748
|
+
<div className="flex items-center gap-3 rounded-md border bg-muted/20 px-3 py-2">
|
|
749
|
+
{typeof snapshot.net_change === "number" && (
|
|
928
750
|
<div className="flex items-center gap-1">
|
|
929
|
-
{snapshot.net_change > 0 ? (
|
|
751
|
+
{(snapshot.net_change as number) > 0 ? (
|
|
930
752
|
<TrendingUpIcon className="size-3.5 text-emerald-500" />
|
|
931
|
-
) : snapshot.net_change < 0 ? (
|
|
932
|
-
<TrendingDownIcon className="size-3.5 text-red-500" />
|
|
933
753
|
) : (
|
|
934
|
-
<
|
|
754
|
+
<TrendingDownIcon className="size-3.5 text-red-500" />
|
|
935
755
|
)}
|
|
936
756
|
<span
|
|
937
|
-
className={`text-sm font-
|
|
938
|
-
snapshot.net_change > 0
|
|
939
|
-
? "text-emerald-600 dark:text-emerald-400"
|
|
940
|
-
: snapshot.net_change < 0
|
|
941
|
-
? "text-red-500"
|
|
942
|
-
: "text-muted-foreground"
|
|
943
|
-
}`}
|
|
757
|
+
className={`text-sm font-semibold font-mono ${(snapshot.net_change as number) > 0 ? "text-emerald-600 dark:text-emerald-400" : "text-red-500"}`}
|
|
944
758
|
>
|
|
945
|
-
{snapshot.net_change > 0 ? "+" : ""}
|
|
946
|
-
{Math.round(snapshot.net_change * 100)}%
|
|
759
|
+
{(snapshot.net_change as number) > 0 ? "+" : ""}
|
|
760
|
+
{Math.round((snapshot.net_change as number) * 100)}%
|
|
947
761
|
</span>
|
|
948
762
|
</div>
|
|
949
|
-
|
|
950
|
-
|
|
951
|
-
|
|
952
|
-
|
|
953
|
-
|
|
954
|
-
|
|
955
|
-
|
|
956
|
-
</Badge>
|
|
957
|
-
) : snapshot.net_change < 0 ? (
|
|
958
|
-
<Badge variant="destructive" className="text-[10px]">
|
|
959
|
-
Regressed
|
|
960
|
-
</Badge>
|
|
961
|
-
) : (
|
|
962
|
-
<Badge variant="outline" className="text-[10px]">
|
|
963
|
-
No change
|
|
964
|
-
</Badge>
|
|
763
|
+
)}
|
|
764
|
+
{typeof snapshot.before_pass_rate === "number" &&
|
|
765
|
+
typeof snapshot.after_pass_rate === "number" && (
|
|
766
|
+
<span className="text-xs text-muted-foreground font-mono">
|
|
767
|
+
{Math.round((snapshot.before_pass_rate as number) * 100)}% →{" "}
|
|
768
|
+
{Math.round((snapshot.after_pass_rate as number) * 100)}%
|
|
769
|
+
</span>
|
|
965
770
|
)}
|
|
966
|
-
|
|
967
|
-
|
|
771
|
+
{snapshot.improved !== undefined && (
|
|
772
|
+
<Badge
|
|
773
|
+
variant={snapshot.improved ? "default" : "destructive"}
|
|
774
|
+
className="text-[10px]"
|
|
775
|
+
>
|
|
776
|
+
{snapshot.improved ? "Improved" : "Regressed"}
|
|
777
|
+
</Badge>
|
|
778
|
+
)}
|
|
779
|
+
</div>
|
|
780
|
+
)}
|
|
781
|
+
|
|
782
|
+
{/* Details from last step */}
|
|
783
|
+
{steps.length > 0 && steps[steps.length - 1].details && (
|
|
784
|
+
<p className="text-xs text-muted-foreground leading-relaxed">
|
|
785
|
+
{steps[steps.length - 1].details}
|
|
786
|
+
</p>
|
|
787
|
+
)}
|
|
968
788
|
</CardContent>
|
|
969
789
|
</Card>
|
|
970
790
|
|
|
971
791
|
{/* Proposal-stage evidence — standalone cards showing original/proposed text */}
|
|
972
|
-
{
|
|
973
|
-
<
|
|
974
|
-
|
|
975
|
-
|
|
976
|
-
|
|
977
|
-
|
|
978
|
-
|
|
979
|
-
|
|
980
|
-
|
|
981
|
-
|
|
982
|
-
{proposalCards.map((group) => {
|
|
983
|
-
const hasHistory = group.historyEntries.length > 0;
|
|
984
|
-
const isExpanded = expandedProposalTargets.has(group.target);
|
|
792
|
+
{proposalEntries.map((entry) => (
|
|
793
|
+
<EvidenceCard
|
|
794
|
+
key={`proposal-${entry.target}-${entry.timestamp}`}
|
|
795
|
+
entry={entry}
|
|
796
|
+
roundLabel={null}
|
|
797
|
+
roundStatus="single"
|
|
798
|
+
prevPassRate={null}
|
|
799
|
+
currPassRate={null}
|
|
800
|
+
/>
|
|
801
|
+
))}
|
|
985
802
|
|
|
986
|
-
|
|
987
|
-
|
|
988
|
-
|
|
989
|
-
|
|
990
|
-
|
|
991
|
-
|
|
992
|
-
|
|
993
|
-
|
|
994
|
-
|
|
995
|
-
{
|
|
996
|
-
|
|
803
|
+
{/* Validation-stage evidence — grouped by target with iteration rounds */}
|
|
804
|
+
{Array.from(validationsByTarget.entries()).map(([target, targetEntries]) => {
|
|
805
|
+
const hasMultipleRounds = targetEntries.length > 1;
|
|
806
|
+
|
|
807
|
+
return (
|
|
808
|
+
<div key={target} className="space-y-2">
|
|
809
|
+
{targetEntries.map((entry, i) => {
|
|
810
|
+
const isLast = i === targetEntries.length - 1;
|
|
811
|
+
const roundLabel = hasMultipleRounds
|
|
812
|
+
? `Round ${i + 1} of ${targetEntries.length}`
|
|
813
|
+
: null;
|
|
814
|
+
const prevPassRate = i > 0 ? getAfterPassRate(targetEntries[i - 1]) : null;
|
|
815
|
+
const currPassRate = getAfterPassRate(entry);
|
|
816
|
+
const roundKey = `${target}-${entry.timestamp}`;
|
|
817
|
+
const roundStatus: RoundStatus = !hasMultipleRounds
|
|
818
|
+
? "single"
|
|
819
|
+
: isLast
|
|
820
|
+
? "final"
|
|
821
|
+
: "intermediate";
|
|
822
|
+
|
|
823
|
+
// Earlier rounds: collapsed by default
|
|
824
|
+
if (roundStatus === "intermediate" && !expandedRounds.has(roundKey)) {
|
|
825
|
+
return (
|
|
826
|
+
<CollapsedEvidenceCard
|
|
827
|
+
key={roundKey}
|
|
828
|
+
entry={entry}
|
|
829
|
+
roundLabel={roundLabel!}
|
|
830
|
+
onExpand={() => toggleRound(roundKey)}
|
|
831
|
+
/>
|
|
832
|
+
);
|
|
833
|
+
}
|
|
834
|
+
|
|
835
|
+
// Expanded earlier round — show with collapse toggle
|
|
836
|
+
if (roundStatus === "intermediate" && expandedRounds.has(roundKey)) {
|
|
837
|
+
return (
|
|
838
|
+
<div key={roundKey} className="space-y-1">
|
|
997
839
|
<button
|
|
998
840
|
type="button"
|
|
999
|
-
onClick={() =>
|
|
1000
|
-
className="flex items-center gap-1
|
|
841
|
+
onClick={() => toggleRound(roundKey)}
|
|
842
|
+
className="flex items-center gap-1 text-[10px] text-muted-foreground hover:text-foreground transition-colors px-1"
|
|
1001
843
|
>
|
|
1002
|
-
|
|
1003
|
-
|
|
1004
|
-
) : (
|
|
1005
|
-
<ChevronRightIcon className="size-3" />
|
|
1006
|
-
)}
|
|
1007
|
-
{isExpanded ? "Hide" : "Show"} {group.historyEntries.length} earlier{" "}
|
|
1008
|
-
{group.historyEntries.length === 1 ? "draft" : "drafts"}
|
|
844
|
+
<ChevronDownIcon className="size-3" />
|
|
845
|
+
Collapse {roundLabel}
|
|
1009
846
|
</button>
|
|
1010
|
-
{isExpanded &&
|
|
1011
|
-
group.historyEntries.map((entry, index) => (
|
|
1012
|
-
<EvidenceCard
|
|
1013
|
-
key={`proposal-history-${group.target}-${entry.timestamp}-${index}`}
|
|
1014
|
-
entry={entry}
|
|
1015
|
-
roundLabel={`Draft ${index + 1} of ${group.historyEntries.length}`}
|
|
1016
|
-
roundStatus="intermediate"
|
|
1017
|
-
prevPassRate={null}
|
|
1018
|
-
currPassRate={null}
|
|
1019
|
-
/>
|
|
1020
|
-
))}
|
|
1021
|
-
</div>
|
|
1022
|
-
)}
|
|
1023
|
-
</div>
|
|
1024
|
-
);
|
|
1025
|
-
})}
|
|
1026
|
-
</div>
|
|
1027
|
-
)}
|
|
1028
|
-
|
|
1029
|
-
{/* Validation-stage evidence — grouped by target with iteration rounds */}
|
|
1030
|
-
{Array.from(validationsByTarget.entries()).length > 0 && (
|
|
1031
|
-
<div className="space-y-2">
|
|
1032
|
-
<div className="space-y-1">
|
|
1033
|
-
<p className="text-xs font-semibold uppercase tracking-[0.18em] text-muted-foreground/80">
|
|
1034
|
-
How it was tested
|
|
1035
|
-
</p>
|
|
1036
|
-
<p className="text-sm text-muted-foreground">
|
|
1037
|
-
Validation evidence shows whether the proposal improved the eval signal.
|
|
1038
|
-
</p>
|
|
1039
|
-
</div>
|
|
1040
|
-
{Array.from(validationsByTarget.entries()).map(([target, targetEntries]) => {
|
|
1041
|
-
const hasMultipleRounds = targetEntries.length > 1;
|
|
1042
|
-
|
|
1043
|
-
return (
|
|
1044
|
-
<div key={target} className="space-y-2">
|
|
1045
|
-
{targetEntries.map((entry, i) => {
|
|
1046
|
-
const isLast = i === targetEntries.length - 1;
|
|
1047
|
-
const roundLabel = hasMultipleRounds
|
|
1048
|
-
? `Round ${i + 1} of ${targetEntries.length}`
|
|
1049
|
-
: null;
|
|
1050
|
-
const prevPassRate = i > 0 ? getAfterPassRate(targetEntries[i - 1]) : null;
|
|
1051
|
-
const currPassRate = getAfterPassRate(entry);
|
|
1052
|
-
const roundKey = `${target}-${entry.timestamp}`;
|
|
1053
|
-
const roundStatus: RoundStatus = !hasMultipleRounds
|
|
1054
|
-
? "single"
|
|
1055
|
-
: isLast
|
|
1056
|
-
? "final"
|
|
1057
|
-
: "intermediate";
|
|
1058
|
-
|
|
1059
|
-
if (roundStatus === "intermediate" && !expandedRounds.has(roundKey)) {
|
|
1060
|
-
return (
|
|
1061
|
-
<CollapsedEvidenceCard
|
|
1062
|
-
key={roundKey}
|
|
1063
|
-
entry={entry}
|
|
1064
|
-
roundLabel={roundLabel!}
|
|
1065
|
-
onExpand={() => toggleRound(roundKey)}
|
|
1066
|
-
/>
|
|
1067
|
-
);
|
|
1068
|
-
}
|
|
1069
|
-
|
|
1070
|
-
if (roundStatus === "intermediate" && expandedRounds.has(roundKey)) {
|
|
1071
|
-
return (
|
|
1072
|
-
<div key={roundKey} className="space-y-1">
|
|
1073
|
-
<button
|
|
1074
|
-
type="button"
|
|
1075
|
-
onClick={() => toggleRound(roundKey)}
|
|
1076
|
-
className="flex items-center gap-1 px-1 text-[10px] text-muted-foreground transition-colors hover:text-foreground"
|
|
1077
|
-
>
|
|
1078
|
-
<ChevronDownIcon className="size-3" />
|
|
1079
|
-
Collapse {roundLabel}
|
|
1080
|
-
</button>
|
|
1081
|
-
<EvidenceCard
|
|
1082
|
-
entry={entry}
|
|
1083
|
-
roundLabel={roundLabel}
|
|
1084
|
-
roundStatus={roundStatus}
|
|
1085
|
-
prevPassRate={prevPassRate}
|
|
1086
|
-
currPassRate={currPassRate}
|
|
1087
|
-
/>
|
|
1088
|
-
</div>
|
|
1089
|
-
);
|
|
1090
|
-
}
|
|
1091
|
-
|
|
1092
|
-
return (
|
|
1093
847
|
<EvidenceCard
|
|
1094
|
-
key={roundKey}
|
|
1095
848
|
entry={entry}
|
|
1096
849
|
roundLabel={roundLabel}
|
|
1097
850
|
roundStatus={roundStatus}
|
|
1098
851
|
prevPassRate={prevPassRate}
|
|
1099
852
|
currPassRate={currPassRate}
|
|
1100
853
|
/>
|
|
1101
|
-
|
|
1102
|
-
|
|
1103
|
-
|
|
1104
|
-
|
|
1105
|
-
|
|
1106
|
-
|
|
1107
|
-
|
|
854
|
+
</div>
|
|
855
|
+
);
|
|
856
|
+
}
|
|
857
|
+
|
|
858
|
+
// Final round (or single entry) — always expanded
|
|
859
|
+
return (
|
|
860
|
+
<EvidenceCard
|
|
861
|
+
key={roundKey}
|
|
862
|
+
entry={entry}
|
|
863
|
+
roundLabel={roundLabel}
|
|
864
|
+
roundStatus={roundStatus}
|
|
865
|
+
prevPassRate={prevPassRate}
|
|
866
|
+
currPassRate={currPassRate}
|
|
867
|
+
/>
|
|
868
|
+
);
|
|
869
|
+
})}
|
|
870
|
+
</div>
|
|
871
|
+
);
|
|
872
|
+
})}
|
|
1108
873
|
|
|
1109
874
|
{entries.length === 0 && (
|
|
1110
875
|
<div className="flex items-center justify-center rounded-lg border border-dashed py-8">
|