selftune 0.2.23 → 0.2.24
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +6 -0
- package/README.md +93 -15
- package/apps/local-dashboard/dist/assets/index-DgY2KGP-.css +1 -0
- package/apps/local-dashboard/dist/assets/index-Dmx7LPVX.js +15 -0
- package/apps/local-dashboard/dist/assets/vendor-react-C5oyHiV1.js +11 -0
- package/apps/local-dashboard/dist/assets/{vendor-table-BIiI3YhS.js → vendor-table-Bc_bbKd8.js} +1 -1
- package/apps/local-dashboard/dist/assets/vendor-ui-B3BPIYy7.js +1 -0
- package/apps/local-dashboard/dist/index.html +5 -5
- package/cli/selftune/adapters/codex/install.ts +310 -78
- package/cli/selftune/adapters/opencode/install.ts +3 -4
- package/cli/selftune/alpha-upload/build-payloads.ts +3 -3
- package/cli/selftune/alpha-upload/stage-canonical.ts +17 -11
- package/cli/selftune/auto-update.ts +200 -8
- package/cli/selftune/canonical-export.ts +55 -25
- package/cli/selftune/command-surface.ts +397 -0
- package/cli/selftune/contribute/contribute.ts +64 -13
- package/cli/selftune/contribution-config.ts +57 -3
- package/cli/selftune/contribution-preferences.ts +117 -0
- package/cli/selftune/contribution-signals.ts +8 -4
- package/cli/selftune/contribution-staging.ts +13 -2
- package/cli/selftune/contributions.ts +55 -121
- package/cli/selftune/creator-contributions.ts +29 -10
- package/cli/selftune/cron/setup.ts +7 -3
- package/cli/selftune/dashboard-contract.ts +73 -0
- package/cli/selftune/dashboard-server.ts +168 -17
- package/cli/selftune/dashboard.ts +350 -17
- package/cli/selftune/eval/baseline.ts +21 -5
- package/cli/selftune/eval/execution-eval.ts +170 -0
- package/cli/selftune/eval/family-overlap.ts +2 -2
- package/cli/selftune/eval/hooks-to-evals.ts +228 -82
- package/cli/selftune/eval/import-skillsbench.ts +2 -2
- package/cli/selftune/eval/invocation-classifier.ts +56 -0
- package/cli/selftune/eval/synthetic-evals.ts +5 -3
- package/cli/selftune/eval/unit-test-cli.ts +7 -4
- package/cli/selftune/evolution/apply-proposal.ts +295 -0
- package/cli/selftune/evolution/engines/replay-engine.ts +79 -57
- package/cli/selftune/evolution/evolve-body.ts +100 -39
- package/cli/selftune/evolution/evolve.ts +244 -52
- package/cli/selftune/evolution/rollback.ts +0 -1
- package/cli/selftune/evolution/validate-body.ts +68 -42
- package/cli/selftune/evolution/validate-host-replay.ts +510 -60
- package/cli/selftune/evolution/validate-proposal.ts +11 -150
- package/cli/selftune/evolution/validate-routing.ts +43 -41
- package/cli/selftune/evolution/validation-contract.ts +91 -0
- package/cli/selftune/grading/auto-grade.ts +11 -7
- package/cli/selftune/grading/grade-session.ts +10 -16
- package/cli/selftune/index.ts +35 -10
- package/cli/selftune/ingestors/claude-replay.ts +15 -10
- package/cli/selftune/ingestors/codex-wrapper.ts +3 -3
- package/cli/selftune/ingestors/opencode-ingest.ts +2 -2
- package/cli/selftune/ingestors/pi-ingest.ts +3 -2
- package/cli/selftune/init.ts +27 -3
- package/cli/selftune/localdb/direct-write.ts +35 -1
- package/cli/selftune/localdb/queries/cron.ts +34 -0
- package/cli/selftune/localdb/queries/dashboard.ts +834 -0
- package/cli/selftune/localdb/queries/evolution.ts +158 -0
- package/cli/selftune/localdb/queries/execution.ts +133 -0
- package/cli/selftune/localdb/queries/json.ts +18 -0
- package/cli/selftune/localdb/queries/monitoring.ts +263 -0
- package/cli/selftune/localdb/queries/raw.ts +95 -0
- package/cli/selftune/localdb/queries/staging.ts +270 -0
- package/cli/selftune/localdb/queries/trust.ts +392 -0
- package/cli/selftune/localdb/queries.ts +60 -2288
- package/cli/selftune/localdb/schema.ts +21 -0
- package/cli/selftune/monitoring/watch.ts +96 -29
- package/cli/selftune/normalization.ts +3 -0
- package/cli/selftune/observability.ts +4 -2
- package/cli/selftune/orchestrate/cli.ts +161 -0
- package/cli/selftune/orchestrate/execute.ts +295 -0
- package/cli/selftune/orchestrate/finalize.ts +157 -0
- package/cli/selftune/orchestrate/locks.ts +40 -0
- package/cli/selftune/orchestrate/plan.ts +131 -0
- package/cli/selftune/orchestrate/post-run.ts +59 -0
- package/cli/selftune/orchestrate/prepare.ts +334 -0
- package/cli/selftune/orchestrate/report.ts +182 -0
- package/cli/selftune/orchestrate/runtime.ts +120 -0
- package/cli/selftune/orchestrate/signals.ts +48 -0
- package/cli/selftune/orchestrate.ts +150 -1173
- package/cli/selftune/repair/skill-usage.ts +5 -2
- package/cli/selftune/routes/overview.ts +5 -2
- package/cli/selftune/routes/skill-report.ts +15 -2
- package/cli/selftune/schedule.ts +5 -5
- package/cli/selftune/status.ts +39 -2
- package/cli/selftune/testing-readiness.ts +597 -0
- package/cli/selftune/types.ts +44 -4
- package/cli/selftune/uninstall.ts +2 -1
- package/cli/selftune/utils/canonical-log.ts +1 -9
- package/cli/selftune/utils/cli-error.ts +9 -0
- package/cli/selftune/utils/llm-call.ts +126 -6
- package/cli/selftune/utils/skill-discovery.ts +2 -0
- package/cli/selftune/workflows/proposals.ts +184 -0
- package/cli/selftune/workflows/skill-scaffold.ts +241 -0
- package/cli/selftune/workflows/workflows.ts +100 -26
- package/node_modules/@selftune/telemetry-contract/fixtures/complete-push.ts +1 -1
- package/node_modules/@selftune/telemetry-contract/fixtures/evidence-only-push.ts +1 -1
- package/node_modules/@selftune/telemetry-contract/fixtures/partial-push-no-sessions.ts +1 -1
- package/node_modules/@selftune/telemetry-contract/fixtures/partial-push-unresolved-parents.ts +1 -1
- package/node_modules/@selftune/telemetry-contract/src/schemas.ts +41 -1
- package/node_modules/@selftune/telemetry-contract/src/types.ts +103 -2
- package/package.json +25 -9
- package/packages/dashboard-core/AGENTS.md +18 -0
- package/packages/dashboard-core/README.md +30 -0
- package/packages/dashboard-core/index.ts +3 -0
- package/packages/dashboard-core/package.json +39 -0
- package/packages/dashboard-core/src/chrome/DashboardChrome.tsx +74 -0
- package/packages/dashboard-core/src/chrome/DashboardHeader.tsx +200 -0
- package/packages/dashboard-core/src/chrome/DashboardSidebar.tsx +219 -0
- package/packages/dashboard-core/src/chrome/RuntimeBadge.tsx +46 -0
- package/packages/dashboard-core/src/chrome/index.ts +14 -0
- package/packages/dashboard-core/src/chrome/types.ts +81 -0
- package/packages/dashboard-core/src/chrome/utils.ts +23 -0
- package/packages/dashboard-core/src/gates/FeatureGate.tsx +11 -0
- package/packages/dashboard-core/src/gates/LockedRoute.tsx +29 -0
- package/packages/dashboard-core/src/gates/UpgradeCard.tsx +89 -0
- package/packages/dashboard-core/src/gates/index.ts +3 -0
- package/packages/dashboard-core/src/host/DashboardHostProvider.tsx +62 -0
- package/packages/dashboard-core/src/host/adapter.ts +47 -0
- package/packages/dashboard-core/src/host/capabilities.ts +55 -0
- package/packages/dashboard-core/src/host/index.ts +3 -0
- package/packages/dashboard-core/src/models/analytics.ts +39 -0
- package/packages/dashboard-core/src/models/index.ts +4 -0
- package/packages/dashboard-core/src/models/overview.ts +98 -0
- package/packages/dashboard-core/src/models/runtime.ts +7 -0
- package/packages/dashboard-core/src/models/skills.ts +34 -0
- package/packages/dashboard-core/src/routes/index.ts +2 -0
- package/packages/dashboard-core/src/routes/manifest.test.ts +70 -0
- package/packages/dashboard-core/src/routes/manifest.ts +451 -0
- package/packages/dashboard-core/src/routes/types.ts +39 -0
- package/packages/dashboard-core/src/screens/analytics/AnalyticsScreen.tsx +278 -0
- package/packages/dashboard-core/src/screens/analytics/index.ts +1 -0
- package/packages/dashboard-core/src/screens/index.ts +37 -0
- package/packages/dashboard-core/src/screens/overview/OverviewComparisonSurface.test.ts +101 -0
- package/packages/dashboard-core/src/screens/overview/OverviewComparisonSurface.tsx +393 -0
- package/packages/dashboard-core/src/screens/overview/OverviewCompositionSurface.test.tsx +113 -0
- package/packages/dashboard-core/src/screens/overview/OverviewCompositionSurface.tsx +72 -0
- package/packages/dashboard-core/src/screens/overview/OverviewCoreSurface.tsx +71 -0
- package/packages/dashboard-core/src/screens/overview/OverviewOnboardingBanner.tsx +90 -0
- package/packages/dashboard-core/src/screens/overview/OverviewRunSummary.tsx +40 -0
- package/packages/dashboard-core/src/screens/overview/index.ts +16 -0
- package/packages/dashboard-core/src/screens/overview/types.ts +13 -0
- package/packages/dashboard-core/src/screens/skill-report/SkillReportDailyBreakdownSection.tsx +99 -0
- package/packages/dashboard-core/src/screens/skill-report/SkillReportDataQualityTabContent.tsx +35 -0
- package/packages/dashboard-core/src/screens/skill-report/SkillReportEvidenceRail.tsx +71 -0
- package/packages/dashboard-core/src/screens/skill-report/SkillReportEvidenceSection.tsx +63 -0
- package/packages/dashboard-core/src/screens/skill-report/SkillReportEvidenceTabContent.tsx +25 -0
- package/packages/dashboard-core/src/screens/skill-report/SkillReportInvocationsSection.tsx +24 -0
- package/packages/dashboard-core/src/screens/skill-report/SkillReportMissedQueriesSection.tsx +79 -0
- package/packages/dashboard-core/src/screens/skill-report/SkillReportScaffold.tsx +150 -0
- package/packages/dashboard-core/src/screens/skill-report/SkillReportSections.test.tsx +224 -0
- package/packages/dashboard-core/src/screens/skill-report/SkillReportTabs.test.tsx +76 -0
- package/packages/dashboard-core/src/screens/skill-report/SkillReportTabs.tsx +88 -0
- package/packages/dashboard-core/src/screens/skill-report/SkillReportTrendSection.tsx +33 -0
- package/packages/dashboard-core/src/screens/skill-report/SkillReportTrustBadge.tsx +67 -0
- package/packages/dashboard-core/src/screens/skill-report/index.ts +45 -0
- package/packages/dashboard-core/src/screens/skills/SkillsLibraryScreen.tsx +162 -0
- package/packages/dashboard-core/src/screens/skills/index.ts +6 -0
- package/packages/telemetry-contract/fixtures/complete-push.ts +1 -1
- package/packages/telemetry-contract/fixtures/evidence-only-push.ts +1 -1
- package/packages/telemetry-contract/fixtures/partial-push-no-sessions.ts +1 -1
- package/packages/telemetry-contract/fixtures/partial-push-unresolved-parents.ts +1 -1
- package/packages/telemetry-contract/src/schemas.ts +41 -1
- package/packages/telemetry-contract/src/types.ts +103 -2
- package/packages/ui/src/components/EvidenceViewer.tsx +80 -25
- package/packages/ui/src/components/OverviewPanels.tsx +67 -26
- package/packages/ui/src/primitives/tabs.tsx +7 -6
- package/packages/ui/src/types.ts +10 -0
- package/skill/SKILL.md +130 -332
- package/skill/agents/diagnosis-analyst.md +3 -3
- package/skill/agents/evolution-reviewer.md +3 -3
- package/skill/agents/integration-guide.md +3 -3
- package/skill/agents/pattern-analyst.md +2 -2
- package/skill/references/cli-quick-reference.md +89 -0
- package/skill/references/creator-playbook.md +131 -0
- package/skill/references/examples.md +48 -0
- package/skill/references/troubleshooting.md +47 -0
- package/skill/references/version-history.md +1 -1
- package/skill/selftune.contribute.json +11 -0
- package/skill/{Workflows → workflows}/Baseline.md +20 -1
- package/skill/{Workflows → workflows}/Contribute.md +23 -10
- package/skill/{Workflows → workflows}/Contributions.md +13 -5
- package/skill/workflows/CreateTestDeploy.md +170 -0
- package/skill/{Workflows → workflows}/CreatorContributions.md +18 -6
- package/skill/{Workflows → workflows}/Cron.md +1 -1
- package/skill/{Workflows → workflows}/Dashboard.md +20 -0
- package/skill/{Workflows → workflows}/Doctor.md +1 -1
- package/skill/{Workflows → workflows}/Evals.md +67 -2
- package/skill/{Workflows → workflows}/Evolve.md +119 -30
- package/skill/{Workflows → workflows}/EvolveBody.md +41 -1
- package/skill/{Workflows → workflows}/Grade.md +1 -1
- package/skill/{Workflows → workflows}/Initialize.md +8 -4
- package/skill/{Workflows → workflows}/Orchestrate.md +13 -3
- package/skill/{Workflows → workflows}/Schedule.md +3 -3
- package/skill/workflows/SignalsDashboard.md +87 -0
- package/skill/{Workflows → workflows}/UnitTest.md +19 -0
- package/skill/{Workflows → workflows}/Watch.md +42 -2
- package/skill/{Workflows → workflows}/Workflows.md +39 -2
- package/apps/local-dashboard/dist/assets/index-CwOtTrUS.css +0 -1
- package/apps/local-dashboard/dist/assets/index-f1HQpbeH.js +0 -59
- package/apps/local-dashboard/dist/assets/vendor-react-CKkiCskZ.js +0 -11
- package/apps/local-dashboard/dist/assets/vendor-ui-jVSaIZey.js +0 -12
- /package/skill/{Workflows → workflows}/AlphaUpload.md +0 -0
- /package/skill/{Workflows → workflows}/AutoActivation.md +0 -0
- /package/skill/{Workflows → workflows}/Badge.md +0 -0
- /package/skill/{Workflows → workflows}/Composability.md +0 -0
- /package/skill/{Workflows → workflows}/EvolutionMemory.md +0 -0
- /package/skill/{Workflows → workflows}/ExportCanonical.md +0 -0
- /package/skill/{Workflows → workflows}/Hook.md +0 -0
- /package/skill/{Workflows → workflows}/ImportSkillsBench.md +0 -0
- /package/skill/{Workflows → workflows}/Ingest.md +0 -0
- /package/skill/{Workflows → workflows}/PlatformHooks.md +0 -0
- /package/skill/{Workflows → workflows}/Quickstart.md +0 -0
- /package/skill/{Workflows → workflows}/Recover.md +0 -0
- /package/skill/{Workflows → workflows}/Registry.md +0 -0
- /package/skill/{Workflows → workflows}/RepairSkillUsage.md +0 -0
- /package/skill/{Workflows → workflows}/Replay.md +0 -0
- /package/skill/{Workflows → workflows}/Rollback.md +0 -0
- /package/skill/{Workflows → workflows}/Sync.md +0 -0
- /package/skill/{Workflows → workflows}/Telemetry.md +0 -0
- /package/skill/{Workflows → workflows}/Uninstall.md +0 -0
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
"use client";
|
|
2
|
+
|
|
3
|
+
import type { ReactNode } from "react";
|
|
4
|
+
import { useMemo, useState } from "react";
|
|
5
|
+
|
|
6
|
+
import type { DerivedSkill, FilterTab } from "@selftune/ui/components";
|
|
7
|
+
import {
|
|
8
|
+
LibraryHealthCard,
|
|
9
|
+
PendingProposalsCard,
|
|
10
|
+
SkillCardItem,
|
|
11
|
+
SkillFilterTabs,
|
|
12
|
+
SkillGridEmpty,
|
|
13
|
+
SkillHeroCard,
|
|
14
|
+
SkillHeroEmpty,
|
|
15
|
+
SkillsLibraryError,
|
|
16
|
+
SkillsLibrarySkeleton,
|
|
17
|
+
} from "@selftune/ui/components";
|
|
18
|
+
|
|
19
|
+
export interface SkillsLibraryHero {
|
|
20
|
+
skillName: string;
|
|
21
|
+
skillScope?: string | null;
|
|
22
|
+
platforms?: string[];
|
|
23
|
+
passRate: number | null;
|
|
24
|
+
totalChecks: number;
|
|
25
|
+
uniqueSessions: number;
|
|
26
|
+
status: DerivedSkill["status"];
|
|
27
|
+
latestEvolutionTimestamp?: string | null;
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
export interface SkillsLibraryPendingProposal {
|
|
31
|
+
id: string;
|
|
32
|
+
skillName: string | null;
|
|
33
|
+
action: string;
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
export interface SkillsLibraryScreenProps {
|
|
37
|
+
skills: DerivedSkill[];
|
|
38
|
+
heroSkill?: SkillsLibraryHero | null;
|
|
39
|
+
aggregatePassRate: number | null;
|
|
40
|
+
gradedCount: number;
|
|
41
|
+
pendingProposals: SkillsLibraryPendingProposal[];
|
|
42
|
+
isLoading: boolean;
|
|
43
|
+
error?: string | null;
|
|
44
|
+
onRetry(): void;
|
|
45
|
+
renderHeroActions(skillName: string): ReactNode;
|
|
46
|
+
renderCardActions(skillName: string): ReactNode;
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
export function SkillsLibraryScreen({
|
|
50
|
+
skills,
|
|
51
|
+
heroSkill,
|
|
52
|
+
aggregatePassRate,
|
|
53
|
+
gradedCount,
|
|
54
|
+
pendingProposals,
|
|
55
|
+
isLoading,
|
|
56
|
+
error,
|
|
57
|
+
onRetry,
|
|
58
|
+
renderHeroActions,
|
|
59
|
+
renderCardActions,
|
|
60
|
+
}: SkillsLibraryScreenProps) {
|
|
61
|
+
const [filter, setFilter] = useState<FilterTab>("ALL");
|
|
62
|
+
const [sortDesc, setSortDesc] = useState(true);
|
|
63
|
+
|
|
64
|
+
const filteredSkills = useMemo(() => {
|
|
65
|
+
let result = skills;
|
|
66
|
+
if (filter !== "ALL") {
|
|
67
|
+
result = result.filter((skill) => skill.status === filter);
|
|
68
|
+
}
|
|
69
|
+
if (!sortDesc) {
|
|
70
|
+
return result;
|
|
71
|
+
}
|
|
72
|
+
return result.reduceRight<DerivedSkill[]>((acc, skill) => {
|
|
73
|
+
acc.push(skill);
|
|
74
|
+
return acc;
|
|
75
|
+
}, []);
|
|
76
|
+
}, [filter, skills, sortDesc]);
|
|
77
|
+
|
|
78
|
+
const counts = useMemo<Record<FilterTab, number>>(() => {
|
|
79
|
+
const nextCounts: Record<FilterTab, number> = {
|
|
80
|
+
ALL: skills.length,
|
|
81
|
+
HEALTHY: 0,
|
|
82
|
+
WARNING: 0,
|
|
83
|
+
CRITICAL: 0,
|
|
84
|
+
UNGRADED: 0,
|
|
85
|
+
};
|
|
86
|
+
|
|
87
|
+
for (const skill of skills) {
|
|
88
|
+
if (skill.status in nextCounts) {
|
|
89
|
+
nextCounts[skill.status as Exclude<FilterTab, "ALL">]++;
|
|
90
|
+
}
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
return nextCounts;
|
|
94
|
+
}, [skills]);
|
|
95
|
+
|
|
96
|
+
if (isLoading) {
|
|
97
|
+
return <SkillsLibrarySkeleton />;
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
if (error) {
|
|
101
|
+
return <SkillsLibraryError message={error} onRetry={onRetry} />;
|
|
102
|
+
}
|
|
103
|
+
|
|
104
|
+
return (
|
|
105
|
+
<div
|
|
106
|
+
data-parity-root="skills-library"
|
|
107
|
+
className="@container/main flex flex-1 animate-in fade-in flex-col gap-8 px-4 py-8 duration-500 lg:px-6"
|
|
108
|
+
>
|
|
109
|
+
<div>
|
|
110
|
+
<h1 className="font-headline text-4xl font-bold tracking-tight text-foreground">
|
|
111
|
+
Skills Library
|
|
112
|
+
</h1>
|
|
113
|
+
<p className="mt-2 max-w-2xl text-sm text-muted-foreground">
|
|
114
|
+
Monitor and manage your evolving skill definitions across all scopes.
|
|
115
|
+
</p>
|
|
116
|
+
</div>
|
|
117
|
+
|
|
118
|
+
<div className="grid grid-cols-12 gap-6">
|
|
119
|
+
{heroSkill ? (
|
|
120
|
+
<SkillHeroCard
|
|
121
|
+
skillName={heroSkill.skillName}
|
|
122
|
+
skillScope={heroSkill.skillScope ?? null}
|
|
123
|
+
platforms={heroSkill.platforms}
|
|
124
|
+
passRate={heroSkill.passRate}
|
|
125
|
+
totalChecks={heroSkill.totalChecks}
|
|
126
|
+
uniqueSessions={heroSkill.uniqueSessions}
|
|
127
|
+
status={heroSkill.status}
|
|
128
|
+
latestEvolutionTimestamp={heroSkill.latestEvolutionTimestamp ?? null}
|
|
129
|
+
renderActions={renderHeroActions}
|
|
130
|
+
/>
|
|
131
|
+
) : (
|
|
132
|
+
<SkillHeroEmpty />
|
|
133
|
+
)}
|
|
134
|
+
|
|
135
|
+
<div className="col-span-12 flex flex-col gap-6 lg:col-span-4">
|
|
136
|
+
<LibraryHealthCard aggregatePassRate={aggregatePassRate} gradedCount={gradedCount} />
|
|
137
|
+
<PendingProposalsCard proposals={pendingProposals} />
|
|
138
|
+
</div>
|
|
139
|
+
</div>
|
|
140
|
+
|
|
141
|
+
<div className="space-y-6">
|
|
142
|
+
<SkillFilterTabs
|
|
143
|
+
filter={filter}
|
|
144
|
+
onFilterChange={setFilter}
|
|
145
|
+
counts={counts}
|
|
146
|
+
sortDesc={sortDesc}
|
|
147
|
+
onSortToggle={() => setSortDesc((value) => !value)}
|
|
148
|
+
/>
|
|
149
|
+
|
|
150
|
+
{filteredSkills.length > 0 ? (
|
|
151
|
+
<div className="grid grid-cols-1 gap-6 md:grid-cols-2 xl:grid-cols-3">
|
|
152
|
+
{filteredSkills.map((skill) => (
|
|
153
|
+
<SkillCardItem key={skill.name} skill={skill} renderActions={renderCardActions} />
|
|
154
|
+
))}
|
|
155
|
+
</div>
|
|
156
|
+
) : (
|
|
157
|
+
<SkillGridEmpty />
|
|
158
|
+
)}
|
|
159
|
+
</div>
|
|
160
|
+
</div>
|
|
161
|
+
);
|
|
162
|
+
}
|
|
@@ -140,10 +140,13 @@ export const CanonicalEvolutionEvidenceRecordSchema = z.object({
|
|
|
140
140
|
evidence_id: z.string().min(1),
|
|
141
141
|
skill_name: z.string().min(1),
|
|
142
142
|
proposal_id: z.string().optional(),
|
|
143
|
+
timestamp: z.string().datetime().optional(),
|
|
144
|
+
skill_path: z.string().optional(),
|
|
143
145
|
target: z.string().min(1),
|
|
144
146
|
stage: z.string().min(1),
|
|
145
147
|
rationale: z.string().optional(),
|
|
146
148
|
confidence: z.number().min(0).max(1).optional(),
|
|
149
|
+
details: z.string().optional(),
|
|
147
150
|
original_text: z.string().optional(),
|
|
148
151
|
proposed_text: z.string().optional(),
|
|
149
152
|
eval_set_json: z.unknown().optional(),
|
|
@@ -151,6 +154,37 @@ export const CanonicalEvolutionEvidenceRecordSchema = z.object({
|
|
|
151
154
|
raw_source_ref: rawSourceRefSchema.optional(),
|
|
152
155
|
});
|
|
153
156
|
|
|
157
|
+
export const CanonicalGradingResultRecordSchema = z.object({
|
|
158
|
+
grading_id: z.string().min(1),
|
|
159
|
+
session_id: z.string().min(1),
|
|
160
|
+
skill_name: z.string().min(1),
|
|
161
|
+
transcript_path: z.string().nullable().optional(),
|
|
162
|
+
graded_at: z.string().min(1),
|
|
163
|
+
pass_rate: z.number().min(0).max(1).nullable().optional(),
|
|
164
|
+
mean_score: z.number().min(0).max(1).nullable().optional(),
|
|
165
|
+
score_std_dev: z.number().nullable().optional(),
|
|
166
|
+
passed_count: z.number().int().nonnegative().nullable().optional(),
|
|
167
|
+
failed_count: z.number().int().nonnegative().nullable().optional(),
|
|
168
|
+
total_count: z.number().int().nonnegative().nullable().optional(),
|
|
169
|
+
expectations_json: z.string().nullable().optional(),
|
|
170
|
+
claims_json: z.string().nullable().optional(),
|
|
171
|
+
eval_feedback_json: z.string().nullable().optional(),
|
|
172
|
+
failure_feedback_json: z.string().nullable().optional(),
|
|
173
|
+
execution_metrics_json: z.string().nullable().optional(),
|
|
174
|
+
});
|
|
175
|
+
|
|
176
|
+
export const CanonicalImprovementSignalRecordSchema = z.object({
|
|
177
|
+
signal_id: z.string().min(1),
|
|
178
|
+
timestamp: z.string().min(1),
|
|
179
|
+
session_id: z.string().min(1),
|
|
180
|
+
query: z.string().min(1),
|
|
181
|
+
signal_type: z.string().min(1),
|
|
182
|
+
mentioned_skill: z.string().nullable().optional(),
|
|
183
|
+
consumed: z.boolean(),
|
|
184
|
+
consumed_at: z.string().nullable().optional(),
|
|
185
|
+
consumed_by_run: z.string().nullable().optional(),
|
|
186
|
+
});
|
|
187
|
+
|
|
154
188
|
// ---------- Orchestrate run schemas ----------
|
|
155
189
|
|
|
156
190
|
export const OrchestrateRunSkillActionSchema = z.object({
|
|
@@ -194,12 +228,14 @@ export const PushPayloadV2Schema = z.object({
|
|
|
194
228
|
normalization_runs: z.array(CanonicalNormalizationRunRecordSchema).min(0),
|
|
195
229
|
evolution_evidence: z.array(CanonicalEvolutionEvidenceRecordSchema).optional(),
|
|
196
230
|
orchestrate_runs: z.array(PushOrchestrateRunRecordSchema).optional(),
|
|
231
|
+
grading_results: z.array(CanonicalGradingResultRecordSchema).optional(),
|
|
232
|
+
improvement_signals: z.array(CanonicalImprovementSignalRecordSchema).optional(),
|
|
197
233
|
}),
|
|
198
234
|
});
|
|
199
235
|
|
|
200
236
|
// ---------- Inferred types from Zod schemas ----------
|
|
201
237
|
|
|
202
|
-
export type PushPayloadV2 = z.infer<typeof PushPayloadV2Schema>;
|
|
238
|
+
export type ZodPushPayloadV2 = z.infer<typeof PushPayloadV2Schema>;
|
|
203
239
|
export type ZodCanonicalSessionRecord = z.infer<typeof CanonicalSessionRecordSchema>;
|
|
204
240
|
export type ZodCanonicalPromptRecord = z.infer<typeof CanonicalPromptRecordSchema>;
|
|
205
241
|
export type ZodCanonicalSkillInvocationRecord = z.infer<
|
|
@@ -212,4 +248,8 @@ export type ZodCanonicalNormalizationRunRecord = z.infer<
|
|
|
212
248
|
export type ZodCanonicalEvolutionEvidenceRecord = z.infer<
|
|
213
249
|
typeof CanonicalEvolutionEvidenceRecordSchema
|
|
214
250
|
>;
|
|
251
|
+
export type ZodCanonicalGradingResultRecord = z.infer<typeof CanonicalGradingResultRecordSchema>;
|
|
252
|
+
export type ZodCanonicalImprovementSignalRecord = z.infer<
|
|
253
|
+
typeof CanonicalImprovementSignalRecordSchema
|
|
254
|
+
>;
|
|
215
255
|
export type ZodPushOrchestrateRunRecord = z.infer<typeof PushOrchestrateRunRecordSchema>;
|
|
@@ -54,7 +54,7 @@ export const CANONICAL_RECORD_KINDS = [
|
|
|
54
54
|
] as const;
|
|
55
55
|
export type CanonicalRecordKind = (typeof CANONICAL_RECORD_KINDS)[number];
|
|
56
56
|
|
|
57
|
-
export interface CanonicalRawSourceRef {
|
|
57
|
+
export interface CanonicalRawSourceRef extends Record<string, unknown> {
|
|
58
58
|
path?: string;
|
|
59
59
|
line?: number;
|
|
60
60
|
event_type?: string;
|
|
@@ -62,7 +62,7 @@ export interface CanonicalRawSourceRef {
|
|
|
62
62
|
metadata?: Record<string, unknown>;
|
|
63
63
|
}
|
|
64
64
|
|
|
65
|
-
export interface CanonicalRecordBase {
|
|
65
|
+
export interface CanonicalRecordBase extends Record<string, unknown> {
|
|
66
66
|
record_kind: CanonicalRecordKind;
|
|
67
67
|
schema_version: CanonicalSchemaVersion;
|
|
68
68
|
normalizer_version: string;
|
|
@@ -143,6 +143,16 @@ export interface CanonicalExecutionFactRecord extends CanonicalSessionRecordBase
|
|
|
143
143
|
errors_encountered: number;
|
|
144
144
|
input_tokens?: number;
|
|
145
145
|
output_tokens?: number;
|
|
146
|
+
cached_input_tokens?: number;
|
|
147
|
+
reasoning_output_tokens?: number;
|
|
148
|
+
cost_usd?: number;
|
|
149
|
+
files_changed?: number;
|
|
150
|
+
lines_added?: number;
|
|
151
|
+
lines_removed?: number;
|
|
152
|
+
lines_modified?: number;
|
|
153
|
+
artifact_count?: number;
|
|
154
|
+
session_type?: string;
|
|
155
|
+
agent_summary?: string;
|
|
146
156
|
duration_ms?: number;
|
|
147
157
|
completion_status?: CanonicalCompletionStatus;
|
|
148
158
|
end_reason?: string;
|
|
@@ -157,9 +167,100 @@ export interface CanonicalNormalizationRunRecord extends CanonicalRecordBase {
|
|
|
157
167
|
repair_applied: boolean;
|
|
158
168
|
}
|
|
159
169
|
|
|
170
|
+
export interface CanonicalEvolutionEvidenceRecord {
|
|
171
|
+
evidence_id?: string;
|
|
172
|
+
timestamp?: string;
|
|
173
|
+
proposal_id?: string;
|
|
174
|
+
skill_name: string;
|
|
175
|
+
skill_path?: string;
|
|
176
|
+
target: string;
|
|
177
|
+
stage: string;
|
|
178
|
+
rationale?: string;
|
|
179
|
+
confidence?: number;
|
|
180
|
+
details?: string;
|
|
181
|
+
original_text?: string;
|
|
182
|
+
proposed_text?: string;
|
|
183
|
+
eval_set_json?: unknown;
|
|
184
|
+
validation_json?: unknown;
|
|
185
|
+
raw_source_ref?: CanonicalRawSourceRef;
|
|
186
|
+
}
|
|
187
|
+
|
|
188
|
+
export interface CanonicalGradingResultRecord {
|
|
189
|
+
grading_id: string;
|
|
190
|
+
session_id: string;
|
|
191
|
+
skill_name: string;
|
|
192
|
+
transcript_path?: string | null;
|
|
193
|
+
graded_at: string;
|
|
194
|
+
pass_rate?: number | null;
|
|
195
|
+
mean_score?: number | null;
|
|
196
|
+
score_std_dev?: number | null;
|
|
197
|
+
passed_count?: number | null;
|
|
198
|
+
failed_count?: number | null;
|
|
199
|
+
total_count?: number | null;
|
|
200
|
+
expectations_json?: string | null;
|
|
201
|
+
claims_json?: string | null;
|
|
202
|
+
eval_feedback_json?: string | null;
|
|
203
|
+
failure_feedback_json?: string | null;
|
|
204
|
+
execution_metrics_json?: string | null;
|
|
205
|
+
}
|
|
206
|
+
|
|
207
|
+
export interface CanonicalImprovementSignalRecord {
|
|
208
|
+
signal_id: string;
|
|
209
|
+
timestamp: string;
|
|
210
|
+
session_id: string;
|
|
211
|
+
query: string;
|
|
212
|
+
signal_type: string;
|
|
213
|
+
mentioned_skill?: string | null;
|
|
214
|
+
consumed: boolean;
|
|
215
|
+
consumed_at?: string | null;
|
|
216
|
+
consumed_by_run?: string | null;
|
|
217
|
+
}
|
|
218
|
+
|
|
160
219
|
export type CanonicalRecord =
|
|
161
220
|
| CanonicalSessionRecord
|
|
162
221
|
| CanonicalPromptRecord
|
|
163
222
|
| CanonicalSkillInvocationRecord
|
|
164
223
|
| CanonicalExecutionFactRecord
|
|
165
224
|
| CanonicalNormalizationRunRecord;
|
|
225
|
+
|
|
226
|
+
export interface PushOrchestrateRunRecord {
|
|
227
|
+
run_id: string;
|
|
228
|
+
timestamp: string;
|
|
229
|
+
elapsed_ms: number;
|
|
230
|
+
dry_run: boolean;
|
|
231
|
+
approval_mode: "auto" | "review";
|
|
232
|
+
total_skills: number;
|
|
233
|
+
evaluated: number;
|
|
234
|
+
evolved: number;
|
|
235
|
+
deployed: number;
|
|
236
|
+
watched: number;
|
|
237
|
+
skipped: number;
|
|
238
|
+
skill_actions: Array<{
|
|
239
|
+
skill: string;
|
|
240
|
+
action: "evolve" | "watch" | "skip";
|
|
241
|
+
reason: string;
|
|
242
|
+
deployed?: boolean;
|
|
243
|
+
rolledBack?: boolean;
|
|
244
|
+
alert?: string | null;
|
|
245
|
+
elapsed_ms?: number;
|
|
246
|
+
llm_calls?: number;
|
|
247
|
+
}>;
|
|
248
|
+
}
|
|
249
|
+
|
|
250
|
+
export interface PushPayloadV2 {
|
|
251
|
+
schema_version: CanonicalSchemaVersion;
|
|
252
|
+
client_version: string;
|
|
253
|
+
push_id: string;
|
|
254
|
+
normalizer_version: string;
|
|
255
|
+
canonical: {
|
|
256
|
+
sessions: CanonicalSessionRecord[];
|
|
257
|
+
prompts: CanonicalPromptRecord[];
|
|
258
|
+
skill_invocations: CanonicalSkillInvocationRecord[];
|
|
259
|
+
execution_facts: CanonicalExecutionFactRecord[];
|
|
260
|
+
normalization_runs: CanonicalNormalizationRunRecord[];
|
|
261
|
+
evolution_evidence?: CanonicalEvolutionEvidenceRecord[];
|
|
262
|
+
orchestrate_runs?: PushOrchestrateRunRecord[];
|
|
263
|
+
grading_results?: CanonicalGradingResultRecord[];
|
|
264
|
+
improvement_signals?: CanonicalImprovementSignalRecord[];
|
|
265
|
+
};
|
|
266
|
+
}
|
|
@@ -158,6 +158,44 @@ function formatValidationValue(key: string, val: unknown): React.ReactNode {
|
|
|
158
158
|
return <span>{String(val)}</span>;
|
|
159
159
|
}
|
|
160
160
|
|
|
161
|
+
function getPerEntryPassStatus(entry: unknown): boolean | null {
|
|
162
|
+
if (typeof entry !== "object" || entry === null) return null;
|
|
163
|
+
const obj = entry as Record<string, unknown>;
|
|
164
|
+
const afterPass = obj.after_pass ?? obj.after ?? obj.triggered ?? obj.result;
|
|
165
|
+
const passed = obj.passed ?? obj.matched;
|
|
166
|
+
return typeof afterPass === "boolean" ? afterPass : typeof passed === "boolean" ? passed : null;
|
|
167
|
+
}
|
|
168
|
+
|
|
169
|
+
function getEvidenceListKey(prefix: string, value: unknown): string {
|
|
170
|
+
if (typeof value !== "object" || value === null) {
|
|
171
|
+
return `${prefix}:${JSON.stringify(value)}`;
|
|
172
|
+
}
|
|
173
|
+
|
|
174
|
+
const record = value as Record<string, unknown>;
|
|
175
|
+
const nested =
|
|
176
|
+
typeof record.entry === "object" && record.entry !== null
|
|
177
|
+
? (record.entry as Record<string, unknown>)
|
|
178
|
+
: null;
|
|
179
|
+
const query =
|
|
180
|
+
typeof nested?.query === "string"
|
|
181
|
+
? nested.query
|
|
182
|
+
: typeof record.query === "string"
|
|
183
|
+
? record.query
|
|
184
|
+
: typeof record.prompt === "string"
|
|
185
|
+
? record.prompt
|
|
186
|
+
: typeof record.input === "string"
|
|
187
|
+
? record.input
|
|
188
|
+
: null;
|
|
189
|
+
|
|
190
|
+
if (query) return `${prefix}:${query}`;
|
|
191
|
+
|
|
192
|
+
const action = typeof record.action === "string" ? record.action : null;
|
|
193
|
+
const timestamp = typeof record.timestamp === "string" ? record.timestamp : null;
|
|
194
|
+
if (action && timestamp) return `${prefix}:${action}:${timestamp}`;
|
|
195
|
+
|
|
196
|
+
return `${prefix}:${JSON.stringify(record)}`;
|
|
197
|
+
}
|
|
198
|
+
|
|
161
199
|
/** Render a per_entry_result row — handles both flat EvalEntry and nested { entry, before_pass, after_pass } */
|
|
162
200
|
function PerEntryResult({ entry }: { entry: Record<string, unknown> }) {
|
|
163
201
|
// Handle nested shape: { entry: { query, should_trigger }, before_pass, after_pass }
|
|
@@ -168,11 +206,7 @@ function PerEntryResult({ entry }: { entry: Record<string, unknown> }) {
|
|
|
168
206
|
const beforePass =
|
|
169
207
|
entry.before_pass ?? entry.before ?? entry.original_triggered ?? entry.baseline;
|
|
170
208
|
const afterPass = entry.after_pass ?? entry.after ?? entry.triggered ?? entry.result;
|
|
171
|
-
const passed = entry.passed ?? entry.matched;
|
|
172
|
-
|
|
173
|
-
// Determine icon: use after_pass for per_entry_results, passed for others
|
|
174
|
-
const isPass =
|
|
175
|
-
typeof afterPass === "boolean" ? afterPass : typeof passed === "boolean" ? passed : null;
|
|
209
|
+
const isPass = getPerEntryPassStatus(entry);
|
|
176
210
|
|
|
177
211
|
return (
|
|
178
212
|
<div className="flex items-start gap-2 text-xs py-1.5 border-b border-border/50 last:border-0">
|
|
@@ -218,12 +252,22 @@ function ValidationResults({ validation }: { validation: Record<string, unknown>
|
|
|
218
252
|
regressions,
|
|
219
253
|
new_passes,
|
|
220
254
|
per_entry_results,
|
|
255
|
+
validation_mode,
|
|
256
|
+
validation_agent,
|
|
257
|
+
validation_fixture_id,
|
|
258
|
+
validation_fallback_reason,
|
|
221
259
|
...rest
|
|
222
260
|
} = validation;
|
|
223
261
|
|
|
224
262
|
const regressionsArr = Array.isArray(regressions) ? regressions : [];
|
|
225
263
|
const newPassesArr = Array.isArray(new_passes) ? new_passes : [];
|
|
226
264
|
const perEntryArr = Array.isArray(per_entry_results) ? per_entry_results : [];
|
|
265
|
+
const validationMode = typeof validation_mode === "string" ? validation_mode : null;
|
|
266
|
+
const validationAgent = typeof validation_agent === "string" ? validation_agent : null;
|
|
267
|
+
const validationFixtureId =
|
|
268
|
+
typeof validation_fixture_id === "string" ? validation_fixture_id : null;
|
|
269
|
+
const validationFallbackReason =
|
|
270
|
+
typeof validation_fallback_reason === "string" ? validation_fallback_reason : null;
|
|
227
271
|
|
|
228
272
|
return (
|
|
229
273
|
<div className="rounded-md border bg-muted/30 p-3 space-y-3">
|
|
@@ -241,6 +285,21 @@ function ValidationResults({ validation }: { validation: Record<string, unknown>
|
|
|
241
285
|
{improved ? "Improved" : "Regressed"}
|
|
242
286
|
</Badge>
|
|
243
287
|
)}
|
|
288
|
+
{validationMode && (
|
|
289
|
+
<Badge variant="outline" className="text-[10px] capitalize">
|
|
290
|
+
{validationMode.replace(/_/g, " ")}
|
|
291
|
+
</Badge>
|
|
292
|
+
)}
|
|
293
|
+
{validationAgent && (
|
|
294
|
+
<Badge variant="secondary" className="text-[10px]">
|
|
295
|
+
{validationAgent}
|
|
296
|
+
</Badge>
|
|
297
|
+
)}
|
|
298
|
+
{validationFixtureId && (
|
|
299
|
+
<Badge variant="secondary" className="text-[10px] font-mono" title={validationFixtureId}>
|
|
300
|
+
fixture #{validationFixtureId.slice(0, 8)}
|
|
301
|
+
</Badge>
|
|
302
|
+
)}
|
|
244
303
|
{typeof before_pass_rate === "number" && typeof after_pass_rate === "number" && (
|
|
245
304
|
<span className="text-xs font-mono text-muted-foreground">
|
|
246
305
|
{(before_pass_rate * 100).toFixed(1)}% → {(after_pass_rate * 100).toFixed(1)}%
|
|
@@ -256,6 +315,12 @@ function ValidationResults({ validation }: { validation: Record<string, unknown>
|
|
|
256
315
|
)}
|
|
257
316
|
</div>
|
|
258
317
|
|
|
318
|
+
{validationFallbackReason && (
|
|
319
|
+
<div className="rounded border border-amber-300/60 bg-amber-50 px-2.5 py-2 text-[11px] text-amber-900 dark:border-amber-900/60 dark:bg-amber-950/30 dark:text-amber-200">
|
|
320
|
+
Replay fallback: {validationFallbackReason}
|
|
321
|
+
</div>
|
|
322
|
+
)}
|
|
323
|
+
|
|
259
324
|
{/* New passes */}
|
|
260
325
|
{newPassesArr.length > 0 && (
|
|
261
326
|
<div>
|
|
@@ -263,9 +328,9 @@ function ValidationResults({ validation }: { validation: Record<string, unknown>
|
|
|
263
328
|
New Passes ({newPassesArr.length})
|
|
264
329
|
</p>
|
|
265
330
|
<div className="rounded border bg-card p-2">
|
|
266
|
-
{newPassesArr.map((entry
|
|
331
|
+
{newPassesArr.map((entry) => (
|
|
267
332
|
<PerEntryResult
|
|
268
|
-
key={
|
|
333
|
+
key={getEvidenceListKey("new-pass", entry)}
|
|
269
334
|
entry={
|
|
270
335
|
typeof entry === "object" && entry !== null
|
|
271
336
|
? (entry as Record<string, unknown>)
|
|
@@ -284,9 +349,9 @@ function ValidationResults({ validation }: { validation: Record<string, unknown>
|
|
|
284
349
|
Regressions ({regressionsArr.length})
|
|
285
350
|
</p>
|
|
286
351
|
<div className="rounded border border-red-200 dark:border-red-900/50 bg-card p-2">
|
|
287
|
-
{regressionsArr.map((entry
|
|
352
|
+
{regressionsArr.map((entry) => (
|
|
288
353
|
<PerEntryResult
|
|
289
|
-
key={
|
|
354
|
+
key={getEvidenceListKey("regression", entry)}
|
|
290
355
|
entry={
|
|
291
356
|
typeof entry === "object" && entry !== null
|
|
292
357
|
? (entry as Record<string, unknown>)
|
|
@@ -318,17 +383,7 @@ function ValidationResults({ validation }: { validation: Record<string, unknown>
|
|
|
318
383
|
|
|
319
384
|
function PerEntryResultsSection({ entries }: { entries: unknown[] }) {
|
|
320
385
|
const [expanded, setExpanded] = useState(false);
|
|
321
|
-
const passCount = entries.filter((
|
|
322
|
-
if (typeof e !== "object" || e === null) return false;
|
|
323
|
-
const obj = e as Record<string, unknown>;
|
|
324
|
-
return (
|
|
325
|
-
obj.passed === true ||
|
|
326
|
-
obj.matched === true ||
|
|
327
|
-
obj.triggered === true ||
|
|
328
|
-
obj.after === true ||
|
|
329
|
-
obj.result === true
|
|
330
|
-
);
|
|
331
|
-
}).length;
|
|
386
|
+
const passCount = entries.filter((entry) => getPerEntryPassStatus(entry) === true).length;
|
|
332
387
|
|
|
333
388
|
const display = expanded ? entries : entries.slice(0, 5);
|
|
334
389
|
|
|
@@ -356,9 +411,9 @@ function PerEntryResultsSection({ entries }: { entries: unknown[] }) {
|
|
|
356
411
|
/>
|
|
357
412
|
</div>
|
|
358
413
|
<div className="rounded border bg-card p-2 max-h-[300px] overflow-y-auto">
|
|
359
|
-
{display.map((entry
|
|
414
|
+
{display.map((entry) => (
|
|
360
415
|
<PerEntryResult
|
|
361
|
-
key={
|
|
416
|
+
key={getEvidenceListKey("per-entry", entry)}
|
|
362
417
|
entry={
|
|
363
418
|
typeof entry === "object" && entry !== null
|
|
364
419
|
? (entry as Record<string, unknown>)
|
|
@@ -421,13 +476,13 @@ function EvalSetSection({ evalSet }: { evalSet: Array<Record<string, unknown>> }
|
|
|
421
476
|
</button>
|
|
422
477
|
{expanded && (
|
|
423
478
|
<div className="space-y-1">
|
|
424
|
-
{evalSet.map((evalEntry
|
|
479
|
+
{evalSet.map((evalEntry) => {
|
|
425
480
|
const query = evalEntry.query ?? evalEntry.prompt ?? evalEntry.input;
|
|
426
481
|
const expected = evalEntry.expected ?? evalEntry.should_trigger;
|
|
427
482
|
const passed = evalEntry.passed ?? evalEntry.result;
|
|
428
483
|
return (
|
|
429
484
|
<div
|
|
430
|
-
key={
|
|
485
|
+
key={getEvidenceListKey("eval-set", evalEntry)}
|
|
431
486
|
className="flex items-start gap-2 text-xs py-1 border-b border-border/50 last:border-0"
|
|
432
487
|
>
|
|
433
488
|
{typeof passed === "boolean" ? (
|
|
@@ -670,7 +725,7 @@ export function EvidenceViewer({ proposalId, evolution, evidence }: Props) {
|
|
|
670
725
|
<CardContent className="space-y-3">
|
|
671
726
|
<div className="flex items-center gap-2 flex-wrap">
|
|
672
727
|
{steps.map((step, i) => (
|
|
673
|
-
<div key={`${step.action}-${
|
|
728
|
+
<div key={`${step.action}-${step.timestamp}`} className="contents">
|
|
674
729
|
{i > 0 && <ArrowRightIcon className="size-3 text-muted-foreground/50 shrink-0" />}
|
|
675
730
|
<div className="flex items-center gap-1.5 rounded-md border px-2.5 py-1.5 bg-card">
|
|
676
731
|
{ACTION_ICON[step.action]}
|