selftune 0.2.23 → 0.2.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (219)
  1. package/CHANGELOG.md +6 -0
  2. package/README.md +93 -15
  3. package/apps/local-dashboard/dist/assets/index-DgY2KGP-.css +1 -0
  4. package/apps/local-dashboard/dist/assets/index-Dmx7LPVX.js +15 -0
  5. package/apps/local-dashboard/dist/assets/vendor-react-C5oyHiV1.js +11 -0
  6. package/apps/local-dashboard/dist/assets/{vendor-table-BIiI3YhS.js → vendor-table-Bc_bbKd8.js} +1 -1
  7. package/apps/local-dashboard/dist/assets/vendor-ui-B3BPIYy7.js +1 -0
  8. package/apps/local-dashboard/dist/index.html +5 -5
  9. package/cli/selftune/adapters/codex/install.ts +310 -78
  10. package/cli/selftune/adapters/opencode/install.ts +3 -4
  11. package/cli/selftune/alpha-upload/build-payloads.ts +3 -3
  12. package/cli/selftune/alpha-upload/stage-canonical.ts +17 -11
  13. package/cli/selftune/auto-update.ts +200 -8
  14. package/cli/selftune/canonical-export.ts +55 -25
  15. package/cli/selftune/command-surface.ts +397 -0
  16. package/cli/selftune/contribute/contribute.ts +64 -13
  17. package/cli/selftune/contribution-config.ts +57 -3
  18. package/cli/selftune/contribution-preferences.ts +117 -0
  19. package/cli/selftune/contribution-signals.ts +8 -4
  20. package/cli/selftune/contribution-staging.ts +13 -2
  21. package/cli/selftune/contributions.ts +55 -121
  22. package/cli/selftune/creator-contributions.ts +29 -10
  23. package/cli/selftune/cron/setup.ts +7 -3
  24. package/cli/selftune/dashboard-contract.ts +73 -0
  25. package/cli/selftune/dashboard-server.ts +168 -17
  26. package/cli/selftune/dashboard.ts +350 -17
  27. package/cli/selftune/eval/baseline.ts +21 -5
  28. package/cli/selftune/eval/execution-eval.ts +170 -0
  29. package/cli/selftune/eval/family-overlap.ts +2 -2
  30. package/cli/selftune/eval/hooks-to-evals.ts +228 -82
  31. package/cli/selftune/eval/import-skillsbench.ts +2 -2
  32. package/cli/selftune/eval/invocation-classifier.ts +56 -0
  33. package/cli/selftune/eval/synthetic-evals.ts +5 -3
  34. package/cli/selftune/eval/unit-test-cli.ts +7 -4
  35. package/cli/selftune/evolution/apply-proposal.ts +295 -0
  36. package/cli/selftune/evolution/engines/replay-engine.ts +79 -57
  37. package/cli/selftune/evolution/evolve-body.ts +100 -39
  38. package/cli/selftune/evolution/evolve.ts +244 -52
  39. package/cli/selftune/evolution/rollback.ts +0 -1
  40. package/cli/selftune/evolution/validate-body.ts +68 -42
  41. package/cli/selftune/evolution/validate-host-replay.ts +510 -60
  42. package/cli/selftune/evolution/validate-proposal.ts +11 -150
  43. package/cli/selftune/evolution/validate-routing.ts +43 -41
  44. package/cli/selftune/evolution/validation-contract.ts +91 -0
  45. package/cli/selftune/grading/auto-grade.ts +11 -7
  46. package/cli/selftune/grading/grade-session.ts +10 -16
  47. package/cli/selftune/index.ts +35 -10
  48. package/cli/selftune/ingestors/claude-replay.ts +15 -10
  49. package/cli/selftune/ingestors/codex-wrapper.ts +3 -3
  50. package/cli/selftune/ingestors/opencode-ingest.ts +2 -2
  51. package/cli/selftune/ingestors/pi-ingest.ts +3 -2
  52. package/cli/selftune/init.ts +27 -3
  53. package/cli/selftune/localdb/direct-write.ts +35 -1
  54. package/cli/selftune/localdb/queries/cron.ts +34 -0
  55. package/cli/selftune/localdb/queries/dashboard.ts +834 -0
  56. package/cli/selftune/localdb/queries/evolution.ts +158 -0
  57. package/cli/selftune/localdb/queries/execution.ts +133 -0
  58. package/cli/selftune/localdb/queries/json.ts +18 -0
  59. package/cli/selftune/localdb/queries/monitoring.ts +263 -0
  60. package/cli/selftune/localdb/queries/raw.ts +95 -0
  61. package/cli/selftune/localdb/queries/staging.ts +270 -0
  62. package/cli/selftune/localdb/queries/trust.ts +392 -0
  63. package/cli/selftune/localdb/queries.ts +60 -2288
  64. package/cli/selftune/localdb/schema.ts +21 -0
  65. package/cli/selftune/monitoring/watch.ts +96 -29
  66. package/cli/selftune/normalization.ts +3 -0
  67. package/cli/selftune/observability.ts +4 -2
  68. package/cli/selftune/orchestrate/cli.ts +161 -0
  69. package/cli/selftune/orchestrate/execute.ts +295 -0
  70. package/cli/selftune/orchestrate/finalize.ts +157 -0
  71. package/cli/selftune/orchestrate/locks.ts +40 -0
  72. package/cli/selftune/orchestrate/plan.ts +131 -0
  73. package/cli/selftune/orchestrate/post-run.ts +59 -0
  74. package/cli/selftune/orchestrate/prepare.ts +334 -0
  75. package/cli/selftune/orchestrate/report.ts +182 -0
  76. package/cli/selftune/orchestrate/runtime.ts +120 -0
  77. package/cli/selftune/orchestrate/signals.ts +48 -0
  78. package/cli/selftune/orchestrate.ts +150 -1173
  79. package/cli/selftune/repair/skill-usage.ts +5 -2
  80. package/cli/selftune/routes/overview.ts +5 -2
  81. package/cli/selftune/routes/skill-report.ts +15 -2
  82. package/cli/selftune/schedule.ts +5 -5
  83. package/cli/selftune/status.ts +39 -2
  84. package/cli/selftune/testing-readiness.ts +597 -0
  85. package/cli/selftune/types.ts +44 -4
  86. package/cli/selftune/uninstall.ts +2 -1
  87. package/cli/selftune/utils/canonical-log.ts +1 -9
  88. package/cli/selftune/utils/cli-error.ts +9 -0
  89. package/cli/selftune/utils/llm-call.ts +126 -6
  90. package/cli/selftune/utils/skill-discovery.ts +2 -0
  91. package/cli/selftune/workflows/proposals.ts +184 -0
  92. package/cli/selftune/workflows/skill-scaffold.ts +241 -0
  93. package/cli/selftune/workflows/workflows.ts +100 -26
  94. package/node_modules/@selftune/telemetry-contract/fixtures/complete-push.ts +1 -1
  95. package/node_modules/@selftune/telemetry-contract/fixtures/evidence-only-push.ts +1 -1
  96. package/node_modules/@selftune/telemetry-contract/fixtures/partial-push-no-sessions.ts +1 -1
  97. package/node_modules/@selftune/telemetry-contract/fixtures/partial-push-unresolved-parents.ts +1 -1
  98. package/node_modules/@selftune/telemetry-contract/src/schemas.ts +41 -1
  99. package/node_modules/@selftune/telemetry-contract/src/types.ts +103 -2
  100. package/package.json +25 -9
  101. package/packages/dashboard-core/AGENTS.md +18 -0
  102. package/packages/dashboard-core/README.md +30 -0
  103. package/packages/dashboard-core/index.ts +3 -0
  104. package/packages/dashboard-core/package.json +39 -0
  105. package/packages/dashboard-core/src/chrome/DashboardChrome.tsx +74 -0
  106. package/packages/dashboard-core/src/chrome/DashboardHeader.tsx +200 -0
  107. package/packages/dashboard-core/src/chrome/DashboardSidebar.tsx +219 -0
  108. package/packages/dashboard-core/src/chrome/RuntimeBadge.tsx +46 -0
  109. package/packages/dashboard-core/src/chrome/index.ts +14 -0
  110. package/packages/dashboard-core/src/chrome/types.ts +81 -0
  111. package/packages/dashboard-core/src/chrome/utils.ts +23 -0
  112. package/packages/dashboard-core/src/gates/FeatureGate.tsx +11 -0
  113. package/packages/dashboard-core/src/gates/LockedRoute.tsx +29 -0
  114. package/packages/dashboard-core/src/gates/UpgradeCard.tsx +89 -0
  115. package/packages/dashboard-core/src/gates/index.ts +3 -0
  116. package/packages/dashboard-core/src/host/DashboardHostProvider.tsx +62 -0
  117. package/packages/dashboard-core/src/host/adapter.ts +47 -0
  118. package/packages/dashboard-core/src/host/capabilities.ts +55 -0
  119. package/packages/dashboard-core/src/host/index.ts +3 -0
  120. package/packages/dashboard-core/src/models/analytics.ts +39 -0
  121. package/packages/dashboard-core/src/models/index.ts +4 -0
  122. package/packages/dashboard-core/src/models/overview.ts +98 -0
  123. package/packages/dashboard-core/src/models/runtime.ts +7 -0
  124. package/packages/dashboard-core/src/models/skills.ts +34 -0
  125. package/packages/dashboard-core/src/routes/index.ts +2 -0
  126. package/packages/dashboard-core/src/routes/manifest.test.ts +70 -0
  127. package/packages/dashboard-core/src/routes/manifest.ts +451 -0
  128. package/packages/dashboard-core/src/routes/types.ts +39 -0
  129. package/packages/dashboard-core/src/screens/analytics/AnalyticsScreen.tsx +278 -0
  130. package/packages/dashboard-core/src/screens/analytics/index.ts +1 -0
  131. package/packages/dashboard-core/src/screens/index.ts +37 -0
  132. package/packages/dashboard-core/src/screens/overview/OverviewComparisonSurface.test.ts +101 -0
  133. package/packages/dashboard-core/src/screens/overview/OverviewComparisonSurface.tsx +393 -0
  134. package/packages/dashboard-core/src/screens/overview/OverviewCompositionSurface.test.tsx +113 -0
  135. package/packages/dashboard-core/src/screens/overview/OverviewCompositionSurface.tsx +72 -0
  136. package/packages/dashboard-core/src/screens/overview/OverviewCoreSurface.tsx +71 -0
  137. package/packages/dashboard-core/src/screens/overview/OverviewOnboardingBanner.tsx +90 -0
  138. package/packages/dashboard-core/src/screens/overview/OverviewRunSummary.tsx +40 -0
  139. package/packages/dashboard-core/src/screens/overview/index.ts +16 -0
  140. package/packages/dashboard-core/src/screens/overview/types.ts +13 -0
  141. package/packages/dashboard-core/src/screens/skill-report/SkillReportDailyBreakdownSection.tsx +99 -0
  142. package/packages/dashboard-core/src/screens/skill-report/SkillReportDataQualityTabContent.tsx +35 -0
  143. package/packages/dashboard-core/src/screens/skill-report/SkillReportEvidenceRail.tsx +71 -0
  144. package/packages/dashboard-core/src/screens/skill-report/SkillReportEvidenceSection.tsx +63 -0
  145. package/packages/dashboard-core/src/screens/skill-report/SkillReportEvidenceTabContent.tsx +25 -0
  146. package/packages/dashboard-core/src/screens/skill-report/SkillReportInvocationsSection.tsx +24 -0
  147. package/packages/dashboard-core/src/screens/skill-report/SkillReportMissedQueriesSection.tsx +79 -0
  148. package/packages/dashboard-core/src/screens/skill-report/SkillReportScaffold.tsx +150 -0
  149. package/packages/dashboard-core/src/screens/skill-report/SkillReportSections.test.tsx +224 -0
  150. package/packages/dashboard-core/src/screens/skill-report/SkillReportTabs.test.tsx +76 -0
  151. package/packages/dashboard-core/src/screens/skill-report/SkillReportTabs.tsx +88 -0
  152. package/packages/dashboard-core/src/screens/skill-report/SkillReportTrendSection.tsx +33 -0
  153. package/packages/dashboard-core/src/screens/skill-report/SkillReportTrustBadge.tsx +67 -0
  154. package/packages/dashboard-core/src/screens/skill-report/index.ts +45 -0
  155. package/packages/dashboard-core/src/screens/skills/SkillsLibraryScreen.tsx +162 -0
  156. package/packages/dashboard-core/src/screens/skills/index.ts +6 -0
  157. package/packages/telemetry-contract/fixtures/complete-push.ts +1 -1
  158. package/packages/telemetry-contract/fixtures/evidence-only-push.ts +1 -1
  159. package/packages/telemetry-contract/fixtures/partial-push-no-sessions.ts +1 -1
  160. package/packages/telemetry-contract/fixtures/partial-push-unresolved-parents.ts +1 -1
  161. package/packages/telemetry-contract/src/schemas.ts +41 -1
  162. package/packages/telemetry-contract/src/types.ts +103 -2
  163. package/packages/ui/src/components/EvidenceViewer.tsx +80 -25
  164. package/packages/ui/src/components/OverviewPanels.tsx +67 -26
  165. package/packages/ui/src/primitives/tabs.tsx +7 -6
  166. package/packages/ui/src/types.ts +10 -0
  167. package/skill/SKILL.md +130 -332
  168. package/skill/agents/diagnosis-analyst.md +3 -3
  169. package/skill/agents/evolution-reviewer.md +3 -3
  170. package/skill/agents/integration-guide.md +3 -3
  171. package/skill/agents/pattern-analyst.md +2 -2
  172. package/skill/references/cli-quick-reference.md +89 -0
  173. package/skill/references/creator-playbook.md +131 -0
  174. package/skill/references/examples.md +48 -0
  175. package/skill/references/troubleshooting.md +47 -0
  176. package/skill/references/version-history.md +1 -1
  177. package/skill/selftune.contribute.json +11 -0
  178. package/skill/{Workflows → workflows}/Baseline.md +20 -1
  179. package/skill/{Workflows → workflows}/Contribute.md +23 -10
  180. package/skill/{Workflows → workflows}/Contributions.md +13 -5
  181. package/skill/workflows/CreateTestDeploy.md +170 -0
  182. package/skill/{Workflows → workflows}/CreatorContributions.md +18 -6
  183. package/skill/{Workflows → workflows}/Cron.md +1 -1
  184. package/skill/{Workflows → workflows}/Dashboard.md +20 -0
  185. package/skill/{Workflows → workflows}/Doctor.md +1 -1
  186. package/skill/{Workflows → workflows}/Evals.md +67 -2
  187. package/skill/{Workflows → workflows}/Evolve.md +119 -30
  188. package/skill/{Workflows → workflows}/EvolveBody.md +41 -1
  189. package/skill/{Workflows → workflows}/Grade.md +1 -1
  190. package/skill/{Workflows → workflows}/Initialize.md +8 -4
  191. package/skill/{Workflows → workflows}/Orchestrate.md +13 -3
  192. package/skill/{Workflows → workflows}/Schedule.md +3 -3
  193. package/skill/workflows/SignalsDashboard.md +87 -0
  194. package/skill/{Workflows → workflows}/UnitTest.md +19 -0
  195. package/skill/{Workflows → workflows}/Watch.md +42 -2
  196. package/skill/{Workflows → workflows}/Workflows.md +39 -2
  197. package/apps/local-dashboard/dist/assets/index-CwOtTrUS.css +0 -1
  198. package/apps/local-dashboard/dist/assets/index-f1HQpbeH.js +0 -59
  199. package/apps/local-dashboard/dist/assets/vendor-react-CKkiCskZ.js +0 -11
  200. package/apps/local-dashboard/dist/assets/vendor-ui-jVSaIZey.js +0 -12
  201. /package/skill/{Workflows → workflows}/AlphaUpload.md +0 -0
  202. /package/skill/{Workflows → workflows}/AutoActivation.md +0 -0
  203. /package/skill/{Workflows → workflows}/Badge.md +0 -0
  204. /package/skill/{Workflows → workflows}/Composability.md +0 -0
  205. /package/skill/{Workflows → workflows}/EvolutionMemory.md +0 -0
  206. /package/skill/{Workflows → workflows}/ExportCanonical.md +0 -0
  207. /package/skill/{Workflows → workflows}/Hook.md +0 -0
  208. /package/skill/{Workflows → workflows}/ImportSkillsBench.md +0 -0
  209. /package/skill/{Workflows → workflows}/Ingest.md +0 -0
  210. /package/skill/{Workflows → workflows}/PlatformHooks.md +0 -0
  211. /package/skill/{Workflows → workflows}/Quickstart.md +0 -0
  212. /package/skill/{Workflows → workflows}/Recover.md +0 -0
  213. /package/skill/{Workflows → workflows}/Registry.md +0 -0
  214. /package/skill/{Workflows → workflows}/RepairSkillUsage.md +0 -0
  215. /package/skill/{Workflows → workflows}/Replay.md +0 -0
  216. /package/skill/{Workflows → workflows}/Rollback.md +0 -0
  217. /package/skill/{Workflows → workflows}/Sync.md +0 -0
  218. /package/skill/{Workflows → workflows}/Telemetry.md +0 -0
  219. /package/skill/{Workflows → workflows}/Uninstall.md +0 -0
@@ -0,0 +1,162 @@
1
+ "use client";
2
+
3
+ import type { ReactNode } from "react";
4
+ import { useMemo, useState } from "react";
5
+
6
+ import type { DerivedSkill, FilterTab } from "@selftune/ui/components";
7
+ import {
8
+ LibraryHealthCard,
9
+ PendingProposalsCard,
10
+ SkillCardItem,
11
+ SkillFilterTabs,
12
+ SkillGridEmpty,
13
+ SkillHeroCard,
14
+ SkillHeroEmpty,
15
+ SkillsLibraryError,
16
+ SkillsLibrarySkeleton,
17
+ } from "@selftune/ui/components";
18
+
19
+ export interface SkillsLibraryHero { // Featured-skill data; SkillsLibraryScreen passes each field on to <SkillHeroCard>.
20
+ skillName: string;
21
+ skillScope?: string | null; // the screen forwards `skillScope ?? null`, so undefined and null are treated alike
22
+ platforms?: string[]; // forwarded unchanged (may be undefined)
23
+ passRate: number | null;
24
+ totalChecks: number;
25
+ uniqueSessions: number;
26
+ status: DerivedSkill["status"]; // same status type as the skills listed in the grid
27
+ latestEvolutionTimestamp?: string | null; // the screen forwards `latestEvolutionTimestamp ?? null`
28
+ }
29
+
30
+ export interface SkillsLibraryPendingProposal { // One entry of the `proposals` list handed to <PendingProposalsCard>.
31
+ id: string;
32
+ skillName: string | null; // nullable by type; NOTE(review): presumably a proposal not tied to a skill — confirm
33
+ action: string;
34
+ }
35
+
36
+ export interface SkillsLibraryScreenProps { // Props for SkillsLibraryScreen; all data and action renderers come from the caller.
37
+ skills: DerivedSkill[]; // source list for the filtered grid and the per-status tab counts
38
+ heroSkill?: SkillsLibraryHero | null; // when falsy, <SkillHeroEmpty> is rendered instead of the hero card
39
+ aggregatePassRate: number | null; // forwarded to <LibraryHealthCard>
40
+ gradedCount: number; // forwarded to <LibraryHealthCard>
41
+ pendingProposals: SkillsLibraryPendingProposal[]; // forwarded to <PendingProposalsCard> as `proposals`
42
+ isLoading: boolean; // checked before `error`: while true only the skeleton is rendered
43
+ error?: string | null; // a truthy value renders <SkillsLibraryError> with this message
44
+ onRetry(): void; // passed to <SkillsLibraryError> as `onRetry`
45
+ renderHeroActions(skillName: string): ReactNode; // passed to <SkillHeroCard> as `renderActions`
46
+ renderCardActions(skillName: string): ReactNode; // passed to every <SkillCardItem> as `renderActions`
47
+ }
48
+
49
/**
 * Skills library screen: a page heading, a featured-skill hero next to the
 * library-health and pending-proposals cards, and a filterable grid of skill cards.
 *
 * Renders `<SkillsLibrarySkeleton>` while `isLoading` is true and
 * `<SkillsLibraryError>` when `error` is truthy; loading is checked first.
 * Filter tab and sort direction are local state (initially "ALL" / descending).
 *
 * @param props See {@link SkillsLibraryScreenProps}.
 * @returns The rendered screen, or the skeleton / error placeholder.
 */
export function SkillsLibraryScreen({
  skills,
  heroSkill,
  aggregatePassRate,
  gradedCount,
  pendingProposals,
  isLoading,
  error,
  onRetry,
  renderHeroActions,
  renderCardActions,
}: SkillsLibraryScreenProps) {
  const [filter, setFilter] = useState<FilterTab>("ALL");
  const [sortDesc, setSortDesc] = useState(true);

  const filteredSkills = useMemo(() => {
    const visible = filter === "ALL" ? skills : skills.filter((skill) => skill.status === filter);
    // "Descending" shows the incoming order reversed; no comparison sort is applied here.
    // NOTE(review): this assumes the caller supplies `skills` in ascending order — confirm.
    // Copy before reversing so the `skills` prop is never mutated.
    return sortDesc ? [...visible].reverse() : visible;
  }, [filter, skills, sortDesc]);

  const counts = useMemo<Record<FilterTab, number>>(() => {
    const nextCounts: Record<FilterTab, number> = {
      ALL: skills.length,
      HEALTHY: 0,
      WARNING: 0,
      CRITICAL: 0,
      UNGRADED: 0,
    };

    for (const skill of skills) {
      const status: string = skill.status;
      // Own-property check: the `in` operator would also match inherited keys such as
      // "toString". "ALL" is excluded because it is already seeded with the total.
      if (status !== "ALL" && Object.prototype.hasOwnProperty.call(nextCounts, status)) {
        nextCounts[status as Exclude<FilterTab, "ALL">]++;
      }
    }

    return nextCounts;
  }, [skills]);

  // Hooks above run unconditionally; the early returns must stay below them.
  if (isLoading) {
    return <SkillsLibrarySkeleton />;
  }

  if (error) {
    return <SkillsLibraryError message={error} onRetry={onRetry} />;
  }

  return (
    <div
      data-parity-root="skills-library"
      className="@container/main flex flex-1 animate-in fade-in flex-col gap-8 px-4 py-8 duration-500 lg:px-6"
    >
      <div>
        <h1 className="font-headline text-4xl font-bold tracking-tight text-foreground">
          Skills Library
        </h1>
        <p className="mt-2 max-w-2xl text-sm text-muted-foreground">
          Monitor and manage your evolving skill definitions across all scopes.
        </p>
      </div>

      <div className="grid grid-cols-12 gap-6">
        {heroSkill ? (
          <SkillHeroCard
            skillName={heroSkill.skillName}
            skillScope={heroSkill.skillScope ?? null}
            platforms={heroSkill.platforms}
            passRate={heroSkill.passRate}
            totalChecks={heroSkill.totalChecks}
            uniqueSessions={heroSkill.uniqueSessions}
            status={heroSkill.status}
            latestEvolutionTimestamp={heroSkill.latestEvolutionTimestamp ?? null}
            renderActions={renderHeroActions}
          />
        ) : (
          <SkillHeroEmpty />
        )}

        <div className="col-span-12 flex flex-col gap-6 lg:col-span-4">
          <LibraryHealthCard aggregatePassRate={aggregatePassRate} gradedCount={gradedCount} />
          <PendingProposalsCard proposals={pendingProposals} />
        </div>
      </div>

      <div className="space-y-6">
        <SkillFilterTabs
          filter={filter}
          onFilterChange={setFilter}
          counts={counts}
          sortDesc={sortDesc}
          onSortToggle={() => setSortDesc((value) => !value)}
        />

        {filteredSkills.length > 0 ? (
          <div className="grid grid-cols-1 gap-6 md:grid-cols-2 xl:grid-cols-3">
            {filteredSkills.map((skill) => (
              <SkillCardItem key={skill.name} skill={skill} renderActions={renderCardActions} />
            ))}
          </div>
        ) : (
          <SkillGridEmpty />
        )}
      </div>
    </div>
  );
}
@@ -0,0 +1,6 @@
1
+ export {
2
+ SkillsLibraryScreen,
3
+ type SkillsLibraryHero,
4
+ type SkillsLibraryPendingProposal,
5
+ type SkillsLibraryScreenProps,
6
+ } from "./SkillsLibraryScreen";
@@ -1,4 +1,4 @@
1
- import type { PushPayloadV2 } from "../src/schemas.js";
1
+ import type { PushPayloadV2 } from "../src/types.js";
2
2
 
3
3
  /**
4
4
  * A valid PushPayloadV2 with at least one of every record type.
@@ -1,4 +1,4 @@
1
- import type { PushPayloadV2 } from "../src/schemas.js";
1
+ import type { PushPayloadV2 } from "../src/types.js";
2
2
 
3
3
  /**
4
4
  * A valid PushPayloadV2 with only evolution_evidence entries and
@@ -1,4 +1,4 @@
1
- import type { PushPayloadV2 } from "../src/schemas.js";
1
+ import type { PushPayloadV2 } from "../src/types.js";
2
2
 
3
3
  /**
4
4
  * A valid PushPayloadV2 with zero sessions but non-empty evolution_evidence.
@@ -1,4 +1,4 @@
1
- import type { PushPayloadV2 } from "../src/schemas.js";
1
+ import type { PushPayloadV2 } from "../src/types.js";
2
2
 
3
3
  /**
4
4
  * A valid PushPayloadV2 with invocations and prompts that reference a
@@ -140,10 +140,13 @@ export const CanonicalEvolutionEvidenceRecordSchema = z.object({
140
140
  evidence_id: z.string().min(1),
141
141
  skill_name: z.string().min(1),
142
142
  proposal_id: z.string().optional(),
143
+ timestamp: z.string().datetime().optional(),
144
+ skill_path: z.string().optional(),
143
145
  target: z.string().min(1),
144
146
  stage: z.string().min(1),
145
147
  rationale: z.string().optional(),
146
148
  confidence: z.number().min(0).max(1).optional(),
149
+ details: z.string().optional(),
147
150
  original_text: z.string().optional(),
148
151
  proposed_text: z.string().optional(),
149
152
  eval_set_json: z.unknown().optional(),
@@ -151,6 +154,37 @@ export const CanonicalEvolutionEvidenceRecordSchema = z.object({
151
154
  raw_source_ref: rawSourceRefSchema.optional(),
152
155
  });
153
156
 
157
+ export const CanonicalGradingResultRecordSchema = z.object({ // Zod schema for one grading result; used by the optional `grading_results` array of PushPayloadV2Schema.
158
+ grading_id: z.string().min(1),
159
+ session_id: z.string().min(1),
160
+ skill_name: z.string().min(1),
161
+ transcript_path: z.string().nullable().optional(),
162
+ graded_at: z.string().min(1), // only checked for non-empty; not validated as a datetime
163
+ pass_rate: z.number().min(0).max(1).nullable().optional(), // when present and non-null, must lie in [0, 1]
164
+ mean_score: z.number().min(0).max(1).nullable().optional(), // when present and non-null, must lie in [0, 1]
165
+ score_std_dev: z.number().nullable().optional(), // any number accepted (no range constraint)
166
+ passed_count: z.number().int().nonnegative().nullable().optional(), // counts: non-negative integers when present and non-null
167
+ failed_count: z.number().int().nonnegative().nullable().optional(),
168
+ total_count: z.number().int().nonnegative().nullable().optional(),
169
+ expectations_json: z.string().nullable().optional(), // *_json fields are validated as strings only; their contents are not parsed here
170
+ claims_json: z.string().nullable().optional(),
171
+ eval_feedback_json: z.string().nullable().optional(),
172
+ failure_feedback_json: z.string().nullable().optional(),
173
+ execution_metrics_json: z.string().nullable().optional(),
174
+ });
175
+
176
+ export const CanonicalImprovementSignalRecordSchema = z.object({ // Zod schema for one improvement signal; used by the optional `improvement_signals` array of PushPayloadV2Schema.
177
+ signal_id: z.string().min(1),
178
+ timestamp: z.string().min(1), // only checked for non-empty; not validated as a datetime
179
+ session_id: z.string().min(1),
180
+ query: z.string().min(1),
181
+ signal_type: z.string().min(1), // free-form non-empty string (no enum restriction)
182
+ mentioned_skill: z.string().nullable().optional(),
183
+ consumed: z.boolean(), // required, unlike the two consumed_* companions below
184
+ consumed_at: z.string().nullable().optional(),
185
+ consumed_by_run: z.string().nullable().optional(),
186
+ });
187
+
154
188
  // ---------- Orchestrate run schemas ----------
155
189
 
156
190
  export const OrchestrateRunSkillActionSchema = z.object({
@@ -194,12 +228,14 @@ export const PushPayloadV2Schema = z.object({
194
228
  normalization_runs: z.array(CanonicalNormalizationRunRecordSchema).min(0),
195
229
  evolution_evidence: z.array(CanonicalEvolutionEvidenceRecordSchema).optional(),
196
230
  orchestrate_runs: z.array(PushOrchestrateRunRecordSchema).optional(),
231
+ grading_results: z.array(CanonicalGradingResultRecordSchema).optional(),
232
+ improvement_signals: z.array(CanonicalImprovementSignalRecordSchema).optional(),
197
233
  }),
198
234
  });
199
235
 
200
236
  // ---------- Inferred types from Zod schemas ----------
201
237
 
202
- export type PushPayloadV2 = z.infer<typeof PushPayloadV2Schema>;
238
+ export type ZodPushPayloadV2 = z.infer<typeof PushPayloadV2Schema>;
203
239
  export type ZodCanonicalSessionRecord = z.infer<typeof CanonicalSessionRecordSchema>;
204
240
  export type ZodCanonicalPromptRecord = z.infer<typeof CanonicalPromptRecordSchema>;
205
241
  export type ZodCanonicalSkillInvocationRecord = z.infer<
@@ -212,4 +248,8 @@ export type ZodCanonicalNormalizationRunRecord = z.infer<
212
248
  export type ZodCanonicalEvolutionEvidenceRecord = z.infer<
213
249
  typeof CanonicalEvolutionEvidenceRecordSchema
214
250
  >;
251
+ export type ZodCanonicalGradingResultRecord = z.infer<typeof CanonicalGradingResultRecordSchema>;
252
+ export type ZodCanonicalImprovementSignalRecord = z.infer<
253
+ typeof CanonicalImprovementSignalRecordSchema
254
+ >;
215
255
  export type ZodPushOrchestrateRunRecord = z.infer<typeof PushOrchestrateRunRecordSchema>;
@@ -54,7 +54,7 @@ export const CANONICAL_RECORD_KINDS = [
54
54
  ] as const;
55
55
  export type CanonicalRecordKind = (typeof CANONICAL_RECORD_KINDS)[number];
56
56
 
57
- export interface CanonicalRawSourceRef {
57
+ export interface CanonicalRawSourceRef extends Record<string, unknown> {
58
58
  path?: string;
59
59
  line?: number;
60
60
  event_type?: string;
@@ -62,7 +62,7 @@ export interface CanonicalRawSourceRef {
62
62
  metadata?: Record<string, unknown>;
63
63
  }
64
64
 
65
- export interface CanonicalRecordBase {
65
+ export interface CanonicalRecordBase extends Record<string, unknown> {
66
66
  record_kind: CanonicalRecordKind;
67
67
  schema_version: CanonicalSchemaVersion;
68
68
  normalizer_version: string;
@@ -143,6 +143,16 @@ export interface CanonicalExecutionFactRecord extends CanonicalSessionRecordBase
143
143
  errors_encountered: number;
144
144
  input_tokens?: number;
145
145
  output_tokens?: number;
146
+ cached_input_tokens?: number;
147
+ reasoning_output_tokens?: number;
148
+ cost_usd?: number;
149
+ files_changed?: number;
150
+ lines_added?: number;
151
+ lines_removed?: number;
152
+ lines_modified?: number;
153
+ artifact_count?: number;
154
+ session_type?: string;
155
+ agent_summary?: string;
146
156
  duration_ms?: number;
147
157
  completion_status?: CanonicalCompletionStatus;
148
158
  end_reason?: string;
@@ -157,9 +167,100 @@ export interface CanonicalNormalizationRunRecord extends CanonicalRecordBase {
157
167
  repair_applied: boolean;
158
168
  }
159
169
 
170
+ export interface CanonicalEvolutionEvidenceRecord { // Hand-written element type of PushPayloadV2.canonical.evolution_evidence.
171
+ evidence_id?: string; // NOTE(review): optional here, but CanonicalEvolutionEvidenceRecordSchema (schemas.ts) requires a non-empty string
172
+ timestamp?: string; // the Zod schema additionally validates this with `.datetime()`
173
+ proposal_id?: string;
174
+ skill_name: string;
175
+ skill_path?: string;
176
+ target: string;
177
+ stage: string;
178
+ rationale?: string;
179
+ confidence?: number; // the Zod schema restricts this to [0, 1]
180
+ details?: string;
181
+ original_text?: string;
182
+ proposed_text?: string;
183
+ eval_set_json?: unknown; // typed `unknown`: consumers must narrow before use
184
+ validation_json?: unknown; // typed `unknown`: consumers must narrow before use
185
+ raw_source_ref?: CanonicalRawSourceRef;
186
+ }
187
+
188
+ export interface CanonicalGradingResultRecord { // Hand-written element type of PushPayloadV2.canonical.grading_results; field names match CanonicalGradingResultRecordSchema.
189
+ grading_id: string;
190
+ session_id: string;
191
+ skill_name: string;
192
+ transcript_path?: string | null;
193
+ graded_at: string;
194
+ pass_rate?: number | null; // the Zod schema restricts this to [0, 1]
195
+ mean_score?: number | null; // the Zod schema restricts this to [0, 1]
196
+ score_std_dev?: number | null;
197
+ passed_count?: number | null; // the Zod schema requires non-negative integers for the three counts
198
+ failed_count?: number | null;
199
+ total_count?: number | null;
200
+ expectations_json?: string | null; // *_json fields are carried as strings, not parsed objects
201
+ claims_json?: string | null;
202
+ eval_feedback_json?: string | null;
203
+ failure_feedback_json?: string | null;
204
+ execution_metrics_json?: string | null;
205
+ }
206
+
207
+ export interface CanonicalImprovementSignalRecord { // Hand-written element type of PushPayloadV2.canonical.improvement_signals; field names match CanonicalImprovementSignalRecordSchema.
208
+ signal_id: string;
209
+ timestamp: string;
210
+ session_id: string;
211
+ query: string;
212
+ signal_type: string;
213
+ mentioned_skill?: string | null;
214
+ consumed: boolean; // required, unlike the two consumed_* companions below
215
+ consumed_at?: string | null;
216
+ consumed_by_run?: string | null;
217
+ }
218
+
160
219
  export type CanonicalRecord =
161
220
  | CanonicalSessionRecord
162
221
  | CanonicalPromptRecord
163
222
  | CanonicalSkillInvocationRecord
164
223
  | CanonicalExecutionFactRecord
165
224
  | CanonicalNormalizationRunRecord;
225
+
226
+ export interface PushOrchestrateRunRecord { // Element type of PushPayloadV2.canonical.orchestrate_runs: run-level totals plus one entry per skill.
227
+ run_id: string;
228
+ timestamp: string;
229
+ elapsed_ms: number;
230
+ dry_run: boolean;
231
+ approval_mode: "auto" | "review";
232
+ total_skills: number;
233
+ evaluated: number;
234
+ evolved: number;
235
+ deployed: number;
236
+ watched: number;
237
+ skipped: number;
238
+ skill_actions: Array<{ // per-skill outcome entries
239
+ skill: string;
240
+ action: "evolve" | "watch" | "skip";
241
+ reason: string;
242
+ deployed?: boolean;
243
+ rolledBack?: boolean; // NOTE: camelCase, unlike the snake_case fields around it
244
+ alert?: string | null;
245
+ elapsed_ms?: number;
246
+ llm_calls?: number;
247
+ }>;
248
+ }
249
+
250
+ export interface PushPayloadV2 { // Hand-written payload type; the fixtures import it from types.js, while schemas.ts now exports its inferred type as ZodPushPayloadV2.
251
+ schema_version: CanonicalSchemaVersion;
252
+ client_version: string;
253
+ push_id: string;
254
+ normalizer_version: string;
255
+ canonical: { // the first five arrays are required; the last four are optional
256
+ sessions: CanonicalSessionRecord[];
257
+ prompts: CanonicalPromptRecord[];
258
+ skill_invocations: CanonicalSkillInvocationRecord[];
259
+ execution_facts: CanonicalExecutionFactRecord[];
260
+ normalization_runs: CanonicalNormalizationRunRecord[];
261
+ evolution_evidence?: CanonicalEvolutionEvidenceRecord[];
262
+ orchestrate_runs?: PushOrchestrateRunRecord[];
263
+ grading_results?: CanonicalGradingResultRecord[];
264
+ improvement_signals?: CanonicalImprovementSignalRecord[];
265
+ };
266
+ }
@@ -158,6 +158,44 @@ function formatValidationValue(key: string, val: unknown): React.ReactNode {
158
158
  return <span>{String(val)}</span>;
159
159
  }
160
160
 
161
/**
 * Resolve the pass/fail flag of a validation entry.
 *
 * Looks at the "after" aliases (`after_pass`, `after`, `triggered`, `result`)
 * and then the generic aliases (`passed`, `matched`), in that order, and returns
 * the first one that actually holds a boolean. A non-boolean value stored under a
 * higher-priority alias therefore no longer hides a boolean stored under a
 * lower-priority one.
 *
 * @param entry Arbitrary value taken from validation output.
 * @returns The boolean status, or `null` when `entry` is not an object or no
 *          alias holds a boolean.
 */
function getPerEntryPassStatus(entry: unknown): boolean | null {
  if (typeof entry !== "object" || entry === null) return null;
  const obj = entry as Record<string, unknown>;
  // Priority order: post-change result aliases first, generic pass flags last.
  const aliases = ["after_pass", "after", "triggered", "result", "passed", "matched"] as const;
  for (const alias of aliases) {
    const candidate = obj[alias];
    if (typeof candidate === "boolean") return candidate;
  }
  return null;
}
168
+
169
/**
 * Serialize a value for use inside a list key without ever throwing.
 * `JSON.stringify` raises on BigInt values and circular structures; in that
 * case fall back to a coarse but safe description of the value.
 */
function stringifyForEvidenceKey(value: unknown): string {
  try {
    // String(...) keeps the "undefined" text that template interpolation produced
    // for values JSON.stringify cannot represent (undefined, functions, symbols).
    return String(JSON.stringify(value));
  } catch {
    return typeof value === "object" ? Object.prototype.toString.call(value) : String(value);
  }
}

/**
 * Build a list key for an evidence item.
 *
 * Resolution order for object values:
 *  1. the first of `entry.query`, `query`, `prompt`, `input` that is a string
 *     (used only if it is non-empty),
 *  2. `action` + `timestamp` when both are non-empty strings,
 *  3. the serialized object.
 * Non-object values (and `null`) are serialized directly.
 *
 * Keys are only as unique as the underlying data: two items with the same query
 * text produce the same key.
 *
 * @param prefix Namespace prepended to the key, separated by ":".
 * @param value  The list item to derive the key from.
 * @returns A string key; never throws, even for BigInt or circular input.
 */
function getEvidenceListKey(prefix: string, value: unknown): string {
  if (typeof value !== "object" || value === null) {
    return `${prefix}:${stringifyForEvidenceKey(value)}`;
  }

  const record = value as Record<string, unknown>;
  const nested =
    typeof record.entry === "object" && record.entry !== null
      ? (record.entry as Record<string, unknown>)
      : null;

  // First candidate that is a string wins, even if empty; an empty winner
  // falls through to the next strategy rather than to the next candidate.
  const candidates: unknown[] = [nested?.query, record.query, record.prompt, record.input];
  const query = candidates.find((candidate): candidate is string => typeof candidate === "string");
  if (query) return `${prefix}:${query}`;

  const action = typeof record.action === "string" ? record.action : null;
  const timestamp = typeof record.timestamp === "string" ? record.timestamp : null;
  if (action && timestamp) return `${prefix}:${action}:${timestamp}`;

  return `${prefix}:${stringifyForEvidenceKey(record)}`;
}
198
+
161
199
  /** Render a per_entry_result row — handles both flat EvalEntry and nested { entry, before_pass, after_pass } */
162
200
  function PerEntryResult({ entry }: { entry: Record<string, unknown> }) {
163
201
  // Handle nested shape: { entry: { query, should_trigger }, before_pass, after_pass }
@@ -168,11 +206,7 @@ function PerEntryResult({ entry }: { entry: Record<string, unknown> }) {
168
206
  const beforePass =
169
207
  entry.before_pass ?? entry.before ?? entry.original_triggered ?? entry.baseline;
170
208
  const afterPass = entry.after_pass ?? entry.after ?? entry.triggered ?? entry.result;
171
- const passed = entry.passed ?? entry.matched;
172
-
173
- // Determine icon: use after_pass for per_entry_results, passed for others
174
- const isPass =
175
- typeof afterPass === "boolean" ? afterPass : typeof passed === "boolean" ? passed : null;
209
+ const isPass = getPerEntryPassStatus(entry);
176
210
 
177
211
  return (
178
212
  <div className="flex items-start gap-2 text-xs py-1.5 border-b border-border/50 last:border-0">
@@ -218,12 +252,22 @@ function ValidationResults({ validation }: { validation: Record<string, unknown>
218
252
  regressions,
219
253
  new_passes,
220
254
  per_entry_results,
255
+ validation_mode,
256
+ validation_agent,
257
+ validation_fixture_id,
258
+ validation_fallback_reason,
221
259
  ...rest
222
260
  } = validation;
223
261
 
224
262
  const regressionsArr = Array.isArray(regressions) ? regressions : [];
225
263
  const newPassesArr = Array.isArray(new_passes) ? new_passes : [];
226
264
  const perEntryArr = Array.isArray(per_entry_results) ? per_entry_results : [];
265
+ const validationMode = typeof validation_mode === "string" ? validation_mode : null;
266
+ const validationAgent = typeof validation_agent === "string" ? validation_agent : null;
267
+ const validationFixtureId =
268
+ typeof validation_fixture_id === "string" ? validation_fixture_id : null;
269
+ const validationFallbackReason =
270
+ typeof validation_fallback_reason === "string" ? validation_fallback_reason : null;
227
271
 
228
272
  return (
229
273
  <div className="rounded-md border bg-muted/30 p-3 space-y-3">
@@ -241,6 +285,21 @@ function ValidationResults({ validation }: { validation: Record<string, unknown>
241
285
  {improved ? "Improved" : "Regressed"}
242
286
  </Badge>
243
287
  )}
288
+ {validationMode && (
289
+ <Badge variant="outline" className="text-[10px] capitalize">
290
+ {validationMode.replace(/_/g, " ")}
291
+ </Badge>
292
+ )}
293
+ {validationAgent && (
294
+ <Badge variant="secondary" className="text-[10px]">
295
+ {validationAgent}
296
+ </Badge>
297
+ )}
298
+ {validationFixtureId && (
299
+ <Badge variant="secondary" className="text-[10px] font-mono" title={validationFixtureId}>
300
+ fixture #{validationFixtureId.slice(0, 8)}
301
+ </Badge>
302
+ )}
244
303
  {typeof before_pass_rate === "number" && typeof after_pass_rate === "number" && (
245
304
  <span className="text-xs font-mono text-muted-foreground">
246
305
  {(before_pass_rate * 100).toFixed(1)}% &rarr; {(after_pass_rate * 100).toFixed(1)}%
@@ -256,6 +315,12 @@ function ValidationResults({ validation }: { validation: Record<string, unknown>
256
315
  )}
257
316
  </div>
258
317
 
318
+ {validationFallbackReason && (
319
+ <div className="rounded border border-amber-300/60 bg-amber-50 px-2.5 py-2 text-[11px] text-amber-900 dark:border-amber-900/60 dark:bg-amber-950/30 dark:text-amber-200">
320
+ Replay fallback: {validationFallbackReason}
321
+ </div>
322
+ )}
323
+
259
324
  {/* New passes */}
260
325
  {newPassesArr.length > 0 && (
261
326
  <div>
@@ -263,9 +328,9 @@ function ValidationResults({ validation }: { validation: Record<string, unknown>
263
328
  New Passes ({newPassesArr.length})
264
329
  </p>
265
330
  <div className="rounded border bg-card p-2">
266
- {newPassesArr.map((entry, j) => (
331
+ {newPassesArr.map((entry) => (
267
332
  <PerEntryResult
268
- key={j}
333
+ key={getEvidenceListKey("new-pass", entry)}
269
334
  entry={
270
335
  typeof entry === "object" && entry !== null
271
336
  ? (entry as Record<string, unknown>)
@@ -284,9 +349,9 @@ function ValidationResults({ validation }: { validation: Record<string, unknown>
284
349
  Regressions ({regressionsArr.length})
285
350
  </p>
286
351
  <div className="rounded border border-red-200 dark:border-red-900/50 bg-card p-2">
287
- {regressionsArr.map((entry, j) => (
352
+ {regressionsArr.map((entry) => (
288
353
  <PerEntryResult
289
- key={j}
354
+ key={getEvidenceListKey("regression", entry)}
290
355
  entry={
291
356
  typeof entry === "object" && entry !== null
292
357
  ? (entry as Record<string, unknown>)
@@ -318,17 +383,7 @@ function ValidationResults({ validation }: { validation: Record<string, unknown>
318
383
 
319
384
  function PerEntryResultsSection({ entries }: { entries: unknown[] }) {
320
385
  const [expanded, setExpanded] = useState(false);
321
- const passCount = entries.filter((e) => {
322
- if (typeof e !== "object" || e === null) return false;
323
- const obj = e as Record<string, unknown>;
324
- return (
325
- obj.passed === true ||
326
- obj.matched === true ||
327
- obj.triggered === true ||
328
- obj.after === true ||
329
- obj.result === true
330
- );
331
- }).length;
386
+ const passCount = entries.filter((entry) => getPerEntryPassStatus(entry) === true).length;
332
387
 
333
388
  const display = expanded ? entries : entries.slice(0, 5);
334
389
 
@@ -356,9 +411,9 @@ function PerEntryResultsSection({ entries }: { entries: unknown[] }) {
356
411
  />
357
412
  </div>
358
413
  <div className="rounded border bg-card p-2 max-h-[300px] overflow-y-auto">
359
- {display.map((entry, j) => (
414
+ {display.map((entry) => (
360
415
  <PerEntryResult
361
- key={j}
416
+ key={getEvidenceListKey("per-entry", entry)}
362
417
  entry={
363
418
  typeof entry === "object" && entry !== null
364
419
  ? (entry as Record<string, unknown>)
@@ -421,13 +476,13 @@ function EvalSetSection({ evalSet }: { evalSet: Array<Record<string, unknown>> }
421
476
  </button>
422
477
  {expanded && (
423
478
  <div className="space-y-1">
424
- {evalSet.map((evalEntry, j) => {
479
+ {evalSet.map((evalEntry) => {
425
480
  const query = evalEntry.query ?? evalEntry.prompt ?? evalEntry.input;
426
481
  const expected = evalEntry.expected ?? evalEntry.should_trigger;
427
482
  const passed = evalEntry.passed ?? evalEntry.result;
428
483
  return (
429
484
  <div
430
- key={j}
485
+ key={getEvidenceListKey("eval-set", evalEntry)}
431
486
  className="flex items-start gap-2 text-xs py-1 border-b border-border/50 last:border-0"
432
487
  >
433
488
  {typeof passed === "boolean" ? (
@@ -670,7 +725,7 @@ export function EvidenceViewer({ proposalId, evolution, evidence }: Props) {
670
725
  <CardContent className="space-y-3">
671
726
  <div className="flex items-center gap-2 flex-wrap">
672
727
  {steps.map((step, i) => (
673
- <div key={`${step.action}-${i}`} className="contents">
728
+ <div key={`${step.action}-${step.timestamp}`} className="contents">
674
729
  {i > 0 && <ArrowRightIcon className="size-3 text-muted-foreground/50 shrink-0" />}
675
730
  <div className="flex items-center gap-1.5 rounded-md border px-2.5 py-1.5 bg-card">
676
731
  {ACTION_ICON[step.action]}