selftune 0.2.22 → 0.2.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (270)
  1. package/CHANGELOG.md +6 -0
  2. package/README.md +95 -15
  3. package/apps/local-dashboard/dist/assets/index-DgY2KGP-.css +1 -0
  4. package/apps/local-dashboard/dist/assets/index-Dmx7LPVX.js +15 -0
  5. package/apps/local-dashboard/dist/assets/vendor-react-C5oyHiV1.js +11 -0
  6. package/apps/local-dashboard/dist/assets/{vendor-table-BIiI3YhS.js → vendor-table-Bc_bbKd8.js} +1 -1
  7. package/apps/local-dashboard/dist/assets/vendor-ui-B3BPIYy7.js +1 -0
  8. package/apps/local-dashboard/dist/index.html +5 -5
  9. package/cli/selftune/adapters/codex/install.ts +310 -78
  10. package/cli/selftune/adapters/opencode/install.ts +3 -4
  11. package/cli/selftune/adapters/pi/hook.ts +273 -0
  12. package/cli/selftune/adapters/pi/install.ts +207 -0
  13. package/cli/selftune/alpha-upload/build-payloads.ts +3 -3
  14. package/cli/selftune/alpha-upload/stage-canonical.ts +17 -11
  15. package/cli/selftune/auto-update.ts +200 -8
  16. package/cli/selftune/canonical-export.ts +55 -25
  17. package/cli/selftune/command-surface.ts +397 -0
  18. package/cli/selftune/constants.ts +10 -1
  19. package/cli/selftune/contribute/contribute.ts +64 -13
  20. package/cli/selftune/contribution-config.ts +57 -3
  21. package/cli/selftune/contribution-preferences.ts +117 -0
  22. package/cli/selftune/contribution-signals.ts +8 -4
  23. package/cli/selftune/contribution-staging.ts +13 -2
  24. package/cli/selftune/contributions.ts +55 -121
  25. package/cli/selftune/creator-contributions.ts +29 -10
  26. package/cli/selftune/cron/setup.ts +7 -3
  27. package/cli/selftune/dashboard-contract.ts +87 -0
  28. package/cli/selftune/dashboard-server.ts +168 -17
  29. package/cli/selftune/dashboard.ts +350 -17
  30. package/cli/selftune/eval/baseline.ts +21 -5
  31. package/cli/selftune/eval/execution-eval.ts +170 -0
  32. package/cli/selftune/eval/family-overlap.ts +2 -2
  33. package/cli/selftune/eval/hooks-to-evals.ts +228 -82
  34. package/cli/selftune/eval/import-skillsbench.ts +2 -2
  35. package/cli/selftune/eval/invocation-classifier.ts +56 -0
  36. package/cli/selftune/eval/synthetic-evals.ts +5 -3
  37. package/cli/selftune/eval/unit-test-cli.ts +7 -4
  38. package/cli/selftune/evolution/apply-proposal.ts +295 -0
  39. package/cli/selftune/evolution/engines/judge-engine.ts +96 -0
  40. package/cli/selftune/evolution/engines/replay-engine.ts +180 -0
  41. package/cli/selftune/evolution/evidence.ts +2 -6
  42. package/cli/selftune/evolution/evolve-body.ts +152 -38
  43. package/cli/selftune/evolution/evolve.ts +244 -52
  44. package/cli/selftune/evolution/rollback.ts +0 -1
  45. package/cli/selftune/evolution/validate-body.ts +111 -49
  46. package/cli/selftune/evolution/validate-host-replay.ts +510 -60
  47. package/cli/selftune/evolution/validate-proposal.ts +11 -150
  48. package/cli/selftune/evolution/validate-routing.ts +51 -108
  49. package/cli/selftune/evolution/validation-contract.ts +91 -0
  50. package/cli/selftune/grading/auto-grade.ts +11 -7
  51. package/cli/selftune/grading/grade-session.ts +10 -16
  52. package/cli/selftune/hooks/skill-eval.ts +2 -1
  53. package/cli/selftune/hooks-shared/types.ts +1 -0
  54. package/cli/selftune/index.ts +58 -15
  55. package/cli/selftune/ingestors/claude-replay.ts +15 -10
  56. package/cli/selftune/ingestors/codex-wrapper.ts +3 -3
  57. package/cli/selftune/ingestors/opencode-ingest.ts +2 -2
  58. package/cli/selftune/ingestors/pi-ingest.ts +727 -0
  59. package/cli/selftune/init.ts +38 -4
  60. package/cli/selftune/localdb/direct-write.ts +120 -1
  61. package/cli/selftune/localdb/materialize.ts +6 -7
  62. package/cli/selftune/localdb/queries/cron.ts +34 -0
  63. package/cli/selftune/localdb/queries/dashboard.ts +834 -0
  64. package/cli/selftune/localdb/queries/evolution.ts +158 -0
  65. package/cli/selftune/localdb/queries/execution.ts +133 -0
  66. package/cli/selftune/localdb/queries/json.ts +18 -0
  67. package/cli/selftune/localdb/queries/monitoring.ts +263 -0
  68. package/cli/selftune/localdb/queries/raw.ts +95 -0
  69. package/cli/selftune/localdb/queries/staging.ts +270 -0
  70. package/cli/selftune/localdb/queries/trust.ts +392 -0
  71. package/cli/selftune/localdb/queries.ts +60 -2162
  72. package/cli/selftune/localdb/schema.ts +59 -0
  73. package/cli/selftune/monitoring/watch.ts +96 -29
  74. package/cli/selftune/normalization.ts +3 -0
  75. package/cli/selftune/observability.ts +12 -3
  76. package/cli/selftune/orchestrate/cli.ts +161 -0
  77. package/cli/selftune/orchestrate/execute.ts +295 -0
  78. package/cli/selftune/orchestrate/finalize.ts +157 -0
  79. package/cli/selftune/orchestrate/locks.ts +40 -0
  80. package/cli/selftune/orchestrate/plan.ts +131 -0
  81. package/cli/selftune/orchestrate/post-run.ts +59 -0
  82. package/cli/selftune/orchestrate/prepare.ts +334 -0
  83. package/cli/selftune/orchestrate/report.ts +182 -0
  84. package/cli/selftune/orchestrate/runtime.ts +120 -0
  85. package/cli/selftune/orchestrate/signals.ts +48 -0
  86. package/cli/selftune/orchestrate.ts +162 -1142
  87. package/cli/selftune/registry/client.ts +74 -0
  88. package/cli/selftune/registry/history.ts +54 -0
  89. package/cli/selftune/registry/index.ts +90 -0
  90. package/cli/selftune/registry/install.ts +141 -0
  91. package/cli/selftune/registry/list.ts +44 -0
  92. package/cli/selftune/registry/push.ts +171 -0
  93. package/cli/selftune/registry/rollback.ts +49 -0
  94. package/cli/selftune/registry/status.ts +62 -0
  95. package/cli/selftune/registry/sync.ts +125 -0
  96. package/cli/selftune/repair/skill-usage.ts +9 -3
  97. package/cli/selftune/routes/overview.ts +5 -2
  98. package/cli/selftune/routes/skill-report.ts +15 -2
  99. package/cli/selftune/schedule.ts +5 -5
  100. package/cli/selftune/status.ts +70 -2
  101. package/cli/selftune/sync.ts +127 -23
  102. package/cli/selftune/testing-readiness.ts +597 -0
  103. package/cli/selftune/types.ts +46 -5
  104. package/cli/selftune/uninstall.ts +2 -1
  105. package/cli/selftune/utils/canonical-log.ts +1 -9
  106. package/cli/selftune/utils/cli-error.ts +9 -0
  107. package/cli/selftune/utils/jsonl.ts +1 -30
  108. package/cli/selftune/utils/llm-call.ts +126 -6
  109. package/cli/selftune/utils/skill-discovery.ts +24 -0
  110. package/cli/selftune/workflows/proposals.ts +184 -0
  111. package/cli/selftune/workflows/skill-scaffold.ts +241 -0
  112. package/cli/selftune/workflows/workflows.ts +100 -26
  113. package/node_modules/@selftune/telemetry-contract/fixtures/complete-push.ts +1 -1
  114. package/node_modules/@selftune/telemetry-contract/fixtures/evidence-only-push.ts +2 -2
  115. package/node_modules/@selftune/telemetry-contract/fixtures/golden.test.ts +0 -1
  116. package/node_modules/@selftune/telemetry-contract/fixtures/partial-push-no-sessions.ts +1 -1
  117. package/node_modules/@selftune/telemetry-contract/fixtures/partial-push-unresolved-parents.ts +2 -2
  118. package/node_modules/@selftune/telemetry-contract/package.json +1 -1
  119. package/node_modules/@selftune/telemetry-contract/src/index.ts +1 -0
  120. package/node_modules/@selftune/telemetry-contract/src/schemas.ts +63 -5
  121. package/node_modules/@selftune/telemetry-contract/src/types.ts +97 -7
  122. package/node_modules/@selftune/telemetry-contract/tests/compatibility.test.ts +0 -1
  123. package/package.json +25 -9
  124. package/packages/dashboard-core/AGENTS.md +18 -0
  125. package/packages/dashboard-core/README.md +30 -0
  126. package/packages/dashboard-core/index.ts +3 -0
  127. package/packages/dashboard-core/package.json +39 -0
  128. package/packages/dashboard-core/src/chrome/DashboardChrome.tsx +74 -0
  129. package/packages/dashboard-core/src/chrome/DashboardHeader.tsx +200 -0
  130. package/packages/dashboard-core/src/chrome/DashboardSidebar.tsx +219 -0
  131. package/packages/dashboard-core/src/chrome/RuntimeBadge.tsx +46 -0
  132. package/packages/dashboard-core/src/chrome/index.ts +14 -0
  133. package/packages/dashboard-core/src/chrome/types.ts +81 -0
  134. package/packages/dashboard-core/src/chrome/utils.ts +23 -0
  135. package/packages/dashboard-core/src/gates/FeatureGate.tsx +11 -0
  136. package/packages/dashboard-core/src/gates/LockedRoute.tsx +29 -0
  137. package/packages/dashboard-core/src/gates/UpgradeCard.tsx +89 -0
  138. package/packages/dashboard-core/src/gates/index.ts +3 -0
  139. package/packages/dashboard-core/src/host/DashboardHostProvider.tsx +62 -0
  140. package/packages/dashboard-core/src/host/adapter.ts +47 -0
  141. package/packages/dashboard-core/src/host/capabilities.ts +55 -0
  142. package/packages/dashboard-core/src/host/index.ts +3 -0
  143. package/packages/dashboard-core/src/models/analytics.ts +39 -0
  144. package/packages/dashboard-core/src/models/index.ts +4 -0
  145. package/packages/dashboard-core/src/models/overview.ts +98 -0
  146. package/packages/dashboard-core/src/models/runtime.ts +7 -0
  147. package/packages/dashboard-core/src/models/skills.ts +34 -0
  148. package/packages/dashboard-core/src/routes/index.ts +2 -0
  149. package/packages/dashboard-core/src/routes/manifest.test.ts +70 -0
  150. package/packages/dashboard-core/src/routes/manifest.ts +451 -0
  151. package/packages/dashboard-core/src/routes/types.ts +39 -0
  152. package/packages/dashboard-core/src/screens/analytics/AnalyticsScreen.tsx +278 -0
  153. package/packages/dashboard-core/src/screens/analytics/index.ts +1 -0
  154. package/packages/dashboard-core/src/screens/index.ts +37 -0
  155. package/packages/dashboard-core/src/screens/overview/OverviewComparisonSurface.test.ts +101 -0
  156. package/packages/dashboard-core/src/screens/overview/OverviewComparisonSurface.tsx +393 -0
  157. package/packages/dashboard-core/src/screens/overview/OverviewCompositionSurface.test.tsx +113 -0
  158. package/packages/dashboard-core/src/screens/overview/OverviewCompositionSurface.tsx +72 -0
  159. package/packages/dashboard-core/src/screens/overview/OverviewCoreSurface.tsx +71 -0
  160. package/packages/dashboard-core/src/screens/overview/OverviewOnboardingBanner.tsx +90 -0
  161. package/packages/dashboard-core/src/screens/overview/OverviewRunSummary.tsx +40 -0
  162. package/packages/dashboard-core/src/screens/overview/index.ts +16 -0
  163. package/packages/dashboard-core/src/screens/overview/types.ts +13 -0
  164. package/packages/dashboard-core/src/screens/skill-report/SkillReportDailyBreakdownSection.tsx +99 -0
  165. package/packages/dashboard-core/src/screens/skill-report/SkillReportDataQualityTabContent.tsx +35 -0
  166. package/packages/dashboard-core/src/screens/skill-report/SkillReportEvidenceRail.tsx +71 -0
  167. package/packages/dashboard-core/src/screens/skill-report/SkillReportEvidenceSection.tsx +63 -0
  168. package/packages/dashboard-core/src/screens/skill-report/SkillReportEvidenceTabContent.tsx +25 -0
  169. package/packages/dashboard-core/src/screens/skill-report/SkillReportInvocationsSection.tsx +24 -0
  170. package/packages/dashboard-core/src/screens/skill-report/SkillReportMissedQueriesSection.tsx +79 -0
  171. package/packages/dashboard-core/src/screens/skill-report/SkillReportScaffold.tsx +150 -0
  172. package/packages/dashboard-core/src/screens/skill-report/SkillReportSections.test.tsx +224 -0
  173. package/packages/dashboard-core/src/screens/skill-report/SkillReportTabs.test.tsx +76 -0
  174. package/packages/dashboard-core/src/screens/skill-report/SkillReportTabs.tsx +88 -0
  175. package/packages/dashboard-core/src/screens/skill-report/SkillReportTrendSection.tsx +33 -0
  176. package/packages/dashboard-core/src/screens/skill-report/SkillReportTrustBadge.tsx +67 -0
  177. package/packages/dashboard-core/src/screens/skill-report/index.ts +45 -0
  178. package/packages/dashboard-core/src/screens/skills/SkillsLibraryScreen.tsx +162 -0
  179. package/packages/dashboard-core/src/screens/skills/index.ts +6 -0
  180. package/packages/telemetry-contract/fixtures/complete-push.ts +1 -1
  181. package/packages/telemetry-contract/fixtures/evidence-only-push.ts +2 -2
  182. package/packages/telemetry-contract/fixtures/golden.test.ts +0 -1
  183. package/packages/telemetry-contract/fixtures/partial-push-no-sessions.ts +1 -1
  184. package/packages/telemetry-contract/fixtures/partial-push-unresolved-parents.ts +2 -2
  185. package/packages/telemetry-contract/package.json +1 -1
  186. package/packages/telemetry-contract/src/index.ts +1 -0
  187. package/packages/telemetry-contract/src/schemas.ts +63 -5
  188. package/packages/telemetry-contract/src/types.ts +97 -7
  189. package/packages/telemetry-contract/tests/compatibility.test.ts +0 -1
  190. package/packages/ui/AGENTS.md +16 -0
  191. package/packages/ui/README.md +1 -1
  192. package/packages/ui/package.json +1 -1
  193. package/packages/ui/src/components/ActivityTimeline.tsx +152 -168
  194. package/packages/ui/src/components/AnalyticsCharts.tsx +344 -0
  195. package/packages/ui/src/components/EvidenceViewer.tsx +229 -464
  196. package/packages/ui/src/components/EvolutionTimeline.tsx +34 -87
  197. package/packages/ui/src/components/InfoTip.tsx +1 -2
  198. package/packages/ui/src/components/InvocationsPanel.tsx +413 -0
  199. package/packages/ui/src/components/JobHistoryTimeline.tsx +156 -0
  200. package/packages/ui/src/components/OrchestrateRunsPanel.tsx +18 -36
  201. package/packages/ui/src/components/OverviewPanels.tsx +693 -0
  202. package/packages/ui/src/components/PipelineStatusBar.tsx +65 -0
  203. package/packages/ui/src/components/SkillReportGuide.tsx +215 -0
  204. package/packages/ui/src/components/SkillReportPanels.tsx +919 -0
  205. package/packages/ui/src/components/SkillsLibrary.tsx +437 -0
  206. package/packages/ui/src/components/index.ts +56 -1
  207. package/packages/ui/src/components/section-cards.tsx +18 -35
  208. package/packages/ui/src/components/skill-health-grid.tsx +47 -37
  209. package/packages/ui/src/lib/constants.tsx +0 -1
  210. package/packages/ui/src/primitives/card.tsx +1 -1
  211. package/packages/ui/src/primitives/checkbox.tsx +1 -1
  212. package/packages/ui/src/primitives/dropdown-menu.tsx +2 -2
  213. package/packages/ui/src/primitives/select.tsx +2 -2
  214. package/packages/ui/src/primitives/tabs.tsx +7 -6
  215. package/packages/ui/src/types.ts +182 -4
  216. package/skill/SKILL.md +130 -318
  217. package/skill/agents/diagnosis-analyst.md +3 -3
  218. package/skill/agents/evolution-reviewer.md +3 -3
  219. package/skill/agents/integration-guide.md +3 -3
  220. package/skill/agents/pattern-analyst.md +2 -2
  221. package/skill/references/cli-quick-reference.md +89 -0
  222. package/skill/references/creator-playbook.md +131 -0
  223. package/skill/references/examples.md +48 -0
  224. package/skill/references/troubleshooting.md +47 -0
  225. package/skill/references/version-history.md +1 -1
  226. package/skill/selftune.contribute.json +11 -0
  227. package/skill/{Workflows → workflows}/Baseline.md +20 -1
  228. package/skill/{Workflows → workflows}/Contribute.md +23 -10
  229. package/skill/{Workflows → workflows}/Contributions.md +13 -5
  230. package/skill/workflows/CreateTestDeploy.md +170 -0
  231. package/skill/{Workflows → workflows}/CreatorContributions.md +18 -6
  232. package/skill/{Workflows → workflows}/Cron.md +1 -1
  233. package/skill/{Workflows → workflows}/Dashboard.md +20 -0
  234. package/skill/{Workflows → workflows}/Doctor.md +1 -1
  235. package/skill/{Workflows → workflows}/Evals.md +67 -2
  236. package/skill/{Workflows → workflows}/Evolve.md +119 -30
  237. package/skill/{Workflows → workflows}/EvolveBody.md +41 -1
  238. package/skill/{Workflows → workflows}/Grade.md +1 -1
  239. package/skill/{Workflows → workflows}/Ingest.md +60 -2
  240. package/skill/{Workflows → workflows}/Initialize.md +16 -9
  241. package/skill/{Workflows → workflows}/Orchestrate.md +13 -3
  242. package/skill/{Workflows → workflows}/PlatformHooks.md +19 -3
  243. package/skill/workflows/Registry.md +99 -0
  244. package/skill/{Workflows → workflows}/Schedule.md +3 -3
  245. package/skill/workflows/SignalsDashboard.md +87 -0
  246. package/skill/{Workflows → workflows}/Sync.md +3 -1
  247. package/skill/{Workflows → workflows}/UnitTest.md +19 -0
  248. package/skill/{Workflows → workflows}/Watch.md +42 -2
  249. package/skill/{Workflows → workflows}/Workflows.md +39 -2
  250. package/apps/local-dashboard/dist/assets/index-D8O-RG1I.js +0 -60
  251. package/apps/local-dashboard/dist/assets/index-_EcLywDg.css +0 -1
  252. package/apps/local-dashboard/dist/assets/vendor-react-CKkiCskZ.js +0 -11
  253. package/apps/local-dashboard/dist/assets/vendor-ui-CGEmUayx.js +0 -12
  254. package/cli/selftune/utils/html.ts +0 -27
  255. package/packages/ui/src/components/RecentActivityFeed.tsx +0 -117
  256. /package/skill/{Workflows → workflows}/AlphaUpload.md +0 -0
  257. /package/skill/{Workflows → workflows}/AutoActivation.md +0 -0
  258. /package/skill/{Workflows → workflows}/Badge.md +0 -0
  259. /package/skill/{Workflows → workflows}/Composability.md +0 -0
  260. /package/skill/{Workflows → workflows}/EvolutionMemory.md +0 -0
  261. /package/skill/{Workflows → workflows}/ExportCanonical.md +0 -0
  262. /package/skill/{Workflows → workflows}/Hook.md +0 -0
  263. /package/skill/{Workflows → workflows}/ImportSkillsBench.md +0 -0
  264. /package/skill/{Workflows → workflows}/Quickstart.md +0 -0
  265. /package/skill/{Workflows → workflows}/Recover.md +0 -0
  266. /package/skill/{Workflows → workflows}/RepairSkillUsage.md +0 -0
  267. /package/skill/{Workflows → workflows}/Replay.md +0 -0
  268. /package/skill/{Workflows → workflows}/Rollback.md +0 -0
  269. /package/skill/{Workflows → workflows}/Telemetry.md +0 -0
  270. /package/skill/{Workflows → workflows}/Uninstall.md +0 -0
@@ -14,14 +14,20 @@ import { basename, dirname, isAbsolute, join } from "node:path";
14
14
 
15
15
  import type { EvalEntry, RoutingReplayEntryResult, RoutingReplayFixture } from "../types.js";
16
16
  import { parseFrontmatter } from "../utils/frontmatter.js";
17
- import { containsWholeSkillMention } from "../utils/skill-discovery.js";
18
- import { findGitRepositoryRoot } from "../utils/skill-discovery.js";
17
+ import {
18
+ containsWholeSkillMention,
19
+ extractExplicitSkillMentions,
20
+ extractSkillNamesFromPathReferences,
21
+ findGitRepositoryRoot,
22
+ } from "../utils/skill-discovery.js";
19
23
  import {
20
24
  extractWhenToUseLines,
21
25
  jaccardSimilarity,
22
26
  tokenizeText,
23
27
  } from "../utils/text-similarity.js";
24
- import { replaceSection } from "./deploy-proposal.js";
28
+ import { replaceBody, replaceSection } from "./deploy-proposal.js";
29
+ import { replaceDescription } from "../utils/frontmatter.js";
30
+ import type { ReplayValidationOptions } from "./engines/replay-engine.js";
25
31
 
26
32
  interface ReplaySkillSurface {
27
33
  skillName: string;
@@ -31,29 +37,34 @@ interface ReplaySkillSurface {
31
37
 
32
38
  interface ReplayWorkspace {
33
39
  rootDir: string;
40
+ skillRegistryDir: string;
34
41
  targetSkillPath: string;
35
42
  competingSkillPaths: string[];
36
43
  }
37
44
 
38
- export interface ClaudeRuntimeReplayInvokerInput {
45
+ export type RuntimeReplayContentTarget = "routing" | "description" | "body";
46
+
47
+ export interface RuntimeReplayInvokerInput {
39
48
  query: string;
49
+ platform: RoutingReplayFixture["platform"];
40
50
  workspaceRoot: string;
51
+ skillRegistryDir: string;
41
52
  targetSkillName: string;
42
53
  targetSkillPath: string;
43
54
  competingSkillPaths: string[];
44
55
  }
45
56
 
46
- export interface ClaudeRuntimeReplayObservation {
47
- invokedSkillNames: string[];
57
+ export interface RuntimeReplayObservation {
58
+ triggeredSkillNames: string[];
48
59
  readSkillPaths: string[];
49
60
  rawOutput: string;
50
61
  sessionId?: string;
51
62
  runtimeError?: string;
52
63
  }
53
64
 
54
- export type ClaudeRuntimeReplayInvoker = (
55
- input: ClaudeRuntimeReplayInvokerInput,
56
- ) => Promise<ClaudeRuntimeReplayObservation>;
65
+ export type RuntimeReplayInvoker = (
66
+ input: RuntimeReplayInvokerInput,
67
+ ) => Promise<RuntimeReplayObservation>;
57
68
 
58
69
  /**
59
70
  * Minimum score needed before replay treats routing text or skill-surface overlap
@@ -64,6 +75,13 @@ const HOST_REPLAY_MATCH_THRESHOLD = 0.18;
64
75
  const CLAUDE_RUNTIME_REPLAY_TIMEOUT_MS = 30_000;
65
76
  const CLAUDE_RUNTIME_ROUTING_PROMPT =
66
77
  "You are being evaluated only on skill routing. Do not solve the user's task. If a local project skill is relevant, invoke exactly one skill immediately. If no local project skill fits, respond with NO_SKILL and do not browse unrelated files.";
78
+ const HOST_RUNTIME_REPLAY_TIMEOUT_MS = 45_000;
79
+ const GENERIC_RUNTIME_ROUTING_PROMPT = [
80
+ "You are being evaluated only on local skill routing.",
81
+ "Do not solve the user's task.",
82
+ "If exactly one local project skill is relevant, open only that skill's SKILL.md immediately and stop after selecting it.",
83
+ "If no local project skill fits, reply with NO_SKILL and do not browse unrelated files.",
84
+ ].join(" ");
67
85
 
68
86
  function resolveReplayPath(path: string): string {
69
87
  try {
@@ -105,6 +123,26 @@ function listCompetingSkillPaths(targetSkillPath: string): string[] {
105
123
  return competingPaths.sort((a, b) => a.localeCompare(b));
106
124
  }
107
125
 
126
+ function getRuntimeReplayRegistryRelativeDir(platform: RoutingReplayFixture["platform"]): string {
127
+ switch (platform) {
128
+ case "claude_code":
129
+ return join(".claude", "skills");
130
+ case "codex":
131
+ return join(".agents", "skills");
132
+ case "opencode":
133
+ return join(".opencode", "skills");
134
+ }
135
+ }
136
+
137
+ export function resolveRuntimeReplayPlatform(
138
+ agent: string | null | undefined,
139
+ ): RoutingReplayFixture["platform"] | undefined {
140
+ if (agent === "claude") return "claude_code";
141
+ if (agent === "codex") return "codex";
142
+ if (agent === "opencode") return "opencode";
143
+ return undefined;
144
+ }
145
+
108
146
  export function buildRoutingReplayFixture(options: {
109
147
  skillName: string;
110
148
  skillPath: string;
@@ -127,9 +165,19 @@ export function buildRoutingReplayFixture(options: {
127
165
  };
128
166
  }
129
167
 
130
- function buildRuntimeReplayTargetContent(skillPath: string, routing: string): string {
168
+ function buildRuntimeReplayTargetContent(
169
+ skillPath: string,
170
+ content: string,
171
+ contentTarget: RuntimeReplayContentTarget,
172
+ ): string {
131
173
  const currentContent = readFileSync(skillPath, "utf8");
132
- return replaceSection(currentContent, "Workflow Routing", routing.trim());
174
+ if (contentTarget === "body") {
175
+ return replaceBody(currentContent, content.trim());
176
+ }
177
+ if (contentTarget === "description") {
178
+ return replaceDescription(currentContent, content.trim());
179
+ }
180
+ return replaceSection(currentContent, "Workflow Routing", content.trim());
133
181
  }
134
182
 
135
183
  function stageReplaySkill(
@@ -148,18 +196,19 @@ function stageReplaySkill(
148
196
 
149
197
  function buildRuntimeReplayWorkspace(
150
198
  fixture: RoutingReplayFixture,
151
- routing: string,
199
+ content: string,
200
+ contentTarget: RuntimeReplayContentTarget,
152
201
  ): ReplayWorkspace {
153
202
  const rootDir = mkdtempSync(join(tmpdir(), "selftune-runtime-replay-"));
154
203
  try {
155
- const registryDir = join(rootDir, ".claude", "skills");
204
+ const registryDir = join(rootDir, getRuntimeReplayRegistryRelativeDir(fixture.platform));
156
205
  mkdirSync(join(rootDir, ".git"), { recursive: true });
157
206
  mkdirSync(registryDir, { recursive: true });
158
207
 
159
208
  const targetSkillPath = stageReplaySkill(
160
209
  registryDir,
161
210
  fixture.target_skill_path,
162
- buildRuntimeReplayTargetContent(fixture.target_skill_path, routing),
211
+ buildRuntimeReplayTargetContent(fixture.target_skill_path, content, contentTarget),
163
212
  );
164
213
  const competingSkillPaths = fixture.competing_skill_paths.map((skillPath) =>
165
214
  stageReplaySkill(registryDir, skillPath),
@@ -167,6 +216,7 @@ function buildRuntimeReplayWorkspace(
167
216
 
168
217
  return {
169
218
  rootDir,
219
+ skillRegistryDir: registryDir,
170
220
  targetSkillPath,
171
221
  competingSkillPaths,
172
222
  };
@@ -180,8 +230,8 @@ function cleanupRuntimeReplayWorkspace(workspace: ReplayWorkspace): void {
180
230
  rmSync(workspace.rootDir, { recursive: true, force: true });
181
231
  }
182
232
 
183
- function parseClaudeRuntimeReplayOutput(rawOutput: string): ClaudeRuntimeReplayObservation {
184
- const invokedSkillNames = new Set<string>();
233
+ function parseClaudeRuntimeReplayOutput(rawOutput: string): RuntimeReplayObservation {
234
+ const triggeredSkillNames = new Set<string>();
185
235
  const readSkillPaths = new Set<string>();
186
236
  let sessionId: string | undefined;
187
237
  let runtimeError: string | undefined;
@@ -227,7 +277,7 @@ function parseClaudeRuntimeReplayOutput(rawOutput: string): ClaudeRuntimeReplayO
227
277
  if (toolName === "Skill") {
228
278
  const skillName = input.skill;
229
279
  if (typeof skillName === "string" && skillName.trim()) {
230
- invokedSkillNames.add(skillName.trim());
280
+ triggeredSkillNames.add(skillName.trim());
231
281
  }
232
282
  }
233
283
 
@@ -241,7 +291,268 @@ function parseClaudeRuntimeReplayOutput(rawOutput: string): ClaudeRuntimeReplayO
241
291
  }
242
292
 
243
293
  return {
244
- invokedSkillNames: [...invokedSkillNames],
294
+ triggeredSkillNames: [...triggeredSkillNames],
295
+ readSkillPaths: [...readSkillPaths],
296
+ rawOutput,
297
+ ...(sessionId ? { sessionId } : {}),
298
+ ...(runtimeError ? { runtimeError } : {}),
299
+ };
300
+ }
301
+
302
+ function buildKnownSkillNames(input: RuntimeReplayInvokerInput): Set<string> {
303
+ return new Set([
304
+ input.targetSkillName.trim(),
305
+ ...input.competingSkillPaths.map((skillPath) => basename(dirname(skillPath)).trim()),
306
+ ]);
307
+ }
308
+
309
+ function extractReplaySkillPathReferences(text: string): string[] {
310
+ if (!text) return [];
311
+
312
+ const matches = new Set<string>();
313
+ const patterns = [
314
+ /(?:^|[\s"'`])((?:\/etc\/codex\/skills\/[^/\s"'`]+|[^"'`\s]*?\.agents\/skills\/[^/\s"'`]+|[^"'`\s]*?\.codex\/skills\/(?:\.system\/)?[^/\s"'`]+|[^"'`\s]*?\.opencode\/skills\/[^/\s"'`]+|[^"'`\s]*?\.claude\/skills\/[^/\s"'`]+)\/SKILL\.md)(?=[\s"'`]|$)/gi,
315
+ ];
316
+
317
+ for (const pattern of patterns) {
318
+ let match = pattern.exec(text);
319
+ while (match !== null) {
320
+ const value = match[1]?.trim();
321
+ if (value) {
322
+ matches.add(value);
323
+ }
324
+ match = pattern.exec(text);
325
+ }
326
+ }
327
+
328
+ return [...matches];
329
+ }
330
+
331
+ function normalizeReplayEventType(value: unknown): string {
332
+ return typeof value === "string" ? value.replace(/[._]/g, "-").trim().toLowerCase() : "";
333
+ }
334
+
335
+ export function parseCodexRuntimeReplayOutput(
336
+ rawOutput: string,
337
+ knownSkillNames: Set<string>,
338
+ ): RuntimeReplayObservation {
339
+ const triggeredSkillNames = new Set<string>();
340
+ const readSkillPaths = new Set<string>();
341
+ let sessionId: string | undefined;
342
+ let runtimeError: string | undefined;
343
+
344
+ const noteSkillPathsAndNames = (text: unknown): void => {
345
+ if (typeof text !== "string" || !text) return;
346
+
347
+ for (const filePath of extractReplaySkillPathReferences(text)) {
348
+ readSkillPaths.add(filePath);
349
+ }
350
+
351
+ for (const skillName of extractSkillNamesFromPathReferences(text, knownSkillNames)) {
352
+ triggeredSkillNames.add(skillName);
353
+ }
354
+ };
355
+
356
+ const noteExplicitMentions = (text: unknown): void => {
357
+ if (typeof text !== "string" || !text) return;
358
+ for (const skillName of extractExplicitSkillMentions(text, knownSkillNames)) {
359
+ triggeredSkillNames.add(skillName);
360
+ }
361
+ };
362
+
363
+ for (const line of rawOutput.split("\n")) {
364
+ const trimmed = line.trim();
365
+ if (!trimmed) continue;
366
+
367
+ let parsed: Record<string, unknown>;
368
+ try {
369
+ parsed = JSON.parse(trimmed);
370
+ } catch {
371
+ continue;
372
+ }
373
+
374
+ const eventType = normalizeReplayEventType(parsed.type);
375
+
376
+ const threadId = parsed.thread_id;
377
+ if (typeof threadId === "string" && threadId) {
378
+ sessionId = threadId;
379
+ }
380
+
381
+ if (typeof parsed.error === "string" && parsed.error) {
382
+ runtimeError = parsed.error;
383
+ } else if (eventType === "turn-failed") {
384
+ const error = parsed.error;
385
+ if (typeof error === "object" && error !== null) {
386
+ const message = (error as Record<string, unknown>).message;
387
+ if (typeof message === "string") {
388
+ runtimeError = message;
389
+ }
390
+ }
391
+ } else if (eventType === "error" && typeof parsed.message === "string" && parsed.message) {
392
+ runtimeError = parsed.message;
393
+ }
394
+
395
+ if (
396
+ eventType === "item-completed" ||
397
+ eventType === "item-started" ||
398
+ eventType === "item-updated"
399
+ ) {
400
+ const item =
401
+ typeof parsed.item === "object" && parsed.item !== null
402
+ ? (parsed.item as Record<string, unknown>)
403
+ : undefined;
404
+ const itemType = normalizeReplayEventType(item?.item_type ?? item?.type);
405
+
406
+ if (itemType === "command-execution") {
407
+ noteSkillPathsAndNames(item?.command);
408
+ if (item?.exit_code !== undefined && item.exit_code !== 0 && !runtimeError) {
409
+ runtimeError = `command execution exited with code ${String(item.exit_code)}`;
410
+ }
411
+ }
412
+ }
413
+
414
+ if (eventType === "response-item") {
415
+ const payload =
416
+ typeof parsed.payload === "object" && parsed.payload !== null
417
+ ? (parsed.payload as Record<string, unknown>)
418
+ : undefined;
419
+ const payloadType = normalizeReplayEventType(payload?.type);
420
+
421
+ if (payloadType === "function-call") {
422
+ noteSkillPathsAndNames(payload?.arguments);
423
+ } else if (payloadType === "message") {
424
+ const role = payload?.role;
425
+ const content = Array.isArray(payload?.content)
426
+ ? (payload.content as Array<Record<string, unknown>>)
427
+ : [];
428
+ for (const part of content) {
429
+ const text = part?.text;
430
+ noteSkillPathsAndNames(text);
431
+ if (role === "user") {
432
+ noteExplicitMentions(text);
433
+ }
434
+ }
435
+ } else if (payloadType === "agent-reasoning") {
436
+ noteSkillPathsAndNames(payload?.text);
437
+ }
438
+ }
439
+ }
440
+
441
+ return {
442
+ triggeredSkillNames: [...triggeredSkillNames],
443
+ readSkillPaths: [...readSkillPaths],
444
+ rawOutput,
445
+ ...(sessionId ? { sessionId } : {}),
446
+ ...(runtimeError ? { runtimeError } : {}),
447
+ };
448
+ }
449
+
450
+ export function parseOpenCodeRuntimeReplayOutput(
451
+ rawOutput: string,
452
+ knownSkillNames: Set<string>,
453
+ ): RuntimeReplayObservation {
454
+ const triggeredSkillNames = new Set<string>();
455
+ const readSkillPaths = new Set<string>();
456
+ let sessionId: string | undefined;
457
+ let runtimeError: string | undefined;
458
+
459
+ const noteSkillPathsAndNames = (text: unknown): void => {
460
+ if (typeof text !== "string" || !text) return;
461
+
462
+ for (const filePath of extractReplaySkillPathReferences(text)) {
463
+ readSkillPaths.add(filePath);
464
+ }
465
+
466
+ for (const skillName of extractSkillNamesFromPathReferences(text, knownSkillNames)) {
467
+ triggeredSkillNames.add(skillName);
468
+ }
469
+ };
470
+
471
+ const noteExplicitMentions = (text: unknown): void => {
472
+ if (typeof text !== "string" || !text) return;
473
+ for (const skillName of extractExplicitSkillMentions(text, knownSkillNames)) {
474
+ triggeredSkillNames.add(skillName);
475
+ }
476
+ };
477
+
478
+ for (const line of rawOutput.split("\n")) {
479
+ const trimmed = line.trim();
480
+ if (!trimmed) continue;
481
+
482
+ let parsed: Record<string, unknown>;
483
+ try {
484
+ parsed = JSON.parse(trimmed);
485
+ } catch {
486
+ continue;
487
+ }
488
+
489
+ const nestedPart =
490
+ typeof parsed.part === "object" && parsed.part !== null
491
+ ? (parsed.part as Record<string, unknown>)
492
+ : undefined;
493
+ const eventType = normalizeReplayEventType(nestedPart?.type ?? parsed.type);
494
+ const payload =
495
+ nestedPart &&
496
+ (nestedPart.tool !== undefined || nestedPart.state !== undefined || nestedPart.text)
497
+ ? nestedPart
498
+ : parsed;
499
+
500
+ const possibleSessionId = parsed.sessionID ?? parsed.session_id ?? payload.sessionID;
501
+ if (typeof possibleSessionId === "string" && possibleSessionId) {
502
+ sessionId = possibleSessionId;
503
+ }
504
+
505
+ if (typeof parsed.error === "string" && parsed.error) {
506
+ runtimeError = parsed.error;
507
+ } else if (typeof payload.error === "string" && payload.error) {
508
+ runtimeError = payload.error;
509
+ }
510
+
511
+ if (eventType === "tool") {
512
+ const toolName = normalizeReplayEventType(payload.tool ?? payload.name);
513
+ const state =
514
+ typeof payload.state === "object" && payload.state !== null
515
+ ? (payload.state as Record<string, unknown>)
516
+ : {};
517
+ const input =
518
+ typeof state.input === "object" && state.input !== null
519
+ ? (state.input as Record<string, unknown>)
520
+ : {};
521
+ const status = normalizeReplayEventType(state.status);
522
+
523
+ if (toolName === "read" || toolName === "read-file") {
524
+ const filePath = input.filePath ?? input.file_path ?? input.path;
525
+ if (typeof filePath === "string" && basename(filePath).toUpperCase() === "SKILL.MD") {
526
+ readSkillPaths.add(filePath);
527
+ triggeredSkillNames.add(basename(dirname(filePath)));
528
+ }
529
+ } else if (toolName === "bash" || toolName === "execute-bash") {
530
+ noteSkillPathsAndNames(input.command ?? input.cmd);
531
+ }
532
+
533
+ const metadata =
534
+ typeof state.metadata === "object" && state.metadata !== null
535
+ ? (state.metadata as Record<string, unknown>)
536
+ : {};
537
+ const exitCode = metadata.exit;
538
+ if (status === "completed" && exitCode !== undefined && exitCode !== 0 && !runtimeError) {
539
+ runtimeError = `tool exited with code ${String(exitCode)}`;
540
+ }
541
+ } else if (eventType === "text" || eventType === "reasoning") {
542
+ noteSkillPathsAndNames(payload.text);
543
+ noteExplicitMentions(payload.text);
544
+ } else if (eventType === "error" && typeof payload.message === "string" && payload.message) {
545
+ runtimeError = payload.message;
546
+ } else if (eventType === "step-finish") {
547
+ const reason = payload.reason;
548
+ if (typeof reason === "string" && reason.toLowerCase() === "error" && !runtimeError) {
549
+ runtimeError = "step finished with error";
550
+ }
551
+ }
552
+ }
553
+
554
+ return {
555
+ triggeredSkillNames: [...triggeredSkillNames],
245
556
  readSkillPaths: [...readSkillPaths],
246
557
  rawOutput,
247
558
  ...(sessionId ? { sessionId } : {}),
@@ -250,8 +561,8 @@ function parseClaudeRuntimeReplayOutput(rawOutput: string): ClaudeRuntimeReplayO
250
561
  }
251
562
 
252
563
  async function invokeClaudeRuntimeReplay(
253
- input: ClaudeRuntimeReplayInvokerInput,
254
- ): Promise<ClaudeRuntimeReplayObservation> {
564
+ input: RuntimeReplayInvokerInput,
565
+ ): Promise<RuntimeReplayObservation> {
255
566
  const command = [
256
567
  "claude",
257
568
  "-p",
@@ -289,7 +600,7 @@ async function invokeClaudeRuntimeReplay(
289
600
  const observation = parseClaudeRuntimeReplayOutput(stdoutText);
290
601
  const combinedError = [observation.runtimeError, stderrText.trim()].filter(Boolean).join(" | ");
291
602
  const hasRoutingSignal =
292
- observation.invokedSkillNames.length > 0 || observation.readSkillPaths.length > 0;
603
+ observation.triggeredSkillNames.length > 0 || observation.readSkillPaths.length > 0;
293
604
 
294
605
  if (exitCode !== 0 && !hasRoutingSignal) {
295
606
  throw new Error(combinedError || `claude runtime replay exited with code ${exitCode}`);
@@ -301,20 +612,101 @@ async function invokeClaudeRuntimeReplay(
301
612
  };
302
613
  }
303
614
 
304
- function prefixReplayEvidence(
305
- results: RoutingReplayEntryResult[],
306
- prefix: string,
307
- ): RoutingReplayEntryResult[] {
308
- return results.map((result) => ({
309
- ...result,
310
- evidence: result.evidence ? `${prefix}; ${result.evidence}` : prefix,
311
- }));
615
+ async function invokeCodexRuntimeReplay(
616
+ input: RuntimeReplayInvokerInput,
617
+ ): Promise<RuntimeReplayObservation> {
618
+ const prompt = `${GENERIC_RUNTIME_ROUTING_PROMPT}\n\nUser request: ${input.query}`;
619
+ const command = [
620
+ "codex",
621
+ "exec",
622
+ "--json",
623
+ "--skip-git-repo-check",
624
+ "--sandbox",
625
+ "read-only",
626
+ "-C",
627
+ input.workspaceRoot,
628
+ prompt,
629
+ ];
630
+
631
+ const proc = Bun.spawn(command, {
632
+ cwd: input.workspaceRoot,
633
+ stdout: "pipe",
634
+ stderr: "pipe",
635
+ env: { ...process.env, CLAUDECODE: "" },
636
+ });
637
+ const timeout = setTimeout(() => proc.kill(), HOST_RUNTIME_REPLAY_TIMEOUT_MS);
638
+
639
+ const [stdoutText, stderrText, exitCode] = await Promise.all([
640
+ new Response(proc.stdout).text(),
641
+ new Response(proc.stderr).text(),
642
+ proc.exited,
643
+ ]);
644
+ clearTimeout(timeout);
645
+
646
+ const observation = parseCodexRuntimeReplayOutput(stdoutText, buildKnownSkillNames(input));
647
+ const combinedError = [observation.runtimeError, stderrText.trim()].filter(Boolean).join(" | ");
648
+ const hasRoutingSignal =
649
+ observation.triggeredSkillNames.length > 0 || observation.readSkillPaths.length > 0;
650
+
651
+ if (exitCode !== 0 && !hasRoutingSignal) {
652
+ throw new Error(combinedError || `codex runtime replay exited with code ${exitCode}`);
653
+ }
654
+
655
+ return {
656
+ ...observation,
657
+ ...(combinedError ? { runtimeError: combinedError } : {}),
658
+ };
659
+ }
660
+
661
+ async function invokeOpenCodeRuntimeReplay(
662
+ input: RuntimeReplayInvokerInput,
663
+ ): Promise<RuntimeReplayObservation> {
664
+ const prompt = `${GENERIC_RUNTIME_ROUTING_PROMPT}\n\nUser request: ${input.query}`;
665
+ const command = [
666
+ "opencode",
667
+ "run",
668
+ "--format",
669
+ "json",
670
+ "--dir",
671
+ input.workspaceRoot,
672
+ "--dangerously-skip-permissions",
673
+ prompt,
674
+ ];
675
+
676
+ const proc = Bun.spawn(command, {
677
+ cwd: input.workspaceRoot,
678
+ stdout: "pipe",
679
+ stderr: "pipe",
680
+ env: { ...process.env, CLAUDECODE: "" },
681
+ });
682
+ const timeout = setTimeout(() => proc.kill(), HOST_RUNTIME_REPLAY_TIMEOUT_MS);
683
+
684
+ const [stdoutText, stderrText, exitCode] = await Promise.all([
685
+ new Response(proc.stdout).text(),
686
+ new Response(proc.stderr).text(),
687
+ proc.exited,
688
+ ]);
689
+ clearTimeout(timeout);
690
+
691
+ const observation = parseOpenCodeRuntimeReplayOutput(stdoutText, buildKnownSkillNames(input));
692
+ const combinedError = [observation.runtimeError, stderrText.trim()].filter(Boolean).join(" | ");
693
+ const hasRoutingSignal =
694
+ observation.triggeredSkillNames.length > 0 || observation.readSkillPaths.length > 0;
695
+
696
+ if (exitCode !== 0 && !hasRoutingSignal) {
697
+ throw new Error(combinedError || `opencode runtime replay exited with code ${exitCode}`);
698
+ }
699
+
700
+ return {
701
+ ...observation,
702
+ ...(combinedError ? { runtimeError: combinedError } : {}),
703
+ };
312
704
  }
313
705
 
314
706
  function evaluateRuntimeReplayObservation(
315
707
  entry: EvalEntry,
316
708
  fixture: RoutingReplayFixture,
317
- observation: ClaudeRuntimeReplayObservation,
709
+ observation: RuntimeReplayObservation,
318
710
  workspace: ReplayWorkspace,
319
711
  ): RoutingReplayEntryResult {
320
712
  const normalizedReadPaths = new Set(
@@ -325,14 +717,14 @@ function evaluateRuntimeReplayObservation(
325
717
  ...workspace.competingSkillPaths.map(resolveReplayPath),
326
718
  ]);
327
719
  const targetSkillName = fixture.target_skill_name.trim();
328
- const targetInvoked = observation.invokedSkillNames.includes(targetSkillName);
329
- const competingInvoked = observation.invokedSkillNames.find((skillName) =>
720
+ const targetTriggered = observation.triggeredSkillNames.includes(targetSkillName);
721
+ const competingTriggered = observation.triggeredSkillNames.find((skillName) =>
330
722
  fixture.competing_skill_paths.some(
331
723
  (skillPath) => basename(dirname(skillPath)).trim() === skillName.trim(),
332
724
  ),
333
725
  );
334
- const unrelatedInvoked = observation.invokedSkillNames.find(
335
- (skillName) => skillName.trim() !== targetSkillName && skillName.trim() !== competingInvoked,
726
+ const unrelatedTriggered = observation.triggeredSkillNames.find(
727
+ (skillName) => skillName.trim() !== targetSkillName && skillName.trim() !== competingTriggered,
336
728
  );
337
729
  const unrelatedReadPaths = [...normalizedReadPaths].filter((path) => !allowedReadPaths.has(path));
338
730
  const targetRead = normalizedReadPaths.has(resolveReplayPath(workspace.targetSkillPath));
@@ -342,43 +734,43 @@ function evaluateRuntimeReplayObservation(
342
734
  const sessionPrefix = observation.sessionId
343
735
  ? `runtime replay session ${observation.sessionId}`
344
736
  : "runtime replay";
345
- if (observation.invokedSkillNames.length > 1) {
737
+ if (observation.triggeredSkillNames.length > 1) {
346
738
  return {
347
739
  query: entry.query,
348
740
  should_trigger: entry.should_trigger,
349
741
  triggered: false,
350
742
  passed: false,
351
- evidence: `${sessionPrefix} invoked multiple skills: ${observation.invokedSkillNames.join(", ")}`,
743
+ evidence: `${sessionPrefix} selected multiple skills: ${observation.triggeredSkillNames.join(", ")}`,
352
744
  };
353
745
  }
354
746
 
355
- if (targetInvoked) {
747
+ if (targetTriggered) {
356
748
  return {
357
749
  query: entry.query,
358
750
  should_trigger: entry.should_trigger,
359
751
  triggered: true,
360
752
  passed: entry.should_trigger,
361
- evidence: `${sessionPrefix} invoked target skill: ${targetSkillName}`,
753
+ evidence: `${sessionPrefix} selected target skill: ${targetSkillName}`,
362
754
  };
363
755
  }
364
756
 
365
- if (competingInvoked) {
757
+ if (competingTriggered) {
366
758
  return {
367
759
  query: entry.query,
368
760
  should_trigger: entry.should_trigger,
369
761
  triggered: false,
370
762
  passed: !entry.should_trigger,
371
- evidence: `${sessionPrefix} invoked competing skill: ${competingInvoked}`,
763
+ evidence: `${sessionPrefix} selected competing skill: ${competingTriggered}`,
372
764
  };
373
765
  }
374
766
 
375
- if (unrelatedInvoked) {
767
+ if (unrelatedTriggered) {
376
768
  return {
377
769
  query: entry.query,
378
770
  should_trigger: entry.should_trigger,
379
771
  triggered: false,
380
772
  passed: false,
381
- evidence: `${sessionPrefix} invoked unrelated skill: ${unrelatedInvoked}`,
773
+ evidence: `${sessionPrefix} selected unrelated skill: ${unrelatedTriggered}`,
382
774
  };
383
775
  }
384
776
 
@@ -398,7 +790,7 @@ function evaluateRuntimeReplayObservation(
398
790
  should_trigger: entry.should_trigger,
399
791
  triggered: false,
400
792
  passed: !entry.should_trigger,
401
- evidence: `${sessionPrefix} only read the target skill without invoking it`,
793
+ evidence: `${sessionPrefix} only read the target skill without selecting it`,
402
794
  };
403
795
  }
404
796
 
@@ -408,7 +800,7 @@ function evaluateRuntimeReplayObservation(
408
800
  should_trigger: entry.should_trigger,
409
801
  triggered: false,
410
802
  passed: !entry.should_trigger,
411
- evidence: `${sessionPrefix} only read a competing skill without invoking it`,
803
+ evidence: `${sessionPrefix} only read a competing skill without selecting it`,
412
804
  };
413
805
  }
414
806
 
@@ -421,7 +813,7 @@ function evaluateRuntimeReplayObservation(
421
813
  should_trigger: entry.should_trigger,
422
814
  triggered: false,
423
815
  passed: !entry.should_trigger,
424
- evidence: `${sessionPrefix} did not invoke any local project skill`,
816
+ evidence: `${sessionPrefix} did not select any local project skill`,
425
817
  };
426
818
  }
427
819
 
@@ -578,33 +970,75 @@ export function runHostReplayFixture(options: {
578
970
  });
579
971
  }
580
972
 
581
- export async function runClaudeRuntimeReplayFixture(options: {
973
+ function getDefaultRuntimeReplayInvoker(
974
+ platform: RoutingReplayFixture["platform"],
975
+ ): RuntimeReplayInvoker {
976
+ switch (platform) {
977
+ case "claude_code":
978
+ return invokeClaudeRuntimeReplay;
979
+ case "codex":
980
+ return invokeCodexRuntimeReplay;
981
+ case "opencode":
982
+ return invokeOpenCodeRuntimeReplay;
983
+ }
984
+ }
985
+
986
+ export function buildRuntimeReplayValidationOptions(options: {
987
+ skillName: string;
988
+ skillPath: string;
989
+ agent: string | null | undefined;
990
+ contentTarget?: RuntimeReplayContentTarget;
991
+ }): ReplayValidationOptions | undefined {
992
+ const platform = resolveRuntimeReplayPlatform(options.agent);
993
+ if (!platform) return undefined;
994
+
995
+ try {
996
+ const replayFixture = buildRoutingReplayFixture({
997
+ skillName: options.skillName,
998
+ skillPath: options.skillPath,
999
+ platform,
1000
+ });
1001
+
1002
+ return {
1003
+ replayFixture,
1004
+ replayRunner: async ({ routing, evalSet, fixture }) =>
1005
+ await runHostRuntimeReplayFixture({
1006
+ routing,
1007
+ evalSet,
1008
+ fixture,
1009
+ contentTarget: options.contentTarget ?? "routing",
1010
+ }),
1011
+ };
1012
+ } catch {
1013
+ return undefined;
1014
+ }
1015
+ }
1016
+
1017
+ export async function runHostRuntimeReplayFixture(options: {
582
1018
  routing: string;
583
1019
  evalSet: EvalEntry[];
584
1020
  fixture: RoutingReplayFixture;
585
- runtimeInvoker?: ClaudeRuntimeReplayInvoker;
1021
+ contentTarget?: RuntimeReplayContentTarget;
1022
+ runtimeInvoker?: RuntimeReplayInvoker;
586
1023
  }): Promise<RoutingReplayEntryResult[]> {
587
- const fallbackReason = (reason: string) =>
588
- `runtime replay unavailable; fell back to fixture simulation (${reason})`;
589
-
590
- if (options.fixture.platform !== "claude_code") {
591
- return prefixReplayEvidence(
592
- runHostReplayFixture(options),
593
- fallbackReason(`unsupported platform ${options.fixture.platform}`),
594
- );
595
- }
596
-
597
- const invokeRuntime = options.runtimeInvoker ?? invokeClaudeRuntimeReplay;
1024
+ const invokeRuntime =
1025
+ options.runtimeInvoker ?? getDefaultRuntimeReplayInvoker(options.fixture.platform);
598
1026
  let workspace: ReplayWorkspace | undefined;
599
1027
 
600
1028
  try {
601
- workspace = buildRuntimeReplayWorkspace(options.fixture, options.routing);
1029
+ workspace = buildRuntimeReplayWorkspace(
1030
+ options.fixture,
1031
+ options.routing,
1032
+ options.contentTarget ?? "routing",
1033
+ );
602
1034
  const results: RoutingReplayEntryResult[] = [];
603
1035
 
604
1036
  for (const entry of options.evalSet) {
605
1037
  const observation = await invokeRuntime({
606
1038
  query: entry.query,
1039
+ platform: options.fixture.platform,
607
1040
  workspaceRoot: workspace.rootDir,
1041
+ skillRegistryDir: workspace.skillRegistryDir,
608
1042
  targetSkillName: options.fixture.target_skill_name,
609
1043
  targetSkillPath: workspace.targetSkillPath,
610
1044
  competingSkillPaths: workspace.competingSkillPaths,
@@ -617,8 +1051,24 @@ export async function runClaudeRuntimeReplayFixture(options: {
617
1051
  return results;
618
1052
  } catch (error) {
619
1053
  const message = error instanceof Error ? error.message : String(error);
620
- return prefixReplayEvidence(runHostReplayFixture(options), fallbackReason(message));
1054
+ throw new Error(message);
621
1055
  } finally {
622
1056
  if (workspace) cleanupRuntimeReplayWorkspace(workspace);
623
1057
  }
624
1058
  }
1059
+
1060
+ export async function runClaudeRuntimeReplayFixture(options: {
1061
+ routing: string;
1062
+ evalSet: EvalEntry[];
1063
+ fixture: RoutingReplayFixture;
1064
+ contentTarget?: RuntimeReplayContentTarget;
1065
+ runtimeInvoker?: RuntimeReplayInvoker;
1066
+ }): Promise<RoutingReplayEntryResult[]> {
1067
+ if (options.fixture.platform !== "claude_code") {
1068
+ throw new Error(
1069
+ `runtime replay is only supported for claude_code fixtures (received ${options.fixture.platform})`,
1070
+ );
1071
+ }
1072
+
1073
+ return runHostRuntimeReplayFixture(options);
1074
+ }