selftune 0.2.23 → 0.2.25

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (219)
  1. package/CHANGELOG.md +6 -0
  2. package/README.md +93 -15
  3. package/apps/local-dashboard/dist/assets/index-DgY2KGP-.css +1 -0
  4. package/apps/local-dashboard/dist/assets/index-Dhgv5BQO.js +15 -0
  5. package/apps/local-dashboard/dist/assets/vendor-react-C5oyHiV1.js +11 -0
  6. package/apps/local-dashboard/dist/assets/{vendor-table-BIiI3YhS.js → vendor-table-Bc_bbKd8.js} +1 -1
  7. package/apps/local-dashboard/dist/assets/vendor-ui-B3BPIYy7.js +1 -0
  8. package/apps/local-dashboard/dist/index.html +5 -5
  9. package/cli/selftune/adapters/codex/install.ts +310 -78
  10. package/cli/selftune/adapters/opencode/install.ts +3 -4
  11. package/cli/selftune/alpha-upload/build-payloads.ts +3 -3
  12. package/cli/selftune/alpha-upload/stage-canonical.ts +17 -11
  13. package/cli/selftune/auto-update.ts +200 -8
  14. package/cli/selftune/canonical-export.ts +55 -25
  15. package/cli/selftune/command-surface.ts +397 -0
  16. package/cli/selftune/contribute/contribute.ts +64 -13
  17. package/cli/selftune/contribution-config.ts +57 -3
  18. package/cli/selftune/contribution-preferences.ts +117 -0
  19. package/cli/selftune/contribution-signals.ts +8 -4
  20. package/cli/selftune/contribution-staging.ts +13 -2
  21. package/cli/selftune/contributions.ts +55 -121
  22. package/cli/selftune/creator-contributions.ts +29 -10
  23. package/cli/selftune/cron/setup.ts +7 -3
  24. package/cli/selftune/dashboard-contract.ts +73 -0
  25. package/cli/selftune/dashboard-server.ts +168 -17
  26. package/cli/selftune/dashboard.ts +350 -17
  27. package/cli/selftune/eval/baseline.ts +21 -5
  28. package/cli/selftune/eval/execution-eval.ts +170 -0
  29. package/cli/selftune/eval/family-overlap.ts +2 -2
  30. package/cli/selftune/eval/hooks-to-evals.ts +228 -82
  31. package/cli/selftune/eval/import-skillsbench.ts +2 -2
  32. package/cli/selftune/eval/invocation-classifier.ts +56 -0
  33. package/cli/selftune/eval/synthetic-evals.ts +5 -3
  34. package/cli/selftune/eval/unit-test-cli.ts +7 -4
  35. package/cli/selftune/evolution/apply-proposal.ts +295 -0
  36. package/cli/selftune/evolution/engines/replay-engine.ts +79 -57
  37. package/cli/selftune/evolution/evolve-body.ts +100 -39
  38. package/cli/selftune/evolution/evolve.ts +244 -52
  39. package/cli/selftune/evolution/rollback.ts +0 -1
  40. package/cli/selftune/evolution/validate-body.ts +68 -42
  41. package/cli/selftune/evolution/validate-host-replay.ts +510 -60
  42. package/cli/selftune/evolution/validate-proposal.ts +11 -150
  43. package/cli/selftune/evolution/validate-routing.ts +43 -41
  44. package/cli/selftune/evolution/validation-contract.ts +91 -0
  45. package/cli/selftune/grading/auto-grade.ts +11 -7
  46. package/cli/selftune/grading/grade-session.ts +10 -16
  47. package/cli/selftune/index.ts +35 -10
  48. package/cli/selftune/ingestors/claude-replay.ts +15 -10
  49. package/cli/selftune/ingestors/codex-wrapper.ts +3 -3
  50. package/cli/selftune/ingestors/opencode-ingest.ts +2 -2
  51. package/cli/selftune/ingestors/pi-ingest.ts +3 -2
  52. package/cli/selftune/init.ts +27 -3
  53. package/cli/selftune/localdb/direct-write.ts +35 -1
  54. package/cli/selftune/localdb/queries/cron.ts +34 -0
  55. package/cli/selftune/localdb/queries/dashboard.ts +834 -0
  56. package/cli/selftune/localdb/queries/evolution.ts +158 -0
  57. package/cli/selftune/localdb/queries/execution.ts +133 -0
  58. package/cli/selftune/localdb/queries/json.ts +18 -0
  59. package/cli/selftune/localdb/queries/monitoring.ts +263 -0
  60. package/cli/selftune/localdb/queries/raw.ts +95 -0
  61. package/cli/selftune/localdb/queries/staging.ts +270 -0
  62. package/cli/selftune/localdb/queries/trust.ts +392 -0
  63. package/cli/selftune/localdb/queries.ts +60 -2288
  64. package/cli/selftune/localdb/schema.ts +21 -0
  65. package/cli/selftune/monitoring/watch.ts +96 -29
  66. package/cli/selftune/normalization.ts +3 -0
  67. package/cli/selftune/observability.ts +4 -2
  68. package/cli/selftune/orchestrate/cli.ts +161 -0
  69. package/cli/selftune/orchestrate/execute.ts +295 -0
  70. package/cli/selftune/orchestrate/finalize.ts +157 -0
  71. package/cli/selftune/orchestrate/locks.ts +40 -0
  72. package/cli/selftune/orchestrate/plan.ts +131 -0
  73. package/cli/selftune/orchestrate/post-run.ts +59 -0
  74. package/cli/selftune/orchestrate/prepare.ts +334 -0
  75. package/cli/selftune/orchestrate/report.ts +182 -0
  76. package/cli/selftune/orchestrate/runtime.ts +120 -0
  77. package/cli/selftune/orchestrate/signals.ts +48 -0
  78. package/cli/selftune/orchestrate.ts +150 -1173
  79. package/cli/selftune/repair/skill-usage.ts +5 -2
  80. package/cli/selftune/routes/overview.ts +5 -2
  81. package/cli/selftune/routes/skill-report.ts +15 -2
  82. package/cli/selftune/schedule.ts +5 -5
  83. package/cli/selftune/status.ts +39 -2
  84. package/cli/selftune/testing-readiness.ts +597 -0
  85. package/cli/selftune/types.ts +44 -4
  86. package/cli/selftune/uninstall.ts +2 -1
  87. package/cli/selftune/utils/canonical-log.ts +1 -9
  88. package/cli/selftune/utils/cli-error.ts +9 -0
  89. package/cli/selftune/utils/llm-call.ts +126 -6
  90. package/cli/selftune/utils/skill-discovery.ts +2 -0
  91. package/cli/selftune/workflows/proposals.ts +184 -0
  92. package/cli/selftune/workflows/skill-scaffold.ts +241 -0
  93. package/cli/selftune/workflows/workflows.ts +100 -26
  94. package/node_modules/@selftune/telemetry-contract/fixtures/complete-push.ts +1 -1
  95. package/node_modules/@selftune/telemetry-contract/fixtures/evidence-only-push.ts +1 -1
  96. package/node_modules/@selftune/telemetry-contract/fixtures/partial-push-no-sessions.ts +1 -1
  97. package/node_modules/@selftune/telemetry-contract/fixtures/partial-push-unresolved-parents.ts +1 -1
  98. package/node_modules/@selftune/telemetry-contract/src/schemas.ts +41 -1
  99. package/node_modules/@selftune/telemetry-contract/src/types.ts +103 -2
  100. package/package.json +25 -9
  101. package/packages/dashboard-core/AGENTS.md +18 -0
  102. package/packages/dashboard-core/README.md +30 -0
  103. package/packages/dashboard-core/index.ts +3 -0
  104. package/packages/dashboard-core/package.json +39 -0
  105. package/packages/dashboard-core/src/chrome/DashboardChrome.tsx +74 -0
  106. package/packages/dashboard-core/src/chrome/DashboardHeader.tsx +200 -0
  107. package/packages/dashboard-core/src/chrome/DashboardSidebar.tsx +219 -0
  108. package/packages/dashboard-core/src/chrome/RuntimeBadge.tsx +46 -0
  109. package/packages/dashboard-core/src/chrome/index.ts +14 -0
  110. package/packages/dashboard-core/src/chrome/types.ts +81 -0
  111. package/packages/dashboard-core/src/chrome/utils.ts +23 -0
  112. package/packages/dashboard-core/src/gates/FeatureGate.tsx +11 -0
  113. package/packages/dashboard-core/src/gates/LockedRoute.tsx +29 -0
  114. package/packages/dashboard-core/src/gates/UpgradeCard.tsx +89 -0
  115. package/packages/dashboard-core/src/gates/index.ts +3 -0
  116. package/packages/dashboard-core/src/host/DashboardHostProvider.tsx +62 -0
  117. package/packages/dashboard-core/src/host/adapter.ts +47 -0
  118. package/packages/dashboard-core/src/host/capabilities.ts +55 -0
  119. package/packages/dashboard-core/src/host/index.ts +3 -0
  120. package/packages/dashboard-core/src/models/analytics.ts +39 -0
  121. package/packages/dashboard-core/src/models/index.ts +4 -0
  122. package/packages/dashboard-core/src/models/overview.ts +98 -0
  123. package/packages/dashboard-core/src/models/runtime.ts +7 -0
  124. package/packages/dashboard-core/src/models/skills.ts +34 -0
  125. package/packages/dashboard-core/src/routes/index.ts +2 -0
  126. package/packages/dashboard-core/src/routes/manifest.test.ts +70 -0
  127. package/packages/dashboard-core/src/routes/manifest.ts +451 -0
  128. package/packages/dashboard-core/src/routes/types.ts +39 -0
  129. package/packages/dashboard-core/src/screens/analytics/AnalyticsScreen.tsx +278 -0
  130. package/packages/dashboard-core/src/screens/analytics/index.ts +1 -0
  131. package/packages/dashboard-core/src/screens/index.ts +37 -0
  132. package/packages/dashboard-core/src/screens/overview/OverviewComparisonSurface.test.ts +101 -0
  133. package/packages/dashboard-core/src/screens/overview/OverviewComparisonSurface.tsx +393 -0
  134. package/packages/dashboard-core/src/screens/overview/OverviewCompositionSurface.test.tsx +113 -0
  135. package/packages/dashboard-core/src/screens/overview/OverviewCompositionSurface.tsx +72 -0
  136. package/packages/dashboard-core/src/screens/overview/OverviewCoreSurface.tsx +71 -0
  137. package/packages/dashboard-core/src/screens/overview/OverviewOnboardingBanner.tsx +90 -0
  138. package/packages/dashboard-core/src/screens/overview/OverviewRunSummary.tsx +40 -0
  139. package/packages/dashboard-core/src/screens/overview/index.ts +16 -0
  140. package/packages/dashboard-core/src/screens/overview/types.ts +13 -0
  141. package/packages/dashboard-core/src/screens/skill-report/SkillReportDailyBreakdownSection.tsx +99 -0
  142. package/packages/dashboard-core/src/screens/skill-report/SkillReportDataQualityTabContent.tsx +35 -0
  143. package/packages/dashboard-core/src/screens/skill-report/SkillReportEvidenceRail.tsx +71 -0
  144. package/packages/dashboard-core/src/screens/skill-report/SkillReportEvidenceSection.tsx +63 -0
  145. package/packages/dashboard-core/src/screens/skill-report/SkillReportEvidenceTabContent.tsx +25 -0
  146. package/packages/dashboard-core/src/screens/skill-report/SkillReportInvocationsSection.tsx +24 -0
  147. package/packages/dashboard-core/src/screens/skill-report/SkillReportMissedQueriesSection.tsx +79 -0
  148. package/packages/dashboard-core/src/screens/skill-report/SkillReportScaffold.tsx +150 -0
  149. package/packages/dashboard-core/src/screens/skill-report/SkillReportSections.test.tsx +224 -0
  150. package/packages/dashboard-core/src/screens/skill-report/SkillReportTabs.test.tsx +76 -0
  151. package/packages/dashboard-core/src/screens/skill-report/SkillReportTabs.tsx +88 -0
  152. package/packages/dashboard-core/src/screens/skill-report/SkillReportTrendSection.tsx +33 -0
  153. package/packages/dashboard-core/src/screens/skill-report/SkillReportTrustBadge.tsx +67 -0
  154. package/packages/dashboard-core/src/screens/skill-report/index.ts +45 -0
  155. package/packages/dashboard-core/src/screens/skills/SkillsLibraryScreen.tsx +162 -0
  156. package/packages/dashboard-core/src/screens/skills/index.ts +6 -0
  157. package/packages/telemetry-contract/fixtures/complete-push.ts +1 -1
  158. package/packages/telemetry-contract/fixtures/evidence-only-push.ts +1 -1
  159. package/packages/telemetry-contract/fixtures/partial-push-no-sessions.ts +1 -1
  160. package/packages/telemetry-contract/fixtures/partial-push-unresolved-parents.ts +1 -1
  161. package/packages/telemetry-contract/src/schemas.ts +41 -1
  162. package/packages/telemetry-contract/src/types.ts +103 -2
  163. package/packages/ui/src/components/EvidenceViewer.tsx +80 -25
  164. package/packages/ui/src/components/OverviewPanels.tsx +67 -26
  165. package/packages/ui/src/primitives/tabs.tsx +7 -6
  166. package/packages/ui/src/types.ts +10 -0
  167. package/skill/SKILL.md +130 -332
  168. package/skill/agents/diagnosis-analyst.md +3 -3
  169. package/skill/agents/evolution-reviewer.md +3 -3
  170. package/skill/agents/integration-guide.md +3 -3
  171. package/skill/agents/pattern-analyst.md +2 -2
  172. package/skill/references/cli-quick-reference.md +89 -0
  173. package/skill/references/creator-playbook.md +131 -0
  174. package/skill/references/examples.md +48 -0
  175. package/skill/references/troubleshooting.md +47 -0
  176. package/skill/references/version-history.md +1 -1
  177. package/skill/selftune.contribute.json +11 -0
  178. package/skill/{Workflows → workflows}/Baseline.md +20 -1
  179. package/skill/{Workflows → workflows}/Contribute.md +23 -10
  180. package/skill/{Workflows → workflows}/Contributions.md +13 -5
  181. package/skill/workflows/CreateTestDeploy.md +170 -0
  182. package/skill/{Workflows → workflows}/CreatorContributions.md +18 -6
  183. package/skill/{Workflows → workflows}/Cron.md +1 -1
  184. package/skill/{Workflows → workflows}/Dashboard.md +20 -0
  185. package/skill/{Workflows → workflows}/Doctor.md +1 -1
  186. package/skill/{Workflows → workflows}/Evals.md +67 -2
  187. package/skill/{Workflows → workflows}/Evolve.md +119 -30
  188. package/skill/{Workflows → workflows}/EvolveBody.md +41 -1
  189. package/skill/{Workflows → workflows}/Grade.md +1 -1
  190. package/skill/{Workflows → workflows}/Initialize.md +8 -4
  191. package/skill/{Workflows → workflows}/Orchestrate.md +13 -3
  192. package/skill/{Workflows → workflows}/Schedule.md +3 -3
  193. package/skill/workflows/SignalsDashboard.md +87 -0
  194. package/skill/{Workflows → workflows}/UnitTest.md +19 -0
  195. package/skill/{Workflows → workflows}/Watch.md +42 -2
  196. package/skill/{Workflows → workflows}/Workflows.md +39 -2
  197. package/apps/local-dashboard/dist/assets/index-CwOtTrUS.css +0 -1
  198. package/apps/local-dashboard/dist/assets/index-f1HQpbeH.js +0 -59
  199. package/apps/local-dashboard/dist/assets/vendor-react-CKkiCskZ.js +0 -11
  200. package/apps/local-dashboard/dist/assets/vendor-ui-jVSaIZey.js +0 -12
  201. /package/skill/{Workflows → workflows}/AlphaUpload.md +0 -0
  202. /package/skill/{Workflows → workflows}/AutoActivation.md +0 -0
  203. /package/skill/{Workflows → workflows}/Badge.md +0 -0
  204. /package/skill/{Workflows → workflows}/Composability.md +0 -0
  205. /package/skill/{Workflows → workflows}/EvolutionMemory.md +0 -0
  206. /package/skill/{Workflows → workflows}/ExportCanonical.md +0 -0
  207. /package/skill/{Workflows → workflows}/Hook.md +0 -0
  208. /package/skill/{Workflows → workflows}/ImportSkillsBench.md +0 -0
  209. /package/skill/{Workflows → workflows}/Ingest.md +0 -0
  210. /package/skill/{Workflows → workflows}/PlatformHooks.md +0 -0
  211. /package/skill/{Workflows → workflows}/Quickstart.md +0 -0
  212. /package/skill/{Workflows → workflows}/Recover.md +0 -0
  213. /package/skill/{Workflows → workflows}/Registry.md +0 -0
  214. /package/skill/{Workflows → workflows}/RepairSkillUsage.md +0 -0
  215. /package/skill/{Workflows → workflows}/Replay.md +0 -0
  216. /package/skill/{Workflows → workflows}/Rollback.md +0 -0
  217. /package/skill/{Workflows → workflows}/Sync.md +0 -0
  218. /package/skill/{Workflows → workflows}/Telemetry.md +0 -0
  219. /package/skill/{Workflows → workflows}/Uninstall.md +0 -0
@@ -7,19 +7,22 @@
7
7
  * Gate 3 (quality): Student model rates body clarity/completeness 0.0-1.0
8
8
  *
9
9
  * Gate 2 now supports replay-backed validation (via replay engine) in addition
10
- * to LLM-judge-based checking. When replay options are provided and succeed,
11
- * the replay path is preferred. Falls back to LLM judge otherwise.
10
+ * to LLM-judge-based checking. When real host/runtime replay is available and
11
+ * succeeds, the replay path is preferred. Falls back to LLM judge otherwise.
12
12
  */
13
13
 
14
14
  import type {
15
15
  BodyEvolutionProposal,
16
16
  BodyValidationResult,
17
17
  EvalEntry,
18
+ RoutingReplayEntryResult,
19
+ ValidationGate,
18
20
  ValidationMode,
19
21
  } from "../types.js";
20
22
  import { callLlm, stripMarkdownFences } from "../utils/llm-call.js";
21
23
  import { runJudgeValidation } from "./engines/judge-engine.js";
22
- import { runReplayValidation, type ReplayValidationOptions } from "./engines/replay-engine.js";
24
+ import type { ReplayValidationOptions } from "./engines/replay-engine.js";
25
+ import { runValidationContract, type ValidationStrategy } from "./validation-contract.js";
23
26
 
24
27
  // ---------------------------------------------------------------------------
25
28
  // Types
@@ -28,6 +31,8 @@ import { runReplayValidation, type ReplayValidationOptions } from "./engines/rep
28
31
  export interface BodyValidationOptions {
29
32
  /** Replay options for Gate 2 trigger accuracy. */
30
33
  replay?: ReplayValidationOptions;
34
+ mode?: ValidationStrategy;
35
+ onReplayFallback?: (reason?: string) => void;
31
36
  }
32
37
 
33
38
  // ---------------------------------------------------------------------------
@@ -83,7 +88,7 @@ export function validateBodyStructure(proposedBody: string): { valid: boolean; r
83
88
  * Run trigger checks on the eval set using the proposed body content.
84
89
  * Returns before/after pass rates.
85
90
  *
86
- * When replay options are provided, attempts replay-backed validation first.
91
+ * When replay options are provided, attempts host/runtime replay first.
87
92
  * Falls back to LLM judge when replay is unavailable or no options given.
88
93
  */
89
94
  export async function validateBodyTriggerAccuracy(
@@ -99,6 +104,9 @@ export async function validateBodyTriggerAccuracy(
99
104
  improved: boolean;
100
105
  regressions: string[];
101
106
  validation_mode: ValidationMode;
107
+ validation_agent?: string;
108
+ validation_fixture_id?: string;
109
+ validation_fallback_reason?: string;
102
110
  per_entry_results?: import("../types.js").RoutingReplayEntryResult[];
103
111
  before_entry_results?: import("../types.js").RoutingReplayEntryResult[];
104
112
  }> {
@@ -109,48 +117,64 @@ export async function validateBodyTriggerAccuracy(
109
117
  improved: false,
110
118
  regressions: [],
111
119
  validation_mode: "llm_judge",
120
+ validation_agent: agent,
112
121
  };
113
122
  }
114
123
 
115
- // Try replay-backed validation when options are provided
116
- if (options?.replay) {
117
- const replayResult = await runReplayValidation(
118
- originalBody,
119
- proposedBody,
120
- evalSet,
121
- agent,
122
- options.replay,
123
- );
124
-
125
- if (replayResult) {
126
- return {
127
- before_pass_rate: replayResult.before_pass_rate,
128
- after_pass_rate: replayResult.after_pass_rate,
129
- improved: replayResult.improved,
130
- regressions: [],
131
- validation_mode: replayResult.validation_mode,
132
- per_entry_results: replayResult.per_entry_results,
133
- before_entry_results: replayResult.before_entry_results,
134
- };
135
- }
136
- }
137
-
138
- // Fall back to LLM judge
139
- const judgeResult = await runJudgeValidation(
140
- originalBody,
141
- proposedBody,
124
+ const { result, fallbackReason } = await runValidationContract<{
125
+ before_pass_rate: number;
126
+ after_pass_rate: number;
127
+ improved: boolean;
128
+ regressions: string[];
129
+ validation_mode: ValidationMode;
130
+ validation_agent?: string;
131
+ validation_fixture_id?: string;
132
+ validation_fallback_reason?: string;
133
+ per_entry_results?: RoutingReplayEntryResult[];
134
+ before_entry_results?: RoutingReplayEntryResult[];
135
+ }>({
136
+ mode: options?.mode ?? "auto",
137
+ originalContent: originalBody,
138
+ proposedContent: proposedBody,
142
139
  evalSet,
143
140
  agent,
144
- modelFlag,
145
- );
141
+ replayOptions: options?.replay,
142
+ runJudge: async () => {
143
+ const judgeResult = await runJudgeValidation(
144
+ originalBody,
145
+ proposedBody,
146
+ evalSet,
147
+ agent,
148
+ modelFlag,
149
+ );
146
150
 
147
- return {
148
- before_pass_rate: judgeResult.before_pass_rate,
149
- after_pass_rate: judgeResult.after_pass_rate,
150
- improved: judgeResult.improved,
151
- regressions: judgeResult.regressions,
152
- validation_mode: judgeResult.validation_mode,
153
- };
151
+ return {
152
+ result: {
153
+ before_pass_rate: judgeResult.before_pass_rate,
154
+ after_pass_rate: judgeResult.after_pass_rate,
155
+ improved: judgeResult.improved,
156
+ regressions: judgeResult.regressions,
157
+ validation_mode: judgeResult.validation_mode,
158
+ validation_agent: judgeResult.validation_agent,
159
+ },
160
+ modeUsed: judgeResult.validation_mode,
161
+ };
162
+ },
163
+ onReplayFallback: options?.onReplayFallback,
164
+ adaptReplayResult: (replayResult) => ({
165
+ before_pass_rate: replayResult.before_pass_rate,
166
+ after_pass_rate: replayResult.after_pass_rate,
167
+ improved: replayResult.improved,
168
+ regressions: [],
169
+ validation_mode: replayResult.validation_mode,
170
+ validation_agent: replayResult.validation_agent,
171
+ validation_fixture_id: replayResult.validation_fixture_id,
172
+ per_entry_results: replayResult.per_entry_results,
173
+ before_entry_results: replayResult.before_entry_results,
174
+ }),
175
+ });
176
+
177
+ return fallbackReason ? { ...result, validation_fallback_reason: fallbackReason } : result;
154
178
  }
155
179
 
156
180
  // ---------------------------------------------------------------------------
@@ -224,7 +248,7 @@ export async function validateBodyProposal(
224
248
  qualityThreshold = QUALITY_THRESHOLD,
225
249
  options?: BodyValidationOptions,
226
250
  ): Promise<BodyValidationResult> {
227
- const gateResults: Array<{ gate: string; passed: boolean; reason: string }> = [];
251
+ const gateResults: Array<{ gate: ValidationGate; passed: boolean; reason: string }> = [];
228
252
 
229
253
  // Gate 1: Structural validation (pure code)
230
254
  const structural = validateBodyStructure(proposal.proposed_body);
@@ -287,7 +311,9 @@ export async function validateBodyProposal(
287
311
  improved: gatesPassed === 3,
288
312
  regressions: accuracy.regressions,
289
313
  validation_mode: accuracy.validation_mode,
290
- validation_agent: agent,
314
+ validation_agent: accuracy.validation_agent ?? agent,
315
+ validation_fallback_reason: accuracy.validation_fallback_reason,
316
+ validation_fixture_id: accuracy.validation_fixture_id,
291
317
  ...(evalSet.length > 0
292
318
  ? {
293
319
  before_pass_rate: accuracy.before_pass_rate,