selftune 0.2.19 → 0.2.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,10 +1,13 @@
1
1
  #!/usr/bin/env bun
2
2
 
3
+ import { readFileSync } from "node:fs";
3
4
  import { parseArgs } from "node:util";
4
5
 
5
6
  import { getDb } from "../localdb/db.js";
6
7
  import { queryQueryLog, querySkillUsageRecords } from "../localdb/queries.js";
7
8
  import type {
9
+ SkillFamilyColdStartPair,
10
+ SkillFamilyColdStartSuspicion,
8
11
  QueryLogRecord,
9
12
  SkillFamilyOverlapMember,
10
13
  SkillFamilyOverlapPair,
@@ -13,17 +16,54 @@ import type {
13
16
  SkillUsageRecord,
14
17
  } from "../types.js";
15
18
  import { CLIError } from "../utils/cli-error.js";
19
+ import { parseFrontmatter } from "../utils/frontmatter.js";
16
20
  import {
17
21
  findInstalledSkillNames,
18
22
  findInstalledSkillPath,
19
23
  findRepositoryClaudeSkillDirs,
20
24
  findRepositorySkillDirs,
21
25
  } from "../utils/skill-discovery.js";
26
+ import {
27
+ buildStopwordSet,
28
+ extractWhenToUseLines,
29
+ jaccardSimilarity,
30
+ tokenizeText,
31
+ } from "../utils/text-similarity.js";
22
32
  import { buildEvalSet } from "./hooks-to-evals.js";
23
33
 
// Defaults for the telemetry-based overlap analysis.
// NOTE(review): their consumers are outside the visible part of this file — confirm exact usage.
const DEFAULT_MIN_OVERLAP = 0.3;
const DEFAULT_MIN_SHARED = 2;
const DEFAULT_MAX_SHARED = 10;

// --- Static (cold-start) suspicion tuning ---------------------------------

/** Description similarity at or above this counts as a signal in scoreStaticSuspicion. */
const DESCRIPTION_SIMILARITY_THRESHOLD = 0.18;
/** "When to Use" similarity at or above this counts as a signal and unlocks confusion probes. */
const WHEN_TO_USE_SIMILARITY_THRESHOLD = 0.18;
/** Per-line overlap with the sibling surface needed for a line to become a confusion probe. */
const CONFUSION_QUERY_LINE_OVERLAP_THRESHOLD = 0.12;
/** With shared command surfaces, a best similarity at or above this is scored "high". */
const COMMAND_AUGMENTED_HIGH_SIMILARITY_THRESHOLD = 0.22;
/** A best similarity at or above this is scored "low" when fewer than two signals fire. */
const LOW_SUSPICION_SIMILARITY_THRESHOLD = 0.28;
/** Maximum number of shared terms reported per pair. */
const SHARED_TERM_LIMIT = 6;
/** Maximum number of suspicious pairs included in the cold-start report. */
const STATIC_PAIR_LIMIT = 10;

/**
 * Extra stopwords handed to tokenizeText for skill descriptions and "When to Use" lines.
 * NOTE(review): presumably buildStopwordSet merges these with a base list — confirm in
 * ../utils/text-similarity.js.
 */
const STOPWORDS = buildStopwordSet([
  "between", "by", "can", "change", "content",
  "decision", "decisions", "do", "get", "help",
  "i", "if", "my", "state", "their",
  "users", "want", "wants", "you", "your",
]);
27
67
 
28
68
  interface FamilyOverlapOptions {
29
69
  familyPrefix?: string;
@@ -34,6 +74,15 @@ interface FamilyOverlapOptions {
34
74
  searchDirs?: string[];
35
75
  }
36
76
 
/**
 * Static view of one installed skill, built from its skill file by
 * loadInstalledSkillSurface and compared pairwise during cold-start analysis.
 */
interface InstalledSkillSurface {
  /** Name the surface was requested for. */
  skillName: string;
  /** Location reported by skill discovery; omitted when the skill could not be found. */
  skillPath?: string;
  /** Tokens derived from the frontmatter description. */
  descriptionTokens: Set<string>;
  /** Tokens derived from the joined "When to Use" lines. */
  whenToUseTokens: Set<string>;
  /** The "When to Use" lines as extracted from the skill body. */
  whenToUseLines: string[];
  /** Sorted, distinct two-token command prefixes found in fenced code blocks. */
  commandSurfaces: string[];
}
85
+
37
86
  function getEvalSkillSearchDirs(): string[] {
38
87
  const cwd = process.cwd();
39
88
  const homeDir = process.env.HOME ?? "";
@@ -119,6 +168,266 @@ function scoreConsolidationPressure(overlapPct: number): "low" | "medium" | "hig
119
168
  return "low";
120
169
  }
121
170
 
/**
 * List the tokens that appear on both skills' surfaces.
 *
 * A token qualifies when it occurs in either left-hand set and in either
 * right-hand set. The result is de-duplicated, ordered with `localeCompare`,
 * and truncated to SHARED_TERM_LIMIT entries.
 *
 * @param leftDescription - description tokens of the first skill
 * @param leftWhenToUse - "When to Use" tokens of the first skill
 * @param rightDescription - description tokens of the second skill
 * @param rightWhenToUse - "When to Use" tokens of the second skill
 * @returns the alphabetically first shared tokens, at most SHARED_TERM_LIMIT of them
 */
function sharedTerms(
  leftDescription: Set<string>,
  leftWhenToUse: Set<string>,
  rightDescription: Set<string>,
  rightWhenToUse: Set<string>,
): string[] {
  const overlap = new Set<string>();
  for (const term of [...leftDescription, ...leftWhenToUse]) {
    const onRightSide = rightDescription.has(term) || rightWhenToUse.has(term);
    if (onRightSide) overlap.add(term);
  }

  const ordered = Array.from(overlap);
  ordered.sort((x, y) => x.localeCompare(y));
  return ordered.slice(0, SHARED_TERM_LIMIT);
}
186
+
/**
 * Derive up to four probe queries that could plausibly route to either sibling.
 *
 * Each non-blank "When to Use" line of one skill becomes a probe when the line
 * itself overlaps the sibling's combined tokens enough, or when the two skills'
 * combined token sets are already similar overall. Every shared command
 * surface additionally contributes one templated probe.
 *
 * @param left - surface of the first skill
 * @param right - surface of the second skill
 * @param sharedCommandSurfaces - command prefixes present in both skills
 * @returns distinct probes in discovery order (left lines, right lines, commands), capped at 4
 */
function buildSyntheticSiblingConfusionQueries(
  left: InstalledSkillSurface,
  right: InstalledSkillSurface,
  sharedCommandSurfaces: string[],
): string[] {
  const leftTokens = new Set([...left.descriptionTokens, ...left.whenToUseTokens]);
  const rightTokens = new Set([...right.descriptionTokens, ...right.whenToUseTokens]);
  const probes = new Set<string>();

  // Walk both directions with the same rule: a skill's own lines are compared
  // against the *other* skill's combined tokens.
  const directions = [
    { lines: left.whenToUseLines, own: leftTokens, other: rightTokens },
    { lines: right.whenToUseLines, own: rightTokens, other: leftTokens },
  ];
  for (const { lines, own, other } of directions) {
    for (const rawLine of lines) {
      const text = rawLine.trim();
      if (text.length === 0) continue;

      const lineOverlap = jaccardSimilarity(tokenizeText(text, STOPWORDS), other);
      const lineQualifies = lineOverlap >= CONFUSION_QUERY_LINE_OVERLAP_THRESHOLD;
      // The surface-level comparison is only evaluated when the line alone does not qualify.
      if (lineQualifies || jaccardSimilarity(own, other) >= WHEN_TO_USE_SIMILARITY_THRESHOLD) {
        probes.add(text);
      }
    }
  }

  for (const command of sharedCommandSurfaces) {
    probes.add(`${command} for a sibling-family request`);
  }

  return Array.from(probes).slice(0, 4);
}
221
+
/**
 * Collect the distinct "command subcommand" prefixes used inside fenced code blocks.
 *
 * For every non-blank line of every fenced block, the first two
 * whitespace-separated tokens are recorded, unless the line starts with `#`
 * or `>`, has fewer than two tokens, or its second token is a flag (starts
 * with `-`).
 *
 * The opening fence may carry any info string that contains no backtick or
 * newline (for example `bash title="demo"` or `c++`). Restricting it to
 * word characters made such openers fail to match, which shifted fence
 * pairing so that prose between blocks was parsed as commands.
 *
 * @param body - markdown body of a skill file
 * @returns the distinct two-token prefixes, ordered with `localeCompare`
 */
function extractCommandSurfaces(body: string): string[] {
  const fencedBlock = /```[^\r\n`]*\r?\n([\s\S]*?)```/g;
  const commands = new Set<string>();

  for (const match of body.matchAll(fencedBlock)) {
    const block = match[1] ?? "";
    for (const line of block.split("\n")) {
      const trimmed = line.trim();
      if (!trimmed || trimmed.startsWith("#") || trimmed.startsWith(">")) continue;

      const [head, next] = trimmed.split(/\s+/).filter(Boolean);
      // Need a command plus a subcommand; a flag in second position is not a subcommand.
      if (!head || !next || next.startsWith("-")) continue;
      commands.add(`${head} ${next}`);
    }
  }

  return [...commands].sort((a, b) => a.localeCompare(b));
}
237
+
/**
 * Read an installed skill from disk and reduce it to the token sets, lines,
 * and command prefixes used for static comparison.
 *
 * Never throws: when the skill cannot be located the result has no
 * `skillPath`; when reading or parsing fails the result keeps `skillPath`
 * but every surface field is empty.
 *
 * @param skillName - name of the skill to load
 * @param searchDirs - directories passed through to skill discovery
 * @returns the skill's surface, possibly empty
 */
function loadInstalledSkillSurface(skillName: string, searchDirs: string[]): InstalledSkillSurface {
  // Fresh containers on every call so callers never share mutable state.
  const blankFields = (): Omit<InstalledSkillSurface, "skillName" | "skillPath"> => ({
    descriptionTokens: new Set<string>(),
    whenToUseTokens: new Set<string>(),
    whenToUseLines: [],
    commandSurfaces: [],
  });

  const skillPath = findInstalledSkillPath(skillName, searchDirs);
  if (!skillPath) {
    return { skillName, ...blankFields() };
  }

  try {
    const parsed = parseFrontmatter(readFileSync(skillPath, "utf8"));
    const whenToUseLines = extractWhenToUseLines(parsed.body);
    const descriptionTokens = tokenizeText(parsed.description, STOPWORDS);
    const whenToUseTokens = tokenizeText(whenToUseLines.join(" "), STOPWORDS);
    const commandSurfaces = extractCommandSurfaces(parsed.body);
    return {
      skillName,
      skillPath,
      descriptionTokens,
      whenToUseTokens,
      whenToUseLines,
      commandSurfaces,
    };
  } catch {
    // Deliberately quiet: unreadable or malformed skill files are expected in
    // mixed registries, so the caller simply receives an empty surface.
    return { skillName, skillPath, ...blankFields() };
  }
}
275
+
/**
 * Grade how strongly two skills' static surfaces suggest they overlap.
 *
 * Three signals are counted: description similarity reaching
 * DESCRIPTION_SIMILARITY_THRESHOLD, "When to Use" similarity reaching
 * WHEN_TO_USE_SIMILARITY_THRESHOLD, and at least one shared command surface.
 *
 * @param descriptionSimilarity - similarity of the two description token sets
 * @param whenToUseSimilarity - similarity of the two "When to Use" token sets
 * @param sharedCommandSurfaces - command prefixes present in both skills
 * @returns "high" when all three signals fire, or when commands are shared and
 *   the stronger similarity reaches COMMAND_AUGMENTED_HIGH_SIMILARITY_THRESHOLD;
 *   "medium" when two signals fire; "low" when the stronger similarity reaches
 *   LOW_SUSPICION_SIMILARITY_THRESHOLD; otherwise `null`
 */
function scoreStaticSuspicion(
  descriptionSimilarity: number,
  whenToUseSimilarity: number,
  sharedCommandSurfaces: string[],
): "low" | "medium" | "high" | null {
  const hasSharedCommands = sharedCommandSurfaces.length > 0;
  const strongest = Math.max(descriptionSimilarity, whenToUseSimilarity);
  const firedSignals = [
    descriptionSimilarity >= DESCRIPTION_SIMILARITY_THRESHOLD,
    whenToUseSimilarity >= WHEN_TO_USE_SIMILARITY_THRESHOLD,
    hasSharedCommands,
  ].filter(Boolean).length;

  if (firedSignals >= 3) return "high";
  if (hasSharedCommands && strongest >= COMMAND_AUGMENTED_HIGH_SIMILARITY_THRESHOLD) {
    return "high";
  }
  if (firedSignals >= 2) return "medium";
  return strongest >= LOW_SUSPICION_SIMILARITY_THRESHOLD ? "low" : null;
}
300
+
/**
 * Compare the installed surfaces of every skill pair in a family and report
 * which pairs look like overlapping entry points.
 *
 * @param skills - names of the skills in the family
 * @param searchDirs - directories passed through to skill discovery
 * @param readySkillCount - number of skills the caller considers to have enough
 *   positive queries; the family is only a cold-start candidate while this is
 *   below 2
 * @returns `undefined` when fewer than two skills have a located, non-empty
 *   surface; otherwise the report, with `pairs` capped at STATIC_PAIR_LIMIT
 *   while the counts and average cover every suspicious pair
 */
function analyzeColdStartSuspicion(
  skills: string[],
  searchDirs: string[],
  readySkillCount: number,
): SkillFamilyColdStartSuspicion | undefined {
  // Keep only skills that were found on disk and expose at least one comparable field.
  const usableSurfaces = skills
    .map((name) => loadInstalledSkillSurface(name, searchDirs))
    .filter((surface) => {
      if (!surface.skillPath) return false;
      return (
        surface.descriptionTokens.size > 0 ||
        surface.whenToUseTokens.size > 0 ||
        surface.commandSurfaces.length > 0
      );
    });
  if (usableSurfaces.length < 2) return undefined;

  // Score every unordered pair exactly once.
  const pairs: SkillFamilyColdStartPair[] = [];
  let analyzedPairs = 0;
  usableSurfaces.forEach((left, leftIndex) => {
    for (const right of usableSurfaces.slice(leftIndex + 1)) {
      analyzedPairs += 1;

      const descriptionSimilarity = jaccardSimilarity(
        left.descriptionTokens,
        right.descriptionTokens,
      );
      const whenToUseSimilarity = jaccardSimilarity(left.whenToUseTokens, right.whenToUseTokens);
      const sharedCommandSurfaces = left.commandSurfaces.filter((command) =>
        right.commandSurfaces.includes(command),
      );
      const suspicionLevel = scoreStaticSuspicion(
        descriptionSimilarity,
        whenToUseSimilarity,
        sharedCommandSurfaces,
      );
      if (suspicionLevel === null) continue;

      pairs.push({
        skill_a: left.skillName,
        skill_b: right.skillName,
        description_similarity: descriptionSimilarity,
        when_to_use_similarity: whenToUseSimilarity,
        shared_command_surfaces: sharedCommandSurfaces,
        shared_terms: sharedTerms(
          left.descriptionTokens,
          left.whenToUseTokens,
          right.descriptionTokens,
          right.whenToUseTokens,
        ),
        synthetic_confusion_queries: buildSyntheticSiblingConfusionQueries(
          left,
          right,
          sharedCommandSurfaces,
        ),
        suspicion_level: suspicionLevel,
      });
    }
  });

  // Most suspicious first: high, then medium, then everything else; ties fall
  // back to "When to Use" similarity and then description similarity.
  const levelRank = (level: SkillFamilyColdStartPair["suspicion_level"]): number =>
    level === "high" ? 2 : level === "medium" ? 1 : 0;
  pairs.sort((a, b) => {
    const byLevel = levelRank(b.suspicion_level) - levelRank(a.suspicion_level);
    const byWhenToUse = b.when_to_use_similarity - a.when_to_use_similarity;
    const byDescription = b.description_similarity - a.description_similarity;
    return byLevel || byWhenToUse || byDescription;
  });

  const suspiciousPairCount = pairs.length;

  // Command overlap is binary but carries the same weight as each Jaccard
  // score: shared command surfaces are a high-precision cold-start signal even
  // when description text is sparse or noisy.
  let averageStaticSimilarity = 0;
  if (suspiciousPairCount > 0) {
    let total = 0;
    for (const pair of pairs) {
      const commandScore = pair.shared_command_surfaces.length > 0 ? 1 : 0;
      total += (pair.description_similarity + pair.when_to_use_similarity + commandScore) / 3;
    }
    averageStaticSimilarity = total / suspiciousPairCount;
  }

  // Families of three or more skills need two suspicious pairs; smaller ones need one.
  const requiredPairs = skills.length >= 3 ? 2 : 1;
  const candidate =
    suspiciousPairCount > 0 && readySkillCount < 2 && suspiciousPairCount >= requiredPairs;

  const anyPair = (test: (pair: SkillFamilyColdStartPair) => boolean): boolean => pairs.some(test);
  const rationale: string[] =
    suspiciousPairCount === 0
      ? [
          "Installed skill surfaces do not show meaningful overlap yet. Keep gathering cold-start evals and real usage before making a packaging call.",
        ]
      : [
          `${suspiciousPairCount} sibling pair${suspiciousPairCount === 1 ? "" : "s"} show overlapping installed skill surfaces before trusted telemetry is available.`,
          ...(anyPair((pair) => pair.shared_command_surfaces.length > 0)
            ? [
                "Shared command surfaces suggest some siblings may be thin wrappers around the same backend or query path.",
              ]
            : []),
          ...(anyPair((pair) => pair.when_to_use_similarity >= WHEN_TO_USE_SIMILARITY_THRESHOLD)
            ? [
                "Overlapping `When to Use` language suggests sibling boundaries may already be competing on intent before enough telemetry exists to confirm it.",
              ]
            : []),
          ...(anyPair((pair) => pair.synthetic_confusion_queries.length > 0)
            ? [
                "Synthetic sibling-confusion probes are available for suspicious pairs, so you can test the family boundary before real telemetry converges.",
              ]
            : []),
          ...(candidate
            ? [
                "Treat this as architecture suspicion, not proof. Run cold-start evals and gather trusted usage before consolidating the family.",
              ]
            : []),
        ];

  return {
    candidate,
    analyzed_pairs: analyzedPairs,
    suspicious_pair_count: suspiciousPairCount,
    average_static_similarity: averageStaticSimilarity,
    pairs: pairs.slice(0, STATIC_PAIR_LIMIT),
    rationale,
  };
}
430
+
122
431
  function buildRefactorProposal(
123
432
  skills: string[],
124
433
  familyPrefix: string | undefined,
@@ -214,6 +523,7 @@ export function analyzeSkillFamilyOverlap(
214
523
  const readySkillCount = members.filter(
215
524
  (member) => member.positive_query_count >= minSharedQueries,
216
525
  ).length;
526
+ const coldStartSuspicion = analyzeColdStartSuspicion(skills, searchDirs, readySkillCount);
217
527
  const consolidationCandidate =
218
528
  readySkillCount >= 2 &&
219
529
  skills.length >= 3 &&
@@ -239,6 +549,12 @@ export function analyzeSkillFamilyOverlap(
239
549
  );
240
550
  }
241
551
 
552
+ if (readySkillCount < 2 && coldStartSuspicion?.candidate) {
553
+ rationale.push(
554
+ "Installed skill surfaces already suggest an architecture suspicion: some siblings look like overlapping entry points to the same underlying workflow family.",
555
+ );
556
+ }
557
+
242
558
  if (consolidationCandidate) {
243
559
  rationale.push(
244
560
  "This family looks like a packaging problem, not just a wording problem. Test a parent skill with internal workflows before continuing standalone description optimization.",
@@ -250,6 +566,7 @@ export function analyzeSkillFamilyOverlap(
250
566
  analyzed_skills: skills,
251
567
  members,
252
568
  pairs,
569
+ cold_start_suspicion: coldStartSuspicion,
253
570
  total_pairs_analyzed: totalPairsAnalyzed,
254
571
  overlap_count: overlapCount,
255
572
  overlap_density: overlapDensity,
@@ -257,7 +574,9 @@ export function analyzeSkillFamilyOverlap(
257
574
  consolidation_candidate: consolidationCandidate,
258
575
  recommendation:
259
576
  readySkillCount < 2
260
- ? "Insufficient trusted telemetry to make a family-packaging call yet. Use cold-start evals plus a few days of real usage before deciding whether to consolidate."
577
+ ? coldStartSuspicion?.candidate
578
+ ? "Trusted telemetry is still sparse, but installed skill surfaces suggest this family may want a parent skill. Treat this as cold-start architecture suspicion, then confirm with cold-start evals plus real usage."
579
+ : "Insufficient trusted telemetry to make a family-packaging call yet. Use cold-start evals plus a few days of real usage before deciding whether to consolidate."
261
580
  : consolidationCandidate
262
581
  ? `Consider consolidating this family under a parent skill like \`${parentSkillName}\`.`
263
582
  : "Keep the skills separate for now and continue improving boundaries at the description/workflow level.",
@@ -29,3 +29,8 @@ export function readEvidenceTrail(skillName?: string, _logPath?: string): Evolut
29
29
  const db = getDb();
30
30
  return queryEvolutionEvidence(db, skillName) as EvolutionEvidenceEntry[];
31
31
  }
32
+
/**
 * Compose the stable reference that ties an audit entry to its validation evidence.
 *
 * @param proposalId - identifier of the proposal the evidence belongs to
 * @param stage - lifecycle stage label (callers in this diff pass "validated", "rejected", "deployed")
 * @returns the key in the form `evolution_evidence:<proposalId>:<stage>`
 */
export function buildValidationEvidenceRef(proposalId: string, stage: string): string {
  const prefix = "evolution_evidence:";
  return prefix + proposalId + ":" + stage;
}
@@ -31,12 +31,13 @@ import { callViaSubagent } from "../utils/llm-call.js";
31
31
  import { appendAuditEntry } from "./audit.js";
32
32
  import { checkConstitutionSizeOnly } from "./constitutional.js";
33
33
  import { parseSkillSections, replaceBody, replaceSection } from "./deploy-proposal.js";
34
- import { appendEvidenceEntry } from "./evidence.js";
34
+ import { appendEvidenceEntry, buildValidationEvidenceRef } from "./evidence.js";
35
35
  import { extractFailurePatterns } from "./extract-patterns.js";
36
36
  import { type ExecutionContext, generateBodyProposal } from "./propose-body.js";
37
37
  import { generateRoutingProposal } from "./propose-routing.js";
38
38
  import { refineBodyProposal } from "./refine-body.js";
39
39
  import { validateBodyProposal } from "./validate-body.js";
40
+ import { buildRoutingReplayFixture } from "./validate-host-replay.js";
40
41
  import { validateRoutingProposal } from "./validate-routing.js";
41
42
 
42
43
  // ---------------------------------------------------------------------------
@@ -106,6 +107,10 @@ function createAuditEntry(
106
107
  action: EvolutionAuditEntry["action"],
107
108
  details: string,
108
109
  skillName?: string,
110
+ provenance?: Pick<
111
+ EvolutionAuditEntry,
112
+ "validation_mode" | "validation_agent" | "validation_fixture_id" | "validation_evidence_ref"
113
+ >,
109
114
  ): EvolutionAuditEntry {
110
115
  return {
111
116
  timestamp: new Date().toISOString(),
@@ -113,6 +118,14 @@ function createAuditEntry(
113
118
  skill_name: skillName,
114
119
  action,
115
120
  details,
121
+ ...(provenance?.validation_mode ? { validation_mode: provenance.validation_mode } : {}),
122
+ ...(provenance?.validation_agent ? { validation_agent: provenance.validation_agent } : {}),
123
+ ...(provenance?.validation_fixture_id
124
+ ? { validation_fixture_id: provenance.validation_fixture_id }
125
+ : {}),
126
+ ...(provenance?.validation_evidence_ref
127
+ ? { validation_evidence_ref: provenance.validation_evidence_ref }
128
+ : {}),
116
129
  };
117
130
  }
118
131
 
@@ -181,8 +194,12 @@ export async function evolveBody(
181
194
  proposalId: string,
182
195
  action: EvolutionAuditEntry["action"],
183
196
  details: string,
197
+ provenance?: Pick<
198
+ EvolutionAuditEntry,
199
+ "validation_mode" | "validation_agent" | "validation_fixture_id" | "validation_evidence_ref"
200
+ >,
184
201
  ): void {
185
- const entry = createAuditEntry(proposalId, action, details, skillName);
202
+ const entry = createAuditEntry(proposalId, action, details, skillName, provenance);
186
203
  auditEntries.push(entry);
187
204
  try {
188
205
  _appendAuditEntry(entry);
@@ -443,11 +460,17 @@ export async function evolveBody(
443
460
  const validationModelFlag = options.validationModel ?? studentModel;
444
461
  let validation: BodyValidationResult;
445
462
  if (target === "routing") {
463
+ const replayFixture = buildRoutingReplayFixture({
464
+ skillName,
465
+ skillPath,
466
+ platform: studentAgent === "codex" ? "codex" : "claude_code",
467
+ });
446
468
  validation = await _validateRoutingProposal(
447
469
  proposal,
448
470
  evalSet,
449
471
  studentAgent,
450
472
  validationModelFlag,
473
+ { replayFixture },
451
474
  );
452
475
  } else {
453
476
  validation = await _validateBodyProposal(
@@ -458,11 +481,18 @@ export async function evolveBody(
458
481
  );
459
482
  }
460
483
  lastValidation = validation;
484
+ const validatedEvidenceRef = buildValidationEvidenceRef(proposal.proposal_id, "validated");
461
485
 
462
486
  recordAudit(
463
487
  proposal.proposal_id,
464
488
  "validated",
465
489
  `Validation: ${validation.gates_passed}/${validation.gates_total} gates passed`,
490
+ {
491
+ validation_mode: validation.validation_mode,
492
+ validation_agent: validation.validation_agent,
493
+ validation_fixture_id: validation.validation_fixture_id,
494
+ validation_evidence_ref: validatedEvidenceRef,
495
+ },
466
496
  );
467
497
  recordEvidence({
468
498
  timestamp: new Date().toISOString(),
@@ -480,6 +510,12 @@ export async function evolveBody(
480
510
  gates_total: validation.gates_total,
481
511
  gate_results: validation.gate_results,
482
512
  regressions: validation.regressions,
513
+ before_pass_rate: validation.before_pass_rate,
514
+ after_pass_rate: validation.after_pass_rate,
515
+ validation_mode: validation.validation_mode,
516
+ validation_agent: validation.validation_agent,
517
+ validation_fixture_id: validation.validation_fixture_id,
518
+ validation_evidence_ref: validatedEvidenceRef,
483
519
  },
484
520
  });
485
521
 
@@ -491,6 +527,12 @@ export async function evolveBody(
491
527
  proposal.proposal_id,
492
528
  "rejected",
493
529
  `Validation failed: ${validation.gates_passed}/${validation.gates_total} gates`,
530
+ {
531
+ validation_mode: validation.validation_mode,
532
+ validation_agent: validation.validation_agent,
533
+ validation_fixture_id: validation.validation_fixture_id,
534
+ validation_evidence_ref: buildValidationEvidenceRef(proposal.proposal_id, "rejected"),
535
+ },
494
536
  );
495
537
  recordEvidence({
496
538
  timestamp: new Date().toISOString(),
@@ -508,6 +550,12 @@ export async function evolveBody(
508
550
  gates_total: validation.gates_total,
509
551
  gate_results: validation.gate_results,
510
552
  regressions: validation.regressions,
553
+ before_pass_rate: validation.before_pass_rate,
554
+ after_pass_rate: validation.after_pass_rate,
555
+ validation_mode: validation.validation_mode,
556
+ validation_agent: validation.validation_agent,
557
+ validation_fixture_id: validation.validation_fixture_id,
558
+ validation_evidence_ref: buildValidationEvidenceRef(proposal.proposal_id, "rejected"),
511
559
  },
512
560
  });
513
561
 
@@ -607,6 +655,12 @@ export async function evolveBody(
607
655
  lastProposal.proposal_id,
608
656
  "deployed",
609
657
  `Deployed ${target} proposal for ${skillName}`,
658
+ {
659
+ validation_mode: lastValidation.validation_mode,
660
+ validation_agent: lastValidation.validation_agent,
661
+ validation_fixture_id: lastValidation.validation_fixture_id,
662
+ validation_evidence_ref: buildValidationEvidenceRef(lastProposal.proposal_id, "deployed"),
663
+ },
610
664
  );
611
665
  recordEvidence({
612
666
  timestamp: new Date().toISOString(),
@@ -624,6 +678,12 @@ export async function evolveBody(
624
678
  gates_total: lastValidation.gates_total,
625
679
  gate_results: lastValidation.gate_results,
626
680
  regressions: lastValidation.regressions,
681
+ before_pass_rate: lastValidation.before_pass_rate,
682
+ after_pass_rate: lastValidation.after_pass_rate,
683
+ validation_mode: lastValidation.validation_mode,
684
+ validation_agent: lastValidation.validation_agent,
685
+ validation_fixture_id: lastValidation.validation_fixture_id,
686
+ validation_evidence_ref: buildValidationEvidenceRef(lastProposal.proposal_id, "deployed"),
627
687
  },
628
688
  });
629
689