selftune 0.2.19 → 0.2.21

This diff shows the changes between two publicly available versions of the package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between the package versions as they appear in their respective public registries.
@@ -43,7 +43,7 @@ import { createEvolveTUI } from "../utils/tui.js";
43
43
  import { appendAuditEntry } from "./audit.js";
44
44
  import { checkConstitution } from "./constitutional.js";
45
45
  import { scoreDescription } from "./description-quality.js";
46
- import { appendEvidenceEntry } from "./evidence.js";
46
+ import { appendEvidenceEntry, buildValidationEvidenceRef } from "./evidence.js";
47
47
  import { extractFailurePatterns } from "./extract-patterns.js";
48
48
  import {
49
49
  computeInvocationScores,
@@ -139,6 +139,10 @@ function createAuditEntry(
139
139
  evalSnapshot?: EvalPassRate,
140
140
  skillName?: string,
141
141
  iterationsUsed?: number,
142
+ provenance?: Pick<
143
+ EvolutionAuditEntry,
144
+ "validation_mode" | "validation_agent" | "validation_fixture_id" | "validation_evidence_ref"
145
+ >,
142
146
  ): EvolutionAuditEntry {
143
147
  return {
144
148
  timestamp: new Date().toISOString(),
@@ -148,6 +152,14 @@ function createAuditEntry(
148
152
  ...(skillName ? { skill_name: skillName } : {}),
149
153
  ...(evalSnapshot ? { eval_snapshot: evalSnapshot } : {}),
150
154
  ...(iterationsUsed != null ? { iterations_used: iterationsUsed } : {}),
155
+ ...(provenance?.validation_mode ? { validation_mode: provenance.validation_mode } : {}),
156
+ ...(provenance?.validation_agent ? { validation_agent: provenance.validation_agent } : {}),
157
+ ...(provenance?.validation_fixture_id
158
+ ? { validation_fixture_id: provenance.validation_fixture_id }
159
+ : {}),
160
+ ...(provenance?.validation_evidence_ref
161
+ ? { validation_evidence_ref: provenance.validation_evidence_ref }
162
+ : {}),
151
163
  };
152
164
  }
153
165
 
@@ -289,6 +301,10 @@ export async function evolve(
289
301
  details: string,
290
302
  evalSnapshot?: EvalPassRate,
291
303
  iterationsUsed?: number,
304
+ provenance?: Pick<
305
+ EvolutionAuditEntry,
306
+ "validation_mode" | "validation_agent" | "validation_fixture_id" | "validation_evidence_ref"
307
+ >,
292
308
  ): void {
293
309
  const entry = createAuditEntry(
294
310
  proposalId,
@@ -297,6 +313,7 @@ export async function evolve(
297
313
  evalSnapshot,
298
314
  skillName,
299
315
  iterationsUsed,
316
+ provenance,
300
317
  );
301
318
  auditEntries.push(entry);
302
319
  try {
@@ -637,10 +654,18 @@ export async function evolve(
637
654
  options.validationModel,
638
655
  );
639
656
  llmCallCount += countValidationLlmCalls(evalSet.length);
657
+ const evidenceRef = buildValidationEvidenceRef(proposal.proposal_id, "validated");
640
658
  recordAudit(
641
659
  proposal.proposal_id,
642
660
  "validated",
643
661
  `Pareto validation: improved=${validation.improved}`,
662
+ undefined,
663
+ undefined,
664
+ {
665
+ validation_mode: validation.validation_mode,
666
+ validation_agent: validation.validation_agent,
667
+ validation_evidence_ref: evidenceRef,
668
+ },
644
669
  );
645
670
  recordEvidence({
646
671
  timestamp: new Date().toISOString(),
@@ -660,6 +685,9 @@ export async function evolve(
660
685
  regressions: validation.regressions,
661
686
  new_passes: validation.new_passes,
662
687
  per_entry_results: validation.per_entry_results,
688
+ validation_mode: validation.validation_mode,
689
+ validation_agent: validation.validation_agent,
690
+ validation_evidence_ref: evidenceRef,
663
691
  },
664
692
  });
665
693
 
@@ -866,11 +894,18 @@ export async function evolve(
866
894
  failed: evalSet.length - Math.round(validation.after_pass_rate * evalSet.length),
867
895
  pass_rate: validation.after_pass_rate,
868
896
  };
897
+ const validatedEvidenceRef = buildValidationEvidenceRef(proposal.proposal_id, "validated");
869
898
  recordAudit(
870
899
  proposal.proposal_id,
871
900
  "validated",
872
901
  `Validation complete: improved=${validation.improved}`,
873
902
  evalSnapshot,
903
+ undefined,
904
+ {
905
+ validation_mode: validation.validation_mode,
906
+ validation_agent: validation.validation_agent,
907
+ validation_evidence_ref: validatedEvidenceRef,
908
+ },
874
909
  );
875
910
  recordEvidence({
876
911
  timestamp: new Date().toISOString(),
@@ -890,6 +925,9 @@ export async function evolve(
890
925
  regressions: validation.regressions,
891
926
  new_passes: validation.new_passes,
892
927
  per_entry_results: validation.per_entry_results,
928
+ validation_mode: validation.validation_mode,
929
+ validation_agent: validation.validation_agent,
930
+ validation_evidence_ref: validatedEvidenceRef,
893
931
  },
894
932
  });
895
933
 
@@ -906,10 +944,18 @@ export async function evolve(
906
944
 
907
945
  if (!validation.improved) {
908
946
  feedbackReason = `Validation failed: net_change=${validation.net_change.toFixed(3)}, improved=false`;
947
+ const rejectedEvidenceRef = buildValidationEvidenceRef(proposal.proposal_id, "rejected");
909
948
  recordAudit(
910
949
  proposal.proposal_id,
911
950
  "rejected",
912
951
  `Validation failed: net_change=${validation.net_change.toFixed(3)} (stopping: ${stopping.reason})`,
952
+ undefined,
953
+ undefined,
954
+ {
955
+ validation_mode: validation.validation_mode,
956
+ validation_agent: validation.validation_agent,
957
+ validation_evidence_ref: rejectedEvidenceRef,
958
+ },
913
959
  );
914
960
  recordEvidence({
915
961
  timestamp: new Date().toISOString(),
@@ -929,6 +975,9 @@ export async function evolve(
929
975
  regressions: validation.regressions,
930
976
  new_passes: validation.new_passes,
931
977
  per_entry_results: validation.per_entry_results,
978
+ validation_mode: validation.validation_mode,
979
+ validation_agent: validation.validation_agent,
980
+ validation_evidence_ref: rejectedEvidenceRef,
932
981
  },
933
982
  });
934
983
 
@@ -1138,6 +1187,11 @@ export async function evolve(
1138
1187
  pass_rate: lastValidation.after_pass_rate,
1139
1188
  },
1140
1189
  iterationsCompleted,
1190
+ {
1191
+ validation_mode: lastValidation.validation_mode,
1192
+ validation_agent: lastValidation.validation_agent,
1193
+ validation_evidence_ref: buildValidationEvidenceRef(lastProposal.proposal_id, "deployed"),
1194
+ },
1141
1195
  );
1142
1196
  recordEvidence({
1143
1197
  timestamp: new Date().toISOString(),
@@ -1157,6 +1211,9 @@ export async function evolve(
1157
1211
  regressions: lastValidation.regressions,
1158
1212
  new_passes: lastValidation.new_passes,
1159
1213
  per_entry_results: lastValidation.per_entry_results,
1214
+ validation_mode: lastValidation.validation_mode,
1215
+ validation_agent: lastValidation.validation_agent,
1216
+ validation_evidence_ref: buildValidationEvidenceRef(lastProposal.proposal_id, "deployed"),
1160
1217
  },
1161
1218
  });
1162
1219
  }
@@ -209,6 +209,8 @@ export async function validateBodyProposal(
209
209
  gate_results: gateResults,
210
210
  improved: false,
211
211
  regressions: [],
212
+ validation_mode: "structural_guard",
213
+ validation_agent: agent,
212
214
  };
213
215
  }
214
216
 
@@ -250,5 +252,13 @@ export async function validateBodyProposal(
250
252
  gate_results: gateResults,
251
253
  improved: gatesPassed === 3,
252
254
  regressions: accuracy.regressions,
255
+ validation_mode: "llm_judge",
256
+ validation_agent: agent,
257
+ ...(evalSet.length > 0
258
+ ? {
259
+ before_pass_rate: accuracy.before_pass_rate,
260
+ after_pass_rate: accuracy.after_pass_rate,
261
+ }
262
+ : {}),
253
263
  };
254
264
  }