synergyspec-selfevolving 2.1.5 → 2.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. package/dist/commands/learn.js +80 -24
  2. package/dist/commands/self-evolution-dream.d.ts +15 -1
  3. package/dist/commands/self-evolution-dream.js +111 -6
  4. package/dist/commands/self-evolution-episode.d.ts +3 -0
  5. package/dist/commands/self-evolution-episode.js +157 -108
  6. package/dist/commands/workflow/status.js +4 -0
  7. package/dist/core/archive.js +17 -9
  8. package/dist/core/change-readiness.d.ts +16 -1
  9. package/dist/core/change-readiness.js +441 -15
  10. package/dist/core/fitness/loss.d.ts +3 -5
  11. package/dist/core/fitness/loss.js +2 -2
  12. package/dist/core/fitness/test-metrics.d.ts +1 -0
  13. package/dist/core/fitness/test-metrics.js +49 -0
  14. package/dist/core/learn.js +129 -11
  15. package/dist/core/migration.d.ts +6 -14
  16. package/dist/core/migration.js +63 -21
  17. package/dist/core/runner-evidence.d.ts +53 -0
  18. package/dist/core/runner-evidence.js +613 -0
  19. package/dist/core/self-evolution/candidates.js +0 -2
  20. package/dist/core/self-evolution/dream.d.ts +57 -3
  21. package/dist/core/self-evolution/dream.js +480 -9
  22. package/dist/core/self-evolution/episode-orchestrator.d.ts +2 -0
  23. package/dist/core/self-evolution/episode-orchestrator.js +17 -5
  24. package/dist/core/self-evolution/episode-store.d.ts +5 -0
  25. package/dist/core/self-evolution/episode-store.js +6 -2
  26. package/dist/core/self-evolution/evolving-agent.js +8 -0
  27. package/dist/core/self-evolution/host-harness.d.ts +35 -12
  28. package/dist/core/self-evolution/host-harness.js +188 -49
  29. package/dist/core/self-evolution/reward-aggregator.js +2 -2
  30. package/dist/core/templates/workflows/archive-change.js +18 -18
  31. package/dist/core/templates/workflows/dream.js +57 -47
  32. package/dist/core/templates/workflows/learn.js +7 -5
  33. package/dist/core/templates/workflows/run-tests.js +48 -29
  34. package/dist/core/templates/workflows/self-evolving.js +11 -8
  35. package/dist/core/trajectory/facts.d.ts +1 -1
  36. package/dist/core/trajectory/registry.js +39 -8
  37. package/package.json +1 -1
@@ -4,6 +4,8 @@ import path from 'path';
4
4
  import { formatChangeStatus, loadChangeContext, } from './artifact-graph/index.js';
5
5
  import { listEpisodes } from './self-evolution/episode-store.js';
6
6
  import { listCandidates, resolveCandidateRepo, } from './self-evolution/candidates.js';
7
+ import { parseTestCollection } from './fitness/test-metrics.js';
8
+ import { extractExpectedTestPaths } from './trajectory/facts.js';
7
9
  const TASK_PATTERN = /^[-*]\s+\[([\sx])\]\s*(.*)$/i;
8
10
  const REQUIRED_EVIDENCE_FILES = [
9
11
  ['specTests', 'spec-tests.md'],
@@ -51,6 +53,7 @@ export async function getChangeReadiness(projectRoot, changeName, schemaName) {
51
53
  const evidence = await readEvidenceReadiness(context.changeDir);
52
54
  const evolution = await readEvolutionOutcome(projectRoot, context.changeDir, changeName);
53
55
  const observedVerification = deriveObservedVerificationReadiness(evolution);
56
+ const learnEvidence = await readLearnEvidenceReadiness(projectRoot, context.changeDir, evolution);
54
57
  const workspaceIdentity = await readWorkspaceIdentityReadiness(projectRoot, context.changeDir, changeName);
55
58
  const status = deriveChangeReadinessStatus(artifactStatus, taskReadiness.total, taskReadiness.completed);
56
59
  return {
@@ -66,10 +69,15 @@ export async function getChangeReadiness(projectRoot, changeName, schemaName) {
66
69
  evidence,
67
70
  evolution,
68
71
  observedVerification,
72
+ learnEvidence,
69
73
  workspaceIdentity,
70
74
  isArchiveReady: artifactStatus === 'complete' &&
71
75
  taskReadiness.status === 'complete' &&
72
76
  evidence.missing.length === 0 &&
77
+ observedVerification.ready &&
78
+ evolution.status !== 'error' &&
79
+ evolution.status !== 'busy' &&
80
+ learnEvidence.ready &&
73
81
  workspaceIdentity.ready,
74
82
  artifactGraph,
75
83
  };
@@ -86,6 +94,7 @@ export function toReadinessJson(readiness) {
86
94
  evidence: readiness.evidence,
87
95
  evolution: readiness.evolution,
88
96
  observedVerification: readiness.observedVerification,
97
+ learnEvidence: readiness.learnEvidence,
89
98
  workspaceIdentity: readiness.workspaceIdentity,
90
99
  isArchiveReady: readiness.isArchiveReady,
91
100
  };
@@ -163,11 +172,11 @@ async function readWorkspaceIdentityReadiness(projectRoot, changeDir, changeName
163
172
  current,
164
173
  };
165
174
  }
166
- const exitJsonPath = extractRunnerExitJsonPath(report, projectRoot);
175
+ const exitJsonPath = await extractRunnerExitJsonPath(report, projectRoot, changeDir);
167
176
  if (!exitJsonPath) {
168
177
  return {
169
- status: 'not-recorded',
170
- ready: true,
178
+ status: 'invalid',
179
+ ready: false,
171
180
  reason: 'test-report.md has no runner-exit.json reference',
172
181
  current,
173
182
  };
@@ -201,13 +210,23 @@ async function readWorkspaceIdentityReadiness(projectRoot, changeDir, changeName
201
210
  const record = asRecord(parsed);
202
211
  if (!record || !Object.prototype.hasOwnProperty.call(record, 'workspaceIdentity')) {
203
212
  return {
204
- status: 'not-recorded',
205
- ready: true,
213
+ status: 'invalid',
214
+ ready: false,
206
215
  reason: 'runner-exit.json has no workspaceIdentity field',
207
216
  evidencePath: formatProjectPath(projectRoot, exitJsonPath),
208
217
  current,
209
218
  };
210
219
  }
220
+ const runnerRecordProblem = await validateRunnerExitRecord(projectRoot, changeDir, record, exitJsonPath);
221
+ if (runnerRecordProblem) {
222
+ return {
223
+ status: 'invalid',
224
+ ready: false,
225
+ reason: runnerRecordProblem,
226
+ evidencePath: formatProjectPath(projectRoot, exitJsonPath),
227
+ current,
228
+ };
229
+ }
211
230
  const workspaceIdentity = asRecord(record.workspaceIdentity);
212
231
  if (!workspaceIdentity) {
213
232
  return {
@@ -219,6 +238,17 @@ async function readWorkspaceIdentityReadiness(projectRoot, changeDir, changeName
219
238
  };
220
239
  }
221
240
  const recorded = workspaceIdentitySnapshotFromRecord(workspaceIdentity);
241
+ const completenessProblems = validateWorkspaceIdentityCompleteness(recorded, current);
242
+ if (completenessProblems.length > 0) {
243
+ return {
244
+ status: 'invalid',
245
+ ready: false,
246
+ reason: `runner-exit.json workspaceIdentity is incomplete: ${completenessProblems.join('; ')}`,
247
+ evidencePath: formatProjectPath(projectRoot, exitJsonPath),
248
+ recorded,
249
+ current,
250
+ };
251
+ }
222
252
  const mismatches = compareWorkspaceIdentities(projectRoot, changeName, recorded, current);
223
253
  if (mismatches.length > 0) {
224
254
  return {
@@ -239,18 +269,314 @@ async function readWorkspaceIdentityReadiness(projectRoot, changeDir, changeName
239
269
  current,
240
270
  };
241
271
  }
242
- function extractRunnerExitJsonPath(report, projectRoot) {
272
+ async function readLearnEvidenceReadiness(projectRoot, changeDir, evolution) {
273
+ const reportPath = path.join(changeDir, 'learn-report.md');
274
+ let content;
275
+ try {
276
+ content = await fs.readFile(reportPath, 'utf-8');
277
+ }
278
+ catch {
279
+ return {
280
+ status: 'missing',
281
+ ready: false,
282
+ reason: evolution.status === 'not-run'
283
+ ? 'learn-report.md is required before archive'
284
+ : `learn/evolution outcome ${evolution.status} is recorded but learn-report.md is absent`,
285
+ path: formatProjectPath(projectRoot, reportPath),
286
+ };
287
+ }
288
+ const validationProblem = validateLearnReportContent(content);
289
+ if (validationProblem) {
290
+ return {
291
+ status: 'invalid',
292
+ ready: false,
293
+ reason: validationProblem,
294
+ path: formatProjectPath(projectRoot, reportPath),
295
+ };
296
+ }
297
+ const evolutionProblem = validateLearnReportAgainstEvolution(content, evolution);
298
+ if (evolutionProblem) {
299
+ return {
300
+ status: 'invalid',
301
+ ready: false,
302
+ reason: evolutionProblem,
303
+ path: formatProjectPath(projectRoot, reportPath),
304
+ };
305
+ }
306
+ if (content.trim().length > 0) {
307
+ return {
308
+ status: 'present',
309
+ ready: true,
310
+ reason: 'learn-report.md is present',
311
+ path: formatProjectPath(projectRoot, reportPath),
312
+ };
313
+ }
314
+ return {
315
+ status: 'invalid',
316
+ ready: false,
317
+ reason: 'learn-report.md is empty',
318
+ path: formatProjectPath(projectRoot, reportPath),
319
+ };
320
+ }
321
+ function validateLearnReportContent(content) {
322
+ const required = [
323
+ ['## Episode Verdict', /^## Episode Verdict:/m],
324
+ ['Outcome', /^-\s*Outcome:\s*\S/m],
325
+ ['Episode id', /^-\s*Episode id:\s*\S/m],
326
+ ['Decision', /^-\s*Decision:\s*\S/m],
327
+ ['Evolution', /^-\s*Evolution:\s*\S/m],
328
+ ['Advantage', /^-\s*Advantage:\s*\S/m],
329
+ ['Policy version', /^-\s*.*POLICY version:\s*\S/im],
330
+ ['Evolved target', /^-\s*Evolved target:\s*\S/m],
331
+ ['Canonical file(s) changed', /^-\s*Canonical file\(s\) changed:\s*\S/m],
332
+ ['Rollback', /^-\s*Rollback:\s*\S/m],
333
+ ['Defects to surface', /^-\s*Defects to surface:\s*\S/m],
334
+ ['Key lessons', /^-\s*Key lessons:\s*\S/m],
335
+ ['Isolation', /^-\s*Isolation:\s*\S/m],
336
+ ];
337
+ const missing = required
338
+ .filter(([, pattern]) => !pattern.test(content))
339
+ .map(([label]) => label);
340
+ if (missing.length === 0)
341
+ return null;
342
+ return `learn-report.md is missing required verdict field(s): ${missing.join(', ')}`;
343
+ }
344
+ function parseLearnReportVerdict(content) {
345
+ return {
346
+ outcome: lineField(content, 'Outcome'),
347
+ episodeId: lineField(content, 'Episode id'),
348
+ evolvedTarget: lineField(content, 'Evolved target'),
349
+ };
350
+ }
351
+ function lineField(content, label) {
352
+ const escaped = label.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
353
+ const match = new RegExp(`^-\\s*${escaped}:\\s*(.+)$`, 'im').exec(content);
354
+ return match?.[1]?.trim();
355
+ }
356
+ function validateLearnReportAgainstEvolution(content, evolution) {
357
+ if (evolution.status === 'not-run') {
358
+ return 'learn-report.md is not bound to a recorded learn/evolution outcome';
359
+ }
360
+ const verdict = parseLearnReportVerdict(content);
361
+ const reportStatus = evolutionOutcomeStatus(verdict.outcome ?? '');
362
+ if (reportStatus !== evolution.status) {
363
+ return `learn-report.md outcome ${verdict.outcome ?? 'missing'} does not match latest evolution status ${evolution.status}`;
364
+ }
365
+ if (evolution.episodeId) {
366
+ if (!verdict.episodeId || verdict.episodeId === 'none') {
367
+ return `learn-report.md is missing the latest episode id ${evolution.episodeId}`;
368
+ }
369
+ if (verdict.episodeId !== evolution.episodeId) {
370
+ return `learn-report.md episode id ${verdict.episodeId} does not match latest episode ${evolution.episodeId}`;
371
+ }
372
+ }
373
+ if (evolution.targetId && verdict.evolvedTarget && verdict.evolvedTarget !== evolution.targetId) {
374
+ return `learn-report.md target ${verdict.evolvedTarget} does not match latest evolution target ${evolution.targetId}`;
375
+ }
376
+ return null;
377
+ }
378
+ async function validateRunnerExitRecord(projectRoot, changeDir, record, exitJsonPath) {
379
+ const command = stringValue(record.command);
380
+ if (!command)
381
+ return 'runner-exit.json command is missing or invalid';
382
+ const cwd = stringValue(record.cwd);
383
+ if (!cwd)
384
+ return 'runner-exit.json cwd is missing or invalid';
385
+ if (!sameResolvedPath(cwd, projectRoot)) {
386
+ return `runner cwd changed from ${cwd} to ${path.resolve(projectRoot)}`;
387
+ }
388
+ if (!stringValue(record.startedAt))
389
+ return 'runner-exit.json startedAt is missing or invalid';
390
+ if (!stringValue(record.finishedAt))
391
+ return 'runner-exit.json finishedAt is missing or invalid';
392
+ const exitCode = typeof record.exitCode === 'number' && Number.isInteger(record.exitCode)
393
+ ? record.exitCode
394
+ : null;
395
+ if (exitCode === null)
396
+ return 'runner-exit.json exitCode is missing or invalid';
397
+ if (exitCode !== 0)
398
+ return `runner-exit.json exitCode is ${exitCode}`;
399
+ const testMetricsProblem = validateStructuredTestMetrics(record.testMetrics);
400
+ if (testMetricsProblem)
401
+ return testMetricsProblem;
402
+ const stdoutLog = stringValue(record.stdoutLog);
403
+ const stderrLog = stringValue(record.stderrLog);
404
+ if (!stdoutLog)
405
+ return 'runner-exit.json stdoutLog is missing or invalid';
406
+ if (!stderrLog)
407
+ return 'runner-exit.json stderrLog is missing or invalid';
408
+ const requiredPathProblem = (await validateEvidencePath(projectRoot, changeDir, stdoutLog, 'stdoutLog')) ??
409
+ (await validateEvidencePath(projectRoot, changeDir, stderrLog, 'stderrLog')) ??
410
+ (await validateEvidenceHash(projectRoot, changeDir, stdoutLog, record.stdoutLogSha256, 'stdoutLogSha256')) ??
411
+ (await validateEvidenceHash(projectRoot, changeDir, stderrLog, record.stderrLogSha256, 'stderrLogSha256'));
412
+ if (requiredPathProblem)
413
+ return requiredPathProblem;
414
+ const scopeProblem = await validateRunnerScope(projectRoot, changeDir, record, stdoutLog, stderrLog);
415
+ if (scopeProblem)
416
+ return scopeProblem;
417
+ for (const field of ['junitXml', 'coverageSummary', 'coverageLcov', 'coverageHtml']) {
418
+ const value = record[field];
419
+ if (value === null || value === undefined)
420
+ continue;
421
+ const pathValue = stringValue(value);
422
+ if (!pathValue)
423
+ return `runner-exit.json ${field} must be a path string or null`;
424
+ const problem = await validateEvidencePath(projectRoot, changeDir, pathValue, field);
425
+ if (problem)
426
+ return problem;
427
+ }
428
+ if (!path.resolve(exitJsonPath).startsWith(path.resolve(projectRoot) + path.sep)) {
429
+ return `runner-exit.json is outside project root: ${exitJsonPath}`;
430
+ }
431
+ return null;
432
+ }
433
+ function validateStructuredTestMetrics(value) {
434
+ if (value === null || value === undefined)
435
+ return null;
436
+ const record = asRecord(value);
437
+ if (!record)
438
+ return 'runner-exit.json testMetrics must be an object';
439
+ const total = metricNumber(record.total);
440
+ const passed = metricNumber(record.passed);
441
+ const failed = metricNumber(record.failed);
442
+ if (total === null || passed === null || failed === null) {
443
+ return 'runner-exit.json testMetrics total, passed, and failed must be finite numbers';
444
+ }
445
+ if (total < 0 || passed < 0 || failed < 0) {
446
+ return 'runner-exit.json testMetrics counts must be non-negative';
447
+ }
448
+ if (passed + failed !== total) {
449
+ return 'runner-exit.json testMetrics total must equal passed + failed';
450
+ }
451
+ if (Object.prototype.hasOwnProperty.call(record, 'passRate')) {
452
+ const passRate = metricNumber(record.passRate);
453
+ if (passRate === null)
454
+ return 'runner-exit.json testMetrics passRate must be a finite number';
455
+ const expected = total > 0 ? passed / total : 0;
456
+ if (Math.abs(passRate - expected) > 1e-9) {
457
+ return 'runner-exit.json testMetrics passRate does not match passed / total';
458
+ }
459
+ }
460
+ return null;
461
+ }
462
+ async function validateRunnerScope(projectRoot, changeDir, record, stdoutLog, stderrLog) {
463
+ const specTests = await fs
464
+ .readFile(path.join(changeDir, 'spec-tests.md'), 'utf-8')
465
+ .catch(() => undefined);
466
+ const expected = extractExpectedTestPaths(specTests).map(normPath).filter(Boolean);
467
+ if (expected.length === 0)
468
+ return null;
469
+ const stdoutPath = await resolveEvidencePath(projectRoot, changeDir, stdoutLog);
470
+ const stderrPath = await resolveEvidencePath(projectRoot, changeDir, stderrLog);
471
+ const outputText = [
472
+ stdoutPath ? await fs.readFile(stdoutPath, 'utf-8').catch(() => '') : '',
473
+ stderrPath ? await fs.readFile(stderrPath, 'utf-8').catch(() => '') : '',
474
+ ]
475
+ .filter((text) => text.length > 0)
476
+ .join('\n');
477
+ const collection = parseTestCollection(outputText);
478
+ const observed = collection?.paths ?? [];
479
+ if (observed.length > 0) {
480
+ return pathsIntersect(observed, expected)
481
+ ? null
482
+ : 'runner-exit.json green run is out-of-scope for spec-tests.md';
483
+ }
484
+ const commandPaths = commandTestPaths(stringValue(record.command));
485
+ if (pathsIntersect(commandPaths, expected))
486
+ return null;
487
+ const knownScope = collection !== null && collection.collected !== null;
488
+ return knownScope ? 'runner-exit.json green run is out-of-scope for spec-tests.md' : null;
489
+ }
490
+ async function validateEvidencePath(projectRoot, changeDir, rawPath, field) {
491
+ const fullPath = await resolveEvidencePath(projectRoot, changeDir, rawPath);
492
+ if (!fullPath) {
493
+ return `runner-exit.json ${field} points outside project root`;
494
+ }
495
+ try {
496
+ await fs.access(fullPath);
497
+ }
498
+ catch {
499
+ return `runner-exit.json ${field} is missing: ${formatProjectPath(projectRoot, fullPath)}`;
500
+ }
501
+ return null;
502
+ }
503
+ async function validateEvidenceHash(projectRoot, changeDir, rawPath, rawExpected, field) {
504
+ const expected = stringValue(rawExpected);
505
+ if (!expected)
506
+ return `runner-exit.json ${field} is missing or invalid`;
507
+ const fullPath = await resolveEvidencePath(projectRoot, changeDir, rawPath);
508
+ if (!fullPath) {
509
+ return `runner-exit.json ${field} path points outside project root`;
510
+ }
511
+ let content;
512
+ try {
513
+ content = await fs.readFile(fullPath);
514
+ }
515
+ catch {
516
+ return `runner-exit.json ${field} cannot hash missing evidence: ${formatProjectPath(projectRoot, fullPath)}`;
517
+ }
518
+ const actual = createHash('sha256').update(content).digest('hex');
519
+ if (actual !== expected)
520
+ return `runner-exit.json ${field} does not match current log content`;
521
+ return null;
522
+ }
523
+ async function extractRunnerExitJsonPath(report, projectRoot, changeDir) {
243
524
  RUNNER_EXIT_JSON_PATTERN.lastIndex = 0;
525
+ let latest = null;
244
526
  for (const match of report.matchAll(RUNNER_EXIT_JSON_PATTERN)) {
245
527
  const value = (match[1] ?? match[2] ?? '').trim();
246
528
  if (!value)
247
529
  continue;
248
530
  const withoutMdLink = value.replace(/^\((.*)\)$/, '$1');
249
- return path.isAbsolute(withoutMdLink)
250
- ? path.normalize(withoutMdLink)
251
- : path.resolve(projectRoot, withoutMdLink);
531
+ latest = {
532
+ direct: resolveProjectPath(projectRoot, withoutMdLink),
533
+ remapped: resolveChangeDirTestEvidencePath(projectRoot, changeDir, withoutMdLink),
534
+ };
252
535
  }
253
- return null;
536
+ if (!latest)
537
+ return null;
538
+ if (latest.direct && (await fileExists(latest.direct)))
539
+ return latest.direct;
540
+ if (latest.remapped && (await fileExists(latest.remapped)))
541
+ return latest.remapped;
542
+ return latest.direct ?? latest.remapped;
543
+ }
544
+ function resolveProjectPath(projectRoot, rawPath) {
545
+ if (!rawPath)
546
+ return null;
547
+ const resolved = path.isAbsolute(rawPath)
548
+ ? path.normalize(rawPath)
549
+ : path.resolve(projectRoot, rawPath);
550
+ const root = path.resolve(projectRoot);
551
+ return resolved === root || resolved.startsWith(root + path.sep) ? resolved : null;
552
+ }
553
+ function resolveChangeDirTestEvidencePath(projectRoot, changeDir, rawPath) {
554
+ if (path.isAbsolute(rawPath))
555
+ return null;
556
+ const normalized = rawPath.replace(/\\/g, '/');
557
+ const marker = '/test-evidence/';
558
+ const markerIndex = normalized.indexOf(marker);
559
+ const bareMarkerIndex = normalized.startsWith('test-evidence/') ? 0 : -1;
560
+ const suffix = markerIndex >= 0
561
+ ? normalized.slice(markerIndex + marker.length)
562
+ : bareMarkerIndex === 0
563
+ ? normalized.slice('test-evidence/'.length)
564
+ : null;
565
+ if (!suffix)
566
+ return null;
567
+ const resolved = path.resolve(changeDir, 'test-evidence', ...suffix.split('/'));
568
+ return isInside(projectRoot, resolved) && isInside(changeDir, resolved) ? resolved : null;
569
+ }
570
+ async function resolveEvidencePath(projectRoot, changeDir, rawPath) {
571
+ if (!rawPath)
572
+ return null;
573
+ const direct = resolveProjectPath(projectRoot, rawPath);
574
+ if (direct && (await fileExists(direct)))
575
+ return direct;
576
+ const remapped = resolveChangeDirTestEvidencePath(projectRoot, changeDir, rawPath);
577
+ if (remapped && (await fileExists(remapped)))
578
+ return remapped;
579
+ return direct ?? remapped;
254
580
  }
255
581
  async function readCurrentWorkspaceIdentity(projectRoot, changeName) {
256
582
  return {
@@ -347,7 +673,7 @@ function fileSnapshotFromUnknown(value) {
347
673
  }
348
674
  function compareWorkspaceIdentities(projectRoot, changeName, recorded, current) {
349
675
  const mismatches = [];
350
- if (recorded.changeName && recorded.changeName !== changeName) {
676
+ if (recorded.changeName && !changeNamesEquivalent(recorded.changeName, changeName)) {
351
677
  mismatches.push(`change name changed from ${recorded.changeName} to ${changeName}`);
352
678
  }
353
679
  if (recorded.cwd && !sameResolvedPath(recorded.cwd, projectRoot)) {
@@ -374,6 +700,39 @@ function compareIdentityFile(label, recorded, current, mismatches) {
374
700
  mismatches.push(`${label} content changed since runner evidence was captured`);
375
701
  }
376
702
  }
703
+ function validateWorkspaceIdentityCompleteness(recorded, current) {
704
+ const problems = [];
705
+ if (!recorded.cwd)
706
+ problems.push('cwd is missing');
707
+ if (!recorded.changeName)
708
+ problems.push('changeName is missing');
709
+ const currentFiles = [
710
+ ['pyproject', 'pyproject.toml', current.pyproject],
711
+ ['packageJson', 'package.json', current.packageJson],
712
+ ];
713
+ for (const [key, label, currentFile] of currentFiles) {
714
+ if (!currentFile)
715
+ continue;
716
+ const recordedFile = recorded[key];
717
+ if (!recordedFile) {
718
+ problems.push(`${label} identity is missing`);
719
+ continue;
720
+ }
721
+ if (!recordedFile.path)
722
+ problems.push(`${label} path is missing`);
723
+ if (currentFile.name && !recordedFile.name)
724
+ problems.push(`${label} name is missing`);
725
+ if (!recordedFile.sha256)
726
+ problems.push(`${label} sha256 is missing`);
727
+ }
728
+ return problems;
729
+ }
730
+ function changeNamesEquivalent(recorded, current) {
731
+ if (recorded === current)
732
+ return true;
733
+ const archived = current.match(/^\d{4}-\d{2}-\d{2}-(.+)$/);
734
+ return archived?.[1] === recorded;
735
+ }
377
736
  function sameResolvedPath(left, right) {
378
737
  const normalizedLeft = path.resolve(left);
379
738
  const normalizedRight = path.resolve(right);
@@ -385,11 +744,51 @@ function sameResolvedPath(left, right) {
385
744
  function stringValue(value) {
386
745
  return typeof value === 'string' && value.length > 0 ? value : undefined;
387
746
  }
747
+ function metricNumber(value) {
748
+ return typeof value === 'number' && Number.isFinite(value) ? value : null;
749
+ }
388
750
  function asRecord(value) {
389
751
  return value && typeof value === 'object' && !Array.isArray(value)
390
752
  ? value
391
753
  : null;
392
754
  }
755
+ function pathsIntersect(observed, expected) {
756
+ if (observed.length === 0 || expected.length === 0)
757
+ return false;
758
+ for (const o of observed.map(normPath)) {
759
+ for (const e of expected) {
760
+ if (o === e || o.endsWith('/' + e) || e.endsWith('/' + o)) {
761
+ return true;
762
+ }
763
+ }
764
+ }
765
+ return false;
766
+ }
767
+ function commandTestPaths(command) {
768
+ if (!command)
769
+ return [];
770
+ const paths = new Set();
771
+ const afterRunner = command.replace(/\b(?:pytest|py\.test|python\s+-m\s+pytest|vitest|npm\s+test|pnpm\s+test|yarn\s+test|go\s+test)\b/i, ' ');
772
+ for (const raw of afterRunner.split(/\s+/)) {
773
+ const tok = raw.trim().replace(/^['"]|['"]$/g, '');
774
+ if (!tok || tok.startsWith('-'))
775
+ continue;
776
+ const looksLikePath = /[\\/]/.test(tok) || /\b(?:tests?|spec|specs|benchmark_tests)\b/i.test(tok);
777
+ if (!looksLikePath)
778
+ continue;
779
+ if (/\.[a-z]+$/i.test(tok) && !/\.(?:py|[tj]sx?)$/i.test(tok))
780
+ continue;
781
+ paths.add(normPath(tok));
782
+ }
783
+ return [...paths];
784
+ }
785
+ function normPath(p) {
786
+ return p.replace(/\\/g, '/').toLowerCase().replace(/^\.\//, '').replace(/::.*/, '');
787
+ }
788
+ function isInside(root, candidate) {
789
+ const relative = path.relative(path.resolve(root), path.resolve(candidate));
790
+ return relative === '' || (!relative.startsWith('..') && !path.isAbsolute(relative));
791
+ }
393
792
  function formatProjectPath(projectRoot, filePath) {
394
793
  const relative = path.relative(projectRoot, filePath);
395
794
  if (relative && !relative.startsWith('..') && !path.isAbsolute(relative)) {
@@ -410,7 +809,14 @@ function deriveObservedVerificationReadiness(evolution) {
410
809
  if (evolution.status === 'promoted' && evolution.promoted) {
411
810
  return { status: 'verified', ready: true };
412
811
  }
413
- return { status: 'not-assessed', ready: true };
812
+ if (isObservedVerifiedOutcome(evolution)) {
813
+ return { status: 'verified', ready: true, reason: evolution.reason ?? evolution.outcome };
814
+ }
815
+ return {
816
+ status: 'not-assessed',
817
+ ready: false,
818
+ reason: 'learn/evolution has not recorded observed-verified evidence',
819
+ };
414
820
  }
415
821
  function isObservedVerificationBlocker(evolution) {
416
822
  const raw = `${evolution.outcome ?? ''} ${evolution.reason ?? ''}`.toLowerCase();
@@ -418,9 +824,8 @@ function isObservedVerificationBlocker(evolution) {
418
824
  return false;
419
825
  const mentionsUnverified = raw.includes('refused-unverified-evidence') ||
420
826
  raw.includes('not observed-verified') ||
421
- raw.includes('observed-verified') ||
422
827
  raw.includes('observed-green gate failed') ||
423
- raw.includes('observed-green') ||
828
+ raw.includes('observed-green failed') ||
424
829
  raw.includes('trajectory not verified') ||
425
830
  raw.includes('not verified') ||
426
831
  raw.includes('unverified') ||
@@ -432,6 +837,20 @@ function isObservedVerificationBlocker(evolution) {
432
837
  evolution.status === 'rolled-back' ||
433
838
  evolution.status === 'error');
434
839
  }
840
+ function isObservedVerifiedOutcome(evolution) {
841
+ const raw = `${evolution.outcome ?? ''} ${evolution.reason ?? ''}`.toLowerCase();
842
+ if (!raw)
843
+ return false;
844
+ const mentionsVerified = raw.includes('observed-verified') ||
845
+ raw.includes('observed verified') ||
846
+ raw.includes('verified-green') ||
847
+ raw.includes('verified green');
848
+ const mentionsUnverified = raw.includes('unverified') ||
849
+ raw.includes('not observed-verified') ||
850
+ raw.includes('observed-green gate failed') ||
851
+ raw.includes('not verified');
852
+ return mentionsVerified && !mentionsUnverified;
853
+ }
435
854
  /**
436
855
  * Read the CLI-written evolution outcome for the change, if any. When the manual
437
856
  * evolution-result file is absent, fall back to the durable loop-v2 episode store
@@ -484,7 +903,7 @@ async function readEvolutionOutcome(projectRoot, changeDir, changeName) {
484
903
  }
485
904
  }
486
905
  function evolutionOutcomeStatus(outcome) {
487
- if (outcome === 'promoted')
906
+ if (outcome === 'promoted' || outcome === 'evolved')
488
907
  return 'promoted';
489
908
  if (outcome === 'rolled-back')
490
909
  return 'rolled-back';
@@ -599,6 +1018,7 @@ async function readLatestEpisodeOutcome(projectRoot, changeDir, changeName) {
599
1018
  outcome: 'episode-error',
600
1019
  reason: episode.terminalError,
601
1020
  targetId: episode.targetId,
1021
+ episodeId: episode.episodeId,
602
1022
  promoted: false,
603
1023
  promotedFiles: [],
604
1024
  timestamp: episode.updatedAt,
@@ -610,6 +1030,7 @@ async function readLatestEpisodeOutcome(projectRoot, changeDir, changeName) {
610
1030
  outcome: 'episode-evolution-refused',
611
1031
  reason: episode.evolutionOutcomeReason ?? 'evolution refused',
612
1032
  targetId: episode.targetId,
1033
+ episodeId: episode.episodeId,
613
1034
  promoted: false,
614
1035
  promotedFiles: [],
615
1036
  timestamp: episode.updatedAt,
@@ -620,6 +1041,7 @@ async function readLatestEpisodeOutcome(projectRoot, changeDir, changeName) {
620
1041
  status: 'promoted',
621
1042
  outcome: 'episode-evolved',
622
1043
  targetId: episode.targetId,
1044
+ episodeId: episode.episodeId,
623
1045
  promoted: true,
624
1046
  promotedFiles: [],
625
1047
  timestamp: episode.updatedAt,
@@ -631,6 +1053,7 @@ async function readLatestEpisodeOutcome(projectRoot, changeDir, changeName) {
631
1053
  outcome: 'episode-abstained',
632
1054
  reason: episode.evolutionOutcomeReason ?? 'reward agent abstained; no policy promotion',
633
1055
  targetId: episode.targetId,
1056
+ episodeId: episode.episodeId,
634
1057
  promoted: false,
635
1058
  promotedFiles: [],
636
1059
  timestamp: episode.updatedAt,
@@ -642,6 +1065,7 @@ async function readLatestEpisodeOutcome(projectRoot, changeDir, changeName) {
642
1065
  outcome: 'episode-not-spawned',
643
1066
  reason: episode.evolutionOutcomeReason ?? 'episode rolled back before policy evolution',
644
1067
  targetId: episode.targetId,
1068
+ episodeId: episode.episodeId,
645
1069
  promoted: false,
646
1070
  promotedFiles: [],
647
1071
  timestamp: episode.updatedAt,
@@ -653,6 +1077,7 @@ async function readLatestEpisodeOutcome(projectRoot, changeDir, changeName) {
653
1077
  outcome: 'episode-not-spawned',
654
1078
  reason: episode.evolutionOutcomeReason ?? 'episode closed without policy promotion',
655
1079
  targetId: episode.targetId,
1080
+ episodeId: episode.episodeId,
656
1081
  promoted: false,
657
1082
  promotedFiles: [],
658
1083
  timestamp: episode.updatedAt,
@@ -664,6 +1089,7 @@ async function readLatestEpisodeOutcome(projectRoot, changeDir, changeName) {
664
1089
  outcome: 'episode-in-progress',
665
1090
  reason: `episode currently at stage ${stage}`,
666
1091
  targetId: episode.targetId,
1092
+ episodeId: episode.episodeId,
667
1093
  promoted: false,
668
1094
  promotedFiles: [],
669
1095
  timestamp: episode.updatedAt,
@@ -47,11 +47,9 @@ export interface ComputeLossInput {
47
47
  */
48
48
  verified?: boolean;
49
49
  /**
50
- * Weight on the unverified soft-penalty term; default 0 (OBSERVE-ONLY). At 0
51
- * the loss is byte-identical to the functional⊕health baseline regardless of
52
- * `verified` the trajectory signal is recorded on the FitnessSample for
53
- * auditing without yet moving selection. Raise it to let unverified
54
- * candidates be down-weighted (never hard-disqualified) when comparing them.
50
+ * Weight on the unverified soft-penalty term; defaults to a small nonzero
51
+ * penalty so authored green reports are visibly non-green unless corroborated
52
+ * by runner evidence. Set it to 0 explicitly to make the signal observe-only.
55
53
  */
56
54
  unverifiedWeight?: number;
57
55
  }
@@ -20,8 +20,8 @@ function clamp01(v) {
20
20
  export function computePerChangeLoss(input) {
21
21
  const wf = input.functionalWeight ?? 0.7;
22
22
  const wh = input.healthWeight ?? 0.3;
23
- // Default 0 ⇒ observe-only: the unverified term vanishes and the returned
24
- // loss/shape stay identical to the functional⊕health baseline.
23
+ // Default-on soft penalty: authored green reports are not treated as fully
24
+ // measured green unless runner evidence corroborates them.
25
25
  const wu = input.unverifiedWeight ?? DEFAULT_UNVERIFIED_WEIGHT;
26
26
  const functionalLoss = clamp01(1 - clamp01(input.passRate));
27
27
  const healthPenalty = clamp01(input.healthPenalty ?? 0);
@@ -28,6 +28,7 @@ export interface TestMetrics {
28
28
  * "1 failed, 9 passed in 0.4s" (order-independent)
29
29
  * - SynergySpec reports: table/status prose like
30
30
  * "Passed; 85 tests collected and passed"
31
+ * "Summary: 29 collected, 29 passed, 0 failed, 0 skipped, 0 collection errors."
31
32
  * A bare "N passed" line that is not a recognized summary (e.g. prose or a
32
33
  * per-suite tally) is ignored. Pytest "errors" count toward `failed`.
33
34
  * Returns null when no recognized summary is found.