synergyspec-selfevolving 2.1.5 → 2.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/commands/learn.js +80 -24
- package/dist/commands/self-evolution-dream.d.ts +15 -1
- package/dist/commands/self-evolution-dream.js +111 -6
- package/dist/commands/self-evolution-episode.d.ts +3 -0
- package/dist/commands/self-evolution-episode.js +157 -108
- package/dist/commands/workflow/status.js +4 -0
- package/dist/core/archive.js +17 -9
- package/dist/core/change-readiness.d.ts +16 -1
- package/dist/core/change-readiness.js +441 -15
- package/dist/core/fitness/loss.d.ts +3 -5
- package/dist/core/fitness/loss.js +2 -2
- package/dist/core/fitness/test-metrics.d.ts +1 -0
- package/dist/core/fitness/test-metrics.js +49 -0
- package/dist/core/learn.js +129 -11
- package/dist/core/migration.d.ts +6 -14
- package/dist/core/migration.js +63 -21
- package/dist/core/runner-evidence.d.ts +53 -0
- package/dist/core/runner-evidence.js +613 -0
- package/dist/core/self-evolution/candidates.js +0 -2
- package/dist/core/self-evolution/dream.d.ts +57 -3
- package/dist/core/self-evolution/dream.js +480 -9
- package/dist/core/self-evolution/episode-orchestrator.d.ts +2 -0
- package/dist/core/self-evolution/episode-orchestrator.js +17 -5
- package/dist/core/self-evolution/episode-store.d.ts +5 -0
- package/dist/core/self-evolution/episode-store.js +6 -2
- package/dist/core/self-evolution/evolving-agent.js +8 -0
- package/dist/core/self-evolution/host-harness.d.ts +35 -12
- package/dist/core/self-evolution/host-harness.js +188 -49
- package/dist/core/self-evolution/reward-aggregator.js +2 -2
- package/dist/core/templates/workflows/archive-change.js +18 -18
- package/dist/core/templates/workflows/dream.js +57 -47
- package/dist/core/templates/workflows/learn.js +7 -5
- package/dist/core/templates/workflows/run-tests.js +48 -29
- package/dist/core/templates/workflows/self-evolving.js +11 -8
- package/dist/core/trajectory/facts.d.ts +1 -1
- package/dist/core/trajectory/registry.js +39 -8
- package/package.json +1 -1
|
@@ -4,6 +4,8 @@ import path from 'path';
|
|
|
4
4
|
import { formatChangeStatus, loadChangeContext, } from './artifact-graph/index.js';
|
|
5
5
|
import { listEpisodes } from './self-evolution/episode-store.js';
|
|
6
6
|
import { listCandidates, resolveCandidateRepo, } from './self-evolution/candidates.js';
|
|
7
|
+
import { parseTestCollection } from './fitness/test-metrics.js';
|
|
8
|
+
import { extractExpectedTestPaths } from './trajectory/facts.js';
|
|
7
9
|
const TASK_PATTERN = /^[-*]\s+\[([\sx])\]\s*(.*)$/i;
|
|
8
10
|
const REQUIRED_EVIDENCE_FILES = [
|
|
9
11
|
['specTests', 'spec-tests.md'],
|
|
@@ -51,6 +53,7 @@ export async function getChangeReadiness(projectRoot, changeName, schemaName) {
|
|
|
51
53
|
const evidence = await readEvidenceReadiness(context.changeDir);
|
|
52
54
|
const evolution = await readEvolutionOutcome(projectRoot, context.changeDir, changeName);
|
|
53
55
|
const observedVerification = deriveObservedVerificationReadiness(evolution);
|
|
56
|
+
const learnEvidence = await readLearnEvidenceReadiness(projectRoot, context.changeDir, evolution);
|
|
54
57
|
const workspaceIdentity = await readWorkspaceIdentityReadiness(projectRoot, context.changeDir, changeName);
|
|
55
58
|
const status = deriveChangeReadinessStatus(artifactStatus, taskReadiness.total, taskReadiness.completed);
|
|
56
59
|
return {
|
|
@@ -66,10 +69,15 @@ export async function getChangeReadiness(projectRoot, changeName, schemaName) {
|
|
|
66
69
|
evidence,
|
|
67
70
|
evolution,
|
|
68
71
|
observedVerification,
|
|
72
|
+
learnEvidence,
|
|
69
73
|
workspaceIdentity,
|
|
70
74
|
isArchiveReady: artifactStatus === 'complete' &&
|
|
71
75
|
taskReadiness.status === 'complete' &&
|
|
72
76
|
evidence.missing.length === 0 &&
|
|
77
|
+
observedVerification.ready &&
|
|
78
|
+
evolution.status !== 'error' &&
|
|
79
|
+
evolution.status !== 'busy' &&
|
|
80
|
+
learnEvidence.ready &&
|
|
73
81
|
workspaceIdentity.ready,
|
|
74
82
|
artifactGraph,
|
|
75
83
|
};
|
|
@@ -86,6 +94,7 @@ export function toReadinessJson(readiness) {
|
|
|
86
94
|
evidence: readiness.evidence,
|
|
87
95
|
evolution: readiness.evolution,
|
|
88
96
|
observedVerification: readiness.observedVerification,
|
|
97
|
+
learnEvidence: readiness.learnEvidence,
|
|
89
98
|
workspaceIdentity: readiness.workspaceIdentity,
|
|
90
99
|
isArchiveReady: readiness.isArchiveReady,
|
|
91
100
|
};
|
|
@@ -163,11 +172,11 @@ async function readWorkspaceIdentityReadiness(projectRoot, changeDir, changeName
|
|
|
163
172
|
current,
|
|
164
173
|
};
|
|
165
174
|
}
|
|
166
|
-
const exitJsonPath = extractRunnerExitJsonPath(report, projectRoot);
|
|
175
|
+
const exitJsonPath = await extractRunnerExitJsonPath(report, projectRoot, changeDir);
|
|
167
176
|
if (!exitJsonPath) {
|
|
168
177
|
return {
|
|
169
|
-
status: '
|
|
170
|
-
ready:
|
|
178
|
+
status: 'invalid',
|
|
179
|
+
ready: false,
|
|
171
180
|
reason: 'test-report.md has no runner-exit.json reference',
|
|
172
181
|
current,
|
|
173
182
|
};
|
|
@@ -201,13 +210,23 @@ async function readWorkspaceIdentityReadiness(projectRoot, changeDir, changeName
|
|
|
201
210
|
const record = asRecord(parsed);
|
|
202
211
|
if (!record || !Object.prototype.hasOwnProperty.call(record, 'workspaceIdentity')) {
|
|
203
212
|
return {
|
|
204
|
-
status: '
|
|
205
|
-
ready:
|
|
213
|
+
status: 'invalid',
|
|
214
|
+
ready: false,
|
|
206
215
|
reason: 'runner-exit.json has no workspaceIdentity field',
|
|
207
216
|
evidencePath: formatProjectPath(projectRoot, exitJsonPath),
|
|
208
217
|
current,
|
|
209
218
|
};
|
|
210
219
|
}
|
|
220
|
+
const runnerRecordProblem = await validateRunnerExitRecord(projectRoot, changeDir, record, exitJsonPath);
|
|
221
|
+
if (runnerRecordProblem) {
|
|
222
|
+
return {
|
|
223
|
+
status: 'invalid',
|
|
224
|
+
ready: false,
|
|
225
|
+
reason: runnerRecordProblem,
|
|
226
|
+
evidencePath: formatProjectPath(projectRoot, exitJsonPath),
|
|
227
|
+
current,
|
|
228
|
+
};
|
|
229
|
+
}
|
|
211
230
|
const workspaceIdentity = asRecord(record.workspaceIdentity);
|
|
212
231
|
if (!workspaceIdentity) {
|
|
213
232
|
return {
|
|
@@ -219,6 +238,17 @@ async function readWorkspaceIdentityReadiness(projectRoot, changeDir, changeName
|
|
|
219
238
|
};
|
|
220
239
|
}
|
|
221
240
|
const recorded = workspaceIdentitySnapshotFromRecord(workspaceIdentity);
|
|
241
|
+
const completenessProblems = validateWorkspaceIdentityCompleteness(recorded, current);
|
|
242
|
+
if (completenessProblems.length > 0) {
|
|
243
|
+
return {
|
|
244
|
+
status: 'invalid',
|
|
245
|
+
ready: false,
|
|
246
|
+
reason: `runner-exit.json workspaceIdentity is incomplete: ${completenessProblems.join('; ')}`,
|
|
247
|
+
evidencePath: formatProjectPath(projectRoot, exitJsonPath),
|
|
248
|
+
recorded,
|
|
249
|
+
current,
|
|
250
|
+
};
|
|
251
|
+
}
|
|
222
252
|
const mismatches = compareWorkspaceIdentities(projectRoot, changeName, recorded, current);
|
|
223
253
|
if (mismatches.length > 0) {
|
|
224
254
|
return {
|
|
@@ -239,18 +269,314 @@ async function readWorkspaceIdentityReadiness(projectRoot, changeDir, changeName
|
|
|
239
269
|
current,
|
|
240
270
|
};
|
|
241
271
|
}
|
|
242
|
-
function
|
|
272
|
+
/**
 * Decide whether `learn-report.md` in the change directory is acceptable
 * archive evidence.
 *
 * The report is ready only when it can be read, is not blank, passes
 * `validateLearnReportContent`, and passes `validateLearnReportAgainstEvolution`.
 * The blank check runs first so an empty file is reported as empty rather than
 * as a list of missing verdict fields.
 *
 * @param {string} projectRoot - Project root; used only to format the reported path.
 * @param {string} changeDir - Directory expected to contain `learn-report.md`.
 * @param {object} evolution - Recorded evolution outcome; `status` is read here and
 *   the whole object is forwarded to `validateLearnReportAgainstEvolution`.
 * @returns {Promise<{status: string, ready: boolean, reason: string, path: string}>}
 */
async function readLearnEvidenceReadiness(projectRoot, changeDir, evolution) {
    const reportPath = path.join(changeDir, 'learn-report.md');
    const notReady = (status, reason) => ({
        status,
        ready: false,
        reason,
        path: formatProjectPath(projectRoot, reportPath),
    });
    let content;
    try {
        content = await fs.readFile(reportPath, 'utf-8');
    }
    catch {
        // Any read failure is surfaced as a missing report.
        return notReady('missing', evolution.status === 'not-run'
            ? 'learn-report.md is required before archive'
            : `learn/evolution outcome ${evolution.status} is recorded but learn-report.md is absent`);
    }
    if (content.trim().length === 0) {
        return notReady('invalid', 'learn-report.md is empty');
    }
    const validationProblem = validateLearnReportContent(content);
    if (validationProblem) {
        return notReady('invalid', validationProblem);
    }
    const evolutionProblem = validateLearnReportAgainstEvolution(content, evolution);
    if (evolutionProblem) {
        return notReady('invalid', evolutionProblem);
    }
    return {
        status: 'present',
        ready: true,
        reason: 'learn-report.md is present',
        path: formatProjectPath(projectRoot, reportPath),
    };
}
|
|
321
|
+
/**
 * Check that a learn report contains the verdict heading and every required
 * `- Label: value` bullet with a value on the same line.
 *
 * Whitespace after the dash and after the colon is matched with `[^\S\r\n]*`
 * (horizontal whitespace only). A plain `\s*` would run across the line break,
 * so a blank `- Outcome:` would be accepted whenever any later line had text.
 *
 * @param {string} content - Full text of learn-report.md.
 * @returns {string|null} A message listing the missing labels, or null when all are present.
 */
function validateLearnReportContent(content) {
    const required = [
        ['## Episode Verdict', /^## Episode Verdict:/m],
        ['Outcome', /^-[^\S\r\n]*Outcome:[^\S\r\n]*\S/m],
        ['Episode id', /^-[^\S\r\n]*Episode id:[^\S\r\n]*\S/m],
        ['Decision', /^-[^\S\r\n]*Decision:[^\S\r\n]*\S/m],
        ['Evolution', /^-[^\S\r\n]*Evolution:[^\S\r\n]*\S/m],
        ['Advantage', /^-[^\S\r\n]*Advantage:[^\S\r\n]*\S/m],
        // Case-insensitive, and any text may precede "POLICY version" on the line.
        ['Policy version', /^-[^\S\r\n]*.*POLICY version:[^\S\r\n]*\S/im],
        ['Evolved target', /^-[^\S\r\n]*Evolved target:[^\S\r\n]*\S/m],
        ['Canonical file(s) changed', /^-[^\S\r\n]*Canonical file\(s\) changed:[^\S\r\n]*\S/m],
        ['Rollback', /^-[^\S\r\n]*Rollback:[^\S\r\n]*\S/m],
        ['Defects to surface', /^-[^\S\r\n]*Defects to surface:[^\S\r\n]*\S/m],
        ['Key lessons', /^-[^\S\r\n]*Key lessons:[^\S\r\n]*\S/m],
        ['Isolation', /^-[^\S\r\n]*Isolation:[^\S\r\n]*\S/m],
    ];
    const missing = required
        .filter(([, pattern]) => !pattern.test(content))
        .map(([label]) => label);
    if (missing.length === 0) {
        return null;
    }
    return `learn-report.md is missing required verdict field(s): ${missing.join(', ')}`;
}
|
|
344
|
+
/**
 * Pull the verdict fields used for cross-checking out of a learn report.
 *
 * @param {string} content - Full text of learn-report.md.
 * @returns {{outcome: (string|undefined), episodeId: (string|undefined), evolvedTarget: (string|undefined)}}
 *   Each entry is whatever `lineField` returns for the matching bullet label.
 */
function parseLearnReportVerdict(content) {
    const read = (label) => lineField(content, label);
    const outcome = read('Outcome');
    const episodeId = read('Episode id');
    const evolvedTarget = read('Evolved target');
    return { outcome, episodeId, evolvedTarget };
}
|
|
351
|
+
/**
 * Read the value of the first `- <label>: <value>` bullet in `content`.
 *
 * The label is matched case-insensitively and regex metacharacters in it are
 * escaped. Only horizontal whitespace is skipped around the colon, so a bullet
 * with no value on its own line yields `undefined` rather than the text of the
 * following line.
 *
 * @param {string} content - Text to search.
 * @param {string} label - Bullet label, without the leading dash or trailing colon.
 * @returns {string|undefined} The trimmed value, or undefined when absent or blank.
 */
function lineField(content, label) {
    const escaped = label.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    // `[^\S\r\n]` is whitespace other than CR/LF; `(\S.*)` requires a real value on this line.
    const pattern = new RegExp(`^-[^\\S\\r\\n]*${escaped}:[^\\S\\r\\n]*(\\S.*)$`, 'im');
    return pattern.exec(content)?.[1]?.trim();
}
|
|
356
|
+
/**
 * Cross-check the verdict written in learn-report.md against the recorded
 * evolution outcome.
 *
 * Checks run in order: an outcome must have been recorded, the report's
 * outcome must map to the same status, the episode id must match when one is
 * recorded, and the evolved target must match when both sides state one.
 *
 * @param {string} content - Full text of learn-report.md.
 * @param {object} evolution - Recorded outcome; reads `status`, `episodeId`, `targetId`.
 * @returns {string|null} The first discrepancy found, or null when consistent.
 */
function validateLearnReportAgainstEvolution(content, evolution) {
    if (evolution.status === 'not-run') {
        return 'learn-report.md is not bound to a recorded learn/evolution outcome';
    }
    const { outcome, episodeId, evolvedTarget } = parseLearnReportVerdict(content);
    if (evolutionOutcomeStatus(outcome ?? '') !== evolution.status) {
        return `learn-report.md outcome ${outcome ?? 'missing'} does not match latest evolution status ${evolution.status}`;
    }
    if (evolution.episodeId) {
        const reportHasEpisode = Boolean(episodeId) && episodeId !== 'none';
        if (!reportHasEpisode) {
            return `learn-report.md is missing the latest episode id ${evolution.episodeId}`;
        }
        if (episodeId !== evolution.episodeId) {
            return `learn-report.md episode id ${episodeId} does not match latest episode ${evolution.episodeId}`;
        }
    }
    // The target is only compared when both the record and the report name one.
    const bothNameTarget = Boolean(evolution.targetId) && Boolean(evolvedTarget);
    if (bothNameTarget && evolvedTarget !== evolution.targetId) {
        return `learn-report.md target ${evolvedTarget} does not match latest evolution target ${evolution.targetId}`;
    }
    return null;
}
|
|
378
|
+
/**
 * Validate a parsed runner-exit.json record and return the first problem found.
 *
 * Checks run in this order: command, cwd (present and equal to the project
 * root), startedAt, finishedAt, exitCode (integer and zero), testMetrics,
 * stdout/stderr log paths and hashes, test scope, optional artifact paths,
 * and finally that runner-exit.json itself lies under the project root.
 *
 * @param {string} projectRoot - Project root directory.
 * @param {string} changeDir - Change directory, forwarded to the evidence helpers.
 * @param {object} record - Parsed runner-exit.json contents.
 * @param {string} exitJsonPath - Location of the runner-exit.json file.
 * @returns {Promise<string|null>} A human-readable problem, or null when the record is acceptable.
 */
async function validateRunnerExitRecord(projectRoot, changeDir, record, exitJsonPath) {
    if (!stringValue(record.command)) {
        return 'runner-exit.json command is missing or invalid';
    }
    const recordedCwd = stringValue(record.cwd);
    if (!recordedCwd) {
        return 'runner-exit.json cwd is missing or invalid';
    }
    if (!sameResolvedPath(recordedCwd, projectRoot)) {
        return `runner cwd changed from ${recordedCwd} to ${path.resolve(projectRoot)}`;
    }
    if (!stringValue(record.startedAt)) {
        return 'runner-exit.json startedAt is missing or invalid';
    }
    if (!stringValue(record.finishedAt)) {
        return 'runner-exit.json finishedAt is missing or invalid';
    }
    // Number.isInteger is false for every non-number, so no separate typeof check is needed.
    if (!Number.isInteger(record.exitCode)) {
        return 'runner-exit.json exitCode is missing or invalid';
    }
    if (record.exitCode !== 0) {
        return `runner-exit.json exitCode is ${record.exitCode}`;
    }
    const metricsProblem = validateStructuredTestMetrics(record.testMetrics);
    if (metricsProblem) {
        return metricsProblem;
    }
    const stdoutLog = stringValue(record.stdoutLog);
    const stderrLog = stringValue(record.stderrLog);
    if (!stdoutLog) {
        return 'runner-exit.json stdoutLog is missing or invalid';
    }
    if (!stderrLog) {
        return 'runner-exit.json stderrLog is missing or invalid';
    }
    // Run the log checks one at a time and stop at the first non-nullish result.
    const logChecks = [
        () => validateEvidencePath(projectRoot, changeDir, stdoutLog, 'stdoutLog'),
        () => validateEvidencePath(projectRoot, changeDir, stderrLog, 'stderrLog'),
        () => validateEvidenceHash(projectRoot, changeDir, stdoutLog, record.stdoutLogSha256, 'stdoutLogSha256'),
        () => validateEvidenceHash(projectRoot, changeDir, stderrLog, record.stderrLogSha256, 'stderrLogSha256'),
    ];
    let logProblem = null;
    for (const runCheck of logChecks) {
        logProblem = await runCheck();
        if (logProblem !== null && logProblem !== undefined) {
            break;
        }
    }
    if (logProblem) {
        return logProblem;
    }
    const scopeProblem = await validateRunnerScope(projectRoot, changeDir, record, stdoutLog, stderrLog);
    if (scopeProblem) {
        return scopeProblem;
    }
    // Optional artifacts: absent or null is fine, anything else must be a usable path.
    const optionalFields = ['junitXml', 'coverageSummary', 'coverageLcov', 'coverageHtml'];
    for (const field of optionalFields) {
        const rawValue = record[field];
        if (rawValue === null || rawValue === undefined) {
            continue;
        }
        const artifactPath = stringValue(rawValue);
        if (!artifactPath) {
            return `runner-exit.json ${field} must be a path string or null`;
        }
        const artifactProblem = await validateEvidencePath(projectRoot, changeDir, artifactPath, field);
        if (artifactProblem) {
            return artifactProblem;
        }
    }
    const rootPrefix = path.resolve(projectRoot) + path.sep;
    if (!path.resolve(exitJsonPath).startsWith(rootPrefix)) {
        return `runner-exit.json is outside project root: ${exitJsonPath}`;
    }
    return null;
}
|
|
433
|
+
/**
 * Validate the optional `testMetrics` object of a runner-exit.json record.
 *
 * A null or undefined value is accepted. Otherwise `total`, `passed` and
 * `failed` must be finite, non-negative numbers with `passed + failed === total`.
 * If `passRate` is present it must equal `passed / total` (or 0 when `total` is 0)
 * within 1e-9.
 *
 * @param {*} value - The raw `testMetrics` value.
 * @returns {string|null} A problem description, or null when acceptable.
 */
function validateStructuredTestMetrics(value) {
    if (value === null || value === undefined) {
        return null;
    }
    const metrics = asRecord(value);
    if (!metrics) {
        return 'runner-exit.json testMetrics must be an object';
    }
    const counts = [metrics.total, metrics.passed, metrics.failed].map((raw) => metricNumber(raw));
    if (counts.includes(null)) {
        return 'runner-exit.json testMetrics total, passed, and failed must be finite numbers';
    }
    if (counts.some((count) => count < 0)) {
        return 'runner-exit.json testMetrics counts must be non-negative';
    }
    const [total, passed, failed] = counts;
    if (passed + failed !== total) {
        return 'runner-exit.json testMetrics total must equal passed + failed';
    }
    if (!Object.prototype.hasOwnProperty.call(metrics, 'passRate')) {
        return null;
    }
    const passRate = metricNumber(metrics.passRate);
    if (passRate === null) {
        return 'runner-exit.json testMetrics passRate must be a finite number';
    }
    // Compare with a tolerance because the rate is a floating-point ratio.
    const expectedRate = total > 0 ? passed / total : 0;
    if (Math.abs(passRate - expectedRate) > 1e-9) {
        return 'runner-exit.json testMetrics passRate does not match passed / total';
    }
    return null;
}
|
|
462
|
+
/**
 * Check that a green runner result covers at least one test path listed by
 * spec-tests.md.
 *
 * Expected paths come from `extractExpectedTestPaths` applied to spec-tests.md
 * (a read failure passes `undefined`). With no expected paths the check is
 * skipped. Observed paths come from `parseTestCollection` over the combined
 * stdout/stderr logs. When none are observed, paths found in the recorded
 * command are tried. If neither source overlaps, the run is flagged only when
 * the parsed collection reports a non-null `collected` value.
 *
 * @param {string} projectRoot - Project root directory.
 * @param {string} changeDir - Change directory containing spec-tests.md.
 * @param {object} record - Parsed runner-exit.json; `command` is read.
 * @param {string} stdoutLog - Recorded stdout log path.
 * @param {string} stderrLog - Recorded stderr log path.
 * @returns {Promise<string|null>} An out-of-scope message, or null.
 */
async function validateRunnerScope(projectRoot, changeDir, record, stdoutLog, stderrLog) {
    const specTestsPath = path.join(changeDir, 'spec-tests.md');
    const specTests = await fs.readFile(specTestsPath, 'utf-8').catch(() => undefined);
    const expected = extractExpectedTestPaths(specTests)
        .map((entry) => normPath(entry))
        .filter(Boolean);
    if (expected.length === 0) {
        return null;
    }
    const stdoutPath = await resolveEvidencePath(projectRoot, changeDir, stdoutLog);
    const stderrPath = await resolveEvidencePath(projectRoot, changeDir, stderrLog);
    // An unresolvable or unreadable log contributes no text.
    const stdoutText = stdoutPath ? await fs.readFile(stdoutPath, 'utf-8').catch(() => '') : '';
    const stderrText = stderrPath ? await fs.readFile(stderrPath, 'utf-8').catch(() => '') : '';
    const outputText = [stdoutText, stderrText]
        .filter((text) => text.length > 0)
        .join('\n');
    const collection = parseTestCollection(outputText);
    const observed = collection?.paths ?? [];
    if (observed.length > 0) {
        if (pathsIntersect(observed, expected)) {
            return null;
        }
        return 'runner-exit.json green run is out-of-scope for spec-tests.md';
    }
    const commandPaths = commandTestPaths(stringValue(record.command));
    if (pathsIntersect(commandPaths, expected)) {
        return null;
    }
    const knownScope = collection !== null && collection.collected !== null;
    if (knownScope) {
        return 'runner-exit.json green run is out-of-scope for spec-tests.md';
    }
    return null;
}
|
|
490
|
+
/**
 * Confirm that an evidence path named in runner-exit.json resolves and is accessible.
 *
 * @param {string} projectRoot - Project root directory.
 * @param {string} changeDir - Change directory, forwarded to `resolveEvidencePath`.
 * @param {string} rawPath - Path as recorded in runner-exit.json.
 * @param {string} field - Name of the runner-exit.json field, used in messages.
 * @returns {Promise<string|null>} A problem message, or null when the file is accessible.
 */
async function validateEvidencePath(projectRoot, changeDir, rawPath, field) {
    const fullPath = await resolveEvidencePath(projectRoot, changeDir, rawPath);
    if (!fullPath) {
        return `runner-exit.json ${field} points outside project root`;
    }
    let accessible = true;
    try {
        await fs.access(fullPath);
    }
    catch {
        accessible = false;
    }
    if (accessible) {
        return null;
    }
    return `runner-exit.json ${field} is missing: ${formatProjectPath(projectRoot, fullPath)}`;
}
|
|
503
|
+
/**
 * Verify that an evidence file still matches the SHA-256 digest recorded in
 * runner-exit.json.
 *
 * The recorded digest is trimmed and lower-cased before comparison, because
 * `digest('hex')` emits lowercase and an uppercase or padded recorded digest
 * would otherwise be reported as a content mismatch.
 *
 * @param {string} projectRoot - Project root directory.
 * @param {string} changeDir - Change directory, forwarded to `resolveEvidencePath`.
 * @param {string} rawPath - Evidence path as recorded in runner-exit.json.
 * @param {*} rawExpected - Recorded digest; must be a non-empty string.
 * @param {string} field - Name of the digest field, used in messages.
 * @returns {Promise<string|null>} A problem message, or null when the digest matches.
 */
async function validateEvidenceHash(projectRoot, changeDir, rawPath, rawExpected, field) {
    const expected = stringValue(rawExpected);
    if (!expected) {
        return `runner-exit.json ${field} is missing or invalid`;
    }
    const fullPath = await resolveEvidencePath(projectRoot, changeDir, rawPath);
    if (!fullPath) {
        return `runner-exit.json ${field} path points outside project root`;
    }
    let content;
    try {
        // Read as a Buffer so the digest covers the exact bytes on disk.
        content = await fs.readFile(fullPath);
    }
    catch {
        return `runner-exit.json ${field} cannot hash missing evidence: ${formatProjectPath(projectRoot, fullPath)}`;
    }
    const actual = createHash('sha256').update(content).digest('hex');
    if (actual !== expected.trim().toLowerCase()) {
        return `runner-exit.json ${field} does not match current log content`;
    }
    return null;
}
|
|
523
|
+
/**
 * Locate the runner-exit.json file referenced by a test report.
 *
 * The last non-empty match of `RUNNER_EXIT_JSON_PATTERN` is used. It is
 * resolved both directly against the project root and remapped into the change
 * directory's `test-evidence` folder. The first of those that exists on disk
 * is returned; if neither exists, the direct candidate is preferred over the
 * remapped one.
 *
 * @param {string} report - Text of test-report.md.
 * @param {string} projectRoot - Project root directory.
 * @param {string} changeDir - Change directory.
 * @returns {Promise<string|null>} Resolved path, or null when nothing usable is referenced.
 */
async function extractRunnerExitJsonPath(report, projectRoot, changeDir) {
    RUNNER_EXIT_JSON_PATTERN.lastIndex = 0;
    let lastReference = null;
    for (const match of report.matchAll(RUNNER_EXIT_JSON_PATTERN)) {
        const captured = (match[1] ?? match[2] ?? '').trim();
        if (captured) {
            // Strip the parentheses that wrap a markdown link target.
            lastReference = captured.replace(/^\((.*)\)$/, '$1');
        }
    }
    if (lastReference === null) {
        return null;
    }
    const direct = resolveProjectPath(projectRoot, lastReference);
    const remapped = resolveChangeDirTestEvidencePath(projectRoot, changeDir, lastReference);
    if (direct && (await fileExists(direct))) {
        return direct;
    }
    if (remapped && (await fileExists(remapped))) {
        return remapped;
    }
    return direct ?? remapped;
}
|
|
544
|
+
/**
 * Resolve a recorded path and accept it only when it stays inside the project root.
 *
 * Absolute inputs are normalized; relative inputs are resolved against
 * `projectRoot`. The root itself is accepted.
 *
 * @param {string} projectRoot - Project root directory.
 * @param {string} rawPath - Path to resolve.
 * @returns {string|null} The resolved path, or null when empty or outside the root.
 */
function resolveProjectPath(projectRoot, rawPath) {
    if (!rawPath) {
        return null;
    }
    let candidate;
    if (path.isAbsolute(rawPath)) {
        candidate = path.normalize(rawPath);
    }
    else {
        candidate = path.resolve(projectRoot, rawPath);
    }
    const root = path.resolve(projectRoot);
    const withinRoot = candidate === root || candidate.startsWith(root + path.sep);
    return withinRoot ? candidate : null;
}
|
|
553
|
+
/**
 * Remap a relative path that mentions `test-evidence/` onto the change
 * directory's own `test-evidence` folder.
 *
 * Everything after the first `/test-evidence/` (or after a leading
 * `test-evidence/`) is re-rooted under `<changeDir>/test-evidence`. The result
 * is returned only when it lies inside both the project root and the change
 * directory.
 *
 * @param {string} projectRoot - Project root directory.
 * @param {string} changeDir - Change directory.
 * @param {string} rawPath - Recorded path; absolute paths are not remapped.
 * @returns {string|null} The remapped path, or null when not applicable.
 */
function resolveChangeDirTestEvidencePath(projectRoot, changeDir, rawPath) {
    if (path.isAbsolute(rawPath)) {
        return null;
    }
    const forwardSlashed = rawPath.replace(/\\/g, '/');
    const marker = '/test-evidence/';
    const markerAt = forwardSlashed.indexOf(marker);
    let suffix = null;
    if (markerAt >= 0) {
        suffix = forwardSlashed.slice(markerAt + marker.length);
    }
    else if (forwardSlashed.startsWith('test-evidence/')) {
        suffix = forwardSlashed.slice('test-evidence/'.length);
    }
    if (!suffix) {
        return null;
    }
    const resolved = path.resolve(changeDir, 'test-evidence', ...suffix.split('/'));
    const contained = isInside(projectRoot, resolved) && isInside(changeDir, resolved);
    return contained ? resolved : null;
}
|
|
570
|
+
/**
 * Resolve an evidence path, preferring whichever candidate exists on disk.
 *
 * The path is first resolved against the project root, and only if that file
 * does not exist is it remapped into the change directory's `test-evidence`
 * folder. When neither exists, the direct candidate is returned if there is
 * one, otherwise the remapped one.
 *
 * @param {string} projectRoot - Project root directory.
 * @param {string} changeDir - Change directory.
 * @param {string} rawPath - Recorded evidence path.
 * @returns {Promise<string|null>} Resolved path, or null when the input is empty or unresolvable.
 */
async function resolveEvidencePath(projectRoot, changeDir, rawPath) {
    if (!rawPath) {
        return null;
    }
    const direct = resolveProjectPath(projectRoot, rawPath);
    const directExists = Boolean(direct) && (await fileExists(direct));
    if (directExists) {
        return direct;
    }
    const remapped = resolveChangeDirTestEvidencePath(projectRoot, changeDir, rawPath);
    const remappedExists = Boolean(remapped) && (await fileExists(remapped));
    if (remappedExists) {
        return remapped;
    }
    return direct ?? remapped;
}
|
|
255
581
|
async function readCurrentWorkspaceIdentity(projectRoot, changeName) {
|
|
256
582
|
return {
|
|
@@ -347,7 +673,7 @@ function fileSnapshotFromUnknown(value) {
|
|
|
347
673
|
}
|
|
348
674
|
function compareWorkspaceIdentities(projectRoot, changeName, recorded, current) {
|
|
349
675
|
const mismatches = [];
|
|
350
|
-
if (recorded.changeName && recorded.changeName
|
|
676
|
+
if (recorded.changeName && !changeNamesEquivalent(recorded.changeName, changeName)) {
|
|
351
677
|
mismatches.push(`change name changed from ${recorded.changeName} to ${changeName}`);
|
|
352
678
|
}
|
|
353
679
|
if (recorded.cwd && !sameResolvedPath(recorded.cwd, projectRoot)) {
|
|
@@ -374,6 +700,39 @@ function compareIdentityFile(label, recorded, current, mismatches) {
|
|
|
374
700
|
mismatches.push(`${label} content changed since runner evidence was captured`);
|
|
375
701
|
}
|
|
376
702
|
}
|
|
703
|
+
/**
 * List the fields a recorded workspace identity is missing.
 *
 * `cwd` and `changeName` are always required. For `pyproject` and
 * `packageJson`, a recorded entry is required only when the current workspace
 * has that file. It must then carry `path` and `sha256`, plus `name` when the
 * current file has one.
 *
 * @param {object} recorded - Identity snapshot stored with the runner evidence.
 * @param {object} current - Identity snapshot of the workspace as it is now.
 * @returns {string[]} Problem descriptions; empty when the record is complete.
 */
function validateWorkspaceIdentityCompleteness(recorded, current) {
    const problems = [];
    const requirePresent = (present, message) => {
        if (!present) {
            problems.push(message);
        }
    };
    requirePresent(recorded.cwd, 'cwd is missing');
    requirePresent(recorded.changeName, 'changeName is missing');
    const trackedFiles = [
        { key: 'pyproject', label: 'pyproject.toml' },
        { key: 'packageJson', label: 'package.json' },
    ];
    for (const { key, label } of trackedFiles) {
        const currentFile = current[key];
        if (!currentFile) {
            continue;
        }
        const recordedFile = recorded[key];
        if (!recordedFile) {
            problems.push(`${label} identity is missing`);
            continue;
        }
        requirePresent(recordedFile.path, `${label} path is missing`);
        // A name is only demanded when the current file actually has one.
        requirePresent(!currentFile.name || recordedFile.name, `${label} name is missing`);
        requirePresent(recordedFile.sha256, `${label} sha256 is missing`);
    }
    return problems;
}
|
|
730
|
+
/**
 * Decide whether a recorded change name refers to the current change.
 *
 * Names are equivalent when identical, or when the current name is the
 * recorded name prefixed with a `YYYY-MM-DD-` style date.
 *
 * @param {string} recorded - Change name stored with the runner evidence.
 * @param {string} current - Change name being checked now.
 * @returns {boolean} True when the two names are equivalent.
 */
function changeNamesEquivalent(recorded, current) {
    if (recorded === current) {
        return true;
    }
    const [, undatedName] = current.match(/^\d{4}-\d{2}-\d{2}-(.+)$/) ?? [];
    return undatedName === recorded;
}
|
|
377
736
|
function sameResolvedPath(left, right) {
|
|
378
737
|
const normalizedLeft = path.resolve(left);
|
|
379
738
|
const normalizedRight = path.resolve(right);
|
|
@@ -385,11 +744,51 @@ function sameResolvedPath(left, right) {
|
|
|
385
744
|
/**
 * Narrow an unknown value to a non-empty string.
 *
 * @param {*} value - Value to inspect.
 * @returns {string|undefined} The value when it is a non-empty string, otherwise undefined.
 */
function stringValue(value) {
    if (typeof value !== 'string' || value.length === 0) {
        return undefined;
    }
    return value;
}
|
|
747
|
+
/**
 * Narrow an unknown value to a finite number.
 *
 * @param {*} value - Value to inspect.
 * @returns {number|null} The value when it is a finite number, otherwise null.
 */
function metricNumber(value) {
    // Number.isFinite does not coerce, so strings and other non-numbers yield null.
    if (Number.isFinite(value)) {
        return value;
    }
    return null;
}
|
|
388
750
|
/**
 * Narrow an unknown value to a non-null, non-array object.
 *
 * @param {*} value - Value to inspect.
 * @returns {object|null} The same object reference, or null for anything else.
 */
function asRecord(value) {
    const isObjectRecord = Boolean(value) && typeof value === 'object' && !Array.isArray(value);
    if (!isObjectRecord) {
        return null;
    }
    return value;
}
|
|
755
|
+
/**
 * Report whether any observed test path corresponds to any expected path.
 *
 * Observed paths are passed through `normPath`; expected paths are compared
 * as given. Two paths correspond when they are equal or when one ends with
 * `/` followed by the other.
 *
 * @param {string[]} observed - Paths seen in runner output or on the command line.
 * @param {string[]} expected - Paths the change is expected to exercise.
 * @returns {boolean} True when at least one pair corresponds.
 */
function pathsIntersect(observed, expected) {
    if (observed.length === 0 || expected.length === 0) {
        return false;
    }
    const normalizedObserved = observed.map((entry) => normPath(entry));
    return normalizedObserved.some((seen) => expected.some((wanted) => seen === wanted ||
        seen.endsWith('/' + wanted) ||
        wanted.endsWith('/' + seen)));
}
|
|
767
|
+
/**
 * Extract path-like arguments from a test command line.
 *
 * The first recognized runner invocation is blanked out, then the rest is
 * split on whitespace. A token is kept when it is not an option, contains a
 * slash or a test-like word, and either has no letter-only extension or ends
 * in `.py`, `.js`, `.jsx`, `.ts` or `.tsx`. Kept tokens go through `normPath`
 * and are de-duplicated in first-seen order.
 *
 * @param {string|undefined} command - Recorded runner command.
 * @returns {string[]} Normalized path-like tokens; empty when the command is empty.
 */
function commandTestPaths(command) {
    if (!command) {
        return [];
    }
    const withoutRunner = command.replace(/\b(?:pytest|py\.test|python\s+-m\s+pytest|vitest|npm\s+test|pnpm\s+test|yarn\s+test|go\s+test)\b/i, ' ');
    const collected = new Set();
    withoutRunner.split(/\s+/).forEach((piece) => {
        // Drop one surrounding quote character from each end.
        const token = piece.trim().replace(/^['"]|['"]$/g, '');
        const isOptionOrEmpty = !token || token.startsWith('-');
        if (isOptionOrEmpty) {
            return;
        }
        const hasSeparator = /[\\/]/.test(token);
        const hasTestWord = /\b(?:tests?|spec|specs|benchmark_tests)\b/i.test(token);
        if (!hasSeparator && !hasTestWord) {
            return;
        }
        const hasExtension = /\.[a-z]+$/i.test(token);
        const hasSourceExtension = /\.(?:py|[tj]sx?)$/i.test(token);
        if (hasExtension && !hasSourceExtension) {
            return;
        }
        collected.add(normPath(token));
    });
    return Array.from(collected);
}
|
|
785
|
+
/**
 * Normalize a test path for comparison.
 *
 * Converts backslashes to forward slashes, lower-cases, strips one leading
 * `./`, and removes everything from the first `::` onward.
 *
 * @param {string} p - Path to normalize.
 * @returns {string} The normalized path.
 */
function normPath(p) {
    const forwardSlashed = p.replace(/\\/g, '/');
    const lowered = forwardSlashed.toLowerCase();
    const withoutDotSlash = lowered.replace(/^\.\//, '');
    return withoutDotSlash.replace(/::.*/, '');
}
|
|
788
|
+
/**
 * Report whether `candidate` is `root` itself or lies beneath it.
 *
 * Containment is judged from `path.relative`. The candidate escapes the root
 * only when the relative path is exactly `..`, starts with `..` followed by
 * the path separator, or is absolute (as happens across Windows drives). A
 * bare `startsWith('..')` test would also reject real descendants whose first
 * segment merely begins with two dots, such as `..cache/file`.
 *
 * @param {string} root - Directory that must contain the candidate.
 * @param {string} candidate - Path to test.
 * @returns {boolean} True when the candidate is inside (or equal to) the root.
 */
function isInside(root, candidate) {
    const relative = path.relative(path.resolve(root), path.resolve(candidate));
    if (relative === '') {
        return true;
    }
    if (path.isAbsolute(relative)) {
        return false;
    }
    const escapesRoot = relative === '..' || relative.startsWith(`..${path.sep}`);
    return !escapesRoot;
}
|
|
393
792
|
function formatProjectPath(projectRoot, filePath) {
|
|
394
793
|
const relative = path.relative(projectRoot, filePath);
|
|
395
794
|
if (relative && !relative.startsWith('..') && !path.isAbsolute(relative)) {
|
|
@@ -410,7 +809,14 @@ function deriveObservedVerificationReadiness(evolution) {
|
|
|
410
809
|
if (evolution.status === 'promoted' && evolution.promoted) {
|
|
411
810
|
return { status: 'verified', ready: true };
|
|
412
811
|
}
|
|
413
|
-
|
|
812
|
+
if (isObservedVerifiedOutcome(evolution)) {
|
|
813
|
+
return { status: 'verified', ready: true, reason: evolution.reason ?? evolution.outcome };
|
|
814
|
+
}
|
|
815
|
+
return {
|
|
816
|
+
status: 'not-assessed',
|
|
817
|
+
ready: false,
|
|
818
|
+
reason: 'learn/evolution has not recorded observed-verified evidence',
|
|
819
|
+
};
|
|
414
820
|
}
|
|
415
821
|
function isObservedVerificationBlocker(evolution) {
|
|
416
822
|
const raw = `${evolution.outcome ?? ''} ${evolution.reason ?? ''}`.toLowerCase();
|
|
@@ -418,9 +824,8 @@ function isObservedVerificationBlocker(evolution) {
|
|
|
418
824
|
return false;
|
|
419
825
|
const mentionsUnverified = raw.includes('refused-unverified-evidence') ||
|
|
420
826
|
raw.includes('not observed-verified') ||
|
|
421
|
-
raw.includes('observed-verified') ||
|
|
422
827
|
raw.includes('observed-green gate failed') ||
|
|
423
|
-
raw.includes('observed-green') ||
|
|
828
|
+
raw.includes('observed-green failed') ||
|
|
424
829
|
raw.includes('trajectory not verified') ||
|
|
425
830
|
raw.includes('not verified') ||
|
|
426
831
|
raw.includes('unverified') ||
|
|
@@ -432,6 +837,20 @@ function isObservedVerificationBlocker(evolution) {
|
|
|
432
837
|
evolution.status === 'rolled-back' ||
|
|
433
838
|
evolution.status === 'error');
|
|
434
839
|
}
|
|
840
|
+
/**
 * Decide whether an evolution outcome positively states that the run was
 * observed-verified.
 *
 * The lower-cased `outcome` and `reason` text must contain a "verified" phrase
 * and none of the "unverified" phrases. The negative list includes
 * `observed-green failed`, matching the phrases `isObservedVerificationBlocker`
 * treats as unverified. The emptiness guard trims first, because the combined
 * text always contains the separator space.
 *
 * @param {object} evolution - Evolution outcome; reads `outcome` and `reason`.
 * @returns {boolean} True only for an unambiguous verified statement.
 */
function isObservedVerifiedOutcome(evolution) {
    const raw = `${evolution.outcome ?? ''} ${evolution.reason ?? ''}`.toLowerCase();
    if (raw.trim().length === 0) {
        return false;
    }
    const verifiedPhrases = [
        'observed-verified',
        'observed verified',
        'verified-green',
        'verified green',
    ];
    const unverifiedPhrases = [
        'unverified',
        'not observed-verified',
        'observed-green gate failed',
        'observed-green failed',
        'not verified',
    ];
    const mentionsVerified = verifiedPhrases.some((phrase) => raw.includes(phrase));
    const mentionsUnverified = unverifiedPhrases.some((phrase) => raw.includes(phrase));
    return mentionsVerified && !mentionsUnverified;
}
|
|
435
854
|
/**
|
|
436
855
|
* Read the CLI-written evolution outcome for the change, if any. When the manual
|
|
437
856
|
* evolution-result file is absent, fall back to the durable loop-v2 episode store
|
|
@@ -484,7 +903,7 @@ async function readEvolutionOutcome(projectRoot, changeDir, changeName) {
|
|
|
484
903
|
}
|
|
485
904
|
}
|
|
486
905
|
function evolutionOutcomeStatus(outcome) {
|
|
487
|
-
if (outcome === 'promoted')
|
|
906
|
+
if (outcome === 'promoted' || outcome === 'evolved')
|
|
488
907
|
return 'promoted';
|
|
489
908
|
if (outcome === 'rolled-back')
|
|
490
909
|
return 'rolled-back';
|
|
@@ -599,6 +1018,7 @@ async function readLatestEpisodeOutcome(projectRoot, changeDir, changeName) {
|
|
|
599
1018
|
outcome: 'episode-error',
|
|
600
1019
|
reason: episode.terminalError,
|
|
601
1020
|
targetId: episode.targetId,
|
|
1021
|
+
episodeId: episode.episodeId,
|
|
602
1022
|
promoted: false,
|
|
603
1023
|
promotedFiles: [],
|
|
604
1024
|
timestamp: episode.updatedAt,
|
|
@@ -610,6 +1030,7 @@ async function readLatestEpisodeOutcome(projectRoot, changeDir, changeName) {
|
|
|
610
1030
|
outcome: 'episode-evolution-refused',
|
|
611
1031
|
reason: episode.evolutionOutcomeReason ?? 'evolution refused',
|
|
612
1032
|
targetId: episode.targetId,
|
|
1033
|
+
episodeId: episode.episodeId,
|
|
613
1034
|
promoted: false,
|
|
614
1035
|
promotedFiles: [],
|
|
615
1036
|
timestamp: episode.updatedAt,
|
|
@@ -620,6 +1041,7 @@ async function readLatestEpisodeOutcome(projectRoot, changeDir, changeName) {
|
|
|
620
1041
|
status: 'promoted',
|
|
621
1042
|
outcome: 'episode-evolved',
|
|
622
1043
|
targetId: episode.targetId,
|
|
1044
|
+
episodeId: episode.episodeId,
|
|
623
1045
|
promoted: true,
|
|
624
1046
|
promotedFiles: [],
|
|
625
1047
|
timestamp: episode.updatedAt,
|
|
@@ -631,6 +1053,7 @@ async function readLatestEpisodeOutcome(projectRoot, changeDir, changeName) {
|
|
|
631
1053
|
outcome: 'episode-abstained',
|
|
632
1054
|
reason: episode.evolutionOutcomeReason ?? 'reward agent abstained; no policy promotion',
|
|
633
1055
|
targetId: episode.targetId,
|
|
1056
|
+
episodeId: episode.episodeId,
|
|
634
1057
|
promoted: false,
|
|
635
1058
|
promotedFiles: [],
|
|
636
1059
|
timestamp: episode.updatedAt,
|
|
@@ -642,6 +1065,7 @@ async function readLatestEpisodeOutcome(projectRoot, changeDir, changeName) {
|
|
|
642
1065
|
outcome: 'episode-not-spawned',
|
|
643
1066
|
reason: episode.evolutionOutcomeReason ?? 'episode rolled back before policy evolution',
|
|
644
1067
|
targetId: episode.targetId,
|
|
1068
|
+
episodeId: episode.episodeId,
|
|
645
1069
|
promoted: false,
|
|
646
1070
|
promotedFiles: [],
|
|
647
1071
|
timestamp: episode.updatedAt,
|
|
@@ -653,6 +1077,7 @@ async function readLatestEpisodeOutcome(projectRoot, changeDir, changeName) {
|
|
|
653
1077
|
outcome: 'episode-not-spawned',
|
|
654
1078
|
reason: episode.evolutionOutcomeReason ?? 'episode closed without policy promotion',
|
|
655
1079
|
targetId: episode.targetId,
|
|
1080
|
+
episodeId: episode.episodeId,
|
|
656
1081
|
promoted: false,
|
|
657
1082
|
promotedFiles: [],
|
|
658
1083
|
timestamp: episode.updatedAt,
|
|
@@ -664,6 +1089,7 @@ async function readLatestEpisodeOutcome(projectRoot, changeDir, changeName) {
|
|
|
664
1089
|
outcome: 'episode-in-progress',
|
|
665
1090
|
reason: `episode currently at stage ${stage}`,
|
|
666
1091
|
targetId: episode.targetId,
|
|
1092
|
+
episodeId: episode.episodeId,
|
|
667
1093
|
promoted: false,
|
|
668
1094
|
promotedFiles: [],
|
|
669
1095
|
timestamp: episode.updatedAt,
|
|
@@ -47,11 +47,9 @@ export interface ComputeLossInput {
|
|
|
47
47
|
*/
|
|
48
48
|
verified?: boolean;
|
|
49
49
|
/**
|
|
50
|
-
* Weight on the unverified soft-penalty term;
|
|
51
|
-
*
|
|
52
|
-
*
|
|
53
|
-
* auditing without yet moving selection. Raise it to let unverified
|
|
54
|
-
* candidates be down-weighted (never hard-disqualified) when comparing them.
|
|
50
|
+
* Weight on the unverified soft-penalty term; defaults to a small nonzero
|
|
51
|
+
* penalty so authored green reports are visibly non-green unless corroborated
|
|
52
|
+
* by runner evidence. Set it to 0 explicitly to make the signal observe-only.
|
|
55
53
|
*/
|
|
56
54
|
unverifiedWeight?: number;
|
|
57
55
|
}
|
|
@@ -20,8 +20,8 @@ function clamp01(v) {
|
|
|
20
20
|
export function computePerChangeLoss(input) {
|
|
21
21
|
const wf = input.functionalWeight ?? 0.7;
|
|
22
22
|
const wh = input.healthWeight ?? 0.3;
|
|
23
|
-
// Default
|
|
24
|
-
//
|
|
23
|
+
// Default-on soft penalty: authored green reports are not treated as fully
|
|
24
|
+
// measured green unless runner evidence corroborates them.
|
|
25
25
|
const wu = input.unverifiedWeight ?? DEFAULT_UNVERIFIED_WEIGHT;
|
|
26
26
|
const functionalLoss = clamp01(1 - clamp01(input.passRate));
|
|
27
27
|
const healthPenalty = clamp01(input.healthPenalty ?? 0);
|
|
@@ -28,6 +28,7 @@ export interface TestMetrics {
|
|
|
28
28
|
* "1 failed, 9 passed in 0.4s" (order-independent)
|
|
29
29
|
* - SynergySpec reports: table/status prose like
|
|
30
30
|
* "Passed; 85 tests collected and passed"
|
|
31
|
+
* "Summary: 29 collected, 29 passed, 0 failed, 0 skipped, 0 collection errors."
|
|
31
32
|
* A bare "N passed" line that is not a recognized summary (e.g. prose or a
|
|
32
33
|
* per-suite tally) is ignored. Pytest "errors" count toward `failed`.
|
|
33
34
|
* Returns null when no recognized summary is found.
|