principles-disciple 1.18.0 → 1.19.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/openclaw.plugin.json +1 -1
- package/package.json +1 -1
- package/src/commands/nocturnal-rollout.ts +2 -0
- package/src/core/merge-gate-audit.ts +506 -0
- package/src/core/nocturnal-export.ts +106 -6
- package/src/core/nocturnal-trinity.ts +111 -28
- package/src/core/promotion-gate.ts +33 -0
- package/src/core/replay-engine.ts +25 -0
- package/src/service/subagent-workflow/nocturnal-workflow-manager.ts +2 -6
- package/tests/core/merge-gate-audit.test.ts +284 -0
- package/tests/core/nocturnal-export.test.ts +55 -0
- package/tests/core/nocturnal-trinity.test.ts +77 -4
- package/tests/core/pain-integration.test.ts +27 -0
- package/tests/core/promotion-gate.test.ts +5 -0
- package/tests/core/replay-engine.test.ts +19 -0
- package/tests/service/nocturnal-workflow-manager.test.ts +2 -0
|
@@ -0,0 +1,284 @@
|
|
|
1
|
+
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
|
|
2
|
+
import * as fs from 'fs';
|
|
3
|
+
import * as os from 'os';
|
|
4
|
+
import * as path from 'path';
|
|
5
|
+
import {
|
|
6
|
+
formatMergeGateAuditReport,
|
|
7
|
+
runMergeGateAudit,
|
|
8
|
+
} from '../../src/core/merge-gate-audit.js';
|
|
9
|
+
import type { NocturnalArtifact } from '../../src/core/nocturnal-arbiter.js';
|
|
10
|
+
import {
|
|
11
|
+
registerSample,
|
|
12
|
+
updateReviewStatus,
|
|
13
|
+
} from '../../src/core/nocturnal-dataset.js';
|
|
14
|
+
import { appendArtifactLineageRecord } from '../../src/core/nocturnal-artifact-lineage.js';
|
|
15
|
+
import { exportORPOSamples } from '../../src/core/nocturnal-export.js';
|
|
16
|
+
import { createImplementationAssetDir, getImplementationAssetRoot } from '../../src/core/code-implementation-storage.js';
|
|
17
|
+
import { safeRmDir } from '../test-utils.js';
|
|
18
|
+
|
|
19
|
+
function makeArtifact(overrides: Partial<NocturnalArtifact> = {}): NocturnalArtifact {
|
|
20
|
+
return {
|
|
21
|
+
artifactId: 'artifact-1',
|
|
22
|
+
sessionId: 'session-1',
|
|
23
|
+
principleId: 'T-08',
|
|
24
|
+
sourceSnapshotRef: 'snapshot-1',
|
|
25
|
+
badDecision: 'Retried without checking state',
|
|
26
|
+
betterDecision: 'Inspect state before retrying',
|
|
27
|
+
rationale: 'Evidence first.',
|
|
28
|
+
createdAt: '2026-04-12T09:00:00.000Z',
|
|
29
|
+
...overrides,
|
|
30
|
+
};
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
describe('merge-gate-audit', () => {
|
|
34
|
+
let tempDir: string;
|
|
35
|
+
let workspaceDir: string;
|
|
36
|
+
let stateDir: string;
|
|
37
|
+
|
|
38
|
+
beforeEach(() => {
|
|
39
|
+
tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'pd-merge-gate-audit-'));
|
|
40
|
+
workspaceDir = path.join(tempDir, 'workspace');
|
|
41
|
+
stateDir = path.join(tempDir, '.state');
|
|
42
|
+
fs.mkdirSync(workspaceDir, { recursive: true });
|
|
43
|
+
fs.mkdirSync(stateDir, { recursive: true });
|
|
44
|
+
});
|
|
45
|
+
|
|
46
|
+
afterEach(() => {
|
|
47
|
+
safeRmDir(tempDir);
|
|
48
|
+
});
|
|
49
|
+
|
|
50
|
+
function registerApprovedArtifact(artifactId = 'artifact-1'): string {
|
|
51
|
+
const artifact = makeArtifact({ artifactId });
|
|
52
|
+
const artifactPath = path.join(
|
|
53
|
+
workspaceDir,
|
|
54
|
+
'.state',
|
|
55
|
+
'nocturnal',
|
|
56
|
+
'samples',
|
|
57
|
+
`${artifactId}.json`,
|
|
58
|
+
);
|
|
59
|
+
fs.mkdirSync(path.dirname(artifactPath), { recursive: true });
|
|
60
|
+
fs.writeFileSync(artifactPath, JSON.stringify(artifact, null, 2), 'utf-8');
|
|
61
|
+
|
|
62
|
+
const record = registerSample(workspaceDir, artifact, artifactPath, 'gpt-4').record;
|
|
63
|
+
updateReviewStatus(
|
|
64
|
+
workspaceDir,
|
|
65
|
+
record.sampleFingerprint,
|
|
66
|
+
'approved_for_training',
|
|
67
|
+
'approved for merge gate audit',
|
|
68
|
+
);
|
|
69
|
+
|
|
70
|
+
appendArtifactLineageRecord(workspaceDir, {
|
|
71
|
+
artifactKind: 'behavioral-sample',
|
|
72
|
+
artifactId: record.artifactId,
|
|
73
|
+
principleId: record.principleId,
|
|
74
|
+
ruleId: null,
|
|
75
|
+
sessionId: record.sessionId,
|
|
76
|
+
sourceSnapshotRef: record.sourceSnapshotRef,
|
|
77
|
+
sourcePainIds: ['pain-1'],
|
|
78
|
+
sourceGateBlockIds: ['gate-1'],
|
|
79
|
+
storagePath: artifactPath,
|
|
80
|
+
implementationId: null,
|
|
81
|
+
createdAt: record.createdAt,
|
|
82
|
+
});
|
|
83
|
+
|
|
84
|
+
return record.sampleFingerprint;
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
it('returns defer when audit surfaces are not populated yet', () => {
|
|
88
|
+
const report = runMergeGateAudit(workspaceDir, stateDir);
|
|
89
|
+
|
|
90
|
+
expect(report.overallStatus).toBe('defer');
|
|
91
|
+
expect(report.checks.find((check) => check.id === 'pain_flag_path_contract')?.status).toBe('pass');
|
|
92
|
+
expect(report.checks.find((check) => check.id === 'queue_path_contract')?.status).toBe('pass');
|
|
93
|
+
expect(report.checks.find((check) => check.id === 'runtime_adapter_contract')?.status).toBe('pass');
|
|
94
|
+
expect(report.counts.defer).toBeGreaterThan(0);
|
|
95
|
+
});
|
|
96
|
+
|
|
97
|
+
it('blocks malformed replay reports that claim pass without evidence', () => {
|
|
98
|
+
createImplementationAssetDir(stateDir, 'IMPL-1', '1.0.0');
|
|
99
|
+
const replayDir = path.join(getImplementationAssetRoot(stateDir, 'IMPL-1'), 'replays');
|
|
100
|
+
fs.mkdirSync(replayDir, { recursive: true });
|
|
101
|
+
fs.writeFileSync(
|
|
102
|
+
path.join(replayDir, 'bad-report.json'),
|
|
103
|
+
JSON.stringify(
|
|
104
|
+
{
|
|
105
|
+
overallDecision: 'pass',
|
|
106
|
+
blockers: [],
|
|
107
|
+
generatedAt: '2026-04-12T09:00:00.000Z',
|
|
108
|
+
implementationId: 'IMPL-1',
|
|
109
|
+
evidenceSummary: {
|
|
110
|
+
evidenceStatus: 'empty',
|
|
111
|
+
totalSamples: 0,
|
|
112
|
+
classifiedCounts: {
|
|
113
|
+
painNegative: 0,
|
|
114
|
+
successPositive: 0,
|
|
115
|
+
principleAnchor: 0,
|
|
116
|
+
},
|
|
117
|
+
},
|
|
118
|
+
},
|
|
119
|
+
null,
|
|
120
|
+
2,
|
|
121
|
+
),
|
|
122
|
+
'utf-8',
|
|
123
|
+
);
|
|
124
|
+
|
|
125
|
+
const report = runMergeGateAudit(workspaceDir, stateDir);
|
|
126
|
+
const replayCheck = report.checks.find((check) => check.id === 'replay_evidence_integrity');
|
|
127
|
+
|
|
128
|
+
expect(report.overallStatus).toBe('block');
|
|
129
|
+
expect(replayCheck?.status).toBe('block');
|
|
130
|
+
});
|
|
131
|
+
|
|
132
|
+
it('passes populated dataset, lineage, export, and replay evidence surfaces', () => {
|
|
133
|
+
registerApprovedArtifact('artifact-pass');
|
|
134
|
+
const exportResult = exportORPOSamples(workspaceDir, 'gpt-4');
|
|
135
|
+
expect(exportResult.success).toBe(true);
|
|
136
|
+
|
|
137
|
+
createImplementationAssetDir(stateDir, 'IMPL-1', '1.0.0');
|
|
138
|
+
const replayDir = path.join(getImplementationAssetRoot(stateDir, 'IMPL-1'), 'replays');
|
|
139
|
+
fs.mkdirSync(replayDir, { recursive: true });
|
|
140
|
+
fs.writeFileSync(
|
|
141
|
+
path.join(replayDir, 'good-report.json'),
|
|
142
|
+
JSON.stringify(
|
|
143
|
+
{
|
|
144
|
+
overallDecision: 'pass',
|
|
145
|
+
replayResults: {
|
|
146
|
+
painNegative: { total: 1, passed: 1, failed: 0, details: [] },
|
|
147
|
+
successPositive: { total: 0, passed: 0, failed: 0, details: [] },
|
|
148
|
+
principleAnchor: { total: 0, passed: 0, failed: 0, details: [] },
|
|
149
|
+
},
|
|
150
|
+
blockers: [],
|
|
151
|
+
generatedAt: '2026-04-12T09:00:00.000Z',
|
|
152
|
+
implementationId: 'IMPL-1',
|
|
153
|
+
sampleFingerprints: ['sample-1'],
|
|
154
|
+
evidenceSummary: {
|
|
155
|
+
evidenceStatus: 'observed',
|
|
156
|
+
totalSamples: 1,
|
|
157
|
+
classifiedCounts: {
|
|
158
|
+
painNegative: 1,
|
|
159
|
+
successPositive: 0,
|
|
160
|
+
principleAnchor: 0,
|
|
161
|
+
},
|
|
162
|
+
},
|
|
163
|
+
},
|
|
164
|
+
null,
|
|
165
|
+
2,
|
|
166
|
+
),
|
|
167
|
+
'utf-8',
|
|
168
|
+
);
|
|
169
|
+
|
|
170
|
+
const report = runMergeGateAudit(workspaceDir, stateDir);
|
|
171
|
+
|
|
172
|
+
expect(report.overallStatus).toBe('pass');
|
|
173
|
+
expect(report.counts.block).toBe(0);
|
|
174
|
+
expect(report.counts.defer).toBe(0);
|
|
175
|
+
expect(formatMergeGateAuditReport(report)).toContain('Overall Status: PASS');
|
|
176
|
+
});
|
|
177
|
+
|
|
178
|
+
it('blocks when dataset artifacts are missing', () => {
|
|
179
|
+
const artifactPath = path.join(
|
|
180
|
+
workspaceDir,
|
|
181
|
+
'.state',
|
|
182
|
+
'nocturnal',
|
|
183
|
+
'samples',
|
|
184
|
+
'artifact-missing.json',
|
|
185
|
+
);
|
|
186
|
+
fs.mkdirSync(path.dirname(artifactPath), { recursive: true });
|
|
187
|
+
const artifact = makeArtifact({ artifactId: 'artifact-missing' });
|
|
188
|
+
fs.writeFileSync(artifactPath, JSON.stringify(artifact, null, 2), 'utf-8');
|
|
189
|
+
|
|
190
|
+
const record = registerSample(workspaceDir, artifact, artifactPath, 'gpt-4').record;
|
|
191
|
+
updateReviewStatus(workspaceDir, record.sampleFingerprint, 'approved_for_training', 'approved');
|
|
192
|
+
|
|
193
|
+
// Append lineage pointing to a real file (so lineage passes)
|
|
194
|
+
appendArtifactLineageRecord(workspaceDir, {
|
|
195
|
+
artifactKind: 'behavioral-sample',
|
|
196
|
+
artifactId: record.artifactId,
|
|
197
|
+
principleId: record.principleId,
|
|
198
|
+
ruleId: null,
|
|
199
|
+
sessionId: record.sessionId,
|
|
200
|
+
sourceSnapshotRef: record.sourceSnapshotRef,
|
|
201
|
+
sourcePainIds: [],
|
|
202
|
+
sourceGateBlockIds: [],
|
|
203
|
+
storagePath: artifactPath,
|
|
204
|
+
implementationId: null,
|
|
205
|
+
createdAt: record.createdAt,
|
|
206
|
+
});
|
|
207
|
+
|
|
208
|
+
// Delete the artifact to simulate a missing file
|
|
209
|
+
fs.unlinkSync(artifactPath);
|
|
210
|
+
|
|
211
|
+
const report = runMergeGateAudit(workspaceDir, stateDir);
|
|
212
|
+
const datasetCheck = report.checks.find((c) => c.id === 'dataset_artifact_integrity');
|
|
213
|
+
|
|
214
|
+
expect(report.overallStatus).toBe('block');
|
|
215
|
+
expect(datasetCheck?.status).toBe('block');
|
|
216
|
+
});
|
|
217
|
+
|
|
218
|
+
it('blocks when artifact lineage storage paths are missing', () => {
|
|
219
|
+
const badPath = path.join(workspaceDir, '.state', 'nocturnal', 'samples', 'nonexistent.json');
|
|
220
|
+
appendArtifactLineageRecord(workspaceDir, {
|
|
221
|
+
artifactKind: 'behavioral-sample',
|
|
222
|
+
artifactId: 'lineage-missing',
|
|
223
|
+
principleId: 'T-08',
|
|
224
|
+
ruleId: null,
|
|
225
|
+
sessionId: 'session-1',
|
|
226
|
+
sourceSnapshotRef: 'snap-1',
|
|
227
|
+
sourcePainIds: [],
|
|
228
|
+
sourceGateBlockIds: [],
|
|
229
|
+
storagePath: badPath,
|
|
230
|
+
implementationId: null,
|
|
231
|
+
createdAt: new Date().toISOString(),
|
|
232
|
+
});
|
|
233
|
+
|
|
234
|
+
const report = runMergeGateAudit(workspaceDir, stateDir);
|
|
235
|
+
const lineageCheck = report.checks.find((c) => c.id === 'artifact_lineage_integrity');
|
|
236
|
+
|
|
237
|
+
expect(report.overallStatus).toBe('block');
|
|
238
|
+
expect(lineageCheck?.status).toBe('block');
|
|
239
|
+
});
|
|
240
|
+
|
|
241
|
+
it('blocks when replay reports are malformed', () => {
|
|
242
|
+
createImplementationAssetDir(stateDir, 'IMPL-BAD', '1.0.0');
|
|
243
|
+
const replayDir = path.join(getImplementationAssetRoot(stateDir, 'IMPL-BAD'), 'replays');
|
|
244
|
+
fs.mkdirSync(replayDir, { recursive: true });
|
|
245
|
+
fs.writeFileSync(
|
|
246
|
+
path.join(replayDir, 'malformed.json'),
|
|
247
|
+
'{bad json',
|
|
248
|
+
'utf-8',
|
|
249
|
+
);
|
|
250
|
+
|
|
251
|
+
const report = runMergeGateAudit(workspaceDir, stateDir);
|
|
252
|
+
const replayCheck = report.checks.find((c) => c.id === 'replay_evidence_integrity');
|
|
253
|
+
const details = replayCheck?.details as Record<string, string[]> | undefined;
|
|
254
|
+
|
|
255
|
+
expect(report.overallStatus).toBe('block');
|
|
256
|
+
expect(replayCheck?.status).toBe('block');
|
|
257
|
+
expect(details?.malformedReports).toHaveLength(1);
|
|
258
|
+
});
|
|
259
|
+
|
|
260
|
+
it('blocks when replay reports have invalid evidenceSummary shape', () => {
|
|
261
|
+
createImplementationAssetDir(stateDir, 'IMPL-NOEVID', '1.0.0');
|
|
262
|
+
const replayDir = path.join(getImplementationAssetRoot(stateDir, 'IMPL-NOEVID'), 'replays');
|
|
263
|
+
fs.mkdirSync(replayDir, { recursive: true });
|
|
264
|
+
fs.writeFileSync(
|
|
265
|
+
path.join(replayDir, 'bad-evidence.json'),
|
|
266
|
+
JSON.stringify({
|
|
267
|
+
overallDecision: 'pass',
|
|
268
|
+
blockers: [],
|
|
269
|
+
generatedAt: '2026-04-12T09:00:00.000Z',
|
|
270
|
+
implementationId: 'IMPL-NOEVID',
|
|
271
|
+
evidenceSummary: { evidenceStatus: 'observed' }, // missing totalSamples
|
|
272
|
+
}),
|
|
273
|
+
'utf-8',
|
|
274
|
+
);
|
|
275
|
+
|
|
276
|
+
const report = runMergeGateAudit(workspaceDir, stateDir);
|
|
277
|
+
const replayCheck = report.checks.find((c) => c.id === 'replay_evidence_integrity');
|
|
278
|
+
const details = replayCheck?.details as Record<string, string[]> | undefined;
|
|
279
|
+
|
|
280
|
+
expect(report.overallStatus).toBe('block');
|
|
281
|
+
expect(replayCheck?.status).toBe('block');
|
|
282
|
+
expect(details?.missingEvidenceSummary).toHaveLength(1);
|
|
283
|
+
});
|
|
284
|
+
});
|
|
@@ -15,6 +15,7 @@ import {
|
|
|
15
15
|
updateReviewStatus,
|
|
16
16
|
getDatasetRecord,
|
|
17
17
|
} from '../../src/core/nocturnal-dataset.js';
|
|
18
|
+
import { appendArtifactLineageRecord } from '../../src/core/nocturnal-artifact-lineage.js';
|
|
18
19
|
import type { NocturnalDatasetRecord } from '../../src/core/nocturnal-dataset.js';
|
|
19
20
|
|
|
20
21
|
// ---------------------------------------------------------------------------
|
|
@@ -123,9 +124,63 @@ describe('NocturnalExport exportORPOSamples', () => {
|
|
|
123
124
|
expect(sample.rejected).toBeTruthy();
|
|
124
125
|
expect(sample.rationale).toBeTruthy();
|
|
125
126
|
expect(sample.datasetMetadata.exportId).toBe(result.manifest!.exportId);
|
|
127
|
+
expect(sample.datasetMetadata.evidenceSummary.lineageStatus).toBe('unknown');
|
|
126
128
|
}
|
|
127
129
|
});
|
|
128
130
|
|
|
131
|
+
it('degrades to evidence-bounded neutral text when lineage is missing', () => {
|
|
132
|
+
setupExportReady(tmpDir, 'art-no-lineage', 'gpt-4');
|
|
133
|
+
|
|
134
|
+
const result = exportORPOSamples(tmpDir, 'gpt-4');
|
|
135
|
+
|
|
136
|
+
expect(result.success).toBe(true);
|
|
137
|
+
const [sample] = fs.readFileSync(result.manifest!.exportPath, 'utf-8').trim().split('\n').map((line) => JSON.parse(line));
|
|
138
|
+
expect(sample.prompt).toBe('Take the next action without verified source evidence.');
|
|
139
|
+
expect(sample.rejected).toBe('Take the next action without verified source evidence.');
|
|
140
|
+
expect(sample.rationale).toContain('Source evidence is unknown');
|
|
141
|
+
expect(sample.datasetMetadata.evidenceSummary).toEqual({
|
|
142
|
+
lineageStatus: 'unknown',
|
|
143
|
+
painSignals: { status: 'unknown', count: null, ids: [] },
|
|
144
|
+
gateBlocks: { status: 'unknown', count: null, ids: [] },
|
|
145
|
+
});
|
|
146
|
+
});
|
|
147
|
+
|
|
148
|
+
it('exports observed lineage evidence when available', () => {
|
|
149
|
+
const record = setupExportReady(tmpDir, 'art-with-lineage', 'gpt-4');
|
|
150
|
+
appendArtifactLineageRecord(tmpDir, {
|
|
151
|
+
artifactKind: 'behavioral-sample',
|
|
152
|
+
artifactId: record.artifactId,
|
|
153
|
+
principleId: record.principleId,
|
|
154
|
+
ruleId: null,
|
|
155
|
+
sessionId: record.sessionId,
|
|
156
|
+
sourceSnapshotRef: record.sourceSnapshotRef,
|
|
157
|
+
sourcePainIds: ['pain-1', 'pain-2'],
|
|
158
|
+
sourceGateBlockIds: ['gate-1'],
|
|
159
|
+
storagePath: record.artifactPath,
|
|
160
|
+
implementationId: null,
|
|
161
|
+
createdAt: record.createdAt,
|
|
162
|
+
});
|
|
163
|
+
|
|
164
|
+
const result = exportORPOSamples(tmpDir, 'gpt-4');
|
|
165
|
+
|
|
166
|
+
expect(result.success).toBe(true);
|
|
167
|
+
const [sample] = fs.readFileSync(result.manifest!.exportPath, 'utf-8').trim().split('\n').map((line) => JSON.parse(line));
|
|
168
|
+
expect(sample.prompt).toContain('2 observed pain signals');
|
|
169
|
+
expect(sample.rejected).toContain('1 observed gate blocks');
|
|
170
|
+
expect(sample.rationale).toContain('Observed source evidence: 2 pain signals and 1 gate blocks');
|
|
171
|
+
expect(sample.datasetMetadata.evidenceSummary.lineageStatus).toBe('observed');
|
|
172
|
+
expect(sample.datasetMetadata.evidenceSummary.painSignals).toEqual({
|
|
173
|
+
status: 'observed',
|
|
174
|
+
count: 2,
|
|
175
|
+
ids: ['pain-1', 'pain-2'],
|
|
176
|
+
});
|
|
177
|
+
expect(sample.datasetMetadata.evidenceSummary.gateBlocks).toEqual({
|
|
178
|
+
status: 'observed',
|
|
179
|
+
count: 1,
|
|
180
|
+
ids: ['gate-1'],
|
|
181
|
+
});
|
|
182
|
+
});
|
|
183
|
+
|
|
129
184
|
it('writes manifest alongside JSONL', () => {
|
|
130
185
|
setupExportReady(tmpDir, 'art-manifest-1', 'gpt-4');
|
|
131
186
|
|
|
@@ -5,6 +5,8 @@ import {
|
|
|
5
5
|
validateDraftArtifact,
|
|
6
6
|
draftToArtifact,
|
|
7
7
|
DEFAULT_TRINITY_CONFIG,
|
|
8
|
+
OpenClawTrinityRuntimeAdapter,
|
|
9
|
+
TrinityRuntimeContractError,
|
|
8
10
|
type TrinityConfig,
|
|
9
11
|
type DreamerOutput,
|
|
10
12
|
type PhilosopherOutput,
|
|
@@ -25,12 +27,16 @@ function makeSnapshot(overrides: Partial<{
|
|
|
25
27
|
failureCount: number;
|
|
26
28
|
totalPainEvents: number;
|
|
27
29
|
totalGateBlocks: number;
|
|
28
|
-
}> = {})
|
|
29
|
-
sessionId: string;
|
|
30
|
-
stats: { failureCount: number; totalPainEvents: number; totalGateBlocks: number; totalAssistantTurns: number; totalToolCalls: number };
|
|
31
|
-
} {
|
|
30
|
+
}> = {}) {
|
|
32
31
|
return {
|
|
33
32
|
sessionId: 'session-test-123',
|
|
33
|
+
startedAt: '2026-04-12T00:00:00.000Z',
|
|
34
|
+
updatedAt: '2026-04-12T00:05:00.000Z',
|
|
35
|
+
assistantTurns: [],
|
|
36
|
+
userTurns: [],
|
|
37
|
+
toolCalls: [],
|
|
38
|
+
painEvents: [],
|
|
39
|
+
gateBlocks: [],
|
|
34
40
|
stats: {
|
|
35
41
|
failureCount: overrides.failureCount ?? 0,
|
|
36
42
|
totalPainEvents: overrides.totalPainEvents ?? 0,
|
|
@@ -236,6 +242,73 @@ describe('validateDreamerOutput', () => {
|
|
|
236
242
|
});
|
|
237
243
|
});
|
|
238
244
|
|
|
245
|
+
describe('OpenClawTrinityRuntimeAdapter contract hardening', () => {
|
|
246
|
+
function makeRuntimeApi(overrides: Partial<any> = {}) {
|
|
247
|
+
return {
|
|
248
|
+
runtime: {
|
|
249
|
+
agent: {
|
|
250
|
+
runEmbeddedPiAgent: vi.fn().mockResolvedValue({
|
|
251
|
+
payloads: [
|
|
252
|
+
{ text: '{"valid":true,"candidates":[],"generatedAt":"2026-04-12T00:00:00.000Z"}' },
|
|
253
|
+
],
|
|
254
|
+
}),
|
|
255
|
+
},
|
|
256
|
+
config: {
|
|
257
|
+
loadConfig: vi.fn().mockReturnValue({
|
|
258
|
+
agents: {
|
|
259
|
+
defaults: {
|
|
260
|
+
model: 'openai/gpt-5.4',
|
|
261
|
+
},
|
|
262
|
+
},
|
|
263
|
+
}),
|
|
264
|
+
},
|
|
265
|
+
...overrides.runtime,
|
|
266
|
+
},
|
|
267
|
+
logger: {
|
|
268
|
+
info: vi.fn(),
|
|
269
|
+
warn: vi.fn(),
|
|
270
|
+
error: vi.fn(),
|
|
271
|
+
},
|
|
272
|
+
};
|
|
273
|
+
}
|
|
274
|
+
|
|
275
|
+
it('rejects missing runtime.agent.runEmbeddedPiAgent contract explicitly', () => {
|
|
276
|
+
expect(() => new OpenClawTrinityRuntimeAdapter({ runtime: {} } as any)).toThrow(TrinityRuntimeContractError);
|
|
277
|
+
expect(() => new OpenClawTrinityRuntimeAdapter({ runtime: {} } as any)).toThrow(/runtime_unavailable/);
|
|
278
|
+
});
|
|
279
|
+
|
|
280
|
+
it('passes explicit provider/model overrides into runtime.agent.runEmbeddedPiAgent', async () => {
|
|
281
|
+
const api = makeRuntimeApi();
|
|
282
|
+
const adapter = new OpenClawTrinityRuntimeAdapter(api as any);
|
|
283
|
+
|
|
284
|
+
await adapter.invokeDreamer(makeSnapshot({ failureCount: 1 }) as any, 'T-08', 2);
|
|
285
|
+
|
|
286
|
+
expect(api.runtime.agent.runEmbeddedPiAgent).toHaveBeenCalledWith(
|
|
287
|
+
expect.objectContaining({
|
|
288
|
+
provider: 'openai',
|
|
289
|
+
model: 'gpt-5.4',
|
|
290
|
+
}),
|
|
291
|
+
);
|
|
292
|
+
});
|
|
293
|
+
|
|
294
|
+
it('returns stable failure classes when runtime invocation fails', async () => {
|
|
295
|
+
const api = makeRuntimeApi({
|
|
296
|
+
runtime: {
|
|
297
|
+
agent: {
|
|
298
|
+
runEmbeddedPiAgent: vi.fn().mockRejectedValue(new Error('gateway unavailable')),
|
|
299
|
+
},
|
|
300
|
+
},
|
|
301
|
+
});
|
|
302
|
+
const adapter = new OpenClawTrinityRuntimeAdapter(api as any);
|
|
303
|
+
|
|
304
|
+
const result = await adapter.invokeDreamer(makeSnapshot({ failureCount: 1 }) as any, 'T-08', 2);
|
|
305
|
+
|
|
306
|
+
expect(result.valid).toBe(false);
|
|
307
|
+
expect(result.reason).toContain('runtime_run_failed');
|
|
308
|
+
expect(adapter.getLastFailureReason()).toContain('runtime_run_failed');
|
|
309
|
+
});
|
|
310
|
+
});
|
|
311
|
+
|
|
239
312
|
// ---------------------------------------------------------------------------
|
|
240
313
|
// Tests: validatePhilosopherOutput
|
|
241
314
|
// ---------------------------------------------------------------------------
|
|
@@ -479,5 +479,32 @@ unknown_meta: some data`;
|
|
|
479
479
|
fs.rmSync(freshDir, { recursive: true, force: true });
|
|
480
480
|
}
|
|
481
481
|
});
|
|
482
|
+
|
|
483
|
+
it('readPainFlagData reads only the canonical .state/.pain_flag path', () => {
|
|
484
|
+
const legacyRootPath = path.join(TEST_DIR, 'PAIN_FLAG');
|
|
485
|
+
fs.writeFileSync(
|
|
486
|
+
legacyRootPath,
|
|
487
|
+
`source: legacy_root
|
|
488
|
+
score: 90
|
|
489
|
+
reason: should be ignored
|
|
490
|
+
time: 2026-04-10T09:00:00.000Z`,
|
|
491
|
+
'utf-8',
|
|
492
|
+
);
|
|
493
|
+
fs.writeFileSync(
|
|
494
|
+
path.join(STATE_DIR, '.pain_flag'),
|
|
495
|
+
`source: canonical_state
|
|
496
|
+
score: 80
|
|
497
|
+
reason: should be read
|
|
498
|
+
time: 2026-04-10T09:00:00.000Z`,
|
|
499
|
+
'utf-8',
|
|
500
|
+
);
|
|
501
|
+
|
|
502
|
+
const result = readPainFlagData(TEST_DIR);
|
|
503
|
+
|
|
504
|
+
expect(result.source).toBe('canonical_state');
|
|
505
|
+
expect(result.score).toBe('80');
|
|
506
|
+
const legacyResult = readPainFlagData(path.join(TEST_DIR, '..'));
|
|
507
|
+
expect(legacyResult.source).not.toBe('legacy_root');
|
|
508
|
+
});
|
|
482
509
|
});
|
|
483
510
|
});
|
|
@@ -255,6 +255,11 @@ describe('promotion-gate', () => {
|
|
|
255
255
|
|
|
256
256
|
expect(result.constraintChecks.length).toBeGreaterThan(0);
|
|
257
257
|
expect(result.deltaCheck).toBeDefined();
|
|
258
|
+
expect(result.evidenceSummary).toEqual({
|
|
259
|
+
evidenceMode: 'eval-proxy',
|
|
260
|
+
shadowSampleCount: 0,
|
|
261
|
+
deltaSource: 'eval',
|
|
262
|
+
});
|
|
258
263
|
});
|
|
259
264
|
});
|
|
260
265
|
|
|
@@ -231,4 +231,23 @@ describe('ReplayEngine', () => {
|
|
|
231
231
|
expect(fs.existsSync(reportDir)).toBe(true);
|
|
232
232
|
expect(fs.readdirSync(reportDir).some((file) => file.endsWith('.json'))).toBe(true);
|
|
233
233
|
});
|
|
234
|
+
|
|
235
|
+
it('marks empty replay evidence as needs-review instead of pass', () => {
|
|
236
|
+
seedLedgerAndImplementation();
|
|
237
|
+
|
|
238
|
+
const engine = new ReplayEngine(workspaceDir, stateDir);
|
|
239
|
+
const report = engine.runReplayForImplementation('IMPL-1', ['pain-negative']);
|
|
240
|
+
|
|
241
|
+
expect(report.overallDecision).toBe('needs-review');
|
|
242
|
+
expect(report.evidenceSummary).toEqual({
|
|
243
|
+
evidenceStatus: 'empty',
|
|
244
|
+
totalSamples: 0,
|
|
245
|
+
classifiedCounts: {
|
|
246
|
+
painNegative: 0,
|
|
247
|
+
successPositive: 0,
|
|
248
|
+
principleAnchor: 0,
|
|
249
|
+
},
|
|
250
|
+
});
|
|
251
|
+
expect(report.blockers[0]).toContain('NO REPLAY EVIDENCE');
|
|
252
|
+
});
|
|
234
253
|
});
|
|
@@ -29,6 +29,8 @@ const mockRunTrinityAsync = runTrinityAsync as ReturnType<typeof vi.fn>;
|
|
|
29
29
|
|
|
30
30
|
function createMockRuntimeAdapter() {
|
|
31
31
|
return {
|
|
32
|
+
isRuntimeAvailable: vi.fn(() => true),
|
|
33
|
+
getLastFailureReason: vi.fn(() => null),
|
|
32
34
|
invokeDreamer: vi.fn<(snapshot: any, principleId: any, maxCandidates: any) => Promise<DreamerOutput>>(),
|
|
33
35
|
invokePhilosopher: vi.fn<(dreamerOutput: any, principleId: any) => Promise<PhilosopherOutput>>(),
|
|
34
36
|
invokeScribe: vi.fn<(dreamerOutput: any, philosopherOutput: any, snapshot: any, principleId: any, telemetry: any, config: any) => Promise<TrinityDraftArtifact | null>>(),
|