principles-disciple 1.18.0 → 1.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,7 +2,7 @@
2
2
  "id": "principles-disciple",
3
3
  "name": "Principles Disciple",
4
4
  "description": "Evolutionary programming agent framework with strategic guardrails and reflection loops.",
5
- "version": "1.18.0",
5
+ "version": "1.20.0",
6
6
  "skills": [
7
7
  "./skills"
8
8
  ],
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "principles-disciple",
3
- "version": "1.18.0",
3
+ "version": "1.20.0",
4
4
  "description": "Native OpenClaw plugin for Principles Disciple",
5
5
  "type": "module",
6
6
  "main": "./dist/bundle.js",
@@ -222,6 +222,8 @@ Checkpoint: ${checkpointId.substring(0, 8)}...
222
222
  Profile: ${profile}
223
223
  Result: ${result.passes ? 'PASS' : 'FAIL'}
224
224
  Suggested State: ${result.suggestedState ? formatPromotionState(result.suggestedState, zh) : 'N/A'}
225
+ Evidence Mode: ${result.evidenceSummary.evidenceMode}
226
+ Shadow Samples: ${result.evidenceSummary.shadowSampleCount}
225
227
 
226
228
  --- Delta Check ---
227
229
  ${result.deltaCheck.passed ? 'PASS' : 'FAIL'} Delta: ${result.deltaCheck.actual >= 0 ? '+' : ''}${result.deltaCheck.actual.toFixed(4)} (threshold: ${result.deltaCheck.threshold.toFixed(4)})
@@ -0,0 +1,506 @@
1
+ import * as fs from 'fs';
2
+ import * as path from 'path';
3
+ import { getImplementationAssetRoot } from './code-implementation-storage.js';
4
+ import { listDatasetRecords } from './nocturnal-dataset.js';
5
+ import { listArtifactLineageRecords } from './nocturnal-artifact-lineage.js';
6
+ import { listExports, verifyExportIntegrity } from './nocturnal-export.js';
7
+ import { OpenClawTrinityRuntimeAdapter } from './nocturnal-trinity.js';
8
+ import { resolvePdPath } from './paths.js';
9
+ import type { ReplayReport } from './replay-engine.js';
10
+
11
/** Outcome of a single audit check, and of the audit as a whole. */
export type MergeGateAuditStatus = 'pass' | 'block' | 'defer';

/** Result of one named check performed by the merge gate audit. */
export interface MergeGateAuditCheck {
  /** Identifier of the check, e.g. `queue_path_contract`. */
  id: string;
  /** Verdict reached by this check. */
  status: MergeGateAuditStatus;
  /** One-line, human-readable explanation of the verdict. */
  summary: string;
  /** Optional structured data backing the summary. */
  details?: Record<string, unknown>;
}

/** Aggregated audit result produced by `runMergeGateAudit`. */
export interface MergeGateAuditReport {
  /** Combined verdict derived from all checks. */
  overallStatus: MergeGateAuditStatus;
  /** ISO-8601 timestamp taken when the report was assembled. */
  generatedAt: string;
  /** Resolved workspace directory the audit ran against. */
  workspaceDir: string;
  /** Resolved state directory the audit ran against. */
  stateDir: string;
  /** Individual check results, in execution order. */
  checks: MergeGateAuditCheck[];
  /** Number of checks that ended in each status. */
  counts: Record<MergeGateAuditStatus, number>;
}
32
+
33
/**
 * Report whether `candidatePath` is `parentDir` itself or lies beneath it.
 *
 * Both arguments are passed through `path.resolve`, so relative inputs are
 * interpreted against the current working directory. The comparison is purely
 * lexical: the filesystem is not consulted.
 *
 * @param parentDir - Directory acting as the boundary.
 * @param candidatePath - Path to test against the boundary.
 * @returns `true` when the candidate equals the boundary or is nested inside it.
 */
function isWithinDir(parentDir: string, candidatePath: string): boolean {
  const relative = path.relative(path.resolve(parentDir), path.resolve(candidatePath));
  if (relative === '') {
    return true;
  }

  // Only a leading `..` *segment* leaves the parent. A plain prefix test would
  // also reject legitimate children whose name merely begins with two dots
  // (for example `..cache/file.json`).
  const escapesParent = relative === '..' || relative.startsWith(`..${path.sep}`);

  // An absolute result means no relative route exists between the two paths.
  return !escapesParent && !path.isAbsolute(relative);
}
37
+
38
/**
 * Collapse individual check verdicts into one audit verdict.
 *
 * Precedence is `block` over `defer` over `pass`; an empty list yields `pass`.
 *
 * @param checks - Results of the individual audit checks.
 * @returns The most severe status present among the checks.
 */
function computeOverallStatus(checks: MergeGateAuditCheck[]): MergeGateAuditStatus {
  const observed = new Set(checks.map((entry) => entry.status));
  if (observed.has('block')) {
    return 'block';
  }
  return observed.has('defer') ? 'defer' : 'pass';
}
47
+
48
/**
 * Tally how many checks ended in each status.
 *
 * @param checks - Results of the individual audit checks.
 * @returns Counters for `pass`, `block` and `defer`, all starting at zero.
 */
function countStatuses(checks: MergeGateAuditCheck[]): MergeGateAuditReport['counts'] {
  return checks.reduce(
    (tally, { status }) => {
      tally[status] += 1;
      return tally;
    },
    { pass: 0, block: 0, defer: 0 },
  );
}
55
+
56
/**
 * Check that the `PAIN_FLAG` path resolved for the workspace equals
 * `<workspace>/.state/.pain_flag` after normalization.
 *
 * @param workspaceDir - Workspace directory the path is resolved for.
 * @returns `pass` when the paths match, otherwise `block` with both paths in `details`.
 */
function auditPainFlagPathContract(workspaceDir: string): MergeGateAuditCheck {
  const id = 'pain_flag_path_contract';
  const resolvedPath = path.normalize(resolvePdPath(workspaceDir, 'PAIN_FLAG'));
  const expectedPath = path.normalize(
    path.join(path.resolve(workspaceDir), '.state', '.pain_flag'),
  );

  if (resolvedPath === expectedPath) {
    return {
      id,
      status: 'pass',
      summary: 'Canonical pain flag path resolves to workspace/.state/.pain_flag.',
      details: { resolvedPath },
    };
  }

  return {
    id,
    status: 'block',
    summary: 'Canonical pain flag path does not resolve under workspace/.state/.pain_flag.',
    details: { resolvedPath, expectedPath },
  };
}
83
+
84
/**
 * Check that the `EVOLUTION_QUEUE` path resolved for the workspace equals
 * `<workspace>/.state/evolution_queue.json` after normalization.
 *
 * @param workspaceDir - Workspace directory the path is resolved for.
 * @returns `pass` when the paths match, otherwise `block` with both paths in `details`.
 */
function auditQueuePathContract(workspaceDir: string): MergeGateAuditCheck {
  const id = 'queue_path_contract';
  const resolvedPath = path.normalize(resolvePdPath(workspaceDir, 'EVOLUTION_QUEUE'));
  const expectedPath = path.normalize(
    path.join(path.resolve(workspaceDir), '.state', 'evolution_queue.json'),
  );
  const matches = resolvedPath === expectedPath;

  return matches
    ? {
        id,
        status: 'pass',
        summary: 'Canonical evolution queue path resolves to workspace/.state/evolution_queue.json.',
        details: { resolvedPath },
      }
    : {
        id,
        status: 'block',
        summary:
          'Canonical evolution queue path does not resolve under workspace/.state/evolution_queue.json.',
        details: { resolvedPath, expectedPath },
      };
}
111
+
112
/**
 * Check that `OpenClawTrinityRuntimeAdapter` exposes the methods the audit
 * relies on (`isRuntimeAvailable`, `getLastFailureReason`).
 *
 * @returns `pass` when both methods are present on the prototype, otherwise `block`.
 */
function auditRuntimeAdapterContract(): MergeGateAuditCheck {
  const id = 'runtime_adapter_contract';

  // Inspect the prototype only and never construct the adapter: construction
  // triggers cleanupStaleTempDirs(), which scans os.tmpdir() and could remove
  // stale temp dirs belonging to other processes.
  const adapterPrototype = OpenClawTrinityRuntimeAdapter.prototype;
  const requiredMethods = ['isRuntimeAvailable', 'getLastFailureReason'] as const;
  const surfaceIntact = requiredMethods.every(
    (method) => typeof adapterPrototype[method] === 'function',
  );

  if (surfaceIntact) {
    return {
      id,
      status: 'pass',
      summary: 'OpenClaw runtime adapter exposes the expected contract-check surface (isRuntimeAvailable, getLastFailureReason).',
    };
  }

  return {
    id,
    status: 'block',
    summary: 'OpenClaw runtime adapter does not expose the expected contract-check surface.',
  };
}
134
+
135
/**
 * Verify that every dataset record points at an artifact file that exists and
 * is located inside the workspace.
 *
 * @param workspaceDir - Workspace whose dataset registry is inspected.
 * @returns `defer` when the registry is empty, `block` when any artifact is
 *   missing or outside the workspace, otherwise `pass`.
 */
function auditDatasetArtifactIntegrity(workspaceDir: string): MergeGateAuditCheck {
  const id = 'dataset_artifact_integrity';
  const records = listDatasetRecords(workspaceDir);
  const recordCount = records.length;

  if (recordCount === 0) {
    return {
      id,
      status: 'defer',
      summary: 'No dataset records found. Dataset artifact integrity cannot be verified yet.',
    };
  }

  const missingArtifacts: string[] = [];
  const outOfWorkspaceArtifacts: string[] = [];
  for (const { artifactPath, sampleFingerprint } of records) {
    // A missing file is listed as missing only; the boundary test is applied
    // to files that exist.
    if (!fs.existsSync(artifactPath)) {
      missingArtifacts.push(sampleFingerprint);
    } else if (!isWithinDir(workspaceDir, artifactPath)) {
      outOfWorkspaceArtifacts.push(sampleFingerprint);
    }
  }

  const isClean = missingArtifacts.length === 0 && outOfWorkspaceArtifacts.length === 0;
  if (isClean) {
    return {
      id,
      status: 'pass',
      summary: 'All dataset artifacts exist and remain inside the workspace boundary.',
      details: { recordCount },
    };
  }

  return {
    id,
    status: 'block',
    summary: 'Dataset registry points to missing artifacts or paths outside the workspace boundary.',
    details: { recordCount, missingArtifacts, outOfWorkspaceArtifacts },
  };
}
180
+
181
/**
 * Verify that every artifact lineage record points at a storage path that
 * exists and is located inside the workspace.
 *
 * @param workspaceDir - Workspace whose lineage records are inspected.
 * @returns `defer` when there are no lineage records, `block` when any storage
 *   path is missing or outside the workspace, otherwise `pass`.
 */
function auditArtifactLineageIntegrity(workspaceDir: string): MergeGateAuditCheck {
  const id = 'artifact_lineage_integrity';
  const records = listArtifactLineageRecords(workspaceDir);
  const recordCount = records.length;

  if (recordCount === 0) {
    return {
      id,
      status: 'defer',
      summary: 'No artifact lineage records found. Lineage integrity cannot be verified yet.',
    };
  }

  const missingStoragePaths: string[] = [];
  const outOfWorkspaceStoragePaths: string[] = [];
  for (const { storagePath, artifactId } of records) {
    // A missing path is listed as missing only; the boundary test is applied
    // to paths that exist.
    if (!fs.existsSync(storagePath)) {
      missingStoragePaths.push(artifactId);
    } else if (!isWithinDir(workspaceDir, storagePath)) {
      outOfWorkspaceStoragePaths.push(artifactId);
    }
  }

  const isClean = missingStoragePaths.length === 0 && outOfWorkspaceStoragePaths.length === 0;
  if (isClean) {
    return {
      id,
      status: 'pass',
      summary: 'All lineage storage paths exist and remain inside the workspace boundary.',
      details: { recordCount },
    };
  }

  return {
    id,
    status: 'block',
    summary: 'Artifact lineage points to missing files or paths outside the workspace boundary.',
    details: { recordCount, missingStoragePaths, outOfWorkspaceStoragePaths },
  };
}
226
+
227
/**
 * Verify that every ORPO export manifest has its export file on disk and
 * passes `verifyExportIntegrity`.
 *
 * @param workspaceDir - Workspace whose exports are inspected.
 * @returns `defer` when there are no exports, `block` when any export file is
 *   missing or fails verification, otherwise `pass`.
 */
function auditOrpoExportIntegrity(workspaceDir: string): MergeGateAuditCheck {
  const id = 'orpo_export_integrity';
  const manifests = listExports(workspaceDir);
  const exportCount = manifests.length;

  if (exportCount === 0) {
    return {
      id,
      status: 'defer',
      summary: 'No ORPO exports found. Export integrity cannot be verified yet.',
    };
  }

  const invalidExportIds: string[] = [];
  const missingExportFiles: string[] = [];
  for (const { exportPath, exportId } of manifests) {
    if (!fs.existsSync(exportPath)) {
      // Nothing to verify when the export file itself is absent.
      missingExportFiles.push(exportId);
      continue;
    }

    const verdict = verifyExportIntegrity(workspaceDir, exportId);
    if (!verdict?.valid) {
      invalidExportIds.push(exportId);
    }
  }

  const isClean = invalidExportIds.length === 0 && missingExportFiles.length === 0;
  if (isClean) {
    return {
      id,
      status: 'pass',
      summary: 'All ORPO exports pass manifest fingerprint verification.',
      details: { exportCount },
    };
  }

  return {
    id,
    status: 'block',
    summary: 'ORPO export manifests or payloads failed integrity verification.',
    details: { exportCount, invalidExportIds, missingExportFiles },
  };
}
274
+
275
/**
 * Structural type guard for a parsed replay report.
 *
 * Requires string `overallDecision`, `generatedAt` and `implementationId`, an
 * `evidenceSummary` that is not `undefined`, and an array `blockers`. The
 * contents of `evidenceSummary` are not inspected here.
 *
 * @param value - Arbitrary parsed JSON value.
 * @returns `true` when the value has the minimum replay report shape.
 */
function isReplayReportShape(value: unknown): value is ReplayReport {
  if (typeof value !== 'object' || !value) {
    return false;
  }

  const candidate = value as Partial<ReplayReport>;
  const requiredStringFields = ['overallDecision', 'generatedAt', 'implementationId'] as const;
  const hasStringFields = requiredStringFields.every(
    (field) => typeof candidate[field] === 'string',
  );

  return (
    hasStringFields &&
    candidate.evidenceSummary !== undefined &&
    Array.isArray(candidate.blockers)
  );
}
289
+
290
/**
 * Gather the paths of all `.json` files found in the `replays` directory of
 * every implementation under `<stateDir>/principles/implementations`.
 *
 * @param stateDir - State directory to scan.
 * @returns Replay report file paths; empty when the implementations directory
 *   does not exist.
 */
function collectReplayReportPaths(stateDir: string): string[] {
  const implementationsRoot = path.join(stateDir, 'principles', 'implementations');
  if (!fs.existsSync(implementationsRoot)) {
    return [];
  }

  const collected: string[] = [];
  const implementationDirs = fs
    .readdirSync(implementationsRoot, { withFileTypes: true })
    .filter((entry) => entry.isDirectory());

  for (const { name: implementationId } of implementationDirs) {
    const replaysDir = path.join(getImplementationAssetRoot(stateDir, implementationId), 'replays');
    if (fs.existsSync(replaysDir)) {
      for (const entry of fs.readdirSync(replaysDir, { withFileTypes: true })) {
        if (entry.isFile() && entry.name.endsWith('.json')) {
          collected.push(path.join(replaysDir, entry.name));
        }
      }
    }
  }

  return collected;
}
315
+
316
/**
 * Classification assigned to a single replay report file by
 * `validateSingleReplayReport`.
 */
type ReplayValidationCategory =
  /** The file could not be read. */
  | 'io_error'
  /** The content is not valid JSON or lacks the basic report shape. */
  | 'malformed'
  /** The evidence summary lacks a string status or numeric sample total. */
  | 'missing_evidence_summary'
  /** The report says `pass` but records zero samples. */
  | 'unsupported_pass'
  /** The report says `needs-review` but records zero samples. */
  | 'empty_needs_review'
  /** None of the problems above apply. */
  | 'valid';
326
+
327
/**
 * Tell whether a parsed replay report carries an `evidenceSummary` with a
 * string `evidenceStatus` and a numeric `totalSamples`.
 *
 * @param parsed - Arbitrary parsed JSON value.
 * @returns `true` only when both fields are present with those types.
 */
function hasValidEvidenceSummary(parsed: unknown): boolean {
  if (typeof parsed !== 'object' || parsed === null) {
    return false;
  }

  const summary = (parsed as Partial<ReplayReport>).evidenceSummary;
  if (!summary) {
    return false;
  }

  const { evidenceStatus, totalSamples } = summary as Partial<ReplayReport['evidenceSummary']>;
  return typeof evidenceStatus === 'string' && typeof totalSamples === 'number';
}
340
+
341
/**
 * Read one replay report from disk and classify it.
 *
 * Checks run in order: readable, parseable JSON with the basic report shape,
 * usable evidence summary, and finally whether a `pass` or `needs-review`
 * decision is backed by zero samples.
 *
 * @param reportPath - Path of the replay report file.
 * @returns The first category that applies, or `valid`.
 */
function validateSingleReplayReport(reportPath: string): ReplayValidationCategory {
  let fileText: string;
  try {
    fileText = fs.readFileSync(reportPath, 'utf-8');
  } catch {
    return 'io_error';
  }

  let report: unknown;
  try {
    report = JSON.parse(fileText);
  } catch {
    return 'malformed';
  }

  if (!isReplayReportShape(report)) {
    return 'malformed';
  }
  if (!hasValidEvidenceSummary(report)) {
    return 'missing_evidence_summary';
  }

  // Only a zero-sample report can be flagged from here on.
  if (report.evidenceSummary.totalSamples === 0) {
    if (report.overallDecision === 'pass') {
      return 'unsupported_pass';
    }
    if (report.overallDecision === 'needs-review') {
      return 'empty_needs_review';
    }
  }

  return 'valid';
}
378
+
379
/**
 * Replay report paths grouped by the problem found in each file.
 * Reports classified as `valid` are not recorded.
 */
interface ReplayValidationResults {
  /** Files that could not be read. */
  ioErrorReports: string[];
  /** Files that are not valid JSON or lack the basic report shape. */
  malformedReports: string[];
  /** Files whose evidence summary lacks a string status or numeric sample total. */
  missingEvidenceSummary: string[];
  /** Files with a `pass` decision and zero samples. */
  unsupportedPassingReports: string[];
  /** Files with a `needs-review` decision and zero samples. */
  emptyEvidenceNeedsReview: string[];
}
389
+
390
/**
 * Validate each replay report and group the problematic ones by category.
 *
 * @param reportPaths - Paths of the replay report files to validate.
 * @returns Paths grouped by failure category; valid reports are omitted.
 */
function categorizeReplayReports(reportPaths: string[]): ReplayValidationResults {
  const grouped: ReplayValidationResults = {
    ioErrorReports: [],
    malformedReports: [],
    missingEvidenceSummary: [],
    unsupportedPassingReports: [],
    emptyEvidenceNeedsReview: [],
  };

  // 'valid' has no entry on purpose: valid reports are not collected.
  const groupByCategory: Partial<Record<ReplayValidationCategory, keyof ReplayValidationResults>> = {
    io_error: 'ioErrorReports',
    malformed: 'malformedReports',
    missing_evidence_summary: 'missingEvidenceSummary',
    unsupported_pass: 'unsupportedPassingReports',
    empty_needs_review: 'emptyEvidenceNeedsReview',
  };

  for (const reportPath of reportPaths) {
    const group = groupByCategory[validateSingleReplayReport(reportPath)];
    if (group) {
      grouped[group].push(reportPath);
    }
  }

  return grouped;
}
423
+
424
/**
 * Tell whether any failure group contains at least one report.
 *
 * @param results - Replay reports grouped by failure category.
 * @returns `true` when at least one group is non-empty.
 */
function hasValidationFailures(results: ReplayValidationResults): boolean {
  const groups = [
    results.malformedReports,
    results.ioErrorReports,
    results.missingEvidenceSummary,
    results.unsupportedPassingReports,
    results.emptyEvidenceNeedsReview,
  ];
  return groups.some((group) => group.length > 0);
}
433
+
434
/**
 * Audit every replay report under the state directory for readable,
 * well-formed content and for decisions that are backed by samples.
 *
 * @param stateDir - State directory containing the implementation replays.
 * @returns `defer` when no reports exist, `block` when any report falls into a
 *   failure category (the grouped paths are in `details`), otherwise `pass`.
 */
function auditReplayEvidenceIntegrity(stateDir: string): MergeGateAuditCheck {
  const id = 'replay_evidence_integrity';
  const reportPaths = collectReplayReportPaths(stateDir);
  const reportCount = reportPaths.length;

  if (reportCount === 0) {
    return {
      id,
      status: 'defer',
      summary: 'No replay reports found. Replay evidence integrity cannot be verified yet.',
    };
  }

  const findings = categorizeReplayReports(reportPaths);
  if (!hasValidationFailures(findings)) {
    return {
      id,
      status: 'pass',
      summary: 'Replay reports include evidence summaries and no empty-evidence unsafe verdicts.',
      details: { reportCount },
    };
  }

  return {
    id,
    status: 'block',
    summary: 'Replay reports contain malformed payloads, I/O errors, empty-evidence passes, or zero-evidence needs-review verdicts.',
    details: { reportCount, ...findings },
  };
}
468
+
469
/**
 * Run every merge gate check and assemble the combined report.
 *
 * @param workspaceDir - Workspace directory used by the path, dataset, lineage
 *   and export checks.
 * @param stateDir - State directory used by the replay evidence check.
 * @returns Report with the individual checks, per-status counts and the
 *   overall verdict.
 */
export function runMergeGateAudit(workspaceDir: string, stateDir: string): MergeGateAuditReport {
  const checks: MergeGateAuditCheck[] = [];
  checks.push(auditPainFlagPathContract(workspaceDir));
  checks.push(auditQueuePathContract(workspaceDir));
  checks.push(auditRuntimeAdapterContract());
  checks.push(auditDatasetArtifactIntegrity(workspaceDir));
  checks.push(auditArtifactLineageIntegrity(workspaceDir));
  checks.push(auditOrpoExportIntegrity(workspaceDir));
  checks.push(auditReplayEvidenceIntegrity(stateDir));

  const overallStatus = computeOverallStatus(checks);
  const generatedAt = new Date().toISOString();

  return {
    overallStatus,
    generatedAt,
    workspaceDir: path.resolve(workspaceDir),
    stateDir: path.resolve(stateDir),
    checks,
    counts: countStatuses(checks),
  };
}
489
+
490
/**
 * Render an audit report as plain text: a header block, a blank line, then one
 * `[STATUS] id: summary` line per check. The output ends with a newline.
 *
 * @param report - Report to render.
 * @returns The formatted, newline-terminated text.
 */
export function formatMergeGateAuditReport(report: MergeGateAuditReport): string {
  const { pass, block, defer } = report.counts;
  const header = [
    '=== Merge Gate Audit ===',
    `Overall Status: ${report.overallStatus.toUpperCase()}`,
    `Generated At: ${report.generatedAt}`,
    `Workspace: ${report.workspaceDir}`,
    `State Dir: ${report.stateDir}`,
    `Counts: pass=${pass}, block=${block}, defer=${defer}`,
  ];
  const checkLines = report.checks.map(
    ({ status, id, summary }) => `[${status.toUpperCase()}] ${id}: ${summary}`,
  );

  return [...header, '', ...checkLines].join('\n') + '\n';
}
@@ -52,6 +52,10 @@ import {
52
52
  readDatasetArtifact,
53
53
  type NocturnalDatasetRecord,
54
54
  } from './nocturnal-dataset.js';
55
+ import {
56
+ listArtifactLineageRecords,
57
+ type ArtifactLineageRecord,
58
+ } from './nocturnal-artifact-lineage.js';
55
59
  import { NocturnalPathResolver } from './nocturnal-paths.js';
56
60
 
57
61
  // ---------------------------------------------------------------------------
@@ -81,6 +85,23 @@ export interface ORPOSample {
81
85
  exportedAt: string;
82
86
  exportId: string;
83
87
  datasetFingerprint: string;
88
+ evidenceSummary: ORPOEvidenceSummary;
89
+ };
90
+ }
91
+
92
/** Whether a kind of evidence was seen, was not seen, or could not be determined. */
export type EvidenceState = 'observed' | 'not_observed' | 'unknown';

/** Evidence attached to an exported ORPO sample. */
export interface ORPOEvidenceSummary {
  /** `unknown` when no lineage record was available for the sample. */
  lineageStatus: 'observed' | 'unknown';
  /** Pain signals recorded for the sample; `count` is `null` when unknown. */
  painSignals: { status: EvidenceState; count: number | null; ids: string[] };
  /** Gate blocks recorded for the sample; `count` is `null` when unknown. */
  gateBlocks: { status: EvidenceState; count: number | null; ids: string[] };
}
86
107
 
@@ -140,10 +161,12 @@ function computeDatasetFingerprint(sampleFingerprints: string[]): string {
140
161
  function serializeORPOSample(
141
162
  record: NocturnalDatasetRecord,
142
163
  artifact: ReturnType<typeof readDatasetArtifact>,
164
+ evidenceSummary: ORPOEvidenceSummary,
143
165
  exportId: string,
144
166
  datasetFingerprint: string
145
167
  ): ORPOSample {
146
168
  const now = new Date().toISOString();
169
+ const rejected = buildEvidenceBoundedRejected(artifact, evidenceSummary);
147
170
 
148
171
  return {
149
172
  sampleFingerprint: record.sampleFingerprint,
@@ -151,12 +174,11 @@ function serializeORPOSample(
151
174
  sessionId: record.sessionId,
152
175
  principleId: record.principleId,
153
176
  targetModelFamily: record.targetModelFamily as string, // validated non-null by caller
154
- // For ORPO: prompt = badDecision, chosen = betterDecision, rejected = badDecision
155
- // This teaches the model to prefer betterDecision over badDecision
156
- prompt: artifact.badDecision,
177
+ // Export only evidence-bounded narratives. Free-form artifact text can overstate what was observed.
178
+ prompt: rejected,
157
179
  chosen: artifact.betterDecision,
158
- rejected: artifact.badDecision,
159
- rationale: artifact.rationale,
180
+ rejected,
181
+ rationale: buildEvidenceBoundedRationale(evidenceSummary),
160
182
  datasetMetadata: {
161
183
  sampleFingerprint: record.sampleFingerprint,
162
184
  artifactPath: record.artifactPath,
@@ -164,10 +186,83 @@ function serializeORPOSample(
164
186
  exportedAt: now,
165
187
  exportId,
166
188
  datasetFingerprint,
189
+ evidenceSummary,
190
+ },
191
+ };
192
+ }
193
+
194
/**
 * Derive the evidence summary for a sample from its lineage record.
 *
 * @param lineageRecord - Lineage record of the sample, or `null` when none was found.
 * @returns A summary with everything `unknown` when there is no record;
 *   otherwise counts and copies of the recorded pain and gate-block ids.
 */
function buildEvidenceSummary(
  lineageRecord: ArtifactLineageRecord | null
): ORPOEvidenceSummary {
  if (lineageRecord === null || lineageRecord === undefined) {
    return {
      lineageStatus: 'unknown',
      painSignals: { status: 'unknown', count: null, ids: [] },
      gateBlocks: { status: 'unknown', count: null, ids: [] },
    };
  }

  // Older lineage files on disk may lack the id lists, so a missing list is
  // treated as empty.
  const summarize = (
    sourceIds: readonly string[] | undefined
  ): ORPOEvidenceSummary['painSignals'] => {
    const ids = [...(sourceIds ?? [])];
    return {
      status: ids.length > 0 ? 'observed' : 'not_observed',
      count: ids.length,
      ids,
    };
  };

  const painSignals = summarize(lineageRecord.sourcePainIds);
  const gateBlocks = summarize(lineageRecord.sourceGateBlockIds);
  return { lineageStatus: 'observed', painSignals, gateBlocks };
}
170
225
 
226
/**
 * Build the "rejected" narrative for an ORPO sample using only what the
 * evidence summary records, never the artifact's free-form text.
 *
 * The artifact is consulted solely to choose the sentence prefix, depending on
 * whether its `badDecision` is non-blank.
 *
 * @param artifact - Dataset artifact the sample was read from.
 * @param evidenceSummary - Evidence recorded for the sample.
 * @returns A fixed neutral sentence when lineage is unknown or nothing was
 *   observed; otherwise a sentence naming the observed pain-signal and
 *   gate-block counts, with nouns agreeing in number.
 */
function buildEvidenceBoundedRejected(
  artifact: ReturnType<typeof readDatasetArtifact>,
  evidenceSummary: ORPOEvidenceSummary
): string {
  if (evidenceSummary.lineageStatus === 'unknown') {
    return 'Take the next action without verified source evidence.';
  }

  // "1 observed pain signal" vs "2 observed pain signals".
  const describe = (count: number, noun: string): string =>
    `${count} observed ${noun}${count === 1 ? '' : 's'}`;

  const { painSignals, gateBlocks } = evidenceSummary;
  const clauses: string[] = [];
  if (painSignals.status === 'observed' && painSignals.count) {
    clauses.push(`continue despite ${describe(painSignals.count, 'pain signal')}`);
  }
  if (gateBlocks.status === 'observed' && gateBlocks.count) {
    clauses.push(`ignore ${describe(gateBlocks.count, 'gate block')}`);
  }

  if (clauses.length === 0) {
    return 'Proceed without first verifying the relevant state from the source session.';
  }

  const prefix = artifact.badDecision.trim().length > 0
    ? 'Proceed with the rejected action and '
    : 'Take the rejected action and ';
  return `${prefix}${clauses.join(' and ')}.`;
}
251
+
252
/**
 * Build the rationale text for an ORPO sample from its evidence summary alone.
 *
 * @param evidenceSummary - Evidence recorded for the sample.
 * @returns A neutral statement when lineage is unknown, a "nothing recorded"
 *   statement when both counts are zero, otherwise a sentence stating the
 *   pain-signal and gate-block counts with nouns agreeing in number.
 */
function buildEvidenceBoundedRationale(evidenceSummary: ORPOEvidenceSummary): string {
  if (evidenceSummary.lineageStatus === 'unknown') {
    return 'Source evidence is unknown. Export uses a neutral rationale instead of narrating unverified failures or violations.';
  }

  // A null count is treated as zero.
  const painCount = evidenceSummary.painSignals.count ?? 0;
  const gateCount = evidenceSummary.gateBlocks.count ?? 0;
  if (painCount === 0 && gateCount === 0) {
    return 'Source lineage is present but records no pain signals or gate blocks. Export keeps the corrective preference while avoiding invented failure narratives.';
  }

  // "1 pain signal" vs "2 pain signals".
  const describe = (count: number, noun: string): string =>
    `${count} ${noun}${count === 1 ? '' : 's'}`;

  return `Observed source evidence: ${describe(painCount, 'pain signal')} and ${describe(gateCount, 'gate block')}. Prefer the bounded corrective action over repeating the rejected choice.`;
}
265
+
171
266
  // ---------------------------------------------------------------------------
172
267
  // Core Export Function
173
268
  // ---------------------------------------------------------------------------
@@ -187,6 +282,7 @@ export function exportORPOSamples(
187
282
  ): ExportResult {
188
283
  const exportId = crypto.randomUUID();
189
284
  const now = new Date().toISOString();
285
+ const lineageRecords = listArtifactLineageRecords(workspaceDir, 'behavioral-sample');
190
286
 
191
287
  // Step 1: Collect eligible records
192
288
  // Use listDatasetRecords directly to have full control over the family filter
@@ -253,8 +349,12 @@ export function exportORPOSamples(
253
349
  continue;
254
350
  }
255
351
 
352
+ const lineageRecord =
353
+ lineageRecords.find((candidate) => candidate.artifactId === record.artifactId) ?? null;
354
+ const evidenceSummary = buildEvidenceSummary(lineageRecord);
355
+
256
356
  // Serialize
257
- orpoSamples.push(serializeORPOSample(record, artifact, exportId, ''));
357
+ orpoSamples.push(serializeORPOSample(record, artifact, evidenceSummary, exportId, ''));
258
358
  }
259
359
 
260
360
  // Step 4: Fail if all samples failed validation