principles-disciple 1.17.0 → 1.19.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.
@@ -277,6 +277,12 @@ export interface PromotionGateResult {
277
277
  threshold: number;
278
278
  passed: boolean;
279
279
  };
280
+
281
+ evidenceSummary: {
282
+ evidenceMode: 'shadow' | 'eval-proxy' | 'mixed';
283
+ shadowSampleCount: number;
284
+ deltaSource: 'eval';
285
+ };
280
286
  }
281
287
 
282
288
  /**
@@ -337,6 +343,11 @@ export function evaluatePromotionGate(
337
343
  blockers,
338
344
  constraintChecks: [],
339
345
  deltaCheck: { actual: 0, threshold: minDelta, passed: false },
346
+ evidenceSummary: {
347
+ evidenceMode: 'eval-proxy',
348
+ shadowSampleCount: 0,
349
+ deltaSource: 'eval',
350
+ },
340
351
  };
341
352
  }
342
353
 
@@ -351,6 +362,11 @@ export function evaluatePromotionGate(
351
362
  blockers,
352
363
  constraintChecks: [],
353
364
  deltaCheck: { actual: 0, threshold: minDelta, passed: false },
365
+ evidenceSummary: {
366
+ evidenceMode: 'eval-proxy',
367
+ shadowSampleCount: 0,
368
+ deltaSource: 'eval',
369
+ },
354
370
  };
355
371
  }
356
372
 
@@ -366,6 +382,11 @@ export function evaluatePromotionGate(
366
382
  blockers,
367
383
  constraintChecks: [],
368
384
  deltaCheck: { actual: 0, threshold: minDelta, passed: false },
385
+ evidenceSummary: {
386
+ evidenceMode: 'eval-proxy',
387
+ shadowSampleCount: 0,
388
+ deltaSource: 'eval',
389
+ },
369
390
  };
370
391
  }
371
392
 
@@ -496,12 +517,24 @@ export function evaluatePromotionGate(
496
517
  suggestedState = 'rejected';
497
518
  }
498
519
 
520
+ const evidenceMode =
521
+ arbiterRejectSource === 'shadow' && executabilityRejectSource === 'shadow'
522
+ ? 'shadow'
523
+ : arbiterRejectSource === 'eval-proxy' && executabilityRejectSource === 'eval-proxy'
524
+ ? 'eval-proxy'
525
+ : 'mixed';
526
+
499
527
  return {
500
528
  passes: allPassed,
501
529
  suggestedState,
502
530
  blockers,
503
531
  constraintChecks,
504
532
  deltaCheck,
533
+ evidenceSummary: {
534
+ evidenceMode,
535
+ shadowSampleCount: shadowStats?.totalCount ?? 0,
536
+ deltaSource: 'eval',
537
+ },
505
538
  };
506
539
  }
507
540
 
@@ -63,6 +63,15 @@ export interface ReplayReport {
63
63
  principleAnchor: ClassificationSummary;
64
64
  };
65
65
  blockers: string[];
66
+ evidenceSummary: {
67
+ evidenceStatus: 'observed' | 'empty';
68
+ totalSamples: number;
69
+ classifiedCounts: {
70
+ painNegative: number;
71
+ successPositive: number;
72
+ principleAnchor: number;
73
+ };
74
+ };
66
75
  generatedAt: string;
67
76
  implementationId: string;
68
77
  sampleFingerprints: string[];
@@ -432,6 +441,11 @@ export class ReplayEngine {
432
441
  const successSummary = toSummary(successPositive);
433
442
  const anchorSummary = toSummary(principleAnchor);
434
443
  const blockers: string[] = [];
444
+ const totalSamples = results.length;
445
+
446
+ if (totalSamples === 0) {
447
+ blockers.push('NO REPLAY EVIDENCE: No classified replay samples were available. Report cannot justify promotion-quality conclusions.');
448
+ }
435
449
 
436
450
  for (const leak of painSummary.details.filter((result) => !result.passed)) {
437
451
  blockers.push(
@@ -459,6 +473,15 @@ export class ReplayEngine {
459
473
  principleAnchor: anchorSummary,
460
474
  },
461
475
  blockers,
476
+ evidenceSummary: {
477
+ evidenceStatus: totalSamples > 0 ? 'observed' : 'empty',
478
+ totalSamples,
479
+ classifiedCounts: {
480
+ painNegative: painSummary.total,
481
+ successPositive: successSummary.total,
482
+ principleAnchor: anchorSummary.total,
483
+ },
484
+ },
462
485
  generatedAt: new Date().toISOString(),
463
486
  implementationId,
464
487
  sampleFingerprints: results.map((result) => result.sampleFingerprint),
@@ -471,6 +494,7 @@ export class ReplayEngine {
471
494
  success: ClassificationSummary,
472
495
  anchor: ClassificationSummary
473
496
  ): 'pass' | 'fail' | 'needs-review' {
497
+ if (pain.total + success.total + anchor.total === 0) return 'needs-review';
474
498
  if (pain.failed > 0) return 'fail';
475
499
  if (anchor.failed > 0) return 'fail';
476
500
  if (success.failed > 0) return 'needs-review';
@@ -526,6 +550,7 @@ export function formatReplayReport(report: ReplayReport): string {
526
550
  output += `Implementation: ${report.implementationId}\n`;
527
551
  output += `Generated At: ${report.generatedAt}\n`;
528
552
  output += `Overall Decision: [${decisionEmoji}]\n\n`;
553
+ output += `Evidence Status: ${report.evidenceSummary.evidenceStatus} (samples=${report.evidenceSummary.totalSamples})\n\n`;
529
554
 
530
555
  const formatSection = (
531
556
  label: string,
@@ -173,7 +173,7 @@ let timeoutId: NodeJS.Timeout | null = null;
173
173
  * Old queue items (without taskKind) are migrated to pain_diagnosis for compatibility.
174
174
  */
175
175
  export type QueueStatus = 'pending' | 'in_progress' | 'completed' | 'failed' | 'canceled';
176
- export type TaskResolution = 'marker_detected' | 'auto_completed_timeout' | 'failed_max_retries' | 'runtime_unavailable' | 'canceled' | 'late_marker_principle_created' | 'late_marker_no_principle' | 'stub_fallback';
176
+ export type TaskResolution = 'marker_detected' | 'auto_completed_timeout' | 'failed_max_retries' | 'runtime_unavailable' | 'canceled' | 'late_marker_principle_created' | 'late_marker_no_principle' | 'stub_fallback' | 'skipped_thin_violation';
177
177
 
178
178
  /**
179
179
  * Recent pain context attached to sleep_reflection tasks.
@@ -1595,13 +1595,14 @@ async function processEvolutionQueue(wctx: WorkspaceContext, logger: PluginLogge
1595
1595
  const errorReason = lastEvent?.reason ?? 'unknown';
1596
1596
  // #219: Include payload details for better diagnostics
1597
1597
  let detailedError = `Workflow terminal_error: ${errorReason}`;
1598
+ let payload: unknown = {};
1598
1599
  try {
1599
- const payload = lastEvent?.payload ?? {};
1600
- if (payload.skipReason) {
1601
- detailedError += ` (skipReason: ${payload.skipReason})`;
1600
+ payload = lastEvent?.payload ?? {};
1601
+ if ((payload as any).skipReason) {
1602
+ detailedError += ` (skipReason: ${(payload as any).skipReason})`;
1602
1603
  }
1603
- if (payload.failures && Array.isArray(payload.failures) && payload.failures.length > 0) {
1604
- detailedError += ` | failures: ${(payload.failures as string[]).slice(0, 3).join(', ')}`;
1604
+ if ((payload as any).failures && Array.isArray((payload as any).failures) && (payload as any).failures.length > 0) {
1605
+ detailedError += ` | failures: ${((payload as any).failures as string[]).slice(0, 3).join(', ')}`;
1605
1606
  }
1606
1607
  } catch { /* ignore parse errors */ }
1607
1608
  sleepTask.lastError = detailedError;
@@ -1613,6 +1614,12 @@ async function processEvolutionQueue(wctx: WorkspaceContext, logger: PluginLogge
1613
1614
  sleepTask.completed_at = new Date().toISOString();
1614
1615
  sleepTask.resolution = 'stub_fallback';
1615
1616
  logger?.warn?.(`[PD:EvolutionWorker] sleep_reflection task ${sleepTask.id} background runtime unavailable, using stub fallback: ${errorReason}`);
1617
+ } else if ((payload as any).skipReason === 'no_violating_sessions') {
1618
+ // #244: No meaningful violations found (thin filter) → skip without failure
1619
+ sleepTask.status = 'completed';
1620
+ sleepTask.completed_at = new Date().toISOString();
1621
+ sleepTask.resolution = 'skipped_thin_violation';
1622
+ logger?.info?.(`[PD:EvolutionWorker] sleep_reflection task ${sleepTask.id} completed: no sessions with meaningful violations found`);
1616
1623
  } else {
1617
1624
  sleepTask.status = 'failed';
1618
1625
  sleepTask.completed_at = new Date().toISOString();
@@ -302,7 +302,7 @@ export class NocturnalTargetSelector {
302
302
  this.recentPainContext = recentPainContext;
303
303
  this.opts = {
304
304
  minViolationDensity: restOptions.minViolationDensity ?? 0.1,
305
- maxSessionCandidates: restOptions.maxSessionCandidates ?? 50,
305
+ maxSessionCandidates: restOptions.maxSessionCandidates ?? 300,
306
306
  idleThresholdMs: restOptions.idleThresholdMs ?? DEFAULT_IDLE_THRESHOLD_MS,
307
307
  };
308
308
  }
@@ -440,7 +440,14 @@ export class NocturnalTargetSelector {
440
440
  }
441
441
 
442
442
  // Compute violation signals for each session
443
- const violatingSessions: ViolationSignal[] = recentSessions.map((session) => {
443
+ // #244: Filter out sessions that are too thin for meaningful reflection
444
+ // A session needs enough violation context (failures + pain + gates >= 2)
445
+ const MIN_VIOLATION_DEPTH = 2;
446
+ const richSessions = recentSessions.filter(
447
+ s => (s.failureCount ?? 0) + (s.painEventCount ?? 0) + (s.gateBlockCount ?? 0) >= MIN_VIOLATION_DEPTH
448
+ );
449
+
450
+ const violatingSessions: ViolationSignal[] = richSessions.map((session) => {
444
451
  const violationDensity = computeViolationDensity(session);
445
452
  const snapshot = this.extractor.getNocturnalSessionSnapshot(session.sessionId);
446
453
 
@@ -40,7 +40,6 @@ import type { NocturnalSessionSnapshot } from '../../core/nocturnal-trajectory-e
40
40
  import type { RecentPainContext } from '../evolution-worker.js';
41
41
  import * as fs from 'fs';
42
42
  import * as path from 'path';
43
- import { isSubagentRuntimeAvailable } from '../../utils/subagent-probe.js';
44
43
  import { validateNocturnalSnapshotIngress } from '../../core/nocturnal-snapshot-contract.js';
45
44
 
46
45
  // ─────────────────────────────────────────────────────────────────────────────
@@ -173,11 +172,8 @@ export class NocturnalWorkflowManager implements WorkflowManager {
173
172
  metadata?: Record<string, unknown>;
174
173
  }
175
174
  ): Promise<WorkflowHandle> {
176
- // #179: Check subagent runtime availability before starting
177
- // Other workflow managers (empathy, deep-reflect) have this check
178
- // eslint-disable-next-line @typescript-eslint/no-explicit-any -- Reason: TrinityRuntimeAdapter interface doesn't expose api.runtime.subagent, but OpenClawTrinityRuntimeAdapter has it
179
- const subagent = (this.runtimeAdapter as any).api?.runtime?.subagent;
180
- if (!isSubagentRuntimeAvailable(subagent)) {
175
+ const runtimeAvailable = this.runtimeAdapter.isRuntimeAvailable();
176
+ if (!runtimeAvailable) {
181
177
  this.logger.warn(`[PD:NocturnalWorkflow] Subagent runtime unavailable, skipping workflow`);
182
178
  throw new Error(`NocturnalWorkflowManager: subagent runtime unavailable`);
183
179
  }
@@ -0,0 +1,111 @@
1
+ {
2
+ "id": "nocturnal-trinity-quality-enhancement",
3
+ "title": "Enhance nocturnal Trinity prompt quality",
4
+ "description": "Enhance nocturnal Trinity prompt quality — add Dreamer perspective diversity constraints and Scribe rejected-decision analysis",
5
+ "workspace": "/home/csuzngjh/code/principles",
6
+ "branch": "fix/bugs-231-228",
7
+ "requiresTaskContract": true,
8
+ "maxRoundsPerStage": 2,
9
+ "maxRuntimeMinutes": 60,
10
+ "stages": [
11
+ "investigate",
12
+ "implement-pass-1",
13
+ "verify"
14
+ ],
15
+ "taskContract": {
16
+ "goal": "Improve nocturnal Trinity output quality by adding perspective diversity to Dreamer and rejected-decision analysis to Scribe",
17
+ "inScope": [
18
+ "nocturnal-trinity.ts prompt modifications",
19
+ "nocturnal-trinity.test.ts assertion updates",
20
+ "nocturnal-arbiter.ts compatibility verification"
21
+ ],
22
+ "outOfScope": [
23
+ "Runtime or infrastructure changes",
24
+ "New file creation",
25
+ "Non-Trinity prompt changes"
26
+ ],
27
+ "validationCommands": [
28
+ "npx vitest run packages/openclaw-plugin/tests/core/nocturnal --reporter=verbose"
29
+ ],
30
+ "expectedArtifacts": [
31
+ "packages/openclaw-plugin/src/core/nocturnal-trinity.ts"
32
+ ]
33
+ },
34
+ "producer": {
35
+ "agent": "iflow",
36
+ "model": "glm-5",
37
+ "timeoutSeconds": 1800
38
+ },
39
+ "reviewerA": {
40
+ "agent": "iflow",
41
+ "model": "glm-4.7",
42
+ "timeoutSeconds": 1200,
43
+ "role": "code-quality",
44
+ "focus": "Verify prompt changes are minimal, backward-compatible, and don't break existing arbiter validation"
45
+ },
46
+ "reviewerB": {
47
+ "agent": "iflow",
48
+ "model": "glm-4.7",
49
+ "timeoutSeconds": 1200,
50
+ "role": "functional-correctness",
51
+ "focus": "Verify tests pass and the new prompt constraints produce structurally valid Trinity output"
52
+ },
53
+ "escalationReviewer": {
54
+ "agent": "iflow",
55
+ "model": "glm-5",
56
+ "timeoutSeconds": 1800
57
+ },
58
+ "stageGoals": {
59
+ "investigate": [
60
+ "Read nocturnal-trinity.ts lines 64-298 (all three prompts) and nocturnal-trinity.test.ts",
61
+ "Identify exact insertion points for Dreamer diversity section and Scribe analysis section",
62
+ "List all test assertions that reference prompt content",
63
+ "Report findings in producer.md"
64
+ ],
65
+ "implement-pass-1": [
66
+ "Apply Dreamer perspective diversity constraints to NOCTURNAL_DREAMER_PROMPT",
67
+ "Apply Scribe rejected-decision analysis to NOCTURNAL_SCRIBE_PROMPT",
68
+ "Update test assertions in nocturnal-trinity.test.ts if needed",
69
+ "Run nocturnal-trinity and nocturnal-arbiter tests to verify no breakage"
70
+ ],
71
+ "verify": [
72
+ "Run full nocturnal test suite: npx vitest run packages/openclaw-plugin/tests/core/nocturnal --reporter=verbose",
73
+ "Verify all tests pass with 0 failures",
74
+ "Confirm arbiter validation is unchanged",
75
+ "Confirm no new files were created"
76
+ ]
77
+ },
78
+ "stageCriteria": {
79
+ "investigate": {
80
+ "scoringDimensions": [
81
+ "completeness",
82
+ "accuracy"
83
+ ],
84
+ "dimensionThreshold": 3,
85
+ "requiredDeliverables": [
86
+ "producer.md"
87
+ ]
88
+ },
89
+ "implement-pass-1": {
90
+ "scoringDimensions": [
91
+ "correctness",
92
+ "completeness"
93
+ ],
94
+ "dimensionThreshold": 3,
95
+ "requiredDeliverables": [
96
+ "producer.md",
97
+ "reviewer-a.md",
98
+ "reviewer-b.md"
99
+ ]
100
+ },
101
+ "verify": {
102
+ "scoringDimensions": [
103
+ "correctness"
104
+ ],
105
+ "dimensionThreshold": 3,
106
+ "requiredDeliverables": [
107
+ "producer.md"
108
+ ]
109
+ }
110
+ }
111
+ }
@@ -325,7 +325,7 @@ export function buildStageBrief(spec, stage, round, previousDecision, handoff =
325
325
  carryForward.trimEnd(),
326
326
  '',
327
327
  `## Constraints`,
328
- ...spec.context.map((line) => `- ${line}`),
328
+ ...((spec.context ?? []).map((line) => `- ${line}`)),
329
329
  '',
330
330
  ...(spec.taskContract
331
331
  ? [
@@ -3413,7 +3413,7 @@ if (process.argv[1] === fileURLToPath(import.meta.url)) {
3413
3413
  main().catch((err) => {
3414
3414
  // main() is async and may throw. The try/catch inside main() handles
3415
3415
  // errors within its body, but rejections from the Promise itself land here.
3416
- console.error('Fatal error:', err.message);
3416
+ console.error('Fatal error:', err.message, err.stack);
3417
3417
  process.exit(1);
3418
3418
  });
3419
3419
  }
@@ -0,0 +1,284 @@
1
+ import { afterEach, beforeEach, describe, expect, it } from 'vitest';
2
+ import * as fs from 'fs';
3
+ import * as os from 'os';
4
+ import * as path from 'path';
5
+ import {
6
+ formatMergeGateAuditReport,
7
+ runMergeGateAudit,
8
+ } from '../../src/core/merge-gate-audit.js';
9
+ import type { NocturnalArtifact } from '../../src/core/nocturnal-arbiter.js';
10
+ import {
11
+ registerSample,
12
+ updateReviewStatus,
13
+ } from '../../src/core/nocturnal-dataset.js';
14
+ import { appendArtifactLineageRecord } from '../../src/core/nocturnal-artifact-lineage.js';
15
+ import { exportORPOSamples } from '../../src/core/nocturnal-export.js';
16
+ import { createImplementationAssetDir, getImplementationAssetRoot } from '../../src/core/code-implementation-storage.js';
17
+ import { safeRmDir } from '../test-utils.js';
18
+
19
+ function makeArtifact(overrides: Partial<NocturnalArtifact> = {}): NocturnalArtifact {
20
+ return {
21
+ artifactId: 'artifact-1',
22
+ sessionId: 'session-1',
23
+ principleId: 'T-08',
24
+ sourceSnapshotRef: 'snapshot-1',
25
+ badDecision: 'Retried without checking state',
26
+ betterDecision: 'Inspect state before retrying',
27
+ rationale: 'Evidence first.',
28
+ createdAt: '2026-04-12T09:00:00.000Z',
29
+ ...overrides,
30
+ };
31
+ }
32
+
33
+ describe('merge-gate-audit', () => {
34
+ let tempDir: string;
35
+ let workspaceDir: string;
36
+ let stateDir: string;
37
+
38
+ beforeEach(() => {
39
+ tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'pd-merge-gate-audit-'));
40
+ workspaceDir = path.join(tempDir, 'workspace');
41
+ stateDir = path.join(tempDir, '.state');
42
+ fs.mkdirSync(workspaceDir, { recursive: true });
43
+ fs.mkdirSync(stateDir, { recursive: true });
44
+ });
45
+
46
+ afterEach(() => {
47
+ safeRmDir(tempDir);
48
+ });
49
+
50
+ function registerApprovedArtifact(artifactId = 'artifact-1'): string {
51
+ const artifact = makeArtifact({ artifactId });
52
+ const artifactPath = path.join(
53
+ workspaceDir,
54
+ '.state',
55
+ 'nocturnal',
56
+ 'samples',
57
+ `${artifactId}.json`,
58
+ );
59
+ fs.mkdirSync(path.dirname(artifactPath), { recursive: true });
60
+ fs.writeFileSync(artifactPath, JSON.stringify(artifact, null, 2), 'utf-8');
61
+
62
+ const record = registerSample(workspaceDir, artifact, artifactPath, 'gpt-4').record;
63
+ updateReviewStatus(
64
+ workspaceDir,
65
+ record.sampleFingerprint,
66
+ 'approved_for_training',
67
+ 'approved for merge gate audit',
68
+ );
69
+
70
+ appendArtifactLineageRecord(workspaceDir, {
71
+ artifactKind: 'behavioral-sample',
72
+ artifactId: record.artifactId,
73
+ principleId: record.principleId,
74
+ ruleId: null,
75
+ sessionId: record.sessionId,
76
+ sourceSnapshotRef: record.sourceSnapshotRef,
77
+ sourcePainIds: ['pain-1'],
78
+ sourceGateBlockIds: ['gate-1'],
79
+ storagePath: artifactPath,
80
+ implementationId: null,
81
+ createdAt: record.createdAt,
82
+ });
83
+
84
+ return record.sampleFingerprint;
85
+ }
86
+
87
+ it('returns defer when audit surfaces are not populated yet', () => {
88
+ const report = runMergeGateAudit(workspaceDir, stateDir);
89
+
90
+ expect(report.overallStatus).toBe('defer');
91
+ expect(report.checks.find((check) => check.id === 'pain_flag_path_contract')?.status).toBe('pass');
92
+ expect(report.checks.find((check) => check.id === 'queue_path_contract')?.status).toBe('pass');
93
+ expect(report.checks.find((check) => check.id === 'runtime_adapter_contract')?.status).toBe('pass');
94
+ expect(report.counts.defer).toBeGreaterThan(0);
95
+ });
96
+
97
+ it('blocks malformed replay reports that claim pass without evidence', () => {
98
+ createImplementationAssetDir(stateDir, 'IMPL-1', '1.0.0');
99
+ const replayDir = path.join(getImplementationAssetRoot(stateDir, 'IMPL-1'), 'replays');
100
+ fs.mkdirSync(replayDir, { recursive: true });
101
+ fs.writeFileSync(
102
+ path.join(replayDir, 'bad-report.json'),
103
+ JSON.stringify(
104
+ {
105
+ overallDecision: 'pass',
106
+ blockers: [],
107
+ generatedAt: '2026-04-12T09:00:00.000Z',
108
+ implementationId: 'IMPL-1',
109
+ evidenceSummary: {
110
+ evidenceStatus: 'empty',
111
+ totalSamples: 0,
112
+ classifiedCounts: {
113
+ painNegative: 0,
114
+ successPositive: 0,
115
+ principleAnchor: 0,
116
+ },
117
+ },
118
+ },
119
+ null,
120
+ 2,
121
+ ),
122
+ 'utf-8',
123
+ );
124
+
125
+ const report = runMergeGateAudit(workspaceDir, stateDir);
126
+ const replayCheck = report.checks.find((check) => check.id === 'replay_evidence_integrity');
127
+
128
+ expect(report.overallStatus).toBe('block');
129
+ expect(replayCheck?.status).toBe('block');
130
+ });
131
+
132
+ it('passes populated dataset, lineage, export, and replay evidence surfaces', () => {
133
+ registerApprovedArtifact('artifact-pass');
134
+ const exportResult = exportORPOSamples(workspaceDir, 'gpt-4');
135
+ expect(exportResult.success).toBe(true);
136
+
137
+ createImplementationAssetDir(stateDir, 'IMPL-1', '1.0.0');
138
+ const replayDir = path.join(getImplementationAssetRoot(stateDir, 'IMPL-1'), 'replays');
139
+ fs.mkdirSync(replayDir, { recursive: true });
140
+ fs.writeFileSync(
141
+ path.join(replayDir, 'good-report.json'),
142
+ JSON.stringify(
143
+ {
144
+ overallDecision: 'pass',
145
+ replayResults: {
146
+ painNegative: { total: 1, passed: 1, failed: 0, details: [] },
147
+ successPositive: { total: 0, passed: 0, failed: 0, details: [] },
148
+ principleAnchor: { total: 0, passed: 0, failed: 0, details: [] },
149
+ },
150
+ blockers: [],
151
+ generatedAt: '2026-04-12T09:00:00.000Z',
152
+ implementationId: 'IMPL-1',
153
+ sampleFingerprints: ['sample-1'],
154
+ evidenceSummary: {
155
+ evidenceStatus: 'observed',
156
+ totalSamples: 1,
157
+ classifiedCounts: {
158
+ painNegative: 1,
159
+ successPositive: 0,
160
+ principleAnchor: 0,
161
+ },
162
+ },
163
+ },
164
+ null,
165
+ 2,
166
+ ),
167
+ 'utf-8',
168
+ );
169
+
170
+ const report = runMergeGateAudit(workspaceDir, stateDir);
171
+
172
+ expect(report.overallStatus).toBe('pass');
173
+ expect(report.counts.block).toBe(0);
174
+ expect(report.counts.defer).toBe(0);
175
+ expect(formatMergeGateAuditReport(report)).toContain('Overall Status: PASS');
176
+ });
177
+
178
+ it('blocks when dataset artifacts are missing', () => {
179
+ const artifactPath = path.join(
180
+ workspaceDir,
181
+ '.state',
182
+ 'nocturnal',
183
+ 'samples',
184
+ 'artifact-missing.json',
185
+ );
186
+ fs.mkdirSync(path.dirname(artifactPath), { recursive: true });
187
+ const artifact = makeArtifact({ artifactId: 'artifact-missing' });
188
+ fs.writeFileSync(artifactPath, JSON.stringify(artifact, null, 2), 'utf-8');
189
+
190
+ const record = registerSample(workspaceDir, artifact, artifactPath, 'gpt-4').record;
191
+ updateReviewStatus(workspaceDir, record.sampleFingerprint, 'approved_for_training', 'approved');
192
+
193
+ // Append lineage pointing to a real file (so lineage passes)
194
+ appendArtifactLineageRecord(workspaceDir, {
195
+ artifactKind: 'behavioral-sample',
196
+ artifactId: record.artifactId,
197
+ principleId: record.principleId,
198
+ ruleId: null,
199
+ sessionId: record.sessionId,
200
+ sourceSnapshotRef: record.sourceSnapshotRef,
201
+ sourcePainIds: [],
202
+ sourceGateBlockIds: [],
203
+ storagePath: artifactPath,
204
+ implementationId: null,
205
+ createdAt: record.createdAt,
206
+ });
207
+
208
+ // Delete the artifact to simulate a missing file
209
+ fs.unlinkSync(artifactPath);
210
+
211
+ const report = runMergeGateAudit(workspaceDir, stateDir);
212
+ const datasetCheck = report.checks.find((c) => c.id === 'dataset_artifact_integrity');
213
+
214
+ expect(report.overallStatus).toBe('block');
215
+ expect(datasetCheck?.status).toBe('block');
216
+ });
217
+
218
+ it('blocks when artifact lineage storage paths are missing', () => {
219
+ const badPath = path.join(workspaceDir, '.state', 'nocturnal', 'samples', 'nonexistent.json');
220
+ appendArtifactLineageRecord(workspaceDir, {
221
+ artifactKind: 'behavioral-sample',
222
+ artifactId: 'lineage-missing',
223
+ principleId: 'T-08',
224
+ ruleId: null,
225
+ sessionId: 'session-1',
226
+ sourceSnapshotRef: 'snap-1',
227
+ sourcePainIds: [],
228
+ sourceGateBlockIds: [],
229
+ storagePath: badPath,
230
+ implementationId: null,
231
+ createdAt: new Date().toISOString(),
232
+ });
233
+
234
+ const report = runMergeGateAudit(workspaceDir, stateDir);
235
+ const lineageCheck = report.checks.find((c) => c.id === 'artifact_lineage_integrity');
236
+
237
+ expect(report.overallStatus).toBe('block');
238
+ expect(lineageCheck?.status).toBe('block');
239
+ });
240
+
241
+ it('blocks when replay reports are malformed', () => {
242
+ createImplementationAssetDir(stateDir, 'IMPL-BAD', '1.0.0');
243
+ const replayDir = path.join(getImplementationAssetRoot(stateDir, 'IMPL-BAD'), 'replays');
244
+ fs.mkdirSync(replayDir, { recursive: true });
245
+ fs.writeFileSync(
246
+ path.join(replayDir, 'malformed.json'),
247
+ '{bad json',
248
+ 'utf-8',
249
+ );
250
+
251
+ const report = runMergeGateAudit(workspaceDir, stateDir);
252
+ const replayCheck = report.checks.find((c) => c.id === 'replay_evidence_integrity');
253
+ const details = replayCheck?.details as Record<string, string[]> | undefined;
254
+
255
+ expect(report.overallStatus).toBe('block');
256
+ expect(replayCheck?.status).toBe('block');
257
+ expect(details?.malformedReports).toHaveLength(1);
258
+ });
259
+
260
+ it('blocks when replay reports have invalid evidenceSummary shape', () => {
261
+ createImplementationAssetDir(stateDir, 'IMPL-NOEVID', '1.0.0');
262
+ const replayDir = path.join(getImplementationAssetRoot(stateDir, 'IMPL-NOEVID'), 'replays');
263
+ fs.mkdirSync(replayDir, { recursive: true });
264
+ fs.writeFileSync(
265
+ path.join(replayDir, 'bad-evidence.json'),
266
+ JSON.stringify({
267
+ overallDecision: 'pass',
268
+ blockers: [],
269
+ generatedAt: '2026-04-12T09:00:00.000Z',
270
+ implementationId: 'IMPL-NOEVID',
271
+ evidenceSummary: { evidenceStatus: 'observed' }, // missing totalSamples
272
+ }),
273
+ 'utf-8',
274
+ );
275
+
276
+ const report = runMergeGateAudit(workspaceDir, stateDir);
277
+ const replayCheck = report.checks.find((c) => c.id === 'replay_evidence_integrity');
278
+ const details = replayCheck?.details as Record<string, string[]> | undefined;
279
+
280
+ expect(report.overallStatus).toBe('block');
281
+ expect(replayCheck?.status).toBe('block');
282
+ expect(details?.missingEvidenceSummary).toHaveLength(1);
283
+ });
284
+ });