npm - principles-disciple - Versions diffs - 1.17.0 → 1.19.0 - Mend

principles-disciple 1.17.0 → 1.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23)

package/openclaw.plugin.json +1 -1
package/package.json +1 -1
package/src/commands/nocturnal-rollout.ts +2 -0
package/src/core/merge-gate-audit.ts +506 -0
package/src/core/nocturnal-compliance.ts +1 -0
package/src/core/nocturnal-export.ts +106 -6
package/src/core/nocturnal-trinity.ts +559 -153
package/src/core/promotion-gate.ts +33 -0
package/src/core/replay-engine.ts +25 -0
package/src/service/evolution-worker.ts +13 -6
package/src/service/nocturnal-target-selector.ts +9 -2
package/src/service/subagent-workflow/nocturnal-workflow-manager.ts +2 -6
package/templates/langs/zh/skills/ai-sprint-orchestration/references/specs/nocturnal-trinity-quality-enhancement.json +111 -0
package/templates/langs/zh/skills/ai-sprint-orchestration/scripts/lib/task-specs.mjs +1 -1
package/templates/langs/zh/skills/ai-sprint-orchestration/scripts/run.mjs +1 -1
package/tests/core/merge-gate-audit.test.ts +284 -0
package/tests/core/nocturnal-export.test.ts +55 -0
package/tests/core/nocturnal-trinity.test.ts +77 -4
package/tests/core/pain-integration.test.ts +27 -0
package/tests/core/promotion-gate.test.ts +5 -0
package/tests/core/replay-engine.test.ts +19 -0
package/tests/service/evolution-worker.nocturnal.test.ts +0 -547
package/tests/service/nocturnal-workflow-manager.test.ts +2 -0

package/src/core/promotion-gate.ts CHANGED

@@ -277,6 +277,12 @@ export interface PromotionGateResult {
     threshold: number;
     passed: boolean;
   };
+  evidenceSummary: {
+    evidenceMode: 'shadow' | 'eval-proxy' | 'mixed';
+    shadowSampleCount: number;
+    deltaSource: 'eval';
+  };
 }
 /**
@@ -337,6 +343,11 @@ export function evaluatePromotionGate(
       blockers,
       constraintChecks: [],
       deltaCheck: { actual: 0, threshold: minDelta, passed: false },
+      evidenceSummary: {
+        evidenceMode: 'eval-proxy',
+        shadowSampleCount: 0,
+        deltaSource: 'eval',
+      },
     };
   }
@@ -351,6 +362,11 @@ export function evaluatePromotionGate(
       blockers,
       constraintChecks: [],
       deltaCheck: { actual: 0, threshold: minDelta, passed: false },
+      evidenceSummary: {
+        evidenceMode: 'eval-proxy',
+        shadowSampleCount: 0,
+        deltaSource: 'eval',
+      },
     };
   }
@@ -366,6 +382,11 @@ export function evaluatePromotionGate(
       blockers,
       constraintChecks: [],
       deltaCheck: { actual: 0, threshold: minDelta, passed: false },
+      evidenceSummary: {
+        evidenceMode: 'eval-proxy',
+        shadowSampleCount: 0,
+        deltaSource: 'eval',
+      },
     };
   }
@@ -496,12 +517,24 @@ export function evaluatePromotionGate(
     suggestedState = 'rejected';
   }
+  const evidenceMode =
+    arbiterRejectSource === 'shadow' && executabilityRejectSource === 'shadow'
+      ? 'shadow'
+      : arbiterRejectSource === 'eval-proxy' && executabilityRejectSource === 'eval-proxy'
+        ? 'eval-proxy'
+        : 'mixed';
   return {
     passes: allPassed,
     suggestedState,
     blockers,
     constraintChecks,
     deltaCheck,
+    evidenceSummary: {
+      evidenceMode,
+      shadowSampleCount: shadowStats?.totalCount ?? 0,
+      deltaSource: 'eval',
+    },
   };
 }

package/src/core/replay-engine.ts CHANGED

@@ -63,6 +63,15 @@ export interface ReplayReport {
     principleAnchor: ClassificationSummary;
   };
   blockers: string[];
+  evidenceSummary: {
+    evidenceStatus: 'observed' | 'empty';
+    totalSamples: number;
+    classifiedCounts: {
+      painNegative: number;
+      successPositive: number;
+      principleAnchor: number;
+    };
+  };
   generatedAt: string;
   implementationId: string;
   sampleFingerprints: string[];
@@ -432,6 +441,11 @@ export class ReplayEngine {
     const successSummary = toSummary(successPositive);
     const anchorSummary = toSummary(principleAnchor);
     const blockers: string[] = [];
+    const totalSamples = results.length;
+    if (totalSamples === 0) {
+      blockers.push('NO REPLAY EVIDENCE: No classified replay samples were available. Report cannot justify promotion-quality conclusions.');
+    }
     for (const leak of painSummary.details.filter((result) => !result.passed)) {
       blockers.push(
@@ -459,6 +473,15 @@ export class ReplayEngine {
         principleAnchor: anchorSummary,
       },
       blockers,
+      evidenceSummary: {
+        evidenceStatus: totalSamples > 0 ? 'observed' : 'empty',
+        totalSamples,
+        classifiedCounts: {
+          painNegative: painSummary.total,
+          successPositive: successSummary.total,
+          principleAnchor: anchorSummary.total,
+        },
+      },
       generatedAt: new Date().toISOString(),
       implementationId,
       sampleFingerprints: results.map((result) => result.sampleFingerprint),
@@ -471,6 +494,7 @@ export class ReplayEngine {
     success: ClassificationSummary,
     anchor: ClassificationSummary
   ): 'pass' | 'fail' | 'needs-review' {
+    if (pain.total + success.total + anchor.total === 0) return 'needs-review';
     if (pain.failed > 0) return 'fail';
     if (anchor.failed > 0) return 'fail';
     if (success.failed > 0) return 'needs-review';
@@ -526,6 +550,7 @@ export function formatReplayReport(report: ReplayReport): string {
   output += `Implementation: ${report.implementationId}\n`;
   output += `Generated At:   ${report.generatedAt}\n`;
   output += `Overall Decision: [${decisionEmoji}]\n\n`;
+  output += `Evidence Status: ${report.evidenceSummary.evidenceStatus} (samples=${report.evidenceSummary.totalSamples})\n\n`;
   const formatSection = (
     label: string,

package/src/service/evolution-worker.ts CHANGED

@@ -173,7 +173,7 @@ let timeoutId: NodeJS.Timeout | null = null;
  * Old queue items (without taskKind) are migrated to pain_diagnosis for compatibility.
  */
 export type QueueStatus = 'pending' | 'in_progress' | 'completed' | 'failed' | 'canceled';
-export type TaskResolution = 'marker_detected' | 'auto_completed_timeout' | 'failed_max_retries' | 'runtime_unavailable' | 'canceled' | 'late_marker_principle_created' | 'late_marker_no_principle' | 'stub_fallback';
+export type TaskResolution = 'marker_detected' | 'auto_completed_timeout' | 'failed_max_retries' | 'runtime_unavailable' | 'canceled' | 'late_marker_principle_created' | 'late_marker_no_principle' | 'stub_fallback' | 'skipped_thin_violation';
 /**
  * Recent pain context attached to sleep_reflection tasks.
@@ -1595,13 +1595,14 @@ async function processEvolutionQueue(wctx: WorkspaceContext, logger: PluginLogge
                             const errorReason = lastEvent?.reason ?? 'unknown';
                             // #219: Include payload details for better diagnostics
                             let detailedError = `Workflow terminal_error: ${errorReason}`;
+                            let payload: unknown = {};
                             try {
-                                const payload = lastEvent?.payload ?? {};
-                                if (payload.skipReason) {
-                                    detailedError += ` (skipReason: ${payload.skipReason})`;
+                                payload = lastEvent?.payload ?? {};
+                                if ((payload as any).skipReason) {
+                                    detailedError += ` (skipReason: ${(payload as any).skipReason})`;
                                 }
-                                if (payload.failures && Array.isArray(payload.failures) && payload.failures.length > 0) {
-                                    detailedError += ` | failures: ${(payload.failures as string[]).slice(0, 3).join(', ')}`;
+                                if ((payload as any).failures && Array.isArray((payload as any).failures) && (payload as any).failures.length > 0) {
+                                    detailedError += ` | failures: ${((payload as any).failures as string[]).slice(0, 3).join(', ')}`;
                                 }
                             } catch { /* ignore parse errors */ }
                             sleepTask.lastError = detailedError;
@@ -1613,6 +1614,12 @@ async function processEvolutionQueue(wctx: WorkspaceContext, logger: PluginLogge
                                 sleepTask.completed_at = new Date().toISOString();
                                 sleepTask.resolution = 'stub_fallback';
                                 logger?.warn?.(`[PD:EvolutionWorker] sleep_reflection task ${sleepTask.id} background runtime unavailable, using stub fallback: ${errorReason}`);
+                            } else if ((payload as any).skipReason === 'no_violating_sessions') {
+                                // #244: No meaningful violations found (thin filter) → skip without failure
+                                sleepTask.status = 'completed';
+                                sleepTask.completed_at = new Date().toISOString();
+                                sleepTask.resolution = 'skipped_thin_violation';
+                                logger?.info?.(`[PD:EvolutionWorker] sleep_reflection task ${sleepTask.id} completed: no sessions with meaningful violations found`);
                             } else {
                                 sleepTask.status = 'failed';
                                 sleepTask.completed_at = new Date().toISOString();

package/src/service/nocturnal-target-selector.ts CHANGED

@@ -302,7 +302,7 @@ export class NocturnalTargetSelector {
     this.recentPainContext = recentPainContext;
     this.opts = {
       minViolationDensity: restOptions.minViolationDensity ?? 0.1,
-      maxSessionCandidates: restOptions.maxSessionCandidates ?? 50,
+      maxSessionCandidates: restOptions.maxSessionCandidates ?? 300,
       idleThresholdMs: restOptions.idleThresholdMs ?? DEFAULT_IDLE_THRESHOLD_MS,
     };
   }
@@ -440,7 +440,14 @@ export class NocturnalTargetSelector {
     }
     // Compute violation signals for each session
-    const violatingSessions: ViolationSignal[] = recentSessions.map((session) => {
+    // #244: Filter out sessions that are too thin for meaningful reflection
+    // A session needs enough violation context (failures + pain + gates >= 2)
+    const MIN_VIOLATION_DEPTH = 2;
+    const richSessions = recentSessions.filter(
+      s => (s.failureCount ?? 0) + (s.painEventCount ?? 0) + (s.gateBlockCount ?? 0) >= MIN_VIOLATION_DEPTH
+    );
+    const violatingSessions: ViolationSignal[] = richSessions.map((session) => {
       const violationDensity = computeViolationDensity(session);
       const snapshot = this.extractor.getNocturnalSessionSnapshot(session.sessionId);

package/src/service/subagent-workflow/nocturnal-workflow-manager.ts CHANGED

@@ -40,7 +40,6 @@ import type { NocturnalSessionSnapshot } from '../../core/nocturnal-trajectory-e
 import type { RecentPainContext } from '../evolution-worker.js';
 import * as fs from 'fs';
 import * as path from 'path';
-import { isSubagentRuntimeAvailable } from '../../utils/subagent-probe.js';
 import { validateNocturnalSnapshotIngress } from '../../core/nocturnal-snapshot-contract.js';
 // ─────────────────────────────────────────────────────────────────────────────
@@ -173,11 +172,8 @@ export class NocturnalWorkflowManager implements WorkflowManager {
             metadata?: Record<string, unknown>;
         }
     ): Promise<WorkflowHandle> {
-        // #179: Check subagent runtime availability before starting
-        // Other workflow managers (empathy, deep-reflect) have this check
-        // eslint-disable-next-line @typescript-eslint/no-explicit-any -- Reason: TrinityRuntimeAdapter interface doesn't expose api.runtime.subagent, but OpenClawTrinityRuntimeAdapter has it
-        const subagent = (this.runtimeAdapter as any).api?.runtime?.subagent;
-        if (!isSubagentRuntimeAvailable(subagent)) {
+        const runtimeAvailable = this.runtimeAdapter.isRuntimeAvailable();
+        if (!runtimeAvailable) {
             this.logger.warn(`[PD:NocturnalWorkflow] Subagent runtime unavailable, skipping workflow`);
             throw new Error(`NocturnalWorkflowManager: subagent runtime unavailable`);
         }

package/templates/langs/zh/skills/ai-sprint-orchestration/references/specs/nocturnal-trinity-quality-enhancement.json ADDED

@@ -0,0 +1,111 @@
+{
+  "id": "nocturnal-trinity-quality-enhancement",
+  "title": "Enhance nocturnal Trinity prompt quality",
+  "description": "Enhance nocturnal Trinity prompt quality — add Dreamer perspective diversity constraints and Scribe rejected-decision analysis",
+  "workspace": "/home/csuzngjh/code/principles",
+  "branch": "fix/bugs-231-228",
+  "requiresTaskContract": true,
+  "maxRoundsPerStage": 2,
+  "maxRuntimeMinutes": 60,
+  "stages": [
+    "investigate",
+    "implement-pass-1",
+    "verify"
+  ],
+  "taskContract": {
+    "goal": "Improve nocturnal Trinity output quality by adding perspective diversity to Dreamer and rejected-decision analysis to Scribe",
+    "inScope": [
+      "nocturnal-trinity.ts prompt modifications",
+      "nocturnal-trinity.test.ts assertion updates",
+      "nocturnal-arbiter.ts compatibility verification"
+    ],
+    "outOfScope": [
+      "Runtime or infrastructure changes",
+      "New file creation",
+      "Non-Trinity prompt changes"
+    ],
+    "validationCommands": [
+      "npx vitest run packages/openclaw-plugin/tests/core/nocturnal --reporter=verbose"
+    ],
+    "expectedArtifacts": [
+      "packages/openclaw-plugin/src/core/nocturnal-trinity.ts"
+    ]
+  },
+  "producer": {
+    "agent": "iflow",
+    "model": "glm-5",
+    "timeoutSeconds": 1800
+  },
+  "reviewerA": {
+    "agent": "iflow",
+    "model": "glm-4.7",
+    "timeoutSeconds": 1200,
+    "role": "code-quality",
+    "focus": "Verify prompt changes are minimal, backward-compatible, and don't break existing arbiter validation"
+  },
+  "reviewerB": {
+    "agent": "iflow",
+    "model": "glm-4.7",
+    "timeoutSeconds": 1200,
+    "role": "functional-correctness",
+    "focus": "Verify tests pass and the new prompt constraints produce structurally valid Trinity output"
+  },
+  "escalationReviewer": {
+    "agent": "iflow",
+    "model": "glm-5",
+    "timeoutSeconds": 1800
+  },
+  "stageGoals": {
+    "investigate": [
+      "Read nocturnal-trinity.ts lines 64-298 (all three prompts) and nocturnal-trinity.test.ts",
+      "Identify exact insertion points for Dreamer diversity section and Scribe analysis section",
+      "List all test assertions that reference prompt content",
+      "Report findings in producer.md"
+    ],
+    "implement-pass-1": [
+      "Apply Dreamer perspective diversity constraints to NOCTURNAL_DREAMER_PROMPT",
+      "Apply Scribe rejected-decision analysis to NOCTURNAL_SCRIBE_PROMPT",
+      "Update test assertions in nocturnal-trinity.test.ts if needed",
+      "Run nocturnal-trinity and nocturnal-arbiter tests to verify no breakage"
+    ],
+    "verify": [
+      "Run full nocturnal test suite: npx vitest run packages/openclaw-plugin/tests/core/nocturnal --reporter=verbose",
+      "Verify all tests pass with 0 failures",
+      "Confirm arbiter validation is unchanged",
+      "Confirm no new files were created"
+    ]
+  },
+  "stageCriteria": {
+    "investigate": {
+      "scoringDimensions": [
+        "completeness",
+        "accuracy"
+      ],
+      "dimensionThreshold": 3,
+      "requiredDeliverables": [
+        "producer.md"
+      ]
+    },
+    "implement-pass-1": {
+      "scoringDimensions": [
+        "correctness",
+        "completeness"
+      ],
+      "dimensionThreshold": 3,
+      "requiredDeliverables": [
+        "producer.md",
+        "reviewer-a.md",
+        "reviewer-b.md"
+      ]
+    },
+    "verify": {
+      "scoringDimensions": [
+        "correctness"
+      ],
+      "dimensionThreshold": 3,
+      "requiredDeliverables": [
+        "producer.md"
+      ]
+    }
+  }
+}

package/templates/langs/zh/skills/ai-sprint-orchestration/scripts/lib/task-specs.mjs CHANGED

@@ -325,7 +325,7 @@ export function buildStageBrief(spec, stage, round, previousDecision, handoff =
     carryForward.trimEnd(),
     '',
     `## Constraints`,
-    ...spec.context.map((line) => `- ${line}`),
+    ...((spec.context ?? []).map((line) => `- ${line}`)),
     '',
     ...(spec.taskContract
       ? [

package/templates/langs/zh/skills/ai-sprint-orchestration/scripts/run.mjs CHANGED

@@ -3413,7 +3413,7 @@ if (process.argv[1] === fileURLToPath(import.meta.url)) {
   main().catch((err) => {
     // main() is async and may throw. The try/catch inside main() handles
     // errors within its body, but rejections from the Promise itself land here.
-    console.error('Fatal error:', err.message);
+    console.error('Fatal error:', err.message, err.stack);
     process.exit(1);
   });
 }

package/tests/core/merge-gate-audit.test.ts ADDED

@@ -0,0 +1,284 @@
+import { afterEach, beforeEach, describe, expect, it } from 'vitest';
+import * as fs from 'fs';
+import * as os from 'os';
+import * as path from 'path';
+import {
+  formatMergeGateAuditReport,
+  runMergeGateAudit,
+} from '../../src/core/merge-gate-audit.js';
+import type { NocturnalArtifact } from '../../src/core/nocturnal-arbiter.js';
+import {
+  registerSample,
+  updateReviewStatus,
+} from '../../src/core/nocturnal-dataset.js';
+import { appendArtifactLineageRecord } from '../../src/core/nocturnal-artifact-lineage.js';
+import { exportORPOSamples } from '../../src/core/nocturnal-export.js';
+import { createImplementationAssetDir, getImplementationAssetRoot } from '../../src/core/code-implementation-storage.js';
+import { safeRmDir } from '../test-utils.js';
+function makeArtifact(overrides: Partial<NocturnalArtifact> = {}): NocturnalArtifact {
+  return {
+    artifactId: 'artifact-1',
+    sessionId: 'session-1',
+    principleId: 'T-08',
+    sourceSnapshotRef: 'snapshot-1',
+    badDecision: 'Retried without checking state',
+    betterDecision: 'Inspect state before retrying',
+    rationale: 'Evidence first.',
+    createdAt: '2026-04-12T09:00:00.000Z',
+    ...overrides,
+  };
+}
+describe('merge-gate-audit', () => {
+  let tempDir: string;
+  let workspaceDir: string;
+  let stateDir: string;
+  beforeEach(() => {
+    tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'pd-merge-gate-audit-'));
+    workspaceDir = path.join(tempDir, 'workspace');
+    stateDir = path.join(tempDir, '.state');
+    fs.mkdirSync(workspaceDir, { recursive: true });
+    fs.mkdirSync(stateDir, { recursive: true });
+  });
+  afterEach(() => {
+    safeRmDir(tempDir);
+  });
+  function registerApprovedArtifact(artifactId = 'artifact-1'): string {
+    const artifact = makeArtifact({ artifactId });
+    const artifactPath = path.join(
+      workspaceDir,
+      '.state',
+      'nocturnal',
+      'samples',
+      `${artifactId}.json`,
+    );
+    fs.mkdirSync(path.dirname(artifactPath), { recursive: true });
+    fs.writeFileSync(artifactPath, JSON.stringify(artifact, null, 2), 'utf-8');
+    const record = registerSample(workspaceDir, artifact, artifactPath, 'gpt-4').record;
+    updateReviewStatus(
+      workspaceDir,
+      record.sampleFingerprint,
+      'approved_for_training',
+      'approved for merge gate audit',
+    );
+    appendArtifactLineageRecord(workspaceDir, {
+      artifactKind: 'behavioral-sample',
+      artifactId: record.artifactId,
+      principleId: record.principleId,
+      ruleId: null,
+      sessionId: record.sessionId,
+      sourceSnapshotRef: record.sourceSnapshotRef,
+      sourcePainIds: ['pain-1'],
+      sourceGateBlockIds: ['gate-1'],
+      storagePath: artifactPath,
+      implementationId: null,
+      createdAt: record.createdAt,
+    });
+    return record.sampleFingerprint;
+  }
+  it('returns defer when audit surfaces are not populated yet', () => {
+    const report = runMergeGateAudit(workspaceDir, stateDir);
+    expect(report.overallStatus).toBe('defer');
+    expect(report.checks.find((check) => check.id === 'pain_flag_path_contract')?.status).toBe('pass');
+    expect(report.checks.find((check) => check.id === 'queue_path_contract')?.status).toBe('pass');
+    expect(report.checks.find((check) => check.id === 'runtime_adapter_contract')?.status).toBe('pass');
+    expect(report.counts.defer).toBeGreaterThan(0);
+  });
+  it('blocks malformed replay reports that claim pass without evidence', () => {
+    createImplementationAssetDir(stateDir, 'IMPL-1', '1.0.0');
+    const replayDir = path.join(getImplementationAssetRoot(stateDir, 'IMPL-1'), 'replays');
+    fs.mkdirSync(replayDir, { recursive: true });
+    fs.writeFileSync(
+      path.join(replayDir, 'bad-report.json'),
+      JSON.stringify(
+        {
+          overallDecision: 'pass',
+          blockers: [],
+          generatedAt: '2026-04-12T09:00:00.000Z',
+          implementationId: 'IMPL-1',
+          evidenceSummary: {
+            evidenceStatus: 'empty',
+            totalSamples: 0,
+            classifiedCounts: {
+              painNegative: 0,
+              successPositive: 0,
+              principleAnchor: 0,
+            },
+          },
+        },
+        null,
+        2,
+      ),
+      'utf-8',
+    );
+    const report = runMergeGateAudit(workspaceDir, stateDir);
+    const replayCheck = report.checks.find((check) => check.id === 'replay_evidence_integrity');
+    expect(report.overallStatus).toBe('block');
+    expect(replayCheck?.status).toBe('block');
+  });
+  it('passes populated dataset, lineage, export, and replay evidence surfaces', () => {
+    registerApprovedArtifact('artifact-pass');
+    const exportResult = exportORPOSamples(workspaceDir, 'gpt-4');
+    expect(exportResult.success).toBe(true);
+    createImplementationAssetDir(stateDir, 'IMPL-1', '1.0.0');
+    const replayDir = path.join(getImplementationAssetRoot(stateDir, 'IMPL-1'), 'replays');
+    fs.mkdirSync(replayDir, { recursive: true });
+    fs.writeFileSync(
+      path.join(replayDir, 'good-report.json'),
+      JSON.stringify(
+        {
+          overallDecision: 'pass',
+          replayResults: {
+            painNegative: { total: 1, passed: 1, failed: 0, details: [] },
+            successPositive: { total: 0, passed: 0, failed: 0, details: [] },
+            principleAnchor: { total: 0, passed: 0, failed: 0, details: [] },
+          },
+          blockers: [],
+          generatedAt: '2026-04-12T09:00:00.000Z',
+          implementationId: 'IMPL-1',
+          sampleFingerprints: ['sample-1'],
+          evidenceSummary: {
+            evidenceStatus: 'observed',
+            totalSamples: 1,
+            classifiedCounts: {
+              painNegative: 1,
+              successPositive: 0,
+              principleAnchor: 0,
+            },
+          },
+        },
+        null,
+        2,
+      ),
+      'utf-8',
+    );
+    const report = runMergeGateAudit(workspaceDir, stateDir);
+    expect(report.overallStatus).toBe('pass');
+    expect(report.counts.block).toBe(0);
+    expect(report.counts.defer).toBe(0);
+    expect(formatMergeGateAuditReport(report)).toContain('Overall Status: PASS');
+  });
+  it('blocks when dataset artifacts are missing', () => {
+    const artifactPath = path.join(
+      workspaceDir,
+      '.state',
+      'nocturnal',
+      'samples',
+      'artifact-missing.json',
+    );
+    fs.mkdirSync(path.dirname(artifactPath), { recursive: true });
+    const artifact = makeArtifact({ artifactId: 'artifact-missing' });
+    fs.writeFileSync(artifactPath, JSON.stringify(artifact, null, 2), 'utf-8');
+    const record = registerSample(workspaceDir, artifact, artifactPath, 'gpt-4').record;
+    updateReviewStatus(workspaceDir, record.sampleFingerprint, 'approved_for_training', 'approved');
+    // Append lineage pointing to a real file (so lineage passes)
+    appendArtifactLineageRecord(workspaceDir, {
+      artifactKind: 'behavioral-sample',
+      artifactId: record.artifactId,
+      principleId: record.principleId,
+      ruleId: null,
+      sessionId: record.sessionId,
+      sourceSnapshotRef: record.sourceSnapshotRef,
+      sourcePainIds: [],
+      sourceGateBlockIds: [],
+      storagePath: artifactPath,
+      implementationId: null,
+      createdAt: record.createdAt,
+    });
+    // Delete the artifact to simulate a missing file
+    fs.unlinkSync(artifactPath);
+    const report = runMergeGateAudit(workspaceDir, stateDir);
+    const datasetCheck = report.checks.find((c) => c.id === 'dataset_artifact_integrity');
+    expect(report.overallStatus).toBe('block');
+    expect(datasetCheck?.status).toBe('block');
+  });
+  it('blocks when artifact lineage storage paths are missing', () => {
+    const badPath = path.join(workspaceDir, '.state', 'nocturnal', 'samples', 'nonexistent.json');
+    appendArtifactLineageRecord(workspaceDir, {
+      artifactKind: 'behavioral-sample',
+      artifactId: 'lineage-missing',
+      principleId: 'T-08',
+      ruleId: null,
+      sessionId: 'session-1',
+      sourceSnapshotRef: 'snap-1',
+      sourcePainIds: [],
+      sourceGateBlockIds: [],
+      storagePath: badPath,
+      implementationId: null,
+      createdAt: new Date().toISOString(),
+    });
+    const report = runMergeGateAudit(workspaceDir, stateDir);
+    const lineageCheck = report.checks.find((c) => c.id === 'artifact_lineage_integrity');
+    expect(report.overallStatus).toBe('block');
+    expect(lineageCheck?.status).toBe('block');
+  });
+  it('blocks when replay reports are malformed', () => {
+    createImplementationAssetDir(stateDir, 'IMPL-BAD', '1.0.0');
+    const replayDir = path.join(getImplementationAssetRoot(stateDir, 'IMPL-BAD'), 'replays');
+    fs.mkdirSync(replayDir, { recursive: true });
+    fs.writeFileSync(
+      path.join(replayDir, 'malformed.json'),
+      '{bad json',
+      'utf-8',
+    );
+    const report = runMergeGateAudit(workspaceDir, stateDir);
+    const replayCheck = report.checks.find((c) => c.id === 'replay_evidence_integrity');
+    const details = replayCheck?.details as Record<string, string[]> | undefined;
+    expect(report.overallStatus).toBe('block');
+    expect(replayCheck?.status).toBe('block');
+    expect(details?.malformedReports).toHaveLength(1);
+  });
+  it('blocks when replay reports have invalid evidenceSummary shape', () => {
+    createImplementationAssetDir(stateDir, 'IMPL-NOEVID', '1.0.0');
+    const replayDir = path.join(getImplementationAssetRoot(stateDir, 'IMPL-NOEVID'), 'replays');
+    fs.mkdirSync(replayDir, { recursive: true });
+    fs.writeFileSync(
+      path.join(replayDir, 'bad-evidence.json'),
+      JSON.stringify({
+        overallDecision: 'pass',
+        blockers: [],
+        generatedAt: '2026-04-12T09:00:00.000Z',
+        implementationId: 'IMPL-NOEVID',
+        evidenceSummary: { evidenceStatus: 'observed' }, // missing totalSamples
+      }),
+      'utf-8',
+    );
+    const report = runMergeGateAudit(workspaceDir, stateDir);
+    const replayCheck = report.checks.find((c) => c.id === 'replay_evidence_integrity');
+    const details = replayCheck?.details as Record<string, string[]> | undefined;
+    expect(report.overallStatus).toBe('block');
+    expect(replayCheck?.status).toBe('block');
+    expect(details?.missingEvidenceSummary).toHaveLength(1);
+  });
+});