npm - principles-disciple - Versions diffs - 1.18.0 → 1.19.0 - Mend

principles-disciple 1.18.0 → 1.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16)

package/openclaw.plugin.json +1 -1
package/package.json +1 -1
package/src/commands/nocturnal-rollout.ts +2 -0
package/src/core/merge-gate-audit.ts +506 -0
package/src/core/nocturnal-export.ts +106 -6
package/src/core/nocturnal-trinity.ts +111 -28
package/src/core/promotion-gate.ts +33 -0
package/src/core/replay-engine.ts +25 -0
package/src/service/subagent-workflow/nocturnal-workflow-manager.ts +2 -6
package/tests/core/merge-gate-audit.test.ts +284 -0
package/tests/core/nocturnal-export.test.ts +55 -0
package/tests/core/nocturnal-trinity.test.ts +77 -4
package/tests/core/pain-integration.test.ts +27 -0
package/tests/core/promotion-gate.test.ts +5 -0
package/tests/core/replay-engine.test.ts +19 -0
package/tests/service/nocturnal-workflow-manager.test.ts +2 -0

package/tests/core/merge-gate-audit.test.ts ADDED Viewed

@@ -0,0 +1,284 @@
+import { afterEach, beforeEach, describe, expect, it } from 'vitest';
+import * as fs from 'fs';
+import * as os from 'os';
+import * as path from 'path';
+import {
+  formatMergeGateAuditReport,
+  runMergeGateAudit,
+} from '../../src/core/merge-gate-audit.js';
+import type { NocturnalArtifact } from '../../src/core/nocturnal-arbiter.js';
+import {
+  registerSample,
+  updateReviewStatus,
+} from '../../src/core/nocturnal-dataset.js';
+import { appendArtifactLineageRecord } from '../../src/core/nocturnal-artifact-lineage.js';
+import { exportORPOSamples } from '../../src/core/nocturnal-export.js';
+import { createImplementationAssetDir, getImplementationAssetRoot } from '../../src/core/code-implementation-storage.js';
+import { safeRmDir } from '../test-utils.js';
+function makeArtifact(overrides: Partial<NocturnalArtifact> = {}): NocturnalArtifact { // Test fixture: returns a NocturnalArtifact populated with the fixed default values below.
+  return {
+    artifactId: 'artifact-1',
+    sessionId: 'session-1',
+    principleId: 'T-08',
+    sourceSnapshotRef: 'snapshot-1',
+    badDecision: 'Retried without checking state',
+    betterDecision: 'Inspect state before retrying',
+    rationale: 'Evidence first.',
+    createdAt: '2026-04-12T09:00:00.000Z',
+    ...overrides, // Spread last so any field supplied by the caller replaces the default above.
+  };
+}
+describe('merge-gate-audit', () => { // Suite for runMergeGateAudit and formatMergeGateAuditReport; every test runs against a fresh temp directory.
+  let tempDir: string;
+  let workspaceDir: string;
+  let stateDir: string;
+  beforeEach(() => { // Create a unique temp root containing a 'workspace' and a '.state' directory.
+    tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'pd-merge-gate-audit-'));
+    workspaceDir = path.join(tempDir, 'workspace');
+    stateDir = path.join(tempDir, '.state');
+    fs.mkdirSync(workspaceDir, { recursive: true });
+    fs.mkdirSync(stateDir, { recursive: true });
+  });
+  afterEach(() => { // Remove the temp root created in beforeEach.
+    safeRmDir(tempDir);
+  });
+  function registerApprovedArtifact(artifactId = 'artifact-1'): string { // Helper: writes an artifact file, registers it as a sample, marks it approved_for_training, appends a lineage record, and returns the sample fingerprint.
+    const artifact = makeArtifact({ artifactId });
+    const artifactPath = path.join( // Resolves to <workspaceDir>/.state/nocturnal/samples/<artifactId>.json.
+      workspaceDir,
+      '.state',
+      'nocturnal',
+      'samples',
+      `${artifactId}.json`,
+    );
+    fs.mkdirSync(path.dirname(artifactPath), { recursive: true });
+    fs.writeFileSync(artifactPath, JSON.stringify(artifact, null, 2), 'utf-8');
+    const record = registerSample(workspaceDir, artifact, artifactPath, 'gpt-4').record;
+    updateReviewStatus(
+      workspaceDir,
+      record.sampleFingerprint,
+      'approved_for_training',
+      'approved for merge gate audit',
+    );
+    appendArtifactLineageRecord(workspaceDir, { // Lineage record points storagePath at the artifact file written above.
+      artifactKind: 'behavioral-sample',
+      artifactId: record.artifactId,
+      principleId: record.principleId,
+      ruleId: null,
+      sessionId: record.sessionId,
+      sourceSnapshotRef: record.sourceSnapshotRef,
+      sourcePainIds: ['pain-1'],
+      sourceGateBlockIds: ['gate-1'],
+      storagePath: artifactPath,
+      implementationId: null,
+      createdAt: record.createdAt,
+    });
+    return record.sampleFingerprint;
+  }
+  it('returns defer when audit surfaces are not populated yet', () => { // No fixtures are created: the audit runs on the empty directories from beforeEach.
+    const report = runMergeGateAudit(workspaceDir, stateDir);
+    expect(report.overallStatus).toBe('defer');
+    expect(report.checks.find((check) => check.id === 'pain_flag_path_contract')?.status).toBe('pass');
+    expect(report.checks.find((check) => check.id === 'queue_path_contract')?.status).toBe('pass');
+    expect(report.checks.find((check) => check.id === 'runtime_adapter_contract')?.status).toBe('pass');
+    expect(report.counts.defer).toBeGreaterThan(0);
+  });
+  it('blocks malformed replay reports that claim pass without evidence', () => { // The report written below says 'pass' while its evidenceSummary is 'empty' with zero samples.
+    createImplementationAssetDir(stateDir, 'IMPL-1', '1.0.0');
+    const replayDir = path.join(getImplementationAssetRoot(stateDir, 'IMPL-1'), 'replays');
+    fs.mkdirSync(replayDir, { recursive: true });
+    fs.writeFileSync(
+      path.join(replayDir, 'bad-report.json'),
+      JSON.stringify(
+        {
+          overallDecision: 'pass',
+          blockers: [],
+          generatedAt: '2026-04-12T09:00:00.000Z',
+          implementationId: 'IMPL-1',
+          evidenceSummary: {
+            evidenceStatus: 'empty',
+            totalSamples: 0,
+            classifiedCounts: {
+              painNegative: 0,
+              successPositive: 0,
+              principleAnchor: 0,
+            },
+          },
+        },
+        null,
+        2,
+      ),
+      'utf-8',
+    );
+    const report = runMergeGateAudit(workspaceDir, stateDir);
+    const replayCheck = report.checks.find((check) => check.id === 'replay_evidence_integrity');
+    expect(report.overallStatus).toBe('block');
+    expect(replayCheck?.status).toBe('block');
+  });
+  it('passes populated dataset, lineage, export, and replay evidence surfaces', () => { // Populates a registered sample, an export, and a replay report before auditing.
+    registerApprovedArtifact('artifact-pass');
+    const exportResult = exportORPOSamples(workspaceDir, 'gpt-4');
+    expect(exportResult.success).toBe(true);
+    createImplementationAssetDir(stateDir, 'IMPL-1', '1.0.0');
+    const replayDir = path.join(getImplementationAssetRoot(stateDir, 'IMPL-1'), 'replays');
+    fs.mkdirSync(replayDir, { recursive: true });
+    fs.writeFileSync(
+      path.join(replayDir, 'good-report.json'),
+      JSON.stringify(
+        {
+          overallDecision: 'pass',
+          replayResults: {
+            painNegative: { total: 1, passed: 1, failed: 0, details: [] },
+            successPositive: { total: 0, passed: 0, failed: 0, details: [] },
+            principleAnchor: { total: 0, passed: 0, failed: 0, details: [] },
+          },
+          blockers: [],
+          generatedAt: '2026-04-12T09:00:00.000Z',
+          implementationId: 'IMPL-1',
+          sampleFingerprints: ['sample-1'],
+          evidenceSummary: {
+            evidenceStatus: 'observed',
+            totalSamples: 1,
+            classifiedCounts: {
+              painNegative: 1,
+              successPositive: 0,
+              principleAnchor: 0,
+            },
+          },
+        },
+        null,
+        2,
+      ),
+      'utf-8',
+    );
+    const report = runMergeGateAudit(workspaceDir, stateDir);
+    expect(report.overallStatus).toBe('pass');
+    expect(report.counts.block).toBe(0);
+    expect(report.counts.defer).toBe(0);
+    expect(formatMergeGateAuditReport(report)).toContain('Overall Status: PASS'); // The formatted report text must include the upper-case overall status line.
+  });
+  it('blocks when dataset artifacts are missing', () => { // Registers and approves a sample, then deletes its artifact file before auditing.
+    const artifactPath = path.join(
+      workspaceDir,
+      '.state',
+      'nocturnal',
+      'samples',
+      'artifact-missing.json',
+    );
+    fs.mkdirSync(path.dirname(artifactPath), { recursive: true });
+    const artifact = makeArtifact({ artifactId: 'artifact-missing' });
+    fs.writeFileSync(artifactPath, JSON.stringify(artifact, null, 2), 'utf-8');
+    const record = registerSample(workspaceDir, artifact, artifactPath, 'gpt-4').record;
+    updateReviewStatus(workspaceDir, record.sampleFingerprint, 'approved_for_training', 'approved');
+    // Append lineage pointing to a real file (so lineage passes)
+    appendArtifactLineageRecord(workspaceDir, {
+      artifactKind: 'behavioral-sample',
+      artifactId: record.artifactId,
+      principleId: record.principleId,
+      ruleId: null,
+      sessionId: record.sessionId,
+      sourceSnapshotRef: record.sourceSnapshotRef,
+      sourcePainIds: [],
+      sourceGateBlockIds: [],
+      storagePath: artifactPath,
+      implementationId: null,
+      createdAt: record.createdAt,
+    });
+    // Delete the artifact to simulate a missing file
+    fs.unlinkSync(artifactPath); // NOTE(review): this also removes the file the lineage record's storagePath points at, so the "lineage passes" comment above may not hold at audit time — confirm.
+    const report = runMergeGateAudit(workspaceDir, stateDir);
+    const datasetCheck = report.checks.find((c) => c.id === 'dataset_artifact_integrity');
+    expect(report.overallStatus).toBe('block');
+    expect(datasetCheck?.status).toBe('block');
+  });
+  it('blocks when artifact lineage storage paths are missing', () => { // Appends a lineage record whose storagePath is never written to disk in this test.
+    const badPath = path.join(workspaceDir, '.state', 'nocturnal', 'samples', 'nonexistent.json');
+    appendArtifactLineageRecord(workspaceDir, {
+      artifactKind: 'behavioral-sample',
+      artifactId: 'lineage-missing',
+      principleId: 'T-08',
+      ruleId: null,
+      sessionId: 'session-1',
+      sourceSnapshotRef: 'snap-1',
+      sourcePainIds: [],
+      sourceGateBlockIds: [],
+      storagePath: badPath,
+      implementationId: null,
+      createdAt: new Date().toISOString(),
+    });
+    const report = runMergeGateAudit(workspaceDir, stateDir);
+    const lineageCheck = report.checks.find((c) => c.id === 'artifact_lineage_integrity');
+    expect(report.overallStatus).toBe('block');
+    expect(lineageCheck?.status).toBe('block');
+  });
+  it('blocks when replay reports are malformed', () => { // Writes a replay report file whose content is not parseable JSON.
+    createImplementationAssetDir(stateDir, 'IMPL-BAD', '1.0.0');
+    const replayDir = path.join(getImplementationAssetRoot(stateDir, 'IMPL-BAD'), 'replays');
+    fs.mkdirSync(replayDir, { recursive: true });
+    fs.writeFileSync(
+      path.join(replayDir, 'malformed.json'),
+      '{bad json',
+      'utf-8',
+    );
+    const report = runMergeGateAudit(workspaceDir, stateDir);
+    const replayCheck = report.checks.find((c) => c.id === 'replay_evidence_integrity');
+    const details = replayCheck?.details as Record<string, string[]> | undefined; // Cast so the malformedReports list can be length-checked below.
+    expect(report.overallStatus).toBe('block');
+    expect(replayCheck?.status).toBe('block');
+    expect(details?.malformedReports).toHaveLength(1);
+  });
+  it('blocks when replay reports have invalid evidenceSummary shape', () => { // The report is valid JSON but its evidenceSummary carries only evidenceStatus.
+    createImplementationAssetDir(stateDir, 'IMPL-NOEVID', '1.0.0');
+    const replayDir = path.join(getImplementationAssetRoot(stateDir, 'IMPL-NOEVID'), 'replays');
+    fs.mkdirSync(replayDir, { recursive: true });
+    fs.writeFileSync(
+      path.join(replayDir, 'bad-evidence.json'),
+      JSON.stringify({
+        overallDecision: 'pass',
+        blockers: [],
+        generatedAt: '2026-04-12T09:00:00.000Z',
+        implementationId: 'IMPL-NOEVID',
+        evidenceSummary: { evidenceStatus: 'observed' }, // missing totalSamples
+      }),
+      'utf-8',
+    );
+    const report = runMergeGateAudit(workspaceDir, stateDir);
+    const replayCheck = report.checks.find((c) => c.id === 'replay_evidence_integrity');
+    const details = replayCheck?.details as Record<string, string[]> | undefined; // Cast so the missingEvidenceSummary list can be length-checked below.
+    expect(report.overallStatus).toBe('block');
+    expect(replayCheck?.status).toBe('block');
+    expect(details?.missingEvidenceSummary).toHaveLength(1);
+  });
+});

package/tests/core/nocturnal-export.test.ts CHANGED Viewed

@@ -15,6 +15,7 @@ import {
   updateReviewStatus,
   getDatasetRecord,
 } from '../../src/core/nocturnal-dataset.js';
+import { appendArtifactLineageRecord } from '../../src/core/nocturnal-artifact-lineage.js';
 import type { NocturnalDatasetRecord } from '../../src/core/nocturnal-dataset.js';
 // ---------------------------------------------------------------------------
@@ -123,9 +124,63 @@ describe('NocturnalExport exportORPOSamples', () => {
       expect(sample.rejected).toBeTruthy();
       expect(sample.rationale).toBeTruthy();
       expect(sample.datasetMetadata.exportId).toBe(result.manifest!.exportId);
+      expect(sample.datasetMetadata.evidenceSummary.lineageStatus).toBe('unknown');
     }
   });
+  it('degrades to evidence-bounded neutral text when lineage is missing', () => { // Exports a sample for which this test appends no lineage record.
+    setupExportReady(tmpDir, 'art-no-lineage', 'gpt-4');
+    const result = exportORPOSamples(tmpDir, 'gpt-4');
+    expect(result.success).toBe(true);
+    const [sample] = fs.readFileSync(result.manifest!.exportPath, 'utf-8').trim().split('\n').map((line) => JSON.parse(line));
+    expect(sample.prompt).toBe('Take the next action without verified source evidence.'); // prompt and rejected are expected to carry the same neutral sentence.
+    expect(sample.rejected).toBe('Take the next action without verified source evidence.');
+    expect(sample.rationale).toContain('Source evidence is unknown');
+    expect(sample.datasetMetadata.evidenceSummary).toEqual({ // Unknown evidence is expected as a null count with an empty id list.
+      lineageStatus: 'unknown',
+      painSignals: { status: 'unknown', count: null, ids: [] },
+      gateBlocks: { status: 'unknown', count: null, ids: [] },
+    });
+  });
+  it('exports observed lineage evidence when available', () => { // Appends a lineage record with two pain ids and one gate-block id before exporting.
+    const record = setupExportReady(tmpDir, 'art-with-lineage', 'gpt-4');
+    appendArtifactLineageRecord(tmpDir, {
+      artifactKind: 'behavioral-sample',
+      artifactId: record.artifactId,
+      principleId: record.principleId,
+      ruleId: null,
+      sessionId: record.sessionId,
+      sourceSnapshotRef: record.sourceSnapshotRef,
+      sourcePainIds: ['pain-1', 'pain-2'],
+      sourceGateBlockIds: ['gate-1'],
+      storagePath: record.artifactPath,
+      implementationId: null,
+      createdAt: record.createdAt,
+    });
+    const result = exportORPOSamples(tmpDir, 'gpt-4');
+    expect(result.success).toBe(true);
+    const [sample] = fs.readFileSync(result.manifest!.exportPath, 'utf-8').trim().split('\n').map((line) => JSON.parse(line));
+    expect(sample.prompt).toContain('2 observed pain signals'); // Counts in the exported text match the id lists appended above.
+    expect(sample.rejected).toContain('1 observed gate blocks');
+    expect(sample.rationale).toContain('Observed source evidence: 2 pain signals and 1 gate blocks');
+    expect(sample.datasetMetadata.evidenceSummary.lineageStatus).toBe('observed');
+    expect(sample.datasetMetadata.evidenceSummary.painSignals).toEqual({
+      status: 'observed',
+      count: 2,
+      ids: ['pain-1', 'pain-2'],
+    });
+    expect(sample.datasetMetadata.evidenceSummary.gateBlocks).toEqual({
+      status: 'observed',
+      count: 1,
+      ids: ['gate-1'],
+    });
+  });
   it('writes manifest alongside JSONL', () => {
     setupExportReady(tmpDir, 'art-manifest-1', 'gpt-4');

package/tests/core/nocturnal-trinity.test.ts CHANGED Viewed

@@ -5,6 +5,8 @@ import {
   validateDraftArtifact,
   draftToArtifact,
   DEFAULT_TRINITY_CONFIG,
+  OpenClawTrinityRuntimeAdapter,
+  TrinityRuntimeContractError,
   type TrinityConfig,
   type DreamerOutput,
   type PhilosopherOutput,
@@ -25,12 +27,16 @@ function makeSnapshot(overrides: Partial<{
   failureCount: number;
   totalPainEvents: number;
   totalGateBlocks: number;
-}> = {}): {
-  sessionId: string;
-  stats: { failureCount: number; totalPainEvents: number; totalGateBlocks: number; totalAssistantTurns: number; totalToolCalls: number };
-} {
+}> = {}) {
   return {
     sessionId: 'session-test-123',
+    startedAt: '2026-04-12T00:00:00.000Z',
+    updatedAt: '2026-04-12T00:05:00.000Z',
+    assistantTurns: [],
+    userTurns: [],
+    toolCalls: [],
+    painEvents: [],
+    gateBlocks: [],
     stats: {
       failureCount: overrides.failureCount ?? 0,
       totalPainEvents: overrides.totalPainEvents ?? 0,
@@ -236,6 +242,73 @@ describe('validateDreamerOutput', () => {
   });
 });
+describe('OpenClawTrinityRuntimeAdapter contract hardening', () => { // Suite covering adapter construction, model override forwarding, and failure reporting.
+  function makeRuntimeApi(overrides: Partial<any> = {}) { // Builds a stub API object whose runtime.agent, runtime.config and logger members are vi.fn() mocks.
+    return {
+      runtime: {
+        agent: {
+          runEmbeddedPiAgent: vi.fn().mockResolvedValue({ // Default mock resolves with a single JSON text payload.
+            payloads: [
+              { text: '{"valid":true,"candidates":[],"generatedAt":"2026-04-12T00:00:00.000Z"}' },
+            ],
+          }),
+        },
+        config: {
+          loadConfig: vi.fn().mockReturnValue({
+            agents: {
+              defaults: {
+                model: 'openai/gpt-5.4',
+              },
+            },
+          }),
+        },
+        ...overrides.runtime, // Shallow spread: an overriding 'agent' or 'config' key replaces the whole default object for that key.
+      },
+      logger: {
+        info: vi.fn(),
+        warn: vi.fn(),
+        error: vi.fn(),
+      },
+    };
+  }
+  it('rejects missing runtime.agent.runEmbeddedPiAgent contract explicitly', () => { // Constructing with an empty runtime object must throw.
+    expect(() => new OpenClawTrinityRuntimeAdapter({ runtime: {} } as any)).toThrow(TrinityRuntimeContractError);
+    expect(() => new OpenClawTrinityRuntimeAdapter({ runtime: {} } as any)).toThrow(/runtime_unavailable/);
+  });
+  it('passes explicit provider/model overrides into runtime.agent.runEmbeddedPiAgent', async () => {
+    const api = makeRuntimeApi();
+    const adapter = new OpenClawTrinityRuntimeAdapter(api as any);
+    await adapter.invokeDreamer(makeSnapshot({ failureCount: 1 }) as any, 'T-08', 2);
+    expect(api.runtime.agent.runEmbeddedPiAgent).toHaveBeenCalledWith( // The mocked config model 'openai/gpt-5.4' is expected to arrive as separate provider and model fields.
+      expect.objectContaining({
+        provider: 'openai',
+        model: 'gpt-5.4',
+      }),
+    );
+  });
+  it('returns stable failure classes when runtime invocation fails', async () => { // The agent mock rejects, and the adapter is expected to return an invalid result rather than throw.
+    const api = makeRuntimeApi({
+      runtime: {
+        agent: {
+          runEmbeddedPiAgent: vi.fn().mockRejectedValue(new Error('gateway unavailable')),
+        },
+      },
+    });
+    const adapter = new OpenClawTrinityRuntimeAdapter(api as any);
+    const result = await adapter.invokeDreamer(makeSnapshot({ failureCount: 1 }) as any, 'T-08', 2);
+    expect(result.valid).toBe(false);
+    expect(result.reason).toContain('runtime_run_failed');
+    expect(adapter.getLastFailureReason()).toContain('runtime_run_failed'); // The same failure class must also be readable from the adapter afterwards.
+  });
+});
 // ---------------------------------------------------------------------------
 // Tests: validatePhilosopherOutput
 // ---------------------------------------------------------------------------

package/tests/core/pain-integration.test.ts CHANGED Viewed

@@ -479,5 +479,32 @@ unknown_meta: some data`;
         fs.rmSync(freshDir, { recursive: true, force: true });
       }
     });
+    it('readPainFlagData reads only the canonical .state/.pain_flag path', () => { // Writes both a root-level PAIN_FLAG file and a .pain_flag file in STATE_DIR, then checks which one is read.
+      const legacyRootPath = path.join(TEST_DIR, 'PAIN_FLAG');
+      fs.writeFileSync(
+        legacyRootPath,
+        `source: legacy_root
+score: 90
+reason: should be ignored
+time: 2026-04-10T09:00:00.000Z`,
+        'utf-8',
+      );
+      fs.writeFileSync(
+        path.join(STATE_DIR, '.pain_flag'),
+        `source: canonical_state
+score: 80
+reason: should be read
+time: 2026-04-10T09:00:00.000Z`,
+        'utf-8',
+      );
+      const result = readPainFlagData(TEST_DIR);
+      expect(result.source).toBe('canonical_state');
+      expect(result.score).toBe('80'); // score is compared as a string, not a number.
+      const legacyResult = readPainFlagData(path.join(TEST_DIR, '..')); // NOTE(review): this reads relative to TEST_DIR's parent, not the directory holding the PAIN_FLAG file — confirm this is the intended legacy lookup.
+      expect(legacyResult.source).not.toBe('legacy_root');
+    });
   });
 });

package/tests/core/promotion-gate.test.ts CHANGED Viewed

@@ -255,6 +255,11 @@ describe('promotion-gate', () => {
       expect(result.constraintChecks.length).toBeGreaterThan(0);
       expect(result.deltaCheck).toBeDefined();
+      expect(result.evidenceSummary).toEqual({
+        evidenceMode: 'eval-proxy',
+        shadowSampleCount: 0,
+        deltaSource: 'eval',
+      });
     });
   });

package/tests/core/replay-engine.test.ts CHANGED Viewed

@@ -231,4 +231,23 @@ describe('ReplayEngine', () => {
     expect(fs.existsSync(reportDir)).toBe(true);
     expect(fs.readdirSync(reportDir).some((file) => file.endsWith('.json'))).toBe(true);
   });
+  it('marks empty replay evidence as needs-review instead of pass', () => { // Runs a 'pain-negative' replay and expects a report with zero samples in every category.
+    seedLedgerAndImplementation();
+    const engine = new ReplayEngine(workspaceDir, stateDir);
+    const report = engine.runReplayForImplementation('IMPL-1', ['pain-negative']);
+    expect(report.overallDecision).toBe('needs-review');
+    expect(report.evidenceSummary).toEqual({
+      evidenceStatus: 'empty',
+      totalSamples: 0,
+      classifiedCounts: {
+        painNegative: 0,
+        successPositive: 0,
+        principleAnchor: 0,
+      },
+    });
+    expect(report.blockers[0]).toContain('NO REPLAY EVIDENCE'); // The first blocker message must name the missing evidence.
+  });
 });

package/tests/service/nocturnal-workflow-manager.test.ts CHANGED Viewed

@@ -29,6 +29,8 @@ const mockRunTrinityAsync = runTrinityAsync as ReturnType<typeof vi.fn>;
 function createMockRuntimeAdapter() {
     return {
+        isRuntimeAvailable: vi.fn(() => true),
+        getLastFailureReason: vi.fn(() => null),
         invokeDreamer: vi.fn<(snapshot: any, principleId: any, maxCandidates: any) => Promise<DreamerOutput>>(),
         invokePhilosopher: vi.fn<(dreamerOutput: any, principleId: any) => Promise<PhilosopherOutput>>(),
         invokeScribe: vi.fn<(dreamerOutput: any, philosopherOutput: any, snapshot: any, principleId: any, telemetry: any, config: any) => Promise<TrinityDraftArtifact | null>>(),