principles-disciple 1.21.0 → 1.23.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/openclaw.plugin.json +4 -4
- package/package.json +1 -1
- package/scripts/sync-plugin.mjs +1 -0
- package/src/commands/nocturnal-rollout.ts +35 -0
- package/src/core/nocturnal-arbiter.ts +2 -1
- package/src/hooks/subagent.ts +1 -12
- package/src/index.ts +7 -4
- package/src/service/evolution-worker.ts +1 -0
- package/src/service/nocturnal-runtime.ts +14 -0
- package/src/service/subagent-workflow/nocturnal-workflow-manager.ts +17 -1
- package/src/utils/session-key.ts +17 -0
- package/tests/core/nocturnal-arbiter.test.ts +57 -0
- package/tests/service/evolution-worker.nocturnal.test.ts +382 -12
package/openclaw.plugin.json
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
"id": "principles-disciple",
|
|
3
3
|
"name": "Principles Disciple",
|
|
4
4
|
"description": "Evolutionary programming agent framework with strategic guardrails and reflection loops.",
|
|
5
|
-
"version": "1.21.0",
|
|
5
|
+
"version": "1.23.0",
|
|
6
6
|
"skills": [
|
|
7
7
|
"./skills"
|
|
8
8
|
],
|
|
@@ -76,8 +76,8 @@
|
|
|
76
76
|
}
|
|
77
77
|
},
|
|
78
78
|
"buildFingerprint": {
|
|
79
|
-
"gitSha": "
|
|
80
|
-
"bundleMd5": "
|
|
81
|
-
"builtAt": "2026-04-
|
|
79
|
+
"gitSha": "ebbaa40d6e3a",
|
|
80
|
+
"bundleMd5": "7c84860901894f7c049b54028d489ed4",
|
|
81
|
+
"builtAt": "2026-04-12T15:51:34.724Z"
|
|
82
82
|
}
|
|
83
83
|
}
|
package/package.json
CHANGED
package/scripts/sync-plugin.mjs
CHANGED
|
@@ -372,6 +372,7 @@ function verifyBundleContents() {
|
|
|
372
372
|
{ name: 'EvolutionWorkerService', reason: 'main plugin service export' },
|
|
373
373
|
{ name: 'checkPainFlag', reason: 'pain flag detection' },
|
|
374
374
|
{ name: 'processEvolutionQueue', reason: 'queue processing' },
|
|
375
|
+
{ name: 'acquireQueueLock', reason: 'queue lock for pd-reflect and worker' },
|
|
375
376
|
];
|
|
376
377
|
|
|
377
378
|
const missing = [];
|
package/src/commands/nocturnal-rollout.ts
CHANGED
|
@@ -56,6 +56,11 @@ import {
|
|
|
56
56
|
import {
|
|
57
57
|
getCheckpoint,
|
|
58
58
|
} from '../core/model-training-registry.js';
|
|
59
|
+
import {
|
|
60
|
+
runMergeGateAudit,
|
|
61
|
+
formatMergeGateAuditReport,
|
|
62
|
+
} from '../core/merge-gate-audit.js';
|
|
63
|
+
import { resolvePdPath } from '../core/paths.js';
|
|
59
64
|
|
|
60
65
|
function isZh(ctx: PluginCommandContext): boolean {
|
|
61
66
|
return String(ctx.config?.language || 'en').startsWith('zh');
|
|
@@ -257,6 +262,14 @@ ${result.passes
|
|
|
257
262
|
return { text };
|
|
258
263
|
}
|
|
259
264
|
|
|
265
|
+
// ── Merge-Gate Audit ──────────────────────────────────────────────────
|
|
266
|
+
if (subcommand === 'audit') {
|
|
267
|
+
const stateDir = resolvePdPath(workspaceDir, 'STATE_DIR');
|
|
268
|
+
const report = runMergeGateAudit(workspaceDir, stateDir);
|
|
269
|
+
const formatted = formatMergeGateAuditReport(report);
|
|
270
|
+
return { text: formatted };
|
|
271
|
+
}
|
|
272
|
+
|
|
260
273
|
// ── Advance Promotion ─────────────────────────────────────────────────
|
|
261
274
|
if (subcommand === 'advance-promotion') {
|
|
262
275
|
const checkpointId = parts[1] || checkpointIdArg;
|
|
@@ -267,6 +280,28 @@ ${result.passes
|
|
|
267
280
|
const profile = parseProfile(profileArg);
|
|
268
281
|
const hasReview = args.includes('--review');
|
|
269
282
|
const noteArg = parts.find((p) => p.startsWith('--note='))?.split('=')[1];
|
|
283
|
+
const skipAudit = args.includes('--skip-audit');
|
|
284
|
+
|
|
285
|
+
// ── Merge-gate auto-gate: block advance-promotion if audit is BLOCK ──
|
|
286
|
+
if (!skipAudit) {
|
|
287
|
+
const stateDir = resolvePdPath(workspaceDir, 'STATE_DIR');
|
|
288
|
+
const auditReport = runMergeGateAudit(workspaceDir, stateDir);
|
|
289
|
+
if (auditReport.overallStatus === 'block') {
|
|
290
|
+
return {
|
|
291
|
+
text: zh
|
|
292
|
+
? `❌ Merge-Gate 审计阻止了晋升:发现 ${auditReport.counts.block} 个阻断项
|
|
293
|
+
|
|
294
|
+
${formatMergeGateAuditReport(auditReport)}
|
|
295
|
+
|
|
296
|
+
如需强制晋升,请添加 --skip-audit 标志。`
|
|
297
|
+
: `❌ Merge-Gate audit blocked promotion: ${auditReport.counts.block} blocking issue(s) found
|
|
298
|
+
|
|
299
|
+
${formatMergeGateAuditReport(auditReport)}
|
|
300
|
+
|
|
301
|
+
To force promotion, add --skip-audit flag.`,
|
|
302
|
+
};
|
|
303
|
+
}
|
|
304
|
+
}
|
|
270
305
|
|
|
271
306
|
try {
|
|
272
307
|
const promotion = advancePromotion(workspaceDir, {
|
package/src/core/nocturnal-arbiter.ts
CHANGED
|
@@ -623,11 +623,12 @@ export function validateArtifact(
|
|
|
623
623
|
// Rule 11: Quality threshold gate — reject low-signal artifacts
|
|
624
624
|
// A reflection artifact must show positive cognitive improvement (thinkingModelDelta > 0).
|
|
625
625
|
// planningRatioGain must not show catastrophic regression (< -0.5).
|
|
626
|
+
// #244: Use strict < so thinkingModelDelta=threshold passes (thin violations allowed at boundary)
|
|
626
627
|
if (
|
|
627
628
|
options.qualityThresholds?.thinkingModelDeltaMin !== undefined &&
|
|
628
629
|
thinkingModelDelta !== undefined &&
|
|
629
630
|
typeof thinkingModelDelta === 'number' &&
|
|
630
|
-
thinkingModelDelta <= options.qualityThresholds.thinkingModelDeltaMin
|
|
631
|
+
thinkingModelDelta < options.qualityThresholds.thinkingModelDeltaMin
|
|
631
632
|
) {
|
|
632
633
|
failures.push({
|
|
633
634
|
reason: `thinkingModelDelta (${thinkingModelDelta}) does not meet minimum quality threshold (${options.qualityThresholds.thinkingModelDeltaMin}) — reflection shows no cognitive improvement`,
|
package/src/hooks/subagent.ts
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import type { PluginHookSubagentEndedEvent, PluginHookSubagentContext, PluginLogger, OpenClawPluginApi } from '../openclaw-sdk.js';
|
|
2
2
|
import { buildPainFlag, writePainFlag } from '../core/pain.js';
|
|
3
3
|
import { WorkspaceContext } from '../core/workspace-context.js';
|
|
4
|
+
import { extractAgentIdFromSessionKey } from '../utils/session-key.js';
|
|
4
5
|
// No longer needed — diagnostician runs via HEARTBEAT, not subagent
|
|
5
6
|
import { recordEvolutionSuccess } from '../core/evolution-engine.js';
|
|
6
7
|
import { WorkflowStore } from '../service/subagent-workflow/workflow-store.js';
|
|
@@ -81,18 +82,6 @@ function emitSubagentPainEvent(
|
|
|
81
82
|
}
|
|
82
83
|
}
|
|
83
84
|
|
|
84
|
-
|
|
85
|
-
function extractAgentIdFromSessionKey(sessionKey: string | undefined): string | undefined {
|
|
86
|
-
// sessionKey format: "agent:{agentId}:{type}:{uuid}" or "agent:{agentId}:{uuid}"
|
|
87
|
-
if (!sessionKey) return undefined;
|
|
88
|
-
const match = /^agent:([^:]+):/.exec(sessionKey);
|
|
89
|
-
return match ? match[1] : undefined;
|
|
90
|
-
}
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
85
|
type SubagentEndedHookContext = PluginHookSubagentContext & {
|
|
97
86
|
api?: OpenClawPluginApi;
|
|
98
87
|
workspaceDir?: string;
|
package/src/index.ts
CHANGED
|
@@ -61,6 +61,7 @@ import { PathResolver, resolveWorkspaceDirFromApi } from './core/path-resolver.j
|
|
|
61
61
|
import { validateWorkspaceDir } from './core/workspace-dir-validation.js';
|
|
62
62
|
import { resolveRequiredWorkspaceDir, resolveWorkspaceDir, type WorkspaceResolutionContext } from './core/workspace-dir-service.js';
|
|
63
63
|
import { createPrinciplesConsoleRoute } from './http/principles-console-route.js';
|
|
64
|
+
import { extractAgentIdFromSessionKey } from './utils/session-key.js';
|
|
64
65
|
|
|
65
66
|
// Track initialization to avoid repeated calls
|
|
66
67
|
let workspaceInitialized = false;
|
|
@@ -423,11 +424,13 @@ const plugin = {
|
|
|
423
424
|
registerCommandWithAlias('pd-thinking', 'pdt', getCommandDescription('pd-thinking', language), (ctx: any) => handleThinkingOs(ctx), { acceptsArgs: true });
|
|
424
425
|
registerCommandWithAlias('pd-reflect', 'pdrl', getCommandDescription('pd-reflect', language), (ctx: any) => {
|
|
425
426
|
try {
|
|
426
|
-
|
|
427
|
-
|
|
427
|
+
// Resolve agentId from sessionKey (if available), fallback to 'main'
|
|
428
|
+
const agentId = extractAgentIdFromSessionKey(ctx.sessionKey) ?? 'main';
|
|
429
|
+
const workspaceDir = resolveRequiredWorkspaceDir(api, { ...ctx, agentId }, { source: 'pd-reflect', fallbackAgentId: 'main' });
|
|
430
|
+
return handlePdReflect.handler({ ...ctx, api, workspaceDir });
|
|
428
431
|
} catch (err) {
|
|
429
|
-
api.logger.error(`[PD] Command
|
|
430
|
-
return { text: language === 'zh' ?
|
|
432
|
+
api.logger.error(`[PD:pd-reflect] Command failed: ${String(err)}`);
|
|
433
|
+
return { text: language === 'zh' ? '命令执行失败,请查看日志。' : 'Command failed. Check logs.' };
|
|
431
434
|
}
|
|
432
435
|
});
|
|
433
436
|
registerCommandWithAlias('pd-daily', 'pdd', getCommandDescription('pd-daily', language), () => ({
|
package/src/service/evolution-worker.ts
CHANGED
|
@@ -1596,6 +1596,7 @@ async function processEvolutionQueue(wctx: WorkspaceContext, logger: PluginLogge
|
|
|
1596
1596
|
snapshot: snapshotData,
|
|
1597
1597
|
taskId: sleepTask.id,
|
|
1598
1598
|
painContext: sleepTask.recentPainContext,
|
|
1599
|
+
triggerSource: sleepTask.source,
|
|
1599
1600
|
},
|
|
1600
1601
|
});
|
|
1601
1602
|
sleepTask.resultRef = workflowHandle.workflowId;
|
package/src/service/nocturnal-runtime.ts
CHANGED
|
@@ -60,6 +60,20 @@ function isSystemSession(state: SessionState): boolean {
|
|
|
60
60
|
if (sessionId?.startsWith('boot-')) return true;
|
|
61
61
|
if (sessionId?.startsWith('probe-')) return true;
|
|
62
62
|
|
|
63
|
+
// CRITICAL FIX: Legacy sessions from persistence may have missing trigger/sessionKey
|
|
64
|
+
// If both are missing AND the session is old (inactive > abandoned threshold),
|
|
65
|
+
// treat as legacy/orphan to avoid blocking idle detection with unknown sessions.
|
|
66
|
+
// Recent sessions without trigger/sessionKey are likely real user sessions still
|
|
67
|
+
// being enriched — do NOT classify them as system sessions.
|
|
68
|
+
const ABANDONED_THRESHOLD_MS = 2 * 60 * 60 * 1000; // 2 hours
|
|
69
|
+
if (!trigger && !sessionKey) {
|
|
70
|
+
const inactiveFor = Date.now() - state.lastActivityAt;
|
|
71
|
+
if (inactiveFor > ABANDONED_THRESHOLD_MS) {
|
|
72
|
+
return true; // Legacy/orphan session — don't block idle detection
|
|
73
|
+
}
|
|
74
|
+
// Recent session without metadata — likely a real user session, let it through
|
|
75
|
+
}
|
|
76
|
+
|
|
63
77
|
return false;
|
|
64
78
|
}
|
|
65
79
|
|
package/src/service/subagent-workflow/nocturnal-workflow-manager.ts
CHANGED
|
@@ -210,7 +210,7 @@ export class NocturnalWorkflowManager implements WorkflowManager {
|
|
|
210
210
|
|
|
211
211
|
// Extract snapshot and principleId from taskInput.metadata (NOC-07: Trinity async path)
|
|
212
212
|
const snapshotValidation = validateNocturnalSnapshotIngress(options.metadata?.snapshot);
|
|
213
|
-
const snapshot = snapshotValidation.snapshot;
|
|
213
|
+
const {snapshot} = snapshotValidation;
|
|
214
214
|
const principleId = options.metadata?.principleId as string | undefined;
|
|
215
215
|
// Extract painContext for Selector ranking bias
|
|
216
216
|
const painContext = options.metadata?.painContext as RecentPainContext | undefined;
|
|
@@ -254,6 +254,22 @@ export class NocturnalWorkflowManager implements WorkflowManager {
|
|
|
254
254
|
},
|
|
255
255
|
// Pass painContext for Selector ranking bias
|
|
256
256
|
painContext,
|
|
257
|
+
// #244: Only skip preflight idle gate for manual/test triggers.
|
|
258
|
+
// Automatic triggers must go through normal idle check.
|
|
259
|
+
...(((options.metadata)?.triggerSource === 'manual' ||
|
|
260
|
+
(options.metadata)?.triggerSource === 'test')
|
|
261
|
+
? {
|
|
262
|
+
idleCheckOverride: {
|
|
263
|
+
isIdle: true,
|
|
264
|
+
mostRecentActivityAt: Date.now() - 1800000,
|
|
265
|
+
idleForMs: 1800000,
|
|
266
|
+
userActiveSessions: 0,
|
|
267
|
+
abandonedSessionIds: [],
|
|
268
|
+
trajectoryGuardrailConfirmsIdle: true,
|
|
269
|
+
reason: 'manual/test override',
|
|
270
|
+
},
|
|
271
|
+
}
|
|
272
|
+
: {}),
|
|
257
273
|
// Skip Selector if principleId and snapshot are provided
|
|
258
274
|
...(principleId && snapshot ? {
|
|
259
275
|
principleIdOverride: principleId,
|
package/src/utils/session-key.ts
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Session key parsing utilities.
|
|
3
|
+
*
|
|
4
|
+
* Session key format: "agent:{agentId}:{type}:{uuid}" or "agent:{agentId}:{uuid}"
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
/**
|
|
8
|
+
* Extract agentId from a sessionKey.
|
|
9
|
+
* Returns `undefined` if sessionKey is missing, malformed, or has whitespace-only agentId.
|
|
10
|
+
*/
|
|
11
|
+
export function extractAgentIdFromSessionKey(sessionKey: string | undefined): string | undefined {
|
|
12
|
+
if (!sessionKey) return undefined;
|
|
13
|
+
const match = /^agent:([^:]+):/.exec(sessionKey);
|
|
14
|
+
if (!match) return undefined;
|
|
15
|
+
const agentId = match[1].trim();
|
|
16
|
+
return agentId || undefined;
|
|
17
|
+
}
|
package/tests/core/nocturnal-arbiter.test.ts
CHANGED
|
@@ -491,4 +491,61 @@ describe('Nocturnal Arbiter', () => {
|
|
|
491
491
|
expect(result.artifact?.sourceSnapshotRef).toBe('');
|
|
492
492
|
});
|
|
493
493
|
});
|
|
494
|
+
|
|
495
|
+
// -------------------------------------------------------------------------
|
|
496
|
+
// Tests: quality threshold gates (Rule 10/11)
|
|
497
|
+
// -------------------------------------------------------------------------
|
|
498
|
+
|
|
499
|
+
describe('quality threshold gates', () => {
|
|
500
|
+
const defaultThresholds = { thinkingModelDeltaMin: 0.05, planningRatioGainMin: -0.5 };
|
|
501
|
+
|
|
502
|
+
it('rejects when thinkingModelDelta is below threshold', () => {
|
|
503
|
+
const artifact = makeValidArtifact({ thinkingModelDelta: 0.03 });
|
|
504
|
+
const result = validateArtifact(artifact, { qualityThresholds: defaultThresholds });
|
|
505
|
+
expect(result.passed).toBe(false);
|
|
506
|
+
expect(result.failures).toHaveLength(1);
|
|
507
|
+
expect(result.failures[0].field).toBe('thinkingModelDelta');
|
|
508
|
+
});
|
|
509
|
+
|
|
510
|
+
it('passes when thinkingModelDelta equals threshold exactly (boundary value)', () => {
|
|
511
|
+
const artifact = makeValidArtifact({ thinkingModelDelta: 0.05 });
|
|
512
|
+
const result = validateArtifact(artifact, { qualityThresholds: defaultThresholds });
|
|
513
|
+
expect(result.passed).toBe(true);
|
|
514
|
+
});
|
|
515
|
+
|
|
516
|
+
it('passes when thinkingModelDelta exceeds threshold', () => {
|
|
517
|
+
const artifact = makeValidArtifact({ thinkingModelDelta: 0.15 });
|
|
518
|
+
const result = validateArtifact(artifact, { qualityThresholds: defaultThresholds });
|
|
519
|
+
expect(result.passed).toBe(true);
|
|
520
|
+
});
|
|
521
|
+
|
|
522
|
+
it('passes when thinkingModelDelta is absent (optional field)', () => {
|
|
523
|
+
const artifact = makeValidArtifact();
|
|
524
|
+
delete artifact.thinkingModelDelta;
|
|
525
|
+
const result = validateArtifact(artifact, { qualityThresholds: defaultThresholds });
|
|
526
|
+
expect(result.passed).toBe(true);
|
|
527
|
+
});
|
|
528
|
+
|
|
529
|
+
it('rejects when planningRatioGain is below threshold', () => {
|
|
530
|
+
const artifact = makeValidArtifact({ planningRatioGain: -0.6 });
|
|
531
|
+
const result = validateArtifact(artifact, { qualityThresholds: defaultThresholds });
|
|
532
|
+
expect(result.passed).toBe(false);
|
|
533
|
+
expect(result.failures.some(f => f.field === 'planningRatioGain')).toBe(true);
|
|
534
|
+
});
|
|
535
|
+
|
|
536
|
+
it('passes when planningRatioGain equals threshold exactly (boundary value)', () => {
|
|
537
|
+
const artifact = makeValidArtifact({ planningRatioGain: -0.5 });
|
|
538
|
+
const result = validateArtifact(artifact, { qualityThresholds: defaultThresholds });
|
|
539
|
+
expect(result.passed).toBe(true);
|
|
540
|
+
});
|
|
541
|
+
|
|
542
|
+
it('rejects both quality thresholds simultaneously', () => {
|
|
543
|
+
const artifact = makeValidArtifact({ thinkingModelDelta: 0.01, planningRatioGain: -0.8 });
|
|
544
|
+
const result = validateArtifact(artifact, { qualityThresholds: defaultThresholds });
|
|
545
|
+
expect(result.passed).toBe(false);
|
|
546
|
+
expect(result.failures.length).toBeGreaterThanOrEqual(2);
|
|
547
|
+
expect(result.failures.some(f => f.field === 'thinkingModelDelta')).toBe(true);
|
|
548
|
+
expect(result.failures.some(f => f.field === 'planningRatioGain')).toBe(true);
|
|
549
|
+
});
|
|
550
|
+
});
|
|
494
551
|
});
|
package/tests/service/evolution-worker.nocturnal.test.ts
CHANGED
|
@@ -35,18 +35,22 @@ vi.mock('../../src/service/subagent-workflow/nocturnal-workflow-manager.js', ()
|
|
|
35
35
|
|
|
36
36
|
const { mockGetNocturnalSessionSnapshot, mockListRecentNocturnalCandidateSessions } = vi.hoisted(() => ({
|
|
37
37
|
mockGetNocturnalSessionSnapshot: vi.fn(),
|
|
38
|
-
mockListRecentNocturnalCandidateSessions: vi.fn(() => []),
|
|
38
|
+
mockListRecentNocturnalCandidateSessions: vi.fn(() => [] as Array<{ sessionId: string; startedAt: string; failureCount: number; painEventCount: number; gateBlockCount: number }>),
|
|
39
39
|
}));
|
|
40
|
+
|
|
41
|
+
// Create a shared mock extractor instance so spy calls are tracked correctly
|
|
42
|
+
const mockExtractorInstance = {
|
|
43
|
+
getNocturnalSessionSnapshot: mockGetNocturnalSessionSnapshot,
|
|
44
|
+
listRecentNocturnalCandidateSessions: mockListRecentNocturnalCandidateSessions,
|
|
45
|
+
};
|
|
46
|
+
|
|
40
47
|
vi.mock('../../src/core/nocturnal-trajectory-extractor.js', async () => {
|
|
41
48
|
const actual = await vi.importActual<typeof import('../../src/core/nocturnal-trajectory-extractor.js')>(
|
|
42
49
|
'../../src/core/nocturnal-trajectory-extractor.js'
|
|
43
50
|
);
|
|
44
51
|
return {
|
|
45
52
|
...actual,
|
|
46
|
-
createNocturnalTrajectoryExtractor: vi.fn(() => ({
|
|
47
|
-
getNocturnalSessionSnapshot: mockGetNocturnalSessionSnapshot,
|
|
48
|
-
listRecentNocturnalCandidateSessions: mockListRecentNocturnalCandidateSessions,
|
|
49
|
-
})),
|
|
53
|
+
createNocturnalTrajectoryExtractor: vi.fn(() => mockExtractorInstance),
|
|
50
54
|
};
|
|
51
55
|
});
|
|
52
56
|
|
|
@@ -55,6 +59,17 @@ import { WorkspaceContext } from '../../src/core/workspace-context.js';
|
|
|
55
59
|
import { handlePdReflect } from '../../src/commands/pd-reflect.js';
|
|
56
60
|
import { safeRmDir } from '../test-utils.js';
|
|
57
61
|
|
|
62
|
+
// Helper to create a mock API for E2E tests
|
|
63
|
+
function createMockApi() {
|
|
64
|
+
return {
|
|
65
|
+
logger: { info: vi.fn(), warn: vi.fn(), error: vi.fn(), debug: vi.fn() },
|
|
66
|
+
runtime: { agent: { runEmbeddedPiAgent: vi.fn() } },
|
|
67
|
+
} as any;
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
// Helper config for fast poll cycle
|
|
71
|
+
const fastPollConfig = { get: (k: string) => k === 'intervals.worker_poll_ms' ? 100 : undefined };
|
|
72
|
+
|
|
58
73
|
function readQueue(stateDir: string) {
|
|
59
74
|
return JSON.parse(fs.readFileSync(path.join(stateDir, 'evolution_queue.json'), 'utf8'));
|
|
60
75
|
}
|
|
@@ -93,11 +108,11 @@ session_id: explicit-session-from-pain
|
|
|
93
108
|
|
|
94
109
|
try {
|
|
95
110
|
const context = readRecentPainContext(wctx);
|
|
96
|
-
|
|
111
|
+
|
|
97
112
|
// Verify the session_id was extracted from the pain flag file
|
|
98
113
|
expect(context.mostRecent).toBeDefined();
|
|
99
|
-
expect(context.mostRecent
|
|
100
|
-
expect(context.mostRecent
|
|
114
|
+
expect(context.mostRecent!.sessionId).toBe('explicit-session-from-pain');
|
|
115
|
+
expect(context.mostRecent!.score).toBe(80);
|
|
101
116
|
expect(context.recentPainCount).toBe(1);
|
|
102
117
|
} finally {
|
|
103
118
|
safeRmDir(workspaceDir);
|
|
@@ -155,9 +170,9 @@ session_id: pain-session-abc
|
|
|
155
170
|
|
|
156
171
|
// Contract: session_id must be extracted from the pain flag
|
|
157
172
|
expect(painContext.mostRecent).toBeDefined();
|
|
158
|
-
expect(painContext.mostRecent
|
|
159
|
-
expect(painContext.mostRecent
|
|
160
|
-
expect(painContext.mostRecent
|
|
173
|
+
expect(painContext.mostRecent!.sessionId).toBe('pain-session-abc');
|
|
174
|
+
expect(painContext.mostRecent!.score).toBe(70);
|
|
175
|
+
expect(painContext.mostRecent!.source).toBe('tool_failure');
|
|
161
176
|
|
|
162
177
|
// Now simulate what the worker does: attach this context to a queued task
|
|
163
178
|
const simulatedTask = {
|
|
@@ -167,7 +182,7 @@ session_id: pain-session-abc
|
|
|
167
182
|
};
|
|
168
183
|
|
|
169
184
|
// Verify the contract holds end-to-end
|
|
170
|
-
expect(simulatedTask.recentPainContext.mostRecent
|
|
185
|
+
expect(simulatedTask.recentPainContext.mostRecent!.sessionId).toBe('pain-session-abc');
|
|
171
186
|
});
|
|
172
187
|
|
|
173
188
|
it('e2e: /pd-reflect command writes to workspace/.state, never to HOME/.state', async () => {
|
|
@@ -214,4 +229,359 @@ session_id: pain-session-abc
|
|
|
214
229
|
safeRmDir(workspaceDir);
|
|
215
230
|
}
|
|
216
231
|
});
|
|
232
|
+
|
|
233
|
+
// === Nocturnal E2E Pipeline Tests (from PR #243) ===
|
|
234
|
+
|
|
235
|
+
it('does not start a nocturnal workflow when only an empty fallback snapshot is available', async () => {
|
|
236
|
+
const workspaceDir = fs.mkdtempSync(path.join(os.tmpdir(), 'pd-nocturnal-empty-'));
|
|
237
|
+
const stateDir = path.join(workspaceDir, '.state');
|
|
238
|
+
fs.mkdirSync(path.join(stateDir, 'sessions'), { recursive: true });
|
|
239
|
+
fs.mkdirSync(path.join(stateDir, 'logs'), { recursive: true });
|
|
240
|
+
|
|
241
|
+
mockGetNocturnalSessionSnapshot.mockReturnValue(null);
|
|
242
|
+
|
|
243
|
+
fs.writeFileSync(
|
|
244
|
+
path.join(stateDir, 'evolution_queue.json'),
|
|
245
|
+
JSON.stringify([
|
|
246
|
+
{
|
|
247
|
+
id: 'sleep-empty',
|
|
248
|
+
taskKind: 'sleep_reflection',
|
|
249
|
+
priority: 'medium',
|
|
250
|
+
score: 50,
|
|
251
|
+
source: 'nocturnal',
|
|
252
|
+
reason: 'Sleep reflection',
|
|
253
|
+
timestamp: '2026-04-10T00:00:00.000Z',
|
|
254
|
+
enqueued_at: '2026-04-10T00:00:00.000Z',
|
|
255
|
+
status: 'pending',
|
|
256
|
+
retryCount: 0,
|
|
257
|
+
maxRetries: 1,
|
|
258
|
+
recentPainContext: {
|
|
259
|
+
mostRecent: null,
|
|
260
|
+
recentPainCount: 0,
|
|
261
|
+
recentMaxPainScore: 0,
|
|
262
|
+
},
|
|
263
|
+
},
|
|
264
|
+
], null, 2),
|
|
265
|
+
'utf8'
|
|
266
|
+
);
|
|
267
|
+
|
|
268
|
+
const mockApi = createMockApi();
|
|
269
|
+
EvolutionWorkerService.api = mockApi;
|
|
270
|
+
|
|
271
|
+
try {
|
|
272
|
+
EvolutionWorkerService.start({
|
|
273
|
+
workspaceDir,
|
|
274
|
+
stateDir,
|
|
275
|
+
logger: mockApi.logger,
|
|
276
|
+
config: fastPollConfig,
|
|
277
|
+
api: mockApi,
|
|
278
|
+
} as any);
|
|
279
|
+
|
|
280
|
+
await vi.advanceTimersByTimeAsync(6000);
|
|
281
|
+
|
|
282
|
+
const queue = readQueue(stateDir);
|
|
283
|
+
expect(queue[0].status).toBe('failed');
|
|
284
|
+
expect(queue[0].lastError).toContain('invalid_snapshot_ingress');
|
|
285
|
+
expect(queue[0].lastError).toContain('fallback snapshot must contain at least one pain signal');
|
|
286
|
+
expect(queue[0].resultRef).toBeFalsy();
|
|
287
|
+
expect(mockStartWorkflow).not.toHaveBeenCalled();
|
|
288
|
+
} finally {
|
|
289
|
+
EvolutionWorkerService.stop!({ workspaceDir, stateDir, logger: console } as any);
|
|
290
|
+
safeRmDir(workspaceDir);
|
|
291
|
+
}
|
|
292
|
+
});
|
|
293
|
+
|
|
294
|
+
it('uses stub_fallback for expected gateway-only background unavailability', async () => {
|
|
295
|
+
const workspaceDir = fs.mkdtempSync(path.join(os.tmpdir(), 'pd-nocturnal-gateway-'));
|
|
296
|
+
const stateDir = path.join(workspaceDir, '.state');
|
|
297
|
+
fs.mkdirSync(path.join(stateDir, 'sessions'), { recursive: true });
|
|
298
|
+
fs.mkdirSync(path.join(stateDir, 'logs'), { recursive: true });
|
|
299
|
+
|
|
300
|
+
mockGetNocturnalSessionSnapshot.mockReturnValue({
|
|
301
|
+
sessionId: 'sleep-gateway',
|
|
302
|
+
startedAt: '2026-04-10T00:00:00.000Z',
|
|
303
|
+
updatedAt: '2026-04-10T00:01:00.000Z',
|
|
304
|
+
assistantTurns: [],
|
|
305
|
+
userTurns: [],
|
|
306
|
+
toolCalls: [],
|
|
307
|
+
painEvents: [],
|
|
308
|
+
gateBlocks: [],
|
|
309
|
+
stats: { totalAssistantTurns: 1, totalToolCalls: 1, totalPainEvents: 0, totalGateBlocks: 0, failureCount: 0 },
|
|
310
|
+
});
|
|
311
|
+
mockStartWorkflow.mockResolvedValue({ workflowId: 'wf-1', childSessionKey: 'child-1', state: 'active' });
|
|
312
|
+
mockGetWorkflowDebugSummary.mockResolvedValue({
|
|
313
|
+
state: 'terminal_error',
|
|
314
|
+
metadata: {},
|
|
315
|
+
recentEvents: [{ reason: 'Error: Plugin runtime subagent methods are only available during a gateway request.', payload: {} }],
|
|
316
|
+
});
|
|
317
|
+
|
|
318
|
+
fs.writeFileSync(
|
|
319
|
+
path.join(stateDir, 'evolution_queue.json'),
|
|
320
|
+
JSON.stringify([
|
|
321
|
+
{
|
|
322
|
+
id: 'sleep-gateway',
|
|
323
|
+
taskKind: 'sleep_reflection',
|
|
324
|
+
priority: 'medium',
|
|
325
|
+
score: 50,
|
|
326
|
+
source: 'nocturnal',
|
|
327
|
+
reason: 'Sleep reflection',
|
|
328
|
+
timestamp: '2026-04-10T00:00:00.000Z',
|
|
329
|
+
enqueued_at: '2026-04-10T00:00:00.000Z',
|
|
330
|
+
status: 'pending',
|
|
331
|
+
retryCount: 0,
|
|
332
|
+
maxRetries: 1,
|
|
333
|
+
recentPainContext: {
|
|
334
|
+
mostRecent: { source: 'test', score: 50, reason: 'test', timestamp: '2026-04-10T00:00:00.000Z', sessionId: 'sleep-gateway' },
|
|
335
|
+
recentPainCount: 1,
|
|
336
|
+
recentMaxPainScore: 50,
|
|
337
|
+
},
|
|
338
|
+
},
|
|
339
|
+
], null, 2),
|
|
340
|
+
'utf8'
|
|
341
|
+
);
|
|
342
|
+
|
|
343
|
+
const mockApi = createMockApi();
|
|
344
|
+
EvolutionWorkerService.api = mockApi;
|
|
345
|
+
|
|
346
|
+
try {
|
|
347
|
+
EvolutionWorkerService.start({
|
|
348
|
+
workspaceDir,
|
|
349
|
+
stateDir,
|
|
350
|
+
logger: mockApi.logger,
|
|
351
|
+
config: fastPollConfig,
|
|
352
|
+
api: mockApi,
|
|
353
|
+
} as any);
|
|
354
|
+
|
|
355
|
+
await vi.advanceTimersByTimeAsync(6000);
|
|
356
|
+
|
|
357
|
+
const queue = readQueue(stateDir);
|
|
358
|
+
expect(queue[0].status).toBe('completed');
|
|
359
|
+
expect(queue[0].resolution).toBe('stub_fallback');
|
|
360
|
+
} finally {
|
|
361
|
+
EvolutionWorkerService.stop!({ workspaceDir, stateDir, logger: console } as any);
|
|
362
|
+
safeRmDir(workspaceDir);
|
|
363
|
+
}
|
|
364
|
+
});
|
|
365
|
+
|
|
366
|
+
it('uses stub_fallback for expected subagent runtime unavailability', async () => {
|
|
367
|
+
const workspaceDir = fs.mkdtempSync(path.join(os.tmpdir(), 'pd-nocturnal-subagent-'));
|
|
368
|
+
const stateDir = path.join(workspaceDir, '.state');
|
|
369
|
+
fs.mkdirSync(path.join(stateDir, 'sessions'), { recursive: true });
|
|
370
|
+
fs.mkdirSync(path.join(stateDir, 'logs'), { recursive: true });
|
|
371
|
+
|
|
372
|
+
mockGetNocturnalSessionSnapshot.mockReturnValue({
|
|
373
|
+
sessionId: 'sleep-subagent',
|
|
374
|
+
startedAt: '2026-04-10T00:00:00.000Z',
|
|
375
|
+
updatedAt: '2026-04-10T00:01:00.000Z',
|
|
376
|
+
assistantTurns: [],
|
|
377
|
+
userTurns: [],
|
|
378
|
+
toolCalls: [],
|
|
379
|
+
painEvents: [],
|
|
380
|
+
gateBlocks: [],
|
|
381
|
+
stats: { totalAssistantTurns: 1, totalToolCalls: 1, totalPainEvents: 0, totalGateBlocks: 0, failureCount: 0 },
|
|
382
|
+
});
|
|
383
|
+
mockStartWorkflow.mockRejectedValue(new Error('NocturnalWorkflowManager: subagent runtime unavailable'));
|
|
384
|
+
|
|
385
|
+
fs.writeFileSync(
|
|
386
|
+
path.join(stateDir, 'evolution_queue.json'),
|
|
387
|
+
JSON.stringify([
|
|
388
|
+
{
|
|
389
|
+
id: 'sleep-subagent',
|
|
390
|
+
taskKind: 'sleep_reflection',
|
|
391
|
+
priority: 'medium',
|
|
392
|
+
score: 50,
|
|
393
|
+
source: 'nocturnal',
|
|
394
|
+
reason: 'Sleep reflection',
|
|
395
|
+
timestamp: '2026-04-10T00:00:00.000Z',
|
|
396
|
+
enqueued_at: '2026-04-10T00:00:00.000Z',
|
|
397
|
+
status: 'pending',
|
|
398
|
+
retryCount: 0,
|
|
399
|
+
maxRetries: 1,
|
|
400
|
+
recentPainContext: {
|
|
401
|
+
mostRecent: { source: 'test', score: 50, reason: 'test', timestamp: '2026-04-10T00:00:00.000Z', sessionId: 'sleep-subagent' },
|
|
402
|
+
recentPainCount: 1,
|
|
403
|
+
recentMaxPainScore: 50,
|
|
404
|
+
},
|
|
405
|
+
},
|
|
406
|
+
], null, 2),
|
|
407
|
+
'utf8'
|
|
408
|
+
);
|
|
409
|
+
|
|
410
|
+
const mockApi = createMockApi();
|
|
411
|
+
EvolutionWorkerService.api = mockApi;
|
|
412
|
+
|
|
413
|
+
try {
|
|
414
|
+
EvolutionWorkerService.start({
|
|
415
|
+
workspaceDir,
|
|
416
|
+
stateDir,
|
|
417
|
+
logger: mockApi.logger,
|
|
418
|
+
config: fastPollConfig,
|
|
419
|
+
api: mockApi,
|
|
420
|
+
} as any);
|
|
421
|
+
|
|
422
|
+
await vi.advanceTimersByTimeAsync(6000);
|
|
423
|
+
|
|
424
|
+
const queue = readQueue(stateDir);
|
|
425
|
+
expect(queue[0].status).toBe('completed');
|
|
426
|
+
expect(queue[0].resolution).toBe('stub_fallback');
|
|
427
|
+
} finally {
|
|
428
|
+
EvolutionWorkerService.stop!({ workspaceDir, stateDir, logger: console } as any);
|
|
429
|
+
safeRmDir(workspaceDir);
|
|
430
|
+
}
|
|
431
|
+
});
|
|
432
|
+
|
|
433
|
+
it('prioritizes pain signal session ID for snapshot extraction', async () => {
|
|
434
|
+
const workspaceDir = fs.mkdtempSync(path.join(os.tmpdir(), 'pd-nocturnal-pain-session-'));
|
|
435
|
+
const stateDir = path.join(workspaceDir, '.state');
|
|
436
|
+
fs.mkdirSync(path.join(stateDir, 'sessions'), { recursive: true });
|
|
437
|
+
fs.mkdirSync(path.join(stateDir, 'logs'), { recursive: true });
|
|
438
|
+
|
|
439
|
+
const painSessionId = 'pain-signal-session-123';
|
|
440
|
+
|
|
441
|
+
mockGetNocturnalSessionSnapshot.mockImplementation((sessionId: string) => {
|
|
442
|
+
if (sessionId === painSessionId) {
|
|
443
|
+
return {
|
|
444
|
+
sessionId: painSessionId,
|
|
445
|
+
startedAt: '2026-04-09T23:00:00.000Z',
|
|
446
|
+
updatedAt: '2026-04-09T23:01:00.000Z',
|
|
447
|
+
assistantTurns: [],
|
|
448
|
+
userTurns: [],
|
|
449
|
+
toolCalls: [],
|
|
450
|
+
painEvents: [{ source: 'tool_failure', score: 70, severity: null, reason: 'test', createdAt: '2026-04-09T23:00:00.000Z' }],
|
|
451
|
+
gateBlocks: [],
|
|
452
|
+
stats: { totalAssistantTurns: 1, totalToolCalls: 1, failureCount: 1, totalPainEvents: 1, totalGateBlocks: 0 },
|
|
453
|
+
};
|
|
454
|
+
}
|
|
455
|
+
return null;
|
|
456
|
+
});
|
|
457
|
+
mockStartWorkflow.mockResolvedValue({ workflowId: 'wf-pain', childSessionKey: 'child-pain', state: 'active' });
|
|
458
|
+
|
|
459
|
+
fs.writeFileSync(
|
|
460
|
+
path.join(stateDir, 'evolution_queue.json'),
|
|
461
|
+
JSON.stringify([
|
|
462
|
+
{
|
|
463
|
+
id: 'sleep-pain-priority',
|
|
464
|
+
taskKind: 'sleep_reflection',
|
|
465
|
+
priority: 'medium',
|
|
466
|
+
score: 50,
|
|
467
|
+
source: 'nocturnal',
|
|
468
|
+
reason: 'Sleep reflection',
|
|
469
|
+
timestamp: '2026-04-10T00:00:00.000Z',
|
|
470
|
+
enqueued_at: '2026-04-10T00:00:00.000Z',
|
|
471
|
+
status: 'pending',
|
|
472
|
+
retryCount: 0,
|
|
473
|
+
maxRetries: 1,
|
|
474
|
+
recentPainContext: {
|
|
475
|
+
mostRecent: { source: 'tool_failure', score: 70, reason: 'test', timestamp: '2026-04-10T00:00:00.000Z', sessionId: painSessionId },
|
|
476
|
+
recentPainCount: 1,
|
|
477
|
+
recentMaxPainScore: 70,
|
|
478
|
+
},
|
|
479
|
+
},
|
|
480
|
+
], null, 2),
|
|
481
|
+
'utf8'
|
|
482
|
+
);
|
|
483
|
+
|
|
484
|
+
const mockApi = createMockApi();
|
|
485
|
+
EvolutionWorkerService.api = mockApi;
|
|
486
|
+
|
|
487
|
+
try {
|
|
488
|
+
EvolutionWorkerService.start({
|
|
489
|
+
workspaceDir,
|
|
490
|
+
stateDir,
|
|
491
|
+
logger: mockApi.logger,
|
|
492
|
+
config: fastPollConfig,
|
|
493
|
+
api: mockApi,
|
|
494
|
+
} as any);
|
|
495
|
+
|
|
496
|
+
await vi.advanceTimersByTimeAsync(6000);
|
|
497
|
+
|
|
498
|
+
expect(mockStartWorkflow).toHaveBeenCalledTimes(1);
|
|
499
|
+
const metadata = mockStartWorkflow.mock.calls[0][1].metadata;
|
|
500
|
+
expect(metadata.snapshot.sessionId).toBe(painSessionId);
|
|
501
|
+
} finally {
|
|
502
|
+
EvolutionWorkerService.stop!({ workspaceDir, stateDir, logger: console } as any);
|
|
503
|
+
safeRmDir(workspaceDir);
|
|
504
|
+
}
|
|
505
|
+
});
|
|
506
|
+
|
|
507
|
+
it('e2e: bounded session selection — never picks a session newer than the triggering task', async () => {
|
|
508
|
+
const workspaceDir = fs.mkdtempSync(path.join(os.tmpdir(), 'pd-nocturnal-e2e-bounded-'));
|
|
509
|
+
const stateDir = path.join(workspaceDir, '.state');
|
|
510
|
+
fs.mkdirSync(path.join(stateDir, 'sessions'), { recursive: true });
|
|
511
|
+
fs.mkdirSync(path.join(stateDir, 'logs'), { recursive: true });
|
|
512
|
+
|
|
513
|
+
const taskTimestamp = '2026-04-10T00:00:00.000Z';
|
|
514
|
+
const validSessionTimestamp = '2026-04-09T23:00:00.000Z';
|
|
515
|
+
const invalidSessionTimestamp = '2026-04-10T01:00:00.000Z';
|
|
516
|
+
|
|
517
|
+
mockGetNocturnalSessionSnapshot.mockImplementation((sessionId: string) => {
|
|
518
|
+
if (sessionId === 'valid-session') {
|
|
519
|
+
return {
|
|
520
|
+
sessionId: 'valid-session',
|
|
521
|
+
startedAt: validSessionTimestamp,
|
|
522
|
+
updatedAt: validSessionTimestamp,
|
|
523
|
+
assistantTurns: [],
|
|
524
|
+
userTurns: [],
|
|
525
|
+
toolCalls: [],
|
|
526
|
+
painEvents: [{ source: 'tool_failure', score: 50, severity: null, reason: 'test', createdAt: validSessionTimestamp }],
|
|
527
|
+
gateBlocks: [],
|
|
528
|
+
stats: { totalAssistantTurns: 1, totalToolCalls: 1, failureCount: 1, totalPainEvents: 1, totalGateBlocks: 0 },
|
|
529
|
+
};
|
|
530
|
+
}
|
|
531
|
+
return null;
|
|
532
|
+
});
|
|
533
|
+
mockListRecentNocturnalCandidateSessions.mockReturnValue([
|
|
534
|
+
{ sessionId: 'valid-session', startedAt: validSessionTimestamp, failureCount: 1, painEventCount: 1, gateBlockCount: 0 },
|
|
535
|
+
{ sessionId: 'invalid-session', startedAt: invalidSessionTimestamp, failureCount: 1, painEventCount: 0, gateBlockCount: 0 },
|
|
536
|
+
]);
|
|
537
|
+
mockStartWorkflow.mockResolvedValue({ workflowId: 'wf-bounded', childSessionKey: 'child-bounded', state: 'active' });
|
|
538
|
+
|
|
539
|
+
fs.writeFileSync(
|
|
540
|
+
path.join(stateDir, 'evolution_queue.json'),
|
|
541
|
+
JSON.stringify([
|
|
542
|
+
{
|
|
543
|
+
id: 'sleep-e2e-bounded',
|
|
544
|
+
taskKind: 'sleep_reflection',
|
|
545
|
+
priority: 'medium',
|
|
546
|
+
score: 50,
|
|
547
|
+
source: 'nocturnal',
|
|
548
|
+
reason: 'Sleep reflection',
|
|
549
|
+
timestamp: taskTimestamp,
|
|
550
|
+
enqueued_at: taskTimestamp,
|
|
551
|
+
status: 'pending',
|
|
552
|
+
retryCount: 0,
|
|
553
|
+
maxRetries: 1,
|
|
554
|
+
recentPainContext: {
|
|
555
|
+
mostRecent: { source: 'test', score: 50, reason: 'test', timestamp: taskTimestamp, sessionId: 'pain-session' },
|
|
556
|
+
recentPainCount: 1,
|
|
557
|
+
recentMaxPainScore: 50,
|
|
558
|
+
},
|
|
559
|
+
},
|
|
560
|
+
], null, 2),
|
|
561
|
+
'utf8'
|
|
562
|
+
);
|
|
563
|
+
|
|
564
|
+
const mockApi = createMockApi();
|
|
565
|
+
EvolutionWorkerService.api = mockApi;
|
|
566
|
+
|
|
567
|
+
try {
|
|
568
|
+
EvolutionWorkerService.start({
|
|
569
|
+
workspaceDir,
|
|
570
|
+
stateDir,
|
|
571
|
+
logger: mockApi.logger,
|
|
572
|
+
config: fastPollConfig,
|
|
573
|
+
api: mockApi,
|
|
574
|
+
} as any);
|
|
575
|
+
|
|
576
|
+
await vi.advanceTimersByTimeAsync(6000);
|
|
577
|
+
|
|
578
|
+
expect(mockStartWorkflow).toHaveBeenCalledTimes(1);
|
|
579
|
+
const metadata = mockStartWorkflow.mock.calls[0][1].metadata;
|
|
580
|
+
expect(metadata.snapshot.sessionId).toBe('valid-session');
|
|
581
|
+
expect(new Date(metadata.snapshot.startedAt).getTime()).toBeLessThanOrEqual(new Date(taskTimestamp).getTime());
|
|
582
|
+
} finally {
|
|
583
|
+
EvolutionWorkerService.stop!({ workspaceDir, stateDir, logger: console } as any);
|
|
584
|
+
safeRmDir(workspaceDir);
|
|
585
|
+
}
|
|
586
|
+
});
|
|
217
587
|
});
|