principles-disciple 1.22.0 → 1.24.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,7 +2,7 @@
2
2
  "id": "principles-disciple",
3
3
  "name": "Principles Disciple",
4
4
  "description": "Evolutionary programming agent framework with strategic guardrails and reflection loops.",
5
- "version": "1.22.0",
5
+ "version": "1.24.0",
6
6
  "skills": [
7
7
  "./skills"
8
8
  ],
@@ -76,8 +76,8 @@
76
76
  }
77
77
  },
78
78
  "buildFingerprint": {
79
- "gitSha": "bce835db37a0",
80
- "bundleMd5": "9e44177badb37ac423669fd187bf2667",
81
- "builtAt": "2026-04-10T14:01:23.050Z"
79
+ "gitSha": "ebbaa40d6e3a",
80
+ "bundleMd5": "7c84860901894f7c049b54028d489ed4",
81
+ "builtAt": "2026-04-12T15:51:34.724Z"
82
82
  }
83
83
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "principles-disciple",
3
- "version": "1.22.0",
3
+ "version": "1.24.0",
4
4
  "description": "Native OpenClaw plugin for Principles Disciple",
5
5
  "type": "module",
6
6
  "main": "./dist/bundle.js",
@@ -0,0 +1,393 @@
1
+ #!/usr/bin/env node
2
+
3
+ /**
4
+ * Nocturnal Pipeline Diagnostic Script
5
+ * ======================================
6
+ * Checks every link in the Nocturnal reflection chain:
7
+ * Heartbeat → Idle Detection → Queue → Snapshot → Workflow → Trinity → Arbiter → Persistence
8
+ *
9
+ * Usage:
10
+ * node scripts/diagnose-nocturnal.mjs [--workspace /path/to/workspace]
11
+ *
12
+ * Output: Structured report with pass/fail for each checkpoint.
13
+ */
14
+
15
+ import { existsSync, readFileSync, readdirSync, statSync } from 'fs';
16
+ import { join, dirname } from 'path';
17
+ import { fileURLToPath } from 'url';
18
+ import { execSync } from 'child_process';
19
+
20
+ const __filename = fileURLToPath(import.meta.url);
21
+ const __dirname = dirname(__filename);
22
+ const PLUGIN_DIR = join(__dirname, '..');
23
+
24
+ // ─── Argument parsing ───
25
/**
 * Resolve the workspace directory the diagnostic should inspect.
 *
 * Accepts `--workspace <dir>` and `--workspace=<dir>`. When neither form
 * supplies a value, falls back to the git top-level of the current working
 * directory, and finally to `process.cwd()` when git is unavailable or the
 * directory is not inside a repository.
 *
 * @returns {{ workspaceDir: string }} The resolved workspace directory.
 */
function parseArgs() {
  const inlineFlag = '--workspace=';
  let workspaceDir = null;
  const argv = process.argv.slice(2);
  for (let i = 0; i < argv.length; i++) {
    const arg = argv[i];
    if (arg === '--workspace' && argv[i + 1]) {
      workspaceDir = argv[++i];
    } else if (arg.startsWith(inlineFlag) && arg.length > inlineFlag.length) {
      // Previously the `--workspace=<dir>` spelling was silently ignored.
      workspaceDir = arg.slice(inlineFlag.length);
    }
  }
  // Auto-detect workspace from current git working directory
  if (!workspaceDir) {
    try {
      const gitRoot = execSync('git rev-parse --show-toplevel', {
        encoding: 'utf-8',
        timeout: 5000,
        // Keep git's "fatal: not a git repository" noise out of the report.
        stdio: ['ignore', 'pipe', 'ignore'],
      }).trim();
      workspaceDir = gitRoot || process.cwd();
    } catch {
      workspaceDir = process.cwd();
    }
  }
  return { workspaceDir };
}
44
+
45
+ // ─── Report helpers ───
46
// Checkpoint outcomes in execution order, plus one running tally per status.
const results = [];
let checksPassed = 0;
let checksFailed = 0;
let checksWarned = 0;

/**
 * Run a single diagnostic checkpoint and record its outcome.
 *
 * Outcome mapping:
 *  - `fn` returns `{ status: 'warn', detail }` -> recorded as a warning
 *  - `fn` returns anything else                -> recorded as a pass (a string
 *    return value becomes the detail text)
 *  - `fn` throws                               -> recorded as a failure
 *
 * @param {string} name - Human-readable checkpoint label.
 * @param {() => (string | { status: string, detail?: string } | void)} fn -
 *   Synchronous checkpoint body.
 */
function check(name, fn) {
  try {
    const result = fn();
    if (result && result.status === 'warn') {
      checksWarned++;
      results.push({ name, status: 'warn', detail: result.detail || '' });
    } else {
      checksPassed++;
      results.push({ name, status: 'pass', detail: typeof result === 'string' ? result : '' });
    }
  } catch (err) {
    checksFailed++;
    // `throw null` / `throw undefined` are legal JavaScript; guard the property
    // access so a misbehaving checkpoint cannot crash the whole report.
    results.push({ name, status: 'fail', detail: (err && err.message) || String(err) });
  }
}
66
+
67
/**
 * Write the collected checkpoint results to stdout as a framed report,
 * followed by a pass/warn/fail summary line. Sets `process.exitCode` to 1
 * when at least one checkpoint failed.
 */
function printReport() {
  const heavyRule = '='.repeat(60);
  const lightRule = '-'.repeat(60);

  console.log('\n' + heavyRule);
  console.log(' NOCTURNAL PIPELINE DIAGNOSTIC REPORT');
  console.log(' ' + new Date().toISOString());
  console.log(heavyRule);

  results.forEach((entry) => {
    let icon;
    if (entry.status === 'pass') {
      icon = '✅';
    } else if (entry.status === 'warn') {
      icon = '⚠️ ';
    } else {
      icon = '❌';
    }
    console.log(`\n${icon} ${entry.name}`);
    if (entry.detail) {
      console.log(` ${entry.detail}`);
    }
  });

  console.log('\n' + lightRule);
  console.log(` Summary: ${checksPassed} passed, ${checksWarned} warnings, ${checksFailed} failed`);
  console.log(lightRule + '\n');

  // A non-zero exit code lets callers detect failed checkpoints.
  if (checksFailed > 0) {
    process.exitCode = 1;
  }
}
89
+
90
+ // ─── Main ───
91
+ function main() {
92
+ const { workspaceDir } = parseArgs();
93
+ const stateDir = join(workspaceDir, '.state');
94
+
95
+ console.log(`\n🔍 Diagnosing Nocturnal pipeline for workspace: ${workspaceDir}`);
96
+
97
+ // ─────────────────────────────────────────────────────────
98
+ // CHECKPOINT 1: State directory structure
99
+ // ─────────────────────────────────────────────────────────
100
check('1. State directory structure', () => {
  // Each entry is resolved relative to the workspace's .state/ directory.
  const absent = ['sessions', 'logs', 'nocturnal', 'nocturnal/samples']
    .filter((relPath) => !existsSync(join(stateDir, relPath)));
  if (absent.length !== 0) {
    throw new Error(`Missing directories: ${absent.join(', ')}`);
  }
  return 'All required directories present';
});
110
+
111
+ // ─────────────────────────────────────────────────────────
112
+ // CHECKPOINT 2: Session tracker persistence
113
+ // ─────────────────────────────────────────────────────────
114
check('2. Session tracker persistence', () => {
  const sessionsDir = join(stateDir, 'sessions');
  if (!existsSync(sessionsDir)) throw new Error('sessions/ directory missing');
  const files = readdirSync(sessionsDir).filter(f => f.endsWith('.json'));
  if (files.length === 0) {
    return { status: 'warn', detail: 'No session files found — idle check will report idle immediately' };
  }
  // Count the session files that parse as JSON and carry both fields.
  let validSessions = 0;
  for (const f of files) {
    try {
      const data = JSON.parse(readFileSync(join(sessionsDir, f), 'utf-8'));
      // `data &&` guards a file whose entire content is the JSON literal `null`.
      if (data && data.sessionId && data.lastActivityAt) validSessions++;
    } catch { /* unreadable or corrupted file: counted as invalid */ }
  }
  const summary = `${files.length} session files, ${validSessions} valid with sessionId+lastActivityAt`;
  if (validSessions === 0) {
    // Files exist but none are usable: surface it instead of reporting a pass.
    return { status: 'warn', detail: `${summary} — every session file is corrupted or incomplete` };
  }
  return summary;
});
131
+
132
+ // ─────────────────────────────────────────────────────────
133
+ // CHECKPOINT 3: Idle detection logic
134
+ // ─────────────────────────────────────────────────────────
135
check('3. Idle detection (checkWorkspaceIdle)', () => {
  // Functions are minified — check for unique string markers instead.
  const bundlePath = join(PLUGIN_DIR, 'dist', 'bundle.js');
  // Report a missing bundle with the same actionable message as the bundle
  // health checkpoint rather than a raw ENOENT from readFileSync.
  if (!existsSync(bundlePath)) throw new Error('dist/bundle.js missing — run build first');
  const content = readFileSync(bundlePath, 'utf-8');

  // Stable markers: log messages, object fields, event strings that survive minification.
  // NOTE(review): 'trigger' is a very generic substring and is likely to match
  // unrelated code — confirm it is a meaningful signal.
  const markers = [
    { name: 'Workspace not idle', reason: 'preflight idle check log message' },
    { name: 'trigger', reason: 'system session detection (checks trigger field)' },
    { name: 'abandonedSessionIds', reason: 'IdleCheckResult field (preserved in object literal)' },
    { name: 'trajectoryGuardrailConfirmsIdle', reason: 'IdleCheckResult field' },
  ];
  const missing = markers.filter(m => !content.includes(m.name));
  if (missing.length > 0) {
    throw new Error(`Idle detection markers missing: ${missing.map(m => m.name).join(', ')}`);
  }

  // Check PR #256 fix: legacy session temporal guard.
  // The fix adds a `lastActivityAt` comparison before treating sessions as system sessions.
  // In minified code this appears as a comparison involving `lastActivityAt`.
  if (!content.includes('lastActivityAt')) {
    return { status: 'warn', detail: 'lastActivityAt reference not found — temporal guard for legacy sessions may be missing' };
  }
  return 'Idle detection functions present (verified via stable string markers)';
});
160
+
161
+ // ─────────────────────────────────────────────────────────
162
+ // CHECKPOINT 4: Evolution queue
163
+ // ─────────────────────────────────────────────────────────
164
check('4. Evolution queue', () => {
  const queuePath = join(stateDir, 'evolution_queue.json');
  if (!existsSync(queuePath)) {
    return { status: 'warn', detail: 'No evolution queue — idle check has not yet enqueued a task' };
  }
  const queue = JSON.parse(readFileSync(queuePath, 'utf-8'));
  // Fail with a readable message instead of "queue.filter is not a function".
  if (!Array.isArray(queue)) {
    throw new Error('evolution_queue.json is not a JSON array');
  }
  // `t &&` tolerates null entries in the queue.
  const sleepTasks = queue.filter(t => t && t.taskKind === 'sleep_reflection');
  const pending = sleepTasks.filter(t => t.status === 'pending' || t.status === 'in_progress');
  const completed = sleepTasks.filter(t => t.status === 'completed');
  const failed = sleepTasks.filter(t => t.status === 'failed');

  if (pending.length > 0) return `${pending.length} pending sleep_reflection task(s) awaiting processing`;
  if (completed.length > 0) return `${completed.length} completed, ${failed.length} failed (total ${sleepTasks.length} tasks)`;
  // Without these two branches a queue holding only failed (or unknown-status)
  // sleep_reflection tasks was reported as having no such tasks at all.
  if (failed.length > 0) {
    return { status: 'warn', detail: `${failed.length} failed sleep_reflection task(s), none pending or completed` };
  }
  if (sleepTasks.length > 0) {
    return { status: 'warn', detail: `${sleepTasks.length} sleep_reflection task(s) with unrecognized status` };
  }
  return { status: 'warn', detail: `Queue exists with ${queue.length} items but no sleep_reflection tasks` };
});
179
+
180
+ // ─────────────────────────────────────────────────────────
181
+ // CHECKPOINT 5: Nocturnal samples (artifacts)
182
+ // ─────────────────────────────────────────────────────────
183
check('5. Nocturnal artifact persistence', () => {
  const samplesDir = join(stateDir, 'nocturnal', 'samples');
  if (!existsSync(samplesDir)) {
    return { status: 'warn', detail: 'No samples directory — no reflections have been persisted yet' };
  }

  const files = readdirSync(samplesDir).filter((entry) => entry.endsWith('.json'));
  if (files.length === 0) {
    return { status: 'warn', detail: 'samples/ directory exists but is empty' };
  }

  // Pick the most recently modified artifact; on equal mtimes the earlier
  // directory entry wins.
  let latest = null;
  let latestMtime = -Infinity;
  for (const entry of files) {
    const mtime = statSync(join(samplesDir, entry)).mtimeMs;
    if (latest === null || mtime > latestMtime) {
      latest = entry;
      latestMtime = mtime;
    }
  }

  // Only the newest artifact is validated.
  const artifact = JSON.parse(readFileSync(join(samplesDir, latest), 'utf-8'));
  const requiredFields = ['artifactId', 'badDecision', 'betterDecision', 'rationale'];
  const complete = requiredFields.every((field) => artifact[field]);
  if (!complete) {
    return { status: 'warn', detail: `Latest artifact ${latest} is missing required fields` };
  }
  return `${files.length} artifact(s), latest: ${latest} (${artifact.principleId || 'unknown principle'})`;
});
203
+
204
+ // ─────────────────────────────────────────────────────────
205
+ // CHECKPOINT 6: Workflow store
206
+ // ─────────────────────────────────────────────────────────
207
check('6. Nocturnal workflow store', () => {
  const workflowsPath = join(stateDir, 'nocturnal', 'workflows.json');
  if (!existsSync(workflowsPath)) {
    return { status: 'warn', detail: 'No workflows.json — no nocturnal workflows have been started' };
  }
  const workflows = JSON.parse(readFileSync(workflowsPath, 'utf-8'));
  if (!Array.isArray(workflows) || workflows.length === 0) {
    return { status: 'warn', detail: 'workflows.json is empty — no workflows recorded' };
  }
  const active = workflows.filter(w => w.state === 'active');
  const completed = workflows.filter(w => w.state === 'completed');
  const errored = workflows.filter(w => w.state === 'terminal_error');
  const expired = workflows.filter(w => w.state === 'expired');

  if (active.length > 0) {
    return { status: 'warn', detail: `${active.length} workflow(s) still active — may be in progress or stuck. IDs: ${active.map(w => w.workflow_id).join(', ')}` };
  }
  // Interpolate the counts: the arrays themselves would render as
  // "[object Object],[object Object]" inside the summary string.
  return `${workflows.length} total: ${completed.length} completed, ${errored.length} errored, ${expired.length} expired`;
});
226
+
227
+ // ─────────────────────────────────────────────────────────
228
+ // CHECKPOINT 7: Nocturnal runtime state (cooldown/quota)
229
+ // ─────────────────────────────────────────────────────────
230
check('7. Nocturnal runtime state (cooldown/quota)', () => {
  const runtimePath = join(stateDir, 'nocturnal-runtime.json');
  if (!existsSync(runtimePath)) {
    return 'No runtime state — no cooldown or quota restrictions';
  }

  const state = JSON.parse(readFileSync(runtimePath, 'utf-8'));
  const restrictions = [];

  // Global cooldown: only reported while its end time is still in the future.
  if (state.globalCooldownUntil) {
    const cooldownEndsAt = new Date(state.globalCooldownUntil).getTime();
    if (cooldownEndsAt > Date.now()) {
      const minutesLeft = Math.round((cooldownEndsAt - Date.now()) / 60000);
      restrictions.push(`global cooldown active (${minutesLeft}min remaining)`);
    }
  }

  // Quota: count runs whose timestamp falls inside the trailing 24 hours.
  if (state.recentRunTimestamps) {
    const cutoff = Date.now() - 24 * 60 * 60 * 1000;
    const runsInWindow = state.recentRunTimestamps
      .map((stamp) => new Date(stamp).getTime())
      .filter((millis) => millis > cutoff);
    if (runsInWindow.length >= 3) {
      restrictions.push(`quota exhausted (${runsInWindow.length}/3 runs used in 24h)`);
    }
  }

  return restrictions.length > 0
    ? { status: 'warn', detail: restrictions.join('; ') }
    : 'No active cooldown or quota restrictions';
});
261
+
262
+ // ─────────────────────────────────────────────────────────
263
+ // CHECKPOINT 8: Bundle health
264
+ // ─────────────────────────────────────────────────────────
265
check('8. Plugin bundle health', () => {
  const bundlePath = join(PLUGIN_DIR, 'dist', 'bundle.js');
  if (!existsSync(bundlePath)) {
    throw new Error('dist/bundle.js missing — run build first');
  }

  const bundleSource = readFileSync(bundlePath, 'utf-8');

  // A mix of exported symbols and string markers; the original author notes
  // that class names and exported symbols survive minification while internal
  // function names do not.
  const requiredMarkers = [
    'EvolutionWorkerService', // exported class
    'checkPainFlag', // exported function
    'processEvolutionQueue', // function reference
    'NocturnalWorkflowManager', // exported class
    'executeNocturnalReflectionAsync', // used in log messages
    'nocturnal_started', // event type string
    'nocturnal_completed', // event type string
    'nocturnal_failed', // event type string
    'nocturnal_expired', // event type string
  ];

  const absent = [];
  for (const marker of requiredMarkers) {
    if (!bundleSource.includes(marker)) {
      absent.push(marker);
    }
  }
  if (absent.length > 0) {
    throw new Error(`Missing critical symbols in bundle: ${absent.join(', ')}`);
  }

  const sizeKb = Math.round(bundleSource.length / 1024);
  return `Bundle OK (${sizeKb}KB), all ${requiredMarkers.length} critical markers present`;
});
289
+
290
+ // ─────────────────────────────────────────────────────────
291
+ // CHECKPOINT 9: Git state — uncommitted changes that could break pipeline
292
+ // ─────────────────────────────────────────────────────────
293
check('9. Git state (uncommitted changes)', () => {
  try {
    // `-- .` restricts the status to the plugin directory (the cwd), matching
    // the wording of the warning below; without a pathspec git reports every
    // change in the enclosing repository.
    const status = execSync('git status --porcelain -- .', {
      encoding: 'utf-8',
      timeout: 5000,
      cwd: PLUGIN_DIR,
      // Keep git's own error output out of the diagnostic report.
      stdio: ['ignore', 'pipe', 'ignore'],
    }).trim();
    if (!status) return 'Working tree clean';
    const changedFiles = status.split('\n').filter(line => line.trim() !== '').length;
    return { status: 'warn', detail: `${changedFiles} uncommitted change(s) in plugin directory` };
  } catch {
    // Best effort: git missing, not a repository, or the command timed out.
    return { status: 'warn', detail: 'Could not check git status' };
  }
});
303
+
304
+ // ─────────────────────────────────────────────────────────
305
+ // CHECKPOINT 10: Pain flag state
306
+ // ─────────────────────────────────────────────────────────
307
check('10. Pain flag state', () => {
  const painFlagPath = join(stateDir, '.pain_flag');
  if (!existsSync(painFlagPath)) {
    return 'No active pain flag';
  }

  // Parse "key: value" lines; a line without a colon, or with the colon in
  // first position, is ignored.
  const fields = {};
  readFileSync(painFlagPath, 'utf-8').split('\n').forEach((rawLine) => {
    const separatorAt = rawLine.indexOf(':');
    if (separatorAt > 0) {
      const key = rawLine.slice(0, separatorAt).trim();
      fields[key] = rawLine.slice(separatorAt + 1).trim();
    }
  });

  const hasRequiredFields = Boolean(fields.score) && Boolean(fields.reason);
  if (!hasRequiredFields) {
    return { status: 'warn', detail: 'Pain flag exists but is missing required fields (score, reason)' };
  }
  return `Pain flag active (score: ${fields.score}, source: ${fields.source || 'unknown'}, session: ${fields.session_id || 'none'})`;
});
326
+
327
+ // ─────────────────────────────────────────────────────────
328
+ // CHECKPOINT 11: Trajectory data
329
+ // ─────────────────────────────────────────────────────────
330
check('11. Trajectory data availability', () => {
  const trajectoryPath = join(stateDir, 'trajectory.json');
  const trajectoryDir = join(stateDir, 'trajectory');
  const trajectoryDb = join(stateDir, 'trajectory.db');

  // Three storage locations are probed: JSON file, directory, SQLite database.
  const anyStorePresent = [trajectoryPath, trajectoryDir, trajectoryDb]
    .some((candidate) => existsSync(candidate));
  if (!anyStorePresent) {
    return { status: 'warn', detail: 'No trajectory data — snapshot extraction will use pain context fallback or fail' };
  }

  // The SQLite database takes precedence when several stores exist.
  if (existsSync(trajectoryDb)) {
    const dbSizeKb = Math.round(statSync(trajectoryDb).size / 1024);
    return `Trajectory SQLite database present (${dbSizeKb}KB)`;
  }

  // Next, the single JSON file: report how many entries it holds.
  if (existsSync(trajectoryPath)) {
    try {
      const parsed = JSON.parse(readFileSync(trajectoryPath, 'utf-8'));
      const entryCount = Array.isArray(parsed) ? parsed.length : Object.keys(parsed).length;
      return `${entryCount} trajectory entries available`;
    } catch {
      return { status: 'warn', detail: 'trajectory.json exists but is corrupted' };
    }
  }

  // Finally, the per-file trajectory directory.
  if (existsSync(trajectoryDir)) {
    const jsonFiles = readdirSync(trajectoryDir).filter((entry) => entry.endsWith('.json'));
    return `${jsonFiles.length} trajectory file(s) available`;
  }

  return { status: 'warn', detail: 'Trajectory storage not found in expected locations' };
});
357
+
358
+ // ─────────────────────────────────────────────────────────
359
+ // CHECKPOINT 12: Principle training state
360
+ // ─────────────────────────────────────────────────────────
361
check('12. Principle training state', () => {
  // The store may live in either location; the first one that exists wins.
  const trainingPath = [
    join(stateDir, 'nocturnal', 'training_store.json'),
    join(stateDir, 'principle_training_state.json'),
  ].find((candidate) => existsSync(candidate));

  if (!trainingPath) {
    return { status: 'warn', detail: 'No training_store.json or principle_training_state.json — NocturnalTargetSelector may not find evaluable principles' };
  }

  try {
    const store = JSON.parse(readFileSync(trainingPath, 'utf-8'));
    // Principles are either nested under `principles` or are the top-level keys.
    const principleTable = store.principles || store;
    const principleIds = Object.keys(principleTable);
    if (principleIds.length === 0) {
      return { status: 'warn', detail: 'Training store exists but has no principles' };
    }

    let evaluableCount = 0;
    for (const id of principleIds) {
      const record = principleTable[id];
      // Anything not explicitly marked 'manual_only' counts as evaluable.
      if (record && record.evaluability !== 'manual_only') {
        evaluableCount += 1;
      }
    }
    return `${principleIds.length} principle(s) in training store, ${evaluableCount} evaluable`;
  } catch {
    return { status: 'warn', detail: 'Training store exists but is corrupted' };
  }
});
389
+
390
+ printReport();
391
+ }
392
+
393
+ main();
@@ -372,6 +372,7 @@ function verifyBundleContents() {
372
372
  { name: 'EvolutionWorkerService', reason: 'main plugin service export' },
373
373
  { name: 'checkPainFlag', reason: 'pain flag detection' },
374
374
  { name: 'processEvolutionQueue', reason: 'queue processing' },
375
+ { name: 'acquireQueueLock', reason: 'queue lock for pd-reflect and worker' },
375
376
  ];
376
377
 
377
378
  const missing = [];
@@ -623,11 +623,12 @@ export function validateArtifact(
623
623
  // Rule 11: Quality threshold gate — reject low-signal artifacts
624
624
  // A reflection artifact must show positive cognitive improvement (thinkingModelDelta > 0).
625
625
  // planningRatioGain must not show catastrophic regression (< -0.5).
626
+ // #244: Use strict < so thinkingModelDelta=threshold passes (thin violations allowed at boundary)
626
627
  if (
627
628
  options.qualityThresholds?.thinkingModelDeltaMin !== undefined &&
628
629
  thinkingModelDelta !== undefined &&
629
630
  typeof thinkingModelDelta === 'number' &&
630
- thinkingModelDelta <= options.qualityThresholds.thinkingModelDeltaMin
631
+ thinkingModelDelta < options.qualityThresholds.thinkingModelDeltaMin
631
632
  ) {
632
633
  failures.push({
633
634
  reason: `thinkingModelDelta (${thinkingModelDelta}) does not meet minimum quality threshold (${options.qualityThresholds.thinkingModelDeltaMin}) — reflection shows no cognitive improvement`,
@@ -1,6 +1,7 @@
1
1
  import type { PluginHookSubagentEndedEvent, PluginHookSubagentContext, PluginLogger, OpenClawPluginApi } from '../openclaw-sdk.js';
2
2
  import { buildPainFlag, writePainFlag } from '../core/pain.js';
3
3
  import { WorkspaceContext } from '../core/workspace-context.js';
4
+ import { extractAgentIdFromSessionKey } from '../utils/session-key.js';
4
5
  // No longer needed — diagnostician runs via HEARTBEAT, not subagent
5
6
  import { recordEvolutionSuccess } from '../core/evolution-engine.js';
6
7
  import { WorkflowStore } from '../service/subagent-workflow/workflow-store.js';
@@ -81,18 +82,6 @@ function emitSubagentPainEvent(
81
82
  }
82
83
  }
83
84
 
84
-
85
- function extractAgentIdFromSessionKey(sessionKey: string | undefined): string | undefined {
86
- // sessionKey format: "agent:{agentId}:{type}:{uuid}" or "agent:{agentId}:{uuid}"
87
- if (!sessionKey) return undefined;
88
- const match = /^agent:([^:]+):/.exec(sessionKey);
89
- return match ? match[1] : undefined;
90
- }
91
-
92
-
93
-
94
-
95
-
96
85
  type SubagentEndedHookContext = PluginHookSubagentContext & {
97
86
  api?: OpenClawPluginApi;
98
87
  workspaceDir?: string;
package/src/index.ts CHANGED
@@ -61,6 +61,7 @@ import { PathResolver, resolveWorkspaceDirFromApi } from './core/path-resolver.j
61
61
  import { validateWorkspaceDir } from './core/workspace-dir-validation.js';
62
62
  import { resolveRequiredWorkspaceDir, resolveWorkspaceDir, type WorkspaceResolutionContext } from './core/workspace-dir-service.js';
63
63
  import { createPrinciplesConsoleRoute } from './http/principles-console-route.js';
64
+ import { extractAgentIdFromSessionKey } from './utils/session-key.js';
64
65
 
65
66
  // Track initialization to avoid repeated calls
66
67
  let workspaceInitialized = false;
@@ -423,11 +424,13 @@ const plugin = {
423
424
  registerCommandWithAlias('pd-thinking', 'pdt', getCommandDescription('pd-thinking', language), (ctx: any) => handleThinkingOs(ctx), { acceptsArgs: true });
424
425
  registerCommandWithAlias('pd-reflect', 'pdrl', getCommandDescription('pd-reflect', language), (ctx: any) => {
425
426
  try {
426
- const workspaceDir = resolveCommandWorkspaceDirStrict(api, ctx);
427
- return handlePdReflect.handler({ ...ctx, api, workspaceDir } as any);
427
+ // Resolve agentId from sessionKey (if available), fallback to 'main'
428
+ const agentId = extractAgentIdFromSessionKey(ctx.sessionKey) ?? 'main';
429
+ const workspaceDir = resolveRequiredWorkspaceDir(api, { ...ctx, agentId }, { source: 'pd-reflect', fallbackAgentId: 'main' });
430
+ return handlePdReflect.handler({ ...ctx, api, workspaceDir });
428
431
  } catch (err) {
429
- api.logger.error(`[PD] Command /pd-reflect failed: ${String(err)}`);
430
- return { text: language === 'zh' ? "命令执行失败,请检查日志。" : "Command failed. Check logs." };
432
+ api.logger.error(`[PD:pd-reflect] Command failed: ${String(err)}`);
433
+ return { text: language === 'zh' ? '命令执行失败,请查看日志。' : 'Command failed. Check logs.' };
431
434
  }
432
435
  });
433
436
  registerCommandWithAlias('pd-daily', 'pdd', getCommandDescription('pd-daily', language), () => ({
@@ -71,6 +71,17 @@ async function runWorkflowWatchdog(
71
71
  for (const wf of staleActive) {
72
72
  const ageMin = Math.round((now - wf.created_at) / 60000);
73
73
  details.push(`stale_active: ${wf.workflow_id} (${wf.workflow_type}, ${ageMin}min old)`);
74
+
75
+ // #257: Check if the last recorded event reason indicates expected subagent unavailability.
76
+ // If so, skip marking as terminal_error — the workflow is stale because the subagent
77
+ // was expectedly unavailable (daemon mode, process isolation), not due to a hard failure.
78
+ const events = store.getEvents(wf.workflow_id);
79
+ const lastEventReason = events.length > 0 ? events[events.length - 1].reason : 'unknown';
80
+ if (isExpectedSubagentError(lastEventReason)) {
81
+ logger?.debug?.(`[PD:Watchdog] Skipping stale active workflow ${wf.workflow_id}: expected subagent error (${lastEventReason})`);
82
+ continue;
83
+ }
84
+
74
85
  store.updateWorkflowState(wf.workflow_id, 'terminal_error');
75
86
  store.recordEvent(wf.workflow_id, 'watchdog_timeout', 'active', 'terminal_error', `Stale active > ${staleThreshold / 60000}s`, { ageMs: now - wf.created_at });
76
87
 
@@ -1596,6 +1607,7 @@ async function processEvolutionQueue(wctx: WorkspaceContext, logger: PluginLogge
1596
1607
  snapshot: snapshotData,
1597
1608
  taskId: sleepTask.id,
1598
1609
  painContext: sleepTask.recentPainContext,
1610
+ triggerSource: sleepTask.source,
1599
1611
  },
1600
1612
  });
1601
1613
  sleepTask.resultRef = workflowHandle.workflowId;
@@ -60,6 +60,20 @@ function isSystemSession(state: SessionState): boolean {
60
60
  if (sessionId?.startsWith('boot-')) return true;
61
61
  if (sessionId?.startsWith('probe-')) return true;
62
62
 
63
+ // CRITICAL FIX: Legacy sessions from persistence may have missing trigger/sessionKey
64
+ // If both are missing AND the session is old (inactive > abandoned threshold),
65
+ // treat as legacy/orphan to avoid blocking idle detection with unknown sessions.
66
+ // Recent sessions without trigger/sessionKey are likely real user sessions still
67
+ // being enriched — do NOT classify them as system sessions.
68
+ const ABANDONED_THRESHOLD_MS = 2 * 60 * 60 * 1000; // 2 hours
69
+ if (!trigger && !sessionKey) {
70
+ const inactiveFor = Date.now() - state.lastActivityAt;
71
+ if (inactiveFor > ABANDONED_THRESHOLD_MS) {
72
+ return true; // Legacy/orphan session — don't block idle detection
73
+ }
74
+ // Recent session without metadata — likely a real user session, let it through
75
+ }
76
+
63
77
  return false;
64
78
  }
65
79
 
@@ -210,7 +210,7 @@ export class NocturnalWorkflowManager implements WorkflowManager {
210
210
 
211
211
  // Extract snapshot and principleId from taskInput.metadata (NOC-07: Trinity async path)
212
212
  const snapshotValidation = validateNocturnalSnapshotIngress(options.metadata?.snapshot);
213
- const snapshot = snapshotValidation.snapshot;
213
+ const {snapshot} = snapshotValidation;
214
214
  const principleId = options.metadata?.principleId as string | undefined;
215
215
  // Extract painContext for Selector ranking bias
216
216
  const painContext = options.metadata?.painContext as RecentPainContext | undefined;
@@ -254,6 +254,22 @@ export class NocturnalWorkflowManager implements WorkflowManager {
254
254
  },
255
255
  // Pass painContext for Selector ranking bias
256
256
  painContext,
257
+ // #244: Only skip preflight idle gate for manual/test triggers.
258
+ // Automatic triggers must go through normal idle check.
259
+ ...(((options.metadata)?.triggerSource === 'manual' ||
260
+ (options.metadata)?.triggerSource === 'test')
261
+ ? {
262
+ idleCheckOverride: {
263
+ isIdle: true,
264
+ mostRecentActivityAt: Date.now() - 1800000,
265
+ idleForMs: 1800000,
266
+ userActiveSessions: 0,
267
+ abandonedSessionIds: [],
268
+ trajectoryGuardrailConfirmsIdle: true,
269
+ reason: 'manual/test override',
270
+ },
271
+ }
272
+ : {}),
257
273
  // Skip Selector if principleId and snapshot are provided
258
274
  ...(principleId && snapshot ? {
259
275
  principleIdOverride: principleId,
@@ -0,0 +1,17 @@
1
+ /**
2
+ * Session key parsing utilities.
3
+ *
4
+ * Session key format: "agent:{agentId}:{type}:{uuid}" or "agent:{agentId}:{uuid}"
5
+ */
6
+
7
/**
 * Pull the agent identifier out of a session key of the form
 * "agent:{agentId}:…".
 *
 * @param sessionKey - Session key to parse; may be undefined.
 * @returns The agentId with surrounding whitespace removed, or `undefined`
 *   when the key is missing/empty, does not match the expected prefix, or the
 *   agentId segment contains only whitespace.
 */
export function extractAgentIdFromSessionKey(sessionKey: string | undefined): string | undefined {
  if (!sessionKey) {
    return undefined;
  }

  // Requires at least one non-colon character between "agent:" and the next colon.
  const parsed = /^agent:([^:]+):/.exec(sessionKey);
  if (parsed === null) {
    return undefined;
  }

  const [, rawAgentId] = parsed;
  const agentId = rawAgentId.trim();
  return agentId.length > 0 ? agentId : undefined;
}
@@ -491,4 +491,61 @@ describe('Nocturnal Arbiter', () => {
491
491
  expect(result.artifact?.sourceSnapshotRef).toBe('');
492
492
  });
493
493
  });
494
+
495
+ // -------------------------------------------------------------------------
496
+ // Tests: quality threshold gates (Rule 10/11)
497
+ // -------------------------------------------------------------------------
498
+
499
+ describe('quality threshold gates', () => {
500
+ const defaultThresholds = { thinkingModelDeltaMin: 0.05, planningRatioGainMin: -0.5 };
501
+
502
+ it('rejects when thinkingModelDelta is below threshold', () => {
503
+ const artifact = makeValidArtifact({ thinkingModelDelta: 0.03 });
504
+ const result = validateArtifact(artifact, { qualityThresholds: defaultThresholds });
505
+ expect(result.passed).toBe(false);
506
+ expect(result.failures).toHaveLength(1);
507
+ expect(result.failures[0].field).toBe('thinkingModelDelta');
508
+ });
509
+
510
+ it('passes when thinkingModelDelta equals threshold exactly (boundary value)', () => {
511
+ const artifact = makeValidArtifact({ thinkingModelDelta: 0.05 });
512
+ const result = validateArtifact(artifact, { qualityThresholds: defaultThresholds });
513
+ expect(result.passed).toBe(true);
514
+ });
515
+
516
+ it('passes when thinkingModelDelta exceeds threshold', () => {
517
+ const artifact = makeValidArtifact({ thinkingModelDelta: 0.15 });
518
+ const result = validateArtifact(artifact, { qualityThresholds: defaultThresholds });
519
+ expect(result.passed).toBe(true);
520
+ });
521
+
522
+ it('passes when thinkingModelDelta is absent (optional field)', () => {
523
+ const artifact = makeValidArtifact();
524
+ delete artifact.thinkingModelDelta;
525
+ const result = validateArtifact(artifact, { qualityThresholds: defaultThresholds });
526
+ expect(result.passed).toBe(true);
527
+ });
528
+
529
+ it('rejects when planningRatioGain is below threshold', () => {
530
+ const artifact = makeValidArtifact({ planningRatioGain: -0.6 });
531
+ const result = validateArtifact(artifact, { qualityThresholds: defaultThresholds });
532
+ expect(result.passed).toBe(false);
533
+ expect(result.failures.some(f => f.field === 'planningRatioGain')).toBe(true);
534
+ });
535
+
536
+ it('passes when planningRatioGain equals threshold exactly (boundary value)', () => {
537
+ const artifact = makeValidArtifact({ planningRatioGain: -0.5 });
538
+ const result = validateArtifact(artifact, { qualityThresholds: defaultThresholds });
539
+ expect(result.passed).toBe(true);
540
+ });
541
+
542
+ it('rejects both quality thresholds simultaneously', () => {
543
+ const artifact = makeValidArtifact({ thinkingModelDelta: 0.01, planningRatioGain: -0.8 });
544
+ const result = validateArtifact(artifact, { qualityThresholds: defaultThresholds });
545
+ expect(result.passed).toBe(false);
546
+ expect(result.failures.length).toBeGreaterThanOrEqual(2);
547
+ expect(result.failures.some(f => f.field === 'thinkingModelDelta')).toBe(true);
548
+ expect(result.failures.some(f => f.field === 'planningRatioGain')).toBe(true);
549
+ });
550
+ });
494
551
  });
@@ -35,18 +35,22 @@ vi.mock('../../src/service/subagent-workflow/nocturnal-workflow-manager.js', ()
35
35
 
36
36
  const { mockGetNocturnalSessionSnapshot, mockListRecentNocturnalCandidateSessions } = vi.hoisted(() => ({
37
37
  mockGetNocturnalSessionSnapshot: vi.fn(),
38
- mockListRecentNocturnalCandidateSessions: vi.fn(() => []),
38
+ mockListRecentNocturnalCandidateSessions: vi.fn(() => [] as Array<{ sessionId: string; startedAt: string; failureCount: number; painEventCount: number; gateBlockCount: number }>),
39
39
  }));
40
+
41
+ // Create a shared mock extractor instance so spy calls are tracked correctly
42
+ const mockExtractorInstance = {
43
+ getNocturnalSessionSnapshot: mockGetNocturnalSessionSnapshot,
44
+ listRecentNocturnalCandidateSessions: mockListRecentNocturnalCandidateSessions,
45
+ };
46
+
40
47
  vi.mock('../../src/core/nocturnal-trajectory-extractor.js', async () => {
41
48
  const actual = await vi.importActual<typeof import('../../src/core/nocturnal-trajectory-extractor.js')>(
42
49
  '../../src/core/nocturnal-trajectory-extractor.js'
43
50
  );
44
51
  return {
45
52
  ...actual,
46
- createNocturnalTrajectoryExtractor: vi.fn(() => ({
47
- getNocturnalSessionSnapshot: mockGetNocturnalSessionSnapshot,
48
- listRecentNocturnalCandidateSessions: mockListRecentNocturnalCandidateSessions,
49
- })),
53
+ createNocturnalTrajectoryExtractor: vi.fn(() => mockExtractorInstance),
50
54
  };
51
55
  });
52
56
 
@@ -55,6 +59,17 @@ import { WorkspaceContext } from '../../src/core/workspace-context.js';
55
59
  import { handlePdReflect } from '../../src/commands/pd-reflect.js';
56
60
  import { safeRmDir } from '../test-utils.js';
57
61
 
62
+ // Helper to create a mock API for E2E tests
63
+ function createMockApi() {
64
+ return {
65
+ logger: { info: vi.fn(), warn: vi.fn(), error: vi.fn(), debug: vi.fn() },
66
+ runtime: { agent: { runEmbeddedPiAgent: vi.fn() } },
67
+ } as any;
68
+ }
69
+
70
+ // Helper config for fast poll cycle
71
+ const fastPollConfig = { get: (k: string) => k === 'intervals.worker_poll_ms' ? 100 : undefined };
72
+
58
73
  function readQueue(stateDir: string) {
59
74
  return JSON.parse(fs.readFileSync(path.join(stateDir, 'evolution_queue.json'), 'utf8'));
60
75
  }
@@ -93,11 +108,11 @@ session_id: explicit-session-from-pain
93
108
 
94
109
  try {
95
110
  const context = readRecentPainContext(wctx);
96
-
111
+
97
112
  // Verify the session_id was extracted from the pain flag file
98
113
  expect(context.mostRecent).toBeDefined();
99
- expect(context.mostRecent.sessionId).toBe('explicit-session-from-pain');
100
- expect(context.mostRecent.score).toBe(80);
114
+ expect(context.mostRecent!.sessionId).toBe('explicit-session-from-pain');
115
+ expect(context.mostRecent!.score).toBe(80);
101
116
  expect(context.recentPainCount).toBe(1);
102
117
  } finally {
103
118
  safeRmDir(workspaceDir);
@@ -155,9 +170,9 @@ session_id: pain-session-abc
155
170
 
156
171
  // Contract: session_id must be extracted from the pain flag
157
172
  expect(painContext.mostRecent).toBeDefined();
158
- expect(painContext.mostRecent.sessionId).toBe('pain-session-abc');
159
- expect(painContext.mostRecent.score).toBe(70);
160
- expect(painContext.mostRecent.source).toBe('tool_failure');
173
+ expect(painContext.mostRecent!.sessionId).toBe('pain-session-abc');
174
+ expect(painContext.mostRecent!.score).toBe(70);
175
+ expect(painContext.mostRecent!.source).toBe('tool_failure');
161
176
 
162
177
  // Now simulate what the worker does: attach this context to a queued task
163
178
  const simulatedTask = {
@@ -167,7 +182,7 @@ session_id: pain-session-abc
167
182
  };
168
183
 
169
184
  // Verify the contract holds end-to-end
170
- expect(simulatedTask.recentPainContext.mostRecent.sessionId).toBe('pain-session-abc');
185
+ expect(simulatedTask.recentPainContext.mostRecent!.sessionId).toBe('pain-session-abc');
171
186
  });
172
187
 
173
188
  it('e2e: /pd-reflect command writes to workspace/.state, never to HOME/.state', async () => {
@@ -214,4 +229,359 @@ session_id: pain-session-abc
214
229
  safeRmDir(workspaceDir);
215
230
  }
216
231
  });
232
+
233
+ // === Nocturnal E2E Pipeline Tests (from PR #243) ===
234
+
235
+ it('does not start a nocturnal workflow when only an empty fallback snapshot is available', async () => {
236
+ const workspaceDir = fs.mkdtempSync(path.join(os.tmpdir(), 'pd-nocturnal-empty-'));
237
+ const stateDir = path.join(workspaceDir, '.state');
238
+ fs.mkdirSync(path.join(stateDir, 'sessions'), { recursive: true });
239
+ fs.mkdirSync(path.join(stateDir, 'logs'), { recursive: true });
240
+
241
+ mockGetNocturnalSessionSnapshot.mockReturnValue(null);
242
+
243
+ fs.writeFileSync(
244
+ path.join(stateDir, 'evolution_queue.json'),
245
+ JSON.stringify([
246
+ {
247
+ id: 'sleep-empty',
248
+ taskKind: 'sleep_reflection',
249
+ priority: 'medium',
250
+ score: 50,
251
+ source: 'nocturnal',
252
+ reason: 'Sleep reflection',
253
+ timestamp: '2026-04-10T00:00:00.000Z',
254
+ enqueued_at: '2026-04-10T00:00:00.000Z',
255
+ status: 'pending',
256
+ retryCount: 0,
257
+ maxRetries: 1,
258
+ recentPainContext: {
259
+ mostRecent: null,
260
+ recentPainCount: 0,
261
+ recentMaxPainScore: 0,
262
+ },
263
+ },
264
+ ], null, 2),
265
+ 'utf8'
266
+ );
267
+
268
+ const mockApi = createMockApi();
269
+ EvolutionWorkerService.api = mockApi;
270
+
271
+ try {
272
+ EvolutionWorkerService.start({
273
+ workspaceDir,
274
+ stateDir,
275
+ logger: mockApi.logger,
276
+ config: fastPollConfig,
277
+ api: mockApi,
278
+ } as any);
279
+
280
+ await vi.advanceTimersByTimeAsync(6000);
281
+
282
+ const queue = readQueue(stateDir);
283
+ expect(queue[0].status).toBe('failed');
284
+ expect(queue[0].lastError).toContain('invalid_snapshot_ingress');
285
+ expect(queue[0].lastError).toContain('fallback snapshot must contain at least one pain signal');
286
+ expect(queue[0].resultRef).toBeFalsy();
287
+ expect(mockStartWorkflow).not.toHaveBeenCalled();
288
+ } finally {
289
+ EvolutionWorkerService.stop!({ workspaceDir, stateDir, logger: console } as any);
290
+ safeRmDir(workspaceDir);
291
+ }
292
+ });
293
+
294
+ it('uses stub_fallback for expected gateway-only background unavailability', async () => {
295
+ const workspaceDir = fs.mkdtempSync(path.join(os.tmpdir(), 'pd-nocturnal-gateway-'));
296
+ const stateDir = path.join(workspaceDir, '.state');
297
+ fs.mkdirSync(path.join(stateDir, 'sessions'), { recursive: true });
298
+ fs.mkdirSync(path.join(stateDir, 'logs'), { recursive: true });
299
+
300
+ mockGetNocturnalSessionSnapshot.mockReturnValue({
301
+ sessionId: 'sleep-gateway',
302
+ startedAt: '2026-04-10T00:00:00.000Z',
303
+ updatedAt: '2026-04-10T00:01:00.000Z',
304
+ assistantTurns: [],
305
+ userTurns: [],
306
+ toolCalls: [],
307
+ painEvents: [],
308
+ gateBlocks: [],
309
+ stats: { totalAssistantTurns: 1, totalToolCalls: 1, totalPainEvents: 0, totalGateBlocks: 0, failureCount: 0 },
310
+ });
311
+ mockStartWorkflow.mockResolvedValue({ workflowId: 'wf-1', childSessionKey: 'child-1', state: 'active' });
312
+ mockGetWorkflowDebugSummary.mockResolvedValue({
313
+ state: 'terminal_error',
314
+ metadata: {},
315
+ recentEvents: [{ reason: 'Error: Plugin runtime subagent methods are only available during a gateway request.', payload: {} }],
316
+ });
317
+
318
+ fs.writeFileSync(
319
+ path.join(stateDir, 'evolution_queue.json'),
320
+ JSON.stringify([
321
+ {
322
+ id: 'sleep-gateway',
323
+ taskKind: 'sleep_reflection',
324
+ priority: 'medium',
325
+ score: 50,
326
+ source: 'nocturnal',
327
+ reason: 'Sleep reflection',
328
+ timestamp: '2026-04-10T00:00:00.000Z',
329
+ enqueued_at: '2026-04-10T00:00:00.000Z',
330
+ status: 'pending',
331
+ retryCount: 0,
332
+ maxRetries: 1,
333
+ recentPainContext: {
334
+ mostRecent: { source: 'test', score: 50, reason: 'test', timestamp: '2026-04-10T00:00:00.000Z', sessionId: 'sleep-gateway' },
335
+ recentPainCount: 1,
336
+ recentMaxPainScore: 50,
337
+ },
338
+ },
339
+ ], null, 2),
340
+ 'utf8'
341
+ );
342
+
343
+ const mockApi = createMockApi();
344
+ EvolutionWorkerService.api = mockApi;
345
+
346
+ try {
347
+ EvolutionWorkerService.start({
348
+ workspaceDir,
349
+ stateDir,
350
+ logger: mockApi.logger,
351
+ config: fastPollConfig,
352
+ api: mockApi,
353
+ } as any);
354
+
355
+ await vi.advanceTimersByTimeAsync(6000);
356
+
357
+ const queue = readQueue(stateDir);
358
+ expect(queue[0].status).toBe('completed');
359
+ expect(queue[0].resolution).toBe('stub_fallback');
360
+ } finally {
361
+ EvolutionWorkerService.stop!({ workspaceDir, stateDir, logger: console } as any);
362
+ safeRmDir(workspaceDir);
363
+ }
364
+ });
365
+
366
+ it('uses stub_fallback for expected subagent runtime unavailability', async () => {
367
+ const workspaceDir = fs.mkdtempSync(path.join(os.tmpdir(), 'pd-nocturnal-subagent-'));
368
+ const stateDir = path.join(workspaceDir, '.state');
369
+ fs.mkdirSync(path.join(stateDir, 'sessions'), { recursive: true });
370
+ fs.mkdirSync(path.join(stateDir, 'logs'), { recursive: true });
371
+
372
+ mockGetNocturnalSessionSnapshot.mockReturnValue({
373
+ sessionId: 'sleep-subagent',
374
+ startedAt: '2026-04-10T00:00:00.000Z',
375
+ updatedAt: '2026-04-10T00:01:00.000Z',
376
+ assistantTurns: [],
377
+ userTurns: [],
378
+ toolCalls: [],
379
+ painEvents: [],
380
+ gateBlocks: [],
381
+ stats: { totalAssistantTurns: 1, totalToolCalls: 1, totalPainEvents: 0, totalGateBlocks: 0, failureCount: 0 },
382
+ });
383
+ mockStartWorkflow.mockRejectedValue(new Error('NocturnalWorkflowManager: subagent runtime unavailable'));
384
+
385
+ fs.writeFileSync(
386
+ path.join(stateDir, 'evolution_queue.json'),
387
+ JSON.stringify([
388
+ {
389
+ id: 'sleep-subagent',
390
+ taskKind: 'sleep_reflection',
391
+ priority: 'medium',
392
+ score: 50,
393
+ source: 'nocturnal',
394
+ reason: 'Sleep reflection',
395
+ timestamp: '2026-04-10T00:00:00.000Z',
396
+ enqueued_at: '2026-04-10T00:00:00.000Z',
397
+ status: 'pending',
398
+ retryCount: 0,
399
+ maxRetries: 1,
400
+ recentPainContext: {
401
+ mostRecent: { source: 'test', score: 50, reason: 'test', timestamp: '2026-04-10T00:00:00.000Z', sessionId: 'sleep-subagent' },
402
+ recentPainCount: 1,
403
+ recentMaxPainScore: 50,
404
+ },
405
+ },
406
+ ], null, 2),
407
+ 'utf8'
408
+ );
409
+
410
+ const mockApi = createMockApi();
411
+ EvolutionWorkerService.api = mockApi;
412
+
413
+ try {
414
+ EvolutionWorkerService.start({
415
+ workspaceDir,
416
+ stateDir,
417
+ logger: mockApi.logger,
418
+ config: fastPollConfig,
419
+ api: mockApi,
420
+ } as any);
421
+
422
+ await vi.advanceTimersByTimeAsync(6000);
423
+
424
+ const queue = readQueue(stateDir);
425
+ expect(queue[0].status).toBe('completed');
426
+ expect(queue[0].resolution).toBe('stub_fallback');
427
+ } finally {
428
+ EvolutionWorkerService.stop!({ workspaceDir, stateDir, logger: console } as any);
429
+ safeRmDir(workspaceDir);
430
+ }
431
+ });
432
+
433
+ it('prioritizes pain signal session ID for snapshot extraction', async () => {
434
+ const workspaceDir = fs.mkdtempSync(path.join(os.tmpdir(), 'pd-nocturnal-pain-session-'));
435
+ const stateDir = path.join(workspaceDir, '.state');
436
+ fs.mkdirSync(path.join(stateDir, 'sessions'), { recursive: true });
437
+ fs.mkdirSync(path.join(stateDir, 'logs'), { recursive: true });
438
+
439
+ const painSessionId = 'pain-signal-session-123';
440
+
441
+ mockGetNocturnalSessionSnapshot.mockImplementation((sessionId: string) => {
442
+ if (sessionId === painSessionId) {
443
+ return {
444
+ sessionId: painSessionId,
445
+ startedAt: '2026-04-09T23:00:00.000Z',
446
+ updatedAt: '2026-04-09T23:01:00.000Z',
447
+ assistantTurns: [],
448
+ userTurns: [],
449
+ toolCalls: [],
450
+ painEvents: [{ source: 'tool_failure', score: 70, severity: null, reason: 'test', createdAt: '2026-04-09T23:00:00.000Z' }],
451
+ gateBlocks: [],
452
+ stats: { totalAssistantTurns: 1, totalToolCalls: 1, failureCount: 1, totalPainEvents: 1, totalGateBlocks: 0 },
453
+ };
454
+ }
455
+ return null;
456
+ });
457
+ mockStartWorkflow.mockResolvedValue({ workflowId: 'wf-pain', childSessionKey: 'child-pain', state: 'active' });
458
+
459
+ fs.writeFileSync(
460
+ path.join(stateDir, 'evolution_queue.json'),
461
+ JSON.stringify([
462
+ {
463
+ id: 'sleep-pain-priority',
464
+ taskKind: 'sleep_reflection',
465
+ priority: 'medium',
466
+ score: 50,
467
+ source: 'nocturnal',
468
+ reason: 'Sleep reflection',
469
+ timestamp: '2026-04-10T00:00:00.000Z',
470
+ enqueued_at: '2026-04-10T00:00:00.000Z',
471
+ status: 'pending',
472
+ retryCount: 0,
473
+ maxRetries: 1,
474
+ recentPainContext: {
475
+ mostRecent: { source: 'tool_failure', score: 70, reason: 'test', timestamp: '2026-04-10T00:00:00.000Z', sessionId: painSessionId },
476
+ recentPainCount: 1,
477
+ recentMaxPainScore: 70,
478
+ },
479
+ },
480
+ ], null, 2),
481
+ 'utf8'
482
+ );
483
+
484
+ const mockApi = createMockApi();
485
+ EvolutionWorkerService.api = mockApi;
486
+
487
+ try {
488
+ EvolutionWorkerService.start({
489
+ workspaceDir,
490
+ stateDir,
491
+ logger: mockApi.logger,
492
+ config: fastPollConfig,
493
+ api: mockApi,
494
+ } as any);
495
+
496
+ await vi.advanceTimersByTimeAsync(6000);
497
+
498
+ expect(mockStartWorkflow).toHaveBeenCalledTimes(1);
499
+ const metadata = mockStartWorkflow.mock.calls[0][1].metadata;
500
+ expect(metadata.snapshot.sessionId).toBe(painSessionId);
501
+ } finally {
502
+ EvolutionWorkerService.stop!({ workspaceDir, stateDir, logger: console } as any);
503
+ safeRmDir(workspaceDir);
504
+ }
505
+ });
506
+
507
+ it('e2e: bounded session selection — never picks a session newer than the triggering task', async () => {
508
+ const workspaceDir = fs.mkdtempSync(path.join(os.tmpdir(), 'pd-nocturnal-e2e-bounded-'));
509
+ const stateDir = path.join(workspaceDir, '.state');
510
+ fs.mkdirSync(path.join(stateDir, 'sessions'), { recursive: true });
511
+ fs.mkdirSync(path.join(stateDir, 'logs'), { recursive: true });
512
+
513
+ const taskTimestamp = '2026-04-10T00:00:00.000Z';
514
+ const validSessionTimestamp = '2026-04-09T23:00:00.000Z';
515
+ const invalidSessionTimestamp = '2026-04-10T01:00:00.000Z';
516
+
517
+ mockGetNocturnalSessionSnapshot.mockImplementation((sessionId: string) => {
518
+ if (sessionId === 'valid-session') {
519
+ return {
520
+ sessionId: 'valid-session',
521
+ startedAt: validSessionTimestamp,
522
+ updatedAt: validSessionTimestamp,
523
+ assistantTurns: [],
524
+ userTurns: [],
525
+ toolCalls: [],
526
+ painEvents: [{ source: 'tool_failure', score: 50, severity: null, reason: 'test', createdAt: validSessionTimestamp }],
527
+ gateBlocks: [],
528
+ stats: { totalAssistantTurns: 1, totalToolCalls: 1, failureCount: 1, totalPainEvents: 1, totalGateBlocks: 0 },
529
+ };
530
+ }
531
+ return null;
532
+ });
533
+ mockListRecentNocturnalCandidateSessions.mockReturnValue([
534
+ { sessionId: 'valid-session', startedAt: validSessionTimestamp, failureCount: 1, painEventCount: 1, gateBlockCount: 0 },
535
+ { sessionId: 'invalid-session', startedAt: invalidSessionTimestamp, failureCount: 1, painEventCount: 0, gateBlockCount: 0 },
536
+ ]);
537
+ mockStartWorkflow.mockResolvedValue({ workflowId: 'wf-bounded', childSessionKey: 'child-bounded', state: 'active' });
538
+
539
+ fs.writeFileSync(
540
+ path.join(stateDir, 'evolution_queue.json'),
541
+ JSON.stringify([
542
+ {
543
+ id: 'sleep-e2e-bounded',
544
+ taskKind: 'sleep_reflection',
545
+ priority: 'medium',
546
+ score: 50,
547
+ source: 'nocturnal',
548
+ reason: 'Sleep reflection',
549
+ timestamp: taskTimestamp,
550
+ enqueued_at: taskTimestamp,
551
+ status: 'pending',
552
+ retryCount: 0,
553
+ maxRetries: 1,
554
+ recentPainContext: {
555
+ mostRecent: { source: 'test', score: 50, reason: 'test', timestamp: taskTimestamp, sessionId: 'pain-session' },
556
+ recentPainCount: 1,
557
+ recentMaxPainScore: 50,
558
+ },
559
+ },
560
+ ], null, 2),
561
+ 'utf8'
562
+ );
563
+
564
+ const mockApi = createMockApi();
565
+ EvolutionWorkerService.api = mockApi;
566
+
567
+ try {
568
+ EvolutionWorkerService.start({
569
+ workspaceDir,
570
+ stateDir,
571
+ logger: mockApi.logger,
572
+ config: fastPollConfig,
573
+ api: mockApi,
574
+ } as any);
575
+
576
+ await vi.advanceTimersByTimeAsync(6000);
577
+
578
+ expect(mockStartWorkflow).toHaveBeenCalledTimes(1);
579
+ const metadata = mockStartWorkflow.mock.calls[0][1].metadata;
580
+ expect(metadata.snapshot.sessionId).toBe('valid-session');
581
+ expect(new Date(metadata.snapshot.startedAt).getTime()).toBeLessThanOrEqual(new Date(taskTimestamp).getTime());
582
+ } finally {
583
+ EvolutionWorkerService.stop!({ workspaceDir, stateDir, logger: console } as any);
584
+ safeRmDir(workspaceDir);
585
+ }
586
+ });
217
587
  });