principles-disciple 1.22.0 → 1.24.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/openclaw.plugin.json +4 -4
- package/package.json +1 -1
- package/scripts/diagnose-nocturnal.mjs +393 -0
- package/scripts/sync-plugin.mjs +1 -0
- package/src/core/nocturnal-arbiter.ts +2 -1
- package/src/hooks/subagent.ts +1 -12
- package/src/index.ts +7 -4
- package/src/service/evolution-worker.ts +12 -0
- package/src/service/nocturnal-runtime.ts +14 -0
- package/src/service/subagent-workflow/nocturnal-workflow-manager.ts +17 -1
- package/src/utils/session-key.ts +17 -0
- package/tests/core/nocturnal-arbiter.test.ts +57 -0
- package/tests/service/evolution-worker.nocturnal.test.ts +382 -12
package/openclaw.plugin.json
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
"id": "principles-disciple",
|
|
3
3
|
"name": "Principles Disciple",
|
|
4
4
|
"description": "Evolutionary programming agent framework with strategic guardrails and reflection loops.",
|
|
5
|
-
"version": "1.22.0",
|
|
5
|
+
"version": "1.24.0",
|
|
6
6
|
"skills": [
|
|
7
7
|
"./skills"
|
|
8
8
|
],
|
|
@@ -76,8 +76,8 @@
|
|
|
76
76
|
}
|
|
77
77
|
},
|
|
78
78
|
"buildFingerprint": {
|
|
79
|
-
"gitSha": "
|
|
80
|
-
"bundleMd5": "
|
|
81
|
-
"builtAt": "2026-04-
|
|
79
|
+
"gitSha": "ebbaa40d6e3a",
|
|
80
|
+
"bundleMd5": "7c84860901894f7c049b54028d489ed4",
|
|
81
|
+
"builtAt": "2026-04-12T15:51:34.724Z"
|
|
82
82
|
}
|
|
83
83
|
}
|
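package/package.json
CHANGED
[The +1 -1 hunk for package/package.json is missing from this capture.]
package/scripts/diagnose-nocturnal.mjs
ADDED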
|
@@ -0,0 +1,393 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Nocturnal Pipeline Diagnostic Script
|
|
5
|
+
* ======================================
|
|
6
|
+
* Checks every link in the Nocturnal reflection chain:
|
|
7
|
+
* Heartbeat → Idle Detection → Queue → Snapshot → Workflow → Trinity → Arbiter → Persistence
|
|
8
|
+
*
|
|
9
|
+
* Usage:
|
|
10
|
+
* node scripts/diagnose-nocturnal.mjs [--workspace /path/to/workspace]
|
|
11
|
+
*
|
|
12
|
+
* Output: Structured report with pass/fail for each checkpoint.
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
import { existsSync, readFileSync, readdirSync, statSync } from 'fs';
|
|
16
|
+
import { join, dirname } from 'path';
|
|
17
|
+
import { fileURLToPath } from 'url';
|
|
18
|
+
import { execSync } from 'child_process';
|
|
19
|
+
|
|
20
|
+
// ES modules have no built-in __filename/__dirname, so derive them from the module URL.
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);

// Parent of the directory holding this script; dist/bundle.js is looked up relative to it.
const PLUGIN_DIR = join(__dirname, '..');
|
|
23
|
+
|
|
24
|
+
// ─── Argument parsing ───
|
|
25
|
+
/**
 * Parse the command line and decide which workspace to diagnose.
 *
 * Accepts `--workspace <dir>` and `--workspace=<dir>`; if the flag appears more
 * than once, the last occurrence wins. Without the flag, the root of the git
 * repository containing the current directory is used. If that lookup fails
 * (git not installed, not inside a repository) or yields nothing, the current
 * working directory is used instead.
 *
 * @returns {{ workspaceDir: string }} The workspace directory to inspect.
 */
function parseArgs() {
  const inlinePrefix = '--workspace=';
  let workspaceDir = null;
  const argv = process.argv.slice(2);
  for (let i = 0; i < argv.length; i++) {
    const arg = argv[i];
    if (arg === '--workspace' && argv[i + 1]) {
      // Consume the value so it is not inspected again as a flag.
      workspaceDir = argv[++i];
    } else if (arg.startsWith(inlinePrefix) && arg.length > inlinePrefix.length) {
      workspaceDir = arg.slice(inlinePrefix.length);
    }
  }
  // Auto-detect workspace from current git working directory
  if (!workspaceDir) {
    try {
      const gitRoot = execSync('git rev-parse --show-toplevel', {
        encoding: 'utf-8',
        // execSync forwards the child's stderr to the terminal by default; the
        // failure is handled below, so keep git's "fatal: ..." noise out of the report.
        stdio: ['ignore', 'pipe', 'ignore'],
      }).trim();
      workspaceDir = gitRoot || process.cwd();
    } catch {
      workspaceDir = process.cwd();
    }
  }
  return { workspaceDir };
}
|
|
44
|
+
|
|
45
|
+
// ─── Report helpers ───
|
|
46
|
+
// One { name, status, detail } entry per check() call, in execution order.
const results = [];

// Running tallies per outcome; printReport() shows them in the summary line.
let checksPassed = 0;
let checksFailed = 0;
let checksWarned = 0;
|
|
50
|
+
|
|
51
|
+
/**
 * Run one diagnostic checkpoint and record its outcome in `results`.
 *
 * Outcome rules:
 *  - `fn` returns `{ status: 'warn', detail }` -> recorded as a warning.
 *  - `fn` returns anything else               -> recorded as a pass; a string
 *    return value becomes the detail text.
 *  - `fn` throws                              -> recorded as a failure with the
 *    thrown value's message as detail.
 *
 * @param {string} name - Label shown in the report.
 * @param {() => (string | { status: string, detail?: string } | void)} fn -
 *   Synchronous checkpoint body.
 */
function check(name, fn) {
  try {
    const result = fn();
    if (result && result.status === 'warn') {
      checksWarned++;
      results.push({ name, status: 'warn', detail: result.detail || '' });
    } else {
      checksPassed++;
      results.push({ name, status: 'pass', detail: typeof result === 'string' ? result : '' });
    }
  } catch (err) {
    checksFailed++;
    // A checkpoint may throw a non-Error value (even null/undefined); reading
    // `.message` off it unguarded would crash the whole diagnostic run.
    const detail = (err && err.message) || String(err);
    results.push({ name, status: 'fail', detail });
  }
}
|
|
66
|
+
|
|
67
|
+
/**
 * Print the collected checkpoint results followed by a summary line.
 *
 * Sets `process.exitCode` to 1 when at least one checkpoint failed, so the
 * script exits non-zero without cutting output short.
 */
function printReport() {
  const heavyRule = '='.repeat(60);
  const lightRule = '-'.repeat(60);

  // Anything that is neither a pass nor a warning is displayed as a failure.
  const iconFor = (status) => {
    switch (status) {
      case 'pass':
        return '✅';
      case 'warn':
        return '⚠️ ';
      default:
        return '❌';
    }
  };

  console.log('\n' + heavyRule);
  console.log(' NOCTURNAL PIPELINE DIAGNOSTIC REPORT');
  console.log(' ' + new Date().toISOString());
  console.log(heavyRule);

  results.forEach(({ name, status, detail }) => {
    console.log(`\n${iconFor(status)} ${name}`);
    if (detail) {
      console.log(` ${detail}`);
    }
  });

  console.log('\n' + lightRule);
  console.log(` Summary: ${checksPassed} passed, ${checksWarned} warnings, ${checksFailed} failed`);
  console.log(lightRule + '\n');

  if (checksFailed > 0) {
    process.exitCode = 1;
  }
}
|
|
89
|
+
|
|
90
|
+
// ─── Main ───
|
|
91
|
+
/**
 * Run every Nocturnal pipeline checkpoint against the selected workspace and
 * print the report.
 *
 * Each checkpoint is wrapped in check(): a thrown error is recorded as a
 * failure, a returned `{ status: 'warn', detail }` as a warning, and anything
 * else as a pass. Workspace state is read from `<workspace>/.state`; the plugin
 * bundle is read from `<plugin>/dist/bundle.js`.
 */
function main() {
  const { workspaceDir } = parseArgs();
  const stateDir = join(workspaceDir, '.state');
  const bundlePath = join(PLUGIN_DIR, 'dist', 'bundle.js');

  console.log(`\n🔍 Diagnosing Nocturnal pipeline for workspace: ${workspaceDir}`);

  // ─────────────────────────────────────────────────────────
  // CHECKPOINT 1: State directory structure
  // ─────────────────────────────────────────────────────────
  check('1. State directory structure', () => {
    // All state dirs are inside .state/
    const required = ['sessions', 'logs', 'nocturnal', 'nocturnal/samples'];
    const missing = required.filter(rel => !existsSync(join(stateDir, rel)));
    if (missing.length > 0) throw new Error(`Missing directories: ${missing.join(', ')}`);
    return 'All required directories present';
  });

  // ─────────────────────────────────────────────────────────
  // CHECKPOINT 2: Session tracker persistence
  // ─────────────────────────────────────────────────────────
  check('2. Session tracker persistence', () => {
    const sessionsDir = join(stateDir, 'sessions');
    if (!existsSync(sessionsDir)) throw new Error('sessions/ directory missing');
    const files = readdirSync(sessionsDir).filter(f => f.endsWith('.json'));
    if (files.length === 0) {
      return { status: 'warn', detail: 'No session files found — idle check will report idle immediately' };
    }
    // Count the files that parse as JSON and carry both fields checked below.
    let validSessions = 0;
    for (const f of files) {
      try {
        const data = JSON.parse(readFileSync(join(sessionsDir, f), 'utf-8'));
        if (data.sessionId && data.lastActivityAt) validSessions++;
      } catch { /* corrupted or unreadable file: counted as invalid */ }
    }
    const summary = `${files.length} session files, ${validSessions} valid with sessionId+lastActivityAt`;
    // Session files exist but none is usable: surface it instead of passing silently.
    if (validSessions === 0) return { status: 'warn', detail: summary };
    return summary;
  });

  // ─────────────────────────────────────────────────────────
  // CHECKPOINT 3: Idle detection logic
  // ─────────────────────────────────────────────────────────
  check('3. Idle detection (checkWorkspaceIdle)', () => {
    // Same guard and message as checkpoint 8, so a missing build is reported
    // clearly rather than as a raw ENOENT.
    if (!existsSync(bundlePath)) throw new Error('dist/bundle.js missing — run build first');
    // Functions are minified — check for unique string markers instead.
    const content = readFileSync(bundlePath, 'utf-8');

    // Stable markers: log messages, object fields, event strings that survive minification.
    const markers = [
      { name: 'Workspace not idle', reason: 'preflight idle check log message' },
      { name: 'trigger', reason: 'system session detection (checks trigger field)' },
      { name: 'abandonedSessionIds', reason: 'IdleCheckResult field (preserved in object literal)' },
      { name: 'trajectoryGuardrailConfirmsIdle', reason: 'IdleCheckResult field' },
    ];
    const missing = markers.filter(m => !content.includes(m.name));
    if (missing.length > 0) {
      throw new Error(`Idle detection markers missing: ${missing.map(m => m.name).join(', ')}`);
    }

    // Check PR #256 fix: legacy session temporal guard
    // The fix adds `lastActivityAt` comparison before treating sessions as system sessions.
    // In minified code this appears as a comparison involving `lastActivityAt`.
    if (!content.includes('lastActivityAt')) {
      return { status: 'warn', detail: 'lastActivityAt reference not found — temporal guard for legacy sessions may be missing' };
    }
    return 'Idle detection functions present (verified via stable string markers)';
  });

  // ─────────────────────────────────────────────────────────
  // CHECKPOINT 4: Evolution queue
  // ─────────────────────────────────────────────────────────
  check('4. Evolution queue', () => {
    const queuePath = join(stateDir, 'evolution_queue.json');
    if (!existsSync(queuePath)) {
      return { status: 'warn', detail: 'No evolution queue — idle check has not yet enqueued a task' };
    }
    const queue = JSON.parse(readFileSync(queuePath, 'utf-8'));
    if (!Array.isArray(queue)) {
      throw new Error('evolution_queue.json does not contain an array');
    }
    const sleepTasks = queue.filter(t => t && t.taskKind === 'sleep_reflection');
    const pending = sleepTasks.filter(t => t.status === 'pending' || t.status === 'in_progress');
    const completed = sleepTasks.filter(t => t.status === 'completed');
    const failed = sleepTasks.filter(t => t.status === 'failed');

    if (pending.length > 0) return `${pending.length} pending sleep_reflection task(s) awaiting processing`;
    if (completed.length > 0) return `${completed.length} completed, ${failed.length} failed (total ${sleepTasks.length} tasks)`;
    // Failed-only queues used to fall through to the "no sleep_reflection tasks" message.
    if (failed.length > 0) {
      return { status: 'warn', detail: `${failed.length} sleep_reflection task(s) failed, none pending or completed` };
    }
    return { status: 'warn', detail: `Queue exists with ${queue.length} items but no sleep_reflection tasks` };
  });

  // ─────────────────────────────────────────────────────────
  // CHECKPOINT 5: Nocturnal samples (artifacts)
  // ─────────────────────────────────────────────────────────
  check('5. Nocturnal artifact persistence', () => {
    const samplesDir = join(stateDir, 'nocturnal', 'samples');
    if (!existsSync(samplesDir)) {
      return { status: 'warn', detail: 'No samples directory — no reflections have been persisted yet' };
    }
    const files = readdirSync(samplesDir).filter(f => f.endsWith('.json'));
    if (files.length === 0) return { status: 'warn', detail: 'samples/ directory exists but is empty' };

    // Validate most recent artifact (newest modification time first)
    const sorted = files
      .map(f => ({ name: f, mtime: statSync(join(samplesDir, f)).mtimeMs }))
      .sort((a, b) => b.mtime - a.mtime);
    const latest = sorted[0].name;
    const artifact = JSON.parse(readFileSync(join(samplesDir, latest), 'utf-8'));
    const hasRequired = artifact.artifactId && artifact.badDecision && artifact.betterDecision && artifact.rationale;
    if (!hasRequired) {
      return { status: 'warn', detail: `Latest artifact ${latest} is missing required fields` };
    }
    return `${files.length} artifact(s), latest: ${latest} (${artifact.principleId || 'unknown principle'})`;
  });

  // ─────────────────────────────────────────────────────────
  // CHECKPOINT 6: Workflow store
  // ─────────────────────────────────────────────────────────
  check('6. Nocturnal workflow store', () => {
    const workflowsPath = join(stateDir, 'nocturnal', 'workflows.json');
    if (!existsSync(workflowsPath)) {
      return { status: 'warn', detail: 'No workflows.json — no nocturnal workflows have been started' };
    }
    const workflows = JSON.parse(readFileSync(workflowsPath, 'utf-8'));
    if (!Array.isArray(workflows) || workflows.length === 0) {
      return { status: 'warn', detail: 'workflows.json is empty — no workflows recorded' };
    }
    const active = workflows.filter(w => w.state === 'active');
    const completed = workflows.filter(w => w.state === 'completed');
    const errored = workflows.filter(w => w.state === 'terminal_error');
    const expired = workflows.filter(w => w.state === 'expired');

    if (active.length > 0) {
      return { status: 'warn', detail: `${active.length} workflow(s) still active — may be in progress or stuck. IDs: ${active.map(w => w.workflow_id).join(', ')}` };
    }
    // Report counts; interpolating the arrays themselves would print "[object Object]" lists.
    return `${workflows.length} total: ${completed.length} completed, ${errored.length} errored, ${expired.length} expired`;
  });

  // ─────────────────────────────────────────────────────────
  // CHECKPOINT 7: Nocturnal runtime state (cooldown/quota)
  // ─────────────────────────────────────────────────────────
  check('7. Nocturnal runtime state (cooldown/quota)', () => {
    const runtimePath = join(stateDir, 'nocturnal-runtime.json');
    if (!existsSync(runtimePath)) {
      return 'No runtime state — no cooldown or quota restrictions';
    }
    const state = JSON.parse(readFileSync(runtimePath, 'utf-8'));
    const issues = [];

    if (state.globalCooldownUntil) {
      const cooldownEnd = new Date(state.globalCooldownUntil).getTime();
      if (cooldownEnd > Date.now()) {
        const remainingMin = Math.round((cooldownEnd - Date.now()) / 60000);
        issues.push(`global cooldown active (${remainingMin}min remaining)`);
      }
    }

    // NOTE(review): the 24h window and limit of 3 runs are hard-coded here;
    // presumably they mirror the runtime's quota settings — confirm.
    if (Array.isArray(state.recentRunTimestamps)) {
      const windowStart = Date.now() - 24 * 60 * 60 * 1000;
      const recentRuns = state.recentRunTimestamps
        .map(ts => new Date(ts).getTime())
        .filter(ts => ts > windowStart);
      if (recentRuns.length >= 3) {
        issues.push(`quota exhausted (${recentRuns.length}/3 runs used in 24h)`);
      }
    }

    if (issues.length > 0) {
      return { status: 'warn', detail: issues.join('; ') };
    }
    return 'No active cooldown or quota restrictions';
  });

  // ─────────────────────────────────────────────────────────
  // CHECKPOINT 8: Bundle health
  // ─────────────────────────────────────────────────────────
  check('8. Plugin bundle health', () => {
    if (!existsSync(bundlePath)) throw new Error('dist/bundle.js missing — run build first');

    const content = readFileSync(bundlePath, 'utf-8');

    // Use a mix of exported symbols and stable string markers.
    // Class names and exported symbols survive minification; internal function names don't.
    const markers = [
      'EvolutionWorkerService',            // exported class
      'checkPainFlag',                     // exported function
      'processEvolutionQueue',             // function reference
      'NocturnalWorkflowManager',          // exported class
      'executeNocturnalReflectionAsync',   // used in log messages
      'nocturnal_started',                 // event type string
      'nocturnal_completed',               // event type string
      'nocturnal_failed',                  // event type string
      'nocturnal_expired',                 // event type string
    ];
    const missing = markers.filter(m => !content.includes(m));
    if (missing.length > 0) throw new Error(`Missing critical symbols in bundle: ${missing.join(', ')}`);

    return `Bundle OK (${Math.round(content.length / 1024)}KB), all ${markers.length} critical markers present`;
  });

  // ─────────────────────────────────────────────────────────
  // CHECKPOINT 9: Git state — uncommitted changes that could break pipeline
  // ─────────────────────────────────────────────────────────
  check('9. Git state (uncommitted changes)', () => {
    try {
      const status = execSync('git status --porcelain', { encoding: 'utf-8', timeout: 5000, cwd: PLUGIN_DIR }).trim();
      if (!status) return 'Working tree clean';
      // --porcelain prints one line per changed path.
      const changedFiles = status.split('\n').length;
      return { status: 'warn', detail: `${changedFiles} uncommitted change(s) in plugin directory` };
    } catch {
      // Git unavailable, timed out, or not a repository: informational only.
      return { status: 'warn', detail: 'Could not check git status' };
    }
  });

  // ─────────────────────────────────────────────────────────
  // CHECKPOINT 10: Pain flag state
  // ─────────────────────────────────────────────────────────
  check('10. Pain flag state', () => {
    const painFlagPath = join(stateDir, '.pain_flag');
    if (!existsSync(painFlagPath)) {
      return 'No active pain flag';
    }
    // The flag file is read as "key: value" lines; only the first colon splits
    // a line, so values may themselves contain colons.
    const fields = {};
    for (const line of readFileSync(painFlagPath, 'utf-8').split('\n')) {
      const colonIdx = line.indexOf(':');
      if (colonIdx > 0) {
        fields[line.substring(0, colonIdx).trim()] = line.substring(colonIdx + 1).trim();
      }
    }
    if (!fields.score || !fields.reason) {
      return { status: 'warn', detail: 'Pain flag exists but is missing required fields (score, reason)' };
    }
    return `Pain flag active (score: ${fields.score}, source: ${fields.source || 'unknown'}, session: ${fields.session_id || 'none'})`;
  });

  // ─────────────────────────────────────────────────────────
  // CHECKPOINT 11: Trajectory data
  // ─────────────────────────────────────────────────────────
  check('11. Trajectory data availability', () => {
    const trajectoryPath = join(stateDir, 'trajectory.json');
    const trajectoryDir = join(stateDir, 'trajectory');
    const trajectoryDb = join(stateDir, 'trajectory.db');
    if (!existsSync(trajectoryPath) && !existsSync(trajectoryDir) && !existsSync(trajectoryDb)) {
      return { status: 'warn', detail: 'No trajectory data — snapshot extraction will use pain context fallback or fail' };
    }
    // Precedence when several stores exist: SQLite database, then JSON file, then directory.
    if (existsSync(trajectoryDb)) {
      const stat = statSync(trajectoryDb);
      return `Trajectory SQLite database present (${Math.round(stat.size / 1024)}KB)`;
    }
    // Check trajectory content
    if (existsSync(trajectoryPath)) {
      try {
        const data = JSON.parse(readFileSync(trajectoryPath, 'utf-8'));
        const entryCount = Array.isArray(data) ? data.length : Object.keys(data).length;
        return `${entryCount} trajectory entries available`;
      } catch {
        return { status: 'warn', detail: 'trajectory.json exists but is corrupted' };
      }
    }
    if (existsSync(trajectoryDir)) {
      const files = readdirSync(trajectoryDir).filter(f => f.endsWith('.json'));
      return `${files.length} trajectory file(s) available`;
    }
    return { status: 'warn', detail: 'Trajectory storage not found in expected locations' };
  });

  // ─────────────────────────────────────────────────────────
  // CHECKPOINT 12: Principle training state
  // ─────────────────────────────────────────────────────────
  check('12. Principle training state', () => {
    // Check multiple possible locations; the first one that exists is used.
    const candidates = [
      join(stateDir, 'nocturnal', 'training_store.json'),
      join(stateDir, 'principle_training_state.json'),
    ];
    const trainingPath = candidates.find(c => existsSync(c));
    if (!trainingPath) {
      return { status: 'warn', detail: 'No training_store.json or principle_training_state.json — NocturnalTargetSelector may not find evaluable principles' };
    }
    try {
      const store = JSON.parse(readFileSync(trainingPath, 'utf-8'));
      // Principles are read from `store.principles` when present, otherwise
      // from the top level of the store.
      const byId = store.principles || store;
      const principles = Object.keys(byId);
      if (principles.length === 0) {
        return { status: 'warn', detail: 'Training store exists but has no principles' };
      }
      const evaluable = principles.filter(p => {
        const pr = byId[p];
        return pr && pr.evaluability !== 'manual_only';
      });
      return `${principles.length} principle(s) in training store, ${evaluable.length} evaluable`;
    } catch {
      return { status: 'warn', detail: 'Training store exists but is corrupted' };
    }
  });

  printReport();
}
|
|
392
|
+
|
|
393
|
+
main();
|
package/scripts/sync-plugin.mjs
CHANGED
|
@@ -372,6 +372,7 @@ function verifyBundleContents() {
|
|
|
372
372
|
{ name: 'EvolutionWorkerService', reason: 'main plugin service export' },
|
|
373
373
|
{ name: 'checkPainFlag', reason: 'pain flag detection' },
|
|
374
374
|
{ name: 'processEvolutionQueue', reason: 'queue processing' },
|
|
375
|
+
{ name: 'acquireQueueLock', reason: 'queue lock for pd-reflect and worker' },
|
|
375
376
|
];
|
|
376
377
|
|
|
377
378
|
const missing = [];
|
|
package/src/core/nocturnal-arbiter.ts
CHANGED
|
@@ -623,11 +623,12 @@ export function validateArtifact(
|
|
|
623
623
|
// Rule 11: Quality threshold gate — reject low-signal artifacts
|
|
624
624
|
// A reflection artifact must show positive cognitive improvement (thinkingModelDelta > 0).
|
|
625
625
|
// planningRatioGain must not show catastrophic regression (< -0.5).
|
|
626
|
+
// #244: Use strict < so thinkingModelDelta=threshold passes (thin violations allowed at boundary)
|
|
626
627
|
if (
|
|
627
628
|
options.qualityThresholds?.thinkingModelDeltaMin !== undefined &&
|
|
628
629
|
thinkingModelDelta !== undefined &&
|
|
629
630
|
typeof thinkingModelDelta === 'number' &&
|
|
630
|
-
thinkingModelDelta
|
|
631
|
+
thinkingModelDelta < options.qualityThresholds.thinkingModelDeltaMin
|
|
631
632
|
) {
|
|
632
633
|
failures.push({
|
|
633
634
|
reason: `thinkingModelDelta (${thinkingModelDelta}) does not meet minimum quality threshold (${options.qualityThresholds.thinkingModelDeltaMin}) — reflection shows no cognitive improvement`,
|
package/src/hooks/subagent.ts
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import type { PluginHookSubagentEndedEvent, PluginHookSubagentContext, PluginLogger, OpenClawPluginApi } from '../openclaw-sdk.js';
|
|
2
2
|
import { buildPainFlag, writePainFlag } from '../core/pain.js';
|
|
3
3
|
import { WorkspaceContext } from '../core/workspace-context.js';
|
|
4
|
+
import { extractAgentIdFromSessionKey } from '../utils/session-key.js';
|
|
4
5
|
// No longer needed — diagnostician runs via HEARTBEAT, not subagent
|
|
5
6
|
import { recordEvolutionSuccess } from '../core/evolution-engine.js';
|
|
6
7
|
import { WorkflowStore } from '../service/subagent-workflow/workflow-store.js';
|
|
@@ -81,18 +82,6 @@ function emitSubagentPainEvent(
|
|
|
81
82
|
}
|
|
82
83
|
}
|
|
83
84
|
|
|
84
|
-
|
|
85
|
-
function extractAgentIdFromSessionKey(sessionKey: string | undefined): string | undefined {
|
|
86
|
-
// sessionKey format: "agent:{agentId}:{type}:{uuid}" or "agent:{agentId}:{uuid}"
|
|
87
|
-
if (!sessionKey) return undefined;
|
|
88
|
-
const match = /^agent:([^:]+):/.exec(sessionKey);
|
|
89
|
-
return match ? match[1] : undefined;
|
|
90
|
-
}
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
85
|
type SubagentEndedHookContext = PluginHookSubagentContext & {
|
|
97
86
|
api?: OpenClawPluginApi;
|
|
98
87
|
workspaceDir?: string;
|
package/src/index.ts
CHANGED
|
@@ -61,6 +61,7 @@ import { PathResolver, resolveWorkspaceDirFromApi } from './core/path-resolver.j
|
|
|
61
61
|
import { validateWorkspaceDir } from './core/workspace-dir-validation.js';
|
|
62
62
|
import { resolveRequiredWorkspaceDir, resolveWorkspaceDir, type WorkspaceResolutionContext } from './core/workspace-dir-service.js';
|
|
63
63
|
import { createPrinciplesConsoleRoute } from './http/principles-console-route.js';
|
|
64
|
+
import { extractAgentIdFromSessionKey } from './utils/session-key.js';
|
|
64
65
|
|
|
65
66
|
// Track initialization to avoid repeated calls
|
|
66
67
|
let workspaceInitialized = false;
|
|
@@ -423,11 +424,13 @@ const plugin = {
|
|
|
423
424
|
registerCommandWithAlias('pd-thinking', 'pdt', getCommandDescription('pd-thinking', language), (ctx: any) => handleThinkingOs(ctx), { acceptsArgs: true });
|
|
424
425
|
registerCommandWithAlias('pd-reflect', 'pdrl', getCommandDescription('pd-reflect', language), (ctx: any) => {
|
|
425
426
|
try {
|
|
426
|
-
|
|
427
|
-
|
|
427
|
+
// Resolve agentId from sessionKey (if available), fallback to 'main'
|
|
428
|
+
const agentId = extractAgentIdFromSessionKey(ctx.sessionKey) ?? 'main';
|
|
429
|
+
const workspaceDir = resolveRequiredWorkspaceDir(api, { ...ctx, agentId }, { source: 'pd-reflect', fallbackAgentId: 'main' });
|
|
430
|
+
return handlePdReflect.handler({ ...ctx, api, workspaceDir });
|
|
428
431
|
} catch (err) {
|
|
429
|
-
api.logger.error(`[PD] Command
|
|
430
|
-
return { text: language === 'zh' ?
|
|
432
|
+
api.logger.error(`[PD:pd-reflect] Command failed: ${String(err)}`);
|
|
433
|
+
return { text: language === 'zh' ? '命令执行失败,请查看日志。' : 'Command failed. Check logs.' };
|
|
431
434
|
}
|
|
432
435
|
});
|
|
433
436
|
registerCommandWithAlias('pd-daily', 'pdd', getCommandDescription('pd-daily', language), () => ({
|
|
package/src/service/evolution-worker.ts
CHANGED
|
@@ -71,6 +71,17 @@ async function runWorkflowWatchdog(
|
|
|
71
71
|
for (const wf of staleActive) {
|
|
72
72
|
const ageMin = Math.round((now - wf.created_at) / 60000);
|
|
73
73
|
details.push(`stale_active: ${wf.workflow_id} (${wf.workflow_type}, ${ageMin}min old)`);
|
|
74
|
+
|
|
75
|
+
// #257: Check if the last recorded event reason indicates expected subagent unavailability.
|
|
76
|
+
// If so, skip marking as terminal_error — the workflow is stale because the subagent
|
|
77
|
+
// was expectedly unavailable (daemon mode, process isolation), not due to a hard failure.
|
|
78
|
+
const events = store.getEvents(wf.workflow_id);
|
|
79
|
+
const lastEventReason = events.length > 0 ? events[events.length - 1].reason : 'unknown';
|
|
80
|
+
if (isExpectedSubagentError(lastEventReason)) {
|
|
81
|
+
logger?.debug?.(`[PD:Watchdog] Skipping stale active workflow ${wf.workflow_id}: expected subagent error (${lastEventReason})`);
|
|
82
|
+
continue;
|
|
83
|
+
}
|
|
84
|
+
|
|
74
85
|
store.updateWorkflowState(wf.workflow_id, 'terminal_error');
|
|
75
86
|
store.recordEvent(wf.workflow_id, 'watchdog_timeout', 'active', 'terminal_error', `Stale active > ${staleThreshold / 60000}s`, { ageMs: now - wf.created_at });
|
|
76
87
|
|
|
@@ -1596,6 +1607,7 @@ async function processEvolutionQueue(wctx: WorkspaceContext, logger: PluginLogge
|
|
|
1596
1607
|
snapshot: snapshotData,
|
|
1597
1608
|
taskId: sleepTask.id,
|
|
1598
1609
|
painContext: sleepTask.recentPainContext,
|
|
1610
|
+
triggerSource: sleepTask.source,
|
|
1599
1611
|
},
|
|
1600
1612
|
});
|
|
1601
1613
|
sleepTask.resultRef = workflowHandle.workflowId;
|
|
package/src/service/nocturnal-runtime.ts
CHANGED
|
@@ -60,6 +60,20 @@ function isSystemSession(state: SessionState): boolean {
|
|
|
60
60
|
if (sessionId?.startsWith('boot-')) return true;
|
|
61
61
|
if (sessionId?.startsWith('probe-')) return true;
|
|
62
62
|
|
|
63
|
+
// CRITICAL FIX: Legacy sessions from persistence may have missing trigger/sessionKey
|
|
64
|
+
// If both are missing AND the session is old (inactive > abandoned threshold),
|
|
65
|
+
// treat as legacy/orphan to avoid blocking idle detection with unknown sessions.
|
|
66
|
+
// Recent sessions without trigger/sessionKey are likely real user sessions still
|
|
67
|
+
// being enriched — do NOT classify them as system sessions.
|
|
68
|
+
const ABANDONED_THRESHOLD_MS = 2 * 60 * 60 * 1000; // 2 hours
|
|
69
|
+
if (!trigger && !sessionKey) {
|
|
70
|
+
const inactiveFor = Date.now() - state.lastActivityAt;
|
|
71
|
+
if (inactiveFor > ABANDONED_THRESHOLD_MS) {
|
|
72
|
+
return true; // Legacy/orphan session — don't block idle detection
|
|
73
|
+
}
|
|
74
|
+
// Recent session without metadata — likely a real user session, let it through
|
|
75
|
+
}
|
|
76
|
+
|
|
63
77
|
return false;
|
|
64
78
|
}
|
|
65
79
|
|
|
package/src/service/subagent-workflow/nocturnal-workflow-manager.ts
CHANGED
|
@@ -210,7 +210,7 @@ export class NocturnalWorkflowManager implements WorkflowManager {
|
|
|
210
210
|
|
|
211
211
|
// Extract snapshot and principleId from taskInput.metadata (NOC-07: Trinity async path)
|
|
212
212
|
const snapshotValidation = validateNocturnalSnapshotIngress(options.metadata?.snapshot);
|
|
213
|
-
const snapshot = snapshotValidation
|
|
213
|
+
const {snapshot} = snapshotValidation;
|
|
214
214
|
const principleId = options.metadata?.principleId as string | undefined;
|
|
215
215
|
// Extract painContext for Selector ranking bias
|
|
216
216
|
const painContext = options.metadata?.painContext as RecentPainContext | undefined;
|
|
@@ -254,6 +254,22 @@ export class NocturnalWorkflowManager implements WorkflowManager {
|
|
|
254
254
|
},
|
|
255
255
|
// Pass painContext for Selector ranking bias
|
|
256
256
|
painContext,
|
|
257
|
+
// #244: Only skip preflight idle gate for manual/test triggers.
|
|
258
|
+
// Automatic triggers must go through normal idle check.
|
|
259
|
+
...(((options.metadata)?.triggerSource === 'manual' ||
|
|
260
|
+
(options.metadata)?.triggerSource === 'test')
|
|
261
|
+
? {
|
|
262
|
+
idleCheckOverride: {
|
|
263
|
+
isIdle: true,
|
|
264
|
+
mostRecentActivityAt: Date.now() - 1800000,
|
|
265
|
+
idleForMs: 1800000,
|
|
266
|
+
userActiveSessions: 0,
|
|
267
|
+
abandonedSessionIds: [],
|
|
268
|
+
trajectoryGuardrailConfirmsIdle: true,
|
|
269
|
+
reason: 'manual/test override',
|
|
270
|
+
},
|
|
271
|
+
}
|
|
272
|
+
: {}),
|
|
257
273
|
// Skip Selector if principleId and snapshot are provided
|
|
258
274
|
...(principleId && snapshot ? {
|
|
259
275
|
principleIdOverride: principleId,
|
|
package/src/utils/session-key.ts
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Session key parsing utilities.
|
|
3
|
+
*
|
|
4
|
+
* Session key format: "agent:{agentId}:{type}:{uuid}" or "agent:{agentId}:{uuid}"
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
/**
 * Pull the agent id out of a session key.
 *
 * The key must start with `agent:` followed by a non-empty, colon-free segment
 * and another colon; that segment, with surrounding whitespace removed, is the
 * agent id.
 *
 * @param sessionKey - Session key to parse; may be undefined.
 * @returns The trimmed agent id, or `undefined` when the key is missing, does
 *   not match the expected shape, or the id segment is only whitespace.
 */
export function extractAgentIdFromSessionKey(sessionKey: string | undefined): string | undefined {
  // Undefined and empty keys cannot carry an agent id.
  if (!sessionKey) {
    return undefined;
  }
  const parsed = /^agent:([^:]+):/.exec(sessionKey);
  const candidate = parsed?.[1].trim();
  // A whitespace-only segment trims to '' and is treated as absent.
  return candidate ? candidate : undefined;
}
|
|
package/tests/core/nocturnal-arbiter.test.ts
CHANGED
|
@@ -491,4 +491,61 @@ describe('Nocturnal Arbiter', () => {
|
|
|
491
491
|
expect(result.artifact?.sourceSnapshotRef).toBe('');
|
|
492
492
|
});
|
|
493
493
|
});
|
|
494
|
+
|
|
495
|
+
// -------------------------------------------------------------------------
|
|
496
|
+
// Tests: quality threshold gates (Rule 10/11)
|
|
497
|
+
// -------------------------------------------------------------------------
|
|
498
|
+
|
|
499
|
+
describe('quality threshold gates', () => {
  // Thresholds used by every case in this suite.
  const defaultThresholds = { thinkingModelDeltaMin: 0.05, planningRatioGainMin: -0.5 };
  const validationOptions = { qualityThresholds: defaultThresholds };

  // True when the validation result reports a failure for the given field.
  const hasFailureFor = (outcome: ReturnType<typeof validateArtifact>, fieldName: string) =>
    outcome.failures.some(failure => failure.field === fieldName);

  it('rejects when thinkingModelDelta is below threshold', () => {
    const outcome = validateArtifact(makeValidArtifact({ thinkingModelDelta: 0.03 }), validationOptions);

    expect(outcome.passed).toBe(false);
    expect(outcome.failures).toHaveLength(1);
    expect(outcome.failures[0].field).toBe('thinkingModelDelta');
  });

  it('passes when thinkingModelDelta equals threshold exactly (boundary value)', () => {
    const outcome = validateArtifact(makeValidArtifact({ thinkingModelDelta: 0.05 }), validationOptions);

    expect(outcome.passed).toBe(true);
  });

  it('passes when thinkingModelDelta exceeds threshold', () => {
    const outcome = validateArtifact(makeValidArtifact({ thinkingModelDelta: 0.15 }), validationOptions);

    expect(outcome.passed).toBe(true);
  });

  it('passes when thinkingModelDelta is absent (optional field)', () => {
    const candidate = makeValidArtifact();
    delete candidate.thinkingModelDelta;

    const outcome = validateArtifact(candidate, validationOptions);

    expect(outcome.passed).toBe(true);
  });

  it('rejects when planningRatioGain is below threshold', () => {
    const outcome = validateArtifact(makeValidArtifact({ planningRatioGain: -0.6 }), validationOptions);

    expect(outcome.passed).toBe(false);
    expect(hasFailureFor(outcome, 'planningRatioGain')).toBe(true);
  });

  it('passes when planningRatioGain equals threshold exactly (boundary value)', () => {
    const outcome = validateArtifact(makeValidArtifact({ planningRatioGain: -0.5 }), validationOptions);

    expect(outcome.passed).toBe(true);
  });

  it('rejects both quality thresholds simultaneously', () => {
    const candidate = makeValidArtifact({ thinkingModelDelta: 0.01, planningRatioGain: -0.8 });
    const outcome = validateArtifact(candidate, validationOptions);

    expect(outcome.passed).toBe(false);
    expect(outcome.failures.length).toBeGreaterThanOrEqual(2);
    expect(hasFailureFor(outcome, 'thinkingModelDelta')).toBe(true);
    expect(hasFailureFor(outcome, 'planningRatioGain')).toBe(true);
  });
});
|
|
494
551
|
});
|
|
@@ -35,18 +35,22 @@ vi.mock('../../src/service/subagent-workflow/nocturnal-workflow-manager.js', ()
|
|
|
35
35
|
|
|
36
36
|
const { mockGetNocturnalSessionSnapshot, mockListRecentNocturnalCandidateSessions } = vi.hoisted(() => ({
|
|
37
37
|
mockGetNocturnalSessionSnapshot: vi.fn(),
|
|
38
|
-
mockListRecentNocturnalCandidateSessions: vi.fn(() => []),
|
|
38
|
+
mockListRecentNocturnalCandidateSessions: vi.fn(() => [] as Array<{ sessionId: string; startedAt: string; failureCount: number; painEventCount: number; gateBlockCount: number }>),
|
|
39
39
|
}));
|
|
40
|
+
|
|
41
|
+
// Shared mock extractor instance so spy calls are tracked on the same
// vi.fn() objects regardless of how many times the factory is invoked.
//
// It is created through vi.hoisted because the vi.mock factory that returns
// it is hoisted above ordinary top-level statements. A plain `const` here is
// only safe while every read of it is deferred until after module evaluation;
// hoisting guarantees it is initialised before any mocked-module code can run.
const mockExtractorInstance = vi.hoisted(() => ({
  getNocturnalSessionSnapshot: mockGetNocturnalSessionSnapshot,
  listRecentNocturnalCandidateSessions: mockListRecentNocturnalCandidateSessions,
}));
|
|
46
|
+
|
|
40
47
|
vi.mock('../../src/core/nocturnal-trajectory-extractor.js', async () => {
|
|
41
48
|
const actual = await vi.importActual<typeof import('../../src/core/nocturnal-trajectory-extractor.js')>(
|
|
42
49
|
'../../src/core/nocturnal-trajectory-extractor.js'
|
|
43
50
|
);
|
|
44
51
|
return {
|
|
45
52
|
...actual,
|
|
46
|
-
createNocturnalTrajectoryExtractor: vi.fn(() =>
|
|
47
|
-
getNocturnalSessionSnapshot: mockGetNocturnalSessionSnapshot,
|
|
48
|
-
listRecentNocturnalCandidateSessions: mockListRecentNocturnalCandidateSessions,
|
|
49
|
-
})),
|
|
53
|
+
createNocturnalTrajectoryExtractor: vi.fn(() => mockExtractorInstance),
|
|
50
54
|
};
|
|
51
55
|
});
|
|
52
56
|
|
|
@@ -55,6 +59,17 @@ import { WorkspaceContext } from '../../src/core/workspace-context.js';
|
|
|
55
59
|
import { handlePdReflect } from '../../src/commands/pd-reflect.js';
|
|
56
60
|
import { safeRmDir } from '../test-utils.js';
|
|
57
61
|
|
|
62
|
+
/**
 * Build a minimal stand-in for the plugin API used by the E2E tests.
 *
 * Every logger method and the embedded-agent runner are fresh `vi.fn()`
 * spies, so each call returns an independent set of mocks.
 */
function createMockApi() {
  const logger = { info: vi.fn(), warn: vi.fn(), error: vi.fn(), debug: vi.fn() };
  const runtime = { agent: { runEmbeddedPiAgent: vi.fn() } };

  return { logger, runtime } as any;
}
|
|
69
|
+
|
|
70
|
+
// Config stub for a fast poll cycle: answers 100 for the worker poll
// interval key and undefined for every other key.
const fastPollConfig = {
  get(k: string) {
    if (k === 'intervals.worker_poll_ms') {
      return 100;
    }
    return undefined;
  },
};
|
|
72
|
+
|
|
58
73
|
function readQueue(stateDir: string) {
|
|
59
74
|
return JSON.parse(fs.readFileSync(path.join(stateDir, 'evolution_queue.json'), 'utf8'));
|
|
60
75
|
}
|
|
@@ -93,11 +108,11 @@ session_id: explicit-session-from-pain
|
|
|
93
108
|
|
|
94
109
|
try {
|
|
95
110
|
const context = readRecentPainContext(wctx);
|
|
96
|
-
|
|
111
|
+
|
|
97
112
|
// Verify the session_id was extracted from the pain flag file
|
|
98
113
|
expect(context.mostRecent).toBeDefined();
|
|
99
|
-
expect(context.mostRecent
|
|
100
|
-
expect(context.mostRecent
|
|
114
|
+
expect(context.mostRecent!.sessionId).toBe('explicit-session-from-pain');
|
|
115
|
+
expect(context.mostRecent!.score).toBe(80);
|
|
101
116
|
expect(context.recentPainCount).toBe(1);
|
|
102
117
|
} finally {
|
|
103
118
|
safeRmDir(workspaceDir);
|
|
@@ -155,9 +170,9 @@ session_id: pain-session-abc
|
|
|
155
170
|
|
|
156
171
|
// Contract: session_id must be extracted from the pain flag
|
|
157
172
|
expect(painContext.mostRecent).toBeDefined();
|
|
158
|
-
expect(painContext.mostRecent
|
|
159
|
-
expect(painContext.mostRecent
|
|
160
|
-
expect(painContext.mostRecent
|
|
173
|
+
expect(painContext.mostRecent!.sessionId).toBe('pain-session-abc');
|
|
174
|
+
expect(painContext.mostRecent!.score).toBe(70);
|
|
175
|
+
expect(painContext.mostRecent!.source).toBe('tool_failure');
|
|
161
176
|
|
|
162
177
|
// Now simulate what the worker does: attach this context to a queued task
|
|
163
178
|
const simulatedTask = {
|
|
@@ -167,7 +182,7 @@ session_id: pain-session-abc
|
|
|
167
182
|
};
|
|
168
183
|
|
|
169
184
|
// Verify the contract holds end-to-end
|
|
170
|
-
expect(simulatedTask.recentPainContext.mostRecent
|
|
185
|
+
expect(simulatedTask.recentPainContext.mostRecent!.sessionId).toBe('pain-session-abc');
|
|
171
186
|
});
|
|
172
187
|
|
|
173
188
|
it('e2e: /pd-reflect command writes to workspace/.state, never to HOME/.state', async () => {
|
|
@@ -214,4 +229,359 @@ session_id: pain-session-abc
|
|
|
214
229
|
safeRmDir(workspaceDir);
|
|
215
230
|
}
|
|
216
231
|
});
|
|
232
|
+
|
|
233
|
+
// === Nocturnal E2E Pipeline Tests (from PR #243) ===
|
|
234
|
+
|
|
235
|
+
// Scenario: the queue holds one pending sleep_reflection task whose
// recentPainContext carries no pain signal (mostRecent: null, counts 0) and
// the snapshot extractor mock returns null. The task is expected to end up
// 'failed' with an invalid_snapshot_ingress error and no workflow started.
it('does not start a nocturnal workflow when only an empty fallback snapshot is available', async () => {
  // Isolated temp workspace with the .state/sessions and .state/logs folders.
  const workspaceDir = fs.mkdtempSync(path.join(os.tmpdir(), 'pd-nocturnal-empty-'));
  const stateDir = path.join(workspaceDir, '.state');
  fs.mkdirSync(path.join(stateDir, 'sessions'), { recursive: true });
  fs.mkdirSync(path.join(stateDir, 'logs'), { recursive: true });

  // No real session snapshot is available for any session id.
  mockGetNocturnalSessionSnapshot.mockReturnValue(null);

  // Seed the evolution queue with a single pending task.
  fs.writeFileSync(
    path.join(stateDir, 'evolution_queue.json'),
    JSON.stringify([
      {
        id: 'sleep-empty',
        taskKind: 'sleep_reflection',
        priority: 'medium',
        score: 50,
        source: 'nocturnal',
        reason: 'Sleep reflection',
        timestamp: '2026-04-10T00:00:00.000Z',
        enqueued_at: '2026-04-10T00:00:00.000Z',
        status: 'pending',
        retryCount: 0,
        maxRetries: 1,
        recentPainContext: {
          mostRecent: null,
          recentPainCount: 0,
          recentMaxPainScore: 0,
        },
      },
    ], null, 2),
    'utf8'
  );

  const mockApi = createMockApi();
  EvolutionWorkerService.api = mockApi;

  try {
    EvolutionWorkerService.start({
      workspaceDir,
      stateDir,
      logger: mockApi.logger,
      config: fastPollConfig,
      api: mockApi,
    } as any);

    // Advance timers so the worker gets to process the queue.
    // NOTE(review): presumably fake timers are enabled in a setup hook outside this hunk — confirm.
    await vi.advanceTimersByTimeAsync(6000);

    const queue = readQueue(stateDir);
    expect(queue[0].status).toBe('failed');
    expect(queue[0].lastError).toContain('invalid_snapshot_ingress');
    expect(queue[0].lastError).toContain('fallback snapshot must contain at least one pain signal');
    expect(queue[0].resultRef).toBeFalsy();
    expect(mockStartWorkflow).not.toHaveBeenCalled();
  } finally {
    // Always stop the service and remove the temp workspace, even on assertion failure.
    EvolutionWorkerService.stop!({ workspaceDir, stateDir, logger: console } as any);
    safeRmDir(workspaceDir);
  }
});
|
|
293
|
+
|
|
294
|
+
// Scenario: the workflow starts, but its debug summary reports a
// terminal_error whose most recent event says the plugin runtime subagent
// methods are only available during a gateway request. The task is expected
// to be marked 'completed' with resolution 'stub_fallback'.
it('uses stub_fallback for expected gateway-only background unavailability', async () => {
  // Isolated temp workspace with the .state/sessions and .state/logs folders.
  const workspaceDir = fs.mkdtempSync(path.join(os.tmpdir(), 'pd-nocturnal-gateway-'));
  const stateDir = path.join(workspaceDir, '.state');
  fs.mkdirSync(path.join(stateDir, 'sessions'), { recursive: true });
  fs.mkdirSync(path.join(stateDir, 'logs'), { recursive: true });

  // Snapshot returned for every session id requested from the extractor mock.
  mockGetNocturnalSessionSnapshot.mockReturnValue({
    sessionId: 'sleep-gateway',
    startedAt: '2026-04-10T00:00:00.000Z',
    updatedAt: '2026-04-10T00:01:00.000Z',
    assistantTurns: [],
    userTurns: [],
    toolCalls: [],
    painEvents: [],
    gateBlocks: [],
    stats: { totalAssistantTurns: 1, totalToolCalls: 1, totalPainEvents: 0, totalGateBlocks: 0, failureCount: 0 },
  });
  // Workflow start succeeds; the later debug summary reports the gateway-only error.
  mockStartWorkflow.mockResolvedValue({ workflowId: 'wf-1', childSessionKey: 'child-1', state: 'active' });
  mockGetWorkflowDebugSummary.mockResolvedValue({
    state: 'terminal_error',
    metadata: {},
    recentEvents: [{ reason: 'Error: Plugin runtime subagent methods are only available during a gateway request.', payload: {} }],
  });

  // Seed the evolution queue with a single pending task that has a pain signal.
  fs.writeFileSync(
    path.join(stateDir, 'evolution_queue.json'),
    JSON.stringify([
      {
        id: 'sleep-gateway',
        taskKind: 'sleep_reflection',
        priority: 'medium',
        score: 50,
        source: 'nocturnal',
        reason: 'Sleep reflection',
        timestamp: '2026-04-10T00:00:00.000Z',
        enqueued_at: '2026-04-10T00:00:00.000Z',
        status: 'pending',
        retryCount: 0,
        maxRetries: 1,
        recentPainContext: {
          mostRecent: { source: 'test', score: 50, reason: 'test', timestamp: '2026-04-10T00:00:00.000Z', sessionId: 'sleep-gateway' },
          recentPainCount: 1,
          recentMaxPainScore: 50,
        },
      },
    ], null, 2),
    'utf8'
  );

  const mockApi = createMockApi();
  EvolutionWorkerService.api = mockApi;

  try {
    EvolutionWorkerService.start({
      workspaceDir,
      stateDir,
      logger: mockApi.logger,
      config: fastPollConfig,
      api: mockApi,
    } as any);

    // Advance timers so the worker gets to process the queue.
    // NOTE(review): presumably fake timers are enabled in a setup hook outside this hunk — confirm.
    await vi.advanceTimersByTimeAsync(6000);

    const queue = readQueue(stateDir);
    expect(queue[0].status).toBe('completed');
    expect(queue[0].resolution).toBe('stub_fallback');
  } finally {
    // Always stop the service and remove the temp workspace, even on assertion failure.
    EvolutionWorkerService.stop!({ workspaceDir, stateDir, logger: console } as any);
    safeRmDir(workspaceDir);
  }
});
|
|
365
|
+
|
|
366
|
+
// Scenario: starting the workflow rejects with
// 'NocturnalWorkflowManager: subagent runtime unavailable'. The task is
// expected to be marked 'completed' with resolution 'stub_fallback'.
it('uses stub_fallback for expected subagent runtime unavailability', async () => {
  // Isolated temp workspace with the .state/sessions and .state/logs folders.
  const workspaceDir = fs.mkdtempSync(path.join(os.tmpdir(), 'pd-nocturnal-subagent-'));
  const stateDir = path.join(workspaceDir, '.state');
  fs.mkdirSync(path.join(stateDir, 'sessions'), { recursive: true });
  fs.mkdirSync(path.join(stateDir, 'logs'), { recursive: true });

  // Snapshot returned for every session id requested from the extractor mock.
  mockGetNocturnalSessionSnapshot.mockReturnValue({
    sessionId: 'sleep-subagent',
    startedAt: '2026-04-10T00:00:00.000Z',
    updatedAt: '2026-04-10T00:01:00.000Z',
    assistantTurns: [],
    userTurns: [],
    toolCalls: [],
    painEvents: [],
    gateBlocks: [],
    stats: { totalAssistantTurns: 1, totalToolCalls: 1, totalPainEvents: 0, totalGateBlocks: 0, failureCount: 0 },
  });
  // The workflow manager mock fails at start time with the runtime-unavailable error.
  mockStartWorkflow.mockRejectedValue(new Error('NocturnalWorkflowManager: subagent runtime unavailable'));

  // Seed the evolution queue with a single pending task that has a pain signal.
  fs.writeFileSync(
    path.join(stateDir, 'evolution_queue.json'),
    JSON.stringify([
      {
        id: 'sleep-subagent',
        taskKind: 'sleep_reflection',
        priority: 'medium',
        score: 50,
        source: 'nocturnal',
        reason: 'Sleep reflection',
        timestamp: '2026-04-10T00:00:00.000Z',
        enqueued_at: '2026-04-10T00:00:00.000Z',
        status: 'pending',
        retryCount: 0,
        maxRetries: 1,
        recentPainContext: {
          mostRecent: { source: 'test', score: 50, reason: 'test', timestamp: '2026-04-10T00:00:00.000Z', sessionId: 'sleep-subagent' },
          recentPainCount: 1,
          recentMaxPainScore: 50,
        },
      },
    ], null, 2),
    'utf8'
  );

  const mockApi = createMockApi();
  EvolutionWorkerService.api = mockApi;

  try {
    EvolutionWorkerService.start({
      workspaceDir,
      stateDir,
      logger: mockApi.logger,
      config: fastPollConfig,
      api: mockApi,
    } as any);

    // Advance timers so the worker gets to process the queue.
    // NOTE(review): presumably fake timers are enabled in a setup hook outside this hunk — confirm.
    await vi.advanceTimersByTimeAsync(6000);

    const queue = readQueue(stateDir);
    expect(queue[0].status).toBe('completed');
    expect(queue[0].resolution).toBe('stub_fallback');
  } finally {
    // Always stop the service and remove the temp workspace, even on assertion failure.
    EvolutionWorkerService.stop!({ workspaceDir, stateDir, logger: console } as any);
    safeRmDir(workspaceDir);
  }
});
|
|
432
|
+
|
|
433
|
+
// Scenario: the queued task's recentPainContext names a specific session, and
// the extractor mock only yields a snapshot for that session id. The workflow
// is expected to start exactly once with that session's snapshot in its
// metadata.
it('prioritizes pain signal session ID for snapshot extraction', async () => {
  // Isolated temp workspace with the .state/sessions and .state/logs folders.
  const workspaceDir = fs.mkdtempSync(path.join(os.tmpdir(), 'pd-nocturnal-pain-session-'));
  const stateDir = path.join(workspaceDir, '.state');
  fs.mkdirSync(path.join(stateDir, 'sessions'), { recursive: true });
  fs.mkdirSync(path.join(stateDir, 'logs'), { recursive: true });

  const painSessionId = 'pain-signal-session-123';

  // Only the pain-signal session resolves to a snapshot; any other id yields null.
  mockGetNocturnalSessionSnapshot.mockImplementation((sessionId: string) => {
    if (sessionId === painSessionId) {
      return {
        sessionId: painSessionId,
        startedAt: '2026-04-09T23:00:00.000Z',
        updatedAt: '2026-04-09T23:01:00.000Z',
        assistantTurns: [],
        userTurns: [],
        toolCalls: [],
        painEvents: [{ source: 'tool_failure', score: 70, severity: null, reason: 'test', createdAt: '2026-04-09T23:00:00.000Z' }],
        gateBlocks: [],
        stats: { totalAssistantTurns: 1, totalToolCalls: 1, failureCount: 1, totalPainEvents: 1, totalGateBlocks: 0 },
      };
    }
    return null;
  });
  mockStartWorkflow.mockResolvedValue({ workflowId: 'wf-pain', childSessionKey: 'child-pain', state: 'active' });

  // Seed the evolution queue with a task whose pain context points at painSessionId.
  fs.writeFileSync(
    path.join(stateDir, 'evolution_queue.json'),
    JSON.stringify([
      {
        id: 'sleep-pain-priority',
        taskKind: 'sleep_reflection',
        priority: 'medium',
        score: 50,
        source: 'nocturnal',
        reason: 'Sleep reflection',
        timestamp: '2026-04-10T00:00:00.000Z',
        enqueued_at: '2026-04-10T00:00:00.000Z',
        status: 'pending',
        retryCount: 0,
        maxRetries: 1,
        recentPainContext: {
          mostRecent: { source: 'tool_failure', score: 70, reason: 'test', timestamp: '2026-04-10T00:00:00.000Z', sessionId: painSessionId },
          recentPainCount: 1,
          recentMaxPainScore: 70,
        },
      },
    ], null, 2),
    'utf8'
  );

  const mockApi = createMockApi();
  EvolutionWorkerService.api = mockApi;

  try {
    EvolutionWorkerService.start({
      workspaceDir,
      stateDir,
      logger: mockApi.logger,
      config: fastPollConfig,
      api: mockApi,
    } as any);

    // Advance timers so the worker gets to process the queue.
    // NOTE(review): presumably fake timers are enabled in a setup hook outside this hunk — confirm.
    await vi.advanceTimersByTimeAsync(6000);

    expect(mockStartWorkflow).toHaveBeenCalledTimes(1);
    // The second argument of the first startWorkflow call carries the metadata.
    const metadata = mockStartWorkflow.mock.calls[0][1].metadata;
    expect(metadata.snapshot.sessionId).toBe(painSessionId);
  } finally {
    // Always stop the service and remove the temp workspace, even on assertion failure.
    EvolutionWorkerService.stop!({ workspaceDir, stateDir, logger: console } as any);
    safeRmDir(workspaceDir);
  }
});
|
|
506
|
+
|
|
507
|
+
// Scenario: the pain context names 'pain-session', for which the extractor
// mock has no snapshot. The candidate list offers one session that started
// before the task timestamp ('valid-session') and one that started after it
// ('invalid-session'). The workflow is expected to start once with the
// 'valid-session' snapshot, whose startedAt is not later than the task.
it('e2e: bounded session selection — never picks a session newer than the triggering task', async () => {
  // Isolated temp workspace with the .state/sessions and .state/logs folders.
  const workspaceDir = fs.mkdtempSync(path.join(os.tmpdir(), 'pd-nocturnal-e2e-bounded-'));
  const stateDir = path.join(workspaceDir, '.state');
  fs.mkdirSync(path.join(stateDir, 'sessions'), { recursive: true });
  fs.mkdirSync(path.join(stateDir, 'logs'), { recursive: true });

  const taskTimestamp = '2026-04-10T00:00:00.000Z';
  // One hour before the task timestamp.
  const validSessionTimestamp = '2026-04-09T23:00:00.000Z';
  // One hour after the task timestamp.
  const invalidSessionTimestamp = '2026-04-10T01:00:00.000Z';

  // Only 'valid-session' resolves to a snapshot; any other id yields null.
  // NOTE(review): because 'invalid-session' also yields null here, this test may
  // pass even without the timestamp bound being enforced — consider returning a
  // snapshot for 'invalid-session' too so the bound itself is exercised.
  mockGetNocturnalSessionSnapshot.mockImplementation((sessionId: string) => {
    if (sessionId === 'valid-session') {
      return {
        sessionId: 'valid-session',
        startedAt: validSessionTimestamp,
        updatedAt: validSessionTimestamp,
        assistantTurns: [],
        userTurns: [],
        toolCalls: [],
        painEvents: [{ source: 'tool_failure', score: 50, severity: null, reason: 'test', createdAt: validSessionTimestamp }],
        gateBlocks: [],
        stats: { totalAssistantTurns: 1, totalToolCalls: 1, failureCount: 1, totalPainEvents: 1, totalGateBlocks: 0 },
      };
    }
    return null;
  });
  // Candidate sessions offered by the extractor mock: one older, one newer than the task.
  mockListRecentNocturnalCandidateSessions.mockReturnValue([
    { sessionId: 'valid-session', startedAt: validSessionTimestamp, failureCount: 1, painEventCount: 1, gateBlockCount: 0 },
    { sessionId: 'invalid-session', startedAt: invalidSessionTimestamp, failureCount: 1, painEventCount: 0, gateBlockCount: 0 },
  ]);
  mockStartWorkflow.mockResolvedValue({ workflowId: 'wf-bounded', childSessionKey: 'child-bounded', state: 'active' });

  // Seed the evolution queue with a single pending task stamped at taskTimestamp.
  fs.writeFileSync(
    path.join(stateDir, 'evolution_queue.json'),
    JSON.stringify([
      {
        id: 'sleep-e2e-bounded',
        taskKind: 'sleep_reflection',
        priority: 'medium',
        score: 50,
        source: 'nocturnal',
        reason: 'Sleep reflection',
        timestamp: taskTimestamp,
        enqueued_at: taskTimestamp,
        status: 'pending',
        retryCount: 0,
        maxRetries: 1,
        recentPainContext: {
          mostRecent: { source: 'test', score: 50, reason: 'test', timestamp: taskTimestamp, sessionId: 'pain-session' },
          recentPainCount: 1,
          recentMaxPainScore: 50,
        },
      },
    ], null, 2),
    'utf8'
  );

  const mockApi = createMockApi();
  EvolutionWorkerService.api = mockApi;

  try {
    EvolutionWorkerService.start({
      workspaceDir,
      stateDir,
      logger: mockApi.logger,
      config: fastPollConfig,
      api: mockApi,
    } as any);

    // Advance timers so the worker gets to process the queue.
    // NOTE(review): presumably fake timers are enabled in a setup hook outside this hunk — confirm.
    await vi.advanceTimersByTimeAsync(6000);

    expect(mockStartWorkflow).toHaveBeenCalledTimes(1);
    // The second argument of the first startWorkflow call carries the metadata.
    const metadata = mockStartWorkflow.mock.calls[0][1].metadata;
    expect(metadata.snapshot.sessionId).toBe('valid-session');
    expect(new Date(metadata.snapshot.startedAt).getTime()).toBeLessThanOrEqual(new Date(taskTimestamp).getTime());
  } finally {
    // Always stop the service and remove the temp workspace, even on assertion failure.
    EvolutionWorkerService.stop!({ workspaceDir, stateDir, logger: console } as any);
    safeRmDir(workspaceDir);
  }
});
|
|
217
587
|
});
|