principles-disciple 1.28.2 → 1.29.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/openclaw.plugin.json +1 -1
- package/package.json +1 -1
- package/scripts/acceptance-test.mjs +314 -0
- package/scripts/seed-nocturnal-scenarios.mjs +16 -9
- package/scripts/validate-live-path.ts +141 -17
- package/src/commands/archive-impl.ts +3 -0
- package/src/commands/context.ts +4 -1
- package/src/commands/disable-impl.ts +2 -2
- package/src/commands/evolution-status.ts +2 -2
- package/src/commands/focus.ts +4 -2
- package/src/commands/nocturnal-train.ts +9 -1
- package/src/commands/pain.ts +3 -1
- package/src/commands/pd-reflect.ts +2 -0
- package/src/commands/rollback-impl.ts +5 -1
- package/src/commands/rollback.ts +2 -1
- package/src/commands/samples.ts +1 -0
- package/src/commands/workflow-debug.ts +1 -0
- package/src/core/adaptive-thresholds.ts +2 -1
- package/src/core/code-implementation-storage.ts +2 -0
- package/src/core/config.ts +1 -0
- package/src/core/diagnostician-task-store.ts +2 -0
- package/src/core/empathy-keyword-matcher.ts +4 -1
- package/src/core/event-log.ts +6 -3
- package/src/core/evolution-engine.ts +4 -1
- package/src/core/evolution-logger.ts +1 -0
- package/src/core/external-training-contract.ts +2 -1
- package/src/core/focus-history.ts +15 -3
- package/src/core/init.ts +3 -1
- package/src/core/merge-gate-audit.ts +3 -0
- package/src/core/model-deployment-registry.ts +1 -0
- package/src/core/model-training-registry.ts +1 -0
- package/src/core/nocturnal-arbiter.ts +4 -3
- package/src/core/nocturnal-candidate-scoring.ts +5 -0
- package/src/core/nocturnal-compliance.ts +22 -1
- package/src/core/nocturnal-dataset.ts +3 -1
- package/src/core/nocturnal-export.ts +5 -0
- package/src/core/nocturnal-reasoning-deriver.ts +6 -1
- package/src/core/nocturnal-snapshot-contract.ts +1 -0
- package/src/core/nocturnal-trinity.ts +24 -3
- package/src/core/pain-context-extractor.ts +3 -1
- package/src/core/pain.ts +3 -1
- package/src/core/path-resolver.ts +10 -4
- package/src/core/pd-task-reconciler.ts +3 -1
- package/src/core/pd-task-store.ts +1 -0
- package/src/core/principle-internalization/deprecated-readiness.ts +2 -1
- package/src/core/principle-training-state.ts +2 -0
- package/src/core/principle-tree-ledger.ts +4 -0
- package/src/core/principle-tree-migration.ts +2 -1
- package/src/core/promotion-gate.ts +7 -1
- package/src/core/replay-engine.ts +10 -4
- package/src/core/risk-calculator.ts +2 -1
- package/src/core/rule-host.ts +3 -2
- package/src/core/session-tracker.ts +5 -2
- package/src/core/shadow-observation-registry.ts +1 -0
- package/src/core/thinking-os-parser.ts +1 -0
- package/src/core/trajectory.ts +9 -5
- package/src/hooks/bash-risk.ts +2 -0
- package/src/hooks/edit-verification.ts +3 -0
- package/src/hooks/gate-block-helper.ts +3 -0
- package/src/hooks/gate.ts +8 -0
- package/src/hooks/gfi-gate.ts +2 -0
- package/src/hooks/lifecycle.ts +1 -0
- package/src/hooks/llm.ts +1 -0
- package/src/hooks/pain.ts +3 -1
- package/src/hooks/progressive-trust-gate.ts +3 -0
- package/src/hooks/prompt.ts +5 -2
- package/src/hooks/subagent.ts +1 -0
- package/src/hooks/thinking-checkpoint.ts +1 -0
- package/src/hooks/trajectory-collector.ts +2 -1
- package/src/http/principles-console-route.ts +5 -2
- package/src/index.ts +7 -0
- package/src/service/central-health-service.ts +1 -0
- package/src/service/central-overview-service.ts +2 -0
- package/src/service/evolution-query-service.ts +1 -0
- package/src/service/evolution-worker.ts +31 -1
- package/src/service/health-query-service.ts +6 -6
- package/src/service/monitoring-query-service.ts +4 -0
- package/src/service/nocturnal-runtime.ts +7 -5
- package/src/service/nocturnal-service.ts +21 -0
- package/src/service/nocturnal-target-selector.ts +2 -0
- package/src/service/runtime-summary-service.ts +6 -5
- package/src/service/subagent-workflow/deep-reflect-workflow-manager.ts +2 -1
- package/src/service/subagent-workflow/empathy-observer-workflow-manager.ts +2 -0
- package/src/service/subagent-workflow/nocturnal-workflow-manager.ts +3 -2
- package/src/service/subagent-workflow/workflow-manager-base.ts +6 -1
- package/src/service/subagent-workflow/workflow-store.ts +2 -0
- package/src/tools/deep-reflect.ts +9 -0
- package/src/tools/model-index.ts +1 -0
- package/src/tools/write-pain-flag.ts +1 -0
- package/src/utils/file-lock.ts +1 -0
- package/src/utils/io.ts +2 -1
- package/tests/core/nocturnal-e2e.test.ts +10 -0
- package/tests/tools/write-pain-flag.test.ts +29 -13
package/openclaw.plugin.json
CHANGED
package/package.json
CHANGED
|
@@ -0,0 +1,314 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Nocturnal Pipeline — End-to-End Acceptance Test
|
|
5
|
+
*
|
|
6
|
+
* Verifies that all components of the Nocturnal reflection pipeline
|
|
7
|
+
* work correctly in a real environment (not unit tests).
|
|
8
|
+
*
|
|
9
|
+
* Usage:
|
|
10
|
+
* node scripts/acceptance-test.mjs --workspace /path/to/workspace
|
|
11
|
+
*
|
|
12
|
+
* Output: Pass/Fail for each checkpoint with detailed diagnostics.
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
import { execFileSync } from 'child_process';
import { existsSync, mkdirSync, readFileSync, readdirSync, renameSync, rmSync, writeFileSync } from 'fs';
import { dirname, join } from 'path';
import { fileURLToPath } from 'url';
|
|
19
|
+
|
|
20
|
+
const __filename = fileURLToPath(import.meta.url);
|
|
21
|
+
const __dirname = dirname(__filename);
|
|
22
|
+
|
|
23
|
+
// ─── Helpers ───
|
|
24
|
+
let passCount = 0;
|
|
25
|
+
let failCount = 0;
|
|
26
|
+
let warnCount = 0;
|
|
27
|
+
|
|
28
|
+
function assert(condition, testName, detail = '') {
|
|
29
|
+
if (condition) {
|
|
30
|
+
console.log(` ✅ ${testName}`);
|
|
31
|
+
passCount++;
|
|
32
|
+
} else {
|
|
33
|
+
console.log(` ❌ ${testName}${detail ? ` — ${detail}` : ''}`);
|
|
34
|
+
failCount++;
|
|
35
|
+
}
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
function warn(testName, detail = '') {
|
|
39
|
+
console.log(` ⚠️ ${testName}${detail ? ` — ${detail}` : ''}`);
|
|
40
|
+
warnCount++;
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
function runSql(dbPath, sql) {
|
|
44
|
+
return execFileSync('sqlite3', [dbPath, sql], {
|
|
45
|
+
encoding: 'utf-8',
|
|
46
|
+
timeout: 5000,
|
|
47
|
+
}).trim();
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
// ─── Parse workspace ───
|
|
51
|
+
function parseArgs() {
|
|
52
|
+
const argv = process.argv.slice(2);
|
|
53
|
+
for (let i = 0; i < argv.length; i++) {
|
|
54
|
+
if (argv[i] === '--workspace' && argv[i + 1]) return argv[++i];
|
|
55
|
+
}
|
|
56
|
+
try {
|
|
57
|
+
return execFileSync('git', ['rev-parse', '--show-toplevel'], { encoding: 'utf-8' }).trim();
|
|
58
|
+
} catch {
|
|
59
|
+
return process.cwd();
|
|
60
|
+
}
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
// ─── Main ───
|
|
64
|
+
function main() {
|
|
65
|
+
const workspaceDir = parseArgs();
|
|
66
|
+
const stateDir = join(workspaceDir, '.state');
|
|
67
|
+
const dbPath = join(stateDir, 'trajectory.db');
|
|
68
|
+
|
|
69
|
+
if (!existsSync(dbPath)) {
|
|
70
|
+
console.error('❌ trajectory.db not found. Run seed script first.');
|
|
71
|
+
process.exit(1);
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
console.log('\n🧪 Nocturnal Pipeline Acceptance Test');
|
|
75
|
+
console.log('═'.repeat(55));
|
|
76
|
+
console.log(`Workspace: ${workspaceDir}`);
|
|
77
|
+
console.log(`Database: ${dbPath}\n`);
|
|
78
|
+
|
|
79
|
+
// ═══════════════════════════════════════════════
|
|
80
|
+
// SECTION 1: Seed Scenario Data Integrity
|
|
81
|
+
// ═══════════════════════════════════════════════
|
|
82
|
+
console.log('── 1. Seed Scenario Data Integrity ──');
|
|
83
|
+
|
|
84
|
+
// 1.1 Count seed sessions
|
|
85
|
+
const sessionCount = parseInt(runSql(dbPath, "SELECT COUNT(*) FROM sessions WHERE session_id LIKE 'seed-%';"));
|
|
86
|
+
assert(sessionCount === 10, '10 seed sessions exist', `found ${sessionCount}`);
|
|
87
|
+
|
|
88
|
+
// 1.2 Count seed pain events
|
|
89
|
+
const painCount = parseInt(runSql(dbPath, "SELECT COUNT(*) FROM pain_events WHERE session_id LIKE 'seed-%';"));
|
|
90
|
+
assert(painCount >= 10, 'Pain events exist for seed sessions', `found ${painCount}`);
|
|
91
|
+
|
|
92
|
+
// 1.3 Verify signal diversity
|
|
93
|
+
const sources = runSql(dbPath, "SELECT DISTINCT source FROM pain_events WHERE session_id LIKE 'seed-%';").split('\n');
|
|
94
|
+
assert(sources.includes('tool_failure'), 'tool_failure signal present');
|
|
95
|
+
assert(sources.includes('user_empathy'), 'user_empathy signal present');
|
|
96
|
+
|
|
97
|
+
// 1.4 Verify pain event scores are in valid range
|
|
98
|
+
const invalidScores = runSql(dbPath, "SELECT COUNT(*) FROM pain_events WHERE session_id LIKE 'seed-%' AND (score < 0 OR score > 100);");
|
|
99
|
+
assert(parseInt(invalidScores) === 0, 'All pain scores in 0-100 range', `${invalidScores} invalid`);
|
|
100
|
+
|
|
101
|
+
// 1.5 Verify correction cues exist
|
|
102
|
+
const correctionCount = parseInt(runSql(dbPath, "SELECT COUNT(*) FROM user_turns WHERE session_id LIKE 'seed-%' AND correction_detected = 1;"));
|
|
103
|
+
assert(correctionCount >= 5, 'Multiple correction cues present', `found ${correctionCount}`);
|
|
104
|
+
|
|
105
|
+
// 1.6 Verify tool calls (both success and failure)
|
|
106
|
+
const failureCalls = parseInt(runSql(dbPath, "SELECT COUNT(*) FROM tool_calls WHERE session_id LIKE 'seed-%' AND outcome = 'failure';"));
|
|
107
|
+
const successCalls = parseInt(runSql(dbPath, "SELECT COUNT(*) FROM tool_calls WHERE session_id LIKE 'seed-%' AND outcome = 'success';"));
|
|
108
|
+
assert(failureCalls > 0, 'Failed tool calls in seed data', `found ${failureCalls}`);
|
|
109
|
+
assert(successCalls > 0, 'Successful tool calls in seed data', `found ${successCalls}`);
|
|
110
|
+
|
|
111
|
+
// 1.7 Verify scenario descriptions are unique and meaningful
|
|
112
|
+
const reasons = runSql(dbPath, "SELECT DISTINCT reason FROM pain_events WHERE session_id LIKE 'seed-%' ORDER BY reason;").split('\n');
|
|
113
|
+
const uniqueReasons = new Set(reasons);
|
|
114
|
+
assert(uniqueReasons.size === reasons.length, 'All pain reasons are unique', `${uniqueReasons.size}/${reasons.length} unique`);
|
|
115
|
+
|
|
116
|
+
// ═══════════════════════════════════════════════
|
|
117
|
+
// SECTION 2: write_pain_flag Tool
|
|
118
|
+
// ═══════════════════════════════════════════════
|
|
119
|
+
console.log('\n── 2. write_pain_flag Tool ──');
|
|
120
|
+
|
|
121
|
+
// 2.1 Test tool registered in source code
|
|
122
|
+
const indexSource = join(__dirname, '..', 'src', 'index.ts');
|
|
123
|
+
if (existsSync(indexSource)) {
|
|
124
|
+
const indexContent = readFileSync(indexSource, 'utf-8');
|
|
125
|
+
const hasImport = indexContent.includes("write-pain-flag");
|
|
126
|
+
const hasRegister = indexContent.includes('createWritePainFlagTool');
|
|
127
|
+
assert(hasImport && hasRegister, 'write_pain_flag registered in index.ts');
|
|
128
|
+
} else {
|
|
129
|
+
warn('write_pain_flag tool check', 'index.ts not found');
|
|
130
|
+
}
|
|
131
|
+
|
|
132
|
+
// 2.2 Verify tool source file exists
|
|
133
|
+
const toolSource = join(__dirname, '..', 'src', 'tools', 'write-pain-flag.ts');
|
|
134
|
+
assert(existsSync(toolSource), 'write-pain-flag.ts source exists');
|
|
135
|
+
|
|
136
|
+
// 2.3 Verify atomic write function
|
|
137
|
+
const toolContent = readFileSync(toolSource, 'utf-8');
|
|
138
|
+
assert(toolContent.includes('renameSync'), 'Uses atomic write (renameSync)', 'renameSync not found');
|
|
139
|
+
|
|
140
|
+
// 2.4 Verify KV serialization
|
|
141
|
+
assert(toolContent.includes('serializeKvLines'), 'Uses serializeKvLines for KV format', 'serializeKvLines not found');
|
|
142
|
+
|
|
143
|
+
// 2.5 Verify [object Object] protection
|
|
144
|
+
assert(toolContent.includes('buildPainFlag'), 'Uses buildPainFlag factory', 'buildPainFlag not found');
|
|
145
|
+
|
|
146
|
+
// 2.6 Test actual write end-to-end
|
|
147
|
+
const testFlagPath = join(stateDir, '.pain_flag_acceptance_test');
|
|
148
|
+
const testStateDir = join(stateDir, '.state_test_' + Date.now());
|
|
149
|
+
mkdirSync(testStateDir, { recursive: true });
|
|
150
|
+
try {
|
|
151
|
+
const testPath = join(testStateDir, '.pain_flag');
|
|
152
|
+
writeFileSync(testPath + '.tmp', 'source: manual\nscore: 80\ntime: 2026-04-13T00:00:00.000Z\nreason: acceptance test\n', 'utf-8');
|
|
153
|
+
execFileSync('mv', [testPath + '.tmp', testPath]);
|
|
154
|
+
const content = readFileSync(testPath, 'utf-8');
|
|
155
|
+
assert(content.includes('source: manual'), 'Manual pain flag write works');
|
|
156
|
+
assert(!content.includes('[object Object]'), 'No [object Object] corruption');
|
|
157
|
+
} finally {
|
|
158
|
+
rmSync(testStateDir, { recursive: true, force: true });
|
|
159
|
+
}
|
|
160
|
+
|
|
161
|
+
// ═══════════════════════════════════════════════
|
|
162
|
+
// SECTION 3: Dedup Logic
|
|
163
|
+
// ═══════════════════════════════════════════════
|
|
164
|
+
console.log('\n── 3. Dedup Logic (Phase 3c) ──');
|
|
165
|
+
|
|
166
|
+
const workerSource = join(__dirname, '..', 'src', 'service', 'evolution-worker.ts');
|
|
167
|
+
if (existsSync(workerSource)) {
|
|
168
|
+
const workerContent = readFileSync(workerSource, 'utf-8');
|
|
169
|
+
|
|
170
|
+
// 3.1 Helper functions exist
|
|
171
|
+
assert(workerContent.includes('hasRecentSimilarReflection'), 'hasRecentSimilarReflection helper extracted');
|
|
172
|
+
assert(workerContent.includes('buildPainSourceKey'), 'buildPainSourceKey helper extracted');
|
|
173
|
+
assert(workerContent.includes('shouldSkipForDedup'), 'shouldSkipForDedup helper extracted');
|
|
174
|
+
|
|
175
|
+
// 3.2 Dedup window is configured
|
|
176
|
+
assert(workerContent.includes('4 * 60 * 60 * 1000') || workerContent.includes('DEDUP_WINDOW_MS'), '4-hour dedup window configured');
|
|
177
|
+
|
|
178
|
+
// 3.3 No-pain-context bypass
|
|
179
|
+
const bypassCheck = workerContent.includes('!painSourceKey') || workerContent.includes('painSourceKey === null') ||
|
|
180
|
+
workerContent.includes('painSourceKey) return false') || workerContent.includes('if (!painSourceKey) return false');
|
|
181
|
+
assert(bypassCheck, 'no_pain_context bypasses dedup', 'bypass pattern not found');
|
|
182
|
+
|
|
183
|
+
// 3.4 Only completed tasks are checked (not failed)
|
|
184
|
+
assert(workerContent.includes("status !== 'completed'") || workerContent.includes("status === 'completed'"), 'Only completed tasks matched for dedup');
|
|
185
|
+
} else {
|
|
186
|
+
fail('Dedup logic check', 'evolution-worker.ts not found');
|
|
187
|
+
}
|
|
188
|
+
|
|
189
|
+
// ═══════════════════════════════════════════════
|
|
190
|
+
// SECTION 4: Correction Rejected Pain Event
|
|
191
|
+
// ═══════════════════════════════════════════════
|
|
192
|
+
console.log('\n── 4. Correction Rejected Pain Signal ──');
|
|
193
|
+
|
|
194
|
+
const trajectorySource = join(__dirname, '..', 'src', 'core', 'trajectory.ts');
|
|
195
|
+
if (existsSync(trajectorySource)) {
|
|
196
|
+
const trajContent = readFileSync(trajectorySource, 'utf-8');
|
|
197
|
+
|
|
198
|
+
// 4.1 Method exists
|
|
199
|
+
assert(trajContent.includes('recordCorrectionRejectedPain'), 'recordCorrectionRejectedPain method exists');
|
|
200
|
+
|
|
201
|
+
// 4.2 Called on rejected status
|
|
202
|
+
assert(trajContent.includes("status === 'rejected'"), 'Pain event created on rejected status');
|
|
203
|
+
|
|
204
|
+
// 4.3 Uses correct source
|
|
205
|
+
assert(trajContent.includes("'correction_rejected'"), 'Uses correction_rejected source');
|
|
206
|
+
|
|
207
|
+
// 4.4 Score is clamped
|
|
208
|
+
assert(trajContent.includes('Math.max(0') && trajContent.includes('Math.min(100'), 'Score clamped 0-100');
|
|
209
|
+
} else {
|
|
210
|
+
fail('Correction rejected check', 'trajectory.ts not found');
|
|
211
|
+
}
|
|
212
|
+
|
|
213
|
+
// ═══════════════════════════════════════════════
|
|
214
|
+
// SECTION 5: Shell Injection Safety
|
|
215
|
+
// ═══════════════════════════════════════════════
|
|
216
|
+
console.log('\n── 5. Shell Injection Safety ──');
|
|
217
|
+
|
|
218
|
+
const seedScript = join(__dirname, '..', 'scripts', 'seed-nocturnal-scenarios.mjs');
|
|
219
|
+
const diagnoseScript = join(__dirname, '..', 'scripts', 'diagnose-nocturnal.mjs');
|
|
220
|
+
|
|
221
|
+
if (existsSync(seedScript)) {
|
|
222
|
+
const seedContent = readFileSync(seedScript, 'utf-8');
|
|
223
|
+
const execSyncCalls = (seedContent.match(/execSync\s*\(/g) || []).length;
|
|
224
|
+
const execFileCalls = (seedContent.match(/execFileSync\s*\(/g) || []).length;
|
|
225
|
+
assert(execSyncCalls === 0 || seedContent.includes("execSync('git") || seedContent.includes('execSync("git'),
|
|
226
|
+
'Seed script: no sqlite3 in execSync',
|
|
227
|
+
`found ${execSyncCalls} execSync calls`);
|
|
228
|
+
assert(execFileCalls > 0, 'Seed script: uses execFileSync', `found ${execFileCalls} calls`);
|
|
229
|
+
|
|
230
|
+
// 5.1 Pre-flight check
|
|
231
|
+
assert(seedContent.includes('ensureSqlite3') || seedContent.includes('sqlite3 --version'), 'Seed script: sqlite3 pre-flight check');
|
|
232
|
+
}
|
|
233
|
+
|
|
234
|
+
if (existsSync(diagnoseScript)) {
|
|
235
|
+
const diagContent = readFileSync(diagnoseScript, 'utf-8');
|
|
236
|
+
// 5.2 Diagnose script uses execFileSync for sqlite3
|
|
237
|
+
const diagExecFileCalls = (diagContent.match(/execFileSync\s*\(\s*['"]sqlite3/g) || []).length;
|
|
238
|
+
assert(diagExecFileCalls > 0, 'Diagnose script: uses execFileSync for sqlite3', `found ${diagExecFileCalls} calls`);
|
|
239
|
+
|
|
240
|
+
// 5.3 No shell-interpolated sqlite3 calls
|
|
241
|
+
const shellSqliteCalls = (diagContent.match(/execSync\s*\(\s*['"]sqlite3/g) || []).length;
|
|
242
|
+
assert(shellSqliteCalls === 0, 'Diagnose script: no sqlite3 in execSync', `found ${shellSqliteCalls} unsafe calls`);
|
|
243
|
+
}
|
|
244
|
+
|
|
245
|
+
// ═══════════════════════════════════════════════
|
|
246
|
+
// SECTION 6: Pending Review Warning Fix
|
|
247
|
+
// ═══════════════════════════════════════════════
|
|
248
|
+
console.log('\n── 6. Pending Review Warning Fix ──');
|
|
249
|
+
|
|
250
|
+
if (existsSync(diagnoseScript)) {
|
|
251
|
+
const diagContent = readFileSync(diagnoseScript, 'utf-8');
|
|
252
|
+
// 6.1 Pending case returns warn object
|
|
253
|
+
const pendingWarnPattern = /if\s*\(\s*pending\s*>\s*0\s*\)\s*\{[\s\S]*?status:\s*['"]warn['"]/;
|
|
254
|
+
assert(pendingWarnPattern.test(diagContent), 'Pending review returns {status:"warn"} object');
|
|
255
|
+
}
|
|
256
|
+
|
|
257
|
+
// ═══════════════════════════════════════════════
|
|
258
|
+
// SECTION 7: Path Resolver Fallback
|
|
259
|
+
// ═══════════════════════════════════════════════
|
|
260
|
+
console.log('\n── 7. Path Resolver Fallback ──');
|
|
261
|
+
|
|
262
|
+
const pathResolverSource = join(__dirname, '..', 'src', 'core', 'path-resolver.ts');
|
|
263
|
+
if (existsSync(pathResolverSource)) {
|
|
264
|
+
const prContent = readFileSync(pathResolverSource, 'utf-8');
|
|
265
|
+
// 7.1 resolveWorkspaceDirFromApi checks config.workspaceDir
|
|
266
|
+
assert(prContent.includes('config.workspaceDir') || prContent.includes('cfgWorkspaceDir'),
|
|
267
|
+
'resolveWorkspaceDirFromApi checks config.workspaceDir');
|
|
268
|
+
}
|
|
269
|
+
|
|
270
|
+
// ═══════════════════════════════════════════════
|
|
271
|
+
// SECTION 8: Seed Scenario Quality (Manual Review)
|
|
272
|
+
// ═══════════════════════════════════════════════
|
|
273
|
+
console.log('\n── 8. Seed Scenario Quality ──');
|
|
274
|
+
|
|
275
|
+
// 8.1 Verify pain score distribution
|
|
276
|
+
const avgScore = parseFloat(runSql(dbPath, "SELECT ROUND(AVG(score), 1) FROM pain_events WHERE session_id LIKE 'seed-%';"));
|
|
277
|
+
assert(avgScore > 50 && avgScore < 100, 'Average pain score reasonable (50-100)', `avg=${avgScore}`);
|
|
278
|
+
|
|
279
|
+
// 8.2 Verify severity levels are set
|
|
280
|
+
const nullSeverity = parseInt(runSql(dbPath, "SELECT COUNT(*) FROM pain_events WHERE session_id LIKE 'seed-%' AND severity IS NULL;"));
|
|
281
|
+
assert(nullSeverity === 0, 'All seed pain events have severity', `${nullSeverity} null`);
|
|
282
|
+
|
|
283
|
+
// 8.3 Verify origin is set
|
|
284
|
+
const nullOrigin = parseInt(runSql(dbPath, "SELECT COUNT(*) FROM pain_events WHERE session_id LIKE 'seed-%' AND origin IS NULL;"));
|
|
285
|
+
assert(nullOrigin === 0, 'All seed pain events have origin', `${nullOrigin} null`);
|
|
286
|
+
|
|
287
|
+
// 8.4 Verify all 10 scenario IDs follow naming convention
|
|
288
|
+
const seedSessions = runSql(dbPath, "SELECT session_id FROM sessions WHERE session_id LIKE 'seed-%' ORDER BY session_id;").split('\n');
|
|
289
|
+
const validPattern = /^seed-[a-z-]+-\d{3}$/;
|
|
290
|
+
const allValid = seedSessions.every(s => validPattern.test(s));
|
|
291
|
+
assert(allValid, 'All session IDs follow naming convention', `${seedSessions.filter(s => !validPattern.test(s)).length} invalid`);
|
|
292
|
+
|
|
293
|
+
// ═══════════════════════════════════════════════
|
|
294
|
+
// SUMMARY
|
|
295
|
+
// ═══════════════════════════════════════════════
|
|
296
|
+
console.log('\n' + '═'.repeat(55));
|
|
297
|
+
console.log(` Acceptance Test Summary`);
|
|
298
|
+
console.log('═'.repeat(55));
|
|
299
|
+
console.log(` ✅ Passed: ${passCount}`);
|
|
300
|
+
console.log(` ❌ Failed: ${failCount}`);
|
|
301
|
+
console.log(` ⚠️ Warnings: ${warnCount}`);
|
|
302
|
+
console.log(` Total: ${passCount + failCount + warnCount}`);
|
|
303
|
+
console.log('═'.repeat(55));
|
|
304
|
+
|
|
305
|
+
if (failCount === 0) {
|
|
306
|
+
console.log('\n🎉 All acceptance tests passed!');
|
|
307
|
+
process.exit(0);
|
|
308
|
+
} else {
|
|
309
|
+
console.log(`\n❌ ${failCount} test(s) failed. Review details above.`);
|
|
310
|
+
process.exit(1);
|
|
311
|
+
}
|
|
312
|
+
}
|
|
313
|
+
|
|
314
|
+
main();
|
|
@@ -299,12 +299,11 @@ function main() {
|
|
|
299
299
|
for (const s of scenarios) {
|
|
300
300
|
const createdAt = daysAgo(s.days);
|
|
301
301
|
|
|
302
|
-
// Check if scenario already exists
|
|
303
|
-
// This prevents partial data if the script was interrupted mid-insert
|
|
302
|
+
// Check if scenario already exists (check sessions table first since that's inserted first)
|
|
304
303
|
try {
|
|
305
|
-
const existing = runSql(dbPath, `SELECT COUNT(*) FROM
|
|
304
|
+
const existing = runSql(dbPath, `SELECT COUNT(*) FROM sessions WHERE session_id = '${esc(s.sessionId)}';`);
|
|
306
305
|
if (parseInt(existing) > 0) {
|
|
307
|
-
console.log(` ⏭️ Skipping ${s.sessionId} (already exists
|
|
306
|
+
console.log(` ⏭️ Skipping ${s.sessionId} (already exists)`);
|
|
308
307
|
skipped++;
|
|
309
308
|
continue;
|
|
310
309
|
}
|
|
@@ -336,12 +335,12 @@ VALUES ('${esc(s.sessionId)}', '${esc(tc.toolName)}', '${esc(tc.outcome)}', ${tc
|
|
|
336
335
|
|
|
337
336
|
// 4. Pain events
|
|
338
337
|
for (const pe of s.painEvents) {
|
|
339
|
-
sql.push(`INSERT INTO pain_events (session_id, source, score, reason, severity, origin, confidence,
|
|
340
|
-
VALUES ('${esc(s.sessionId)}', '${esc(pe.source)}', ${pe.score}, '${esc(pe.reason)}', '${esc(pe.severity)}', '${esc(pe.origin)}', ${pe.confidence !== undefined ? pe.confidence : 'NULL'},
|
|
338
|
+
sql.push(`INSERT INTO pain_events (session_id, source, score, reason, severity, origin, confidence, created_at)
|
|
339
|
+
VALUES ('${esc(s.sessionId)}', '${esc(pe.source)}', ${pe.score}, '${esc(pe.reason)}', '${esc(pe.severity)}', '${esc(pe.origin)}', ${pe.confidence !== undefined ? pe.confidence : 'NULL'}, '${createdAt}');`);
|
|
341
340
|
}
|
|
342
341
|
|
|
343
342
|
// Execute all SQL in one transaction via stdin piping
|
|
344
|
-
const fullSql =
|
|
343
|
+
const fullSql = `.bail on\nBEGIN TRANSACTION;\n${sql.join('\n')}\nCOMMIT;`;
|
|
345
344
|
try {
|
|
346
345
|
execFileSync('sqlite3', [dbPath], {
|
|
347
346
|
input: fullSql,
|
|
@@ -362,11 +361,19 @@ VALUES ('${esc(s.sessionId)}', '${esc(pe.source)}', ${pe.score}, '${esc(pe.reaso
|
|
|
362
361
|
|
|
363
362
|
// Print signal diversity report
|
|
364
363
|
try {
|
|
365
|
-
const painSummary =
|
|
364
|
+
const painSummary = execFileSync('sqlite3', [dbPath], {
|
|
365
|
+
input: `.mode column\n.headers on\nSELECT source, COUNT(*) as count, ROUND(AVG(score), 1) as avg_score FROM pain_events WHERE session_id LIKE 'seed-%' GROUP BY source;`,
|
|
366
|
+
encoding: 'utf-8',
|
|
367
|
+
timeout: 5000,
|
|
368
|
+
});
|
|
366
369
|
console.log('\n📈 Signal diversity report (seed scenarios only):');
|
|
367
370
|
console.log(painSummary);
|
|
368
371
|
|
|
369
|
-
const correctionSummary =
|
|
372
|
+
const correctionSummary = execFileSync('sqlite3', [dbPath], {
|
|
373
|
+
input: `.mode column\n.headers on\nSELECT COUNT(*) as total, SUM(CASE WHEN correction_detected = 1 THEN 1 ELSE 0 END) as with_correction FROM user_turns WHERE session_id LIKE 'seed-%';`,
|
|
374
|
+
encoding: 'utf-8',
|
|
375
|
+
timeout: 5000,
|
|
376
|
+
});
|
|
370
377
|
console.log('📝 Correction scenarios:');
|
|
371
378
|
console.log(correctionSummary);
|
|
372
379
|
} catch (e) {
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
/**
|
|
3
|
-
* Validate Live Path Script (Phase 18)
|
|
3
|
+
* Validate Live Path Script (Phase 18) — with Data Flow Monitoring
|
|
4
4
|
*
|
|
5
5
|
* Validates the end-to-end nocturnal workflow path with bootstrapped principles.
|
|
6
6
|
*
|
|
@@ -11,6 +11,7 @@
|
|
|
11
11
|
* - Polls subagent_workflows.db directly for nocturnal workflows
|
|
12
12
|
* - Correlates workflow to queue item via taskId
|
|
13
13
|
* - Verifies state='completed' and explicit resolution (not 'expired')
|
|
14
|
+
* - Monitors data flow: queue state → workflow state → artifact persistence
|
|
14
15
|
* - Outputs summary and exits 0 on success, non-zero on failure
|
|
15
16
|
*
|
|
16
17
|
* Usage:
|
|
@@ -36,6 +37,29 @@ const STATE_DIR = path.join(WORKSPACE_DIR, '.state');
|
|
|
36
37
|
const QUEUE_PATH = path.join(STATE_DIR, 'EVOLUTION_QUEUE');
|
|
37
38
|
const LEDGER_PATH = path.join(STATE_DIR, 'principle_training_state.json');
|
|
38
39
|
const DB_PATH = path.join(STATE_DIR, 'subagent_workflows.db');
|
|
40
|
+
const PAIN_FLAG_PATH = path.join(STATE_DIR, '.pain_flag');
|
|
41
|
+
const SAMPLES_DIR = path.join(STATE_DIR, 'nocturnal', 'samples');
|
|
42
|
+
|
|
43
|
+
// ─── Helpers ─────────────────────────────────────────────────────────────
|
|
44
|
+
function timestamp(): string {
|
|
45
|
+
return new Date().toISOString();
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
function logStep(step: string, detail: string): void {
|
|
49
|
+
console.log(`[${timestamp()}] ▸ ${step}: ${detail}`);
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
function logData(label: string, data: unknown): void {
|
|
53
|
+
const display = typeof data === 'string' ? data : JSON.stringify(data).slice(0, 300);
|
|
54
|
+
console.log(`[${timestamp()}] 📦 ${label}: ${display}`);
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
function safeReadJson(filePath: string): unknown {
|
|
58
|
+
try {
|
|
59
|
+
if (!fs.existsSync(filePath)) return null;
|
|
60
|
+
return JSON.parse(fs.readFileSync(filePath, 'utf8'));
|
|
61
|
+
} catch { return null; }
|
|
62
|
+
}
|
|
39
63
|
|
|
40
64
|
// ─── Types ───────────────────────────────────────────────────────────────
|
|
41
65
|
interface LedgerRule {
|
|
@@ -277,11 +301,39 @@ function verifyWorkflowCompletion(taskId: string): {
|
|
|
277
301
|
async function main() {
|
|
278
302
|
const verbose = process.argv.includes('--verbose');
|
|
279
303
|
|
|
304
|
+
console.log('╔══════════════════════════════════════════════════════════╗');
|
|
305
|
+
console.log('║ Nocturnal Live Path Validation + Data Flow Monitor ║');
|
|
306
|
+
console.log('╚══════════════════════════════════════════════════════════╝');
|
|
307
|
+
logStep('WORKSPACE', WORKSPACE_DIR);
|
|
308
|
+
|
|
309
|
+
// 0. Baseline: snapshot current state
|
|
310
|
+
logStep('BASELINE', 'Capturing current state before validation');
|
|
311
|
+
const queueBefore = safeReadJson(QUEUE_PATH) as QueueItem[] | null;
|
|
312
|
+
logData('EVOLUTION_QUEUE (before)', queueBefore?.length ?? 0);
|
|
313
|
+
if (fs.existsSync(PAIN_FLAG_PATH)) {
|
|
314
|
+
logData('.pain_flag', 'EXISTS — ' + fs.readFileSync(PAIN_FLAG_PATH, 'utf8').slice(0, 100));
|
|
315
|
+
} else {
|
|
316
|
+
logData('.pain_flag', 'not present');
|
|
317
|
+
}
|
|
318
|
+
if (fs.existsSync(SAMPLES_DIR)) {
|
|
319
|
+
const samplesBefore = fs.readdirSync(SAMPLES_DIR).length;
|
|
320
|
+
logData('nocturnal/samples/', `${samplesBefore} files`);
|
|
321
|
+
} else {
|
|
322
|
+
logData('nocturnal/samples/', 'directory not present');
|
|
323
|
+
}
|
|
324
|
+
if (fs.existsSync(DB_PATH)) {
|
|
325
|
+
const wfCount = listNocturnalWorkflows().length;
|
|
326
|
+
logData('subagent_workflows.db', `${wfCount} nocturnal workflows`);
|
|
327
|
+
} else {
|
|
328
|
+
logData('subagent_workflows.db', 'not present');
|
|
329
|
+
}
|
|
330
|
+
|
|
280
331
|
// 1. Check bootstrapped rules
|
|
281
332
|
// eslint-disable-next-line @typescript-eslint/init-declarations
|
|
282
333
|
let rules: LedgerRule[];
|
|
283
334
|
try {
|
|
284
335
|
rules = loadBootstrappedRules();
|
|
336
|
+
logStep('STEP 1', `Found ${rules.length} bootstrapped rule(s)`);
|
|
285
337
|
} catch {
|
|
286
338
|
console.error('FAIL: principle_training_state.json not found. Run Phase 17 bootstrap first: npm run bootstrap-rules');
|
|
287
339
|
process.exit(1);
|
|
@@ -293,7 +345,6 @@ async function main() {
|
|
|
293
345
|
}
|
|
294
346
|
|
|
295
347
|
if (verbose) {
|
|
296
|
-
console.log(`Found ${rules.length} bootstrapped rule(s)`);
|
|
297
348
|
for (const rule of rules) {
|
|
298
349
|
console.log(` - ${rule.id} (principleId=${rule.principleId}, action=${rule.action})`);
|
|
299
350
|
}
|
|
@@ -304,33 +355,94 @@ async function main() {
|
|
|
304
355
|
|
|
305
356
|
// 3. Build synthetic snapshot for validation
|
|
306
357
|
const snapshot = buildSyntheticSnapshot(taskId);
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
}
|
|
358
|
+
logStep('STEP 2', `Synthetic snapshot: sessionId=${snapshot.sessionId}`);
|
|
359
|
+
logData('snapshot.recentPain', JSON.stringify(snapshot.recentPain));
|
|
310
360
|
|
|
311
361
|
// 4. Enqueue task (with lock acquisition)
|
|
312
362
|
try {
|
|
313
363
|
await enqueueSleepReflectionTask(taskId);
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
364
|
+
logStep('STEP 3', `Enqueued sleep_reflection task: ${taskId}`);
|
|
365
|
+
|
|
366
|
+
// Post-enqueue: verify queue state
|
|
367
|
+
const queueAfter = safeReadJson(QUEUE_PATH) as QueueItem[] | null;
|
|
368
|
+
const taskItem = queueAfter?.find(q => q.id === taskId);
|
|
369
|
+
logData('EVOLUTION_QUEUE (after)', `${queueAfter?.length ?? 0} tasks`);
|
|
370
|
+
logData(`task[${taskId}]`, taskItem ? JSON.stringify(taskItem) : 'NOT FOUND');
|
|
317
371
|
} catch (error: unknown) {
|
|
318
372
|
console.error('FAIL: Failed to enqueue sleep_reflection task:', String(error));
|
|
319
373
|
process.exit(1);
|
|
320
374
|
}
|
|
321
375
|
|
|
322
|
-
// 5. Poll for completion
|
|
376
|
+
// 5. Poll for completion — with data flow monitoring
|
|
323
377
|
const deadline = Date.now() + POLL_TIMEOUT_MS;
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
378
|
+
let pollCount = 0;
|
|
379
|
+
let lastQueueStatus = 'unknown';
|
|
380
|
+
let lastWorkflowState = 'none';
|
|
381
|
+
logStep('STEP 4', `Polling for workflow completion (timeout: ${POLL_TIMEOUT_MS / 1000 / 60}min, interval: ${POLL_INTERVAL_MS / 1000}s)`);
|
|
327
382
|
|
|
328
383
|
while (Date.now() < deadline) {
|
|
384
|
+
pollCount++;
|
|
329
385
|
await new Promise(resolve => setTimeout(resolve, POLL_INTERVAL_MS));
|
|
330
386
|
|
|
387
|
+
// Capture queue state
|
|
388
|
+
const queueNow = safeReadJson(QUEUE_PATH) as QueueItem[] | null;
|
|
389
|
+
const taskNow = queueNow?.find(q => q.id === taskId);
|
|
390
|
+
const currentQueueStatus = taskNow?.status ?? 'not_in_queue';
|
|
391
|
+
|
|
392
|
+
// Capture workflow DB state
|
|
393
|
+
const workflows = listNocturnalWorkflows();
|
|
394
|
+
const matchingWf = workflows.find(w => {
|
|
395
|
+
try {
|
|
396
|
+
const meta = JSON.parse(w.metadata_json);
|
|
397
|
+
return meta.taskId === taskId;
|
|
398
|
+
} catch { return false; }
|
|
399
|
+
});
|
|
400
|
+
const currentWorkflowState = matchingWf?.state ?? 'not_in_db';
|
|
401
|
+
|
|
402
|
+
// Log state changes
|
|
403
|
+
if (currentQueueStatus !== lastQueueStatus || currentWorkflowState !== lastWorkflowState) {
|
|
404
|
+
logStep(`POLL #${pollCount}`, `queue=${currentQueueStatus}, workflow=${currentWorkflowState}`);
|
|
405
|
+
if (taskNow) logData('queue item', JSON.stringify({ status: taskNow.status, resolution: taskNow.resolution }));
|
|
406
|
+
if (matchingWf) logData('workflow', JSON.stringify({ state: matchingWf.state, type: matchingWf.workflow_type }));
|
|
407
|
+
lastQueueStatus = currentQueueStatus;
|
|
408
|
+
lastWorkflowState = currentWorkflowState;
|
|
409
|
+
} else if (verbose) {
|
|
410
|
+
process.stdout.write('.');
|
|
411
|
+
}
|
|
412
|
+
|
|
413
|
+
// Check for completion
|
|
331
414
|
const result = verifyWorkflowCompletion(taskId);
|
|
332
415
|
if (result) {
|
|
333
|
-
console.log(
|
|
416
|
+
console.log(''); // newline if dots were printed
|
|
417
|
+
logStep('STEP 5', `Workflow completed!`);
|
|
418
|
+
logData('RESULT', `workflowId=${result.workflowId} state=${result.state} resolution=${result.resolution}`);
|
|
419
|
+
|
|
420
|
+
// Check artifact persistence
|
|
421
|
+
if (fs.existsSync(SAMPLES_DIR)) {
|
|
422
|
+
const newSamples = fs.readdirSync(SAMPLES_DIR).filter(f => {
|
|
423
|
+
const stat = fs.statSync(path.join(SAMPLES_DIR, f));
|
|
424
|
+
return stat.isFile() && f.endsWith('.json') && (Date.now() - stat.mtimeMs) < 60000; // created in last minute
|
|
425
|
+
});
|
|
426
|
+
if (newSamples.length > 0) {
|
|
427
|
+
logData('new artifacts', newSamples.join(', '));
|
|
428
|
+
const firstArtifact = safeReadJson(path.join(SAMPLES_DIR, newSamples[0]));
|
|
429
|
+
if (firstArtifact) logData('artifact content (first)', JSON.stringify(firstArtifact).slice(0, 300));
|
|
430
|
+
} else {
|
|
431
|
+
logData('new artifacts', 'none created in last 60s');
|
|
432
|
+
}
|
|
433
|
+
}
|
|
434
|
+
|
|
435
|
+
// Check pain_flag cleanup
|
|
436
|
+
if (fs.existsSync(PAIN_FLAG_PATH)) {
|
|
437
|
+
const flagContent = fs.readFileSync(PAIN_FLAG_PATH, 'utf8');
|
|
438
|
+
if (flagContent.includes('[object Object]')) {
|
|
439
|
+
logStep('⚠️ WARNING', 'pain_flag is corrupted ([object Object])');
|
|
440
|
+
} else {
|
|
441
|
+
logData('.pain_flag (after)', `still exists, ${flagContent.length} bytes`);
|
|
442
|
+
}
|
|
443
|
+
} else {
|
|
444
|
+
logData('.pain_flag (after)', 'cleaned up (file removed)');
|
|
445
|
+
}
|
|
334
446
|
|
|
335
447
|
if (result.resolution === 'MISSING' || result.resolution === 'expired') {
|
|
336
448
|
console.error('FAIL: resolution not explicit');
|
|
@@ -340,13 +452,25 @@ async function main() {
|
|
|
340
452
|
console.log('PASS: Live path validation successful');
|
|
341
453
|
process.exit(0);
|
|
342
454
|
}
|
|
455
|
+
}
|
|
343
456
|
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
457
|
+
// Timeout — dump final state for debugging
|
|
458
|
+
console.log('');
|
|
459
|
+
logStep('TIMEOUT', `Poll timeout after ${pollCount} polls (${POLL_TIMEOUT_MS / 1000 / 60}min)`);
|
|
460
|
+
logData('FINAL queue status', lastQueueStatus);
|
|
461
|
+
logData('FINAL workflow state', lastWorkflowState);
|
|
462
|
+
|
|
463
|
+
// Dump full queue for debugging
|
|
464
|
+
const finalQueue = safeReadJson(QUEUE_PATH);
|
|
465
|
+
if (finalQueue) logData('FINAL queue dump', JSON.stringify(finalQueue).slice(0, 500));
|
|
466
|
+
|
|
467
|
+
// Dump full workflow DB for debugging
|
|
468
|
+
const finalWorkflows = listNocturnalWorkflows();
|
|
469
|
+
if (finalWorkflows.length > 0) {
|
|
470
|
+
logData('FINAL workflows', finalWorkflows.map(w => `${w.workflow_id}: state=${w.state}`).join(', '));
|
|
347
471
|
}
|
|
348
472
|
|
|
349
|
-
console.error('FAIL:
|
|
473
|
+
console.error('FAIL: No completed nocturnal workflow found for taskId');
|
|
350
474
|
process.exit(1);
|
|
351
475
|
}
|
|
352
476
|
|
|
@@ -52,12 +52,14 @@ export function handleArchiveImplCommand(ctx: PluginCommandContext): PluginComma
|
|
|
52
52
|
// Subcommand: list
|
|
53
53
|
if (subcommand === 'list') {
|
|
54
54
|
|
|
55
|
+
// eslint-disable-next-line @typescript-eslint/no-use-before-define
|
|
55
56
|
return _handleListArchivable(stateDir, isZh);
|
|
56
57
|
}
|
|
57
58
|
|
|
58
59
|
// Archive by ID
|
|
59
60
|
const targetId = subcommand;
|
|
60
61
|
|
|
62
|
+
// eslint-disable-next-line @typescript-eslint/no-use-before-define
|
|
61
63
|
return _handleArchiveImpl(workspaceDir, stateDir, targetId, isZh);
|
|
62
64
|
}
|
|
63
65
|
|
|
@@ -95,6 +97,7 @@ function _handleListArchivable(
|
|
|
95
97
|
}
|
|
96
98
|
|
|
97
99
|
|
|
100
|
+
// eslint-disable-next-line @typescript-eslint/max-params
|
|
98
101
|
function _handleArchiveImpl(
|
|
99
102
|
workspaceDir: string,
|
|
100
103
|
stateDir: string,
|