principles-disciple 1.32.0 → 1.34.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/openclaw.plugin.json +4 -4
- package/package.json +1 -1
- package/src/core/correction-cue-learner.ts +203 -0
- package/src/core/correction-types.ts +88 -0
- package/src/core/evolution-logger.ts +3 -3
- package/src/core/init.ts +67 -0
- package/src/service/correction-observer-types.ts +58 -0
- package/src/service/correction-observer-workflow-manager.ts +218 -0
- package/src/service/evolution-worker.ts +172 -146
- package/src/service/nocturnal-service.ts +4 -1
- package/src/service/subagent-workflow/index.ts +14 -0
- package/src/service/subagent-workflow/nocturnal-workflow-manager.ts +3 -1
- package/tests/service/evolution-worker.nocturnal.test.ts +14 -1
- package/tests/service/evolution-worker.timeout.test.ts +350 -0
- package/tests/commands/implementation-lifecycle.test.ts +0 -362
- package/tests/core/detection-funnel.test.ts +0 -63
- package/tests/core/evolution-e2e.test.ts +0 -58
- package/tests/core/evolution-engine-gate-integration.test.ts +0 -543
- package/tests/core/evolution-engine.test.ts +0 -562
- package/tests/core/evolution-reducer.test.ts +0 -180
- package/tests/core/evolution-user-stories.e2e.test.ts +0 -249
- package/tests/core/local-worker-routing.test.ts +0 -757
- package/tests/core/rule-host.test.ts +0 -389
- package/tests/core/trajectory-correction-pain.test.ts +0 -180
- package/tests/hooks/gate-edit-verification.test.ts +0 -435
- package/tests/hooks/llm.test.ts +0 -308
- package/tests/hooks/progressive-trust-gate.test.ts +0 -277
- package/tests/hooks/prompt.test.ts +0 -1473
- package/tests/index.integration.test.ts +0 -179
- package/tests/index.shadow-routing.integration.test.ts +0 -140
- package/tests/service/evolution-worker.test.ts +0 -462
- package/tests/service/nocturnal-service.test.ts +0 -577
- package/tests/service/nocturnal-workflow-manager.test.ts +0 -441
- package/tests/tools/critique-prompt.test.ts +0 -260
- package/tests/tools/deep-reflect.test.ts +0 -232
- package/tests/tools/model-index.test.ts +0 -246
- package/tests/ui/app.test.tsx +0 -114
|
@@ -5,7 +5,7 @@ import { createHash } from 'crypto';
|
|
|
5
5
|
import type { OpenClawPluginServiceContext, OpenClawPluginApi, PluginLogger } from '../openclaw-sdk.js';
|
|
6
6
|
import { DictionaryService } from '../core/dictionary-service.js';
|
|
7
7
|
import { DetectionService } from '../core/detection-service.js';
|
|
8
|
-
import { ensureStateTemplates } from '../core/init.js';
|
|
8
|
+
import { ensureStateTemplates, ensureCorePrinciples } from '../core/init.js';
|
|
9
9
|
import { SystemLogger } from '../core/system-logger.js';
|
|
10
10
|
import { WorkspaceContext } from '../core/workspace-context.js';
|
|
11
11
|
import type { EventLog } from '../core/event-log.js';
|
|
@@ -32,6 +32,14 @@ import { validateNocturnalSnapshotIngress } from '../core/nocturnal-snapshot-con
|
|
|
32
32
|
import { isExpectedSubagentError } from './subagent-workflow/subagent-error-utils.js';
|
|
33
33
|
import { readPainFlagContract } from '../core/pain.js';
|
|
34
34
|
|
|
35
|
+
// ── Atomic File Write ────────────────────────────────────────────────────────
/**
 * Write `data` to `filePath` by writing a sibling temp file and renaming it over
 * the target, so readers never observe a partially written file.
 *
 * The temp file name is unique per call (pid + timestamp) so two writers that
 * target the same path do not share one temp file. If the write or the
 * rename fails, the temp file is removed before the error is rethrown.
 *
 * @param filePath Destination path; its parent directory must already exist.
 * @param data     UTF-8 text to persist.
 * @throws Whatever `fs.writeFileSync` / `fs.renameSync` throws.
 */
function atomicWriteFileSync(filePath: string, data: string): void {
  const tmpPath = `${filePath}.${process.pid}.${Date.now()}.tmp`;
  try {
    fs.writeFileSync(tmpPath, data, 'utf8');
    fs.renameSync(tmpPath, filePath);
  } catch (err) {
    try {
      fs.unlinkSync(tmpPath);
    } catch {
      // Temp file may never have been created, or is already gone — nothing to clean up.
    }
    throw err;
  }
}
|
|
42
|
+
|
|
35
43
|
const WORKFLOW_TTL_MS = 5 * 60 * 1000; // 5 minutes default TTL for helper workflows
|
|
36
44
|
import { OpenClawTrinityRuntimeAdapter } from '../core/nocturnal-trinity.js';
|
|
37
45
|
|
|
@@ -48,6 +56,7 @@ interface WatchdogResult {
|
|
|
48
56
|
details: string[];
|
|
49
57
|
}
|
|
50
58
|
|
|
59
|
+
|
|
51
60
|
async function runWorkflowWatchdog(
|
|
52
61
|
wctx: WorkspaceContext,
|
|
53
62
|
api: OpenClawPluginApi | null,
|
|
@@ -63,104 +72,10 @@ async function runWorkflowWatchdog(
|
|
|
63
72
|
try {
|
|
64
73
|
const allWorkflows: WorkflowRow[] = store.listWorkflows();
|
|
65
74
|
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
(wf: WorkflowRow) => wf.state === 'active' && (now - wf.created_at) > staleThreshold,
|
|
70
|
-
);
|
|
71
|
-
if (staleActive.length > 0) {
|
|
72
|
-
for (const wf of staleActive) {
|
|
73
|
-
const ageMin = Math.round((now - wf.created_at) / 60000);
|
|
74
|
-
details.push(`stale_active: ${wf.workflow_id} (${wf.workflow_type}, ${ageMin}min old)`);
|
|
75
|
-
|
|
76
|
-
// #257: Check if the last recorded event reason indicates expected subagent unavailability.
|
|
77
|
-
// If so, skip marking as terminal_error — the workflow is stale because the subagent
|
|
78
|
-
// was expectedly unavailable (daemon mode, process isolation), not due to a hard failure.
|
|
79
|
-
const events = store.getEvents(wf.workflow_id);
|
|
80
|
-
const lastEventReason = events.length > 0 ? events[events.length - 1].reason : 'unknown';
|
|
81
|
-
if (isExpectedSubagentError(lastEventReason)) {
|
|
82
|
-
logger?.debug?.(`[PD:Watchdog] Skipping stale active workflow ${wf.workflow_id}: expected subagent error (${lastEventReason})`);
|
|
83
|
-
continue;
|
|
84
|
-
}
|
|
85
|
-
|
|
86
|
-
store.updateWorkflowState(wf.workflow_id, 'terminal_error');
|
|
87
|
-
store.recordEvent(wf.workflow_id, 'watchdog_timeout', 'active', 'terminal_error', `Stale active > ${staleThreshold / 60000}s`, { ageMs: now - wf.created_at });
|
|
88
|
-
|
|
89
|
-
// Cleanup session if possible (#188: gateway-safe fallback)
|
|
90
|
-
if (wf.child_session_key) {
|
|
91
|
-
try {
|
|
92
|
-
if (subagentRuntime) {
|
|
93
|
-
await subagentRuntime.deleteSession({ sessionKey: wf.child_session_key, deleteTranscript: true });
|
|
94
|
-
logger?.info?.(`[PD:Watchdog] Cleaned up stale session: ${wf.child_session_key}`);
|
|
95
|
-
} else if (agentSession) {
|
|
96
|
-
const storePath = agentSession.resolveStorePath();
|
|
97
|
-
const sessionStore = agentSession.loadSessionStore(storePath, { skipCache: true });
|
|
98
|
-
const normalizedKey = wf.child_session_key.toLowerCase();
|
|
99
|
-
if (sessionStore[normalizedKey]) {
|
|
100
|
-
delete sessionStore[normalizedKey];
|
|
101
|
-
await agentSession.saveSessionStore(storePath, sessionStore);
|
|
102
|
-
logger?.info?.(`[PD:Watchdog] Cleaned up stale session via agentSession fallback: ${wf.child_session_key}`);
|
|
103
|
-
}
|
|
104
|
-
}
|
|
105
|
-
} catch (cleanupErr) {
|
|
106
|
-
const errMsg = String(cleanupErr);
|
|
107
|
-
if (errMsg.includes('gateway request') && agentSession) {
|
|
108
|
-
const storePath = agentSession.resolveStorePath();
|
|
109
|
-
const sessionStore = agentSession.loadSessionStore(storePath, { skipCache: true });
|
|
110
|
-
const normalizedKey = wf.child_session_key.toLowerCase();
|
|
111
|
-
if (sessionStore[normalizedKey]) {
|
|
112
|
-
delete sessionStore[normalizedKey];
|
|
113
|
-
await agentSession.saveSessionStore(storePath, sessionStore);
|
|
114
|
-
logger?.info?.(`[PD:Watchdog] Cleaned up stale session via agentSession fallback after gateway error: ${wf.child_session_key}`);
|
|
115
|
-
}
|
|
116
|
-
} else {
|
|
117
|
-
logger?.warn?.(`[PD:Watchdog] Failed to cleanup session ${wf.child_session_key}: ${errMsg}`);
|
|
118
|
-
}
|
|
119
|
-
}
|
|
120
|
-
}
|
|
121
|
-
}
|
|
122
|
-
}
|
|
75
|
+
runWorkflowWatchdogCheckStale(allWorkflows, store, now, details, subagentRuntime, agentSession, logger);
|
|
76
|
+
runWorkflowWatchdogCheckUncleared(allWorkflows, details);
|
|
77
|
+
runWorkflowWatchdogCheckNocturnal(allWorkflows, details);
|
|
123
78
|
|
|
124
|
-
// Check 2: Workflows in terminal_error/expired without cleanup
|
|
125
|
-
const unclearedTerminal = allWorkflows.filter(
|
|
126
|
-
(wf: WorkflowRow) => (wf.state === 'terminal_error' || wf.state === 'expired') && wf.cleanup_state === 'pending',
|
|
127
|
-
);
|
|
128
|
-
if (unclearedTerminal.length > 0) {
|
|
129
|
-
details.push(`uncleared_terminal: ${unclearedTerminal.length} workflows (will be swept next cycle)`);
|
|
130
|
-
}
|
|
131
|
-
|
|
132
|
-
// Check 3: Nocturnal workflow result validation (#181 pattern)
|
|
133
|
-
const nocturnalCompleted = allWorkflows.filter(
|
|
134
|
-
(wf: WorkflowRow) => wf.workflow_type === 'nocturnal' && wf.state === 'completed',
|
|
135
|
-
);
|
|
136
|
-
for (const wf of nocturnalCompleted) {
|
|
137
|
-
// Check if the metadata snapshot has all zeros (invalid data)
|
|
138
|
-
try {
|
|
139
|
-
const meta = JSON.parse(wf.metadata_json) as Record<string, unknown>;
|
|
140
|
-
const snapshot = meta.snapshot as Record<string, unknown> | undefined;
|
|
141
|
-
if (snapshot) {
|
|
142
|
-
// #219: Check for fallback data source (partial stats from pain context)
|
|
143
|
-
const dataSource = snapshot._dataSource as string | undefined;
|
|
144
|
-
if (dataSource === 'pain_context_fallback') {
|
|
145
|
-
details.push(`fallback_snapshot: nocturnal workflow ${wf.workflow_id} uses pain-context fallback (stats may be incomplete)`);
|
|
146
|
-
}
|
|
147
|
-
const stats = snapshot.stats as Record<string, number> | undefined;
|
|
148
|
-
// #246: Stats are now always number (never null). Detect "empty" fallback:
|
|
149
|
-
// fallback + all counts zero means no real data was available.
|
|
150
|
-
// NOTE: totalAssistantTurns may be 0 even for valid sessions because
|
|
151
|
-
// listRecentNocturnalCandidateSessions (used in fallback path) does not
|
|
152
|
-
// populate assistantTurnCount (only getNocturnalSessionSnapshot does).
|
|
153
|
-
// We use totalToolCalls=0 as the primary indicator instead.
|
|
154
|
-
if (stats && dataSource === 'pain_context_fallback' &&
|
|
155
|
-
stats.totalToolCalls === 0 && stats.totalGateBlocks === 0 &&
|
|
156
|
-
stats.failureCount === 0) {
|
|
157
|
-
details.push(`fallback_snapshot_stats: nocturnal workflow ${wf.workflow_id} has empty fallback stats (no trajectory data found)`);
|
|
158
|
-
}
|
|
159
|
-
}
|
|
160
|
-
} catch { /* ignore malformed metadata */ }
|
|
161
|
-
}
|
|
162
|
-
|
|
163
|
-
// Summary
|
|
164
79
|
const stateCounts: Record<string, number> = {};
|
|
165
80
|
for (const wf of allWorkflows) {
|
|
166
81
|
stateCounts[wf.state] = (stateCounts[wf.state] || 0) + 1;
|
|
@@ -181,6 +96,106 @@ async function runWorkflowWatchdog(
|
|
|
181
96
|
return { anomalies: details.length, details };
|
|
182
97
|
}
|
|
183
98
|
|
|
99
|
+
// ── Watchdog helpers (extracted from runWorkflowWatchdog for complexity) ──
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
/**
 * Best-effort removal of the child session attached to a stale workflow.
 *
 * Strategy:
 *  1. If `subagentRuntime` is available, delete the session (and transcript) through it.
 *  2. Otherwise, if `agentSession` is available, delete the lower-cased session key
 *     from the session store and save the store.
 *  3. If step 1 or 2 throws an error whose text contains "gateway request" and
 *     `agentSession` is available, run the store-based removal once as a fallback.
 *
 * This function never rejects: every failure, including a failure of the
 * fallback itself, is reported through `logger.warn`.
 *
 * @param wf              Workflow row; nothing happens when it has no `child_session_key`.
 * @param subagentRuntime Preferred session deletion API, if present.
 * @param agentSession    Session-store accessors used when the runtime is absent or gateway-bound.
 * @param logger          Optional logger for info/warn messages.
 */
async function cleanupStaleWorkflowSession(
  wf: WorkflowRow,
  subagentRuntime: { deleteSession: (opts: { sessionKey: string; deleteTranscript: boolean }) => Promise<void> } | undefined,
  agentSession: { resolveStorePath: () => string; loadSessionStore: (p: string, o: { skipCache: boolean }) => Record<string, unknown>; saveSessionStore: (p: string, s: Record<string, unknown>) => Promise<void> } | undefined,
  logger?: PluginLogger,
): Promise<void> {
  const sessionKey = wf.child_session_key;
  if (!sessionKey) return;

  // Store-based removal shared by the direct path and the gateway-error fallback.
  // `context` is spliced into the log line so both original messages are preserved.
  const removeViaAgentSession = async (context: string): Promise<void> => {
    if (!agentSession) return;
    const storePath = agentSession.resolveStorePath();
    const sessionStore = agentSession.loadSessionStore(storePath, { skipCache: true });
    const normalizedKey = sessionKey.toLowerCase();
    if (sessionStore[normalizedKey]) {
      delete sessionStore[normalizedKey];
      await agentSession.saveSessionStore(storePath, sessionStore);
      logger?.info?.(`[PD:Watchdog] Cleaned up stale session via agentSession fallback${context}: ${sessionKey}`);
    }
  };

  try {
    if (subagentRuntime) {
      await subagentRuntime.deleteSession({ sessionKey, deleteTranscript: true });
      logger?.info?.(`[PD:Watchdog] Cleaned up stale session: ${sessionKey}`);
    } else {
      await removeViaAgentSession('');
    }
  } catch (cleanupErr) {
    const errMsg = String(cleanupErr);
    if (errMsg.includes('gateway request') && agentSession) {
      try {
        await removeViaAgentSession(' after gateway error');
      } catch (fallbackErr) {
        // The fallback is the last resort; callers fire-and-forget this function,
        // so a throw here must be logged rather than surface as an unhandled rejection.
        logger?.warn?.(`[PD:Watchdog] Failed to cleanup session ${sessionKey} via agentSession fallback: ${String(fallbackErr)}`);
      }
    } else {
      logger?.warn?.(`[PD:Watchdog] Failed to cleanup session ${sessionKey}: ${errMsg}`);
    }
  }
}
|
|
139
|
+
|
|
140
|
+
/**
 * Watchdog check 1: find workflows that are still `active` after more than
 * twice the helper-workflow TTL, flag them in `details`, and move them to
 * `terminal_error`.
 *
 * Session cleanup for each timed-out workflow is started but not awaited, so
 * this function stays synchronous; any rejection from the cleanup is logged.
 *
 * @param allWorkflows    Every workflow row returned by the store.
 * @param store           Workflow store used to read events and record the timeout.
 * @param now             Current time in epoch milliseconds.
 * @param details         Anomaly list; one `stale_active: …` entry is appended per stale workflow.
 * @param subagentRuntime Forwarded to session cleanup.
 * @param agentSession    Forwarded to session cleanup.
 * @param logger          Optional logger.
 */
function runWorkflowWatchdogCheckStale(
  allWorkflows: WorkflowRow[],
  store: WorkflowStore,
  now: number,
  details: string[],
  subagentRuntime: { deleteSession: (opts: { sessionKey: string; deleteTranscript: boolean }) => Promise<void> } | undefined,
  agentSession: { resolveStorePath: () => string; loadSessionStore: (p: string, o: { skipCache: boolean }) => Record<string, unknown>; saveSessionStore: (p: string, s: Record<string, unknown>) => Promise<void> } | undefined,
  logger?: PluginLogger,
): void {
  const staleThreshold = WORKFLOW_TTL_MS * 2;
  const staleThresholdMin = staleThreshold / 60000;

  for (const wf of allWorkflows) {
    if (wf.state !== 'active') continue;
    const ageMs = now - wf.created_at;
    if (ageMs <= staleThreshold) continue;

    const ageMin = Math.round(ageMs / 60000);
    details.push(`stale_active: ${wf.workflow_id} (${wf.workflow_type}, ${ageMin}min old)`);

    // #257: If the last recorded event reason indicates expected subagent
    // unavailability, do not mark the workflow as terminal_error — it is stale
    // because the subagent was expectedly unavailable, not because of a hard failure.
    const events = store.getEvents(wf.workflow_id);
    const lastEventReason = events.length > 0 ? events[events.length - 1].reason : 'unknown';
    if (isExpectedSubagentError(lastEventReason)) {
      logger?.debug?.(`[PD:Watchdog] Skipping stale active workflow ${wf.workflow_id}: expected subagent error (${lastEventReason})`);
      continue;
    }

    store.updateWorkflowState(wf.workflow_id, 'terminal_error');
    // The threshold is expressed in minutes here (ms / 60000), so label it as such.
    store.recordEvent(wf.workflow_id, 'watchdog_timeout', 'active', 'terminal_error', `Stale active > ${staleThresholdMin}min`, { ageMs });

    // Fire-and-forget, but never leave the promise without a rejection handler.
    void cleanupStaleWorkflowSession(wf, subagentRuntime, agentSession, logger).catch((err: unknown) => {
      logger?.warn?.(`[PD:Watchdog] Session cleanup for workflow ${wf.workflow_id} failed: ${String(err)}`);
    });
  }
}
|
|
167
|
+
|
|
168
|
+
/**
 * Watchdog check 2: count workflows that reached `terminal_error` or `expired`
 * but whose `cleanup_state` is still `pending`, and report the count.
 *
 * @param allWorkflows Every workflow row to inspect.
 * @param details      Anomaly list; a single `uncleared_terminal: …` entry is
 *                     appended when at least one such workflow exists.
 */
function runWorkflowWatchdogCheckUncleared(allWorkflows: WorkflowRow[], details: string[]): void {
  let pendingCount = 0;
  for (const row of allWorkflows) {
    const reachedTerminalState = row.state === 'terminal_error' || row.state === 'expired';
    if (reachedTerminalState && row.cleanup_state === 'pending') {
      pendingCount += 1;
    }
  }
  if (pendingCount === 0) return;
  details.push(`uncleared_terminal: ${pendingCount} workflows (will be swept next cycle)`);
}
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
/**
 * Watchdog check 3: inspect completed `nocturnal` workflows and flag those
 * whose metadata snapshot came from the pain-context fallback.
 *
 * For each such workflow a `fallback_snapshot: …` entry is appended; if the
 * snapshot's stats additionally show zero tool calls, zero gate blocks and zero
 * failures, a `fallback_snapshot_stats: …` entry is appended as well.
 * Workflows with missing snapshots or unparseable metadata are skipped silently.
 *
 * @param allWorkflows Every workflow row to inspect.
 * @param details      Anomaly list that receives the entries described above.
 */
function runWorkflowWatchdogCheckNocturnal(allWorkflows: WorkflowRow[], details: string[]): void {
  const completedNocturnal = allWorkflows.filter(
    (row) => row.workflow_type === 'nocturnal' && row.state === 'completed',
  );

  for (const row of completedNocturnal) {
    try {
      const parsedMeta = JSON.parse(row.metadata_json) as Record<string, unknown>;
      const snap = parsedMeta.snapshot as Record<string, unknown> | undefined;
      if (!snap) continue;

      const origin = snap._dataSource as string | undefined;
      if (origin !== 'pain_context_fallback') continue;
      details.push(`fallback_snapshot: nocturnal workflow ${row.workflow_id} uses pain-context fallback (stats may be incomplete)`);

      const counts = snap.stats as Record<string, number> | undefined;
      if (!counts) continue;
      const everythingZero =
        counts.totalToolCalls === 0 &&
        counts.totalGateBlocks === 0 &&
        counts.failureCount === 0;
      if (everythingZero) {
        details.push(`fallback_snapshot_stats: nocturnal workflow ${row.workflow_id} has empty fallback stats (no trajectory data found)`);
      }
    } catch {
      /* ignore malformed metadata */
    }
  }
}
|
|
196
|
+
|
|
197
|
+
// ── End watchdog helpers ──
|
|
198
|
+
|
|
184
199
|
let timeoutId: NodeJS.Timeout | null = null;
|
|
185
200
|
|
|
186
201
|
/**
|
|
@@ -193,7 +208,12 @@ let timeoutId: NodeJS.Timeout | null = null;
|
|
|
193
208
|
* Old queue items (without taskKind) are migrated to pain_diagnosis for compatibility.
|
|
194
209
|
*/
|
|
195
210
|
export type QueueStatus = 'pending' | 'in_progress' | 'completed' | 'failed' | 'canceled';
|
|
196
|
-
export type TaskResolution = 'marker_detected' | 'auto_completed_timeout' | 'failed_max_retries' | 'runtime_unavailable' | 'canceled' | 'late_marker_principle_created' | 'late_marker_no_principle' | 'stub_fallback' | 'skipped_thin_violation';
|
|
211
|
+
/**
 * Values assignable to a queue task's `resolution` field.
 * `'diagnostician_timeout'` is assigned when a pain diagnosis task exceeds its
 * timeout and no completion marker is found.
 */
export type TaskResolution =
  | 'marker_detected'
  | 'auto_completed_timeout'
  | 'failed_max_retries'
  | 'runtime_unavailable'
  | 'canceled'
  | 'late_marker_principle_created'
  | 'late_marker_no_principle'
  | 'stub_fallback'
  | 'skipped_thin_violation'
  | 'diagnostician_timeout';
|
|
212
|
+
|
|
213
|
+
/**
 * How long a pain_diagnosis task may run before it is timed out: 30 minutes.
 * Kept separate from the sleep_reflection timeout because pain diagnostics run
 * via HEARTBEAT (main session LLM) rather than as a subagent; the cap prevents
 * a task from starving indefinitely when the agent is persistently busy.
 */
const PAIN_DIAGNOSIS_TIMEOUT_MS = 30 * 60_000;
|
|
197
217
|
|
|
198
218
|
/**
|
|
199
219
|
* Recent pain context attached to sleep_reflection tasks.
|
|
@@ -355,6 +375,7 @@ function isSessionAtOrBeforeTriggerTime(
|
|
|
355
375
|
return true;
|
|
356
376
|
}
|
|
357
377
|
|
|
378
|
+
|
|
358
379
|
function buildFallbackNocturnalSnapshot(
|
|
359
380
|
sleepTask: EvolutionQueueItem,
|
|
360
381
|
extractor?: ReturnType<typeof createNocturnalTrajectoryExtractor> | null,
|
|
@@ -430,7 +451,7 @@ export const LOCK_RETRY_DELAY_MS = 50;
|
|
|
430
451
|
export const LOCK_STALE_MS = 30_000;
|
|
431
452
|
|
|
432
453
|
|
|
433
|
-
|
|
454
|
+
|
|
434
455
|
export function createEvolutionTaskId(
|
|
435
456
|
source: string,
|
|
436
457
|
score: number,
|
|
@@ -464,7 +485,7 @@ export async function acquireQueueLock(resourcePath: string, logger: PluginLogge
|
|
|
464
485
|
}
|
|
465
486
|
|
|
466
487
|
|
|
467
|
-
|
|
488
|
+
|
|
468
489
|
async function requireQueueLock(resourcePath: string, logger: PluginLogger | { warn?: (message: string) => void; info?: (message: string) => void } | undefined, scope: string, lockSuffix: string = EVOLUTION_QUEUE_LOCK_SUFFIX): Promise<() => void> {
|
|
469
490
|
try {
|
|
470
491
|
return await acquireQueueLock(resourcePath, logger, lockSuffix);
|
|
@@ -480,7 +501,7 @@ export function extractEvolutionTaskId(task: string): string | null {
|
|
|
480
501
|
}
|
|
481
502
|
|
|
482
503
|
|
|
483
|
-
|
|
504
|
+
|
|
484
505
|
function findRecentDuplicateTask(
|
|
485
506
|
queue: EvolutionQueueItem[],
|
|
486
507
|
source: string,
|
|
@@ -488,14 +509,14 @@ function findRecentDuplicateTask(
|
|
|
488
509
|
now: number,
|
|
489
510
|
reason?: string
|
|
490
511
|
): EvolutionQueueItem | undefined {
|
|
491
|
-
|
|
512
|
+
|
|
492
513
|
const key = normalizePainDedupKey(source, preview, reason);
|
|
493
514
|
return queue.find((task) => {
|
|
494
515
|
if (task.status === 'completed') return false;
|
|
495
|
-
|
|
516
|
+
|
|
496
517
|
const taskTime = new Date(task.enqueued_at || task.timestamp).getTime();
|
|
497
518
|
if (!Number.isFinite(taskTime) || (now - taskTime) > PAIN_QUEUE_DEDUP_WINDOW_MS) return false;
|
|
498
|
-
|
|
519
|
+
|
|
499
520
|
return normalizePainDedupKey(task.source, task.trigger_text_preview || '', task.reason) === key;
|
|
500
521
|
});
|
|
501
522
|
}
|
|
@@ -550,7 +571,7 @@ function normalizePainDedupKey(source: string, preview: string, reason?: string)
|
|
|
550
571
|
|
|
551
572
|
|
|
552
573
|
|
|
553
|
-
|
|
574
|
+
|
|
554
575
|
/**
 * Boolean convenience wrapper around `findRecentDuplicateTask`.
 *
 * @param queue   Queue items to search.
 * @param source  Source of the candidate task.
 * @param preview Trigger text preview of the candidate task.
 * @param now     Current time in epoch milliseconds.
 * @param reason  Optional reason of the candidate task.
 * @returns `true` when `findRecentDuplicateTask` returns a match for these arguments.
 */
export function hasRecentDuplicateTask(queue: EvolutionQueueItem[], source: string, preview: string, now: number, reason?: string): boolean {
  const duplicate = findRecentDuplicateTask(queue, source, preview, now, reason);
  return Boolean(duplicate);
}
|
|
@@ -678,7 +699,7 @@ function shouldSkipForDedup(
|
|
|
678
699
|
* Load and migrate the evolution queue. Returns empty array if file doesn't exist.
|
|
679
700
|
*/
|
|
680
701
|
function loadEvolutionQueue(queuePath: string): EvolutionQueueItem[] {
|
|
681
|
-
|
|
702
|
+
|
|
682
703
|
let rawQueue: RawQueueItem[] = [];
|
|
683
704
|
try {
|
|
684
705
|
rawQueue = JSON.parse(fs.readFileSync(queuePath, 'utf8'));
|
|
@@ -693,7 +714,7 @@ function loadEvolutionQueue(queuePath: string): EvolutionQueueItem[] {
|
|
|
693
714
|
* Build and persist a new sleep_reflection task.
|
|
694
715
|
*/
|
|
695
716
|
|
|
696
|
-
|
|
717
|
+
|
|
697
718
|
function enqueueNewSleepReflectionTask(
|
|
698
719
|
queue: EvolutionQueueItem[],
|
|
699
720
|
recentPainContext: ReturnType<typeof readRecentPainContext>,
|
|
@@ -720,7 +741,7 @@ function enqueueNewSleepReflectionTask(
|
|
|
720
741
|
recentPainContext,
|
|
721
742
|
});
|
|
722
743
|
|
|
723
|
-
|
|
744
|
+
atomicWriteFileSync(queuePath, JSON.stringify(queue, null, 2));
|
|
724
745
|
logger?.info?.(`[PD:EvolutionWorker] Enqueued sleep_reflection task ${taskId}`);
|
|
725
746
|
}
|
|
726
747
|
|
|
@@ -765,7 +786,8 @@ interface ParsedPainValues {
|
|
|
765
786
|
}
|
|
766
787
|
|
|
767
788
|
|
|
768
|
-
|
|
789
|
+
|
|
790
|
+
|
|
769
791
|
async function doEnqueuePainTask(
|
|
770
792
|
wctx: WorkspaceContext, logger: PluginLogger, painFlagPath: string,
|
|
771
793
|
result: WorkerStatusReport['pain_flag'], v: ParsedPainValues,
|
|
@@ -811,7 +833,7 @@ async function doEnqueuePainTask(
|
|
|
811
833
|
retryCount: 0, maxRetries: 3,
|
|
812
834
|
});
|
|
813
835
|
|
|
814
|
-
|
|
836
|
+
atomicWriteFileSync(queuePath, JSON.stringify(queue, null, 2));
|
|
815
837
|
fs.appendFileSync(painFlagPath, `\nstatus: queued\ntask_id: ${taskId}\n`, 'utf8');
|
|
816
838
|
result.enqueued = true;
|
|
817
839
|
|
|
@@ -839,6 +861,7 @@ async function doEnqueuePainTask(
|
|
|
839
861
|
return result;
|
|
840
862
|
}
|
|
841
863
|
|
|
864
|
+
|
|
842
865
|
async function checkPainFlag(wctx: WorkspaceContext, logger: PluginLogger): Promise<WorkerStatusReport['pain_flag']> {
|
|
843
866
|
const result: WorkerStatusReport['pain_flag'] = { exists: false, score: null, source: null, enqueued: false, skipped_reason: null };
|
|
844
867
|
try {
|
|
@@ -1012,7 +1035,8 @@ async function checkPainFlag(wctx: WorkspaceContext, logger: PluginLogger): Prom
|
|
|
1012
1035
|
}
|
|
1013
1036
|
|
|
1014
1037
|
|
|
1015
|
-
|
|
1038
|
+
|
|
1039
|
+
|
|
1016
1040
|
async function processEvolutionQueue(wctx: WorkspaceContext, logger: PluginLogger, eventLog: EventLog, api?: OpenClawPluginApi) {
|
|
1017
1041
|
const queuePath = wctx.resolve('EVOLUTION_QUEUE');
|
|
1018
1042
|
if (!fs.existsSync(queuePath)) {
|
|
@@ -1290,8 +1314,8 @@ async function processEvolutionQueue(wctx: WorkspaceContext, logger: PluginLogge
|
|
|
1290
1314
|
}
|
|
1291
1315
|
|
|
1292
1316
|
const age = Date.now() - startedAt.getTime();
|
|
1293
|
-
if (age >
|
|
1294
|
-
const timeoutMinutes = Math.round(
|
|
1317
|
+
if (age > PAIN_DIAGNOSIS_TIMEOUT_MS) {
|
|
1318
|
+
const timeoutMinutes = Math.round(PAIN_DIAGNOSIS_TIMEOUT_MS / 60000);
|
|
1295
1319
|
|
|
1296
1320
|
const timeoutCompleteMarker = path.join(wctx.stateDir, `.evolution_complete_${task.id}`);
|
|
1297
1321
|
const timeoutReportPath = path.join(wctx.stateDir, `.diagnostician_report_${task.id}.json`);
|
|
@@ -1339,13 +1363,13 @@ async function processEvolutionQueue(wctx: WorkspaceContext, logger: PluginLogge
|
|
|
1339
1363
|
} catch { /* report may not exist, not critical */ }
|
|
1340
1364
|
task.resolution = principleCreated ? 'late_marker_principle_created' : 'late_marker_no_principle';
|
|
1341
1365
|
} else {
|
|
1342
|
-
if (logger) logger.info(`[PD:EvolutionWorker]
|
|
1366
|
+
if (logger) logger.info(`[PD:EvolutionWorker] Pain diagnosis task ${task.id} timed out after ${timeoutMinutes} minutes`);
|
|
1343
1367
|
// #190: Clean up diagnostician report file even on timeout (may have been written late)
|
|
1344
1368
|
try {
|
|
1345
1369
|
const autoTimeoutReportPath = path.join(wctx.stateDir, `.diagnostician_report_${task.id}.json`);
|
|
1346
1370
|
if (fs.existsSync(autoTimeoutReportPath)) fs.unlinkSync(autoTimeoutReportPath);
|
|
1347
1371
|
} catch { /* report may not exist, not critical */ }
|
|
1348
|
-
task.resolution = '
|
|
1372
|
+
task.resolution = 'diagnostician_timeout';
|
|
1349
1373
|
}
|
|
1350
1374
|
|
|
1351
1375
|
// Critical: mark task as completed so it doesn't get re-processed
|
|
@@ -1371,7 +1395,7 @@ async function processEvolutionQueue(wctx: WorkspaceContext, logger: PluginLogge
|
|
|
1371
1395
|
sessionId: task.assigned_session_key || 'heartbeat:diagnostician',
|
|
1372
1396
|
taskId: task.id,
|
|
1373
1397
|
outcome: 'timeout',
|
|
1374
|
-
summary: `
|
|
1398
|
+
summary: `Pain diagnosis task ${task.id} timed out after ${timeoutMinutes} minutes.`
|
|
1375
1399
|
});
|
|
1376
1400
|
queueChanged = true;
|
|
1377
1401
|
}
|
|
@@ -1595,7 +1619,7 @@ async function processEvolutionQueue(wctx: WorkspaceContext, logger: PluginLogge
|
|
|
1595
1619
|
|
|
1596
1620
|
// Write claimed state (includes any pain changes from above) and release lock
|
|
1597
1621
|
if (queueChanged) {
|
|
1598
|
-
|
|
1622
|
+
atomicWriteFileSync(queuePath, JSON.stringify(queue, null, 2));
|
|
1599
1623
|
}
|
|
1600
1624
|
releaseLock();
|
|
1601
1625
|
for (const sleepTask of sleepReflectionTasks) {
|
|
@@ -1610,11 +1634,11 @@ async function processEvolutionQueue(wctx: WorkspaceContext, logger: PluginLogge
|
|
|
1610
1634
|
logger?.info?.(`[PD:EvolutionWorker] Processing sleep_reflection task ${sleepTask.id}`);
|
|
1611
1635
|
}
|
|
1612
1636
|
|
|
1613
|
-
|
|
1637
|
+
|
|
1614
1638
|
let workflowId: string | undefined;
|
|
1615
|
-
|
|
1639
|
+
|
|
1616
1640
|
let nocturnalManager: NocturnalWorkflowManager;
|
|
1617
|
-
|
|
1641
|
+
|
|
1618
1642
|
let snapshotData: NocturnalSessionSnapshot | undefined;
|
|
1619
1643
|
|
|
1620
1644
|
if (isPollingTask) {
|
|
@@ -1652,13 +1676,13 @@ async function processEvolutionQueue(wctx: WorkspaceContext, logger: PluginLogge
|
|
|
1652
1676
|
s => s.failureCount > 0 || s.painEventCount > 0 || s.gateBlockCount > 0
|
|
1653
1677
|
);
|
|
1654
1678
|
if (sessionsWithViolations.length > 0) {
|
|
1655
|
-
|
|
1679
|
+
|
|
1656
1680
|
const targetSession = sessionsWithViolations[0];
|
|
1657
1681
|
logger?.info?.(`[PD:EvolutionWorker] Task ${sleepTask.id} using session with violations: ${targetSession.sessionId} (failed=${targetSession.failureCount}, pain=${targetSession.painEventCount}, gates=${targetSession.gateBlockCount})`);
|
|
1658
1682
|
fullSnapshot = extractor.getNocturnalSessionSnapshot(targetSession.sessionId);
|
|
1659
1683
|
} else if (recentSessions.length > 0) {
|
|
1660
1684
|
// No sessions with violations, use most recent as last resort
|
|
1661
|
-
|
|
1685
|
+
|
|
1662
1686
|
const latestSession = recentSessions[0];
|
|
1663
1687
|
logger?.warn?.(`[PD:EvolutionWorker] Task ${sleepTask.id} no sessions with violations found, using most recent: ${latestSession.sessionId} (failed=${latestSession.failureCount}, pain=${latestSession.painEventCount}, gates=${latestSession.gateBlockCount})`);
|
|
1664
1688
|
fullSnapshot = extractor.getNocturnalSessionSnapshot(latestSession.sessionId);
|
|
@@ -1728,7 +1752,7 @@ async function processEvolutionQueue(wctx: WorkspaceContext, logger: PluginLogge
|
|
|
1728
1752
|
},
|
|
1729
1753
|
});
|
|
1730
1754
|
sleepTask.resultRef = workflowHandle.workflowId;
|
|
1731
|
-
|
|
1755
|
+
|
|
1732
1756
|
workflowId = workflowHandle.workflowId;
|
|
1733
1757
|
}
|
|
1734
1758
|
|
|
@@ -1847,7 +1871,7 @@ async function processEvolutionQueue(wctx: WorkspaceContext, logger: PluginLogge
|
|
|
1847
1871
|
freshQueue[idx] = sleepTask;
|
|
1848
1872
|
}
|
|
1849
1873
|
}
|
|
1850
|
-
|
|
1874
|
+
atomicWriteFileSync(queuePath, JSON.stringify(freshQueue, null, 2));
|
|
1851
1875
|
|
|
1852
1876
|
// Log completions to EvolutionLogger
|
|
1853
1877
|
for (const sleepTask of sleepReflectionTasks) {
|
|
@@ -1879,7 +1903,7 @@ async function processEvolutionQueue(wctx: WorkspaceContext, logger: PluginLogge
|
|
|
1879
1903
|
}
|
|
1880
1904
|
|
|
1881
1905
|
if (queueChanged) {
|
|
1882
|
-
|
|
1906
|
+
atomicWriteFileSync(queuePath, JSON.stringify(queue, null, 2));
|
|
1883
1907
|
}
|
|
1884
1908
|
|
|
1885
1909
|
// Pipeline observability: log stage-level summary at end of cycle
|
|
@@ -1906,6 +1930,7 @@ async function processEvolutionQueue(wctx: WorkspaceContext, logger: PluginLogge
|
|
|
1906
1930
|
}
|
|
1907
1931
|
|
|
1908
1932
|
|
|
1933
|
+
|
|
1909
1934
|
async function processDetectionQueue(wctx: WorkspaceContext, api: OpenClawPluginApi, eventLog: EventLog) {
|
|
1910
1935
|
const {logger} = api;
|
|
1911
1936
|
try {
|
|
@@ -1961,7 +1986,7 @@ async function processDetectionQueue(wctx: WorkspaceContext, api: OpenClawPlugin
|
|
|
1961
1986
|
// Evolution queue is now the single active pain→principle path
|
|
1962
1987
|
|
|
1963
1988
|
|
|
1964
|
-
|
|
1989
|
+
|
|
1965
1990
|
export async function registerEvolutionTaskSession(
|
|
1966
1991
|
workspaceResolve: (key: string) => string,
|
|
1967
1992
|
taskId: string,
|
|
@@ -1975,7 +2000,7 @@ export async function registerEvolutionTaskSession(
|
|
|
1975
2000
|
|
|
1976
2001
|
try {
|
|
1977
2002
|
|
|
1978
|
-
|
|
2003
|
+
|
|
1979
2004
|
let rawQueue: RawQueueItem[];
|
|
1980
2005
|
try {
|
|
1981
2006
|
rawQueue = JSON.parse(fs.readFileSync(queuePath, 'utf8'));
|
|
@@ -1997,7 +2022,7 @@ export async function registerEvolutionTaskSession(
|
|
|
1997
2022
|
if (!task.started_at) {
|
|
1998
2023
|
task.started_at = new Date().toISOString();
|
|
1999
2024
|
}
|
|
2000
|
-
|
|
2025
|
+
atomicWriteFileSync(queuePath, JSON.stringify(queue, null, 2));
|
|
2001
2026
|
return true;
|
|
2002
2027
|
} finally {
|
|
2003
2028
|
releaseLock();
|
|
@@ -2037,14 +2062,14 @@ interface WorkerStatusReport {
|
|
|
2037
2062
|
/**
 * Persist the given status report as pretty-printed JSON to
 * `worker-status.json` inside `stateDir`.
 *
 * Best-effort: any error while serializing or writing is swallowed.
 *
 * @param stateDir Directory that holds the status file.
 * @param report   Status report to serialize.
 */
function writeWorkerStatus(stateDir: string, report: WorkerStatusReport): void {
  try {
    const target = path.join(stateDir, 'worker-status.json');
    const serialized = JSON.stringify(report, null, 2);
    atomicWriteFileSync(target, serialized);
  } catch {
    // Non-critical: worker-status.json is for monitoring, failure is acceptable
  }
}
|
|
2045
2070
|
|
|
2046
2071
|
|
|
2047
|
-
|
|
2072
|
+
|
|
2048
2073
|
async function processEvolutionQueueWithResult(
|
|
2049
2074
|
wctx: WorkspaceContext,
|
|
2050
2075
|
logger: PluginLogger,
|
|
@@ -2066,7 +2091,7 @@ async function processEvolutionQueueWithResult(
|
|
|
2066
2091
|
const purgeResult = purgeStaleFailedTasks(queue, logger);
|
|
2067
2092
|
if (purgeResult.purged > 0) {
|
|
2068
2093
|
// Write back the cleaned queue
|
|
2069
|
-
|
|
2094
|
+
atomicWriteFileSync(queuePath, JSON.stringify(queue, null, 2));
|
|
2070
2095
|
}
|
|
2071
2096
|
|
|
2072
2097
|
queueResult.total = queue.length;
|
|
@@ -2093,6 +2118,7 @@ export const EvolutionWorkerService: ExtendedEvolutionWorkerService = {
|
|
|
2093
2118
|
api: null,
|
|
2094
2119
|
_startedWorkspaces: new Set<string>(),
|
|
2095
2120
|
|
|
2121
|
+
|
|
2096
2122
|
start(ctx: OpenClawPluginServiceContext): void {
|
|
2097
2123
|
const workspaceDir = ctx?.workspaceDir;
|
|
2098
2124
|
const logger = ctx?.logger || console;
|
|
@@ -2121,6 +2147,7 @@ export const EvolutionWorkerService: ExtendedEvolutionWorkerService = {
|
|
|
2121
2147
|
const {config} = wctx;
|
|
2122
2148
|
const language = config.get('language') || 'en';
|
|
2123
2149
|
ensureStateTemplates({ logger }, wctx.stateDir, language);
|
|
2150
|
+
ensureCorePrinciples(wctx.stateDir, logger);
|
|
2124
2151
|
|
|
2125
2152
|
const initialDelay = 5000;
|
|
2126
2153
|
const interval = config.get('intervals.worker_poll_ms') || (15 * 60 * 1000);
|
|
@@ -2128,6 +2155,7 @@ export const EvolutionWorkerService: ExtendedEvolutionWorkerService = {
|
|
|
2128
2155
|
// Periodic trigger tracking
|
|
2129
2156
|
let heartbeatCounter = 0;
|
|
2130
2157
|
|
|
2158
|
+
|
|
2131
2159
|
async function runCycle(): Promise<void> {
|
|
2132
2160
|
const cycleStart = Date.now();
|
|
2133
2161
|
heartbeatCounter++;
|
|
@@ -2208,23 +2236,21 @@ export const EvolutionWorkerService: ExtendedEvolutionWorkerService = {
|
|
|
2208
2236
|
// with a diagnostician task, immediately trigger a heartbeat to start
|
|
2209
2237
|
// the diagnostician without waiting for the next 15-minute interval.
|
|
2210
2238
|
// Must run AFTER processEvolutionQueue — HEARTBEAT.md must be written first.
|
|
2239
|
+
//
|
|
2240
|
+
// P3 (#299): Use requestHeartbeatNow instead of runHeartbeatOnce.
|
|
2241
|
+
// requestHeartbeatNow enters the wake layer which auto-retries on
|
|
2242
|
+
// requests-in-flight (1s intervals). runHeartbeatOnce was a one-shot
|
|
2243
|
+
// that got permanently skipped when agent was busy.
|
|
2211
2244
|
if (painCheckResult.enqueued) {
|
|
2212
|
-
const canTrigger = !!api?.runtime?.system?.runHeartbeatOnce;
|
|
2213
|
-
logger.info(`[PD:EvolutionWorker] Pain flag enqueued —
|
|
2245
|
+
const canTrigger = !!api?.runtime?.system?.requestHeartbeatNow;
|
|
2246
|
+
logger.info(`[PD:EvolutionWorker] Pain flag enqueued — requestHeartbeatNow available: ${canTrigger}`);
|
|
2214
2247
|
if (canTrigger) {
|
|
2215
|
-
|
|
2216
|
-
|
|
2217
|
-
|
|
2218
|
-
|
|
2219
|
-
logger.info(`[PD:EvolutionWorker] Immediate heartbeat result: status=${hbResult.status}${hbResult.status === 'ran' ? ` duration=${hbResult.durationMs}ms` : ''}${hbResult.status === 'skipped' || hbResult.status === 'failed' ? ` reason=${hbResult.reason}` : ''}`);
|
|
2220
|
-
if (hbResult.status === 'skipped' || hbResult.status === 'failed') {
|
|
2221
|
-
logger.warn(`[PD:EvolutionWorker] Immediate heartbeat was ${hbResult.status} (${hbResult.reason}). Diagnostician will start on next regular heartbeat cycle.`);
|
|
2222
|
-
}
|
|
2223
|
-
} catch (hbErr) {
|
|
2224
|
-
logger.warn(`[PD:EvolutionWorker] Failed to trigger immediate heartbeat: ${String(hbErr)}. Diagnostician will start on next regular heartbeat cycle.`);
|
|
2225
|
-
}
|
|
2248
|
+
api.runtime.system.requestHeartbeatNow({
|
|
2249
|
+
reason: `pd-pain-diagnosis: pain flag detected, starting diagnostician`,
|
|
2250
|
+
});
|
|
2251
|
+
logger.info(`[PD:EvolutionWorker] Heartbeat wake requested — wake layer will auto-retry if busy`);
|
|
2226
2252
|
} else {
|
|
2227
|
-
logger.warn(`[PD:EvolutionWorker]
|
|
2253
|
+
logger.warn(`[PD:EvolutionWorker] requestHeartbeatNow not available. Diagnostician will start on next regular heartbeat cycle.`);
|
|
2228
2254
|
}
|
|
2229
2255
|
}
|
|
2230
2256
|
|
|
@@ -385,7 +385,10 @@ function persistArtifact(
|
|
|
385
385
|
fs.mkdirSync(dir, { recursive: true });
|
|
386
386
|
}
|
|
387
387
|
|
|
388
|
-
|
|
388
|
+
// Atomic write: temp file + rename prevents corruption on crash
|
|
389
|
+
const tmpPath = artifactPath + '.tmp';
|
|
390
|
+
fs.writeFileSync(tmpPath, JSON.stringify(sampleRecord, null, 2), 'utf8');
|
|
391
|
+
fs.renameSync(tmpPath, artifactPath);
|
|
389
392
|
return artifactPath;
|
|
390
393
|
}
|
|
391
394
|
|
|
@@ -36,6 +36,20 @@ export {
|
|
|
36
36
|
type NocturnalResult,
|
|
37
37
|
} from './nocturnal-workflow-manager.js';
|
|
38
38
|
|
|
39
|
+
// TODO: correction-observer-workflow-manager.ts is missing from repo
|
|
40
|
+
// export {
|
|
41
|
+
// CorrectionObserverWorkflowManager,
|
|
42
|
+
// createCorrectionObserverWorkflowManager,
|
|
43
|
+
// correctionObserverWorkflowSpec,
|
|
44
|
+
// type CorrectionObserverWorkflowOptions,
|
|
45
|
+
// } from './correction-observer-workflow-manager.js';
|
|
46
|
+
|
|
47
|
+
// export type {
|
|
48
|
+
// CorrectionObserverWorkflowSpec,
|
|
49
|
+
// CorrectionObserverPayload,
|
|
50
|
+
// CorrectionObserverResult,
|
|
51
|
+
// } from './correction-observer-types.js';
|
|
52
|
+
|
|
39
53
|
export type {
|
|
40
54
|
WorkflowState,
|
|
41
55
|
WorkflowTransport,
|
|
@@ -311,7 +311,9 @@ export class NocturnalWorkflowManager implements WorkflowManager {
|
|
|
311
311
|
this.logger.warn(`[PD:NocturnalWorkflow] [${workflowId}] Arbiter result: passed=${result.diagnostics.arbiterResult.passed}, failures=${result.diagnostics.arbiterResult.failures.map(f => f.reason).join('; ')}`);
|
|
312
312
|
}
|
|
313
313
|
if (result.diagnostics?.selection) {
|
|
314
|
-
|
|
314
|
+
const sel = result.diagnostics.selection;
|
|
315
|
+
const diag = sel.diagnostics;
|
|
316
|
+
this.logger.warn(`[PD:NocturnalWorkflow] [${workflowId}] Selection: decision=${sel.decision}, principleId=${sel.selectedPrincipleId ?? 'none'}, sessionId=${sel.selectedSessionId ?? 'none'}, totalEvaluable=${diag.totalEvaluablePrinciples ?? 0}, filteredByCooldown=${diag.filteredByCooldown ?? 0}, passed=${diag.passedPrinciples?.length ?? 0}`);
|
|
315
317
|
}
|
|
316
318
|
|
|
317
319
|
this.store.updateWorkflowState(workflowId, 'terminal_error');
|