principles-disciple 1.33.0 → 1.34.1

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions exactly as they appear in their respective public registries.
@@ -5,7 +5,7 @@ import { createHash } from 'crypto';
5
5
  import type { OpenClawPluginServiceContext, OpenClawPluginApi, PluginLogger } from '../openclaw-sdk.js';
6
6
  import { DictionaryService } from '../core/dictionary-service.js';
7
7
  import { DetectionService } from '../core/detection-service.js';
8
- import { ensureStateTemplates, ensureCorePrinciples } from '../core/init.js';
8
+ import { ensureStateTemplates } from '../core/init.js';
9
9
  import { SystemLogger } from '../core/system-logger.js';
10
10
  import { WorkspaceContext } from '../core/workspace-context.js';
11
11
  import type { EventLog } from '../core/event-log.js';
@@ -31,14 +31,11 @@ import {
31
31
  import { validateNocturnalSnapshotIngress } from '../core/nocturnal-snapshot-contract.js';
32
32
  import { isExpectedSubagentError } from './subagent-workflow/subagent-error-utils.js';
33
33
  import { readPainFlagContract } from '../core/pain.js';
34
-
35
- // ── Atomic File Write ────────────────────────────────────────────────────────
36
- // Write to temp then rename — atomic on POSIX, prevents partial-write corruption on crash.
37
- function atomicWriteFileSync(filePath: string, data: string): void {
38
- const tmpPath = filePath + '.tmp';
39
- fs.writeFileSync(tmpPath, data, 'utf8');
40
- fs.renameSync(tmpPath, filePath);
41
- }
34
+ import { CorrectionObserverWorkflowManager, correctionObserverWorkflowSpec } from './correction-observer-workflow-manager.js';
35
+ import type { CorrectionObserverPayload } from './correction-observer-types.js';
36
+ import { KeywordOptimizationService } from './keyword-optimization-service.js';
37
+ import { TrajectoryRegistry } from '../core/trajectory.js';
38
+ import { CorrectionCueLearner } from '../core/correction-cue-learner.js';
42
39
 
43
40
  const WORKFLOW_TTL_MS = 5 * 60 * 1000; // 5 minutes default TTL for helper workflows
44
41
  import { OpenClawTrinityRuntimeAdapter } from '../core/nocturnal-trinity.js';
@@ -56,7 +53,6 @@ interface WatchdogResult {
56
53
  details: string[];
57
54
  }
58
55
 
59
- // eslint-disable-next-line complexity
60
56
  async function runWorkflowWatchdog(
61
57
  wctx: WorkspaceContext,
62
58
  api: OpenClawPluginApi | null,
@@ -72,10 +68,104 @@ async function runWorkflowWatchdog(
72
68
  try {
73
69
  const allWorkflows: WorkflowRow[] = store.listWorkflows();
74
70
 
75
- runWorkflowWatchdogCheckStale(allWorkflows, store, now, details, subagentRuntime, agentSession, logger);
76
- runWorkflowWatchdogCheckUncleared(allWorkflows, details);
77
- runWorkflowWatchdogCheckNocturnal(allWorkflows, details);
71
+ // Check 1: Stale active workflows (active > 2x TTL)
72
+ const staleThreshold = WORKFLOW_TTL_MS * 2;
73
+ const staleActive = allWorkflows.filter(
74
+ (wf: WorkflowRow) => wf.state === 'active' && (now - wf.created_at) > staleThreshold,
75
+ );
76
+ if (staleActive.length > 0) {
77
+ for (const wf of staleActive) {
78
+ const ageMin = Math.round((now - wf.created_at) / 60000);
79
+ details.push(`stale_active: ${wf.workflow_id} (${wf.workflow_type}, ${ageMin}min old)`);
80
+
81
+ // #257: Check if the last recorded event reason indicates expected subagent unavailability.
82
+ // If so, skip marking as terminal_error — the workflow is stale because the subagent
83
+ // was expectedly unavailable (daemon mode, process isolation), not due to a hard failure.
84
+ const events = store.getEvents(wf.workflow_id);
85
+ const lastEventReason = events.length > 0 ? events[events.length - 1].reason : 'unknown';
86
+ if (isExpectedSubagentError(lastEventReason)) {
87
+ logger?.debug?.(`[PD:Watchdog] Skipping stale active workflow ${wf.workflow_id}: expected subagent error (${lastEventReason})`);
88
+ continue;
89
+ }
90
+
91
+ store.updateWorkflowState(wf.workflow_id, 'terminal_error');
92
+ store.recordEvent(wf.workflow_id, 'watchdog_timeout', 'active', 'terminal_error', `Stale active > ${staleThreshold / 60000}s`, { ageMs: now - wf.created_at });
93
+
94
+ // Cleanup session if possible (#188: gateway-safe fallback)
95
+ if (wf.child_session_key) {
96
+ try {
97
+ if (subagentRuntime) {
98
+ await subagentRuntime.deleteSession({ sessionKey: wf.child_session_key, deleteTranscript: true });
99
+ logger?.info?.(`[PD:Watchdog] Cleaned up stale session: ${wf.child_session_key}`);
100
+ } else if (agentSession) {
101
+ const storePath = agentSession.resolveStorePath();
102
+ const sessionStore = agentSession.loadSessionStore(storePath, { skipCache: true });
103
+ const normalizedKey = wf.child_session_key.toLowerCase();
104
+ if (sessionStore[normalizedKey]) {
105
+ delete sessionStore[normalizedKey];
106
+ await agentSession.saveSessionStore(storePath, sessionStore);
107
+ logger?.info?.(`[PD:Watchdog] Cleaned up stale session via agentSession fallback: ${wf.child_session_key}`);
108
+ }
109
+ }
110
+ } catch (cleanupErr) {
111
+ const errMsg = String(cleanupErr);
112
+ if (errMsg.includes('gateway request') && agentSession) {
113
+ const storePath = agentSession.resolveStorePath();
114
+ const sessionStore = agentSession.loadSessionStore(storePath, { skipCache: true });
115
+ const normalizedKey = wf.child_session_key.toLowerCase();
116
+ if (sessionStore[normalizedKey]) {
117
+ delete sessionStore[normalizedKey];
118
+ await agentSession.saveSessionStore(storePath, sessionStore);
119
+ logger?.info?.(`[PD:Watchdog] Cleaned up stale session via agentSession fallback after gateway error: ${wf.child_session_key}`);
120
+ }
121
+ } else {
122
+ logger?.warn?.(`[PD:Watchdog] Failed to cleanup session ${wf.child_session_key}: ${errMsg}`);
123
+ }
124
+ }
125
+ }
126
+ }
127
+ }
78
128
 
129
+ // Check 2: Workflows in terminal_error/expired without cleanup
130
+ const unclearedTerminal = allWorkflows.filter(
131
+ (wf: WorkflowRow) => (wf.state === 'terminal_error' || wf.state === 'expired') && wf.cleanup_state === 'pending',
132
+ );
133
+ if (unclearedTerminal.length > 0) {
134
+ details.push(`uncleared_terminal: ${unclearedTerminal.length} workflows (will be swept next cycle)`);
135
+ }
136
+
137
+ // Check 3: Nocturnal workflow result validation (#181 pattern)
138
+ const nocturnalCompleted = allWorkflows.filter(
139
+ (wf: WorkflowRow) => wf.workflow_type === 'nocturnal' && wf.state === 'completed',
140
+ );
141
+ for (const wf of nocturnalCompleted) {
142
+ // Check if the metadata snapshot has all zeros (invalid data)
143
+ try {
144
+ const meta = JSON.parse(wf.metadata_json) as Record<string, unknown>;
145
+ const snapshot = meta.snapshot as Record<string, unknown> | undefined;
146
+ if (snapshot) {
147
+ // #219: Check for fallback data source (partial stats from pain context)
148
+ const dataSource = snapshot._dataSource as string | undefined;
149
+ if (dataSource === 'pain_context_fallback') {
150
+ details.push(`fallback_snapshot: nocturnal workflow ${wf.workflow_id} uses pain-context fallback (stats may be incomplete)`);
151
+ }
152
+ const stats = snapshot.stats as Record<string, number> | undefined;
153
+ // #246: Stats are now always number (never null). Detect "empty" fallback:
154
+ // fallback + all counts zero means no real data was available.
155
+ // NOTE: totalAssistantTurns may be 0 even for valid sessions because
156
+ // listRecentNocturnalCandidateSessions (used in fallback path) does not
157
+ // populate assistantTurnCount (only getNocturnalSessionSnapshot does).
158
+ // We use totalToolCalls=0 as the primary indicator instead.
159
+ if (stats && dataSource === 'pain_context_fallback' &&
160
+ stats.totalToolCalls === 0 && stats.totalGateBlocks === 0 &&
161
+ stats.failureCount === 0) {
162
+ details.push(`fallback_snapshot_stats: nocturnal workflow ${wf.workflow_id} has empty fallback stats (no trajectory data found)`);
163
+ }
164
+ }
165
+ } catch { /* ignore malformed metadata */ }
166
+ }
167
+
168
+ // Summary
79
169
  const stateCounts: Record<string, number> = {};
80
170
  for (const wf of allWorkflows) {
81
171
  stateCounts[wf.state] = (stateCounts[wf.state] || 0) + 1;
@@ -96,106 +186,6 @@ async function runWorkflowWatchdog(
96
186
  return { anomalies: details.length, details };
97
187
  }
98
188
 
99
- // ── Watchdog helpers (extracted from runWorkflowWatchdog for complexity) ──
100
-
101
- // eslint-disable-next-line complexity
102
- async function cleanupStaleWorkflowSession(
103
- wf: WorkflowRow,
104
- subagentRuntime: { deleteSession: (opts: { sessionKey: string; deleteTranscript: boolean }) => Promise<void> } | undefined,
105
- agentSession: { resolveStorePath: () => string; loadSessionStore: (p: string, o: { skipCache: boolean }) => Record<string, unknown>; saveSessionStore: (p: string, s: Record<string, unknown>) => Promise<void> } | undefined,
106
- logger?: PluginLogger,
107
- ): Promise<void> {
108
- if (!wf.child_session_key) return;
109
- try {
110
- if (subagentRuntime) {
111
- await subagentRuntime.deleteSession({ sessionKey: wf.child_session_key, deleteTranscript: true });
112
- logger?.info?.(`[PD:Watchdog] Cleaned up stale session: ${wf.child_session_key}`);
113
- } else if (agentSession) {
114
- const storePath = agentSession.resolveStorePath();
115
- const sessionStore = agentSession.loadSessionStore(storePath, { skipCache: true });
116
- const normalizedKey = wf.child_session_key.toLowerCase();
117
- if (sessionStore[normalizedKey]) {
118
- delete sessionStore[normalizedKey];
119
- await agentSession.saveSessionStore(storePath, sessionStore);
120
- logger?.info?.(`[PD:Watchdog] Cleaned up stale session via agentSession fallback: ${wf.child_session_key}`);
121
- }
122
- }
123
- } catch (cleanupErr) {
124
- const errMsg = String(cleanupErr);
125
- if (errMsg.includes('gateway request') && agentSession) {
126
- const storePath = agentSession.resolveStorePath();
127
- const sessionStore = agentSession.loadSessionStore(storePath, { skipCache: true });
128
- const normalizedKey = wf.child_session_key.toLowerCase();
129
- if (sessionStore[normalizedKey]) {
130
- delete sessionStore[normalizedKey];
131
- await agentSession.saveSessionStore(storePath, sessionStore);
132
- logger?.info?.(`[PD:Watchdog] Cleaned up stale session via agentSession fallback after gateway error: ${wf.child_session_key}`);
133
- }
134
- } else {
135
- logger?.warn?.(`[PD:Watchdog] Failed to cleanup session ${wf.child_session_key}: ${errMsg}`);
136
- }
137
- }
138
- }
139
-
140
- function runWorkflowWatchdogCheckStale(
141
- allWorkflows: WorkflowRow[],
142
- store: WorkflowStore,
143
- now: number,
144
- details: string[],
145
- subagentRuntime: { deleteSession: (opts: { sessionKey: string; deleteTranscript: boolean }) => Promise<void> } | undefined,
146
- agentSession: { resolveStorePath: () => string; loadSessionStore: (p: string, o: { skipCache: boolean }) => Record<string, unknown>; saveSessionStore: (p: string, s: Record<string, unknown>) => Promise<void> } | undefined,
147
- logger?: PluginLogger,
148
- ): void {
149
- const staleThreshold = WORKFLOW_TTL_MS * 2;
150
- for (const wf of allWorkflows) {
151
- if (wf.state !== 'active' || (now - wf.created_at) <= staleThreshold) continue;
152
- const ageMin = Math.round((now - wf.created_at) / 60000);
153
- details.push(`stale_active: ${wf.workflow_id} (${wf.workflow_type}, ${ageMin}min old)`);
154
-
155
- const events = store.getEvents(wf.workflow_id);
156
- const lastEventReason = events.length > 0 ? events[events.length - 1].reason : 'unknown';
157
- if (isExpectedSubagentError(lastEventReason)) {
158
- logger?.debug?.(`[PD:Watchdog] Skipping stale active workflow ${wf.workflow_id}: expected subagent error (${lastEventReason})`);
159
- continue;
160
- }
161
-
162
- store.updateWorkflowState(wf.workflow_id, 'terminal_error');
163
- store.recordEvent(wf.workflow_id, 'watchdog_timeout', 'active', 'terminal_error', `Stale active > ${staleThreshold / 60000}s`, { ageMs: now - wf.created_at });
164
- void cleanupStaleWorkflowSession(wf, subagentRuntime, agentSession, logger);
165
- }
166
- }
167
-
168
- function runWorkflowWatchdogCheckUncleared(allWorkflows: WorkflowRow[], details: string[]): void {
169
- const unclearedTerminal = allWorkflows.filter(
170
- (wf: WorkflowRow) => (wf.state === 'terminal_error' || wf.state === 'expired') && wf.cleanup_state === 'pending',
171
- );
172
- if (unclearedTerminal.length > 0) {
173
- details.push(`uncleared_terminal: ${unclearedTerminal.length} workflows (will be swept next cycle)`);
174
- }
175
- }
176
-
177
- // eslint-disable-next-line complexity
178
- function runWorkflowWatchdogCheckNocturnal(allWorkflows: WorkflowRow[], details: string[]): void {
179
- for (const wf of allWorkflows) {
180
- if (wf.workflow_type !== 'nocturnal' || wf.state !== 'completed') continue;
181
- try {
182
- const meta = JSON.parse(wf.metadata_json) as Record<string, unknown>;
183
- const snapshot = meta.snapshot as Record<string, unknown> | undefined;
184
- if (!snapshot) continue;
185
- const dataSource = snapshot._dataSource as string | undefined;
186
- if (dataSource === 'pain_context_fallback') {
187
- details.push(`fallback_snapshot: nocturnal workflow ${wf.workflow_id} uses pain-context fallback (stats may be incomplete)`);
188
- const stats = snapshot.stats as Record<string, number> | undefined;
189
- if (stats && stats.totalToolCalls === 0 && stats.totalGateBlocks === 0 && stats.failureCount === 0) {
190
- details.push(`fallback_snapshot_stats: nocturnal workflow ${wf.workflow_id} has empty fallback stats (no trajectory data found)`);
191
- }
192
- }
193
- } catch { /* ignore malformed metadata */ }
194
- }
195
- }
196
-
197
- // ── End watchdog helpers ──
198
-
199
189
  let timeoutId: NodeJS.Timeout | null = null;
200
190
 
201
191
  /**
@@ -370,7 +360,6 @@ function isSessionAtOrBeforeTriggerTime(
370
360
  return true;
371
361
  }
372
362
 
373
- // eslint-disable-next-line complexity
374
363
  function buildFallbackNocturnalSnapshot(
375
364
  sleepTask: EvolutionQueueItem,
376
365
  extractor?: ReturnType<typeof createNocturnalTrajectoryExtractor> | null,
@@ -505,13 +494,14 @@ function findRecentDuplicateTask(
505
494
  reason?: string
506
495
  ): EvolutionQueueItem | undefined {
507
496
 
497
+
508
498
  const key = normalizePainDedupKey(source, preview, reason);
509
499
  return queue.find((task) => {
510
500
  if (task.status === 'completed') return false;
511
-
512
501
  const taskTime = new Date(task.enqueued_at || task.timestamp).getTime();
513
502
  if (!Number.isFinite(taskTime) || (now - taskTime) > PAIN_QUEUE_DEDUP_WINDOW_MS) return false;
514
503
 
504
+
515
505
  return normalizePainDedupKey(task.source, task.trigger_text_preview || '', task.reason) === key;
516
506
  });
517
507
  }
@@ -566,7 +556,6 @@ function normalizePainDedupKey(source: string, preview: string, reason?: string)
566
556
 
567
557
 
568
558
 
569
-
570
559
  export function hasRecentDuplicateTask(queue: EvolutionQueueItem[], source: string, preview: string, now: number, reason?: string): boolean {
571
560
  return !!findRecentDuplicateTask(queue, source, preview, now, reason);
572
561
  }
@@ -709,7 +698,6 @@ function loadEvolutionQueue(queuePath: string): EvolutionQueueItem[] {
709
698
  * Build and persist a new sleep_reflection task.
710
699
  */
711
700
 
712
-
713
701
  function enqueueNewSleepReflectionTask(
714
702
  queue: EvolutionQueueItem[],
715
703
  recentPainContext: ReturnType<typeof readRecentPainContext>,
@@ -736,7 +724,7 @@ function enqueueNewSleepReflectionTask(
736
724
  recentPainContext,
737
725
  });
738
726
 
739
- atomicWriteFileSync(queuePath, JSON.stringify(queue, null, 2));
727
+ fs.writeFileSync(queuePath, JSON.stringify(queue, null, 2), 'utf8');
740
728
  logger?.info?.(`[PD:EvolutionWorker] Enqueued sleep_reflection task ${taskId}`);
741
729
  }
742
730
 
@@ -775,6 +763,60 @@ async function enqueueSleepReflectionTask(
775
763
  }
776
764
  }
777
765
 
766
+ /**
767
+ * Enqueue a keyword_optimization task if one is not already pending/in-progress (CORR-08).
768
+ * Dispatches LLM subagent via CorrectionObserverWorkflowManager to optimize
769
+ * correction keywords based on FPR and match history.
770
+ */
771
+ async function enqueueKeywordOptimizationTask(
772
+ wctx: WorkspaceContext,
773
+ logger: PluginLogger,
774
+ ): Promise<void> {
775
+ const queuePath = wctx.resolve('EVOLUTION_QUEUE');
776
+ const releaseLock = await requireQueueLock(queuePath, logger, 'enqueueKeywordOpt', EVOLUTION_QUEUE_LOCK_SUFFIX);
777
+
778
+ try {
779
+ const queue = loadEvolutionQueue(queuePath);
780
+
781
+ // Guard: Skip if a keyword_optimization task is already pending/in-progress (CORR-08)
782
+ if (hasPendingTask(queue, 'keyword_optimization')) {
783
+ logger?.debug?.('[PD:EvolutionWorker] keyword_optimization task already pending/in-progress, skipping');
784
+ return;
785
+ }
786
+
787
+ // Guard: Skip if daily optimization throttle is exhausted (CORR-08)
788
+ const learner = CorrectionCueLearner.get(wctx.stateDir);
789
+ if (!learner.canRunKeywordOptimization()) {
790
+ logger?.debug?.('[PD:EvolutionWorker] keyword_optimization throttle exhausted, skipping');
791
+ return;
792
+ }
793
+
794
+ const taskId = createEvolutionTaskId('keyword_optimization', 50, 'keyword optimization', 'Keyword optimization via LLM', Date.now());
795
+ const nowIso = new Date().toISOString();
796
+
797
+ queue.push({
798
+ id: taskId,
799
+ taskKind: 'keyword_optimization',
800
+ priority: 'medium',
801
+ score: 50,
802
+ source: 'correction',
803
+ reason: 'Keyword optimization triggered by heartbeat',
804
+ trigger_text_preview: 'Keyword optimization via LLM',
805
+ timestamp: nowIso,
806
+ enqueued_at: nowIso,
807
+ status: 'pending',
808
+ traceId: taskId,
809
+ retryCount: 0,
810
+ maxRetries: 1,
811
+ });
812
+
813
+ fs.writeFileSync(queuePath, JSON.stringify(queue, null, 2), 'utf8');
814
+ logger?.info?.(`[PD:EvolutionWorker] Enqueued keyword_optimization task ${taskId}`);
815
+ } finally {
816
+ releaseLock();
817
+ }
818
+ }
819
+
778
820
  interface ParsedPainValues {
779
821
  score: number; source: string; reason: string; preview: string;
780
822
  traceId: string; sessionId: string; agentId: string;
@@ -782,7 +824,6 @@ interface ParsedPainValues {
782
824
 
783
825
 
784
826
 
785
- // eslint-disable-next-line complexity
786
827
  async function doEnqueuePainTask(
787
828
  wctx: WorkspaceContext, logger: PluginLogger, painFlagPath: string,
788
829
  result: WorkerStatusReport['pain_flag'], v: ParsedPainValues,
@@ -828,7 +869,7 @@ async function doEnqueuePainTask(
828
869
  retryCount: 0, maxRetries: 3,
829
870
  });
830
871
 
831
- atomicWriteFileSync(queuePath, JSON.stringify(queue, null, 2));
872
+ fs.writeFileSync(queuePath, JSON.stringify(queue, null, 2), 'utf8');
832
873
  fs.appendFileSync(painFlagPath, `\nstatus: queued\ntask_id: ${taskId}\n`, 'utf8');
833
874
  result.enqueued = true;
834
875
 
@@ -856,7 +897,6 @@ async function doEnqueuePainTask(
856
897
  return result;
857
898
  }
858
899
 
859
- // eslint-disable-next-line complexity
860
900
  async function checkPainFlag(wctx: WorkspaceContext, logger: PluginLogger): Promise<WorkerStatusReport['pain_flag']> {
861
901
  const result: WorkerStatusReport['pain_flag'] = { exists: false, score: null, source: null, enqueued: false, skipped_reason: null };
862
902
  try {
@@ -1031,7 +1071,6 @@ async function checkPainFlag(wctx: WorkspaceContext, logger: PluginLogger): Prom
1031
1071
 
1032
1072
 
1033
1073
 
1034
- // eslint-disable-next-line complexity
1035
1074
  async function processEvolutionQueue(wctx: WorkspaceContext, logger: PluginLogger, eventLog: EventLog, api?: OpenClawPluginApi) {
1036
1075
  const queuePath = wctx.resolve('EVOLUTION_QUEUE');
1037
1076
  if (!fs.existsSync(queuePath)) {
@@ -1069,6 +1108,11 @@ async function processEvolutionQueue(wctx: WorkspaceContext, logger: PluginLogge
1069
1108
 
1070
1109
  let queueChanged = rawQueue.some(isLegacyQueueItem);
1071
1110
 
1111
+ // Guard: Skip keyword_optimization if one is already pending/in-progress (CORR-08)
1112
+ if (hasPendingTask(queue, 'keyword_optimization')) {
1113
+ logger?.debug?.('[PD:EvolutionWorker] keyword_optimization task already pending/in-progress, skipping enqueue');
1114
+ }
1115
+
1072
1116
  const {config} = wctx;
1073
1117
  const timeout = config.get('intervals.task_timeout_ms') || (60 * 60 * 1000); // Default 1 hour
1074
1118
 
@@ -1614,7 +1658,7 @@ async function processEvolutionQueue(wctx: WorkspaceContext, logger: PluginLogge
1614
1658
 
1615
1659
  // Write claimed state (includes any pain changes from above) and release lock
1616
1660
  if (queueChanged) {
1617
- atomicWriteFileSync(queuePath, JSON.stringify(queue, null, 2));
1661
+ fs.writeFileSync(queuePath, JSON.stringify(queue, null, 2), 'utf8');
1618
1662
  }
1619
1663
  releaseLock();
1620
1664
  for (const sleepTask of sleepReflectionTasks) {
@@ -1632,8 +1676,10 @@ async function processEvolutionQueue(wctx: WorkspaceContext, logger: PluginLogge
1632
1676
 
1633
1677
  let workflowId: string | undefined;
1634
1678
 
1679
+
1635
1680
  let nocturnalManager: NocturnalWorkflowManager;
1636
1681
 
1682
+
1637
1683
  let snapshotData: NocturnalSessionSnapshot | undefined;
1638
1684
 
1639
1685
  if (isPollingTask) {
@@ -1866,7 +1912,7 @@ async function processEvolutionQueue(wctx: WorkspaceContext, logger: PluginLogge
1866
1912
  freshQueue[idx] = sleepTask;
1867
1913
  }
1868
1914
  }
1869
- atomicWriteFileSync(queuePath, JSON.stringify(freshQueue, null, 2));
1915
+ fs.writeFileSync(queuePath, JSON.stringify(freshQueue, null, 2), 'utf8');
1870
1916
 
1871
1917
  // Log completions to EvolutionLogger
1872
1918
  for (const sleepTask of sleepReflectionTasks) {
@@ -1897,8 +1943,161 @@ async function processEvolutionQueue(wctx: WorkspaceContext, logger: PluginLogge
1897
1943
  return;
1898
1944
  }
1899
1945
 
1946
+ // ── keyword_optimization task processing ──────────────────────────────
1947
+ // Process keyword_optimization tasks independently of sleep_reflection.
1948
+ // Uses CorrectionObserverWorkflowManager to dispatch LLM subagent and
1949
+ // KeywordOptimizationService to apply mutations to keyword store (CORR-09).
1950
+ const pendingKeywordOptTasks = queue.filter(t => t.status === 'pending' && t.taskKind === 'keyword_optimization');
1951
+ const inProgressKeywordOptTasks = queue.filter(t =>
1952
+ t.status === 'in_progress' &&
1953
+ t.taskKind === 'keyword_optimization' &&
1954
+ t.resultRef &&
1955
+ !t.resultRef.startsWith('trinity-draft')
1956
+ );
1957
+ const keywordOptTasks = [...pendingKeywordOptTasks, ...inProgressKeywordOptTasks];
1958
+ if (keywordOptTasks.length > 0) {
1959
+ // Claim pending tasks inside lock
1960
+ for (const koTask of pendingKeywordOptTasks) {
1961
+ koTask.status = 'in_progress';
1962
+ koTask.started_at = new Date().toISOString();
1963
+ }
1964
+ queueChanged = queueChanged || pendingKeywordOptTasks.length > 0;
1965
+
1966
+ // Release lock during LLM dispatch (long-running)
1967
+ fs.writeFileSync(queuePath, JSON.stringify(queue, null, 2), 'utf8');
1968
+ releaseLock();
1969
+ lockReleased = true;
1970
+
1971
+ for (const koTask of keywordOptTasks) {
1972
+ const isPolling = !!koTask.resultRef && !koTask.resultRef.startsWith('trinity-draft');
1973
+
1974
+ if (isPolling) {
1975
+ logger?.debug?.(`[PD:EvolutionWorker] Polling existing keyword_optimization task ${koTask.id}`);
1976
+ } else {
1977
+ logger?.info?.(`[PD:EvolutionWorker] Processing keyword_optimization task ${koTask.id}`);
1978
+ }
1979
+
1980
+ try {
1981
+ // Build trajectoryHistory via KeywordOptimizationService
1982
+ const koService = KeywordOptimizationService.get(wctx.stateDir, wctx.workspaceDir, logger);
1983
+ const db = TrajectoryRegistry.get(wctx.workspaceDir);
1984
+ const recentSessionIds = db.listRecentSessions({ limit: 10 }).map(s => s.sessionId);
1985
+ const trajectoryHistory = await koService.buildTrajectoryHistory(recentSessionIds);
1986
+
1987
+ // Build full payload (CORR-09, D-40-07, D-40-08)
1988
+ const learner = CorrectionCueLearner.get(wctx.stateDir);
1989
+ const store = learner.getStore();
1990
+ const payload: CorrectionObserverPayload = {
1991
+ workspaceDir: wctx.workspaceDir,
1992
+ parentSessionId: `keyword_optimization:${koTask.id}`,
1993
+ keywordStoreSummary: {
1994
+ totalKeywords: store.keywords.length,
1995
+ terms: store.keywords.map(k => ({
1996
+ term: k.term,
1997
+ weight: k.weight,
1998
+ hitCount: k.hitCount ?? 0,
1999
+ truePositiveCount: k.truePositiveCount ?? 0,
2000
+ falsePositiveCount: k.falsePositiveCount ?? 0,
2001
+ })),
2002
+ },
2003
+ recentMessages: [],
2004
+ trajectoryHistory,
2005
+ };
2006
+
2007
+ // Dispatch LLM subagent via CorrectionObserverWorkflowManager
2008
+ const manager = new CorrectionObserverWorkflowManager({
2009
+ workspaceDir: wctx.workspaceDir,
2010
+ logger,
2011
+ subagent: api?.runtime?.subagent!,
2012
+ agentSession: api?.runtime?.agent?.session,
2013
+ });
2014
+
2015
+ let workflowId: string | undefined;
2016
+ if (!isPolling) {
2017
+ const handle = await manager.startWorkflow(correctionObserverWorkflowSpec, {
2018
+ parentSessionId: `keyword_optimization:${koTask.id}`,
2019
+ workspaceDir: wctx.workspaceDir,
2020
+ taskInput: payload,
2021
+ });
2022
+ workflowId = handle.workflowId;
2023
+ koTask.resultRef = workflowId;
2024
+ } else {
2025
+ workflowId = koTask.resultRef!;
2026
+ }
2027
+
2028
+ // Poll workflow state
2029
+ const summary = await manager.getWorkflowDebugSummary(workflowId);
2030
+ if (summary) {
2031
+ if (summary.state === 'completed') {
2032
+ // Get parsed LLM result and apply mutations to keyword store (CORR-09)
2033
+ const parsedResult = await manager.getWorkflowResult(workflowId);
2034
+
2035
+ if (parsedResult?.updated) {
2036
+ koService.applyResult(parsedResult);
2037
+ await learner.recordOptimizationPerformed();
2038
+ logger?.info?.(`[PD:EvolutionWorker] keyword_optimization applied mutations: ${parsedResult.summary}`);
2039
+ } else {
2040
+ logger?.info?.(`[PD:EvolutionWorker] keyword_optimization completed with no updates`);
2041
+ }
2042
+
2043
+ koTask.status = 'completed';
2044
+ koTask.completed_at = new Date().toISOString();
2045
+ koTask.resolution = 'marker_detected';
2046
+ logger?.info?.(`[PD:EvolutionWorker] keyword_optimization task ${koTask.id} workflow completed`);
2047
+ } else if (summary.state === 'terminal_error') {
2048
+ koTask.status = 'failed';
2049
+ koTask.completed_at = new Date().toISOString();
2050
+ koTask.resolution = 'failed_max_retries';
2051
+ koTask.retryCount = (koTask.retryCount ?? 0) + 1;
2052
+ const lastEvent = summary.recentEvents[summary.recentEvents.length - 1];
2053
+ koTask.lastError = `keyword_optimization failed: ${lastEvent?.reason ?? 'unknown'}`;
2054
+ logger?.warn?.(`[PD:EvolutionWorker] keyword_optimization task ${koTask.id} workflow terminal_error: ${koTask.lastError}`);
2055
+ } else {
2056
+ logger?.info?.(`[PD:EvolutionWorker] keyword_optimization task ${koTask.id} workflow ${summary.state}, will poll again next cycle`);
2057
+ }
2058
+ }
2059
+ } catch (koErr) {
2060
+ koTask.status = 'failed';
2061
+ koTask.completed_at = new Date().toISOString();
2062
+ koTask.resolution = 'failed_max_retries';
2063
+ koTask.lastError = String(koErr);
2064
+ koTask.retryCount = (koTask.retryCount ?? 0) + 1;
2065
+ logger?.error?.(`[PD:EvolutionWorker] keyword_optimization task ${koTask.id} threw: ${koErr}`);
2066
+ }
2067
+ }
2068
+
2069
+ // Re-acquire lock to write results
2070
+ const koResultLock = await requireQueueLock(queuePath, logger, 'keywordOptResult');
2071
+ try {
2072
+ let freshQueue: (RawQueueItem | EvolutionQueueItem)[] = [];
2073
+ try {
2074
+ freshQueue = JSON.parse(fs.readFileSync(queuePath, 'utf8'));
2075
+ } catch (readErr) {
2076
+ // Queue file corrupted — log warning but preserve in-memory task state
2077
+ logger?.warn?.(`[PD:EvolutionWorker] Queue file corrupted (${String(readErr)}), preserving in-memory state`);
2078
+ freshQueue = [];
2079
+ }
2080
+
2081
+ // Append or replace keyword_optimization tasks
2082
+ for (const koTask of keywordOptTasks) {
2083
+ const idx = freshQueue.findIndex((t) => (t as { id?: string }).id === koTask.id);
2084
+ if (idx >= 0) {
2085
+ freshQueue[idx] = koTask;
2086
+ } else {
2087
+ freshQueue.push(koTask);
2088
+ }
2089
+ }
2090
+ fs.writeFileSync(queuePath, JSON.stringify(freshQueue, null, 2));
2091
+ } catch (koResultErr) {
2092
+ logger?.warn?.(`[PD:EvolutionWorker] Failed to write keyword_optimization results: ${String(koResultErr)}`);
2093
+ } finally {
2094
+ koResultLock();
2095
+ }
2096
+ return;
2097
+ }
2098
+
1900
2099
  if (queueChanged) {
1901
- atomicWriteFileSync(queuePath, JSON.stringify(queue, null, 2));
2100
+ fs.writeFileSync(queuePath, JSON.stringify(queue, null, 2), 'utf8');
1902
2101
  }
1903
2102
 
1904
2103
  // Pipeline observability: log stage-level summary at end of cycle
@@ -1925,7 +2124,6 @@ async function processEvolutionQueue(wctx: WorkspaceContext, logger: PluginLogge
1925
2124
  }
1926
2125
 
1927
2126
 
1928
- // eslint-disable-next-line complexity
1929
2127
  async function processDetectionQueue(wctx: WorkspaceContext, api: OpenClawPluginApi, eventLog: EventLog) {
1930
2128
  const {logger} = api;
1931
2129
  try {
@@ -2017,7 +2215,7 @@ export async function registerEvolutionTaskSession(
2017
2215
  if (!task.started_at) {
2018
2216
  task.started_at = new Date().toISOString();
2019
2217
  }
2020
- atomicWriteFileSync(queuePath, JSON.stringify(queue, null, 2));
2218
+ fs.writeFileSync(queuePath, JSON.stringify(queue, null, 2), 'utf8');
2021
2219
  return true;
2022
2220
  } finally {
2023
2221
  releaseLock();
@@ -2057,9 +2255,11 @@ interface WorkerStatusReport {
2057
2255
  function writeWorkerStatus(stateDir: string, report: WorkerStatusReport): void {
2058
2256
  try {
2059
2257
  const statusPath = path.join(stateDir, 'worker-status.json');
2060
- atomicWriteFileSync(statusPath, JSON.stringify(report, null, 2));
2061
- } catch {
2258
+ fs.writeFileSync(statusPath, JSON.stringify(report, null, 2), 'utf8');
2259
+ } catch (statusErr) {
2062
2260
  // Non-critical: worker-status.json is for monitoring, failure is acceptable
2261
+ // (no logger available in this standalone helper)
2262
+ void statusErr;
2063
2263
  }
2064
2264
  }
2065
2265
 
@@ -2086,7 +2286,7 @@ async function processEvolutionQueueWithResult(
2086
2286
  const purgeResult = purgeStaleFailedTasks(queue, logger);
2087
2287
  if (purgeResult.purged > 0) {
2088
2288
  // Write back the cleaned queue
2089
- atomicWriteFileSync(queuePath, JSON.stringify(queue, null, 2));
2289
+ fs.writeFileSync(queuePath, JSON.stringify(queue, null, 2), 'utf8');
2090
2290
  }
2091
2291
 
2092
2292
  queueResult.total = queue.length;
@@ -2113,7 +2313,6 @@ export const EvolutionWorkerService: ExtendedEvolutionWorkerService = {
2113
2313
  api: null,
2114
2314
  _startedWorkspaces: new Set<string>(),
2115
2315
 
2116
- // eslint-disable-next-line complexity
2117
2316
  start(ctx: OpenClawPluginServiceContext): void {
2118
2317
  const workspaceDir = ctx?.workspaceDir;
2119
2318
  const logger = ctx?.logger || console;
@@ -2142,7 +2341,6 @@ export const EvolutionWorkerService: ExtendedEvolutionWorkerService = {
2142
2341
  const {config} = wctx;
2143
2342
  const language = config.get('language') || 'en';
2144
2343
  ensureStateTemplates({ logger }, wctx.stateDir, language);
2145
- ensureCorePrinciples(wctx.stateDir, logger);
2146
2344
 
2147
2345
  const initialDelay = 5000;
2148
2346
  const interval = config.get('intervals.worker_poll_ms') || (15 * 60 * 1000);
@@ -2150,7 +2348,6 @@ export const EvolutionWorkerService: ExtendedEvolutionWorkerService = {
2150
2348
  // Periodic trigger tracking
2151
2349
  let heartbeatCounter = 0;
2152
2350
 
2153
- // eslint-disable-next-line complexity
2154
2351
  async function runCycle(): Promise<void> {
2155
2352
  const cycleStart = Date.now();
2156
2353
  heartbeatCounter++;
@@ -2193,7 +2390,17 @@ export const EvolutionWorkerService: ExtendedEvolutionWorkerService = {
2193
2390
  }
2194
2391
 
2195
2392
  // Path 2: Periodic trigger (fires regardless of idle state)
2393
+ // keyword_optimization fires every period_heartbeats (CORR-07).
2394
+ // IMPORTANT: check keyword_optimization BEFORE resetting counter for sleep_reflection.
2196
2395
  if (sleepConfig.trigger_mode === 'periodic') {
2396
+ // keyword_optimization check BEFORE counter reset (CORR-07 fix)
2397
+ if (heartbeatCounter > 0 && heartbeatCounter % sleepConfig.period_heartbeats === 0) {
2398
+ logger?.info?.(`[PD:EvolutionWorker] Periodic keyword_optimization trigger at heartbeat ${heartbeatCounter}`);
2399
+ enqueueKeywordOptimizationTask(wctx, logger).catch((err) => {
2400
+ logger?.error?.(`[PD:EvolutionWorker] Failed to enqueue keyword_optimization task: ${String(err)}`);
2401
+ });
2402
+ }
2403
+
2197
2404
  if (heartbeatCounter >= sleepConfig.period_heartbeats) {
2198
2405
  logger?.info?.(`[PD:EvolutionWorker] Periodic trigger: heartbeatCounter=${heartbeatCounter} >= period_heartbeats=${sleepConfig.period_heartbeats}`);
2199
2406
  shouldTrySleepReflection = true;
@@ -2231,21 +2438,23 @@ export const EvolutionWorkerService: ExtendedEvolutionWorkerService = {
2231
2438
  // with a diagnostician task, immediately trigger a heartbeat to start
2232
2439
  // the diagnostician without waiting for the next 15-minute interval.
2233
2440
  // Must run AFTER processEvolutionQueue — HEARTBEAT.md must be written first.
2234
- //
2235
- // P3 (#299): Use requestHeartbeatNow instead of runHeartbeatOnce.
2236
- // requestHeartbeatNow enters the wake layer which auto-retries on
2237
- // requests-in-flight (1s intervals). runHeartbeatOnce was a one-shot
2238
- // that got permanently skipped when agent was busy.
2239
2441
  if (painCheckResult.enqueued) {
2240
- const canTrigger = !!api?.runtime?.system?.requestHeartbeatNow;
2241
- logger.info(`[PD:EvolutionWorker] Pain flag enqueued — requestHeartbeatNow available: ${canTrigger}`);
2442
+ const canTrigger = !!api?.runtime?.system?.runHeartbeatOnce;
2443
+ logger.info(`[PD:EvolutionWorker] Pain flag enqueued — runHeartbeatOnce available: ${canTrigger} (api=${!!api}, runtime=${!!api?.runtime}, system=${!!api?.runtime?.system})`);
2242
2444
  if (canTrigger) {
2243
- api.runtime.system.requestHeartbeatNow({
2244
- reason: `pd-pain-diagnosis: pain flag detected, starting diagnostician`,
2245
- });
2246
- logger.info(`[PD:EvolutionWorker] Heartbeat wake requested — wake layer will auto-retry if busy`);
2445
+ try {
2446
+ const hbResult = await api.runtime.system.runHeartbeatOnce({
2447
+ reason: `pd-pain-diagnosis: pain flag detected, starting diagnostician`,
2448
+ });
2449
+ logger.info(`[PD:EvolutionWorker] Immediate heartbeat result: status=${hbResult.status}${hbResult.status === 'ran' ? ` duration=${hbResult.durationMs}ms` : ''}${hbResult.status === 'skipped' || hbResult.status === 'failed' ? ` reason=${hbResult.reason}` : ''}`);
2450
+ if (hbResult.status === 'skipped' || hbResult.status === 'failed') {
2451
+ logger.warn(`[PD:EvolutionWorker] Immediate heartbeat was ${hbResult.status} (${hbResult.reason}). Diagnostician will start on next regular heartbeat cycle.`);
2452
+ }
2453
+ } catch (hbErr) {
2454
+ logger.warn(`[PD:EvolutionWorker] Failed to trigger immediate heartbeat: ${String(hbErr)}. Diagnostician will start on next regular heartbeat cycle.`);
2455
+ }
2247
2456
  } else {
2248
- logger.warn(`[PD:EvolutionWorker] requestHeartbeatNow not available. Diagnostician will start on next regular heartbeat cycle.`);
2457
+ logger.warn(`[PD:EvolutionWorker] runHeartbeatOnce not available. Diagnostician will start on next regular heartbeat cycle.`);
2249
2458
  }
2250
2459
  }
2251
2460