principles-disciple 1.34.0 → 1.34.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,7 +5,7 @@ import { createHash } from 'crypto';
5
5
  import type { OpenClawPluginServiceContext, OpenClawPluginApi, PluginLogger } from '../openclaw-sdk.js';
6
6
  import { DictionaryService } from '../core/dictionary-service.js';
7
7
  import { DetectionService } from '../core/detection-service.js';
8
- import { ensureStateTemplates, ensureCorePrinciples } from '../core/init.js';
8
+ import { ensureStateTemplates } from '../core/init.js';
9
9
  import { SystemLogger } from '../core/system-logger.js';
10
10
  import { WorkspaceContext } from '../core/workspace-context.js';
11
11
  import type { EventLog } from '../core/event-log.js';
@@ -31,18 +31,24 @@ import {
31
31
  import { validateNocturnalSnapshotIngress } from '../core/nocturnal-snapshot-contract.js';
32
32
  import { isExpectedSubagentError } from './subagent-workflow/subagent-error-utils.js';
33
33
  import { readPainFlagContract } from '../core/pain.js';
34
-
35
- // ── Atomic File Write ────────────────────────────────────────────────────────
36
- // Write to temp then rename — atomic on POSIX, prevents partial-write corruption on crash.
37
- function atomicWriteFileSync(filePath: string, data: string): void {
38
- const tmpPath = filePath + '.tmp';
39
- fs.writeFileSync(tmpPath, data, 'utf8');
40
- fs.renameSync(tmpPath, filePath);
41
- }
34
+ import { CorrectionObserverWorkflowManager, correctionObserverWorkflowSpec } from './correction-observer-workflow-manager.js';
35
+ import type { CorrectionObserverPayload } from './correction-observer-types.js';
36
+ import { KeywordOptimizationService } from './keyword-optimization-service.js';
37
+ import { TrajectoryRegistry } from '../core/trajectory.js';
38
+ import { CorrectionCueLearner } from '../core/correction-cue-learner.js';
42
39
 
43
40
  const WORKFLOW_TTL_MS = 5 * 60 * 1000; // 5 minutes default TTL for helper workflows
44
41
  import { OpenClawTrinityRuntimeAdapter } from '../core/nocturnal-trinity.js';
45
42
 
43
+ /**
44
+ * Atomic file write — write to temp then rename to prevent partial writes on crash.
45
+ */
46
+ function atomicWriteFileSync(filePath: string, data: string): void {
47
+ const tmpPath = filePath + '.tmp';
48
+ fs.writeFileSync(tmpPath, data, 'utf8');
49
+ fs.renameSync(tmpPath, filePath);
50
+ }
51
+
46
52
  // ── Workflow Watchdog ────────────────────────────────────────────────────────
47
53
  // Detects stale/orphaned workflows, invalid results, and cleanup failures.
48
54
  // Runs every heartbeat cycle, catching bugs like:
@@ -56,7 +62,6 @@ interface WatchdogResult {
56
62
  details: string[];
57
63
  }
58
64
 
59
-
60
65
  async function runWorkflowWatchdog(
61
66
  wctx: WorkspaceContext,
62
67
  api: OpenClawPluginApi | null,
@@ -72,10 +77,104 @@ async function runWorkflowWatchdog(
72
77
  try {
73
78
  const allWorkflows: WorkflowRow[] = store.listWorkflows();
74
79
 
75
- runWorkflowWatchdogCheckStale(allWorkflows, store, now, details, subagentRuntime, agentSession, logger);
76
- runWorkflowWatchdogCheckUncleared(allWorkflows, details);
77
- runWorkflowWatchdogCheckNocturnal(allWorkflows, details);
80
+ // Check 1: Stale active workflows (active > 2x TTL)
81
+ const staleThreshold = WORKFLOW_TTL_MS * 2;
82
+ const staleActive = allWorkflows.filter(
83
+ (wf: WorkflowRow) => wf.state === 'active' && (now - wf.created_at) > staleThreshold,
84
+ );
85
+ if (staleActive.length > 0) {
86
+ for (const wf of staleActive) {
87
+ const ageMin = Math.round((now - wf.created_at) / 60000);
88
+ details.push(`stale_active: ${wf.workflow_id} (${wf.workflow_type}, ${ageMin}min old)`);
89
+
90
+ // #257: Check if the last recorded event reason indicates expected subagent unavailability.
91
+ // If so, skip marking as terminal_error — the workflow is stale because the subagent
92
+ // was expectedly unavailable (daemon mode, process isolation), not due to a hard failure.
93
+ const events = store.getEvents(wf.workflow_id);
94
+ const lastEventReason = events.length > 0 ? events[events.length - 1].reason : 'unknown';
95
+ if (isExpectedSubagentError(lastEventReason)) {
96
+ logger?.debug?.(`[PD:Watchdog] Skipping stale active workflow ${wf.workflow_id}: expected subagent error (${lastEventReason})`);
97
+ continue;
98
+ }
99
+
100
+ store.updateWorkflowState(wf.workflow_id, 'terminal_error');
101
+ store.recordEvent(wf.workflow_id, 'watchdog_timeout', 'active', 'terminal_error', `Stale active > ${staleThreshold / 60000}s`, { ageMs: now - wf.created_at });
102
+
103
+ // Cleanup session if possible (#188: gateway-safe fallback)
104
+ if (wf.child_session_key) {
105
+ try {
106
+ if (subagentRuntime) {
107
+ await subagentRuntime.deleteSession({ sessionKey: wf.child_session_key, deleteTranscript: true });
108
+ logger?.info?.(`[PD:Watchdog] Cleaned up stale session: ${wf.child_session_key}`);
109
+ } else if (agentSession) {
110
+ const storePath = agentSession.resolveStorePath();
111
+ const sessionStore = agentSession.loadSessionStore(storePath, { skipCache: true });
112
+ const normalizedKey = wf.child_session_key.toLowerCase();
113
+ if (sessionStore[normalizedKey]) {
114
+ delete sessionStore[normalizedKey];
115
+ await agentSession.saveSessionStore(storePath, sessionStore);
116
+ logger?.info?.(`[PD:Watchdog] Cleaned up stale session via agentSession fallback: ${wf.child_session_key}`);
117
+ }
118
+ }
119
+ } catch (cleanupErr) {
120
+ const errMsg = String(cleanupErr);
121
+ if (errMsg.includes('gateway request') && agentSession) {
122
+ const storePath = agentSession.resolveStorePath();
123
+ const sessionStore = agentSession.loadSessionStore(storePath, { skipCache: true });
124
+ const normalizedKey = wf.child_session_key.toLowerCase();
125
+ if (sessionStore[normalizedKey]) {
126
+ delete sessionStore[normalizedKey];
127
+ await agentSession.saveSessionStore(storePath, sessionStore);
128
+ logger?.info?.(`[PD:Watchdog] Cleaned up stale session via agentSession fallback after gateway error: ${wf.child_session_key}`);
129
+ }
130
+ } else {
131
+ logger?.warn?.(`[PD:Watchdog] Failed to cleanup session ${wf.child_session_key}: ${errMsg}`);
132
+ }
133
+ }
134
+ }
135
+ }
136
+ }
137
+
138
+ // Check 2: Workflows in terminal_error/expired without cleanup
139
+ const unclearedTerminal = allWorkflows.filter(
140
+ (wf: WorkflowRow) => (wf.state === 'terminal_error' || wf.state === 'expired') && wf.cleanup_state === 'pending',
141
+ );
142
+ if (unclearedTerminal.length > 0) {
143
+ details.push(`uncleared_terminal: ${unclearedTerminal.length} workflows (will be swept next cycle)`);
144
+ }
145
+
146
+ // Check 3: Nocturnal workflow result validation (#181 pattern)
147
+ const nocturnalCompleted = allWorkflows.filter(
148
+ (wf: WorkflowRow) => wf.workflow_type === 'nocturnal' && wf.state === 'completed',
149
+ );
150
+ for (const wf of nocturnalCompleted) {
151
+ // Check if the metadata snapshot has all zeros (invalid data)
152
+ try {
153
+ const meta = JSON.parse(wf.metadata_json) as Record<string, unknown>;
154
+ const snapshot = meta.snapshot as Record<string, unknown> | undefined;
155
+ if (snapshot) {
156
+ // #219: Check for fallback data source (partial stats from pain context)
157
+ const dataSource = snapshot._dataSource as string | undefined;
158
+ if (dataSource === 'pain_context_fallback') {
159
+ details.push(`fallback_snapshot: nocturnal workflow ${wf.workflow_id} uses pain-context fallback (stats may be incomplete)`);
160
+ }
161
+ const stats = snapshot.stats as Record<string, number> | undefined;
162
+ // #246: Stats are now always number (never null). Detect "empty" fallback:
163
+ // fallback + all counts zero means no real data was available.
164
+ // NOTE: totalAssistantTurns may be 0 even for valid sessions because
165
+ // listRecentNocturnalCandidateSessions (used in fallback path) does not
166
+ // populate assistantTurnCount (only getNocturnalSessionSnapshot does).
167
+ // We use totalToolCalls=0 as the primary indicator instead.
168
+ if (stats && dataSource === 'pain_context_fallback' &&
169
+ stats.totalToolCalls === 0 && stats.totalGateBlocks === 0 &&
170
+ stats.failureCount === 0) {
171
+ details.push(`fallback_snapshot_stats: nocturnal workflow ${wf.workflow_id} has empty fallback stats (no trajectory data found)`);
172
+ }
173
+ }
174
+ } catch { /* ignore malformed metadata */ }
175
+ }
78
176
 
177
+ // Summary
79
178
  const stateCounts: Record<string, number> = {};
80
179
  for (const wf of allWorkflows) {
81
180
  stateCounts[wf.state] = (stateCounts[wf.state] || 0) + 1;
@@ -96,106 +195,6 @@ async function runWorkflowWatchdog(
96
195
  return { anomalies: details.length, details };
97
196
  }
98
197
 
99
- // ── Watchdog helpers (extracted from runWorkflowWatchdog for complexity) ──
100
-
101
-
102
- async function cleanupStaleWorkflowSession(
103
- wf: WorkflowRow,
104
- subagentRuntime: { deleteSession: (opts: { sessionKey: string; deleteTranscript: boolean }) => Promise<void> } | undefined,
105
- agentSession: { resolveStorePath: () => string; loadSessionStore: (p: string, o: { skipCache: boolean }) => Record<string, unknown>; saveSessionStore: (p: string, s: Record<string, unknown>) => Promise<void> } | undefined,
106
- logger?: PluginLogger,
107
- ): Promise<void> {
108
- if (!wf.child_session_key) return;
109
- try {
110
- if (subagentRuntime) {
111
- await subagentRuntime.deleteSession({ sessionKey: wf.child_session_key, deleteTranscript: true });
112
- logger?.info?.(`[PD:Watchdog] Cleaned up stale session: ${wf.child_session_key}`);
113
- } else if (agentSession) {
114
- const storePath = agentSession.resolveStorePath();
115
- const sessionStore = agentSession.loadSessionStore(storePath, { skipCache: true });
116
- const normalizedKey = wf.child_session_key.toLowerCase();
117
- if (sessionStore[normalizedKey]) {
118
- delete sessionStore[normalizedKey];
119
- await agentSession.saveSessionStore(storePath, sessionStore);
120
- logger?.info?.(`[PD:Watchdog] Cleaned up stale session via agentSession fallback: ${wf.child_session_key}`);
121
- }
122
- }
123
- } catch (cleanupErr) {
124
- const errMsg = String(cleanupErr);
125
- if (errMsg.includes('gateway request') && agentSession) {
126
- const storePath = agentSession.resolveStorePath();
127
- const sessionStore = agentSession.loadSessionStore(storePath, { skipCache: true });
128
- const normalizedKey = wf.child_session_key.toLowerCase();
129
- if (sessionStore[normalizedKey]) {
130
- delete sessionStore[normalizedKey];
131
- await agentSession.saveSessionStore(storePath, sessionStore);
132
- logger?.info?.(`[PD:Watchdog] Cleaned up stale session via agentSession fallback after gateway error: ${wf.child_session_key}`);
133
- }
134
- } else {
135
- logger?.warn?.(`[PD:Watchdog] Failed to cleanup session ${wf.child_session_key}: ${errMsg}`);
136
- }
137
- }
138
- }
139
-
140
- function runWorkflowWatchdogCheckStale(
141
- allWorkflows: WorkflowRow[],
142
- store: WorkflowStore,
143
- now: number,
144
- details: string[],
145
- subagentRuntime: { deleteSession: (opts: { sessionKey: string; deleteTranscript: boolean }) => Promise<void> } | undefined,
146
- agentSession: { resolveStorePath: () => string; loadSessionStore: (p: string, o: { skipCache: boolean }) => Record<string, unknown>; saveSessionStore: (p: string, s: Record<string, unknown>) => Promise<void> } | undefined,
147
- logger?: PluginLogger,
148
- ): void {
149
- const staleThreshold = WORKFLOW_TTL_MS * 2;
150
- for (const wf of allWorkflows) {
151
- if (wf.state !== 'active' || (now - wf.created_at) <= staleThreshold) continue;
152
- const ageMin = Math.round((now - wf.created_at) / 60000);
153
- details.push(`stale_active: ${wf.workflow_id} (${wf.workflow_type}, ${ageMin}min old)`);
154
-
155
- const events = store.getEvents(wf.workflow_id);
156
- const lastEventReason = events.length > 0 ? events[events.length - 1].reason : 'unknown';
157
- if (isExpectedSubagentError(lastEventReason)) {
158
- logger?.debug?.(`[PD:Watchdog] Skipping stale active workflow ${wf.workflow_id}: expected subagent error (${lastEventReason})`);
159
- continue;
160
- }
161
-
162
- store.updateWorkflowState(wf.workflow_id, 'terminal_error');
163
- store.recordEvent(wf.workflow_id, 'watchdog_timeout', 'active', 'terminal_error', `Stale active > ${staleThreshold / 60000}s`, { ageMs: now - wf.created_at });
164
- void cleanupStaleWorkflowSession(wf, subagentRuntime, agentSession, logger);
165
- }
166
- }
167
-
168
- function runWorkflowWatchdogCheckUncleared(allWorkflows: WorkflowRow[], details: string[]): void {
169
- const unclearedTerminal = allWorkflows.filter(
170
- (wf: WorkflowRow) => (wf.state === 'terminal_error' || wf.state === 'expired') && wf.cleanup_state === 'pending',
171
- );
172
- if (unclearedTerminal.length > 0) {
173
- details.push(`uncleared_terminal: ${unclearedTerminal.length} workflows (will be swept next cycle)`);
174
- }
175
- }
176
-
177
-
178
- function runWorkflowWatchdogCheckNocturnal(allWorkflows: WorkflowRow[], details: string[]): void {
179
- for (const wf of allWorkflows) {
180
- if (wf.workflow_type !== 'nocturnal' || wf.state !== 'completed') continue;
181
- try {
182
- const meta = JSON.parse(wf.metadata_json) as Record<string, unknown>;
183
- const snapshot = meta.snapshot as Record<string, unknown> | undefined;
184
- if (!snapshot) continue;
185
- const dataSource = snapshot._dataSource as string | undefined;
186
- if (dataSource === 'pain_context_fallback') {
187
- details.push(`fallback_snapshot: nocturnal workflow ${wf.workflow_id} uses pain-context fallback (stats may be incomplete)`);
188
- const stats = snapshot.stats as Record<string, number> | undefined;
189
- if (stats && stats.totalToolCalls === 0 && stats.totalGateBlocks === 0 && stats.failureCount === 0) {
190
- details.push(`fallback_snapshot_stats: nocturnal workflow ${wf.workflow_id} has empty fallback stats (no trajectory data found)`);
191
- }
192
- }
193
- } catch { /* ignore malformed metadata */ }
194
- }
195
- }
196
-
197
- // ── End watchdog helpers ──
198
-
199
198
  let timeoutId: NodeJS.Timeout | null = null;
200
199
 
201
200
  /**
@@ -208,12 +207,7 @@ let timeoutId: NodeJS.Timeout | null = null;
208
207
  * Old queue items (without taskKind) are migrated to pain_diagnosis for compatibility.
209
208
  */
210
209
  export type QueueStatus = 'pending' | 'in_progress' | 'completed' | 'failed' | 'canceled';
211
- export type TaskResolution = 'marker_detected' | 'auto_completed_timeout' | 'failed_max_retries' | 'runtime_unavailable' | 'canceled' | 'late_marker_principle_created' | 'late_marker_no_principle' | 'stub_fallback' | 'skipped_thin_violation' | 'diagnostician_timeout';
212
-
213
- /** Timeout for pain_diagnosis tasks (30 min) — separate from sleep_reflection timeout.
214
- * Pain diagnostics run via HEARTBEAT (main session LLM), not as a subagent.
215
- * If the agent is persistently busy, we don't want the task to starve indefinitely. */
216
- const PAIN_DIAGNOSIS_TIMEOUT_MS = 30 * 60 * 1000;
210
+ export type TaskResolution = 'marker_detected' | 'auto_completed_timeout' | 'failed_max_retries' | 'runtime_unavailable' | 'canceled' | 'late_marker_principle_created' | 'late_marker_no_principle' | 'stub_fallback' | 'skipped_thin_violation';
217
211
 
218
212
  /**
219
213
  * Recent pain context attached to sleep_reflection tasks.
@@ -375,7 +369,6 @@ function isSessionAtOrBeforeTriggerTime(
375
369
  return true;
376
370
  }
377
371
 
378
-
379
372
  function buildFallbackNocturnalSnapshot(
380
373
  sleepTask: EvolutionQueueItem,
381
374
  extractor?: ReturnType<typeof createNocturnalTrajectoryExtractor> | null,
@@ -510,13 +503,14 @@ function findRecentDuplicateTask(
510
503
  reason?: string
511
504
  ): EvolutionQueueItem | undefined {
512
505
 
506
+
513
507
  const key = normalizePainDedupKey(source, preview, reason);
514
508
  return queue.find((task) => {
515
509
  if (task.status === 'completed') return false;
516
-
517
510
  const taskTime = new Date(task.enqueued_at || task.timestamp).getTime();
518
511
  if (!Number.isFinite(taskTime) || (now - taskTime) > PAIN_QUEUE_DEDUP_WINDOW_MS) return false;
519
512
 
513
+
520
514
  return normalizePainDedupKey(task.source, task.trigger_text_preview || '', task.reason) === key;
521
515
  });
522
516
  }
@@ -571,7 +565,6 @@ function normalizePainDedupKey(source: string, preview: string, reason?: string)
571
565
 
572
566
 
573
567
 
574
-
575
568
  export function hasRecentDuplicateTask(queue: EvolutionQueueItem[], source: string, preview: string, now: number, reason?: string): boolean {
576
569
  return !!findRecentDuplicateTask(queue, source, preview, now, reason);
577
570
  }
@@ -699,7 +692,7 @@ function shouldSkipForDedup(
699
692
  * Load and migrate the evolution queue. Returns empty array if file doesn't exist.
700
693
  */
701
694
  function loadEvolutionQueue(queuePath: string): EvolutionQueueItem[] {
702
-
695
+ // eslint-disable-next-line no-useless-assignment
703
696
  let rawQueue: RawQueueItem[] = [];
704
697
  try {
705
698
  rawQueue = JSON.parse(fs.readFileSync(queuePath, 'utf8'));
@@ -714,7 +707,6 @@ function loadEvolutionQueue(queuePath: string): EvolutionQueueItem[] {
714
707
  * Build and persist a new sleep_reflection task.
715
708
  */
716
709
 
717
-
718
710
  function enqueueNewSleepReflectionTask(
719
711
  queue: EvolutionQueueItem[],
720
712
  recentPainContext: ReturnType<typeof readRecentPainContext>,
@@ -780,6 +772,60 @@ async function enqueueSleepReflectionTask(
780
772
  }
781
773
  }
782
774
 
775
+ /**
776
+ * Enqueue a keyword_optimization task if one is not already pending/in-progress (CORR-08).
777
+ * Dispatches LLM subagent via CorrectionObserverWorkflowManager to optimize
778
+ * correction keywords based on FPR and match history.
779
+ */
780
+ async function enqueueKeywordOptimizationTask(
781
+ wctx: WorkspaceContext,
782
+ logger: PluginLogger,
783
+ ): Promise<void> {
784
+ const queuePath = wctx.resolve('EVOLUTION_QUEUE');
785
+ const releaseLock = await requireQueueLock(queuePath, logger, 'enqueueKeywordOpt', EVOLUTION_QUEUE_LOCK_SUFFIX);
786
+
787
+ try {
788
+ const queue = loadEvolutionQueue(queuePath);
789
+
790
+ // Guard: Skip if a keyword_optimization task is already pending/in-progress (CORR-08)
791
+ if (hasPendingTask(queue, 'keyword_optimization')) {
792
+ logger?.debug?.('[PD:EvolutionWorker] keyword_optimization task already pending/in-progress, skipping');
793
+ return;
794
+ }
795
+
796
+ // Guard: Skip if daily optimization throttle is exhausted (CORR-08)
797
+ const learner = CorrectionCueLearner.get(wctx.stateDir);
798
+ if (!learner.canRunKeywordOptimization()) {
799
+ logger?.debug?.('[PD:EvolutionWorker] keyword_optimization throttle exhausted, skipping');
800
+ return;
801
+ }
802
+
803
+ const taskId = createEvolutionTaskId('keyword_optimization', 50, 'keyword optimization', 'Keyword optimization via LLM', Date.now());
804
+ const nowIso = new Date().toISOString();
805
+
806
+ queue.push({
807
+ id: taskId,
808
+ taskKind: 'keyword_optimization',
809
+ priority: 'medium',
810
+ score: 50,
811
+ source: 'correction',
812
+ reason: 'Keyword optimization triggered by heartbeat',
813
+ trigger_text_preview: 'Keyword optimization via LLM',
814
+ timestamp: nowIso,
815
+ enqueued_at: nowIso,
816
+ status: 'pending',
817
+ traceId: taskId,
818
+ retryCount: 0,
819
+ maxRetries: 1,
820
+ });
821
+
822
+ fs.writeFileSync(queuePath, JSON.stringify(queue, null, 2), 'utf8');
823
+ logger?.info?.(`[PD:EvolutionWorker] Enqueued keyword_optimization task ${taskId}`);
824
+ } finally {
825
+ releaseLock();
826
+ }
827
+ }
828
+
783
829
  interface ParsedPainValues {
784
830
  score: number; source: string; reason: string; preview: string;
785
831
  traceId: string; sessionId: string; agentId: string;
@@ -787,7 +833,6 @@ interface ParsedPainValues {
787
833
 
788
834
 
789
835
 
790
-
791
836
  async function doEnqueuePainTask(
792
837
  wctx: WorkspaceContext, logger: PluginLogger, painFlagPath: string,
793
838
  result: WorkerStatusReport['pain_flag'], v: ParsedPainValues,
@@ -861,7 +906,6 @@ async function doEnqueuePainTask(
861
906
  return result;
862
907
  }
863
908
 
864
-
865
909
  async function checkPainFlag(wctx: WorkspaceContext, logger: PluginLogger): Promise<WorkerStatusReport['pain_flag']> {
866
910
  const result: WorkerStatusReport['pain_flag'] = { exists: false, score: null, source: null, enqueued: false, skipped_reason: null };
867
911
  try {
@@ -1036,7 +1080,6 @@ async function checkPainFlag(wctx: WorkspaceContext, logger: PluginLogger): Prom
1036
1080
 
1037
1081
 
1038
1082
 
1039
-
1040
1083
  async function processEvolutionQueue(wctx: WorkspaceContext, logger: PluginLogger, eventLog: EventLog, api?: OpenClawPluginApi) {
1041
1084
  const queuePath = wctx.resolve('EVOLUTION_QUEUE');
1042
1085
  if (!fs.existsSync(queuePath)) {
@@ -1074,6 +1117,11 @@ async function processEvolutionQueue(wctx: WorkspaceContext, logger: PluginLogge
1074
1117
 
1075
1118
  let queueChanged = rawQueue.some(isLegacyQueueItem);
1076
1119
 
1120
+ // Guard: Skip keyword_optimization if one is already pending/in-progress (CORR-08)
1121
+ if (hasPendingTask(queue, 'keyword_optimization')) {
1122
+ logger?.debug?.('[PD:EvolutionWorker] keyword_optimization task already pending/in-progress, skipping enqueue');
1123
+ }
1124
+
1077
1125
  const {config} = wctx;
1078
1126
  const timeout = config.get('intervals.task_timeout_ms') || (60 * 60 * 1000); // Default 1 hour
1079
1127
 
@@ -1314,8 +1362,8 @@ async function processEvolutionQueue(wctx: WorkspaceContext, logger: PluginLogge
1314
1362
  }
1315
1363
 
1316
1364
  const age = Date.now() - startedAt.getTime();
1317
- if (age > PAIN_DIAGNOSIS_TIMEOUT_MS) {
1318
- const timeoutMinutes = Math.round(PAIN_DIAGNOSIS_TIMEOUT_MS / 60000);
1365
+ if (age > timeout) {
1366
+ const timeoutMinutes = Math.round(timeout / 60000);
1319
1367
 
1320
1368
  const timeoutCompleteMarker = path.join(wctx.stateDir, `.evolution_complete_${task.id}`);
1321
1369
  const timeoutReportPath = path.join(wctx.stateDir, `.diagnostician_report_${task.id}.json`);
@@ -1363,13 +1411,13 @@ async function processEvolutionQueue(wctx: WorkspaceContext, logger: PluginLogge
1363
1411
  } catch { /* report may not exist, not critical */ }
1364
1412
  task.resolution = principleCreated ? 'late_marker_principle_created' : 'late_marker_no_principle';
1365
1413
  } else {
1366
- if (logger) logger.info(`[PD:EvolutionWorker] Pain diagnosis task ${task.id} timed out after ${timeoutMinutes} minutes`);
1414
+ if (logger) logger.info(`[PD:EvolutionWorker] Task ${task.id} auto-completed after ${timeoutMinutes} minute timeout`);
1367
1415
  // #190: Clean up diagnostician report file even on timeout (may have been written late)
1368
1416
  try {
1369
1417
  const autoTimeoutReportPath = path.join(wctx.stateDir, `.diagnostician_report_${task.id}.json`);
1370
1418
  if (fs.existsSync(autoTimeoutReportPath)) fs.unlinkSync(autoTimeoutReportPath);
1371
1419
  } catch { /* report may not exist, not critical */ }
1372
- task.resolution = 'diagnostician_timeout';
1420
+ task.resolution = 'auto_completed_timeout';
1373
1421
  }
1374
1422
 
1375
1423
  // Critical: mark task as completed so it doesn't get re-processed
@@ -1395,7 +1443,7 @@ async function processEvolutionQueue(wctx: WorkspaceContext, logger: PluginLogge
1395
1443
  sessionId: task.assigned_session_key || 'heartbeat:diagnostician',
1396
1444
  taskId: task.id,
1397
1445
  outcome: 'timeout',
1398
- summary: `Pain diagnosis task ${task.id} timed out after ${timeoutMinutes} minutes.`
1446
+ summary: `Task ${task.id} auto-completed after ${timeoutMinutes} minute timeout.`
1399
1447
  });
1400
1448
  queueChanged = true;
1401
1449
  }
@@ -1637,8 +1685,10 @@ async function processEvolutionQueue(wctx: WorkspaceContext, logger: PluginLogge
1637
1685
 
1638
1686
  let workflowId: string | undefined;
1639
1687
 
1688
+
1640
1689
  let nocturnalManager: NocturnalWorkflowManager;
1641
1690
 
1691
+
1642
1692
  let snapshotData: NocturnalSessionSnapshot | undefined;
1643
1693
 
1644
1694
  if (isPollingTask) {
@@ -1902,6 +1952,159 @@ async function processEvolutionQueue(wctx: WorkspaceContext, logger: PluginLogge
1902
1952
  return;
1903
1953
  }
1904
1954
 
1955
+ // ── keyword_optimization task processing ──────────────────────────────
1956
+ // Process keyword_optimization tasks independently of sleep_reflection.
1957
+ // Uses CorrectionObserverWorkflowManager to dispatch LLM subagent and
1958
+ // KeywordOptimizationService to apply mutations to keyword store (CORR-09).
1959
+ const pendingKeywordOptTasks = queue.filter(t => t.status === 'pending' && t.taskKind === 'keyword_optimization');
1960
+ const inProgressKeywordOptTasks = queue.filter(t =>
1961
+ t.status === 'in_progress' &&
1962
+ t.taskKind === 'keyword_optimization' &&
1963
+ t.resultRef &&
1964
+ !t.resultRef.startsWith('trinity-draft')
1965
+ );
1966
+ const keywordOptTasks = [...pendingKeywordOptTasks, ...inProgressKeywordOptTasks];
1967
+ if (keywordOptTasks.length > 0) {
1968
+ // Claim pending tasks inside lock
1969
+ for (const koTask of pendingKeywordOptTasks) {
1970
+ koTask.status = 'in_progress';
1971
+ koTask.started_at = new Date().toISOString();
1972
+ }
1973
+ queueChanged = queueChanged || pendingKeywordOptTasks.length > 0;
1974
+
1975
+ // Release lock during LLM dispatch (long-running)
1976
+ fs.writeFileSync(queuePath, JSON.stringify(queue, null, 2), 'utf8');
1977
+ releaseLock();
1978
+ lockReleased = true;
1979
+
1980
+ for (const koTask of keywordOptTasks) {
1981
+ const isPolling = !!koTask.resultRef && !koTask.resultRef.startsWith('trinity-draft');
1982
+
1983
+ if (isPolling) {
1984
+ logger?.debug?.(`[PD:EvolutionWorker] Polling existing keyword_optimization task ${koTask.id}`);
1985
+ } else {
1986
+ logger?.info?.(`[PD:EvolutionWorker] Processing keyword_optimization task ${koTask.id}`);
1987
+ }
1988
+
1989
+ try {
1990
+ // Build trajectoryHistory via KeywordOptimizationService
1991
+ const koService = KeywordOptimizationService.get(wctx.stateDir, wctx.workspaceDir, logger);
1992
+ const db = TrajectoryRegistry.get(wctx.workspaceDir);
1993
+ const recentSessionIds = db.listRecentSessions({ limit: 10 }).map(s => s.sessionId);
1994
+ const trajectoryHistory = await koService.buildTrajectoryHistory(recentSessionIds);
1995
+
1996
+ // Build full payload (CORR-09, D-40-07, D-40-08)
1997
+ const learner = CorrectionCueLearner.get(wctx.stateDir);
1998
+ const store = learner.getStore();
1999
+ const payload: CorrectionObserverPayload = {
2000
+ workspaceDir: wctx.workspaceDir,
2001
+ parentSessionId: `keyword_optimization:${koTask.id}`,
2002
+ keywordStoreSummary: {
2003
+ totalKeywords: store.keywords.length,
2004
+ terms: store.keywords.map(k => ({
2005
+ term: k.term,
2006
+ weight: k.weight,
2007
+ hitCount: k.hitCount ?? 0,
2008
+ truePositiveCount: k.truePositiveCount ?? 0,
2009
+ falsePositiveCount: k.falsePositiveCount ?? 0,
2010
+ })),
2011
+ },
2012
+ recentMessages: [],
2013
+ trajectoryHistory,
2014
+ };
2015
+
2016
+ // Dispatch LLM subagent via CorrectionObserverWorkflowManager
2017
+ const manager = new CorrectionObserverWorkflowManager({
2018
+ workspaceDir: wctx.workspaceDir,
2019
+ logger,
2020
+ subagent: api?.runtime?.subagent!,
2021
+ agentSession: api?.runtime?.agent?.session,
2022
+ });
2023
+
2024
+ let workflowId: string | undefined;
2025
+ if (!isPolling) {
2026
+ const handle = await manager.startWorkflow(correctionObserverWorkflowSpec, {
2027
+ parentSessionId: `keyword_optimization:${koTask.id}`,
2028
+ workspaceDir: wctx.workspaceDir,
2029
+ taskInput: payload,
2030
+ });
2031
+ workflowId = handle.workflowId;
2032
+ koTask.resultRef = workflowId;
2033
+ } else {
2034
+ workflowId = koTask.resultRef!;
2035
+ }
2036
+
2037
+ // Poll workflow state
2038
+ const summary = await manager.getWorkflowDebugSummary(workflowId);
2039
+ if (summary) {
2040
+ if (summary.state === 'completed') {
2041
+ // Get parsed LLM result and apply mutations to keyword store (CORR-09)
2042
+ const parsedResult = await manager.getWorkflowResult(workflowId);
2043
+
2044
+ if (parsedResult?.updated) {
2045
+ koService.applyResult(parsedResult);
2046
+ await learner.recordOptimizationPerformed();
2047
+ logger?.info?.(`[PD:EvolutionWorker] keyword_optimization applied mutations: ${parsedResult.summary}`);
2048
+ } else {
2049
+ logger?.info?.(`[PD:EvolutionWorker] keyword_optimization completed with no updates`);
2050
+ }
2051
+
2052
+ koTask.status = 'completed';
2053
+ koTask.completed_at = new Date().toISOString();
2054
+ koTask.resolution = 'marker_detected';
2055
+ logger?.info?.(`[PD:EvolutionWorker] keyword_optimization task ${koTask.id} workflow completed`);
2056
+ } else if (summary.state === 'terminal_error') {
2057
+ koTask.status = 'failed';
2058
+ koTask.completed_at = new Date().toISOString();
2059
+ koTask.resolution = 'failed_max_retries';
2060
+ koTask.retryCount = (koTask.retryCount ?? 0) + 1;
2061
+ const lastEvent = summary.recentEvents[summary.recentEvents.length - 1];
2062
+ koTask.lastError = `keyword_optimization failed: ${lastEvent?.reason ?? 'unknown'}`;
2063
+ logger?.warn?.(`[PD:EvolutionWorker] keyword_optimization task ${koTask.id} workflow terminal_error: ${koTask.lastError}`);
2064
+ } else {
2065
+ logger?.info?.(`[PD:EvolutionWorker] keyword_optimization task ${koTask.id} workflow ${summary.state}, will poll again next cycle`);
2066
+ }
2067
+ }
2068
+ } catch (koErr) {
2069
+ koTask.status = 'failed';
2070
+ koTask.completed_at = new Date().toISOString();
2071
+ koTask.resolution = 'failed_max_retries';
2072
+ koTask.lastError = String(koErr);
2073
+ koTask.retryCount = (koTask.retryCount ?? 0) + 1;
2074
+ logger?.error?.(`[PD:EvolutionWorker] keyword_optimization task ${koTask.id} threw: ${koErr}`);
2075
+ }
2076
+ }
2077
+
2078
+ // Re-acquire lock to write results
2079
+ const koResultLock = await requireQueueLock(queuePath, logger, 'keywordOptResult');
2080
+ try {
2081
+ let freshQueue: (RawQueueItem | EvolutionQueueItem)[] = [];
2082
+ try {
2083
+ freshQueue = JSON.parse(fs.readFileSync(queuePath, 'utf8'));
2084
+ } catch (readErr) {
2085
+ // Queue file corrupted — log warning but preserve in-memory task state
2086
+ logger?.warn?.(`[PD:EvolutionWorker] Queue file corrupted (${String(readErr)}), preserving in-memory state`);
2087
+ freshQueue = [];
2088
+ }
2089
+
2090
+ // Append or replace keyword_optimization tasks
2091
+ for (const koTask of keywordOptTasks) {
2092
+ const idx = freshQueue.findIndex((t) => (t as { id?: string }).id === koTask.id);
2093
+ if (idx >= 0) {
2094
+ freshQueue[idx] = koTask;
2095
+ } else {
2096
+ freshQueue.push(koTask);
2097
+ }
2098
+ }
2099
+ fs.writeFileSync(queuePath, JSON.stringify(freshQueue, null, 2));
2100
+ } catch (koResultErr) {
2101
+ logger?.warn?.(`[PD:EvolutionWorker] Failed to write keyword_optimization results: ${String(koResultErr)}`);
2102
+ } finally {
2103
+ koResultLock();
2104
+ }
2105
+ return;
2106
+ }
2107
+
1905
2108
  if (queueChanged) {
1906
2109
  atomicWriteFileSync(queuePath, JSON.stringify(queue, null, 2));
1907
2110
  }
@@ -1930,7 +2133,6 @@ async function processEvolutionQueue(wctx: WorkspaceContext, logger: PluginLogge
1930
2133
  }
1931
2134
 
1932
2135
 
1933
-
1934
2136
  async function processDetectionQueue(wctx: WorkspaceContext, api: OpenClawPluginApi, eventLog: EventLog) {
1935
2137
  const {logger} = api;
1936
2138
  try {
@@ -2063,8 +2265,10 @@ function writeWorkerStatus(stateDir: string, report: WorkerStatusReport): void {
2063
2265
  try {
2064
2266
  const statusPath = path.join(stateDir, 'worker-status.json');
2065
2267
  atomicWriteFileSync(statusPath, JSON.stringify(report, null, 2));
2066
- } catch {
2268
+ } catch (statusErr) {
2067
2269
  // Non-critical: worker-status.json is for monitoring, failure is acceptable
2270
+ // (no logger available in this standalone helper)
2271
+ void statusErr;
2068
2272
  }
2069
2273
  }
2070
2274
 
@@ -2118,7 +2322,6 @@ export const EvolutionWorkerService: ExtendedEvolutionWorkerService = {
2118
2322
  api: null,
2119
2323
  _startedWorkspaces: new Set<string>(),
2120
2324
 
2121
-
2122
2325
  start(ctx: OpenClawPluginServiceContext): void {
2123
2326
  const workspaceDir = ctx?.workspaceDir;
2124
2327
  const logger = ctx?.logger || console;
@@ -2147,7 +2350,6 @@ export const EvolutionWorkerService: ExtendedEvolutionWorkerService = {
2147
2350
  const {config} = wctx;
2148
2351
  const language = config.get('language') || 'en';
2149
2352
  ensureStateTemplates({ logger }, wctx.stateDir, language);
2150
- ensureCorePrinciples(wctx.stateDir, logger);
2151
2353
 
2152
2354
  const initialDelay = 5000;
2153
2355
  const interval = config.get('intervals.worker_poll_ms') || (15 * 60 * 1000);
@@ -2155,7 +2357,6 @@ export const EvolutionWorkerService: ExtendedEvolutionWorkerService = {
2155
2357
  // Periodic trigger tracking
2156
2358
  let heartbeatCounter = 0;
2157
2359
 
2158
-
2159
2360
  async function runCycle(): Promise<void> {
2160
2361
  const cycleStart = Date.now();
2161
2362
  heartbeatCounter++;
@@ -2198,7 +2399,17 @@ export const EvolutionWorkerService: ExtendedEvolutionWorkerService = {
2198
2399
  }
2199
2400
 
2200
2401
  // Path 2: Periodic trigger (fires regardless of idle state)
2402
+ // keyword_optimization fires every period_heartbeats (CORR-07).
2403
+ // IMPORTANT: check keyword_optimization BEFORE resetting counter for sleep_reflection.
2201
2404
  if (sleepConfig.trigger_mode === 'periodic') {
2405
+ // keyword_optimization check BEFORE counter reset (CORR-07 fix)
2406
+ if (heartbeatCounter > 0 && heartbeatCounter % sleepConfig.period_heartbeats === 0) {
2407
+ logger?.info?.(`[PD:EvolutionWorker] Periodic keyword_optimization trigger at heartbeat ${heartbeatCounter}`);
2408
+ enqueueKeywordOptimizationTask(wctx, logger).catch((err) => {
2409
+ logger?.error?.(`[PD:EvolutionWorker] Failed to enqueue keyword_optimization task: ${String(err)}`);
2410
+ });
2411
+ }
2412
+
2202
2413
  if (heartbeatCounter >= sleepConfig.period_heartbeats) {
2203
2414
  logger?.info?.(`[PD:EvolutionWorker] Periodic trigger: heartbeatCounter=${heartbeatCounter} >= period_heartbeats=${sleepConfig.period_heartbeats}`);
2204
2415
  shouldTrySleepReflection = true;
@@ -2236,21 +2447,23 @@ export const EvolutionWorkerService: ExtendedEvolutionWorkerService = {
2236
2447
  // with a diagnostician task, immediately trigger a heartbeat to start
2237
2448
  // the diagnostician without waiting for the next 15-minute interval.
2238
2449
  // Must run AFTER processEvolutionQueue — HEARTBEAT.md must be written first.
2239
- //
2240
- // P3 (#299): Use requestHeartbeatNow instead of runHeartbeatOnce.
2241
- // requestHeartbeatNow enters the wake layer which auto-retries on
2242
- // requests-in-flight (1s intervals). runHeartbeatOnce was a one-shot
2243
- // that got permanently skipped when agent was busy.
2244
2450
  if (painCheckResult.enqueued) {
2245
- const canTrigger = !!api?.runtime?.system?.requestHeartbeatNow;
2246
- logger.info(`[PD:EvolutionWorker] Pain flag enqueued — requestHeartbeatNow available: ${canTrigger}`);
2451
+ const canTrigger = !!api?.runtime?.system?.runHeartbeatOnce;
2452
+ logger.info(`[PD:EvolutionWorker] Pain flag enqueued — runHeartbeatOnce available: ${canTrigger} (api=${!!api}, runtime=${!!api?.runtime}, system=${!!api?.runtime?.system})`);
2247
2453
  if (canTrigger) {
2248
- api.runtime.system.requestHeartbeatNow({
2249
- reason: `pd-pain-diagnosis: pain flag detected, starting diagnostician`,
2250
- });
2251
- logger.info(`[PD:EvolutionWorker] Heartbeat wake requested — wake layer will auto-retry if busy`);
2454
+ try {
2455
+ const hbResult = await api.runtime.system.runHeartbeatOnce({
2456
+ reason: `pd-pain-diagnosis: pain flag detected, starting diagnostician`,
2457
+ });
2458
+ logger.info(`[PD:EvolutionWorker] Immediate heartbeat result: status=${hbResult.status}${hbResult.status === 'ran' ? ` duration=${hbResult.durationMs}ms` : ''}${hbResult.status === 'skipped' || hbResult.status === 'failed' ? ` reason=${hbResult.reason}` : ''}`);
2459
+ if (hbResult.status === 'skipped' || hbResult.status === 'failed') {
2460
+ logger.warn(`[PD:EvolutionWorker] Immediate heartbeat was ${hbResult.status} (${hbResult.reason}). Diagnostician will start on next regular heartbeat cycle.`);
2461
+ }
2462
+ } catch (hbErr) {
2463
+ logger.warn(`[PD:EvolutionWorker] Failed to trigger immediate heartbeat: ${String(hbErr)}. Diagnostician will start on next regular heartbeat cycle.`);
2464
+ }
2252
2465
  } else {
2253
- logger.warn(`[PD:EvolutionWorker] requestHeartbeatNow not available. Diagnostician will start on next regular heartbeat cycle.`);
2466
+ logger.warn(`[PD:EvolutionWorker] runHeartbeatOnce not available. Diagnostician will start on next regular heartbeat cycle.`);
2254
2467
  }
2255
2468
  }
2256
2469