principles-disciple 1.31.0 → 1.33.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. package/openclaw.plugin.json +1 -1
  2. package/package.json +1 -1
  3. package/src/core/correction-cue-learner.ts +203 -0
  4. package/src/core/correction-types.ts +88 -0
  5. package/src/core/init.ts +67 -0
  6. package/src/service/correction-observer-types.ts +58 -0
  7. package/src/service/correction-observer-workflow-manager.ts +218 -0
  8. package/src/service/evolution-worker.ts +164 -140
  9. package/src/service/nocturnal-service.ts +4 -1
  10. package/src/service/subagent-workflow/index.ts +14 -0
  11. package/src/service/subagent-workflow/nocturnal-workflow-manager.ts +7 -8
  12. package/tests/service/evolution-worker.nocturnal.test.ts +14 -1
  13. package/tests/commands/implementation-lifecycle.test.ts +0 -362
  14. package/tests/core/detection-funnel.test.ts +0 -63
  15. package/tests/core/evolution-e2e.test.ts +0 -58
  16. package/tests/core/evolution-engine-gate-integration.test.ts +0 -543
  17. package/tests/core/evolution-engine.test.ts +0 -562
  18. package/tests/core/evolution-reducer.test.ts +0 -180
  19. package/tests/core/evolution-user-stories.e2e.test.ts +0 -249
  20. package/tests/core/local-worker-routing.test.ts +0 -757
  21. package/tests/core/rule-host.test.ts +0 -389
  22. package/tests/core/trajectory-correction-pain.test.ts +0 -180
  23. package/tests/hooks/gate-edit-verification.test.ts +0 -435
  24. package/tests/hooks/llm.test.ts +0 -308
  25. package/tests/hooks/progressive-trust-gate.test.ts +0 -277
  26. package/tests/hooks/prompt.test.ts +0 -1473
  27. package/tests/index.integration.test.ts +0 -179
  28. package/tests/index.shadow-routing.integration.test.ts +0 -140
  29. package/tests/service/evolution-worker.test.ts +0 -462
  30. package/tests/service/nocturnal-service.test.ts +0 -577
  31. package/tests/service/nocturnal-workflow-manager.test.ts +0 -441
  32. package/tests/tools/critique-prompt.test.ts +0 -260
  33. package/tests/tools/deep-reflect.test.ts +0 -232
  34. package/tests/tools/model-index.test.ts +0 -246
  35. package/tests/ui/app.test.tsx +0 -114
@@ -2,7 +2,7 @@
2
2
  "id": "principles-disciple",
3
3
  "name": "Principles Disciple",
4
4
  "description": "Evolutionary programming agent framework with strategic guardrails and reflection loops.",
5
- "version": "1.31.0",
5
+ "version": "1.33.0",
6
6
  "skills": [
7
7
  "./skills"
8
8
  ],
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "principles-disciple",
3
- "version": "1.31.0",
3
+ "version": "1.33.0",
4
4
  "description": "Native OpenClaw plugin for Principles Disciple",
5
5
  "type": "module",
6
6
  "main": "./dist/bundle.js",
@@ -0,0 +1,203 @@
1
+ /**
2
+ * Correction Cue Learner
3
+ *
4
+ * Persistent, learnable keyword store for correction cue detection.
5
+ * Replaces the hardcoded cue list in detectCorrectionCue() with a
6
+ * crash-safe JSON store that can grow over time.
7
+ *
8
+ * Persistence contract:
9
+ * - Atomic write: temp-file-then-rename (T-38-02)
10
+ * - Cache invalidated after every write (D-05)
11
+ * - 200-term hard cap enforced by add() before it writes (T-38-01)
12
+ */
13
+
14
+ import * as fs from 'fs';
15
+ import * as path from 'path';
16
+ import {
17
+ CorrectionKeyword,
18
+ CorrectionKeywordStore,
19
+ CorrectionMatchResult,
20
+ CORRECTION_SEED_KEYWORDS,
21
+ MAX_CORRECTION_KEYWORDS,
22
+ } from './correction-types.js';
23
+
24
+ const KEYWORD_STORE_FILE = 'correction_keywords.json';
25
+
26
+ // =========================================================================
27
+ // Module-level cache (D-04, D-05)
28
+ // =========================================================================
29
+
30
+ /**
31
+ * Invalidated on every successful save so the next load re-reads from disk.
32
+ * Set to null intentionally — never assume disk and memory are in sync after a write.
33
+ */
34
+ let _correctionCueCache: CorrectionKeywordStore | null = null;
35
+
36
/**
 * Test-only hook: clears the module-level keyword-store cache.
 *
 * After this call `_correctionCueCache` is `null`, so the next
 * `loadCorrectionKeywordStore()` cannot return a cached store.
 * @internal
 */
export function _resetCorrectionCueCache(): void {
  _correctionCueCache = null;
}
43
+
44
+ // =========================================================================
45
+ // Default store factory
46
+ // =========================================================================
47
+
48
/**
 * Builds a brand-new keyword store from CORRECTION_SEED_KEYWORDS (D-08, D-09).
 *
 * Each seed entry is copied (the seed array itself is not mutated), tagged
 * with `source: 'seed'`, and stamped with a single shared ISO timestamp that
 * is also used for `lastOptimizedAt`.
 *
 * @returns A version-1 store containing one keyword per seed entry.
 */
function createDefaultStore(): CorrectionKeywordStore {
  const createdAt = new Date().toISOString();

  const seeded: CorrectionKeyword[] = [];
  for (const seed of CORRECTION_SEED_KEYWORDS) {
    seeded.push({ ...seed, source: 'seed', addedAt: createdAt });
  }

  return {
    keywords: seeded,
    version: 1,
    lastOptimizedAt: createdAt,
  };
}
61
+
62
+ // =========================================================================
63
+ // Load / save
64
+ // =========================================================================
65
+
66
/**
 * Absolute state directory the cached store was loaded from.
 * The cache is only honoured when a load asks for this same directory, so a
 * workspace switch can never be served another workspace's keywords.
 */
let _correctionCueCacheDir: string | null = null;

/**
 * Loads the correction keyword store for `stateDir`.
 *
 * Resolution order:
 *  1. The module-level cache, if it was populated for this same directory.
 *  2. The JSON file on disk, if it parses and has a `keywords` array.
 *  3. A freshly seeded default store, which is persisted before returning (D-01).
 *
 * A file that is unreadable, is not valid JSON, or does not contain a
 * `keywords` array is treated as corrupt and replaced by the default store.
 *
 * @param stateDir - Directory that holds the keyword store file.
 * @returns The cached, loaded, or newly seeded store.
 * @throws Whatever saveCorrectionKeywordStore() throws when the default store
 *         cannot be persisted.
 */
export function loadCorrectionKeywordStore(stateDir: string): CorrectionKeywordStore {
  const resolvedDir = path.resolve(stateDir);
  if (_correctionCueCache && _correctionCueCacheDir === resolvedDir) {
    return _correctionCueCache;
  }

  const filePath = path.join(stateDir, KEYWORD_STORE_FILE);

  if (fs.existsSync(filePath)) {
    try {
      const parsed: unknown = JSON.parse(fs.readFileSync(filePath, 'utf-8'));
      // JSON.parse happily returns null, numbers, arrays, or objects of any
      // shape; only accept something match()/add() can actually iterate.
      if (
        parsed !== null &&
        typeof parsed === 'object' &&
        Array.isArray((parsed as { keywords?: unknown }).keywords)
      ) {
        _correctionCueCache = parsed as CorrectionKeywordStore;
        _correctionCueCacheDir = resolvedDir;
        return _correctionCueCache;
      }
    } catch {
      // Read or parse failure — fall through to the default store.
    }
  }

  // File absent or corrupt: seed the store and persist it (D-01).
  const defaultStore = createDefaultStore();
  saveCorrectionKeywordStore(stateDir, defaultStore);
  // The save above invalidates the cache, so populate it only afterwards.
  _correctionCueCache = defaultStore;
  _correctionCueCacheDir = resolvedDir;
  return defaultStore;
}
91
+
92
/**
 * Writes the keyword store to disk via a staging file (D-03, T-38-02).
 *
 * The store is serialised as 2-space-indented JSON into `<file>.tmp` and then
 * renamed over the real file, so the real path is only ever replaced by a
 * fully written file. `stateDir` is created (recursively) if it is missing.
 * Once the rename has succeeded the module-level cache is cleared (D-05).
 *
 * @param stateDir - Directory that holds the keyword store file.
 * @param store - Store to persist.
 * @throws Any error raised by the underlying `fs` calls or `JSON.stringify`;
 *         in that case the cache is left untouched.
 */
export function saveCorrectionKeywordStore(
  stateDir: string,
  store: CorrectionKeywordStore
): void {
  const target = path.join(stateDir, KEYWORD_STORE_FILE);
  const staging = `${target}.tmp`;

  fs.mkdirSync(stateDir, { recursive: true });

  const serialized = JSON.stringify(store, null, 2);
  fs.writeFileSync(staging, serialized, 'utf-8');
  fs.renameSync(staging, target);

  // Drop the cache so the next load goes back to disk (D-05).
  _correctionCueCache = null;
}
112
+
113
+ // =========================================================================
114
+ // Singleton state
115
+ // =========================================================================
116
+
117
+ let _instance: CorrectionCueLearner | null = null;
118
+ let _lastStateDir: string | null = null;
119
+
120
/**
 * Test-only hook: forgets the shared learner and the state directory it was
 * created for, so the next `CorrectionCueLearner.get()` builds a new instance.
 * @internal
 */
export function _resetCorrectionCueLearnerInstance(): void {
  _lastStateDir = null;
  _instance = null;
}
125
+
126
+ // =========================================================================
127
+ // CorrectionCueLearner class
128
+ // =========================================================================
129
+
130
/**
 * In-memory view of the correction keyword store for one state directory,
 * with substring matching and write-through persistence.
 *
 * Use `CorrectionCueLearner.get(stateDir)` to obtain the shared instance.
 */
export class CorrectionCueLearner {
  private store: CorrectionKeywordStore;
  private stateDir: string;

  /**
   * @param stateDir - Directory holding the keyword store file; the store is
   *                   loaded (or seeded) immediately.
   */
  constructor(stateDir: string) {
    this.stateDir = stateDir;
    this.store = loadCorrectionKeywordStore(stateDir);
  }

  // ── Public API ──────────────────────────────────────────────────────────

  /**
   * Checks whether `text` contains a correction cue (D-11).
   *
   * Normalisation: trim → lowercase → strip sentence punctuation (ASCII
   * `.,!?;:` plus the CJK/fullwidth forms `，。！？；：`).
   * Keywords are tested in store order with a case-insensitive substring
   * check and the FIRST hit wins, so an earlier short term shadows a later
   * longer term that contains it.
   *
   * Entries whose `term` is empty or not a string are skipped: an empty term
   * is a substring of every input and would otherwise match everything.
   *
   * @param text - Raw user message.
   * @returns A match result carrying the first matched term and its weight,
   *          or an all-zero "no match" result.
   */
  match(text: string): CorrectionMatchResult {
    // Fullwidth punctuation is written as escapes so the set survives any
    // re-encoding of this source file.
    const normalized = text
      .trim()
      .toLowerCase()
      .replace(/[.,!?;:\uFF0C\u3002\uFF01\uFF1F\uFF1B\uFF1A]/g, '');

    for (const keyword of this.store.keywords) {
      const needle = typeof keyword.term === 'string' ? keyword.term.toLowerCase() : '';
      if (needle === '') continue;

      if (normalized.includes(needle)) {
        return { matched: true, matchedTerms: [keyword.term], score: keyword.weight, confidence: 0.9 };
      }
    }

    return { matched: false, matchedTerms: [], score: 0.0, confidence: 0.0 };
  }

  /**
   * Adds a new keyword to the store and immediately flushes (D-06, D-07).
   *
   * If the flush fails the keyword is removed from the in-memory store again
   * so memory never claims a term that was not persisted.
   *
   * @param keyword - Keyword to add; `addedAt` is stamped here.
   * @throws Error when the store already holds MAX_CORRECTION_KEYWORDS terms.
   * @throws Any error raised while persisting the store.
   */
  add(keyword: Omit<CorrectionKeyword, 'addedAt'>): void {
    if (this.store.keywords.length >= MAX_CORRECTION_KEYWORDS) {
      throw new Error(`Correction keyword store limit reached (${MAX_CORRECTION_KEYWORDS} terms)`);
    }

    const entry: CorrectionKeyword = {
      ...keyword,
      addedAt: new Date().toISOString(),
    };

    this.store.keywords.push(entry);
    try {
      this.flush();
    } catch (err) {
      // Roll back the optimistic push so memory and disk stay in agreement.
      const index = this.store.keywords.lastIndexOf(entry);
      if (index !== -1) this.store.keywords.splice(index, 1);
      throw err;
    }
  }

  /** Returns a reference to the in-memory store (not a copy). */
  getStore(): CorrectionKeywordStore {
    return this.store;
  }

  /** Persists the current in-memory store to disk via saveCorrectionKeywordStore(). */
  flush(): void {
    saveCorrectionKeywordStore(this.stateDir, this.store);
  }

  // ── Singleton factory ───────────────────────────────────────────────────

  /**
   * Returns the shared CorrectionCueLearner instance for a given stateDir.
   * Re-creates the instance if stateDir differs from the previous call
   * (e.g. workspace switch).
   */
  static get(stateDir: string): CorrectionCueLearner {
    if (!_instance || _lastStateDir !== stateDir) {
      _instance = new CorrectionCueLearner(stateDir);
      _lastStateDir = stateDir;
    }
    return _instance;
  }
}
@@ -0,0 +1,88 @@
1
+ /**
2
+ * Correction Cue Keyword Types
3
+ *
4
+ * Types for the dynamic correction cue detection system.
5
+ * Replaces the previous hardcoded cue list in detectCorrectionCue()
6
+ * with a persistent, learnable keyword store.
7
+ */
8
+
9
+ // =========================================================================
10
+ // Keyword Store
11
+ // =========================================================================
12
+
13
+ export interface CorrectionKeyword {
14
+ /** The keyword term to match against normalized user text */
15
+ term: string;
16
+ /** Contribution weight (0-1) */
17
+ weight: number;
18
+ /** How this keyword was introduced */
19
+ source: 'seed' | 'llm' | 'user';
20
+ /** ISO 8601 timestamp of when this keyword was added */
21
+ addedAt: string;
22
+ /** Total times this keyword has matched (default: 0) */
23
+ hitCount?: number;
24
+ /** Confirmed correct matches (default: 0) */
25
+ truePositiveCount?: number;
26
+ /** Confirmed incorrect matches (default: 0) */
27
+ falsePositiveCount?: number;
28
+ /** Last time this keyword matched (ISO timestamp) */
29
+ lastHitAt?: string;
30
+ }
31
+
32
+ export interface CorrectionKeywordStore {
33
+ /** All correction keywords */
34
+ keywords: CorrectionKeyword[];
35
+ /** Schema version */
36
+ version: number;
37
+ /** Last time keyword optimization was performed (ISO timestamp) */
38
+ lastOptimizedAt: string;
39
+ }
40
+
41
+ // =========================================================================
42
+ // Match Result
43
+ // =========================================================================
44
+
45
+ export interface CorrectionMatchResult {
46
+ /** Whether any keyword matched */
47
+ matched: boolean;
48
+ /** The first matched term (empty array when no match) */
49
+ matchedTerms: string[];
50
+ /** Score (0-1); match() currently returns the matched keyword's weight */
51
+ score: number;
52
+ /** Confidence in the match result (0-1) */
53
+ confidence: number;
54
+ }
55
+
56
+ // =========================================================================
57
+ // Seed Keywords (16 terms — sourced from detectCorrectionCue)
58
+ // =========================================================================
59
+
60
+ /** Maximum number of keywords the store may hold (D-06). */
61
+ export const MAX_CORRECTION_KEYWORDS = 200;
62
+
63
+ /**
64
+ * Preset seed keywords for correction cue detection.
65
+ * Mirrors the hardcoded list in detectCorrectionCue() exactly (D-08).
66
+ * addedAt is intentionally empty here — it is filled in at runtime by
67
+ * createDefaultStore() when the store is first persisted to disk.
68
+ */
69
+ export const CORRECTION_SEED_KEYWORDS: CorrectionKeyword[] = [
70
+ // Chinese (8)
71
+ { term: '不是这个', weight: 0.6, source: 'seed', addedAt: '' },
72
+ { term: '不对', weight: 0.5, source: 'seed', addedAt: '' },
73
+ { term: '错了', weight: 0.5, source: 'seed', addedAt: '' },
74
+ { term: '搞错了', weight: 0.7, source: 'seed', addedAt: '' },
75
+ { term: '理解错了', weight: 0.7, source: 'seed', addedAt: '' },
76
+ { term: '你理解错了', weight: 0.8, source: 'seed', addedAt: '' },
77
+ { term: '重新来', weight: 0.6, source: 'seed', addedAt: '' },
78
+ { term: '再试一次', weight: 0.4, source: 'seed', addedAt: '' },
79
+ // English (8)
80
+ { term: 'you are wrong', weight: 0.7, source: 'seed', addedAt: '' },
81
+ { term: 'wrong file', weight: 0.6, source: 'seed', addedAt: '' },
82
+ { term: 'not this', weight: 0.4, source: 'seed', addedAt: '' },
83
+ { term: 'redo', weight: 0.6, source: 'seed', addedAt: '' },
84
+ { term: 'try again', weight: 0.4, source: 'seed', addedAt: '' },
85
+ { term: 'again', weight: 0.3, source: 'seed', addedAt: '' },
86
+ { term: 'please redo', weight: 0.6, source: 'seed', addedAt: '' },
87
+ { term: 'please try again', weight: 0.5, source: 'seed', addedAt: '' },
88
+ ];
package/src/core/init.ts CHANGED
@@ -4,6 +4,7 @@ import { fileURLToPath } from 'url';
4
4
  import type { OpenClawPluginApi, PluginLogger } from '../openclaw-sdk.js';
5
5
  import { PD_DIRS } from './paths.js';
6
6
  import { defaultContextConfig } from '../types.js';
7
+ import { loadStore, setPrincipleState, type PrincipleTrainingState } from './principle-training-state.js';
7
8
 
8
9
  /**
9
10
  * Default PROFILE.json content
@@ -143,6 +144,72 @@ function copyRecursiveSync(srcDir: string, destDir: string, api: OpenClawPluginA
143
144
  }
144
145
  }
145
146
 
147
+ /**
148
+ * Core thinking model definitions (T-01 through T-10).
149
+ * These are the built-in cognitive patterns that every workspace should have.
150
+ */
151
+ const CORE_THINKING_MODELS: Array<{
152
+ id: string;
153
+ name: string;
154
+ description: string;
155
+ }> = [
156
+ { id: 'T-01', name: 'Survey Before Acting', description: 'Understand the structure first before making changes.' },
157
+ { id: 'T-02', name: 'Respect Constraints', description: 'Trust files, not your context window. Write conclusions to files.' },
158
+ { id: 'T-03', name: 'Evidence Over Assumption', description: 'Use logs, code, and outputs before inferring causes.' },
159
+ { id: 'T-04', name: 'Reversible First', description: 'Prefer changes that are safe to roll back when risk is high.' },
160
+ { id: 'T-05', name: 'Safety Rails', description: 'Call out guardrails, prohibitions, and failure-prevention constraints.' },
161
+ { id: 'T-06', name: 'Simplicity First', description: 'Prefer the smallest understandable solution over over-engineering.' },
162
+ { id: 'T-07', name: 'Minimal Change Surface', description: 'Limit the blast radius and touch only what is necessary.' },
163
+ { id: 'T-08', name: 'Pain As Signal', description: 'Treat failures and friction as clues to step back and rethink.' },
164
+ { id: 'T-09', name: 'Divide And Conquer', description: 'Split the task into smaller phases before execution.' },
165
+ { id: 'T-10', name: 'Memory Externalization', description: 'Write intermediate conclusions to files for persistence.' },
166
+ ];
167
+
168
/**
 * Seeds the principle training store with one entry per CORE_THINKING_MODELS
 * item, but only when the store currently has no keys other than `_tree`.
 *
 * Every seeded entry starts with zeroed counters, empty run/checkpoint lists,
 * `evaluability: 'deterministic'` and `internalizationStatus: 'needs_training'`.
 * Any exception is logged through `logger.error` and reported as `false`; it
 * is never rethrown.
 *
 * @param stateDir - State directory path
 * @param logger - Plugin logger
 * @returns true if the seeding loop completed, false if the store already had
 *          entries or an error occurred
 */
export function ensureCorePrinciples(stateDir: string, logger: PluginLogger): boolean {
  // Fresh object (and fresh arrays) per principle so entries never share state.
  const initialStateFor = (principleId: string): PrincipleTrainingState => ({
    principleId,
    evaluability: 'deterministic',
    applicableOpportunityCount: 0,
    observedViolationCount: 0,
    complianceRate: 0,
    violationTrend: 0,
    generatedSampleCount: 0,
    approvedSampleCount: 0,
    includedTrainRunIds: [],
    deployedCheckpointIds: [],
    internalizationStatus: 'needs_training',
  });

  try {
    const store = loadStore(stateDir);

    // `_tree` is bookkeeping, not a principle; anything else means "already seeded".
    const alreadySeeded = Object.keys(store).some((key) => key !== '_tree');
    if (alreadySeeded) {
      return false;
    }

    const total = CORE_THINKING_MODELS.length;
    logger.info(`[PD] Initializing core thinking models (${total} principles) into training store`);

    for (const { id } of CORE_THINKING_MODELS) {
      setPrincipleState(stateDir, initialStateFor(id));
    }

    logger.info(`[PD] Initialized ${total} core thinking models: T-01 through T-10`);
    return true;
  } catch (err) {
    logger.error(`[PD] Failed to initialize core principles: ${String(err)}`);
    return false;
  }
}
212
+
146
213
  /**
147
214
  * Ensures that the state directory has the necessary files (like pain_dictionary.json).
148
215
  */
@@ -0,0 +1,58 @@
1
+ /**
2
+ * Correction Observer Workflow - Type Definitions
3
+ *
4
+ * Types for the correction observer LLM optimization workflow.
5
+ * This workflow dispatches an LLM subagent to analyze keyword performance
6
+ * and recommend ADD/UPDATE/REMOVE actions for the correction keyword store.
7
+ */
8
+
9
+ import type { SubagentWorkflowSpec, WorkflowPersistContext, WorkflowResultContext } from './subagent-workflow/types.js';
10
+
11
+ /**
12
+ * Input passed to the correction observer subagent.
13
+ */
14
+ export interface CorrectionObserverPayload {
15
+ /** Parent session that triggered the optimization */
16
+ parentSessionId: string;
17
+ /** Workspace directory */
18
+ workspaceDir: string;
19
+ /** Current keyword store summary for context */
20
+ keywordStoreSummary: {
21
+ totalKeywords: number;
22
+ terms: Array<{
23
+ term: string;
24
+ weight: number;
25
+ hitCount: number;
26
+ truePositiveCount: number;
27
+ falsePositiveCount: number;
28
+ }>;
29
+ };
30
+ /** Recent user messages for pattern analysis */
31
+ recentMessages: string[];
32
+ }
33
+
34
+ /**
35
+ * Result from the correction observer subagent.
36
+ */
37
+ export interface CorrectionObserverResult {
38
+ /** Whether any changes were made */
39
+ updated: boolean;
40
+ /** The optimization decisions returned by the LLM */
41
+ updates: Record<string, {
42
+ action: 'add' | 'update' | 'remove';
43
+ weight?: number;
44
+ falsePositiveRate?: number;
45
+ reasoning: string;
46
+ }>;
47
+ /** Human-readable summary */
48
+ summary: string;
49
+ }
50
+
51
+ /**
52
+ * Workflow spec for the correction observer optimization workflow.
53
+ */
54
+ export interface CorrectionObserverWorkflowSpec extends SubagentWorkflowSpec<CorrectionObserverResult> {
55
+ workflowType: 'correction_observer';
56
+ payload: CorrectionObserverPayload;
57
+ result?: CorrectionObserverResult;
58
+ }
@@ -0,0 +1,218 @@
1
+ /**
2
+ * CorrectionObserverWorkflowManager
3
+ *
4
+ * Workflow manager that dispatches an LLM subagent to optimize correction
5
+ * keywords based on recent match performance data and user feedback.
6
+ *
7
+ * Follows the established WorkflowManagerBase pattern from EmpathyObserverWorkflowManager.
8
+ */
9
+
10
+ import type { PluginLogger } from '../openclaw-sdk.js';
11
+ import type {
12
+ SubagentWorkflowSpec,
13
+ WorkflowMetadata,
14
+ WorkflowResultContext,
15
+ WorkflowPersistContext,
16
+ WorkflowHandle,
17
+ } from './subagent-workflow/types.js';
18
+ import type { RuntimeDirectDriver } from './subagent-workflow/runtime-direct-driver.js';
19
+ import { WorkflowManagerBase } from './subagent-workflow/workflow-manager-base.js';
20
+ import { isSubagentRuntimeAvailable } from '../utils/subagent-probe.js';
21
+ import type {
22
+ CorrectionObserverPayload,
23
+ CorrectionObserverResult,
24
+ CorrectionObserverWorkflowSpec,
25
+ } from './correction-observer-types.js';
26
+
27
+ const WORKFLOW_SESSION_PREFIX = 'agent:main:subagent:workflow-correction-';
28
+
29
+ const DEFAULT_TIMEOUT_MS = 30_000;
30
+ const DEFAULT_TTL_MS = 5 * 60 * 1000;
31
+
32
+ // ── Options ─────────────────────────────────────────────────────────────────
33
+
34
+ export interface CorrectionObserverWorkflowOptions {
35
+ workspaceDir: string;
36
+ logger: PluginLogger;
37
+ subagent: RuntimeDirectDriver['subagent'];
38
+ /** Pass api.runtime.agent.session to enable heartbeat-safe cleanup (#188) */
39
+ agentSession?: RuntimeDirectDriver['agentSession'];
40
+ }
41
+
42
+ // ── Helper Functions ─────────────────────────────────────────────────────────
43
+
44
/**
 * Picks the assistant's final text out of a finished subagent run.
 *
 * - When `assistantTexts` is non-empty, its last element is returned (an
 *   empty last element yields `''`; `messages` is not consulted).
 * - Otherwise `messages` is scanned from newest to oldest for entries with
 *   `role === 'assistant'`. A string `content` is returned as-is; an array
 *   `content` contributes the `text` of its `{ type: 'text' }` parts joined
 *   with newlines, and is skipped if that join is empty.
 *
 * @returns The extracted text, or `''` when nothing usable is found.
 */
function extractAssistantTextForSpec(messages: unknown[], assistantTexts?: string[]): string {
  if (assistantTexts?.length) {
    return assistantTexts[assistantTexts.length - 1] || '';
  }

  let cursor = messages.length;
  while (cursor-- > 0) {
    const candidate = messages[cursor] as { role?: string; content?: unknown } | null | undefined;
    if (!candidate || candidate.role !== 'assistant') continue;

    const body = candidate.content;
    if (typeof body === 'string') {
      return body;
    }
    if (!Array.isArray(body)) continue;

    const pieces: string[] = [];
    for (const part of body as unknown[]) {
      if (!part || typeof part !== 'object') continue;
      const { type, text } = part as { type?: string; text?: unknown };
      if (type === 'text' && typeof text === 'string') {
        pieces.push(text);
      }
    }

    const joined = pieces.join('\n');
    if (joined) return joined;
  }

  return '';
}
65
+
66
/**
 * Parses the correction observer's JSON answer out of raw assistant text.
 *
 * The trimmed text is parsed as JSON first; if that fails (for example the
 * model wrapped the JSON in prose or a code fence) the span from the first
 * `{` to the last `}` is tried instead.
 *
 * A parsed value is only accepted when it has the CorrectionObserverResult
 * shape: a plain object with a boolean `updated`, a plain-object `updates`,
 * and a string `summary`. Individual entries inside `updates` are NOT
 * validated here.
 *
 * @param rawText - Raw assistant output.
 * @returns The parsed result, or `null` when the text is empty, is not JSON,
 *          or does not have the expected top-level shape.
 */
function parseCorrectionObserverPayload(rawText: string): CorrectionObserverResult | null {
  const trimmed = rawText?.trim();
  if (!trimmed) return null;

  const isPlainObject = (value: unknown): value is Record<string, unknown> =>
    value !== null && typeof value === 'object' && !Array.isArray(value);

  // JSON.parse accepts any JSON value ("42", "[]", "null", ...), so the cast
  // to CorrectionObserverResult is only made after a structural check.
  const decode = (candidate: string): CorrectionObserverResult | null => {
    let value: unknown;
    try {
      value = JSON.parse(candidate);
    } catch {
      return null;
    }
    if (!isPlainObject(value)) return null;
    if (typeof value.updated !== 'boolean') return null;
    if (!isPlainObject(value.updates)) return null;
    if (typeof value.summary !== 'string') return null;
    return value as unknown as CorrectionObserverResult;
  };

  const direct = decode(trimmed);
  if (direct) return direct;

  const embedded = /\{[\s\S]*\}/.exec(trimmed);
  return embedded ? decode(embedded[0]) : null;
}
83
+
84
+ // ── Workflow Spec ─────────────────────────────────────────────────────────────
85
+
86
/**
 * Workflow spec for the correction observer: builds the optimizer prompt from
 * the keyword store summary and recent user messages, and parses the
 * subagent's JSON answer. Applying the answer to the keyword store is left to
 * the caller.
 */
export const correctionObserverWorkflowSpec: SubagentWorkflowSpec<CorrectionObserverResult> = {
  workflowType: 'correction_observer',
  transport: 'runtime_direct',
  timeoutMs: 30_000,
  ttlMs: 300_000,
  shouldDeleteSessionAfterFinalize: true,

  /**
   * Renders the optimizer prompt.
   *
   * Terms and messages are both embedded as JSON string literals so quotes or
   * newlines inside stored/learned text cannot break out of their list item.
   * Missing parts of `taskInput` are treated as empty lists rather than
   * throwing.
   *
   * @param taskInput - Expected to be a CorrectionObserverPayload.
   * @returns The complete prompt text.
   */
  buildPrompt(taskInput: unknown, _metadata: WorkflowMetadata): string {
    const payload = (taskInput ?? {}) as Partial<CorrectionObserverPayload>;
    const terms = payload.keywordStoreSummary?.terms ?? [];
    const totalKeywords = payload.keywordStoreSummary?.totalKeywords ?? terms.length;
    const recentMessages = payload.recentMessages ?? [];

    const termsList = terms.length > 0
      ? terms
        .map(t => ` - term=${JSON.stringify(t.term)}, weight=${t.weight}, hits=${t.hitCount}, TP=${t.truePositiveCount}, FP=${t.falsePositiveCount}`)
        .join('\n')
      : ' (none)';

    const messages = recentMessages.length > 0
      ? recentMessages.map(m => ` - ${JSON.stringify(m)}`).join('\n')
      : ' (none)';

    return [
      'You are a correction keyword optimizer.',
      '',
      '## TASK',
      'Analyze the current correction keyword store and recent user messages.',
      'Recommend ADD/UPDATE/REMOVE actions to improve correction cue accuracy.',
      '',
      '## Current Keyword Store (' + totalKeywords + ' terms):',
      termsList,
      '',
      '## Recent User Messages (' + recentMessages.length + ' messages):',
      messages,
      '',
      '## Rules:',
      '- ADD: If a correction pattern is detected in messages but not in store',
      '- UPDATE: If a term\'s weight should change based on TP/FP ratio',
      '- REMOVE: If a term has 0 hits after many uses AND high false positive rate (>0.3)',
      '- Keep reasoning concise (max 100 chars)',
      '- Weight range: 0.1-0.9',
      '',
      'Return strict JSON (no markdown):',
      // Spell out the per-term entry so the answer matches CorrectionObserverResult.
      '{"updated": boolean, "updates": {"<term>": {"action": "add"|"update"|"remove", "weight"?: number, "falsePositiveRate"?: number, "reasoning": string}}, "summary": string}',
    ].join('\n');
  },

  /** Extracts the assistant's final text from the run and parses it; `null` means "no usable result". */
  async parseResult(ctx: WorkflowResultContext): Promise<CorrectionObserverResult | null> {
    const rawText = extractAssistantTextForSpec(ctx.messages, ctx.assistantTexts);
    return parseCorrectionObserverPayload(rawText);
  },

  /** Intentionally a no-op. */
  async persistResult(_ctx: WorkflowPersistContext<CorrectionObserverResult>): Promise<void> {
    // Result persistence is handled by the caller (evolution-worker.ts)
    // which reads the result and applies keyword store updates.
    // This spec handles only the LLM dispatch and result parsing.
  },

  /** Only a successful wait (`'ok'`) should lead to finalization. */
  shouldFinalizeOnWaitStatus(status: 'ok' | 'error' | 'timeout'): boolean {
    return status === 'ok';
  },
};
145
+
146
+ // ── Manager Class ─────────────────────────────────────────────────────────────
147
+
148
/**
 * Workflow manager for `correction_observer` workflows.
 *
 * Configures WorkflowManagerBase with this workflow's type, session prefix and
 * default timeout/TTL, and refuses to start a workflow for boot sessions, when
 * the subagent runtime is unavailable, or for non-`runtime_direct` transports.
 */
export class CorrectionObserverWorkflowManager extends WorkflowManagerBase {
  constructor(opts: CorrectionObserverWorkflowOptions) {
    const { workspaceDir, logger, subagent, agentSession } = opts;
    super({
      workspaceDir,
      logger,
      subagent,
      agentSession,
      workflowType: 'correction_observer',
      sessionPrefix: WORKFLOW_SESSION_PREFIX,
      defaultTimeoutMs: DEFAULT_TIMEOUT_MS,
      defaultTtlMs: DEFAULT_TTL_MS,
    });
  }

  /**
   * Starts a workflow after running the pre-flight checks below, in order.
   * The first two checks log an info line before throwing.
   *
   * @throws Error if the parent session id starts with `boot-`.
   * @throws Error if the subagent runtime is unavailable.
   * @throws Error if `spec.transport` is not `runtime_direct`.
   */
  async startWorkflow<TResult>(
    spec: SubagentWorkflowSpec<TResult>,
    options: {
      parentSessionId: string;
      workspaceDir?: string;
      taskInput: unknown;
      metadata?: Record<string, unknown>;
    }
  ): Promise<WorkflowHandle> {
    // Surface degrade: boot sessions never get a correction workflow.
    const isBootSession = options.parentSessionId.startsWith('boot-');
    if (isBootSession) {
      this.logger.info('[PD:CorrectionObserver] Skipping workflow: boot session');
      throw new Error('CorrectionObserverWorkflowManager: cannot start workflow for boot session');
    }

    // Surface degrade: nothing can be dispatched without a subagent runtime.
    const runtimeAvailable = isSubagentRuntimeAvailable(this.driver.getSubagent());
    if (!runtimeAvailable) {
      this.logger.info('[PD:CorrectionObserver] Skipping workflow: subagent runtime unavailable');
      throw new Error('CorrectionObserverWorkflowManager: subagent runtime unavailable');
    }

    if (spec.transport !== 'runtime_direct') {
      throw new Error('CorrectionObserverWorkflowManager only supports runtime_direct transport');
    }

    return super.startWorkflow(spec, options);
  }

  /**
   * Builds the metadata record for a new workflow. Keys from
   * `options.metadata` are spread last, so they override the fields set here
   * when names collide.
   */
  // eslint-disable-next-line @typescript-eslint/class-methods-use-this
  protected override createWorkflowMetadata<TResult>(
    spec: SubagentWorkflowSpec<TResult>,
    options: {
      parentSessionId: string;
      workspaceDir?: string;
      taskInput: unknown;
      metadata?: Record<string, unknown>;
    },
    now: number
  ): WorkflowMetadata {
    const { parentSessionId, workspaceDir, taskInput, metadata: extra } = options;
    return {
      parentSessionId,
      workspaceDir,
      taskInput,
      startedAt: now,
      workflowType: spec.workflowType,
      ...extra,
    };
  }
}
211
+
212
+ // ── Factory ─────────────────────────────────────────────────────────────────
213
+
214
/**
 * Factory wrapper around `new CorrectionObserverWorkflowManager(opts)`.
 *
 * @param opts - Workspace, logger and subagent runtime handles for the manager.
 * @returns A newly constructed manager.
 */
export function createCorrectionObserverWorkflowManager(
  opts: CorrectionObserverWorkflowOptions
): CorrectionObserverWorkflowManager {
  const manager = new CorrectionObserverWorkflowManager(opts);
  return manager;
}