principles-disciple 1.51.0 → 1.53.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/core/init.ts CHANGED
@@ -5,6 +5,8 @@ import type { OpenClawPluginApi, PluginLogger } from '../openclaw-sdk.js';
5
5
  import { PD_DIRS } from './paths.js';
6
6
  import { defaultContextConfig } from '../types.js';
7
7
  import { loadStore, setPrincipleState, type PrincipleTrainingState } from './principle-training-state.js';
8
+ import { addPrincipleToLedger } from './principle-tree-ledger.js';
9
+ import type { LedgerPrinciple } from './principle-tree-ledger.js';
8
10
  import { atomicWriteFileSync } from '../utils/io.js';
9
11
  import { createDefaultKeywordStore, saveKeywordStore } from './empathy-keyword-matcher.js';
10
12
 
@@ -150,7 +152,7 @@ function copyRecursiveSync(srcDir: string, destDir: string, api: OpenClawPluginA
150
152
  * Core thinking model definitions (T-01 through T-10).
151
153
  * These are the built-in cognitive patterns that every workspace should have.
152
154
  */
153
- const CORE_THINKING_MODELS: Array<{
155
+ export const CORE_THINKING_MODELS: Array<{
154
156
  id: string;
155
157
  name: string;
156
158
  description: string;
@@ -190,7 +192,7 @@ export function ensureCorePrinciples(stateDir: string, logger: PluginLogger): bo
190
192
  for (const model of CORE_THINKING_MODELS) {
191
193
  const state: PrincipleTrainingState = {
192
194
  principleId: model.id,
193
- evaluability: 'deterministic',
195
+ evaluability: 'manual_only',
194
196
  applicableOpportunityCount: 0,
195
197
  observedViolationCount: 0,
196
198
  complianceRate: 0,
@@ -202,6 +204,31 @@ export function ensureCorePrinciples(stateDir: string, logger: PluginLogger): bo
202
204
  internalizationStatus: 'needs_training',
203
205
  };
204
206
  setPrincipleState(stateDir, state);
207
+
208
+ // Also write to Ledger Tree so bootstrapRules() can find them
209
+ const now = new Date().toISOString();
210
+ const ledgerPrinciple: LedgerPrinciple = {
211
+ id: model.id,
212
+ version: 1,
213
+ text: model.description,
214
+ coreAxiomId: model.id,
215
+ triggerPattern: '',
216
+ action: '',
217
+ status: 'active',
218
+ priority: 'P1',
219
+ scope: 'general',
220
+ evaluability: 'manual_only',
221
+ valueScore: 0,
222
+ adherenceRate: 0,
223
+ painPreventedCount: 0,
224
+ derivedFromPainIds: [],
225
+ ruleIds: [],
226
+ conflictsWithPrincipleIds: [],
227
+ createdAt: now,
228
+ updatedAt: now,
229
+ suggestedRules: [],
230
+ };
231
+ addPrincipleToLedger(stateDir, ledgerPrinciple);
205
232
  }
206
233
 
207
234
  logger.info(`[PD] Initialized ${CORE_THINKING_MODELS.length} core thinking models: T-01 through T-10`);
@@ -2211,6 +2211,20 @@ export async function runTrinityAsync(options: RunTrinityOptions): Promise<Trini
2211
2211
  telemetry.eligibleCandidateCount = draftArtifact.telemetry.eligibleCandidateCount;
2212
2212
  }
2213
2213
 
2214
+ // Hallucination detection (SDK-QUAL-02): validate extraction against snapshot
2215
+ const hallucinationResult = validateExtraction(draftArtifact, snapshot);
2216
+ if (!hallucinationResult.isGrounded) {
2217
+ const reason = hallucinationResult.reason ?? 'Extraction not grounded in session evidence';
2218
+ console.warn(`[Trinity] HALLUCINATION_DETECTED: ${reason}`);
2219
+ telemetry.stageFailures.push(`Hallucination: ${reason}`);
2220
+ return {
2221
+ success: false,
2222
+ telemetry,
2223
+ failures: [{ stage: 'scribe', reason }],
2224
+ fallbackOccurred: false,
2225
+ };
2226
+ }
2227
+
2214
2228
  return {
2215
2229
  success: true,
2216
2230
  artifact: draftArtifact,
@@ -2339,6 +2353,20 @@ function runTrinityWithStubs(
2339
2353
  telemetry.eligibleCandidateCount = draftArtifact.telemetry.eligibleCandidateCount;
2340
2354
  }
2341
2355
 
2356
+ // Hallucination detection (SDK-QUAL-02): validate extraction against snapshot
2357
+ const hallucinationResult = validateExtraction(draftArtifact, snapshot);
2358
+ if (!hallucinationResult.isGrounded) {
2359
+ const reason = hallucinationResult.reason ?? 'Extraction not grounded in session evidence';
2360
+ console.warn(`[Trinity] HALLUCINATION_DETECTED: ${reason}`);
2361
+ telemetry.stageFailures.push(`Hallucination: ${reason}`);
2362
+ return {
2363
+ success: false,
2364
+ telemetry,
2365
+ failures: [{ stage: 'scribe', reason }],
2366
+ fallbackOccurred: false,
2367
+ };
2368
+ }
2369
+
2342
2370
  return {
2343
2371
  success: true,
2344
2372
  artifact: draftArtifact,
@@ -2405,6 +2433,208 @@ export function validateDraftArtifact(draft: TrinityDraftArtifact): DraftValidat
2405
2433
  };
2406
2434
  }
2407
2435
 
2436
+ // ---------------------------------------------------------------------------
2437
+ // Hallucination Detection (SDK-QUAL-02)
2438
+ // ---------------------------------------------------------------------------
2439
+
2440
+ /**
2441
+ * Result of hallucination validation against session snapshot evidence.
2442
+ */
2443
+ export interface HallucinationDetectionResult {
2444
+ /** Whether the extraction is grounded in real session evidence */
2445
+ isGrounded: boolean;
2446
+ /** List of evidence types found in the snapshot supporting the extraction */
2447
+ evidenceTypes: string[];
2448
+ /** Detailed reason if hallucination is detected */
2449
+ reason?: string;
2450
+ /** Matching evidence items for telemetry (truncated for safety) */
2451
+ evidencePreview: string[];
2452
+ }
2453
+
2454
+ /**
2455
+ * Validate that an extracted badDecision corresponds to actual events in the
2456
+ * NocturnalSessionSnapshot. This catches hallucinated extractions where the
2457
+ * Trinity chain produces a badDecision that has no grounding in real failures,
2458
+ * pain events, or gate blocks.
2459
+ *
2460
+ * Evidence sources checked:
2461
+ * 1. Failed tool calls (snapshot.toolCalls with outcome='failure')
2462
+ * 2. Pain events (snapshot.painEvents with score >= 50)
2463
+ * 3. Gate blocks (snapshot.gateBlocks)
2464
+ * 4. User corrections (snapshot.userTurns with correctionDetected=true)
2465
+ *
2466
+ * The function uses keyword overlap heuristics: it extracts tool names, file
2467
+ * paths, error messages, and pain reasons from the snapshot and checks if the
2468
+ * badDecision text overlaps meaningfully with any of them.
2469
+ *
2470
+ * @param artifact The draft artifact produced by the Scribe stage
2471
+ * @param snapshot The session snapshot used to generate the extraction
2472
+ * @returns HallucinationDetectionResult indicating whether the extraction is grounded
2473
+ */
2474
+ export function validateExtraction(
2475
+ artifact: TrinityDraftArtifact,
2476
+ snapshot: NocturnalSessionSnapshot
2477
+ ): HallucinationDetectionResult {
2478
+ const evidenceTypes: string[] = [];
2479
+ const evidencePreview: string[] = [];
2480
+
2481
+ // Shared token normalizer: lowercase + strip punctuation, same as badDecisionTokens
2482
+ const normalizeEvidenceToken = (value: string): string =>
2483
+ value.toLowerCase().replace(/[^a-z0-9]/g, '');
2484
+
2485
+ // Build a set of evidence tokens from the snapshot
2486
+ const evidenceTokens = new Set<string>();
2487
+ const badDecisionLower = artifact.badDecision.toLowerCase();
2488
+
2489
+ // 1. Failed tool calls
2490
+ const failedToolCalls = (snapshot.toolCalls ?? []).filter(tc => tc.outcome === 'failure');
2491
+ if (failedToolCalls.length > 0) {
2492
+ evidenceTypes.push('tool_failures');
2493
+ for (const tc of failedToolCalls) {
2494
+ // Extract tool name tokens
2495
+ evidenceTokens.add(tc.toolName.toLowerCase());
2496
+ if (tc.filePath) {
2497
+ // Extract all path segments and normalize each for matching
2498
+ const rawPathParts = [tc.filePath, ...tc.filePath.split(/[\\/]/)];
2499
+ for (const part of rawPathParts) {
2500
+ const normalized = normalizeEvidenceToken(part);
2501
+ if (normalized.length > 0) evidenceTokens.add(normalized);
2502
+ }
2503
+ }
2504
+ if (tc.errorMessage) {
2505
+ // Extract key words from error messages (filter stop words)
2506
+ const errorWords = tc.errorMessage.toLowerCase().split(/\s+/)
2507
+ .filter(w => w.length > 3 && !['with', 'from', 'that', 'this', 'which', 'been', 'have', 'were', 'they', 'their'].includes(w));
2508
+ for (const w of errorWords) {
2509
+ const normalized = normalizeEvidenceToken(w);
2510
+ if (normalized.length > 0) evidenceTokens.add(normalized);
2511
+ }
2512
+ }
2513
+ if (tc.errorType) evidenceTokens.add(tc.errorType.toLowerCase());
2514
+ evidencePreview.push(`tool:${tc.toolName}${tc.filePath ? `@${tc.filePath}` : ''} -> ${tc.errorMessage ?? 'unknown'}`.slice(0, 100));
2515
+ }
2516
+ }
2517
+
2518
+ // 2. Pain events (score >= 50 indicates meaningful pain)
2519
+ const significantPainEvents = (snapshot.painEvents ?? []).filter(pe => pe.score >= 50);
2520
+ if (significantPainEvents.length > 0) {
2521
+ evidenceTypes.push('pain_events');
2522
+ for (const pe of significantPainEvents) {
2523
+ evidenceTokens.add(pe.source.toLowerCase());
2524
+ if (pe.reason) {
2525
+ const painWords = pe.reason.toLowerCase().split(/\s+/)
2526
+ .filter(w => w.length > 3 && !['with', 'from', 'that', 'this', 'which', 'been', 'have', 'were', 'they', 'their'].includes(w));
2527
+ for (const w of painWords) {
2528
+ const normalized = normalizeEvidenceToken(w);
2529
+ if (normalized.length > 0) evidenceTokens.add(normalized);
2530
+ }
2531
+ }
2532
+ evidencePreview.push(`pain:${pe.score} [${pe.source}] ${pe.reason ?? ''}`.slice(0, 100));
2533
+ }
2534
+ }
2535
+
2536
+ // 3. Gate blocks
2537
+ if ((snapshot.gateBlocks ?? []).length > 0) {
2538
+ evidenceTypes.push('gate_blocks');
2539
+ for (const gb of snapshot.gateBlocks) {
2540
+ evidenceTokens.add(gb.toolName.toLowerCase());
2541
+ evidenceTokens.add('gate');
2542
+ evidenceTokens.add('blocked');
2543
+ if (gb.reason) {
2544
+ const blockWords = gb.reason.toLowerCase().split(/\s+/)
2545
+ .filter(w => w.length > 3);
2546
+ for (const w of blockWords) {
2547
+ const normalized = normalizeEvidenceToken(w);
2548
+ if (normalized.length > 0) evidenceTokens.add(normalized);
2549
+ }
2550
+ }
2551
+ evidencePreview.push(`gate:${gb.toolName} -> ${gb.reason}`.slice(0, 100));
2552
+ }
2553
+ }
2554
+
2555
+ // 4. User corrections
2556
+ const userCorrections = (snapshot.userTurns ?? []).filter(ut => ut.correctionDetected);
2557
+ if (userCorrections.length > 0) {
2558
+ evidenceTypes.push('user_corrections');
2559
+ evidenceTokens.add('correction');
2560
+ evidenceTokens.add('wrong');
2561
+ evidenceTokens.add('incorrect');
2562
+ evidencePreview.push(`corrections:${userCorrections.length}`);
2563
+ }
2564
+
2565
+ // If no evidence exists at all in the snapshot, we cannot validate.
2566
+ // Allow the extraction through — the pipeline already has guardrails for
2567
+ // empty snapshots (Dreamer returns valid:false).
2568
+ if (evidenceTypes.length === 0) {
2569
+ return {
2570
+ isGrounded: true,
2571
+ evidenceTypes: [],
2572
+ reason: undefined,
2573
+ evidencePreview: [],
2574
+ };
2575
+ }
2576
+
2577
+ // Check for overlap between badDecision text and evidence tokens
2578
+ // We look for meaningful keyword matches (tokens of length > 4)
2579
+ const badDecisionTokens = badDecisionLower.split(/\s+/)
2580
+ .map(t => t.replace(/[^a-z0-9]/g, ''))
2581
+ .filter(t => t.length > 4);
2582
+
2583
+ let matchCount = 0;
2584
+ const matchedTokens: string[] = [];
2585
+ for (const token of badDecisionTokens) {
2586
+ // Direct match
2587
+ if (evidenceTokens.has(token)) {
2588
+ matchCount++;
2589
+ matchedTokens.push(token);
2590
+ continue;
2591
+ }
2592
+ // Partial match: check if any evidence token contains this token or vice versa
2593
+ for (const evToken of evidenceTokens) {
2594
+ if (evToken.length > 4 && (evToken.includes(token) || token.includes(evToken))) {
2595
+ matchCount++;
2596
+ matchedTokens.push(token);
2597
+ break;
2598
+ }
2599
+ }
2600
+ }
2601
+
2602
+ // Heuristic: if at least 2 meaningful tokens overlap, consider grounded
2603
+ // Single overlap is acceptable if the token is highly specific (length > 8)
2604
+ const minOverlap = badDecisionTokens.length > 0
2605
+ ? Math.max(1, Math.ceil(badDecisionTokens.length * 0.15))
2606
+ : 0;
2607
+
2608
+ if (matchCount >= Math.max(2, minOverlap)) {
2609
+ return {
2610
+ isGrounded: true,
2611
+ evidenceTypes,
2612
+ evidencePreview: evidencePreview.slice(0, 5),
2613
+ };
2614
+ }
2615
+
2616
+ // Also check for at least one highly-specific match (length > 8)
2617
+ const hasHighlySpecificMatch = matchedTokens.some(t => t.length > 8);
2618
+ if (hasHighlySpecificMatch) {
2619
+ return {
2620
+ isGrounded: true,
2621
+ evidenceTypes,
2622
+ evidencePreview: evidencePreview.slice(0, 5),
2623
+ };
2624
+ }
2625
+
2626
+ // Hallucination detected — badDecision has no grounding in snapshot evidence
2627
+ const reason = `Hallucinated extraction: badDecision "${artifact.badDecision.slice(0, 80)}" has insufficient overlap with session evidence. ` +
2628
+ `Evidence types available: [${evidenceTypes.join(', ')}]. Matched tokens: [${matchedTokens.join(', ')}] (needed >= ${Math.max(2, minOverlap)}).`;
2629
+
2630
+ return {
2631
+ isGrounded: false,
2632
+ evidenceTypes,
2633
+ reason,
2634
+ evidencePreview: evidencePreview.slice(0, 5),
2635
+ };
2636
+ }
2637
+
2408
2638
  /**
2409
2639
  * Convert a TrinityDraftArtifact to a NocturnalArtifact-compatible structure.
2410
2640
  */
@@ -0,0 +1,242 @@
1
+ /**
2
+ * Observability Baselines for the Evolution SDK.
3
+ *
4
+ * Provides calculateBaselines() which measures the current state of the
5
+ * principle evolution system across four dimensions:
6
+ *
7
+ * 1. Principle Stock: total count of principles in the ledger
8
+ * 2. Structure: average sub-principles (rules) and implementations per principle
9
+ * 3. Association Rate: principles created / total pain events recorded
10
+ * 4. Internalization Rate: internalized principles / total principles
11
+ *
12
+ * Results are logged via SystemLogger and persisted to .state/baselines.json.
13
+ */
14
import * as fs from 'fs';
import * as path from 'path';
import { createRequire } from 'module';
import { loadLedger } from './principle-tree-ledger.js';
import { SystemLogger } from './system-logger.js';
import { atomicWriteFileSync } from '../utils/io.js';
19
+
20
+ // ---------------------------------------------------------------------------
21
+ // Types
22
+ // ---------------------------------------------------------------------------
23
+
24
/** Snapshot of the principle-evolution system's health across four dimensions. */
export interface ObservabilityBaselines {
  /** ISO 8601 timestamp when baselines were calculated */
  calculatedAt: string;

  /** Principle Stock: total count of principles in the ledger */
  principleStock: number;

  /** Total rules across all principles */
  totalRules: number;

  /** Total implementations across all rules */
  totalImplementations: number;

  /** Structure: average rules per principle (0 if no principles); rounded to 3 decimals */
  avgRulesPerPrinciple: number;

  /** Structure: average implementations per rule (0 if no rules); rounded to 3 decimals */
  avgImplementationsPerRule: number;

  /** Total pain events from trajectory DB (0 if DB unavailable) */
  totalPainEvents: number;

  /** Association Rate: principles / total pain events (0 if no pain events); rounded to 3 decimals */
  associationRate: number;

  /**
   * Count of principles with internalizationStatus = 'internalized'.
   * Only training entries whose principleId still exists in the ledger tree count.
   */
  internalizedCount: number;

  /** Internalization Rate: internalized / total principles (0 if no principles); rounded to 3 decimals */
  internalizationRate: number;

  /** Distribution of principle statuses (status -> count) */
  statusDistribution: Record<string, number>;

  /** Distribution of principle priorities (priority -> count) */
  priorityDistribution: Record<string, number>;

  /** Distribution of internalization statuses across ALL training-store entries (including orphaned ones) */
  internalizationDistribution: Record<string, number>;
}
64
+
65
+ // ---------------------------------------------------------------------------
66
+ // Constants
67
+ // ---------------------------------------------------------------------------
68
+
69
+ const BASELINES_FILE = 'baselines.json';
70
+
71
+ // ---------------------------------------------------------------------------
72
+ // Implementation
73
+ // ---------------------------------------------------------------------------
74
+
75
+ /**
76
+ * Calculate observability baselines for the principle evolution system.
77
+ *
78
+ * Reads the principle ledger from stateDir, computes metrics across four
79
+ * dimensions (Stock, Structure, Association, Internalization), logs a summary
80
+ * via SystemLogger, and persists results to .state/baselines.json.
81
+ *
82
+ * @param stateDir - The .state directory containing the principle ledger
83
+ * @param workspaceDir - Optional workspace dir for SystemLogger routing
84
+ * @returns The computed baselines
85
+ */
86
+ export function calculateBaselines(
87
+ stateDir: string,
88
+ workspaceDir?: string,
89
+ ): ObservabilityBaselines {
90
+ const ledger = loadLedger(stateDir);
91
+ const { tree, trainingStore } = ledger;
92
+
93
+ const principles = Object.values(tree.principles);
94
+ const rules = Object.values(tree.rules);
95
+ const implementations = Object.values(tree.implementations);
96
+
97
+ const principleStock = principles.length;
98
+ const totalRules = rules.length;
99
+ const totalImplementations = implementations.length;
100
+
101
+ // Structure metrics
102
+ const avgRulesPerPrinciple = principleStock > 0
103
+ ? totalRules / principleStock
104
+ : 0;
105
+ const avgImplementationsPerRule = totalRules > 0
106
+ ? totalImplementations / totalRules
107
+ : 0;
108
+
109
+ // Count pain events from trajectory DB
110
+ const totalPainEvents = countPainEvents(stateDir);
111
+
112
+ // Association Rate: how many principles were created per pain event
113
+ const associationRate = totalPainEvents > 0
114
+ ? principleStock / totalPainEvents
115
+ : 0;
116
+
117
+ // Internalization Rate from training store
118
+ // Filter to only entries whose principleId still exists in the ledger tree
119
+ // to avoid orphaned/deleted entries inflating the ratio
120
+ const trainingEntries = Object.values(trainingStore);
121
+ const activePrincipleIds = new Set(Object.keys(tree.principles));
122
+ const activeEntries = trainingEntries.filter(
123
+ (entry) => activePrincipleIds.has(entry.principleId),
124
+ );
125
+ const internalizedCount = activeEntries.filter(
126
+ (entry) => entry.internalizationStatus === 'internalized',
127
+ ).length;
128
+ const internalizationRate = principleStock > 0
129
+ ? internalizedCount / principleStock
130
+ : 0;
131
+
132
+ // Status distribution
133
+ const statusDistribution: Record<string, number> = {};
134
+ for (const p of principles) {
135
+ statusDistribution[p.status] = (statusDistribution[p.status] ?? 0) + 1;
136
+ }
137
+
138
+ // Priority distribution
139
+ const priorityDistribution: Record<string, number> = {};
140
+ for (const p of principles) {
141
+ priorityDistribution[p.priority] = (priorityDistribution[p.priority] ?? 0) + 1;
142
+ }
143
+
144
+ // Internalization status distribution from training store
145
+ const internalizationDistribution: Record<string, number> = {};
146
+ for (const entry of trainingEntries) {
147
+ internalizationDistribution[entry.internalizationStatus] =
148
+ (internalizationDistribution[entry.internalizationStatus] ?? 0) + 1;
149
+ }
150
+
151
+ const baselines: ObservabilityBaselines = {
152
+ calculatedAt: new Date().toISOString(),
153
+ principleStock,
154
+ totalRules,
155
+ totalImplementations,
156
+ avgRulesPerPrinciple: roundTo3(avgRulesPerPrinciple),
157
+ avgImplementationsPerRule: roundTo3(avgImplementationsPerRule),
158
+ totalPainEvents,
159
+ associationRate: roundTo3(associationRate),
160
+ internalizedCount,
161
+ internalizationRate: roundTo3(internalizationRate),
162
+ statusDistribution,
163
+ priorityDistribution,
164
+ internalizationDistribution,
165
+ };
166
+
167
+ // Log summary
168
+ SystemLogger.log(
169
+ workspaceDir,
170
+ 'OBSERVABILITY_BASELINES',
171
+ formatBaselineSummary(baselines),
172
+ );
173
+
174
+ // Persist to .state/baselines.json
175
+ persistBaselines(stateDir, baselines);
176
+
177
+ return baselines;
178
+ }
179
+
180
+ // ---------------------------------------------------------------------------
181
+ // Internal helpers
182
+ // ---------------------------------------------------------------------------
183
+
184
+ function roundTo3(n: number): number {
185
+ return Math.round(n * 1000) / 1000;
186
+ }
187
+
188
+ function formatBaselineSummary(b: ObservabilityBaselines): string {
189
+ return [
190
+ `Principle Stock: ${b.principleStock}`,
191
+ `Structure: ${b.avgRulesPerPrinciple} rules/principle, ${b.avgImplementationsPerRule} impls/rule`,
192
+ `Association Rate: ${b.associationRate} (${b.principleStock} principles / ${b.totalPainEvents} pain events)`,
193
+ `Internalization Rate: ${b.internalizationRate} (${b.internalizedCount}/${b.principleStock})`,
194
+ ].join(' | ');
195
+ }
196
+
197
+ /**
198
+ * Count pain events from the trajectory SQLite database.
199
+ * Returns 0 if the database is unavailable or the table doesn't exist.
200
+ */
201
+ function countPainEvents(stateDir: string): number {
202
+ const dbPath = path.join(stateDir, 'trajectory.db');
203
+ if (!fs.existsSync(dbPath)) {
204
+ return 0;
205
+ }
206
+
207
+ try {
208
+ // Use dynamic import for better-sqlite3 to avoid hard dependency
209
+ // at module load time. If not available, return 0.
210
+
211
+ const Database = require('better-sqlite3') as typeof import('better-sqlite3');
212
+ const db = new Database(dbPath, { readonly: true });
213
+
214
+ try {
215
+ const row = db.prepare('SELECT COUNT(*) as count FROM pain_events').get() as { count: number } | undefined;
216
+ return row?.count ?? 0;
217
+ } finally {
218
+ db.close();
219
+ }
220
+ } catch (err) {
221
+ // better-sqlite3 not available, or table doesn't exist — log and return 0
222
+ SystemLogger.log(stateDir, 'OBSERVABILITY_SQL_ERROR', `countPainEvents failed: ${String(err)}`);
223
+ return 0;
224
+ }
225
+ }
226
+
227
+ /**
228
+ * Persist baselines to .state/baselines.json atomically.
229
+ */
230
+ function persistBaselines(stateDir: string, baselines: ObservabilityBaselines): void {
231
+ try {
232
+ const filePath = path.join(stateDir, BASELINES_FILE);
233
+ const dir = path.dirname(filePath);
234
+ if (!fs.existsSync(dir)) {
235
+ fs.mkdirSync(dir, { recursive: true });
236
+ }
237
+ atomicWriteFileSync(filePath, JSON.stringify(baselines, null, 2));
238
+ } catch (err) {
239
+ // Baselines persistence is best-effort — don't crash the caller
240
+ // (the SystemLogger call above already logged the values)
241
+ }
242
+ }
@@ -0,0 +1,42 @@
1
+ /**
2
+ * PainSignalAdapter interface for the Evolution SDK.
3
+ *
4
+ * This interface decouples the evolution engine from specific AI agent
5
+ * frameworks (OpenClaw, Claude Code, etc.). All modules that need to
6
+ * capture pain signals from tool failures should depend on this interface
7
+ * rather than importing framework-specific event types directly.
8
+ *
9
+ * The interface uses a generic type parameter for the raw framework event,
10
+ * so each framework implementation provides its own concrete type.
11
+ */
12
+ import type { PainSignal } from './pain-signal.js';
13
+
14
+ // ---------------------------------------------------------------------------
15
+ // PainSignalAdapter Interface
16
+ // ---------------------------------------------------------------------------
17
+
18
/**
 * Framework-agnostic adapter for capturing pain signals.
 *
 * Implementations translate framework-specific tool events into the universal
 * PainSignal shape consumed by the evolution engine.
 *
 * @typeParam TRawEvent - The framework-specific event type
 *   (e.g., PluginHookAfterToolCallEvent for OpenClaw)
 */
export interface PainSignalAdapter<TRawEvent> {
  /**
   * Translate a framework-specific event into a universal PainSignal.
   *
   * Returns null when the event does not produce a pain signal (e.g., the
   * event type is not a failure, or the event lacks required fields).
   *
   * This method performs pure translation only. Trigger decision logic
   * (e.g., GFI threshold checks, tool name filtering) stays in the
   * framework-side hook logic. Per D-02, capture() only translates.
   *
   * Translation failures (malformed events, missing required fields)
   * return null rather than throwing. This keeps the adapter resilient.
   *
   * @param rawEvent - The framework-specific event to translate
   * @returns A valid PainSignal, or null if the event does not produce one
   */
  capture(rawEvent: TRawEvent): PainSignal | null;
}