principles-disciple 1.27.0 → 1.28.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. package/openclaw.plugin.json +4 -4
  2. package/package.json +4 -4
  3. package/scripts/diagnose-nocturnal.mjs +139 -2
  4. package/scripts/seed-nocturnal-scenarios.mjs +377 -0
  5. package/scripts/validate-live-path.ts +18 -18
  6. package/src/commands/nocturnal-train.ts +4 -6
  7. package/src/commands/pain.ts +8 -11
  8. package/src/commands/pd-reflect.ts +1 -1
  9. package/src/core/bootstrap-rules.ts +3 -3
  10. package/src/core/merge-gate-audit.ts +1 -1
  11. package/src/core/nocturnal-candidate-scoring.ts +131 -0
  12. package/src/core/nocturnal-reasoning-deriver.ts +337 -0
  13. package/src/core/nocturnal-trinity.ts +462 -25
  14. package/src/core/pain-context-extractor.ts +1 -3
  15. package/src/core/principle-tree-migration.ts +2 -4
  16. package/src/core/thinking-os-parser.ts +3 -3
  17. package/src/hooks/bash-risk.ts +1 -1
  18. package/src/hooks/gfi-gate.ts +1 -1
  19. package/src/hooks/pain.ts +1 -1
  20. package/src/hooks/prompt.ts +36 -2
  21. package/src/hooks/subagent.ts +1 -1
  22. package/src/index.ts +3 -1
  23. package/src/service/evolution-worker.ts +138 -44
  24. package/src/service/health-query-service.ts +15 -6
  25. package/src/service/subagent-workflow/nocturnal-workflow-manager.ts +0 -1
  26. package/src/tools/write-pain-flag.ts +191 -0
  27. package/templates/langs/en/skills/pd-pain-signal/SKILL.md +34 -20
  28. package/templates/langs/zh/skills/pd-pain-signal/SKILL.md +34 -20
  29. package/tests/core/nocturnal-candidate-scoring.test.ts +132 -0
  30. package/tests/core/nocturnal-e2e.test.ts +224 -0
  31. package/tests/core/nocturnal-reasoning-deriver.test.ts +372 -0
  32. package/tests/core/nocturnal-trinity.test.ts +791 -0
  33. package/tests/tools/write-pain-flag.test.ts +240 -0
@@ -20,6 +20,7 @@
20
20
  * WORKSPACE_DIR - Optional workspace directory (defaults to process.cwd())
21
21
  */
22
22
 
23
+ import * as Database from 'better-sqlite3';
23
24
  import * as fs from 'fs';
24
25
  import * as path from 'path';
25
26
 
@@ -33,7 +34,6 @@ const LOCK_STALE_MS = 30_000;
33
34
  const WORKSPACE_DIR = process.env.WORKSPACE_DIR || process.cwd();
34
35
  const STATE_DIR = path.join(WORKSPACE_DIR, '.state');
35
36
  const QUEUE_PATH = path.join(STATE_DIR, 'EVOLUTION_QUEUE');
36
- const QUEUE_LOCK_PATH = QUEUE_PATH + LOCK_SUFFIX;
37
37
  const LEDGER_PATH = path.join(STATE_DIR, 'principle_training_state.json');
38
38
  const DB_PATH = path.join(STATE_DIR, 'subagent_workflows.db');
39
39
 
@@ -80,17 +80,14 @@ async function acquireLockAsync(filePath: string, options: {
80
80
  baseRetryDelayMs?: number;
81
81
  lockStaleMs?: number;
82
82
  } = {}): Promise<LockContext> {
83
- const opts = {
84
- lockSuffix: LOCK_SUFFIX,
85
- maxRetries: LOCK_MAX_RETRIES,
86
- baseRetryDelayMs: LOCK_RETRY_DELAY_MS,
87
- lockStaleMs: LOCK_STALE_MS,
88
- ...options,
89
- };
83
+ const lockSuffix = options.lockSuffix ?? LOCK_SUFFIX;
84
+ const maxRetries = options.maxRetries ?? LOCK_MAX_RETRIES;
85
+ const baseRetryDelayMs = options.baseRetryDelayMs ?? LOCK_RETRY_DELAY_MS;
86
+ const lockStaleMs = options.lockStaleMs ?? LOCK_STALE_MS;
90
87
  const { pid } = process;
91
- const lockPath = filePath + opts.lockSuffix;
88
+ const lockPath = filePath + lockSuffix;
92
89
 
93
- for (let attempt = 0; attempt < opts.maxRetries!; attempt++) {
90
+ for (let attempt = 0; attempt < maxRetries; attempt++) {
94
91
  try {
95
92
  // Check if lock file exists and is stale
96
93
  if (fs.existsSync(lockPath)) {
@@ -100,11 +97,11 @@ async function acquireLockAsync(filePath: string, options: {
100
97
  const lockAge = Date.now() - lockStats.mtimeMs;
101
98
 
102
99
  // Clean up stale lock
103
- if (lockAge > opts.lockStaleMs!) {
100
+ if (lockAge > lockStaleMs) {
104
101
  fs.unlinkSync(lockPath);
105
102
  } else if (lockPid !== pid) {
106
103
  // Lock held by another process
107
- await new Promise(resolve => setTimeout(resolve, opts.baseRetryDelayMs!));
104
+ await new Promise(resolve => setTimeout(resolve, baseRetryDelayMs));
108
105
  continue;
109
106
  }
110
107
  }
@@ -125,17 +122,20 @@ async function acquireLockAsync(filePath: string, options: {
125
122
  },
126
123
  };
127
124
  } catch (error: unknown) {
128
- if ((error as NodeJS.ErrnoException).code === 'EEXIST') {
129
- if (attempt < opts.maxRetries! - 1) {
130
- await new Promise(resolve => setTimeout(resolve, opts.baseRetryDelayMs!));
125
+ const err = error as { code?: string };
126
+ if (err.code === 'EEXIST') {
127
+ if (attempt < maxRetries - 1) {
128
+ await new Promise(resolve => setTimeout(resolve, baseRetryDelayMs));
131
129
  continue;
132
130
  }
133
131
  }
134
- throw new Error(`Failed to acquire lock for ${filePath}: ${String(error)}`);
132
+ const lockError = new Error(`Failed to acquire lock for ${filePath}: ${String(error)}`);
133
+ lockError.cause = error;
134
+ throw lockError;
135
135
  }
136
136
  }
137
137
 
138
- throw new Error(`Failed to acquire lock for ${filePath} after ${opts.maxRetries} attempts`);
138
+ throw new Error(`Failed to acquire lock for ${filePath} after ${maxRetries} attempts`);
139
139
  }
140
140
 
141
141
  function releaseLock(ctx: LockContext): void {
@@ -225,7 +225,6 @@ function listNocturnalWorkflows(): WorkflowRow[] {
225
225
  return [];
226
226
  }
227
227
 
228
- const Database = require('better-sqlite3');
229
228
  const db = new Database(DB_PATH, { readonly: true });
230
229
  const rows = db.prepare(`
231
230
  SELECT workflow_id, workflow_type, state, metadata_json, created_at
@@ -279,6 +278,7 @@ async function main() {
279
278
  const verbose = process.argv.includes('--verbose');
280
279
 
281
280
  // 1. Check bootstrapped rules
281
+ // eslint-disable-next-line @typescript-eslint/init-declarations
282
282
  let rules: LedgerRule[];
283
283
  try {
284
284
  rules = loadBootstrappedRules();
@@ -30,6 +30,7 @@ import type { PluginCommandContext, PluginCommandResult } from '../openclaw-sdk.
30
30
  import {
31
31
  type TrainerBackendKind,
32
32
  type HardwareTier,
33
+ type TrainingExperimentResult,
33
34
  } from '../core/external-training-contract.js';
34
35
  import {
35
36
  TrainingProgram,
@@ -270,14 +271,11 @@ Hardware tiers:
270
271
  // This closes the gap in the create-experiment -> trainer -> import-result chain.
271
272
  // NOTE: This blocks until training completes (could be minutes).
272
273
  if (runNow) {
273
-
274
- const {spec} = createResult;
275
274
  const baseDir = TRAINER_SCRIPTS_DIR;
276
275
  const scriptPath = path.join(baseDir, 'main.py');
277
276
  const specPath = path.join(baseDir, `experiment-${spec.experimentId}.json`);
278
-
279
- const {outputDir} = spec;
280
- const resultFilePath = path.join(outputDir, `result-${spec.experimentId}.json`);
277
+
278
+ const resultFilePath = path.join(spec.outputDir, `result-${spec.experimentId}.json`);
281
279
 
282
280
  // Write spec file
283
281
  const specDir = path.dirname(specPath);
@@ -287,7 +285,7 @@ Hardware tiers:
287
285
  fs.writeFileSync(specPath, JSON.stringify(spec, null, 2), 'utf-8');
288
286
 
289
287
 
290
- let trainerResult!: import('../core/external-training-contract.js').TrainingExperimentResult;
288
+ let trainerResult!: TrainingExperimentResult;
291
289
 
292
290
  try {
293
291
  if (spec.backend === 'dry-run') {
@@ -127,20 +127,17 @@ export function handlePainCommand(ctx: PluginCommandContext): PluginCommandResul
127
127
 
128
128
  // Determine Mental Mode (aligned with prompt.ts logic)
129
129
 
130
- let mentalMode = '';
131
- if (isZh) {
132
- if (gfi >= 70) mentalMode = '🚑 救赎模式 (HUMBLE_RECOVERY)';
133
- else if (gfi >= 40) mentalMode = '🤝 安抚模式 (CONCILIATORY)';
134
- else mentalMode = '⚡ 高效模式 (EFFICIENT)';
135
- } else {
136
- if (gfi >= 70) mentalMode = '🚑 HUMBLE_RECOVERY';
137
- else if (gfi >= 40) mentalMode = '🤝 CONCILIATORY';
138
- else mentalMode = '⚡ EFFICIENT';
139
- }
130
+ const mentalMode = isZh
131
+ ? gfi >= 70 ? '🚑 救赎模式 (HUMBLE_RECOVERY)'
132
+ : gfi >= 40 ? '🤝 安抚模式 (CONCILIATORY)'
133
+ : ' 高效模式 (EFFICIENT)'
134
+ : gfi >= 70 ? '🚑 HUMBLE_RECOVERY'
135
+ : gfi >= 40 ? '🤝 CONCILIATORY'
136
+ : ' EFFICIENT';
140
137
 
141
138
  // Determine health status based on GFI
142
139
 
143
- let healthLabel = 'Healthy';
140
+ let healthLabel: string;
144
141
  let suggestionText = '';
145
142
 
146
143
  if (isZh) {
@@ -5,7 +5,7 @@
5
5
  * This command must operate on an explicitly resolved active workspace.
6
6
  */
7
7
 
8
- import { PluginCommandDefinition, PluginCommandContext, PluginCommandResult, OpenClawPluginApi } from '../openclaw-sdk.js';
8
+ import type { PluginCommandDefinition, PluginCommandContext, PluginCommandResult, OpenClawPluginApi } from '../openclaw-sdk.js';
9
9
  import { acquireQueueLock, EVOLUTION_QUEUE_LOCK_SUFFIX } from '../service/evolution-worker.js';
10
10
  import * as fs from 'fs';
11
11
  import * as path from 'path';
@@ -30,7 +30,7 @@ export interface BootstrapResult {
30
30
  * @returns Array of principle IDs sorted by observedViolationCount (descending)
31
31
  * @throws Error if no deterministic principles found
32
32
  */
33
- export function selectPrinciplesForBootstrap(stateDir: string, limit: number = 3): string[] {
33
+ export function selectPrinciplesForBootstrap(stateDir: string, limit = 3): string[] {
34
34
  // Load training store to get evaluability and violation data
35
35
  const store = loadStore(stateDir);
36
36
 
@@ -76,7 +76,7 @@ export function selectPrinciplesForBootstrap(stateDir: string, limit: number = 3
76
76
  * @returns Array of results indicating created or skipped status
77
77
  * @throws Error if no deterministic principles found
78
78
  */
79
- export function bootstrapRules(stateDir: string, limit: number = 3): BootstrapResult[] {
79
+ export function bootstrapRules(stateDir: string, limit = 3): BootstrapResult[] {
80
80
  // Select principles for bootstrap
81
81
  const selectedPrincipleIds = selectPrinciplesForBootstrap(stateDir, limit);
82
82
 
@@ -107,7 +107,7 @@ export function bootstrapRules(stateDir: string, limit: number = 3): BootstrapRe
107
107
 
108
108
  // Create stub rule
109
109
  const now = new Date().toISOString();
110
- const rule = createRule(stateDir, {
110
+ createRule(stateDir, {
111
111
  id: ruleId,
112
112
  version: 1,
113
113
  name: `Stub bootstrap rule for ${principleId}`,
@@ -364,7 +364,7 @@ function validateSingleReplayReport(reportPath: string): ReplayValidationCategor
364
364
  return 'missing_evidence_summary';
365
365
  }
366
366
 
367
- const evidenceSummary = (parsed as ReplayReport).evidenceSummary;
367
+ const evidenceSummary = parsed.evidenceSummary;
368
368
  if (parsed.overallDecision === 'pass' && evidenceSummary.totalSamples === 0) {
369
369
  return 'unsupported_pass';
370
370
  }
@@ -116,6 +116,23 @@ export const DEFAULT_SCORING_WEIGHTS: ScoringWeights = {
116
116
  confidence: 0.15,
117
117
  };
118
118
 
119
+ /**
120
+ * Result of diversity validation on Dreamer candidates.
121
+ * Soft enforcement: result is informational, never gates the pipeline.
122
+ */
123
+ export interface DiversityValidationResult {
124
+ /** Whether candidates passed diversity checks */
125
+ diversityCheckPassed: boolean;
126
+ /** Whether at least 2 distinct risk levels were present */
127
+ riskLevelDiversity: boolean;
128
+ /** Whether no candidate pair exceeded keyword overlap threshold */
129
+ keywordOverlapPassed: boolean;
130
+ /** Highest pairwise keyword overlap score (for telemetry) */
131
+ maxOverlapScore: number;
132
+ /** Human-readable summary of check results */
133
+ details: string;
134
+ }
135
+
119
136
  // ---------------------------------------------------------------------------
120
137
  // Scoring Logic
121
138
  // ---------------------------------------------------------------------------
@@ -232,6 +249,120 @@ export function checkThresholds(
232
249
  return [failedThresholds.length === 0, failedThresholds];
233
250
  }
234
251
 
252
/**
 * Report whether a batch of Dreamer candidates is strategically diverse.
 *
 * DIVER-03 runs two independent checks (parameters per D-05):
 *  - Risk levels: among candidates that carry a string `riskLevel`, at least
 *    two distinct values must appear. When no candidate carries one, the
 *    check is skipped and counts as passed.
 *  - Keyword overlap: no pair of `betterDecision` texts may score above 0.8
 *    in `computeKeywordOverlap`.
 *
 * Enforcement is SOFT: the function only returns a report and never throws
 * on its own account; callers decide what to do with a failed check.
 *
 * @param candidates - Dreamer candidates to validate
 * @returns DiversityValidationResult describing both checks and the highest
 *          pairwise overlap (rounded to two decimals)
 */
export function validateCandidateDiversity(
  candidates: DreamerCandidate[],
): DiversityValidationResult {
  // Nothing to compare with zero or one candidate: trivially diverse.
  if (!candidates || candidates.length <= 1) {
    const details =
      candidates?.length === 1
        ? 'Single candidate — diversity check not applicable'
        : 'No candidates to validate';
    return {
      diversityCheckPassed: true,
      riskLevelDiversity: true,
      keywordOverlapPassed: true,
      maxOverlapScore: 0,
      details,
    };
  }

  // Check 1 (D-05): distinct risk levels among candidates that declare one.
  const declaredRisks = candidates
    .map(c => c.riskLevel)
    .filter((r): r is "low" | "medium" | "high" => typeof r === 'string');
  const riskLevels = new Set(declaredRisks);
  // An empty set means nobody declared a level: degrade gracefully and pass.
  const riskLevelDiversity = riskLevels.size === 0 || riskLevels.size >= 2;

  // Check 2 (D-05): highest keyword overlap over every unordered pair.
  let maxOverlapScore = 0;
  let keywordOverlapPassed = true;
  for (const [index, first] of candidates.entries()) {
    for (const second of candidates.slice(index + 1)) {
      const pairScore = computeKeywordOverlap(
        first.betterDecision ?? '',
        second.betterDecision ?? '',
      );
      if (pairScore > maxOverlapScore) maxOverlapScore = pairScore;
      if (pairScore > 0.8) keywordOverlapPassed = false;
    }
  }

  // Collect one human-readable reason per failed check.
  const problems: string[] = [];
  if (!riskLevelDiversity) {
    problems.push(`Risk levels not diverse (found: ${[...riskLevels].join(', ') || 'none'})`);
  }
  if (!keywordOverlapPassed) {
    problems.push(`Keyword overlap too high (max: ${maxOverlapScore.toFixed(2)})`);
  }

  const diversityCheckPassed = riskLevelDiversity && keywordOverlapPassed;
  return {
    diversityCheckPassed,
    riskLevelDiversity,
    keywordOverlapPassed,
    maxOverlapScore: Math.round(maxOverlapScore * 100) / 100,
    details: diversityCheckPassed ? 'Diversity check passed' : problems.join('; '),
  };
}
330
+
331
/**
 * Score how much two texts share vocabulary.
 *
 * Per D-05 the score is |A ∩ B| / max(|A|, |B|), where A and B are the sets
 * of distinct keywords returned by `extractKeywords` for each text.
 * The result lies in [0, 1]; it is 0 whenever either text has no keywords.
 */
function computeKeywordOverlap(textA: string, textB: string): number {
  const keywordsA = new Set(extractKeywords(textA));
  const keywordsB = new Set(extractKeywords(textB));

  // No keywords on either side means there is nothing to compare.
  if (keywordsA.size === 0 || keywordsB.size === 0) {
    return 0;
  }

  const sharedCount = [...keywordsA].filter(keyword => keywordsB.has(keyword)).length;
  return sharedCount / Math.max(keywordsA.size, keywordsB.size);
}
354
+
355
/**
 * Extract keywords from text: lowercased tokens longer than 3 characters.
 *
 * Tokens are maximal runs of Unicode letters, combining marks and digits;
 * every other character (whitespace, punctuation, symbols) is a separator.
 * For plain ASCII input this yields exactly the same tokens as splitting on
 * `[^a-z0-9]`, but accented words ("café") are no longer cut apart and
 * non-Latin text is no longer discarded entirely.
 *
 * @param text - Free-form text; empty or missing text yields no keywords
 * @returns Keywords in order of appearance (duplicates preserved)
 */
function extractKeywords(text: string): string[] {
  if (!text) return [];
  return text
    .toLowerCase()
    // Unicode-aware separator: anything that is not a letter, mark or number.
    .split(/[^\p{L}\p{M}\p{N}]+/u)
    .filter(token => token.length > 3);
}
365
+
235
366
  /**
236
367
  * Score and rank all candidates deterministically.
237
368
  *
@@ -0,0 +1,337 @@
1
+ /**
2
+ * Nocturnal Reasoning Deriver — Runtime Reasoning Signal Extraction
3
+ * ==============================================================
4
+ *
5
+ * PURPOSE: Derive structured reasoning signals from existing snapshot data
6
+ * without any snapshot schema changes. Pure functions, zero dependencies.
7
+ *
8
+ * THREE FUNCTIONS:
9
+ * - deriveReasoningChain: Extract thinking content, uncertainty, confidence from assistant turns
10
+ * - deriveDecisionPoints: Extract before/after context per tool call (Plan 02)
11
+ * - deriveContextualFactors: Compute contextual factors from snapshot (Plan 02)
12
+ */
13
+
14
+ import type { NocturnalAssistantTurn, NocturnalToolCall, NocturnalUserTurn, NocturnalSessionSnapshot } from './nocturnal-trajectory-extractor.js';
15
+ import { detectThinkingModelMatches, listThinkingModels } from './thinking-models.js';
16
+
17
+ // ---------------------------------------------------------------------------
18
+ // Shared helpers
19
+ // ---------------------------------------------------------------------------
20
+
21
/**
 * Parse a timestamp string to epoch milliseconds via `Date.parse`.
 *
 * Returns NaN when `Date.parse` cannot interpret the input.
 *
 * As a diagnostic, a warning is logged when the string starts with a
 * `YYYY-MM-DD` date but carries no timezone designator. A designator is a
 * trailing `Z`, a `+` offset, or a negative offset (`-hh:mm`, `-hhmm`,
 * `-hh`) that directly follows a time component. Negative offsets are
 * recognised explicitly so that e.g. `2024-01-01T10:00:00-05:00` is not
 * mistaken for a zone-less timestamp just because it contains no `+`.
 *
 * @param ts - Timestamp string (expected to be ISO 8601)
 * @returns Milliseconds since the Unix epoch, or NaN if unparseable
 */
function parseTs(ts: string): number {
  if (typeof ts === 'string' && /^\d{4}-\d{2}-\d{2}/.test(ts)) {
    const hasUtcOrPositiveOffset = ts.endsWith('Z') || ts.includes('+');
    // A '-' only counts as an offset when it trails hh:mm[:ss[.fff]];
    // otherwise it is just one of the date separators.
    const hasNegativeOffset =
      /\d{2}:\d{2}(?::\d{2}(?:\.\d+)?)?-\d{2}(?::?\d{2})?$/.test(ts);

    if (!hasUtcOrPositiveOffset && !hasNegativeOffset) {
      console.warn(`[Deriver] Timestamp missing timezone: "${ts}"`);
    }
  }
  return Date.parse(ts);
}
40
+
41
+ // ---------------------------------------------------------------------------
42
+ // Shared types (used across all three derive functions)
43
+ // ---------------------------------------------------------------------------
44
+
45
+ export interface DerivedReasoningSignal {
46
+ turnIndex: number;
47
+ thinkingContent: string;
48
+ uncertaintyMarkers: string[];
49
+ confidenceSignal: "high" | "medium" | "low";
50
+ }
51
+
52
+ export interface DerivedDecisionPoint {
53
+ toolName: string;
54
+ outcome: "success" | "failure" | "blocked";
55
+ beforeContext: string;
56
+ afterReflection?: string;
57
+ confidenceDelta?: number;
58
+ }
59
+
60
+ export interface DerivedContextualFactors {
61
+ fileStructureKnown: boolean;
62
+ errorHistoryPresent: boolean;
63
+ userGuidanceAvailable: boolean;
64
+ timePressure: boolean;
65
+ }
66
+
67
+ // ---------------------------------------------------------------------------
68
+ // Constants
69
+ // ---------------------------------------------------------------------------
70
+
71
+ const UNCERTAINTY_PATTERNS: RegExp[] = [
72
+ /let me (check|verify|confirm|understand)/gi,
73
+ /I should (first|probably|maybe)/gi,
74
+ /not sure (if|whether|about)/gi,
75
+ ];
76
+
77
+ const THINKING_TAG_REGEX = /<thinking>([\s\S]*?)<\/thinking>/g;
78
+
79
+ // ---------------------------------------------------------------------------
80
+ // Helpers
81
+ // ---------------------------------------------------------------------------
82
+
83
/**
 * Fraction of known thinking models that the given text activates.
 *
 * Divides the number of matches reported by detectThinkingModelMatches()
 * by the number of models reported by listThinkingModels(), rounded to two
 * decimal places. Blank text, or an empty model list, yields 0.
 */
function computeThinkingModelActivation(text: string): number {
  const isBlank = !text || text.trim().length === 0;
  if (isBlank) {
    return 0;
  }

  const matchedCount = detectThinkingModelMatches(text).length;
  const modelCount = listThinkingModels().length;

  // Guard the division when no models are registered.
  return modelCount === 0
    ? 0
    : Math.round((matchedCount / modelCount) * 100) / 100;
}
95
+
96
/**
 * Bucket an activation ratio into a coarse confidence signal.
 *
 *   activation > 0.6          -> "high"
 *   0.3 <= activation <= 0.6  -> "medium"
 *   anything else (incl. NaN) -> "low"
 */
function mapConfidenceSignal(activation: number): "high" | "medium" | "low" {
  if (activation >= 0.3) {
    return activation > 0.6 ? "high" : "medium";
  }
  return "low";
}
105
+
106
+ // ---------------------------------------------------------------------------
107
+ // deriveReasoningChain (DERIV-01)
108
+ // ---------------------------------------------------------------------------
109
+
110
/**
 * Build one reasoning signal per assistant turn (DERIV-01).
 *
 * For every turn the result carries:
 *  - thinkingContent: the trimmed bodies of all <thinking> blocks found in
 *    the turn's sanitized text, joined with newlines;
 *  - uncertaintyMarkers: the distinct phrases matched by UNCERTAINTY_PATTERNS,
 *    in pattern order, then order of appearance;
 *  - confidenceSignal: 'low' when the turn has no thinking content, otherwise
 *    the bucketed thinking-model activation of the turn's text.
 *
 * A missing or empty turn list produces an empty array.
 */
export function deriveReasoningChain(assistantTurns: NocturnalAssistantTurn[]): DerivedReasoningSignal[] {
  if (!assistantTurns || assistantTurns.length === 0) {
    return [];
  }

  // Gather every <thinking> block; a single turn may contain several.
  const collectThinking = (text: string): string =>
    Array.from(text.matchAll(THINKING_TAG_REGEX), block => block[1].trim()).join('\n');

  // Gather distinct uncertainty phrases; a Set keeps first-seen order.
  const collectMarkers = (text: string): string[] => {
    const distinct = new Set<string>();
    for (const pattern of UNCERTAINTY_PATTERNS) {
      // The patterns are global and shared, so clear any leftover position.
      pattern.lastIndex = 0;
      for (const phrase of text.match(pattern) ?? []) {
        distinct.add(phrase);
      }
    }
    return [...distinct];
  };

  return assistantTurns.map(turn => {
    const text = turn.sanitizedText ?? '';
    const thinkingContent = collectThinking(text);
    const uncertaintyMarkers = collectMarkers(text);

    // Without <thinking> content there is no reasoning trace to grade, so
    // report 'low' instead of grading the visible response text.
    const confidenceSignal: "high" | "medium" | "low" =
      thinkingContent.length === 0
        ? 'low'
        : mapConfidenceSignal(computeThinkingModelActivation(text));

    return {
      turnIndex: turn.turnIndex,
      thinkingContent,
      uncertaintyMarkers,
      confidenceSignal,
    };
  });
}
162
+
163
+ // ---------------------------------------------------------------------------
164
+ // Helpers (Plan 02)
165
+ // ---------------------------------------------------------------------------
166
+
167
/**
 * Translate a confidence signal into a number so two signals can be
 * subtracted: high -> 1, medium -> 0.5, low -> 0.
 */
function confidenceToNumber(signal: "high" | "medium" | "low"): number {
  const numericValue: Record<"high" | "medium" | "low", number> = {
    high: 1,
    medium: 0.5,
    low: 0,
  };
  return numericValue[signal];
}
178
+
179
+ // ---------------------------------------------------------------------------
180
+ // deriveDecisionPoints (DERIV-02)
181
+ // ---------------------------------------------------------------------------
182
+
183
/**
 * Extract before-context and after-reflection for each tool call (DERIV-02).
 *
 * For every tool call, in input order:
 *  - beforeContext: the last 500 characters of the latest assistant turn
 *    whose createdAt is strictly earlier than the tool call ('' if none).
 *  - afterReflection (failure outcome only): the first 300 characters of the
 *    earliest assistant turn whose createdAt is strictly later.
 *  - confidenceDelta (failure outcome only, both turns found): confidence of
 *    the after turn minus confidence of the before turn, where confidence is
 *    the bucketed thinking-model activation mapped to 1 / 0.5 / 0.
 *
 * Each assistant-turn timestamp is parsed exactly once. Turns whose
 * createdAt cannot be parsed are left out of the timeline: they cannot be
 * ordered, and a NaN sort key would make both the sort order and the binary
 * searches below unreliable. A tool call with an unparseable createdAt gets
 * an empty beforeContext and no reflection.
 *
 * @param assistantTurns - Assistant turns of the session (any order)
 * @param toolCalls - Tool calls of the session
 * @returns One decision point per tool call; empty when there are no calls
 */
export function deriveDecisionPoints(
  assistantTurns: NocturnalAssistantTurn[],
  toolCalls: NocturnalToolCall[],
): DerivedDecisionPoint[] {
  if (!toolCalls || toolCalls.length === 0) return [];

  // Chronological index of assistant turns with their parsed timestamps.
  const timeline = (assistantTurns ?? [])
    .map(turn => ({ turn, at: parseTs(turn.createdAt) }))
    .filter(entry => !Number.isNaN(entry.at))
    .sort((a, b) => a.at - b.at);

  // Binary search: rightmost turn with timestamp < time (undefined if none).
  const findBeforeTurn = (time: number): NocturnalAssistantTurn | undefined => {
    let lo = 0, hi = timeline.length - 1, found: NocturnalAssistantTurn | undefined;
    while (lo <= hi) {
      const mid = (lo + hi) >>> 1;
      if (timeline[mid].at < time) {
        found = timeline[mid].turn;
        lo = mid + 1;
      } else {
        hi = mid - 1;
      }
    }
    return found;
  };

  // Binary search: leftmost turn with timestamp > time (undefined if none).
  const findAfterTurn = (time: number): NocturnalAssistantTurn | undefined => {
    let lo = 0, hi = timeline.length - 1, found: NocturnalAssistantTurn | undefined;
    while (lo <= hi) {
      const mid = (lo + hi) >>> 1;
      if (timeline[mid].at > time) {
        found = timeline[mid].turn;
        hi = mid - 1;
      } else {
        lo = mid + 1;
      }
    }
    return found;
  };

  // Numeric confidence of a turn's text (1 / 0.5 / 0).
  const confidenceOf = (text: string): number =>
    confidenceToNumber(mapConfidenceSignal(computeThinkingModelActivation(text)));

  return toolCalls.map(tc => {
    const tcTime = parseTs(tc.createdAt);
    const beforeTurn = findBeforeTurn(tcTime);

    // sanitizedText is guarded the same way deriveReasoningChain guards it.
    const beforeText = beforeTurn?.sanitizedText ?? '';
    const result: DerivedDecisionPoint = {
      toolName: tc.toolName,
      outcome: tc.outcome,
      beforeContext: beforeText.slice(-500),
    };

    // Reflection is only looked up after a failed call.
    if (tc.outcome === 'failure') {
      const afterTurn = findAfterTurn(tcTime);
      if (afterTurn) {
        const afterText = afterTurn.sanitizedText ?? '';
        result.afterReflection = afterText.slice(0, 300);

        // The delta needs a turn on both sides of the failure.
        if (beforeTurn) {
          const delta = confidenceOf(afterText) - confidenceOf(beforeText);
          result.confidenceDelta = Math.round(delta * 100) / 100;
        }
      }
    }

    return result;
  });
}
269
+
270
+ // ---------------------------------------------------------------------------
271
+ // deriveContextualFactors (DERIV-03)
272
+ // ---------------------------------------------------------------------------
273
+
274
/**
 * Compute contextual factors from session snapshot data (DERIV-03).
 *
 * Four boolean factors, all derived from existing snapshot fields:
 *  - fileStructureKnown: some read-like tool call appears before a
 *    write-like tool call, scanning `toolCalls` in the order the snapshot
 *    lists them (the list is not re-sorted for this check).
 *  - errorHistoryPresent: any tool call has outcome 'failure'.
 *  - userGuidanceAvailable: any user turn has correctionDetected === true.
 *  - timePressure: more than half of the consecutive tool-call pairs, once
 *    ordered by createdAt, are less than 2 seconds apart. Tool calls whose
 *    createdAt cannot be parsed are excluded from this ratio, because their
 *    position and gaps are unknown.
 *
 * A missing snapshot, or missing/null `toolCalls` / `userTurns`, is treated
 * as empty data and yields false for the affected factors.
 *
 * @param snapshot - Session snapshot to inspect
 * @returns The four contextual factors
 */
export function deriveContextualFactors(
  snapshot: NocturnalSessionSnapshot,
): DerivedContextualFactors {
  const defaults: DerivedContextualFactors = {
    fileStructureKnown: false,
    errorHistoryPresent: false,
    userGuidanceAvailable: false,
    timePressure: false,
  };

  if (!snapshot) return defaults;

  // `??` (unlike a destructuring default) also covers an explicit null.
  const toolCalls = snapshot.toolCalls ?? [];
  const userTurns = snapshot.userTurns ?? [];

  // fileStructureKnown: a read-like call listed before a write-like call.
  const isReadTool = (name: string) => /^(read|grep|search|find|inspect|look)/i.test(name);
  const isWriteTool = (name: string) => /^(edit|write|create|delete|remove|move|rename)/i.test(name);
  let fileStructureKnown = false;
  let hasSeenRead = false;
  for (const tc of toolCalls) {
    if (isReadTool(tc.toolName)) hasSeenRead = true;
    if (isWriteTool(tc.toolName) && hasSeenRead) {
      fileStructureKnown = true;
      break;
    }
  }

  // errorHistoryPresent: any tool call with outcome === 'failure'
  const errorHistoryPresent = toolCalls.some(tc => tc.outcome === 'failure');

  // userGuidanceAvailable: any user turn with correctionDetected === true
  const userGuidanceAvailable = userTurns.some(ut => ut.correctionDetected === true);

  // timePressure: >50% of consecutive tool call pairs have < 2s gap.
  // Timestamps are parsed once up front; NaN values are dropped so the
  // numeric sort below always sees comparable keys.
  let timePressure = false;
  if (toolCalls.length >= 2) {
    const times = toolCalls
      .map(tc => parseTs(tc.createdAt))
      .filter(time => !Number.isNaN(time))
      .sort((a, b) => a - b);

    const totalPairs = times.length - 1;
    if (totalPairs >= 1) {
      let rapidGaps = 0;
      for (let i = 0; i < totalPairs; i++) {
        if (times[i + 1] - times[i] < 2000) rapidGaps++;
      }
      timePressure = rapidGaps / totalPairs > 0.5;
    }
  }

  return {
    fileStructureKnown,
    errorHistoryPresent,
    userGuidanceAvailable,
    timePressure,
  };
}