principles-disciple 1.28.0 → 1.28.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -35,12 +35,17 @@ import * as os from 'os';
35
35
  import * as path from 'path';
36
36
  import type { NocturnalSessionSnapshot } from './nocturnal-trajectory-extractor.js';
37
37
  import { computeThinkingModelDelta } from './nocturnal-trajectory-extractor.js';
38
+ import {
39
+ deriveReasoningChain,
40
+ deriveContextualFactors,
41
+ } from './nocturnal-reasoning-deriver.js';
38
42
  import type { TrinityArtificerContext } from './nocturnal-artificer.js';
39
43
  import {
40
44
  runTournament,
41
45
  DEFAULT_SCORING_WEIGHTS,
42
46
  type ScoringWeights,
43
47
  type TournamentTraceEntry,
48
+ validateCandidateDiversity,
44
49
  } from './nocturnal-candidate-scoring.js';
45
50
  import {
46
51
  DEFAULT_THRESHOLDS,
@@ -61,7 +66,7 @@ const FALLBACK_MODEL = process.env.OPENCLAW_DEFAULT_MODEL || 'MiniMax-M2.7';
61
66
  // These prompts are embedded at build time. The agents/ directory was removed
62
67
  // to eliminate fragile runtime file dependencies on the file system.
63
68
 
64
- const NOCTURNAL_DREAMER_PROMPT = `# Nocturnal Dreamer — Candidate Generation
69
+ export const NOCTURNAL_DREAMER_PROMPT = `# Nocturnal Dreamer — Candidate Generation
65
70
 
66
71
  > System prompt for Trinity Dreamer stage.
67
72
  > Role: Generate multiple alternative "better decision" candidates from a session snapshot.
@@ -104,7 +109,9 @@ You MUST respond with ONLY a valid JSON object. No markdown, no explanation, no
104
109
  "badDecision": "<what the agent did wrong>",
105
110
  "betterDecision": "<what the agent should have done>",
106
111
  "rationale": "<why this is better>",
107
- "confidence": 0.95
112
+ "confidence": 0.95,
113
+ "riskLevel": "low",
114
+ "strategicPerspective": "conservative_fix"
108
115
  }
109
116
  ],
110
117
  "generatedAt": "<ISO timestamp>"
@@ -131,6 +138,23 @@ You MUST respond with ONLY a valid JSON object. No markdown, no explanation, no
131
138
  - Do not generate candidates with identical betterDecisions
132
139
  - Vary the confidence scores to reflect genuine uncertainty
133
140
 
141
+ ## Strategic Perspective Requirements
142
+
143
+ Generate candidates from DISTINCT strategic perspectives:
144
+
145
+ - **conservative_fix**: Minimal deviation from original approach. Add a
146
+ verification or validation step that was missing.
147
+ - **structural_improvement**: Reorder operations or introduce an intermediate
148
+ checkpoint. Change HOW the goal is achieved.
149
+ - **paradigm_shift**: Challenge whether the original goal was correct.
150
+ Consider a fundamentally different approach.
151
+
152
+ Each candidate MUST specify \`riskLevel\` ("low"|"medium"|"high") and
153
+ \`strategicPerspective\` matching one of the above.
154
+
155
+ ANTI-PATTERN: Candidates that differ only in wording, not in substance,
156
+ will be rejected.
157
+
134
158
  ### Candidates must NOT:
135
159
  - Contain raw user text or private content
136
160
  - Reference non-existent tools or impossible actions
@@ -148,7 +172,7 @@ If you cannot generate valid candidates (e.g., no clear violation found, insuffi
148
172
  "generatedAt": "<ISO timestamp>"
149
173
  }`;
150
174
 
151
- const NOCTURNAL_PHILOSOPHER_PROMPT = `# Nocturnal Philosopher — Candidate Evaluation and Ranking
175
+ export const NOCTURNAL_PHILOSOPHER_PROMPT = `# Nocturnal Philosopher — Candidate Evaluation and Ranking
152
176
 
153
177
  > System prompt for Trinity Philosopher stage.
154
178
  > Role: Evaluate Dreamer's candidates and rank them by principle alignment and quality.
@@ -187,7 +211,20 @@ You MUST respond with ONLY a valid JSON object. No markdown, no explanation, no
187
211
  "critique": "<principle-grounded critique>",
188
212
  "principleAligned": true,
189
213
  "score": 0.92,
190
- "rank": 1
214
+ "rank": 1,
215
+ "scores": {
216
+ "principleAlignment": 0.9,
217
+ "specificity": 0.85,
218
+ "actionability": 0.9,
219
+ "executability": 0.95,
220
+ "safetyImpact": 0.8,
221
+ "uxImpact": 0.85
222
+ },
223
+ "risks": {
224
+ "falsePositiveEstimate": 0.1,
225
+ "implementationComplexity": "low",
226
+ "breakingChangeRisk": false
227
+ }
191
228
  }
192
229
  ],
193
230
  "overallAssessment": "<summary of candidate set quality>",
@@ -197,10 +234,18 @@ You MUST respond with ONLY a valid JSON object. No markdown, no explanation, no
197
234
  ## Evaluation Criteria
198
235
 
199
236
  ### Score Components (0-1 scale each):
200
- 1. **Principle Alignment** (weight: 0.35) — Does the betterDecision properly reflect the target principle?
201
- 2. **Specificity** (weight: 0.25) — Is badDecision specific? Is betterDecision actionable?
202
- 3. **Actionability** (weight: 0.25) — Does betterDecision describe a specific next step?
237
+ 1. **Principle Alignment** (weight: 0.20) — Does the betterDecision properly reflect the target principle?
238
+ 2. **Specificity** (weight: 0.15) — Is badDecision specific? Is betterDecision actionable?
239
+ 3. **Actionability** (weight: 0.15) — Does betterDecision describe a specific next step?
203
240
  4. **Executability** (weight: 0.15) — Does betterDecision start with a bounded verb (read, check, verify, edit, write, etc.) and reference a concrete target?
241
+ 5. **Safety Impact** (weight: 0.20) — Does the betterDecision reduce risk of data loss, corruption, or new failure modes? Would implementing this prevent dangerous operations?
242
+ 6. **UX Impact** (weight: 0.15) — Does the betterDecision reduce user frustration or improve response reliability? Would the user experience be noticeably better?
243
+
244
+ ### Risk Assessment (per candidate):
245
+ For each candidate, also assess:
246
+ - **falsePositiveEstimate** (0-1): How likely is this candidate a false positive (the "betterDecision" is actually not better)?
247
+ - **implementationComplexity** ("low"/"medium"/"high"): How complex would it be to implement this correction?
248
+ - **breakingChangeRisk** (boolean): Could implementing this correction break existing behavior?
204
249
 
205
250
  ### Executability Check:
206
251
  A betterDecision is executable if it:
@@ -243,13 +288,16 @@ and synthesize it into a final decision-point artifact that passes arbiter valid
243
288
  You will receive:
244
289
  - A **target principle** (principle ID and description)
245
290
  - A **session trajectory snapshot**
246
- - **Philosopher's judgments** — ranked candidates with critiques
291
+ - **Philosopher's judgments** — ranked candidates with critiques and 6D scores
247
292
  - **Dreamer's candidates** — the original candidate list
293
+ - **Philosopher's risk assessments** — falsePositiveEstimate, implementationComplexity, breakingChangeRisk per candidate
294
+
295
+ Use the risk assessments to determine which candidates require deeper contrastive analysis. High-risk candidates (high breakingChangeRisk or implementationComplexity) warrant thorough rejectedAnalysis.
248
296
 
249
297
  ## Task
250
298
 
251
299
  Select the best candidate (Philosopher's rank 1) and synthesize it into
252
- a final TrinityDraftArtifact.
300
+ a final TrinityDraftArtifact. Then produce a **Contrastive Analysis** that explains why the winner was chosen and what to learn from the runners-up.
253
301
 
254
302
  ## Output Format
255
303
 
@@ -271,9 +319,26 @@ You MUST respond with ONLY a valid JSON object. No markdown, no explanation, no
271
319
  "candidateCount": 2,
272
320
  "selectedCandidateIndex": 0,
273
321
  "stageFailures": []
322
+ },
323
+ "rejectedAnalysis": {
324
+ "whyRejected": "<mental model that led to the rejected candidate>",
325
+ "warningSignals": ["<observable caution trigger 1>", "<trigger 2>"],
326
+ "correctiveThinking": "<correct reasoning path that should have been taken>"
327
+ },
328
+ "chosenJustification": {
329
+ "whyChosen": "<why this candidate was selected over others>",
330
+ "keyInsights": ["<transferable insight 1>", "<insight 2>", "<insight 3>"],
331
+ "limitations": ["<when this approach does NOT apply 1>", "<limitation 2>"]
332
+ },
333
+ "contrastiveAnalysis": {
334
+ "criticalDifference": "<ONE key insight distinguishing chosen from rejected>",
335
+ "decisionTrigger": "<When X, do Y pattern>",
336
+ "preventionStrategy": "<how to systematically avoid the rejected path>"
274
337
  }
275
338
  }
276
339
 
340
+ All three analysis sections (rejectedAnalysis, chosenJustification, contrastiveAnalysis) are optional but recommended. When multiple candidates were evaluated, include them to provide richer training signals.
341
+
277
342
  ## Validation
278
343
 
279
344
  If you cannot synthesize an artifact:
@@ -403,6 +468,70 @@ export class TrinityRuntimeContractError extends Error {
403
468
  }
404
469
  }
405
470
 
471
+ // ---------------------------------------------------------------------------
472
+ // Reasoning Context Serialization (D-03, D-04)
473
+ // ---------------------------------------------------------------------------
474
+
475
/**
 * Build the "## Reasoning Context" prompt section for the Dreamer stage.
 *
 * A reasoning chain is derived from the snapshot's assistant turns and a set
 * of contextual factors is derived from the snapshot as a whole; both are
 * rendered as markdown bullet lines. Decision points are deliberately not
 * serialized here (reserved for the Phase 37 Scribe per D-04).
 *
 * @param snapshot - Session snapshot the reasoning signals are derived from.
 * @returns The newline-joined section (terminated by a trailing newline), or
 *   `null` when no turn carries thinking content or uncertainty markers and
 *   none of the contextual factors is set.
 */
export function formatReasoningContext(snapshot: NocturnalSessionSnapshot): string | null {
  const chain = deriveReasoningChain(snapshot.assistantTurns);
  const factors = deriveContextualFactors(snapshot);

  // Only turns with thinking text or at least one uncertainty marker are reported.
  const notableTurns = chain.filter(
    turn => turn.thinkingContent || turn.uncertaintyMarkers.length > 0
  );

  // Each contextual factor paired with the label emitted when it is set.
  const factorCatalog: ReadonlyArray<readonly [unknown, string]> = [
    [factors.fileStructureKnown, 'File structure explored before modification'],
    [factors.errorHistoryPresent, 'Prior error history present'],
    [factors.userGuidanceAvailable, 'User guidance/corrections available'],
    [factors.timePressure, 'Time pressure detected (rapid tool calls)'],
  ];
  const environment = factorCatalog
    .filter(([active]) => Boolean(active))
    .map(([, label]) => label);

  // Nothing meaningful to say: let the caller omit the section entirely.
  if (notableTurns.length === 0 && environment.length === 0) {
    return null;
  }

  const lines: string[] = ['## Reasoning Context', ''];

  for (const turn of notableTurns) {
    const prefix = `- Turn ${turn.turnIndex}: `;
    if (turn.thinkingContent) {
      // Thinking text is capped at its first 200 characters.
      lines.push(`${prefix}Internal reasoning: "${turn.thinkingContent.slice(0, 200)}"`);
    }
    if (turn.uncertaintyMarkers.length > 0) {
      lines.push(`${prefix}Uncertainty detected: ${turn.uncertaintyMarkers.join(', ')}`);
    }
    // A 'high' confidence signal is not reported; any other value is.
    if (turn.confidenceSignal !== 'high') {
      lines.push(`${prefix}Confidence: ${turn.confidenceSignal}`);
    }
  }

  if (environment.length > 0) {
    lines.push('', 'Environmental context:', ...environment.map(label => `- ${label}`));
  }

  lines.push('');
  return lines.join('\n');
}
534
+
406
535
  export class OpenClawTrinityRuntimeAdapter implements TrinityRuntimeAdapter {
407
536
 
408
537
  private readonly api: {
@@ -478,8 +607,8 @@ export class OpenClawTrinityRuntimeAdapter implements TrinityRuntimeAdapter {
478
607
  fs.rmSync(fullPath, { recursive: true, force: true });
479
608
  }
480
609
  }
481
- } catch {
482
- // Non-fatal: stale temp files will be cleaned up eventually
610
+ } catch (err) {
611
+ this.api.logger?.warn?.(`[Trinity] Failed to cleanup stale temp dirs: ${err instanceof Error ? err.message.replace(/([A-Za-z]:\\[^:\\s]+|\\\/[^\s:]+)/g, '[PATH]') : String(err)}`);
483
612
  }
484
613
  }
485
614
 
@@ -559,6 +688,12 @@ export class OpenClawTrinityRuntimeAdapter implements TrinityRuntimeAdapter {
559
688
  .join('\n');
560
689
  }
561
690
 
691
/**
 * Limit an LLM-produced score to the closed interval [0, 1].
 *
 * @param val - Candidate score; may be of any type.
 * @param fallback - Returned unchanged (it is not clamped) when `val` is not
 *   a finite number. Defaults to 0.
 * @returns `val` limited to [0, 1], or `fallback` for non-numbers, NaN and
 *   ±Infinity.
 */
private clamp01(val: unknown, fallback = 0): number {
  if (typeof val === 'number' && Number.isFinite(val)) {
    // `<=` (not `<`) so that -0 is normalized to 0, matching Math.max(0, -0).
    if (val <= 0) return 0;
    return val >= 1 ? 1 : val;
  }
  return fallback;
}
696
+
562
697
  private classifyRuntimeError(error: unknown): TrinityRuntimeFailureCode {
563
698
  const detail = error instanceof Error ? error.message : String(error);
564
699
  return /timeout/i.test(detail) ? 'runtime_timeout' : 'runtime_run_failed';
@@ -606,7 +741,7 @@ export class OpenClawTrinityRuntimeAdapter implements TrinityRuntimeAdapter {
606
741
  } catch (err) {
607
742
  return this.buildRuntimeFailureDreamerOutput(this.classifyRuntimeError(err), err);
608
743
  } finally {
609
- try { fs.unlinkSync(sessionFile); } catch { /* ignore */ }
744
+ try { fs.unlinkSync(sessionFile); } catch (err) { this.api.logger?.warn?.(`[Trinity] Failed to delete session file: ${sessionFile}`); }
610
745
  }
611
746
  }
612
747
 
@@ -650,7 +785,7 @@ export class OpenClawTrinityRuntimeAdapter implements TrinityRuntimeAdapter {
650
785
  } catch (err) {
651
786
  return this.buildRuntimeFailurePhilosopherOutput(this.classifyRuntimeError(err), err);
652
787
  } finally {
653
- try { fs.unlinkSync(sessionFile); } catch { /* ignore */ }
788
+ try { fs.unlinkSync(sessionFile); } catch (err) { this.api.logger?.warn?.(`[Trinity] Failed to delete session file: ${sessionFile}`); }
654
789
  }
655
790
  }
656
791
 
@@ -698,7 +833,7 @@ export class OpenClawTrinityRuntimeAdapter implements TrinityRuntimeAdapter {
698
833
  this.recordFailure(this.classifyRuntimeError(err), err);
699
834
  return null;
700
835
  } finally {
701
- try { fs.unlinkSync(sessionFile); } catch { /* ignore */ }
836
+ try { fs.unlinkSync(sessionFile); } catch (err) { this.api.logger?.warn?.(`[Trinity] Failed to delete session file: ${sessionFile}`); }
702
837
  }
703
838
  }
704
839
 
@@ -796,6 +931,12 @@ export class OpenClawTrinityRuntimeAdapter implements TrinityRuntimeAdapter {
796
931
  sections.push('');
797
932
  }
798
933
 
934
+ // ## Reasoning Context — derived signals from Phase 34 deriver module (D-03, D-04)
935
+ const reasoningSection = formatReasoningContext(snapshot);
936
+ if (reasoningSection) {
937
+ sections.push(reasoningSection);
938
+ }
939
+
799
940
  sections.push(`## Task`,
800
941
  `Analyze the above session and generate ${maxCandidates} candidate corrections.`,
801
942
  `Each candidate must:`,
@@ -818,6 +959,11 @@ export class OpenClawTrinityRuntimeAdapter implements TrinityRuntimeAdapter {
818
959
  ): string {
819
960
  const candidatesJson = JSON.stringify(dreamerOutput.candidates, null, 2);
820
961
 
962
+ // Build per-candidate metadata from Dreamer (risk level + strategic perspective)
963
+ const candidateMeta = dreamerOutput.candidates
964
+ .filter(c => c.riskLevel || c.strategicPerspective)
965
+ .map(c => `- Candidate #${c.candidateIndex}: risk=${c.riskLevel || 'N/A'}, perspective=${c.strategicPerspective || 'N/A'}`);
966
+
821
967
  // Build violation summary from snapshot for Philosopher to validate candidates
822
968
  const failures = snapshot.toolCalls
823
969
  .filter(tc => tc.outcome === 'failure')
@@ -862,6 +1008,11 @@ export class OpenClawTrinityRuntimeAdapter implements TrinityRuntimeAdapter {
862
1008
  sections.push(userCues.join('\n'));
863
1009
  }
864
1010
 
1011
+ if (candidateMeta.length > 0) {
1012
+ sections.push(`\n### Candidate Risk Profiles (${candidateMeta.length})`);
1013
+ sections.push(candidateMeta.join('\n'));
1014
+ }
1015
+
865
1016
  sections.push(
866
1017
  ``,
867
1018
  `## Dreamer's Candidates`,
@@ -926,18 +1077,29 @@ export class OpenClawTrinityRuntimeAdapter implements TrinityRuntimeAdapter {
926
1077
  sections.push(`(No specific violations found in snapshot)`);
927
1078
  }
928
1079
 
1080
+ // Build risk summary from Philosopher 6D judgments for Scribe contrastive analysis
1081
+ const riskSummary = philosopherOutput.judgments
1082
+ .map(j => {
1083
+ const risk = j.risks ? ` [risks: fp=${j.risks.falsePositiveEstimate.toFixed(2)}, complexity=${j.risks.implementationComplexity}, breaking=${j.risks.breakingChangeRisk}]` : '';
1084
+ return ` - candidate[${j.candidateIndex}] (rank ${j.rank}, score ${j.score?.toFixed(2) ?? 'n/a'}): ${j.principleAligned ? 'aligned' : 'not aligned'}${risk}`;
1085
+ })
1086
+ .join('\n');
1087
+
929
1088
  sections.push(
930
1089
  ``,
931
1090
  `## Dreamer's Candidates`,
932
1091
  candidatesJson,
933
1092
  ``,
934
- `## Philosopher's Judgments`,
1093
+ `## Philosopher's Judgments + Risk Assessments`,
935
1094
  judgmentsJson,
936
1095
  ``,
1096
+ `## Philosopher 6D Risk Summary`,
1097
+ `Use this to determine contrastive depth — high-risk candidates need deeper analysis:`,
1098
+ riskSummary,
1099
+ ``,
937
1100
  `## Task`,
938
1101
  `Select the best candidate (Philosopher's rank 1) and synthesize it into a final TrinityDraftArtifact.`,
939
- `Use the Original Violation Evidence above to ensure your final badDecision and betterDecision`,
940
- `are grounded in the actual session events, not just Dreamer's interpretation.`,
1102
+ `Then produce contrastive analysis explaining why the winner was chosen and what the rejected candidates teach us.`,
941
1103
  ``,
942
1104
  `## CRITICAL: betterDecision Format Requirements`,
943
1105
  `Your betterDecision MUST pass executability validation. It MUST:`,
@@ -1057,7 +1219,39 @@ export class OpenClawTrinityRuntimeAdapter implements TrinityRuntimeAdapter {
1057
1219
  }
1058
1220
  return {
1059
1221
  valid: parsed.valid,
1060
- judgments: parsed.judgments,
1222
+ judgments: parsed.judgments.map((j: Record<string, unknown>) => ({
1223
+ candidateIndex: j.candidateIndex,
1224
+ critique: j.critique ?? '',
1225
+ principleAligned: j.principleAligned ?? false,
1226
+ score: j.score ?? 0,
1227
+ rank: j.rank ?? 0,
1228
+ // Optional 6D scores and risk assessment (Phase 36)
1229
+ // Only include a dimension if the LLM actually returned a number (not undefined/null).
1230
+ // This preserves the distinction between "LLM returned 0" vs "LLM omitted the field."
1231
+ ...(j.scores ? {
1232
+ scores: Object.fromEntries(
1233
+ (['principleAlignment', 'specificity', 'actionability', 'executability', 'safetyImpact', 'uxImpact'] as const)
1234
+ .map(dim => [dim, (j.scores as Record<string, unknown>)[dim]])
1235
+ .filter(([, v]) => typeof v === 'number')
1236
+ .map(([dim, v]) => [dim, this.clamp01(v as number)])
1237
+ )
1238
+ } : {}),
1239
+ ...(j.risks ? (() => {
1240
+ const risks = j.risks as Record<string, unknown>;
1241
+ const fp = risks.falsePositiveEstimate;
1242
+ const hasFp = typeof fp === 'number';
1243
+ const risksObj: {
1244
+ falsePositiveEstimate?: number;
1245
+ implementationComplexity: string;
1246
+ breakingChangeRisk: boolean;
1247
+ } = {
1248
+ implementationComplexity: (risks.implementationComplexity as string) ?? 'medium',
1249
+ breakingChangeRisk: Boolean(risks.breakingChangeRisk),
1250
+ };
1251
+ if (hasFp) risksObj.falsePositiveEstimate = this.clamp01(fp as number);
1252
+ return { risks: risksObj };
1253
+ })() : {}),
1254
+ })),
1061
1255
  overallAssessment: parsed.overallAssessment ?? '',
1062
1256
  reason: parsed.reason,
1063
1257
  generatedAt: parsed.generatedAt ?? new Date().toISOString(),
@@ -1117,6 +1311,22 @@ export class OpenClawTrinityRuntimeAdapter implements TrinityRuntimeAdapter {
1117
1311
  return null;
1118
1312
  }
1119
1313
 
1314
+ // Validate contrastive analysis sub-fields (H-03): only include if structure is intact
1315
+ const contrastiveAnalysis = parsed.contrastiveAnalysis
1316
+ && typeof parsed.contrastiveAnalysis === 'object'
1317
+ && typeof parsed.contrastiveAnalysis.criticalDifference === 'string'
1318
+ ? parsed.contrastiveAnalysis : undefined;
1319
+
1320
+ const rejectedAnalysis = parsed.rejectedAnalysis
1321
+ && typeof parsed.rejectedAnalysis === 'object'
1322
+ && typeof parsed.rejectedAnalysis.whyRejected === 'string'
1323
+ ? parsed.rejectedAnalysis : undefined;
1324
+
1325
+ const chosenJustification = parsed.chosenJustification
1326
+ && typeof parsed.chosenJustification === 'object'
1327
+ && typeof parsed.chosenJustification.whyChosen === 'string'
1328
+ ? parsed.chosenJustification : undefined;
1329
+
1120
1330
  return {
1121
1331
  selectedCandidateIndex: parsed.selectedCandidateIndex,
1122
1332
  badDecision: parsed.badDecision ?? '',
@@ -1135,6 +1345,9 @@ export class OpenClawTrinityRuntimeAdapter implements TrinityRuntimeAdapter {
1135
1345
  selectedCandidateIndex: parsed.selectedCandidateIndex,
1136
1346
  stageFailures: [],
1137
1347
  },
1348
+ ...(contrastiveAnalysis ? { contrastiveAnalysis } : {}),
1349
+ ...(rejectedAnalysis ? { rejectedAnalysis } : {}),
1350
+ ...(chosenJustification ? { chosenJustification } : {}),
1138
1351
  };
1139
1352
  } catch {
1140
1353
  this.recordFailure('runtime_run_failed', new Error(`Scribe output JSON parse error: ${json.slice(0, 200)}`));
@@ -1255,6 +1468,10 @@ export interface DreamerCandidate {
1255
1468
  rationale: string;
1256
1469
  /** Confidence that this candidate is valid (0-1) */
1257
1470
  confidence: number;
1471
+ /** Risk level of this candidate's approach -- LLM-judged per D-02 */
1472
+ riskLevel?: "low" | "medium" | "high";
1473
+ /** Which strategic perspective this candidate embodies per D-01 */
1474
+ strategicPerspective?: "conservative_fix" | "structural_improvement" | "paradigm_shift";
1258
1475
  }
1259
1476
 
1260
1477
  export interface DreamerOutput {
@@ -1272,6 +1489,24 @@ export interface DreamerOutput {
1272
1489
  * Philosopher output — principle-grounded critique and ranking.
1273
1490
  * Philosopher evaluates Dreamer's candidates and ranks them.
1274
1491
  */
1492
/**
 * Risk assessment attached to a single Philosopher judgment.
 */
export interface PhilosopherRiskAssessment {
  /** Probability, on a 0-1 scale, that the candidate is a false positive. */
  falsePositiveEstimate: number;
  /** Estimated effort needed to implement the candidate. */
  implementationComplexity: 'low' | 'medium' | 'high';
  /** True when implementing the candidate could break existing functionality. */
  breakingChangeRisk: boolean;
}
1500
+
1501
/**
 * Per-dimension scores for one candidate in the Philosopher's six-dimension
 * evaluation. The Philosopher prompt asks for each component on a 0-1 scale.
 */
export interface Philosopher6DScores {
  /** How well the betterDecision reflects the target principle. */
  principleAlignment: number;
  /** How specific the badDecision / betterDecision pair is. */
  specificity: number;
  /** Whether the betterDecision describes a specific next step. */
  actionability: number;
  /** Whether the betterDecision starts with a bounded verb and names a concrete target. */
  executability: number;
  /** Whether the betterDecision reduces the risk of data loss, corruption or new failure modes. */
  safetyImpact: number;
  /** Whether the betterDecision reduces user frustration or improves response reliability. */
  uxImpact: number;
}
1509
+
1275
1510
  export interface PhilosopherJudgment {
1276
1511
  /** Index of the judged candidate (references DreamerCandidate.candidateIndex) */
1277
1512
  candidateIndex: number;
@@ -1283,6 +1518,10 @@ export interface PhilosopherJudgment {
1283
1518
  score: number;
1284
1519
  /** Rank among all candidates (1 = best) */
1285
1520
  rank: number;
1521
+ /** Per-dimension scores (6D evaluation) — informational, not used for tournament ranking */
1522
+ scores?: Philosopher6DScores;
1523
+ /** Risk assessment for this candidate — informational, consumed by Scribe (Phase 37) */
1524
+ risks?: PhilosopherRiskAssessment;
1286
1525
  }
1287
1526
 
1288
1527
  export interface PhilosopherOutput {
@@ -1298,6 +1537,45 @@ export interface PhilosopherOutput {
1298
1537
  generatedAt: string;
1299
1538
  }
1300
1539
 
1540
/**
 * Explains why a candidate lost the tournament.
 * Serves as the "what to avoid" half of the training signal.
 */
export interface RejectedAnalysis {
  /** The mental model that produced the rejected candidate. */
  whyRejected: string;
  /** Observable caution triggers that were missed or ignored. */
  warningSignals: string[];
  /** The reasoning path that should have been followed instead. */
  correctiveThinking: string;
}
1552
+
1553
/**
 * Explains why the winning candidate was selected.
 * Serves as the "what to do" half of the training signal.
 */
export interface ChosenJustification {
  /** Reason this candidate was preferred over the others. */
  whyChosen: string;
  /** One to three insights from this decision that transfer to other situations. */
  keyInsights: string[];
  /** Situations in which this approach does NOT apply. */
  limitations: string[];
}
1565
+
1566
/**
 * Contrast between the chosen and the rejected reasoning paths,
 * condensing the tournament into its core lesson.
 */
export interface ContrastiveAnalysis {
  /** The single key insight that separates the chosen path from the rejected one. */
  criticalDifference: string;
  /** A rule of the form "When X, do Y". */
  decisionTrigger: string;
  /** How the rejected path can be systematically avoided. */
  preventionStrategy: string;
}
1578
+
1301
1579
  /**
1302
1580
  * Scribe output — final structured artifact draft.
1303
1581
  * Scribe synthesizes the best candidate into an approved artifact format.
@@ -1325,6 +1603,12 @@ export interface TrinityDraftArtifact {
1325
1603
  planningRatioGain?: number;
1326
1604
  /** Optional routing context for a follow-on Artificer stage */
1327
1605
  artificerContext?: TrinityArtificerContext;
1606
+ /** Contrastive analysis: chosen vs rejected reasoning paths (SCRIBE-03) */
1607
+ contrastiveAnalysis?: ContrastiveAnalysis;
1608
+ /** Analysis of the rejected candidates — why they lost the tournament (SCRIBE-01) */
1609
+ rejectedAnalysis?: RejectedAnalysis;
1610
+ /** Justification for the chosen candidate — why it won (SCRIBE-02) */
1611
+ chosenJustification?: ChosenJustification;
1328
1612
  }
1329
1613
 
1330
1614
  export interface TrinityTelemetry {
@@ -1350,6 +1634,24 @@ export interface TrinityTelemetry {
1350
1634
  winnerThresholdPassed?: boolean;
1351
1635
  /** Number of eligible candidates after threshold check (optional) */
1352
1636
  eligibleCandidateCount?: number;
1637
+ /** Whether Dreamer candidates passed diversity validation (DIVER-04) */
1638
+ diversityCheckPassed?: boolean;
1639
+ /** Risk levels assigned to Dreamer candidates (for telemetry) */
1640
+ candidateRiskLevels?: string[];
1641
+ /** Aggregate 6D Philosopher evaluation metrics (informational) */
1642
+ philosopher6D?: {
1643
+ /** Average scores across all candidates per dimension */
1644
+ avgScores: {
1645
+ principleAlignment: number;
1646
+ specificity: number;
1647
+ actionability: number;
1648
+ executability: number;
1649
+ safetyImpact: number;
1650
+ uxImpact: number;
1651
+ };
1652
+ /** Count of candidates with breakingChangeRisk = true */
1653
+ highRiskCount: number;
1654
+ };
1353
1655
  }
1354
1656
 
1355
1657
  // ---------------------------------------------------------------------------
@@ -1421,6 +1723,8 @@ export function invokeStubDreamer(
1421
1723
  betterDecision: 'Review docs/gateblocks.md and verify authorization requirements first; based on the evidence, this irreversible action must be reviewed before proceeding',
1422
1724
  rationale: 'Respecting gate blocks prevents unintended system modifications',
1423
1725
  confidence: 0.95,
1726
+ riskLevel: 'low' as const,
1727
+ strategicPerspective: 'conservative_fix' as const,
1424
1728
  });
1425
1729
  if (maxCandidates >= 2) {
1426
1730
  candidates.push({
@@ -1429,6 +1733,8 @@ export function invokeStubDreamer(
1429
1733
  betterDecision: 'Check the gatekeeper source first to diagnose the block reason; this is irreversible, so we must be certain before proceeding',
1430
1734
  rationale: 'Understanding why a gate blocked prevents repeated blocks',
1431
1735
  confidence: 0.85,
1736
+ riskLevel: 'low' as const,
1737
+ strategicPerspective: 'conservative_fix' as const,
1432
1738
  });
1433
1739
  }
1434
1740
  if (maxCandidates >= 3) {
@@ -1438,6 +1744,8 @@ export function invokeStubDreamer(
1438
1744
  betterDecision: 'Review docs/auth.md first to understand the authorization structure, then request proper review before any change',
1439
1745
  rationale: 'Proper authorization ensures accountability and prevents unintended changes',
1440
1746
  confidence: 0.75,
1747
+ riskLevel: 'low' as const,
1748
+ strategicPerspective: 'conservative_fix' as const,
1441
1749
  });
1442
1750
  }
1443
1751
  } else if (hasPain) {
@@ -1447,6 +1755,8 @@ export function invokeStubDreamer(
1447
1755
  betterDecision: 'Check logs/pain.json first to analyze pain signals; this error indicates we should stop and reconsider before proceeding',
1448
1756
  rationale: 'Pain signals indicate accumulated friction or error conditions',
1449
1757
  confidence: 0.90,
1758
+ riskLevel: 'medium' as const,
1759
+ strategicPerspective: 'structural_improvement' as const,
1450
1760
  });
1451
1761
  if (maxCandidates >= 2) {
1452
1762
  candidates.push({
@@ -1455,6 +1765,8 @@ export function invokeStubDreamer(
1455
1765
  betterDecision: 'Review src/pain-detector.ts first; based on the evidence, this indicates a deeper issue we must not ignore',
1456
1766
  rationale: 'Addressing friction reduces error rates and improves outcomes',
1457
1767
  confidence: 0.80,
1768
+ riskLevel: 'medium' as const,
1769
+ strategicPerspective: 'structural_improvement' as const,
1458
1770
  });
1459
1771
  }
1460
1772
  if (maxCandidates >= 3) {
@@ -1464,6 +1776,8 @@ export function invokeStubDreamer(
1464
1776
  betterDecision: 'Analyze logs/errors.json first to identify the failure pattern; this suggests we should stop and rethink before retrying',
1465
1777
  rationale: 'Pattern analysis prevents recurring pain from the same source',
1466
1778
  confidence: 0.70,
1779
+ riskLevel: 'medium' as const,
1780
+ strategicPerspective: 'structural_improvement' as const,
1467
1781
  });
1468
1782
  }
1469
1783
  } else if (hasFailures) {
@@ -1473,6 +1787,8 @@ export function invokeStubDreamer(
1473
1787
  betterDecision: 'Verify config.json preconditions first, based on the error in logs/failure.json, before retrying',
1474
1788
  rationale: 'Diagnosing failures before retry prevents repeated failures',
1475
1789
  confidence: 0.92,
1790
+ riskLevel: 'high' as const,
1791
+ strategicPerspective: 'paradigm_shift' as const,
1476
1792
  });
1477
1793
  if (maxCandidates >= 2) {
1478
1794
  candidates.push({
@@ -1481,6 +1797,8 @@ export function invokeStubDreamer(
1481
1797
  betterDecision: 'Check docs/debugging.md first to diagnose what failed; we must not ignore this when the action is irreversible',
1482
1798
  rationale: 'Unaddressed failures compound and cause larger issues',
1483
1799
  confidence: 0.85,
1800
+ riskLevel: 'high' as const,
1801
+ strategicPerspective: 'paradigm_shift' as const,
1484
1802
  });
1485
1803
  }
1486
1804
  if (maxCandidates >= 3) {
@@ -1490,6 +1808,8 @@ export function invokeStubDreamer(
1490
1808
  betterDecision: 'Verify src/validator.ts state first; this error indicates a deeper problem before assuming resolution',
1491
1809
  rationale: 'Verification prevents cascading failures from unresolved issues',
1492
1810
  confidence: 0.78,
1811
+ riskLevel: 'high' as const,
1812
+ strategicPerspective: 'paradigm_shift' as const,
1493
1813
  });
1494
1814
  }
1495
1815
  } else {
@@ -1572,6 +1892,70 @@ export function invokeStubPhilosopher(
1572
1892
  principleAligned = false;
1573
1893
  }
1574
1894
 
1895
+ // Deterministic 6D scores based on strategic perspective (Phase 35 D-07 mapping): each recognized perspective gets a fixed score table and risk assessment; any other value falls through to the score-derived fallback.
1896
+ const perspective = candidate.strategicPerspective;
1897
+ let sixDScores: Philosopher6DScores;
1898
+ let riskAssessment: PhilosopherRiskAssessment;
1899
+
1900
+ if (perspective === 'conservative_fix') { // highest fixed scores of the three tables; low complexity
1901
+ sixDScores = {
1902
+ principleAlignment: 0.9,
1903
+ specificity: 0.8,
1904
+ actionability: 0.85,
1905
+ executability: 0.9,
1906
+ safetyImpact: 0.95,
1907
+ uxImpact: 0.7,
1908
+ };
1909
+ riskAssessment = {
1910
+ falsePositiveEstimate: 0.1,
1911
+ implementationComplexity: 'low',
1912
+ breakingChangeRisk: false,
1913
+ };
1914
+ } else if (perspective === 'structural_improvement') { // mid-range table; medium complexity
1915
+ sixDScores = {
1916
+ principleAlignment: 0.75,
1917
+ specificity: 0.7,
1918
+ actionability: 0.75,
1919
+ executability: 0.7,
1920
+ safetyImpact: 0.7,
1921
+ uxImpact: 0.8,
1922
+ };
1923
+ riskAssessment = {
1924
+ falsePositiveEstimate: 0.25,
1925
+ implementationComplexity: 'medium',
1926
+ breakingChangeRisk: false,
1927
+ };
1928
+ } else if (perspective === 'paradigm_shift') { // lowest table; the only branch that sets breakingChangeRisk to true
1929
+ sixDScores = {
1930
+ principleAlignment: 0.6,
1931
+ specificity: 0.5,
1932
+ actionability: 0.5,
1933
+ executability: 0.45,
1934
+ safetyImpact: 0.4,
1935
+ uxImpact: 0.6,
1936
+ };
1937
+ riskAssessment = {
1938
+ falsePositiveEstimate: 0.4,
1939
+ implementationComplexity: 'high',
1940
+ breakingChangeRisk: true,
1941
+ };
1942
+ } else {
1943
+ // Fallback for candidates without strategicPerspective (also reached for any value not matched above): every dimension is the local `score` times a fixed factor.
1944
+ sixDScores = {
1945
+ principleAlignment: score, // NOTE(review): raw `score` is used here, while the returned `score` field is clamped to [0, 1] — confirm `score` cannot leave that range
1946
+ specificity: score * 0.9,
1947
+ actionability: score * 0.85,
1948
+ executability: score * 0.8,
1949
+ safetyImpact: score * 0.7,
1950
+ uxImpact: score * 0.75,
1951
+ };
1952
+ riskAssessment = {
1953
+ falsePositiveEstimate: 0.3,
1954
+ implementationComplexity: 'medium',
1955
+ breakingChangeRisk: false,
1956
+ };
1957
+ }
1958
+
1575
1959
  return {
1576
1960
  candidateIndex: candidate.candidateIndex,
1577
1961
  critique: `Candidate ${candidate.candidateIndex} scored ${score.toFixed(2)}. ${
@@ -1582,6 +1966,8 @@ export function invokeStubPhilosopher(
1582
1966
  principleAligned,
1583
1967
  score: Math.min(1, Math.max(0, score)),
1584
1968
  rank: 0, // Will be set after sorting
1969
+ scores: sixDScores,
1970
+ risks: riskAssessment,
1585
1971
  };
1586
1972
  });
1587
1973
 
@@ -1780,6 +2166,16 @@ export async function runTrinityAsync(options: RunTrinityOptions): Promise<Trini
1780
2166
  telemetry.dreamerPassed = true;
1781
2167
  telemetry.candidateCount = dreamerOutput.candidates.length;
1782
2168
 
2169
+ // Diversity validation (DIVER-04): soft check, never gates pipeline — the outcome and the candidates' risk levels are recorded in telemetry, and a failed check only logs a warning.
2170
+ const diversityResult = validateCandidateDiversity(dreamerOutput.candidates);
2171
+ telemetry.diversityCheckPassed = diversityResult.diversityCheckPassed;
2172
+ telemetry.candidateRiskLevels = dreamerOutput.candidates
2173
+ .map(c => c.riskLevel)
2174
+ .filter((r): r is "low" | "medium" | "high" => typeof r === 'string'); // drops non-string entries; NOTE(review): the guard only checks for a string, so any string value is accepted as a risk level — confirm upstream validation
2175
+ if (!diversityResult.diversityCheckPassed) {
2176
+ console.warn(`[Trinity] Diversity check failed: ${diversityResult.details}`);
2177
+ }
2178
+
1783
2179
  // Step 2: Philosopher — rank candidates via real subagent
1784
2180
  const philosopherOutput = await adapter.invokePhilosopher(dreamerOutput, principleId, snapshot);
1785
2181
 
@@ -1794,6 +2190,21 @@ export async function runTrinityAsync(options: RunTrinityOptions): Promise<Trini
1794
2190
 
1795
2191
  telemetry.philosopherPassed = true;
1796
2192
 
2193
+ // Aggregate 6D scores from Philosopher judgments (if available): per-dimension mean over the judgments that carry `scores`; telemetry.philosopher6D is not assigned when none do.
2194
+ const realJudgments6D = philosopherOutput.judgments.filter(j => j.scores);
2195
+ if (realJudgments6D.length > 0) {
2196
+ const dims = ['principleAlignment', 'specificity', 'actionability', 'executability', 'safetyImpact', 'uxImpact'] as const;
2197
+ const avgScores: Record<string, number> = {};
2198
+ for (const dim of dims) {
2199
+ const values = realJudgments6D.map(j => j.scores?.[dim] ?? 0); // a missing dimension counts as 0 in the mean
2200
+ avgScores[dim] = values.reduce((a, b) => a + b, 0) / values.length; // non-zero divisor: guarded by the length check above
2201
+ }
2202
+ telemetry.philosopher6D = {
2203
+ avgScores: avgScores as NonNullable<TrinityTelemetry['philosopher6D']>['avgScores'], // the loop above filled all six `dims` keys
2204
+ highRiskCount: philosopherOutput.judgments.filter(j => j.risks?.breakingChangeRisk).length, // counted over all judgments, not only those with `scores`
2205
+ };
2206
+ }
2207
+
1797
2208
  // Step 3: Scribe — synthesize final artifact via real subagent
1798
2209
  const draftArtifact = await adapter.invokeScribe(
1799
2210
  dreamerOutput,
@@ -1876,6 +2287,16 @@ function runTrinityWithStubs(
1876
2287
  telemetry.dreamerPassed = true;
1877
2288
  telemetry.candidateCount = dreamerOutput.candidates.length;
1878
2289
 
2290
+ // Diversity validation (DIVER-04): soft check, never gates pipeline — the outcome and the candidates' risk levels are recorded in telemetry, and a failed check only logs a warning.
2291
+ const diversityResult = validateCandidateDiversity(dreamerOutput.candidates);
2292
+ telemetry.diversityCheckPassed = diversityResult.diversityCheckPassed;
2293
+ telemetry.candidateRiskLevels = dreamerOutput.candidates
2294
+ .map(c => c.riskLevel)
2295
+ .filter((r): r is "low" | "medium" | "high" => typeof r === 'string'); // drops non-string entries; NOTE(review): the guard only checks for a string, so any string value is accepted as a risk level — confirm upstream validation
2296
+ if (!diversityResult.diversityCheckPassed) {
2297
+ console.warn(`[Trinity] Diversity check failed: ${diversityResult.details}`);
2298
+ }
2299
+
1879
2300
  // Step 2: Philosopher — rank candidates (stub)
1880
2301
  const philosopherOutput = invokeStubPhilosopher(dreamerOutput, principleId, snapshot);
1881
2302
 
@@ -1895,6 +2316,21 @@ function runTrinityWithStubs(
1895
2316
 
1896
2317
  telemetry.philosopherPassed = true;
1897
2318
 
2319
+ // Aggregate 6D scores from Philosopher judgments (if available): per-dimension mean over the judgments that carry `scores`; telemetry.philosopher6D is not assigned when none do.
2320
+ const judgments6D = philosopherOutput.judgments.filter(j => j.scores);
2321
+ if (judgments6D.length > 0) {
2322
+ const dims = ['principleAlignment', 'specificity', 'actionability', 'executability', 'safetyImpact', 'uxImpact'] as const;
2323
+ const avgScores: Record<string, number> = {};
2324
+ for (const dim of dims) {
2325
+ const values = judgments6D.map(j => j.scores?.[dim] ?? 0); // a missing dimension counts as 0 in the mean
2326
+ avgScores[dim] = values.reduce((a, b) => a + b, 0) / values.length; // non-zero divisor: guarded by the length check above
2327
+ }
2328
+ telemetry.philosopher6D = {
2329
+ avgScores: avgScores as NonNullable<TrinityTelemetry['philosopher6D']>['avgScores'], // the loop above filled all six `dims` keys
2330
+ highRiskCount: philosopherOutput.judgments.filter(j => j.risks?.breakingChangeRisk).length, // counted over all judgments, not only those with `scores`
2331
+ };
2332
+ }
2333
+
1898
2334
  // Step 3: Scribe — produce final artifact using tournament selection (stub)
1899
2335
  const draftArtifact = invokeStubScribe(dreamerOutput, philosopherOutput, snapshot, principleId, telemetry, config);
1900
2336