principles-disciple 1.28.0 → 1.28.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. package/openclaw.plugin.json +4 -4
  2. package/package.json +4 -4
  3. package/scripts/validate-live-path.ts +18 -18
  4. package/src/commands/context.ts +1 -0
  5. package/src/commands/disable-impl.ts +2 -0
  6. package/src/commands/evolution-status.ts +2 -0
  7. package/src/commands/focus.ts +2 -0
  8. package/src/commands/nocturnal-train.ts +4 -6
  9. package/src/commands/pain.ts +9 -11
  10. package/src/commands/pd-reflect.ts +1 -1
  11. package/src/commands/principle-rollback.ts +1 -0
  12. package/src/commands/rollback-impl.ts +1 -0
  13. package/src/core/adaptive-thresholds.ts +1 -0
  14. package/src/core/bootstrap-rules.ts +3 -3
  15. package/src/core/dictionary.ts +1 -0
  16. package/src/core/empathy-keyword-matcher.ts +1 -0
  17. package/src/core/event-log.ts +2 -0
  18. package/src/core/evolution-engine.ts +1 -0
  19. package/src/core/external-training-contract.ts +1 -0
  20. package/src/core/focus-history.ts +3 -0
  21. package/src/core/init.ts +1 -0
  22. package/src/core/merge-gate-audit.ts +1 -1
  23. package/src/core/nocturnal-arbiter.ts +3 -0
  24. package/src/core/nocturnal-candidate-scoring.ts +131 -0
  25. package/src/core/nocturnal-compliance.ts +1 -0
  26. package/src/core/nocturnal-dataset.ts +1 -0
  27. package/src/core/nocturnal-executability.ts +1 -0
  28. package/src/core/nocturnal-reasoning-deriver.ts +338 -0
  29. package/src/core/nocturnal-rule-implementation-validator.ts +1 -0
  30. package/src/core/nocturnal-trinity.ts +457 -18
  31. package/src/core/pain-context-extractor.ts +2 -3
  32. package/src/core/pain.ts +1 -0
  33. package/src/core/pd-task-reconciler.ts +1 -0
  34. package/src/core/pd-task-service.ts +1 -0
  35. package/src/core/principle-internalization/deprecated-readiness.ts +1 -0
  36. package/src/core/principle-internalization/principle-lifecycle-service.ts +1 -0
  37. package/src/core/principle-tree-migration.ts +3 -4
  38. package/src/core/replay-engine.ts +4 -0
  39. package/src/core/risk-calculator.ts +1 -0
  40. package/src/core/rule-host.ts +2 -0
  41. package/src/core/session-tracker.ts +2 -0
  42. package/src/core/thinking-models.ts +1 -0
  43. package/src/core/thinking-os-parser.ts +3 -3
  44. package/src/core/trajectory.ts +4 -0
  45. package/src/hooks/bash-risk.ts +1 -1
  46. package/src/hooks/gfi-gate.ts +1 -1
  47. package/src/hooks/lifecycle-routing.ts +1 -0
  48. package/src/hooks/pain.ts +2 -1
  49. package/src/hooks/prompt.ts +37 -2
  50. package/src/hooks/subagent.ts +1 -1
  51. package/src/hooks/trajectory-collector.ts +1 -0
  52. package/src/http/principles-console-route.ts +2 -0
  53. package/src/index.ts +1 -1
  54. package/src/service/central-database.ts +2 -0
  55. package/src/service/central-sync-service.ts +1 -0
  56. package/src/service/control-ui-query-service.ts +2 -0
  57. package/src/service/event-log-auditor.ts +2 -0
  58. package/src/service/evolution-worker.ts +2 -1
  59. package/src/service/health-query-service.ts +20 -6
  60. package/src/service/nocturnal-runtime.ts +4 -0
  61. package/src/service/runtime-summary-service.ts +5 -0
  62. package/src/service/subagent-workflow/deep-reflect-workflow-manager.ts +1 -0
  63. package/src/service/subagent-workflow/nocturnal-workflow-manager.ts +2 -1
  64. package/src/service/subagent-workflow/subagent-error-utils.ts +1 -0
  65. package/src/service/subagent-workflow/workflow-manager-base.ts +1 -0
  66. package/src/tools/critique-prompt.ts +1 -0
  67. package/src/utils/io.ts +1 -0
  68. package/tests/core/nocturnal-candidate-scoring.test.ts +132 -0
  69. package/tests/core/nocturnal-reasoning-deriver.test.ts +372 -0
  70. package/tests/core/nocturnal-trinity.test.ts +791 -0
@@ -35,12 +35,17 @@ import * as os from 'os';
35
35
  import * as path from 'path';
36
36
  import type { NocturnalSessionSnapshot } from './nocturnal-trajectory-extractor.js';
37
37
  import { computeThinkingModelDelta } from './nocturnal-trajectory-extractor.js';
38
+ import {
39
+ deriveReasoningChain,
40
+ deriveContextualFactors,
41
+ } from './nocturnal-reasoning-deriver.js';
38
42
  import type { TrinityArtificerContext } from './nocturnal-artificer.js';
39
43
  import {
40
44
  runTournament,
41
45
  DEFAULT_SCORING_WEIGHTS,
42
46
  type ScoringWeights,
43
47
  type TournamentTraceEntry,
48
+ validateCandidateDiversity,
44
49
  } from './nocturnal-candidate-scoring.js';
45
50
  import {
46
51
  DEFAULT_THRESHOLDS,
@@ -61,7 +66,7 @@ const FALLBACK_MODEL = process.env.OPENCLAW_DEFAULT_MODEL || 'MiniMax-M2.7';
61
66
  // These prompts are embedded at build time. The agents/ directory was removed
62
67
  // to eliminate fragile runtime file dependencies on the file system.
63
68
 
64
- const NOCTURNAL_DREAMER_PROMPT = `# Nocturnal Dreamer — Candidate Generation
69
+ export const NOCTURNAL_DREAMER_PROMPT = `# Nocturnal Dreamer — Candidate Generation
65
70
 
66
71
  > System prompt for Trinity Dreamer stage.
67
72
  > Role: Generate multiple alternative "better decision" candidates from a session snapshot.
@@ -104,7 +109,9 @@ You MUST respond with ONLY a valid JSON object. No markdown, no explanation, no
104
109
  "badDecision": "<what the agent did wrong>",
105
110
  "betterDecision": "<what the agent should have done>",
106
111
  "rationale": "<why this is better>",
107
- "confidence": 0.95
112
+ "confidence": 0.95,
113
+ "riskLevel": "low",
114
+ "strategicPerspective": "conservative_fix"
108
115
  }
109
116
  ],
110
117
  "generatedAt": "<ISO timestamp>"
@@ -131,6 +138,23 @@ You MUST respond with ONLY a valid JSON object. No markdown, no explanation, no
131
138
  - Do not generate candidates with identical betterDecisions
132
139
  - Vary the confidence scores to reflect genuine uncertainty
133
140
 
141
+ ## Strategic Perspective Requirements
142
+
143
+ Generate candidates from DISTINCT strategic perspectives:
144
+
145
+ - **conservative_fix**: Minimal deviation from original approach. Add a
146
+ verification or validation step that was missing.
147
+ - **structural_improvement**: Reorder operations or introduce an intermediate
148
+ checkpoint. Change HOW the goal is achieved.
149
+ - **paradigm_shift**: Challenge whether the original goal was correct.
150
+ Consider a fundamentally different approach.
151
+
152
+ Each candidate MUST specify \`riskLevel\` ("low"|"medium"|"high") and
153
+ \`strategicPerspective\` matching one of the above.
154
+
155
+ ANTI-PATTERN: Candidates that differ only in wording, not in substance,
156
+ will be rejected.
157
+
134
158
  ### Candidates must NOT:
135
159
  - Contain raw user text or private content
136
160
  - Reference non-existent tools or impossible actions
@@ -148,7 +172,7 @@ If you cannot generate valid candidates (e.g., no clear violation found, insuffi
148
172
  "generatedAt": "<ISO timestamp>"
149
173
  }`;
150
174
 
151
- const NOCTURNAL_PHILOSOPHER_PROMPT = `# Nocturnal Philosopher — Candidate Evaluation and Ranking
175
+ export const NOCTURNAL_PHILOSOPHER_PROMPT = `# Nocturnal Philosopher — Candidate Evaluation and Ranking
152
176
 
153
177
  > System prompt for Trinity Philosopher stage.
154
178
  > Role: Evaluate Dreamer's candidates and rank them by principle alignment and quality.
@@ -187,7 +211,20 @@ You MUST respond with ONLY a valid JSON object. No markdown, no explanation, no
187
211
  "critique": "<principle-grounded critique>",
188
212
  "principleAligned": true,
189
213
  "score": 0.92,
190
- "rank": 1
214
+ "rank": 1,
215
+ "scores": {
216
+ "principleAlignment": 0.9,
217
+ "specificity": 0.85,
218
+ "actionability": 0.9,
219
+ "executability": 0.95,
220
+ "safetyImpact": 0.8,
221
+ "uxImpact": 0.85
222
+ },
223
+ "risks": {
224
+ "falsePositiveEstimate": 0.1,
225
+ "implementationComplexity": "low",
226
+ "breakingChangeRisk": false
227
+ }
191
228
  }
192
229
  ],
193
230
  "overallAssessment": "<summary of candidate set quality>",
@@ -197,10 +234,18 @@ You MUST respond with ONLY a valid JSON object. No markdown, no explanation, no
197
234
  ## Evaluation Criteria
198
235
 
199
236
  ### Score Components (0-1 scale each):
200
- 1. **Principle Alignment** (weight: 0.35) — Does the betterDecision properly reflect the target principle?
201
- 2. **Specificity** (weight: 0.25) — Is badDecision specific? Is betterDecision actionable?
202
- 3. **Actionability** (weight: 0.25) — Does betterDecision describe a specific next step?
237
+ 1. **Principle Alignment** (weight: 0.20) — Does the betterDecision properly reflect the target principle?
238
+ 2. **Specificity** (weight: 0.15) — Is badDecision specific? Is betterDecision actionable?
239
+ 3. **Actionability** (weight: 0.15) — Does betterDecision describe a specific next step?
203
240
  4. **Executability** (weight: 0.15) — Does betterDecision start with a bounded verb (read, check, verify, edit, write, etc.) and reference a concrete target?
241
+ 5. **Safety Impact** (weight: 0.20) — Does the betterDecision reduce risk of data loss, corruption, or new failure modes? Would implementing this prevent dangerous operations?
242
+ 6. **UX Impact** (weight: 0.15) — Does the betterDecision reduce user frustration or improve response reliability? Would the user experience be noticeably better?
243
+
244
+ ### Risk Assessment (per candidate):
245
+ For each candidate, also assess:
246
+ - **falsePositiveEstimate** (0-1): How likely is this candidate a false positive (the "betterDecision" is actually not better)?
247
+ - **implementationComplexity** ("low"/"medium"/"high"): How complex would it be to implement this correction?
248
+ - **breakingChangeRisk** (boolean): Could implementing this correction break existing behavior?
204
249
 
205
250
  ### Executability Check:
206
251
  A betterDecision is executable if it:
@@ -243,13 +288,16 @@ and synthesize it into a final decision-point artifact that passes arbiter valid
243
288
  You will receive:
244
289
  - A **target principle** (principle ID and description)
245
290
  - A **session trajectory snapshot**
246
- - **Philosopher's judgments** — ranked candidates with critiques
291
+ - **Philosopher's judgments** — ranked candidates with critiques and 6D scores
247
292
  - **Dreamer's candidates** — the original candidate list
293
+ - **Philosopher's risk assessments** — falsePositiveEstimate, implementationComplexity, breakingChangeRisk per candidate
294
+
295
+ Use the risk assessments to determine which candidates require deeper contrastive analysis. High-risk candidates (high breakingChangeRisk or implementationComplexity) warrant thorough rejectedAnalysis.
248
296
 
249
297
  ## Task
250
298
 
251
299
  Select the best candidate (Philosopher's rank 1) and synthesize it into
252
- a final TrinityDraftArtifact.
300
+ a final TrinityDraftArtifact. Then produce a **Contrastive Analysis** that explains why the winner was chosen and what to learn from the runners-up.
253
301
 
254
302
  ## Output Format
255
303
 
@@ -271,9 +319,26 @@ You MUST respond with ONLY a valid JSON object. No markdown, no explanation, no
271
319
  "candidateCount": 2,
272
320
  "selectedCandidateIndex": 0,
273
321
  "stageFailures": []
322
+ },
323
+ "rejectedAnalysis": {
324
+ "whyRejected": "<mental model that led to the rejected candidate>",
325
+ "warningSignals": ["<observable caution trigger 1>", "<trigger 2>"],
326
+ "correctiveThinking": "<correct reasoning path that should have been taken>"
327
+ },
328
+ "chosenJustification": {
329
+ "whyChosen": "<why this candidate was selected over others>",
330
+ "keyInsights": ["<transferable insight 1>", "<insight 2>", "<insight 3>"],
331
+ "limitations": ["<when this approach does NOT apply 1>", "<limitation 2>"]
332
+ },
333
+ "contrastiveAnalysis": {
334
+ "criticalDifference": "<ONE key insight distinguishing chosen from rejected>",
335
+ "decisionTrigger": "<When X, do Y pattern>",
336
+ "preventionStrategy": "<how to systematically avoid the rejected path>"
274
337
  }
275
338
  }
276
339
 
340
+ All three analysis sections (rejectedAnalysis, chosenJustification, contrastiveAnalysis) are optional but recommended. When multiple candidates were evaluated, include them to provide richer training signals.
341
+
277
342
  ## Validation
278
343
 
279
344
  If you cannot synthesize an artifact:
@@ -403,6 +468,70 @@ export class TrinityRuntimeContractError extends Error {
403
468
  }
404
469
  }
405
470
 
471
+ // ---------------------------------------------------------------------------
472
+ // Reasoning Context Serialization (D-03, D-04)
473
+ // ---------------------------------------------------------------------------
474
+
475
/**
 * Render the "## Reasoning Context" prompt section handed to the Dreamer.
 *
 * Per-turn reasoning signals and session-level contextual factors are
 * derived from the snapshot and laid out as markdown bullets. Only the
 * reasoning chain and the contextual factors are rendered; decision points
 * are intentionally left out (reserved for Phase 37 Scribe per D-04).
 *
 * @param snapshot - Session snapshot the signals are derived from.
 * @returns The formatted section, or null when no turn carries thinking
 *   content or uncertainty markers and none of the four contextual factors
 *   is set.
 */
export function formatReasoningContext(snapshot: NocturnalSessionSnapshot): string | null {
  const chain = deriveReasoningChain(snapshot.assistantTurns);
  const factors = deriveContextualFactors(snapshot);

  // A turn is reported only when it has thinking text or uncertainty markers.
  const reportableTurns = chain.filter(
    turn => turn.thinkingContent || turn.uncertaintyMarkers.length > 0
  );

  // Pair each contextual flag with the label printed when the flag is set.
  const factorTable: ReadonlyArray<readonly [unknown, string]> = [
    [factors.fileStructureKnown, 'File structure explored before modification'],
    [factors.errorHistoryPresent, 'Prior error history present'],
    [factors.userGuidanceAvailable, 'User guidance/corrections available'],
    [factors.timePressure, 'Time pressure detected (rapid tool calls)'],
  ];
  const activeLabels = factorTable
    .filter(([flag]) => Boolean(flag))
    .map(([, label]) => label);

  // Nothing to say: let the caller skip the section entirely.
  if (reportableTurns.length === 0 && activeLabels.length === 0) {
    return null;
  }

  const lines: string[] = ['## Reasoning Context', ''];

  for (const turn of reportableTurns) {
    if (turn.thinkingContent) {
      // Thinking text is capped at 200 characters to bound prompt size.
      lines.push(`- Turn ${turn.turnIndex}: Internal reasoning: "${turn.thinkingContent.slice(0, 200)}"`);
    }
    if (turn.uncertaintyMarkers.length > 0) {
      lines.push(`- Turn ${turn.turnIndex}: Uncertainty detected: ${turn.uncertaintyMarkers.join(', ')}`);
    }
    // 'high' confidence is the unremarkable case and is not printed.
    if (turn.confidenceSignal !== 'high') {
      lines.push(`- Turn ${turn.turnIndex}: Confidence: ${turn.confidenceSignal}`);
    }
  }

  if (activeLabels.length > 0) {
    lines.push('', 'Environmental context:', ...activeLabels.map(label => `- ${label}`));
  }

  // Trailing empty entry yields a terminating newline after join.
  lines.push('');
  return lines.join('\n');
}
534
+
406
535
  export class OpenClawTrinityRuntimeAdapter implements TrinityRuntimeAdapter {
407
536
 
408
537
  private readonly api: {
@@ -478,8 +607,8 @@ export class OpenClawTrinityRuntimeAdapter implements TrinityRuntimeAdapter {
478
607
  fs.rmSync(fullPath, { recursive: true, force: true });
479
608
  }
480
609
  }
481
- } catch {
482
- // Non-fatal: stale temp files will be cleaned up eventually
610
+ } catch (err) {
611
+ this.api.logger?.warn?.(`[Trinity] Failed to cleanup stale temp dirs: ${err instanceof Error ? err.message.replace(/([A-Za-z]:\\[^:\\s]+|\\\/[^\s:]+)/g, '[PATH]') : String(err)}`);
483
612
  }
484
613
  }
485
614
 
@@ -513,6 +642,7 @@ export class OpenClawTrinityRuntimeAdapter implements TrinityRuntimeAdapter {
513
642
  * runEmbeddedPiAgent does NOT read config.agents.defaults.model —
514
643
  * it requires explicit params.provider and params.model.
515
644
  */
645
+ // eslint-disable-next-line complexity -- complexity 12, refactor candidate
516
646
  private resolveModel(): { provider: string; model: string } {
517
647
  const config = this.loadFullConfig();
518
648
  const agents = config?.agents as Record<string, unknown> | undefined;
@@ -559,6 +689,12 @@ export class OpenClawTrinityRuntimeAdapter implements TrinityRuntimeAdapter {
559
689
  .join('\n');
560
690
  }
561
691
 
692
/**
 * Clamp a value into the closed range [0, 1].
 *
 * Used for LLM-produced scores, which may be missing, non-numeric, or out
 * of range.
 *
 * @param val - Candidate value of unknown type.
 * @param fallback - Returned as-is (not clamped) when `val` is not a finite number.
 * @returns `val` limited to [0, 1], or `fallback`.
 */
private clamp01(val: unknown, fallback = 0): number {
  if (typeof val === 'number' && Number.isFinite(val)) {
    return Math.max(0, Math.min(1, val));
  }
  return fallback;
}
697
+
562
698
  private classifyRuntimeError(error: unknown): TrinityRuntimeFailureCode {
563
699
  const detail = error instanceof Error ? error.message : String(error);
564
700
  return /timeout/i.test(detail) ? 'runtime_timeout' : 'runtime_run_failed';
@@ -606,7 +742,7 @@ export class OpenClawTrinityRuntimeAdapter implements TrinityRuntimeAdapter {
606
742
  } catch (err) {
607
743
  return this.buildRuntimeFailureDreamerOutput(this.classifyRuntimeError(err), err);
608
744
  } finally {
609
- try { fs.unlinkSync(sessionFile); } catch { /* ignore */ }
745
+ try { fs.unlinkSync(sessionFile); } catch (err) { this.api.logger?.warn?.(`[Trinity] Failed to delete session file: ${sessionFile}`); }
610
746
  }
611
747
  }
612
748
 
@@ -650,7 +786,7 @@ export class OpenClawTrinityRuntimeAdapter implements TrinityRuntimeAdapter {
650
786
  } catch (err) {
651
787
  return this.buildRuntimeFailurePhilosopherOutput(this.classifyRuntimeError(err), err);
652
788
  } finally {
653
- try { fs.unlinkSync(sessionFile); } catch { /* ignore */ }
789
+ try { fs.unlinkSync(sessionFile); } catch (err) { this.api.logger?.warn?.(`[Trinity] Failed to delete session file: ${sessionFile}`); }
654
790
  }
655
791
  }
656
792
 
@@ -698,7 +834,7 @@ export class OpenClawTrinityRuntimeAdapter implements TrinityRuntimeAdapter {
698
834
  this.recordFailure(this.classifyRuntimeError(err), err);
699
835
  return null;
700
836
  } finally {
701
- try { fs.unlinkSync(sessionFile); } catch { /* ignore */ }
837
+ try { fs.unlinkSync(sessionFile); } catch (err) { this.api.logger?.warn?.(`[Trinity] Failed to delete session file: ${sessionFile}`); }
702
838
  }
703
839
  }
704
840
 
@@ -796,6 +932,12 @@ export class OpenClawTrinityRuntimeAdapter implements TrinityRuntimeAdapter {
796
932
  sections.push('');
797
933
  }
798
934
 
935
+ // ## Reasoning Context — derived signals from Phase 34 deriver module (D-03, D-04)
936
+ const reasoningSection = formatReasoningContext(snapshot);
937
+ if (reasoningSection) {
938
+ sections.push(reasoningSection);
939
+ }
940
+
799
941
  sections.push(`## Task`,
800
942
  `Analyze the above session and generate ${maxCandidates} candidate corrections.`,
801
943
  `Each candidate must:`,
@@ -818,6 +960,11 @@ export class OpenClawTrinityRuntimeAdapter implements TrinityRuntimeAdapter {
818
960
  ): string {
819
961
  const candidatesJson = JSON.stringify(dreamerOutput.candidates, null, 2);
820
962
 
963
+ // Build per-candidate metadata from Dreamer (risk level + strategic perspective)
964
+ const candidateMeta = dreamerOutput.candidates
965
+ .filter(c => c.riskLevel || c.strategicPerspective)
966
+ .map(c => `- Candidate #${c.candidateIndex}: risk=${c.riskLevel || 'N/A'}, perspective=${c.strategicPerspective || 'N/A'}`);
967
+
821
968
  // Build violation summary from snapshot for Philosopher to validate candidates
822
969
  const failures = snapshot.toolCalls
823
970
  .filter(tc => tc.outcome === 'failure')
@@ -862,6 +1009,11 @@ export class OpenClawTrinityRuntimeAdapter implements TrinityRuntimeAdapter {
862
1009
  sections.push(userCues.join('\n'));
863
1010
  }
864
1011
 
1012
+ if (candidateMeta.length > 0) {
1013
+ sections.push(`\n### Candidate Risk Profiles (${candidateMeta.length})`);
1014
+ sections.push(candidateMeta.join('\n'));
1015
+ }
1016
+
865
1017
  sections.push(
866
1018
  ``,
867
1019
  `## Dreamer's Candidates`,
@@ -926,18 +1078,29 @@ export class OpenClawTrinityRuntimeAdapter implements TrinityRuntimeAdapter {
926
1078
  sections.push(`(No specific violations found in snapshot)`);
927
1079
  }
928
1080
 
1081
+ // Build risk summary from Philosopher 6D judgments for Scribe contrastive analysis
1082
+ const riskSummary = philosopherOutput.judgments
1083
+ .map(j => {
1084
+ const risk = j.risks ? ` [risks: fp=${j.risks.falsePositiveEstimate.toFixed(2)}, complexity=${j.risks.implementationComplexity}, breaking=${j.risks.breakingChangeRisk}]` : '';
1085
+ return ` - candidate[${j.candidateIndex}] (rank ${j.rank}, score ${j.score?.toFixed(2) ?? 'n/a'}): ${j.principleAligned ? 'aligned' : 'not aligned'}${risk}`;
1086
+ })
1087
+ .join('\n');
1088
+
929
1089
  sections.push(
930
1090
  ``,
931
1091
  `## Dreamer's Candidates`,
932
1092
  candidatesJson,
933
1093
  ``,
934
- `## Philosopher's Judgments`,
1094
+ `## Philosopher's Judgments + Risk Assessments`,
935
1095
  judgmentsJson,
936
1096
  ``,
1097
+ `## Philosopher 6D Risk Summary`,
1098
+ `Use this to determine contrastive depth — high-risk candidates need deeper analysis:`,
1099
+ riskSummary,
1100
+ ``,
937
1101
  `## Task`,
938
1102
  `Select the best candidate (Philosopher's rank 1) and synthesize it into a final TrinityDraftArtifact.`,
939
- `Use the Original Violation Evidence above to ensure your final badDecision and betterDecision`,
940
- `are grounded in the actual session events, not just Dreamer's interpretation.`,
1103
+ `Then produce contrastive analysis explaining why the winner was chosen and what the rejected candidates teach us.`,
941
1104
  ``,
942
1105
  `## CRITICAL: betterDecision Format Requirements`,
943
1106
  `Your betterDecision MUST pass executability validation. It MUST:`,
@@ -1057,7 +1220,39 @@ export class OpenClawTrinityRuntimeAdapter implements TrinityRuntimeAdapter {
1057
1220
  }
1058
1221
  return {
1059
1222
  valid: parsed.valid,
1060
- judgments: parsed.judgments,
1223
+ judgments: parsed.judgments.map((j: Record<string, unknown>) => ({
1224
+ candidateIndex: j.candidateIndex,
1225
+ critique: j.critique ?? '',
1226
+ principleAligned: j.principleAligned ?? false,
1227
+ score: j.score ?? 0,
1228
+ rank: j.rank ?? 0,
1229
+ // Optional 6D scores and risk assessment (Phase 36)
1230
+ // Only include a dimension if the LLM actually returned a number (not undefined/null).
1231
+ // This preserves the distinction between "LLM returned 0" vs "LLM omitted the field."
1232
+ ...(j.scores ? {
1233
+ scores: Object.fromEntries(
1234
+ (['principleAlignment', 'specificity', 'actionability', 'executability', 'safetyImpact', 'uxImpact'] as const)
1235
+ .map(dim => [dim, (j.scores as Record<string, unknown>)[dim]])
1236
+ .filter(([, v]) => typeof v === 'number')
1237
+ .map(([dim, v]) => [dim, this.clamp01(v as number)])
1238
+ )
1239
+ } : {}),
1240
+ ...(j.risks ? (() => {
1241
+ const risks = j.risks as Record<string, unknown>;
1242
+ const fp = risks.falsePositiveEstimate;
1243
+ const hasFp = typeof fp === 'number';
1244
+ const risksObj: {
1245
+ falsePositiveEstimate?: number;
1246
+ implementationComplexity: string;
1247
+ breakingChangeRisk: boolean;
1248
+ } = {
1249
+ implementationComplexity: (risks.implementationComplexity as string) ?? 'medium',
1250
+ breakingChangeRisk: Boolean(risks.breakingChangeRisk),
1251
+ };
1252
+ if (hasFp) risksObj.falsePositiveEstimate = this.clamp01(fp as number);
1253
+ return { risks: risksObj };
1254
+ })() : {}),
1255
+ })),
1061
1256
  overallAssessment: parsed.overallAssessment ?? '',
1062
1257
  reason: parsed.reason,
1063
1258
  generatedAt: parsed.generatedAt ?? new Date().toISOString(),
@@ -1117,6 +1312,22 @@ export class OpenClawTrinityRuntimeAdapter implements TrinityRuntimeAdapter {
1117
1312
  return null;
1118
1313
  }
1119
1314
 
1315
+ // Validate contrastive analysis sub-fields (H-03): only include if structure is intact
1316
+ const contrastiveAnalysis = parsed.contrastiveAnalysis
1317
+ && typeof parsed.contrastiveAnalysis === 'object'
1318
+ && typeof parsed.contrastiveAnalysis.criticalDifference === 'string'
1319
+ ? parsed.contrastiveAnalysis : undefined;
1320
+
1321
+ const rejectedAnalysis = parsed.rejectedAnalysis
1322
+ && typeof parsed.rejectedAnalysis === 'object'
1323
+ && typeof parsed.rejectedAnalysis.whyRejected === 'string'
1324
+ ? parsed.rejectedAnalysis : undefined;
1325
+
1326
+ const chosenJustification = parsed.chosenJustification
1327
+ && typeof parsed.chosenJustification === 'object'
1328
+ && typeof parsed.chosenJustification.whyChosen === 'string'
1329
+ ? parsed.chosenJustification : undefined;
1330
+
1120
1331
  return {
1121
1332
  selectedCandidateIndex: parsed.selectedCandidateIndex,
1122
1333
  badDecision: parsed.badDecision ?? '',
@@ -1135,6 +1346,9 @@ export class OpenClawTrinityRuntimeAdapter implements TrinityRuntimeAdapter {
1135
1346
  selectedCandidateIndex: parsed.selectedCandidateIndex,
1136
1347
  stageFailures: [],
1137
1348
  },
1349
+ ...(contrastiveAnalysis ? { contrastiveAnalysis } : {}),
1350
+ ...(rejectedAnalysis ? { rejectedAnalysis } : {}),
1351
+ ...(chosenJustification ? { chosenJustification } : {}),
1138
1352
  };
1139
1353
  } catch {
1140
1354
  this.recordFailure('runtime_run_failed', new Error(`Scribe output JSON parse error: ${json.slice(0, 200)}`));
@@ -1255,6 +1469,10 @@ export interface DreamerCandidate {
1255
1469
  rationale: string;
1256
1470
  /** Confidence that this candidate is valid (0-1) */
1257
1471
  confidence: number;
1472
+ /** Risk level of this candidate's approach -- LLM-judged per D-02 */
1473
+ riskLevel?: "low" | "medium" | "high";
1474
+ /** Which strategic perspective this candidate embodies per D-01 */
1475
+ strategicPerspective?: "conservative_fix" | "structural_improvement" | "paradigm_shift";
1258
1476
  }
1259
1477
 
1260
1478
  export interface DreamerOutput {
@@ -1272,6 +1490,24 @@ export interface DreamerOutput {
1272
1490
  * Philosopher output — principle-grounded critique and ranking.
1273
1491
  * Philosopher evaluates Dreamer's candidates and ranks them.
1274
1492
  */
1493
+ export interface PhilosopherRiskAssessment {
1494
+ /** Estimated probability that this candidate is a false positive (0-1) */
1495
+ falsePositiveEstimate: number;
1496
+ /** How complex is this candidate to implement */
1497
+ implementationComplexity: 'low' | 'medium' | 'high';
1498
+ /** Whether implementing this candidate risks breaking existing functionality */
1499
+ breakingChangeRisk: boolean;
1500
+ }
1501
+
1502
+ export interface Philosopher6DScores {
1503
+ principleAlignment: number;
1504
+ specificity: number;
1505
+ actionability: number;
1506
+ executability: number;
1507
+ safetyImpact: number;
1508
+ uxImpact: number;
1509
+ }
1510
+
1275
1511
  export interface PhilosopherJudgment {
1276
1512
  /** Index of the judged candidate (references DreamerCandidate.candidateIndex) */
1277
1513
  candidateIndex: number;
@@ -1283,6 +1519,10 @@ export interface PhilosopherJudgment {
1283
1519
  score: number;
1284
1520
  /** Rank among all candidates (1 = best) */
1285
1521
  rank: number;
1522
+ /** Per-dimension scores (6D evaluation) — informational, not used for tournament ranking */
1523
+ scores?: Philosopher6DScores;
1524
+ /** Risk assessment for this candidate — informational, consumed by Scribe (Phase 37) */
1525
+ risks?: PhilosopherRiskAssessment;
1286
1526
  }
1287
1527
 
1288
1528
  export interface PhilosopherOutput {
@@ -1298,6 +1538,45 @@ export interface PhilosopherOutput {
1298
1538
  generatedAt: string;
1299
1539
  }
1300
1540
 
1541
+ /**
1542
+ * Analysis of a rejected candidate — why it lost the tournament.
1543
+ * Informs training signal for "what to avoid".
1544
+ */
1545
+ export interface RejectedAnalysis {
1546
+ /** Mental model that led to the rejected candidate */
1547
+ whyRejected: string;
1548
+ /** Observable caution triggers that were missed or ignored */
1549
+ warningSignals: string[];
1550
+ /** Correct reasoning path that should have been taken */
1551
+ correctiveThinking: string;
1552
+ }
1553
+
1554
+ /**
1555
+ * Justification for the chosen candidate — why it won the tournament.
1556
+ * Informs training signal for "what to do".
1557
+ */
1558
+ export interface ChosenJustification {
1559
+ /** Why this candidate was selected over others */
1560
+ whyChosen: string;
1561
+ /** 1-3 transferable insights from this decision */
1562
+ keyInsights: string[];
1563
+ /** When this approach does NOT apply */
1564
+ limitations: string[];
1565
+ }
1566
+
1567
+ /**
1568
+ * Contrastive analysis: key differences between chosen and rejected paths.
1569
+ * Synthesizes the core lesson from the tournament.
1570
+ */
1571
+ export interface ContrastiveAnalysis {
1572
+ /** ONE key insight distinguishing chosen from rejected */
1573
+ criticalDifference: string;
1574
+ /** Pattern: "When X, do Y" */
1575
+ decisionTrigger: string;
1576
+ /** How to systematically avoid the rejected path */
1577
+ preventionStrategy: string;
1578
+ }
1579
+
1301
1580
  /**
1302
1581
  * Scribe output — final structured artifact draft.
1303
1582
  * Scribe synthesizes the best candidate into an approved artifact format.
@@ -1325,6 +1604,12 @@ export interface TrinityDraftArtifact {
1325
1604
  planningRatioGain?: number;
1326
1605
  /** Optional routing context for a follow-on Artificer stage */
1327
1606
  artificerContext?: TrinityArtificerContext;
1607
+ /** Contrastive analysis: chosen vs rejected reasoning paths (SCRIBE-03) */
1608
+ contrastiveAnalysis?: ContrastiveAnalysis;
1609
+ /** Analysis of the rejected candidates — why they lost the tournament (SCRIBE-01) */
1610
+ rejectedAnalysis?: RejectedAnalysis;
1611
+ /** Justification for the chosen candidate — why it won (SCRIBE-02) */
1612
+ chosenJustification?: ChosenJustification;
1328
1613
  }
1329
1614
 
1330
1615
  export interface TrinityTelemetry {
@@ -1350,6 +1635,24 @@ export interface TrinityTelemetry {
1350
1635
  winnerThresholdPassed?: boolean;
1351
1636
  /** Number of eligible candidates after threshold check (optional) */
1352
1637
  eligibleCandidateCount?: number;
1638
+ /** Whether Dreamer candidates passed diversity validation (DIVER-04) */
1639
+ diversityCheckPassed?: boolean;
1640
+ /** Risk levels assigned to Dreamer candidates (for telemetry) */
1641
+ candidateRiskLevels?: string[];
1642
+ /** Aggregate 6D Philosopher evaluation metrics (informational) */
1643
+ philosopher6D?: {
1644
+ /** Average scores across all candidates per dimension */
1645
+ avgScores: {
1646
+ principleAlignment: number;
1647
+ specificity: number;
1648
+ actionability: number;
1649
+ executability: number;
1650
+ safetyImpact: number;
1651
+ uxImpact: number;
1652
+ };
1653
+ /** Count of candidates with breakingChangeRisk = true */
1654
+ highRiskCount: number;
1655
+ };
1353
1656
  }
1354
1657
 
1355
1658
  // ---------------------------------------------------------------------------
@@ -1396,6 +1699,7 @@ export interface TrinityResult {
1396
1699
  * In production, this would call the actual Dreamer subagent.
1397
1700
  * The stub generates plausible candidates based on snapshot signals.
1398
1701
  */
1702
+ // eslint-disable-next-line complexity -- complexity 14, refactor candidate
1399
1703
  export function invokeStubDreamer(
1400
1704
  snapshot: NocturnalSessionSnapshot,
1401
1705
  principleId: string,
@@ -1421,6 +1725,8 @@ export function invokeStubDreamer(
1421
1725
  betterDecision: 'Review docs/gateblocks.md and verify authorization requirements first; based on the evidence, this irreversible action must be reviewed before proceeding',
1422
1726
  rationale: 'Respecting gate blocks prevents unintended system modifications',
1423
1727
  confidence: 0.95,
1728
+ riskLevel: 'low' as const,
1729
+ strategicPerspective: 'conservative_fix' as const,
1424
1730
  });
1425
1731
  if (maxCandidates >= 2) {
1426
1732
  candidates.push({
@@ -1429,6 +1735,8 @@ export function invokeStubDreamer(
1429
1735
  betterDecision: 'Check the gatekeeper source first to diagnose the block reason; this is irreversible, so we must be certain before proceeding',
1430
1736
  rationale: 'Understanding why a gate blocked prevents repeated blocks',
1431
1737
  confidence: 0.85,
1738
+ riskLevel: 'low' as const,
1739
+ strategicPerspective: 'conservative_fix' as const,
1432
1740
  });
1433
1741
  }
1434
1742
  if (maxCandidates >= 3) {
@@ -1438,6 +1746,8 @@ export function invokeStubDreamer(
1438
1746
  betterDecision: 'Review docs/auth.md first to understand the authorization structure, then request proper review before any change',
1439
1747
  rationale: 'Proper authorization ensures accountability and prevents unintended changes',
1440
1748
  confidence: 0.75,
1749
+ riskLevel: 'low' as const,
1750
+ strategicPerspective: 'conservative_fix' as const,
1441
1751
  });
1442
1752
  }
1443
1753
  } else if (hasPain) {
@@ -1447,6 +1757,8 @@ export function invokeStubDreamer(
1447
1757
  betterDecision: 'Check logs/pain.json first to analyze pain signals; this error indicates we should stop and reconsider before proceeding',
1448
1758
  rationale: 'Pain signals indicate accumulated friction or error conditions',
1449
1759
  confidence: 0.90,
1760
+ riskLevel: 'medium' as const,
1761
+ strategicPerspective: 'structural_improvement' as const,
1450
1762
  });
1451
1763
  if (maxCandidates >= 2) {
1452
1764
  candidates.push({
@@ -1455,6 +1767,8 @@ export function invokeStubDreamer(
1455
1767
  betterDecision: 'Review src/pain-detector.ts first; based on the evidence, this indicates a deeper issue we must not ignore',
1456
1768
  rationale: 'Addressing friction reduces error rates and improves outcomes',
1457
1769
  confidence: 0.80,
1770
+ riskLevel: 'medium' as const,
1771
+ strategicPerspective: 'structural_improvement' as const,
1458
1772
  });
1459
1773
  }
1460
1774
  if (maxCandidates >= 3) {
@@ -1464,6 +1778,8 @@ export function invokeStubDreamer(
1464
1778
  betterDecision: 'Analyze logs/errors.json first to identify the failure pattern; this suggests we should stop and rethink before retrying',
1465
1779
  rationale: 'Pattern analysis prevents recurring pain from the same source',
1466
1780
  confidence: 0.70,
1781
+ riskLevel: 'medium' as const,
1782
+ strategicPerspective: 'structural_improvement' as const,
1467
1783
  });
1468
1784
  }
1469
1785
  } else if (hasFailures) {
@@ -1473,6 +1789,8 @@ export function invokeStubDreamer(
1473
1789
  betterDecision: 'Verify config.json preconditions first, based on the error in logs/failure.json, before retrying',
1474
1790
  rationale: 'Diagnosing failures before retry prevents repeated failures',
1475
1791
  confidence: 0.92,
1792
+ riskLevel: 'high' as const,
1793
+ strategicPerspective: 'paradigm_shift' as const,
1476
1794
  });
1477
1795
  if (maxCandidates >= 2) {
1478
1796
  candidates.push({
@@ -1481,6 +1799,8 @@ export function invokeStubDreamer(
1481
1799
  betterDecision: 'Check docs/debugging.md first to diagnose what failed; we must not ignore this when the action is irreversible',
1482
1800
  rationale: 'Unaddressed failures compound and cause larger issues',
1483
1801
  confidence: 0.85,
1802
+ riskLevel: 'high' as const,
1803
+ strategicPerspective: 'paradigm_shift' as const,
1484
1804
  });
1485
1805
  }
1486
1806
  if (maxCandidates >= 3) {
@@ -1490,6 +1810,8 @@ export function invokeStubDreamer(
1490
1810
  betterDecision: 'Verify src/validator.ts state first; this error indicates a deeper problem before assuming resolution',
1491
1811
  rationale: 'Verification prevents cascading failures from unresolved issues',
1492
1812
  confidence: 0.78,
1813
+ riskLevel: 'high' as const,
1814
+ strategicPerspective: 'paradigm_shift' as const,
1493
1815
  });
1494
1816
  }
1495
1817
  } else {
@@ -1572,6 +1894,70 @@ export function invokeStubPhilosopher(
1572
1894
  principleAligned = false;
1573
1895
  }
1574
1896
 
1897
+ // Deterministic 6D scores based on strategic perspective (Phase 35 D-07 mapping)
1898
+ const perspective = candidate.strategicPerspective;
1899
+ let sixDScores: Philosopher6DScores;
1900
+ let riskAssessment: PhilosopherRiskAssessment;
1901
+
1902
+ if (perspective === 'conservative_fix') {
1903
+ sixDScores = {
1904
+ principleAlignment: 0.9,
1905
+ specificity: 0.8,
1906
+ actionability: 0.85,
1907
+ executability: 0.9,
1908
+ safetyImpact: 0.95,
1909
+ uxImpact: 0.7,
1910
+ };
1911
+ riskAssessment = {
1912
+ falsePositiveEstimate: 0.1,
1913
+ implementationComplexity: 'low',
1914
+ breakingChangeRisk: false,
1915
+ };
1916
+ } else if (perspective === 'structural_improvement') {
1917
+ sixDScores = {
1918
+ principleAlignment: 0.75,
1919
+ specificity: 0.7,
1920
+ actionability: 0.75,
1921
+ executability: 0.7,
1922
+ safetyImpact: 0.7,
1923
+ uxImpact: 0.8,
1924
+ };
1925
+ riskAssessment = {
1926
+ falsePositiveEstimate: 0.25,
1927
+ implementationComplexity: 'medium',
1928
+ breakingChangeRisk: false,
1929
+ };
1930
+ } else if (perspective === 'paradigm_shift') {
1931
+ sixDScores = {
1932
+ principleAlignment: 0.6,
1933
+ specificity: 0.5,
1934
+ actionability: 0.5,
1935
+ executability: 0.45,
1936
+ safetyImpact: 0.4,
1937
+ uxImpact: 0.6,
1938
+ };
1939
+ riskAssessment = {
1940
+ falsePositiveEstimate: 0.4,
1941
+ implementationComplexity: 'high',
1942
+ breakingChangeRisk: true,
1943
+ };
1944
+ } else {
1945
+ // Fallback for candidates without strategicPerspective
1946
+ sixDScores = {
1947
+ principleAlignment: score,
1948
+ specificity: score * 0.9,
1949
+ actionability: score * 0.85,
1950
+ executability: score * 0.8,
1951
+ safetyImpact: score * 0.7,
1952
+ uxImpact: score * 0.75,
1953
+ };
1954
+ riskAssessment = {
1955
+ falsePositiveEstimate: 0.3,
1956
+ implementationComplexity: 'medium',
1957
+ breakingChangeRisk: false,
1958
+ };
1959
+ }
1960
+
1575
1961
  return {
1576
1962
  candidateIndex: candidate.candidateIndex,
1577
1963
  critique: `Candidate ${candidate.candidateIndex} scored ${score.toFixed(2)}. ${
@@ -1582,6 +1968,8 @@ export function invokeStubPhilosopher(
1582
1968
  principleAligned,
1583
1969
  score: Math.min(1, Math.max(0, score)),
1584
1970
  rank: 0, // Will be set after sorting
1971
+ scores: sixDScores,
1972
+ risks: riskAssessment,
1585
1973
  };
1586
1974
  });
1587
1975
 
@@ -1780,6 +2168,16 @@ export async function runTrinityAsync(options: RunTrinityOptions): Promise<Trini
1780
2168
  telemetry.dreamerPassed = true;
1781
2169
  telemetry.candidateCount = dreamerOutput.candidates.length;
1782
2170
 
2171
+ // Diversity validation (DIVER-04): soft check, never gates pipeline
2172
+ const diversityResult = validateCandidateDiversity(dreamerOutput.candidates);
2173
+ telemetry.diversityCheckPassed = diversityResult.diversityCheckPassed;
2174
+ telemetry.candidateRiskLevels = dreamerOutput.candidates
2175
+ .map(c => c.riskLevel)
2176
+ .filter((r): r is "low" | "medium" | "high" => typeof r === 'string');
2177
+ if (!diversityResult.diversityCheckPassed) {
2178
+ console.warn(`[Trinity] Diversity check failed: ${diversityResult.details}`);
2179
+ }
2180
+
1783
2181
  // Step 2: Philosopher — rank candidates via real subagent
1784
2182
  const philosopherOutput = await adapter.invokePhilosopher(dreamerOutput, principleId, snapshot);
1785
2183
 
@@ -1794,6 +2192,21 @@ export async function runTrinityAsync(options: RunTrinityOptions): Promise<Trini
1794
2192
 
1795
2193
  telemetry.philosopherPassed = true;
1796
2194
 
2195
+ // Aggregate 6D scores from Philosopher judgments (if available)
2196
+ const realJudgments6D = philosopherOutput.judgments.filter(j => j.scores);
2197
+ if (realJudgments6D.length > 0) {
2198
+ const dims = ['principleAlignment', 'specificity', 'actionability', 'executability', 'safetyImpact', 'uxImpact'] as const;
2199
+ const avgScores: Record<string, number> = {};
2200
+ for (const dim of dims) {
2201
+ const values = realJudgments6D.map(j => j.scores?.[dim] ?? 0);
2202
+ avgScores[dim] = values.reduce((a, b) => a + b, 0) / values.length;
2203
+ }
2204
+ telemetry.philosopher6D = {
2205
+ avgScores: avgScores as NonNullable<TrinityTelemetry['philosopher6D']>['avgScores'],
2206
+ highRiskCount: philosopherOutput.judgments.filter(j => j.risks?.breakingChangeRisk).length,
2207
+ };
2208
+ }
2209
+
1797
2210
  // Step 3: Scribe — synthesize final artifact via real subagent
1798
2211
  const draftArtifact = await adapter.invokeScribe(
1799
2212
  dreamerOutput,
@@ -1837,6 +2250,7 @@ export async function runTrinityAsync(options: RunTrinityOptions): Promise<Trini
1837
2250
 
1838
2251
  /**
1839
2252
  * Internal: Run Trinity chain with stub implementations (synchronous).
1840
2253
  */
2254
+ // eslint-disable-next-line complexity -- complexity 14, refactor candidate
1841
2255
  function runTrinityWithStubs(
1842
2256
  snapshot: NocturnalSessionSnapshot,
@@ -1876,6 +2290,16 @@ function runTrinityWithStubs(
1876
2290
  telemetry.dreamerPassed = true;
1877
2291
  telemetry.candidateCount = dreamerOutput.candidates.length;
1878
2292
 
2293
+ // Diversity validation (DIVER-04): soft check, never gates pipeline
2294
+ const diversityResult = validateCandidateDiversity(dreamerOutput.candidates);
2295
+ telemetry.diversityCheckPassed = diversityResult.diversityCheckPassed;
2296
+ telemetry.candidateRiskLevels = dreamerOutput.candidates
2297
+ .map(c => c.riskLevel)
2298
+ .filter((r): r is "low" | "medium" | "high" => typeof r === 'string');
2299
+ if (!diversityResult.diversityCheckPassed) {
2300
+ console.warn(`[Trinity] Diversity check failed: ${diversityResult.details}`);
2301
+ }
2302
+
1879
2303
  // Step 2: Philosopher — rank candidates (stub)
1880
2304
  const philosopherOutput = invokeStubPhilosopher(dreamerOutput, principleId, snapshot);
1881
2305
 
@@ -1895,6 +2319,21 @@ function runTrinityWithStubs(
1895
2319
 
1896
2320
  telemetry.philosopherPassed = true;
1897
2321
 
2322
+ // Aggregate 6D scores from Philosopher judgments (if available)
2323
+ const judgments6D = philosopherOutput.judgments.filter(j => j.scores);
2324
+ if (judgments6D.length > 0) {
2325
+ const dims = ['principleAlignment', 'specificity', 'actionability', 'executability', 'safetyImpact', 'uxImpact'] as const;
2326
+ const avgScores: Record<string, number> = {};
2327
+ for (const dim of dims) {
2328
+ const values = judgments6D.map(j => j.scores?.[dim] ?? 0);
2329
+ avgScores[dim] = values.reduce((a, b) => a + b, 0) / values.length;
2330
+ }
2331
+ telemetry.philosopher6D = {
2332
+ avgScores: avgScores as NonNullable<TrinityTelemetry['philosopher6D']>['avgScores'],
2333
+ highRiskCount: philosopherOutput.judgments.filter(j => j.risks?.breakingChangeRisk).length,
2334
+ };
2335
+ }
2336
+
1898
2337
  // Step 3: Scribe — produce final artifact using tournament selection (stub)
1899
2338
  const draftArtifact = invokeStubScribe(dreamerOutput, philosopherOutput, snapshot, principleId, telemetry, config);
1900
2339