principles-disciple 1.28.0 → 1.28.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/openclaw.plugin.json +1 -1
- package/package.json +4 -4
- package/scripts/validate-live-path.ts +18 -18
- package/src/commands/nocturnal-train.ts +4 -6
- package/src/commands/pain.ts +8 -11
- package/src/commands/pd-reflect.ts +1 -1
- package/src/core/bootstrap-rules.ts +3 -3
- package/src/core/merge-gate-audit.ts +1 -1
- package/src/core/nocturnal-candidate-scoring.ts +131 -0
- package/src/core/nocturnal-reasoning-deriver.ts +337 -0
- package/src/core/nocturnal-trinity.ts +454 -18
- package/src/core/pain-context-extractor.ts +1 -3
- package/src/core/principle-tree-migration.ts +2 -4
- package/src/core/thinking-os-parser.ts +3 -3
- package/src/hooks/bash-risk.ts +1 -1
- package/src/hooks/gfi-gate.ts +1 -1
- package/src/hooks/pain.ts +1 -1
- package/src/hooks/prompt.ts +36 -2
- package/src/hooks/subagent.ts +1 -1
- package/src/index.ts +1 -1
- package/src/service/evolution-worker.ts +1 -1
- package/src/service/health-query-service.ts +15 -6
- package/src/service/subagent-workflow/nocturnal-workflow-manager.ts +0 -1
- package/tests/core/nocturnal-candidate-scoring.test.ts +132 -0
- package/tests/core/nocturnal-reasoning-deriver.test.ts +372 -0
- package/tests/core/nocturnal-trinity.test.ts +791 -0
|
@@ -35,12 +35,17 @@ import * as os from 'os';
|
|
|
35
35
|
import * as path from 'path';
|
|
36
36
|
import type { NocturnalSessionSnapshot } from './nocturnal-trajectory-extractor.js';
|
|
37
37
|
import { computeThinkingModelDelta } from './nocturnal-trajectory-extractor.js';
|
|
38
|
+
import {
|
|
39
|
+
deriveReasoningChain,
|
|
40
|
+
deriveContextualFactors,
|
|
41
|
+
} from './nocturnal-reasoning-deriver.js';
|
|
38
42
|
import type { TrinityArtificerContext } from './nocturnal-artificer.js';
|
|
39
43
|
import {
|
|
40
44
|
runTournament,
|
|
41
45
|
DEFAULT_SCORING_WEIGHTS,
|
|
42
46
|
type ScoringWeights,
|
|
43
47
|
type TournamentTraceEntry,
|
|
48
|
+
validateCandidateDiversity,
|
|
44
49
|
} from './nocturnal-candidate-scoring.js';
|
|
45
50
|
import {
|
|
46
51
|
DEFAULT_THRESHOLDS,
|
|
@@ -61,7 +66,7 @@ const FALLBACK_MODEL = process.env.OPENCLAW_DEFAULT_MODEL || 'MiniMax-M2.7';
|
|
|
61
66
|
// These prompts are embedded at build time. The agents/ directory was removed
|
|
62
67
|
// to eliminate fragile runtime file dependencies on the file system.
|
|
63
68
|
|
|
64
|
-
const NOCTURNAL_DREAMER_PROMPT = `# Nocturnal Dreamer — Candidate Generation
|
|
69
|
+
export const NOCTURNAL_DREAMER_PROMPT = `# Nocturnal Dreamer — Candidate Generation
|
|
65
70
|
|
|
66
71
|
> System prompt for Trinity Dreamer stage.
|
|
67
72
|
> Role: Generate multiple alternative "better decision" candidates from a session snapshot.
|
|
@@ -104,7 +109,9 @@ You MUST respond with ONLY a valid JSON object. No markdown, no explanation, no
|
|
|
104
109
|
"badDecision": "<what the agent did wrong>",
|
|
105
110
|
"betterDecision": "<what the agent should have done>",
|
|
106
111
|
"rationale": "<why this is better>",
|
|
107
|
-
"confidence": 0.95
|
|
112
|
+
"confidence": 0.95,
|
|
113
|
+
"riskLevel": "low",
|
|
114
|
+
"strategicPerspective": "conservative_fix"
|
|
108
115
|
}
|
|
109
116
|
],
|
|
110
117
|
"generatedAt": "<ISO timestamp>"
|
|
@@ -131,6 +138,23 @@ You MUST respond with ONLY a valid JSON object. No markdown, no explanation, no
|
|
|
131
138
|
- Do not generate candidates with identical betterDecisions
|
|
132
139
|
- Vary the confidence scores to reflect genuine uncertainty
|
|
133
140
|
|
|
141
|
+
## Strategic Perspective Requirements
|
|
142
|
+
|
|
143
|
+
Generate candidates from DISTINCT strategic perspectives:
|
|
144
|
+
|
|
145
|
+
- **conservative_fix**: Minimal deviation from original approach. Add a
|
|
146
|
+
verification or validation step that was missing.
|
|
147
|
+
- **structural_improvement**: Reorder operations or introduce an intermediate
|
|
148
|
+
checkpoint. Change HOW the goal is achieved.
|
|
149
|
+
- **paradigm_shift**: Challenge whether the original goal was correct.
|
|
150
|
+
Consider a fundamentally different approach.
|
|
151
|
+
|
|
152
|
+
Each candidate MUST specify \`riskLevel\` ("low"|"medium"|"high") and
|
|
153
|
+
\`strategicPerspective\` matching one of the above.
|
|
154
|
+
|
|
155
|
+
ANTI-PATTERN: Candidates that differ only in wording, not in substance,
|
|
156
|
+
will be rejected.
|
|
157
|
+
|
|
134
158
|
### Candidates must NOT:
|
|
135
159
|
- Contain raw user text or private content
|
|
136
160
|
- Reference non-existent tools or impossible actions
|
|
@@ -148,7 +172,7 @@ If you cannot generate valid candidates (e.g., no clear violation found, insuffi
|
|
|
148
172
|
"generatedAt": "<ISO timestamp>"
|
|
149
173
|
}`;
|
|
150
174
|
|
|
151
|
-
const NOCTURNAL_PHILOSOPHER_PROMPT = `# Nocturnal Philosopher — Candidate Evaluation and Ranking
|
|
175
|
+
export const NOCTURNAL_PHILOSOPHER_PROMPT = `# Nocturnal Philosopher — Candidate Evaluation and Ranking
|
|
152
176
|
|
|
153
177
|
> System prompt for Trinity Philosopher stage.
|
|
154
178
|
> Role: Evaluate Dreamer's candidates and rank them by principle alignment and quality.
|
|
@@ -187,7 +211,20 @@ You MUST respond with ONLY a valid JSON object. No markdown, no explanation, no
|
|
|
187
211
|
"critique": "<principle-grounded critique>",
|
|
188
212
|
"principleAligned": true,
|
|
189
213
|
"score": 0.92,
|
|
190
|
-
"rank": 1
|
|
214
|
+
"rank": 1,
|
|
215
|
+
"scores": {
|
|
216
|
+
"principleAlignment": 0.9,
|
|
217
|
+
"specificity": 0.85,
|
|
218
|
+
"actionability": 0.9,
|
|
219
|
+
"executability": 0.95,
|
|
220
|
+
"safetyImpact": 0.8,
|
|
221
|
+
"uxImpact": 0.85
|
|
222
|
+
},
|
|
223
|
+
"risks": {
|
|
224
|
+
"falsePositiveEstimate": 0.1,
|
|
225
|
+
"implementationComplexity": "low",
|
|
226
|
+
"breakingChangeRisk": false
|
|
227
|
+
}
|
|
191
228
|
}
|
|
192
229
|
],
|
|
193
230
|
"overallAssessment": "<summary of candidate set quality>",
|
|
@@ -197,10 +234,18 @@ You MUST respond with ONLY a valid JSON object. No markdown, no explanation, no
|
|
|
197
234
|
## Evaluation Criteria
|
|
198
235
|
|
|
199
236
|
### Score Components (0-1 scale each):
|
|
200
|
-
1. **Principle Alignment** (weight: 0.
|
|
201
|
-
2. **Specificity** (weight: 0.
|
|
202
|
-
3. **Actionability** (weight: 0.
|
|
237
|
+
1. **Principle Alignment** (weight: 0.20) — Does the betterDecision properly reflect the target principle?
|
|
238
|
+
2. **Specificity** (weight: 0.15) — Is badDecision specific? Is betterDecision actionable?
|
|
239
|
+
3. **Actionability** (weight: 0.15) — Does betterDecision describe a specific next step?
|
|
203
240
|
4. **Executability** (weight: 0.15) — Does betterDecision start with a bounded verb (read, check, verify, edit, write, etc.) and reference a concrete target?
|
|
241
|
+
5. **Safety Impact** (weight: 0.20) — Does the betterDecision reduce risk of data loss, corruption, or new failure modes? Would implementing this prevent dangerous operations?
|
|
242
|
+
6. **UX Impact** (weight: 0.15) — Does the betterDecision reduce user frustration or improve response reliability? Would the user experience be noticeably better?
|
|
243
|
+
|
|
244
|
+
### Risk Assessment (per candidate):
|
|
245
|
+
For each candidate, also assess:
|
|
246
|
+
- **falsePositiveEstimate** (0-1): How likely is this candidate a false positive (the "betterDecision" is actually not better)?
|
|
247
|
+
- **implementationComplexity** ("low"/"medium"/"high"): How complex would it be to implement this correction?
|
|
248
|
+
- **breakingChangeRisk** (boolean): Could implementing this correction break existing behavior?
|
|
204
249
|
|
|
205
250
|
### Executability Check:
|
|
206
251
|
A betterDecision is executable if it:
|
|
@@ -243,13 +288,16 @@ and synthesize it into a final decision-point artifact that passes arbiter valid
|
|
|
243
288
|
You will receive:
|
|
244
289
|
- A **target principle** (principle ID and description)
|
|
245
290
|
- A **session trajectory snapshot**
|
|
246
|
-
- **Philosopher's judgments** — ranked candidates with critiques
|
|
291
|
+
- **Philosopher's judgments** — ranked candidates with critiques and 6D scores
|
|
247
292
|
- **Dreamer's candidates** — the original candidate list
|
|
293
|
+
- **Philosopher's risk assessments** — falsePositiveEstimate, implementationComplexity, breakingChangeRisk per candidate
|
|
294
|
+
|
|
295
|
+
Use the risk assessments to determine which candidates require deeper contrastive analysis. High-risk candidates (high breakingChangeRisk or implementationComplexity) warrant thorough rejectedAnalysis.
|
|
248
296
|
|
|
249
297
|
## Task
|
|
250
298
|
|
|
251
299
|
Select the best candidate (Philosopher's rank 1) and synthesize it into
|
|
252
|
-
a final TrinityDraftArtifact.
|
|
300
|
+
a final TrinityDraftArtifact. Then produce a **Contrastive Analysis** that explains why the winner was chosen and what to learn from the runners-up.
|
|
253
301
|
|
|
254
302
|
## Output Format
|
|
255
303
|
|
|
@@ -271,9 +319,26 @@ You MUST respond with ONLY a valid JSON object. No markdown, no explanation, no
|
|
|
271
319
|
"candidateCount": 2,
|
|
272
320
|
"selectedCandidateIndex": 0,
|
|
273
321
|
"stageFailures": []
|
|
322
|
+
},
|
|
323
|
+
"rejectedAnalysis": {
|
|
324
|
+
"whyRejected": "<mental model that led to the rejected candidate>",
|
|
325
|
+
"warningSignals": ["<observable caution trigger 1>", "<trigger 2>"],
|
|
326
|
+
"correctiveThinking": "<correct reasoning path that should have been taken>"
|
|
327
|
+
},
|
|
328
|
+
"chosenJustification": {
|
|
329
|
+
"whyChosen": "<why this candidate was selected over others>",
|
|
330
|
+
"keyInsights": ["<transferable insight 1>", "<insight 2>", "<insight 3>"],
|
|
331
|
+
"limitations": ["<when this approach does NOT apply 1>", "<limitation 2>"]
|
|
332
|
+
},
|
|
333
|
+
"contrastiveAnalysis": {
|
|
334
|
+
"criticalDifference": "<ONE key insight distinguishing chosen from rejected>",
|
|
335
|
+
"decisionTrigger": "<When X, do Y pattern>",
|
|
336
|
+
"preventionStrategy": "<how to systematically avoid the rejected path>"
|
|
274
337
|
}
|
|
275
338
|
}
|
|
276
339
|
|
|
340
|
+
All three analysis sections (rejectedAnalysis, chosenJustification, contrastiveAnalysis) are optional but recommended. When multiple candidates were evaluated, include them to provide richer training signals.
|
|
341
|
+
|
|
277
342
|
## Validation
|
|
278
343
|
|
|
279
344
|
If you cannot synthesize an artifact:
|
|
@@ -403,6 +468,70 @@ export class TrinityRuntimeContractError extends Error {
|
|
|
403
468
|
}
|
|
404
469
|
}
|
|
405
470
|
|
|
471
|
+
// ---------------------------------------------------------------------------
|
|
472
|
+
// Reasoning Context Serialization (D-03, D-04)
|
|
473
|
+
// ---------------------------------------------------------------------------
|
|
474
|
+
|
|
475
|
+
/**
|
|
476
|
+
* Format derived reasoning signals into a prompt section for Dreamer.
|
|
477
|
+
*
|
|
478
|
+
* Returns the formatted "## Reasoning Context" section as a string,
|
|
479
|
+
* or null if no meaningful reasoning content exists to include.
|
|
480
|
+
*
|
|
481
|
+
* Only reasoningChain + contextualFactors are serialized.
|
|
482
|
+
* DecisionPoints are NOT injected (reserved for Phase 37 Scribe per D-04).
|
|
483
|
+
*/
|
|
484
|
+
export function formatReasoningContext(snapshot: NocturnalSessionSnapshot): string | null {
|
|
485
|
+
const reasoningChain = deriveReasoningChain(snapshot.assistantTurns);
|
|
486
|
+
const contextualFactors = deriveContextualFactors(snapshot);
|
|
487
|
+
|
|
488
|
+
const hasReasoningContent = reasoningChain.length > 0 &&
|
|
489
|
+
reasoningChain.some(s => s.thinkingContent || s.uncertaintyMarkers.length > 0);
|
|
490
|
+
|
|
491
|
+
if (!hasReasoningContent && !contextualFactors.fileStructureKnown &&
|
|
492
|
+
!contextualFactors.errorHistoryPresent &&
|
|
493
|
+
!contextualFactors.userGuidanceAvailable &&
|
|
494
|
+
!contextualFactors.timePressure) {
|
|
495
|
+
return null;
|
|
496
|
+
}
|
|
497
|
+
|
|
498
|
+
const sections: string[] = ['## Reasoning Context', ''];
|
|
499
|
+
|
|
500
|
+
// Serialize reasoning chain (only turns with non-empty signals)
|
|
501
|
+
const significantTurns = reasoningChain.filter(
|
|
502
|
+
s => s.thinkingContent || s.uncertaintyMarkers.length > 0
|
|
503
|
+
);
|
|
504
|
+
for (const signal of significantTurns) {
|
|
505
|
+
if (signal.thinkingContent) {
|
|
506
|
+
sections.push(`- Turn ${signal.turnIndex}: Internal reasoning: "${signal.thinkingContent.slice(0, 200)}"`);
|
|
507
|
+
}
|
|
508
|
+
if (signal.uncertaintyMarkers.length > 0) {
|
|
509
|
+
sections.push(`- Turn ${signal.turnIndex}: Uncertainty detected: ${signal.uncertaintyMarkers.join(', ')}`);
|
|
510
|
+
}
|
|
511
|
+
if (signal.confidenceSignal !== 'high') {
|
|
512
|
+
sections.push(`- Turn ${signal.turnIndex}: Confidence: ${signal.confidenceSignal}`);
|
|
513
|
+
}
|
|
514
|
+
}
|
|
515
|
+
|
|
516
|
+
// Serialize contextual factors
|
|
517
|
+
const factorLabels: string[] = [];
|
|
518
|
+
if (contextualFactors.fileStructureKnown) factorLabels.push('File structure explored before modification');
|
|
519
|
+
if (contextualFactors.errorHistoryPresent) factorLabels.push('Prior error history present');
|
|
520
|
+
if (contextualFactors.userGuidanceAvailable) factorLabels.push('User guidance/corrections available');
|
|
521
|
+
if (contextualFactors.timePressure) factorLabels.push('Time pressure detected (rapid tool calls)');
|
|
522
|
+
|
|
523
|
+
if (factorLabels.length > 0) {
|
|
524
|
+
sections.push('');
|
|
525
|
+
sections.push('Environmental context:');
|
|
526
|
+
for (const label of factorLabels) {
|
|
527
|
+
sections.push(`- ${label}`);
|
|
528
|
+
}
|
|
529
|
+
}
|
|
530
|
+
|
|
531
|
+
sections.push('');
|
|
532
|
+
return sections.join('\n');
|
|
533
|
+
}
|
|
534
|
+
|
|
406
535
|
export class OpenClawTrinityRuntimeAdapter implements TrinityRuntimeAdapter {
|
|
407
536
|
|
|
408
537
|
private readonly api: {
|
|
@@ -478,8 +607,8 @@ export class OpenClawTrinityRuntimeAdapter implements TrinityRuntimeAdapter {
|
|
|
478
607
|
fs.rmSync(fullPath, { recursive: true, force: true });
|
|
479
608
|
}
|
|
480
609
|
}
|
|
481
|
-
} catch {
|
|
482
|
-
|
|
610
|
+
} catch (err) {
|
|
611
|
+
this.api.logger?.warn?.(`[Trinity] Failed to cleanup stale temp dirs: ${err instanceof Error ? err.message.replace(/([A-Za-z]:\\[^:\\s]+|\\\/[^\s:]+)/g, '[PATH]') : String(err)}`);
|
|
483
612
|
}
|
|
484
613
|
}
|
|
485
614
|
|
|
@@ -559,6 +688,12 @@ export class OpenClawTrinityRuntimeAdapter implements TrinityRuntimeAdapter {
|
|
|
559
688
|
.join('\n');
|
|
560
689
|
}
|
|
561
690
|
|
|
691
|
+
/** Clamp a value to [0, 1] range — used for LLM-produced scores that may be out of range */
|
|
692
|
+
private clamp01(val: unknown, fallback = 0): number {
|
|
693
|
+
if (typeof val !== 'number' || !Number.isFinite(val)) return fallback;
|
|
694
|
+
return Math.min(1, Math.max(0, val));
|
|
695
|
+
}
|
|
696
|
+
|
|
562
697
|
private classifyRuntimeError(error: unknown): TrinityRuntimeFailureCode {
|
|
563
698
|
const detail = error instanceof Error ? error.message : String(error);
|
|
564
699
|
return /timeout/i.test(detail) ? 'runtime_timeout' : 'runtime_run_failed';
|
|
@@ -606,7 +741,7 @@ export class OpenClawTrinityRuntimeAdapter implements TrinityRuntimeAdapter {
|
|
|
606
741
|
} catch (err) {
|
|
607
742
|
return this.buildRuntimeFailureDreamerOutput(this.classifyRuntimeError(err), err);
|
|
608
743
|
} finally {
|
|
609
|
-
try { fs.unlinkSync(sessionFile); } catch {
|
|
744
|
+
try { fs.unlinkSync(sessionFile); } catch (err) { this.api.logger?.warn?.(`[Trinity] Failed to delete session file: ${sessionFile}`); }
|
|
610
745
|
}
|
|
611
746
|
}
|
|
612
747
|
|
|
@@ -650,7 +785,7 @@ export class OpenClawTrinityRuntimeAdapter implements TrinityRuntimeAdapter {
|
|
|
650
785
|
} catch (err) {
|
|
651
786
|
return this.buildRuntimeFailurePhilosopherOutput(this.classifyRuntimeError(err), err);
|
|
652
787
|
} finally {
|
|
653
|
-
try { fs.unlinkSync(sessionFile); } catch {
|
|
788
|
+
try { fs.unlinkSync(sessionFile); } catch (err) { this.api.logger?.warn?.(`[Trinity] Failed to delete session file: ${sessionFile}`); }
|
|
654
789
|
}
|
|
655
790
|
}
|
|
656
791
|
|
|
@@ -698,7 +833,7 @@ export class OpenClawTrinityRuntimeAdapter implements TrinityRuntimeAdapter {
|
|
|
698
833
|
this.recordFailure(this.classifyRuntimeError(err), err);
|
|
699
834
|
return null;
|
|
700
835
|
} finally {
|
|
701
|
-
try { fs.unlinkSync(sessionFile); } catch {
|
|
836
|
+
try { fs.unlinkSync(sessionFile); } catch (err) { this.api.logger?.warn?.(`[Trinity] Failed to delete session file: ${sessionFile}`); }
|
|
702
837
|
}
|
|
703
838
|
}
|
|
704
839
|
|
|
@@ -796,6 +931,12 @@ export class OpenClawTrinityRuntimeAdapter implements TrinityRuntimeAdapter {
|
|
|
796
931
|
sections.push('');
|
|
797
932
|
}
|
|
798
933
|
|
|
934
|
+
// ## Reasoning Context — derived signals from Phase 34 deriver module (D-03, D-04)
|
|
935
|
+
const reasoningSection = formatReasoningContext(snapshot);
|
|
936
|
+
if (reasoningSection) {
|
|
937
|
+
sections.push(reasoningSection);
|
|
938
|
+
}
|
|
939
|
+
|
|
799
940
|
sections.push(`## Task`,
|
|
800
941
|
`Analyze the above session and generate ${maxCandidates} candidate corrections.`,
|
|
801
942
|
`Each candidate must:`,
|
|
@@ -818,6 +959,11 @@ export class OpenClawTrinityRuntimeAdapter implements TrinityRuntimeAdapter {
|
|
|
818
959
|
): string {
|
|
819
960
|
const candidatesJson = JSON.stringify(dreamerOutput.candidates, null, 2);
|
|
820
961
|
|
|
962
|
+
// Build per-candidate metadata from Dreamer (risk level + strategic perspective)
|
|
963
|
+
const candidateMeta = dreamerOutput.candidates
|
|
964
|
+
.filter(c => c.riskLevel || c.strategicPerspective)
|
|
965
|
+
.map(c => `- Candidate #${c.candidateIndex}: risk=${c.riskLevel || 'N/A'}, perspective=${c.strategicPerspective || 'N/A'}`);
|
|
966
|
+
|
|
821
967
|
// Build violation summary from snapshot for Philosopher to validate candidates
|
|
822
968
|
const failures = snapshot.toolCalls
|
|
823
969
|
.filter(tc => tc.outcome === 'failure')
|
|
@@ -862,6 +1008,11 @@ export class OpenClawTrinityRuntimeAdapter implements TrinityRuntimeAdapter {
|
|
|
862
1008
|
sections.push(userCues.join('\n'));
|
|
863
1009
|
}
|
|
864
1010
|
|
|
1011
|
+
if (candidateMeta.length > 0) {
|
|
1012
|
+
sections.push(`\n### Candidate Risk Profiles (${candidateMeta.length})`);
|
|
1013
|
+
sections.push(candidateMeta.join('\n'));
|
|
1014
|
+
}
|
|
1015
|
+
|
|
865
1016
|
sections.push(
|
|
866
1017
|
``,
|
|
867
1018
|
`## Dreamer's Candidates`,
|
|
@@ -926,18 +1077,29 @@ export class OpenClawTrinityRuntimeAdapter implements TrinityRuntimeAdapter {
|
|
|
926
1077
|
sections.push(`(No specific violations found in snapshot)`);
|
|
927
1078
|
}
|
|
928
1079
|
|
|
1080
|
+
// Build risk summary from Philosopher 6D judgments for Scribe contrastive analysis
|
|
1081
|
+
const riskSummary = philosopherOutput.judgments
|
|
1082
|
+
.map(j => {
|
|
1083
|
+
const risk = j.risks ? ` [risks: fp=${j.risks.falsePositiveEstimate.toFixed(2)}, complexity=${j.risks.implementationComplexity}, breaking=${j.risks.breakingChangeRisk}]` : '';
|
|
1084
|
+
return ` - candidate[${j.candidateIndex}] (rank ${j.rank}, score ${j.score?.toFixed(2) ?? 'n/a'}): ${j.principleAligned ? 'aligned' : 'not aligned'}${risk}`;
|
|
1085
|
+
})
|
|
1086
|
+
.join('\n');
|
|
1087
|
+
|
|
929
1088
|
sections.push(
|
|
930
1089
|
``,
|
|
931
1090
|
`## Dreamer's Candidates`,
|
|
932
1091
|
candidatesJson,
|
|
933
1092
|
``,
|
|
934
|
-
`## Philosopher's Judgments`,
|
|
1093
|
+
`## Philosopher's Judgments + Risk Assessments`,
|
|
935
1094
|
judgmentsJson,
|
|
936
1095
|
``,
|
|
1096
|
+
`## Philosopher 6D Risk Summary`,
|
|
1097
|
+
`Use this to determine contrastive depth — high-risk candidates need deeper analysis:`,
|
|
1098
|
+
riskSummary,
|
|
1099
|
+
``,
|
|
937
1100
|
`## Task`,
|
|
938
1101
|
`Select the best candidate (Philosopher's rank 1) and synthesize it into a final TrinityDraftArtifact.`,
|
|
939
|
-
`
|
|
940
|
-
`are grounded in the actual session events, not just Dreamer's interpretation.`,
|
|
1102
|
+
`Then produce contrastive analysis explaining why the winner was chosen and what the rejected candidates teach us.`,
|
|
941
1103
|
``,
|
|
942
1104
|
`## CRITICAL: betterDecision Format Requirements`,
|
|
943
1105
|
`Your betterDecision MUST pass executability validation. It MUST:`,
|
|
@@ -1057,7 +1219,39 @@ export class OpenClawTrinityRuntimeAdapter implements TrinityRuntimeAdapter {
|
|
|
1057
1219
|
}
|
|
1058
1220
|
return {
|
|
1059
1221
|
valid: parsed.valid,
|
|
1060
|
-
judgments: parsed.judgments,
|
|
1222
|
+
judgments: parsed.judgments.map((j: Record<string, unknown>) => ({
|
|
1223
|
+
candidateIndex: j.candidateIndex,
|
|
1224
|
+
critique: j.critique ?? '',
|
|
1225
|
+
principleAligned: j.principleAligned ?? false,
|
|
1226
|
+
score: j.score ?? 0,
|
|
1227
|
+
rank: j.rank ?? 0,
|
|
1228
|
+
// Optional 6D scores and risk assessment (Phase 36)
|
|
1229
|
+
// Only include a dimension if the LLM actually returned a number (not undefined/null).
|
|
1230
|
+
// This preserves the distinction between "LLM returned 0" vs "LLM omitted the field."
|
|
1231
|
+
...(j.scores ? {
|
|
1232
|
+
scores: Object.fromEntries(
|
|
1233
|
+
(['principleAlignment', 'specificity', 'actionability', 'executability', 'safetyImpact', 'uxImpact'] as const)
|
|
1234
|
+
.map(dim => [dim, (j.scores as Record<string, unknown>)[dim]])
|
|
1235
|
+
.filter(([, v]) => typeof v === 'number')
|
|
1236
|
+
.map(([dim, v]) => [dim, this.clamp01(v as number)])
|
|
1237
|
+
)
|
|
1238
|
+
} : {}),
|
|
1239
|
+
...(j.risks ? (() => {
|
|
1240
|
+
const risks = j.risks as Record<string, unknown>;
|
|
1241
|
+
const fp = risks.falsePositiveEstimate;
|
|
1242
|
+
const hasFp = typeof fp === 'number';
|
|
1243
|
+
const risksObj: {
|
|
1244
|
+
falsePositiveEstimate?: number;
|
|
1245
|
+
implementationComplexity: string;
|
|
1246
|
+
breakingChangeRisk: boolean;
|
|
1247
|
+
} = {
|
|
1248
|
+
implementationComplexity: (risks.implementationComplexity as string) ?? 'medium',
|
|
1249
|
+
breakingChangeRisk: Boolean(risks.breakingChangeRisk),
|
|
1250
|
+
};
|
|
1251
|
+
if (hasFp) risksObj.falsePositiveEstimate = this.clamp01(fp as number);
|
|
1252
|
+
return { risks: risksObj };
|
|
1253
|
+
})() : {}),
|
|
1254
|
+
})),
|
|
1061
1255
|
overallAssessment: parsed.overallAssessment ?? '',
|
|
1062
1256
|
reason: parsed.reason,
|
|
1063
1257
|
generatedAt: parsed.generatedAt ?? new Date().toISOString(),
|
|
@@ -1117,6 +1311,22 @@ export class OpenClawTrinityRuntimeAdapter implements TrinityRuntimeAdapter {
|
|
|
1117
1311
|
return null;
|
|
1118
1312
|
}
|
|
1119
1313
|
|
|
1314
|
+
// Validate contrastive analysis sub-fields (H-03): only include if structure is intact
|
|
1315
|
+
const contrastiveAnalysis = parsed.contrastiveAnalysis
|
|
1316
|
+
&& typeof parsed.contrastiveAnalysis === 'object'
|
|
1317
|
+
&& typeof parsed.contrastiveAnalysis.criticalDifference === 'string'
|
|
1318
|
+
? parsed.contrastiveAnalysis : undefined;
|
|
1319
|
+
|
|
1320
|
+
const rejectedAnalysis = parsed.rejectedAnalysis
|
|
1321
|
+
&& typeof parsed.rejectedAnalysis === 'object'
|
|
1322
|
+
&& typeof parsed.rejectedAnalysis.whyRejected === 'string'
|
|
1323
|
+
? parsed.rejectedAnalysis : undefined;
|
|
1324
|
+
|
|
1325
|
+
const chosenJustification = parsed.chosenJustification
|
|
1326
|
+
&& typeof parsed.chosenJustification === 'object'
|
|
1327
|
+
&& typeof parsed.chosenJustification.whyChosen === 'string'
|
|
1328
|
+
? parsed.chosenJustification : undefined;
|
|
1329
|
+
|
|
1120
1330
|
return {
|
|
1121
1331
|
selectedCandidateIndex: parsed.selectedCandidateIndex,
|
|
1122
1332
|
badDecision: parsed.badDecision ?? '',
|
|
@@ -1135,6 +1345,9 @@ export class OpenClawTrinityRuntimeAdapter implements TrinityRuntimeAdapter {
|
|
|
1135
1345
|
selectedCandidateIndex: parsed.selectedCandidateIndex,
|
|
1136
1346
|
stageFailures: [],
|
|
1137
1347
|
},
|
|
1348
|
+
...(contrastiveAnalysis ? { contrastiveAnalysis } : {}),
|
|
1349
|
+
...(rejectedAnalysis ? { rejectedAnalysis } : {}),
|
|
1350
|
+
...(chosenJustification ? { chosenJustification } : {}),
|
|
1138
1351
|
};
|
|
1139
1352
|
} catch {
|
|
1140
1353
|
this.recordFailure('runtime_run_failed', new Error(`Scribe output JSON parse error: ${json.slice(0, 200)}`));
|
|
@@ -1255,6 +1468,10 @@ export interface DreamerCandidate {
|
|
|
1255
1468
|
rationale: string;
|
|
1256
1469
|
/** Confidence that this candidate is valid (0-1) */
|
|
1257
1470
|
confidence: number;
|
|
1471
|
+
/** Risk level of this candidate's approach -- LLM-judged per D-02 */
|
|
1472
|
+
riskLevel?: "low" | "medium" | "high";
|
|
1473
|
+
/** Which strategic perspective this candidate embodies per D-01 */
|
|
1474
|
+
strategicPerspective?: "conservative_fix" | "structural_improvement" | "paradigm_shift";
|
|
1258
1475
|
}
|
|
1259
1476
|
|
|
1260
1477
|
export interface DreamerOutput {
|
|
@@ -1272,6 +1489,24 @@ export interface DreamerOutput {
|
|
|
1272
1489
|
* Philosopher output — principle-grounded critique and ranking.
|
|
1273
1490
|
* Philosopher evaluates Dreamer's candidates and ranks them.
|
|
1274
1491
|
*/
|
|
1492
|
+
export interface PhilosopherRiskAssessment {
|
|
1493
|
+
/** Estimated probability that this candidate is a false positive (0-1) */
|
|
1494
|
+
falsePositiveEstimate: number;
|
|
1495
|
+
/** How complex is this candidate to implement */
|
|
1496
|
+
implementationComplexity: 'low' | 'medium' | 'high';
|
|
1497
|
+
/** Whether implementing this candidate risks breaking existing functionality */
|
|
1498
|
+
breakingChangeRisk: boolean;
|
|
1499
|
+
}
|
|
1500
|
+
|
|
1501
|
+
export interface Philosopher6DScores {
|
|
1502
|
+
principleAlignment: number;
|
|
1503
|
+
specificity: number;
|
|
1504
|
+
actionability: number;
|
|
1505
|
+
executability: number;
|
|
1506
|
+
safetyImpact: number;
|
|
1507
|
+
uxImpact: number;
|
|
1508
|
+
}
|
|
1509
|
+
|
|
1275
1510
|
export interface PhilosopherJudgment {
|
|
1276
1511
|
/** Index of the judged candidate (references DreamerCandidate.candidateIndex) */
|
|
1277
1512
|
candidateIndex: number;
|
|
@@ -1283,6 +1518,10 @@ export interface PhilosopherJudgment {
|
|
|
1283
1518
|
score: number;
|
|
1284
1519
|
/** Rank among all candidates (1 = best) */
|
|
1285
1520
|
rank: number;
|
|
1521
|
+
/** Per-dimension scores (6D evaluation) — informational, not used for tournament ranking */
|
|
1522
|
+
scores?: Philosopher6DScores;
|
|
1523
|
+
/** Risk assessment for this candidate — informational, consumed by Scribe (Phase 37) */
|
|
1524
|
+
risks?: PhilosopherRiskAssessment;
|
|
1286
1525
|
}
|
|
1287
1526
|
|
|
1288
1527
|
export interface PhilosopherOutput {
|
|
@@ -1298,6 +1537,45 @@ export interface PhilosopherOutput {
|
|
|
1298
1537
|
generatedAt: string;
|
|
1299
1538
|
}
|
|
1300
1539
|
|
|
1540
|
+
/**
|
|
1541
|
+
* Analysis of a rejected candidate — why it lost the tournament.
|
|
1542
|
+
* Informs training signal for "what to avoid".
|
|
1543
|
+
*/
|
|
1544
|
+
export interface RejectedAnalysis {
|
|
1545
|
+
/** Mental model that led to the rejected candidate */
|
|
1546
|
+
whyRejected: string;
|
|
1547
|
+
/** Observable caution triggers that were missed or ignored */
|
|
1548
|
+
warningSignals: string[];
|
|
1549
|
+
/** Correct reasoning path that should have been taken */
|
|
1550
|
+
correctiveThinking: string;
|
|
1551
|
+
}
|
|
1552
|
+
|
|
1553
|
+
/**
|
|
1554
|
+
* Justification for the chosen candidate — why it won the tournament.
|
|
1555
|
+
* Informs training signal for "what to do".
|
|
1556
|
+
*/
|
|
1557
|
+
export interface ChosenJustification {
|
|
1558
|
+
/** Why this candidate was selected over others */
|
|
1559
|
+
whyChosen: string;
|
|
1560
|
+
/** 1-3 transferable insights from this decision */
|
|
1561
|
+
keyInsights: string[];
|
|
1562
|
+
/** When this approach does NOT apply */
|
|
1563
|
+
limitations: string[];
|
|
1564
|
+
}
|
|
1565
|
+
|
|
1566
|
+
/**
|
|
1567
|
+
* Contrastive analysis: key differences between chosen and rejected paths.
|
|
1568
|
+
* Synthesizes the core lesson from the tournament.
|
|
1569
|
+
*/
|
|
1570
|
+
export interface ContrastiveAnalysis {
|
|
1571
|
+
/** ONE key insight distinguishing chosen from rejected */
|
|
1572
|
+
criticalDifference: string;
|
|
1573
|
+
/** Pattern: "When X, do Y" */
|
|
1574
|
+
decisionTrigger: string;
|
|
1575
|
+
/** How to systematically avoid the rejected path */
|
|
1576
|
+
preventionStrategy: string;
|
|
1577
|
+
}
|
|
1578
|
+
|
|
1301
1579
|
/**
|
|
1302
1580
|
* Scribe output — final structured artifact draft.
|
|
1303
1581
|
* Scribe synthesizes the best candidate into an approved artifact format.
|
|
@@ -1325,6 +1603,12 @@ export interface TrinityDraftArtifact {
|
|
|
1325
1603
|
planningRatioGain?: number;
|
|
1326
1604
|
/** Optional routing context for a follow-on Artificer stage */
|
|
1327
1605
|
artificerContext?: TrinityArtificerContext;
|
|
1606
|
+
/** Contrastive analysis: chosen vs rejected reasoning paths (SCRIBE-03) */
|
|
1607
|
+
contrastiveAnalysis?: ContrastiveAnalysis;
|
|
1608
|
+
/** Analysis of the rejected candidates — why they lost the tournament (SCRIBE-01) */
|
|
1609
|
+
rejectedAnalysis?: RejectedAnalysis;
|
|
1610
|
+
/** Justification for the chosen candidate — why it won (SCRIBE-02) */
|
|
1611
|
+
chosenJustification?: ChosenJustification;
|
|
1328
1612
|
}
|
|
1329
1613
|
|
|
1330
1614
|
export interface TrinityTelemetry {
|
|
@@ -1350,6 +1634,24 @@ export interface TrinityTelemetry {
|
|
|
1350
1634
|
winnerThresholdPassed?: boolean;
|
|
1351
1635
|
/** Number of eligible candidates after threshold check (optional) */
|
|
1352
1636
|
eligibleCandidateCount?: number;
|
|
1637
|
+
/** Whether Dreamer candidates passed diversity validation (DIVER-04) */
|
|
1638
|
+
diversityCheckPassed?: boolean;
|
|
1639
|
+
/** Risk levels assigned to Dreamer candidates (for telemetry) */
|
|
1640
|
+
candidateRiskLevels?: string[];
|
|
1641
|
+
/** Aggregate 6D Philosopher evaluation metrics (informational) */
|
|
1642
|
+
philosopher6D?: {
|
|
1643
|
+
/** Average scores across all candidates per dimension */
|
|
1644
|
+
avgScores: {
|
|
1645
|
+
principleAlignment: number;
|
|
1646
|
+
specificity: number;
|
|
1647
|
+
actionability: number;
|
|
1648
|
+
executability: number;
|
|
1649
|
+
safetyImpact: number;
|
|
1650
|
+
uxImpact: number;
|
|
1651
|
+
};
|
|
1652
|
+
/** Count of candidates with breakingChangeRisk = true */
|
|
1653
|
+
highRiskCount: number;
|
|
1654
|
+
};
|
|
1353
1655
|
}
|
|
1354
1656
|
|
|
1355
1657
|
// ---------------------------------------------------------------------------
|
|
@@ -1421,6 +1723,8 @@ export function invokeStubDreamer(
|
|
|
1421
1723
|
betterDecision: 'Review docs/gateblocks.md and verify authorization requirements first; based on the evidence, this irreversible action must be reviewed before proceeding',
|
|
1422
1724
|
rationale: 'Respecting gate blocks prevents unintended system modifications',
|
|
1423
1725
|
confidence: 0.95,
|
|
1726
|
+
riskLevel: 'low' as const,
|
|
1727
|
+
strategicPerspective: 'conservative_fix' as const,
|
|
1424
1728
|
});
|
|
1425
1729
|
if (maxCandidates >= 2) {
|
|
1426
1730
|
candidates.push({
|
|
@@ -1429,6 +1733,8 @@ export function invokeStubDreamer(
|
|
|
1429
1733
|
betterDecision: 'Check the gatekeeper source first to diagnose the block reason; this is irreversible, so we must be certain before proceeding',
|
|
1430
1734
|
rationale: 'Understanding why a gate blocked prevents repeated blocks',
|
|
1431
1735
|
confidence: 0.85,
|
|
1736
|
+
riskLevel: 'low' as const,
|
|
1737
|
+
strategicPerspective: 'conservative_fix' as const,
|
|
1432
1738
|
});
|
|
1433
1739
|
}
|
|
1434
1740
|
if (maxCandidates >= 3) {
|
|
@@ -1438,6 +1744,8 @@ export function invokeStubDreamer(
|
|
|
1438
1744
|
betterDecision: 'Review docs/auth.md first to understand the authorization structure, then request proper review before any change',
|
|
1439
1745
|
rationale: 'Proper authorization ensures accountability and prevents unintended changes',
|
|
1440
1746
|
confidence: 0.75,
|
|
1747
|
+
riskLevel: 'low' as const,
|
|
1748
|
+
strategicPerspective: 'conservative_fix' as const,
|
|
1441
1749
|
});
|
|
1442
1750
|
}
|
|
1443
1751
|
} else if (hasPain) {
|
|
@@ -1447,6 +1755,8 @@ export function invokeStubDreamer(
|
|
|
1447
1755
|
betterDecision: 'Check logs/pain.json first to analyze pain signals; this error indicates we should stop and reconsider before proceeding',
|
|
1448
1756
|
rationale: 'Pain signals indicate accumulated friction or error conditions',
|
|
1449
1757
|
confidence: 0.90,
|
|
1758
|
+
riskLevel: 'medium' as const,
|
|
1759
|
+
strategicPerspective: 'structural_improvement' as const,
|
|
1450
1760
|
});
|
|
1451
1761
|
if (maxCandidates >= 2) {
|
|
1452
1762
|
candidates.push({
|
|
@@ -1455,6 +1765,8 @@ export function invokeStubDreamer(
|
|
|
1455
1765
|
betterDecision: 'Review src/pain-detector.ts first; based on the evidence, this indicates a deeper issue we must not ignore',
|
|
1456
1766
|
rationale: 'Addressing friction reduces error rates and improves outcomes',
|
|
1457
1767
|
confidence: 0.80,
|
|
1768
|
+
riskLevel: 'medium' as const,
|
|
1769
|
+
strategicPerspective: 'structural_improvement' as const,
|
|
1458
1770
|
});
|
|
1459
1771
|
}
|
|
1460
1772
|
if (maxCandidates >= 3) {
|
|
@@ -1464,6 +1776,8 @@ export function invokeStubDreamer(
|
|
|
1464
1776
|
betterDecision: 'Analyze logs/errors.json first to identify the failure pattern; this suggests we should stop and rethink before retrying',
|
|
1465
1777
|
rationale: 'Pattern analysis prevents recurring pain from the same source',
|
|
1466
1778
|
confidence: 0.70,
|
|
1779
|
+
riskLevel: 'medium' as const,
|
|
1780
|
+
strategicPerspective: 'structural_improvement' as const,
|
|
1467
1781
|
});
|
|
1468
1782
|
}
|
|
1469
1783
|
} else if (hasFailures) {
|
|
@@ -1473,6 +1787,8 @@ export function invokeStubDreamer(
|
|
|
1473
1787
|
betterDecision: 'Verify config.json preconditions first, based on the error in logs/failure.json, before retrying',
|
|
1474
1788
|
rationale: 'Diagnosing failures before retry prevents repeated failures',
|
|
1475
1789
|
confidence: 0.92,
|
|
1790
|
+
riskLevel: 'high' as const,
|
|
1791
|
+
strategicPerspective: 'paradigm_shift' as const,
|
|
1476
1792
|
});
|
|
1477
1793
|
if (maxCandidates >= 2) {
|
|
1478
1794
|
candidates.push({
|
|
@@ -1481,6 +1797,8 @@ export function invokeStubDreamer(
|
|
|
1481
1797
|
betterDecision: 'Check docs/debugging.md first to diagnose what failed; we must not ignore this when the action is irreversible',
|
|
1482
1798
|
rationale: 'Unaddressed failures compound and cause larger issues',
|
|
1483
1799
|
confidence: 0.85,
|
|
1800
|
+
riskLevel: 'high' as const,
|
|
1801
|
+
strategicPerspective: 'paradigm_shift' as const,
|
|
1484
1802
|
});
|
|
1485
1803
|
}
|
|
1486
1804
|
if (maxCandidates >= 3) {
|
|
@@ -1490,6 +1808,8 @@ export function invokeStubDreamer(
|
|
|
1490
1808
|
betterDecision: 'Verify src/validator.ts state first; this error indicates a deeper problem before assuming resolution',
|
|
1491
1809
|
rationale: 'Verification prevents cascading failures from unresolved issues',
|
|
1492
1810
|
confidence: 0.78,
|
|
1811
|
+
riskLevel: 'high' as const,
|
|
1812
|
+
strategicPerspective: 'paradigm_shift' as const,
|
|
1493
1813
|
});
|
|
1494
1814
|
}
|
|
1495
1815
|
} else {
|
|
@@ -1572,6 +1892,70 @@ export function invokeStubPhilosopher(
|
|
|
1572
1892
|
principleAligned = false;
|
|
1573
1893
|
}
|
|
1574
1894
|
|
|
1895
|
+
// Deterministic 6D scores based on strategic perspective (Phase 35 D-07 mapping)
|
|
1896
|
+
const perspective = candidate.strategicPerspective;
|
|
1897
|
+
let sixDScores: Philosopher6DScores;
|
|
1898
|
+
let riskAssessment: PhilosopherRiskAssessment;
|
|
1899
|
+
|
|
1900
|
+
if (perspective === 'conservative_fix') {
|
|
1901
|
+
sixDScores = {
|
|
1902
|
+
principleAlignment: 0.9,
|
|
1903
|
+
specificity: 0.8,
|
|
1904
|
+
actionability: 0.85,
|
|
1905
|
+
executability: 0.9,
|
|
1906
|
+
safetyImpact: 0.95,
|
|
1907
|
+
uxImpact: 0.7,
|
|
1908
|
+
};
|
|
1909
|
+
riskAssessment = {
|
|
1910
|
+
falsePositiveEstimate: 0.1,
|
|
1911
|
+
implementationComplexity: 'low',
|
|
1912
|
+
breakingChangeRisk: false,
|
|
1913
|
+
};
|
|
1914
|
+
} else if (perspective === 'structural_improvement') {
|
|
1915
|
+
sixDScores = {
|
|
1916
|
+
principleAlignment: 0.75,
|
|
1917
|
+
specificity: 0.7,
|
|
1918
|
+
actionability: 0.75,
|
|
1919
|
+
executability: 0.7,
|
|
1920
|
+
safetyImpact: 0.7,
|
|
1921
|
+
uxImpact: 0.8,
|
|
1922
|
+
};
|
|
1923
|
+
riskAssessment = {
|
|
1924
|
+
falsePositiveEstimate: 0.25,
|
|
1925
|
+
implementationComplexity: 'medium',
|
|
1926
|
+
breakingChangeRisk: false,
|
|
1927
|
+
};
|
|
1928
|
+
} else if (perspective === 'paradigm_shift') {
|
|
1929
|
+
sixDScores = {
|
|
1930
|
+
principleAlignment: 0.6,
|
|
1931
|
+
specificity: 0.5,
|
|
1932
|
+
actionability: 0.5,
|
|
1933
|
+
executability: 0.45,
|
|
1934
|
+
safetyImpact: 0.4,
|
|
1935
|
+
uxImpact: 0.6,
|
|
1936
|
+
};
|
|
1937
|
+
riskAssessment = {
|
|
1938
|
+
falsePositiveEstimate: 0.4,
|
|
1939
|
+
implementationComplexity: 'high',
|
|
1940
|
+
breakingChangeRisk: true,
|
|
1941
|
+
};
|
|
1942
|
+
} else {
|
|
1943
|
+
// Fallback for candidates without strategicPerspective
|
|
1944
|
+
sixDScores = {
|
|
1945
|
+
principleAlignment: score,
|
|
1946
|
+
specificity: score * 0.9,
|
|
1947
|
+
actionability: score * 0.85,
|
|
1948
|
+
executability: score * 0.8,
|
|
1949
|
+
safetyImpact: score * 0.7,
|
|
1950
|
+
uxImpact: score * 0.75,
|
|
1951
|
+
};
|
|
1952
|
+
riskAssessment = {
|
|
1953
|
+
falsePositiveEstimate: 0.3,
|
|
1954
|
+
implementationComplexity: 'medium',
|
|
1955
|
+
breakingChangeRisk: false,
|
|
1956
|
+
};
|
|
1957
|
+
}
|
|
1958
|
+
|
|
1575
1959
|
return {
|
|
1576
1960
|
candidateIndex: candidate.candidateIndex,
|
|
1577
1961
|
critique: `Candidate ${candidate.candidateIndex} scored ${score.toFixed(2)}. ${
|
|
@@ -1582,6 +1966,8 @@ export function invokeStubPhilosopher(
|
|
|
1582
1966
|
principleAligned,
|
|
1583
1967
|
score: Math.min(1, Math.max(0, score)),
|
|
1584
1968
|
rank: 0, // Will be set after sorting
|
|
1969
|
+
scores: sixDScores,
|
|
1970
|
+
risks: riskAssessment,
|
|
1585
1971
|
};
|
|
1586
1972
|
});
|
|
1587
1973
|
|
|
@@ -1780,6 +2166,16 @@ export async function runTrinityAsync(options: RunTrinityOptions): Promise<Trini
|
|
|
1780
2166
|
telemetry.dreamerPassed = true;
|
|
1781
2167
|
telemetry.candidateCount = dreamerOutput.candidates.length;
|
|
1782
2168
|
|
|
2169
|
+
// Diversity validation (DIVER-04): soft check, never gates pipeline
|
|
2170
|
+
const diversityResult = validateCandidateDiversity(dreamerOutput.candidates);
|
|
2171
|
+
telemetry.diversityCheckPassed = diversityResult.diversityCheckPassed;
|
|
2172
|
+
telemetry.candidateRiskLevels = dreamerOutput.candidates
|
|
2173
|
+
.map(c => c.riskLevel)
|
|
2174
|
+
.filter((r): r is "low" | "medium" | "high" => typeof r === 'string');
|
|
2175
|
+
if (!diversityResult.diversityCheckPassed) {
|
|
2176
|
+
console.warn(`[Trinity] Diversity check failed: ${diversityResult.details}`);
|
|
2177
|
+
}
|
|
2178
|
+
|
|
1783
2179
|
// Step 2: Philosopher — rank candidates via real subagent
|
|
1784
2180
|
const philosopherOutput = await adapter.invokePhilosopher(dreamerOutput, principleId, snapshot);
|
|
1785
2181
|
|
|
@@ -1794,6 +2190,21 @@ export async function runTrinityAsync(options: RunTrinityOptions): Promise<Trini
|
|
|
1794
2190
|
|
|
1795
2191
|
telemetry.philosopherPassed = true;
|
|
1796
2192
|
|
|
2193
|
+
// Aggregate 6D scores from Philosopher judgments (if available)
|
|
2194
|
+
const realJudgments6D = philosopherOutput.judgments.filter(j => j.scores);
|
|
2195
|
+
if (realJudgments6D.length > 0) {
|
|
2196
|
+
const dims = ['principleAlignment', 'specificity', 'actionability', 'executability', 'safetyImpact', 'uxImpact'] as const;
|
|
2197
|
+
const avgScores: Record<string, number> = {};
|
|
2198
|
+
for (const dim of dims) {
|
|
2199
|
+
const values = realJudgments6D.map(j => j.scores?.[dim] ?? 0);
|
|
2200
|
+
avgScores[dim] = values.reduce((a, b) => a + b, 0) / values.length;
|
|
2201
|
+
}
|
|
2202
|
+
telemetry.philosopher6D = {
|
|
2203
|
+
avgScores: avgScores as NonNullable<TrinityTelemetry['philosopher6D']>['avgScores'],
|
|
2204
|
+
highRiskCount: philosopherOutput.judgments.filter(j => j.risks?.breakingChangeRisk).length,
|
|
2205
|
+
};
|
|
2206
|
+
}
|
|
2207
|
+
|
|
1797
2208
|
// Step 3: Scribe — synthesize final artifact via real subagent
|
|
1798
2209
|
const draftArtifact = await adapter.invokeScribe(
|
|
1799
2210
|
dreamerOutput,
|
|
@@ -1876,6 +2287,16 @@ function runTrinityWithStubs(
|
|
|
1876
2287
|
telemetry.dreamerPassed = true;
|
|
1877
2288
|
telemetry.candidateCount = dreamerOutput.candidates.length;
|
|
1878
2289
|
|
|
2290
|
+
// Diversity validation (DIVER-04): soft check, never gates pipeline
|
|
2291
|
+
const diversityResult = validateCandidateDiversity(dreamerOutput.candidates);
|
|
2292
|
+
telemetry.diversityCheckPassed = diversityResult.diversityCheckPassed;
|
|
2293
|
+
telemetry.candidateRiskLevels = dreamerOutput.candidates
|
|
2294
|
+
.map(c => c.riskLevel)
|
|
2295
|
+
.filter((r): r is "low" | "medium" | "high" => typeof r === 'string');
|
|
2296
|
+
if (!diversityResult.diversityCheckPassed) {
|
|
2297
|
+
console.warn(`[Trinity] Diversity check failed: ${diversityResult.details}`);
|
|
2298
|
+
}
|
|
2299
|
+
|
|
1879
2300
|
// Step 2: Philosopher — rank candidates (stub)
|
|
1880
2301
|
const philosopherOutput = invokeStubPhilosopher(dreamerOutput, principleId, snapshot);
|
|
1881
2302
|
|
|
@@ -1895,6 +2316,21 @@ function runTrinityWithStubs(
|
|
|
1895
2316
|
|
|
1896
2317
|
telemetry.philosopherPassed = true;
|
|
1897
2318
|
|
|
2319
|
+
// Aggregate 6D scores from Philosopher judgments (if available)
|
|
2320
|
+
const judgments6D = philosopherOutput.judgments.filter(j => j.scores);
|
|
2321
|
+
if (judgments6D.length > 0) {
|
|
2322
|
+
const dims = ['principleAlignment', 'specificity', 'actionability', 'executability', 'safetyImpact', 'uxImpact'] as const;
|
|
2323
|
+
const avgScores: Record<string, number> = {};
|
|
2324
|
+
for (const dim of dims) {
|
|
2325
|
+
const values = judgments6D.map(j => j.scores?.[dim] ?? 0);
|
|
2326
|
+
avgScores[dim] = values.reduce((a, b) => a + b, 0) / values.length;
|
|
2327
|
+
}
|
|
2328
|
+
telemetry.philosopher6D = {
|
|
2329
|
+
avgScores: avgScores as NonNullable<TrinityTelemetry['philosopher6D']>['avgScores'],
|
|
2330
|
+
highRiskCount: philosopherOutput.judgments.filter(j => j.risks?.breakingChangeRisk).length,
|
|
2331
|
+
};
|
|
2332
|
+
}
|
|
2333
|
+
|
|
1898
2334
|
// Step 3: Scribe — produce final artifact using tournament selection (stub)
|
|
1899
2335
|
const draftArtifact = invokeStubScribe(dreamerOutput, philosopherOutput, snapshot, principleId, telemetry, config);
|
|
1900
2336
|
|