principles-disciple 1.28.0 → 1.28.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/openclaw.plugin.json +4 -4
- package/package.json +4 -4
- package/scripts/validate-live-path.ts +18 -18
- package/src/commands/context.ts +1 -0
- package/src/commands/disable-impl.ts +2 -0
- package/src/commands/evolution-status.ts +2 -0
- package/src/commands/focus.ts +2 -0
- package/src/commands/nocturnal-train.ts +4 -6
- package/src/commands/pain.ts +9 -11
- package/src/commands/pd-reflect.ts +1 -1
- package/src/commands/principle-rollback.ts +1 -0
- package/src/commands/rollback-impl.ts +1 -0
- package/src/core/adaptive-thresholds.ts +1 -0
- package/src/core/bootstrap-rules.ts +3 -3
- package/src/core/dictionary.ts +1 -0
- package/src/core/empathy-keyword-matcher.ts +1 -0
- package/src/core/event-log.ts +2 -0
- package/src/core/evolution-engine.ts +1 -0
- package/src/core/external-training-contract.ts +1 -0
- package/src/core/focus-history.ts +3 -0
- package/src/core/init.ts +1 -0
- package/src/core/merge-gate-audit.ts +1 -1
- package/src/core/nocturnal-arbiter.ts +3 -0
- package/src/core/nocturnal-candidate-scoring.ts +131 -0
- package/src/core/nocturnal-compliance.ts +1 -0
- package/src/core/nocturnal-dataset.ts +1 -0
- package/src/core/nocturnal-executability.ts +1 -0
- package/src/core/nocturnal-reasoning-deriver.ts +338 -0
- package/src/core/nocturnal-rule-implementation-validator.ts +1 -0
- package/src/core/nocturnal-trinity.ts +457 -18
- package/src/core/pain-context-extractor.ts +2 -3
- package/src/core/pain.ts +1 -0
- package/src/core/pd-task-reconciler.ts +1 -0
- package/src/core/pd-task-service.ts +1 -0
- package/src/core/principle-internalization/deprecated-readiness.ts +1 -0
- package/src/core/principle-internalization/principle-lifecycle-service.ts +1 -0
- package/src/core/principle-tree-migration.ts +3 -4
- package/src/core/replay-engine.ts +4 -0
- package/src/core/risk-calculator.ts +1 -0
- package/src/core/rule-host.ts +2 -0
- package/src/core/session-tracker.ts +2 -0
- package/src/core/thinking-models.ts +1 -0
- package/src/core/thinking-os-parser.ts +3 -3
- package/src/core/trajectory.ts +4 -0
- package/src/hooks/bash-risk.ts +1 -1
- package/src/hooks/gfi-gate.ts +1 -1
- package/src/hooks/lifecycle-routing.ts +1 -0
- package/src/hooks/pain.ts +2 -1
- package/src/hooks/prompt.ts +37 -2
- package/src/hooks/subagent.ts +1 -1
- package/src/hooks/trajectory-collector.ts +1 -0
- package/src/http/principles-console-route.ts +2 -0
- package/src/index.ts +1 -1
- package/src/service/central-database.ts +2 -0
- package/src/service/central-sync-service.ts +1 -0
- package/src/service/control-ui-query-service.ts +2 -0
- package/src/service/event-log-auditor.ts +2 -0
- package/src/service/evolution-worker.ts +2 -1
- package/src/service/health-query-service.ts +20 -6
- package/src/service/nocturnal-runtime.ts +4 -0
- package/src/service/runtime-summary-service.ts +5 -0
- package/src/service/subagent-workflow/deep-reflect-workflow-manager.ts +1 -0
- package/src/service/subagent-workflow/nocturnal-workflow-manager.ts +2 -1
- package/src/service/subagent-workflow/subagent-error-utils.ts +1 -0
- package/src/service/subagent-workflow/workflow-manager-base.ts +1 -0
- package/src/tools/critique-prompt.ts +1 -0
- package/src/utils/io.ts +1 -0
- package/tests/core/nocturnal-candidate-scoring.test.ts +132 -0
- package/tests/core/nocturnal-reasoning-deriver.test.ts +372 -0
- package/tests/core/nocturnal-trinity.test.ts +791 -0
|
@@ -35,12 +35,17 @@ import * as os from 'os';
|
|
|
35
35
|
import * as path from 'path';
|
|
36
36
|
import type { NocturnalSessionSnapshot } from './nocturnal-trajectory-extractor.js';
|
|
37
37
|
import { computeThinkingModelDelta } from './nocturnal-trajectory-extractor.js';
|
|
38
|
+
import {
|
|
39
|
+
deriveReasoningChain,
|
|
40
|
+
deriveContextualFactors,
|
|
41
|
+
} from './nocturnal-reasoning-deriver.js';
|
|
38
42
|
import type { TrinityArtificerContext } from './nocturnal-artificer.js';
|
|
39
43
|
import {
|
|
40
44
|
runTournament,
|
|
41
45
|
DEFAULT_SCORING_WEIGHTS,
|
|
42
46
|
type ScoringWeights,
|
|
43
47
|
type TournamentTraceEntry,
|
|
48
|
+
validateCandidateDiversity,
|
|
44
49
|
} from './nocturnal-candidate-scoring.js';
|
|
45
50
|
import {
|
|
46
51
|
DEFAULT_THRESHOLDS,
|
|
@@ -61,7 +66,7 @@ const FALLBACK_MODEL = process.env.OPENCLAW_DEFAULT_MODEL || 'MiniMax-M2.7';
|
|
|
61
66
|
// These prompts are embedded at build time. The agents/ directory was removed
|
|
62
67
|
// to eliminate fragile runtime file dependencies on the file system.
|
|
63
68
|
|
|
64
|
-
const NOCTURNAL_DREAMER_PROMPT = `# Nocturnal Dreamer — Candidate Generation
|
|
69
|
+
export const NOCTURNAL_DREAMER_PROMPT = `# Nocturnal Dreamer — Candidate Generation
|
|
65
70
|
|
|
66
71
|
> System prompt for Trinity Dreamer stage.
|
|
67
72
|
> Role: Generate multiple alternative "better decision" candidates from a session snapshot.
|
|
@@ -104,7 +109,9 @@ You MUST respond with ONLY a valid JSON object. No markdown, no explanation, no
|
|
|
104
109
|
"badDecision": "<what the agent did wrong>",
|
|
105
110
|
"betterDecision": "<what the agent should have done>",
|
|
106
111
|
"rationale": "<why this is better>",
|
|
107
|
-
"confidence": 0.95
|
|
112
|
+
"confidence": 0.95,
|
|
113
|
+
"riskLevel": "low",
|
|
114
|
+
"strategicPerspective": "conservative_fix"
|
|
108
115
|
}
|
|
109
116
|
],
|
|
110
117
|
"generatedAt": "<ISO timestamp>"
|
|
@@ -131,6 +138,23 @@ You MUST respond with ONLY a valid JSON object. No markdown, no explanation, no
|
|
|
131
138
|
- Do not generate candidates with identical betterDecisions
|
|
132
139
|
- Vary the confidence scores to reflect genuine uncertainty
|
|
133
140
|
|
|
141
|
+
## Strategic Perspective Requirements
|
|
142
|
+
|
|
143
|
+
Generate candidates from DISTINCT strategic perspectives:
|
|
144
|
+
|
|
145
|
+
- **conservative_fix**: Minimal deviation from original approach. Add a
|
|
146
|
+
verification or validation step that was missing.
|
|
147
|
+
- **structural_improvement**: Reorder operations or introduce an intermediate
|
|
148
|
+
checkpoint. Change HOW the goal is achieved.
|
|
149
|
+
- **paradigm_shift**: Challenge whether the original goal was correct.
|
|
150
|
+
Consider a fundamentally different approach.
|
|
151
|
+
|
|
152
|
+
Each candidate MUST specify \`riskLevel\` ("low"|"medium"|"high") and
|
|
153
|
+
\`strategicPerspective\` matching one of the above.
|
|
154
|
+
|
|
155
|
+
ANTI-PATTERN: Candidates that differ only in wording, not in substance,
|
|
156
|
+
will be rejected.
|
|
157
|
+
|
|
134
158
|
### Candidates must NOT:
|
|
135
159
|
- Contain raw user text or private content
|
|
136
160
|
- Reference non-existent tools or impossible actions
|
|
@@ -148,7 +172,7 @@ If you cannot generate valid candidates (e.g., no clear violation found, insuffi
|
|
|
148
172
|
"generatedAt": "<ISO timestamp>"
|
|
149
173
|
}`;
|
|
150
174
|
|
|
151
|
-
const NOCTURNAL_PHILOSOPHER_PROMPT = `# Nocturnal Philosopher — Candidate Evaluation and Ranking
|
|
175
|
+
export const NOCTURNAL_PHILOSOPHER_PROMPT = `# Nocturnal Philosopher — Candidate Evaluation and Ranking
|
|
152
176
|
|
|
153
177
|
> System prompt for Trinity Philosopher stage.
|
|
154
178
|
> Role: Evaluate Dreamer's candidates and rank them by principle alignment and quality.
|
|
@@ -187,7 +211,20 @@ You MUST respond with ONLY a valid JSON object. No markdown, no explanation, no
|
|
|
187
211
|
"critique": "<principle-grounded critique>",
|
|
188
212
|
"principleAligned": true,
|
|
189
213
|
"score": 0.92,
|
|
190
|
-
"rank": 1
|
|
214
|
+
"rank": 1,
|
|
215
|
+
"scores": {
|
|
216
|
+
"principleAlignment": 0.9,
|
|
217
|
+
"specificity": 0.85,
|
|
218
|
+
"actionability": 0.9,
|
|
219
|
+
"executability": 0.95,
|
|
220
|
+
"safetyImpact": 0.8,
|
|
221
|
+
"uxImpact": 0.85
|
|
222
|
+
},
|
|
223
|
+
"risks": {
|
|
224
|
+
"falsePositiveEstimate": 0.1,
|
|
225
|
+
"implementationComplexity": "low",
|
|
226
|
+
"breakingChangeRisk": false
|
|
227
|
+
}
|
|
191
228
|
}
|
|
192
229
|
],
|
|
193
230
|
"overallAssessment": "<summary of candidate set quality>",
|
|
@@ -197,10 +234,18 @@ You MUST respond with ONLY a valid JSON object. No markdown, no explanation, no
|
|
|
197
234
|
## Evaluation Criteria
|
|
198
235
|
|
|
199
236
|
### Score Components (0-1 scale each):
|
|
200
|
-
1. **Principle Alignment** (weight: 0.
|
|
201
|
-
2. **Specificity** (weight: 0.
|
|
202
|
-
3. **Actionability** (weight: 0.
|
|
237
|
+
1. **Principle Alignment** (weight: 0.20) — Does the betterDecision properly reflect the target principle?
|
|
238
|
+
2. **Specificity** (weight: 0.15) — Is badDecision specific? Is betterDecision actionable?
|
|
239
|
+
3. **Actionability** (weight: 0.15) — Does betterDecision describe a specific next step?
|
|
203
240
|
4. **Executability** (weight: 0.15) — Does betterDecision start with a bounded verb (read, check, verify, edit, write, etc.) and reference a concrete target?
|
|
241
|
+
5. **Safety Impact** (weight: 0.20) — Does the betterDecision reduce risk of data loss, corruption, or new failure modes? Would implementing this prevent dangerous operations?
|
|
242
|
+
6. **UX Impact** (weight: 0.15) — Does the betterDecision reduce user frustration or improve response reliability? Would the user experience be noticeably better?
|
|
243
|
+
|
|
244
|
+
### Risk Assessment (per candidate):
|
|
245
|
+
For each candidate, also assess:
|
|
246
|
+
- **falsePositiveEstimate** (0-1): How likely is this candidate a false positive (the "betterDecision" is actually not better)?
|
|
247
|
+
- **implementationComplexity** ("low"/"medium"/"high"): How complex would it be to implement this correction?
|
|
248
|
+
- **breakingChangeRisk** (boolean): Could implementing this correction break existing behavior?
|
|
204
249
|
|
|
205
250
|
### Executability Check:
|
|
206
251
|
A betterDecision is executable if it:
|
|
@@ -243,13 +288,16 @@ and synthesize it into a final decision-point artifact that passes arbiter valid
|
|
|
243
288
|
You will receive:
|
|
244
289
|
- A **target principle** (principle ID and description)
|
|
245
290
|
- A **session trajectory snapshot**
|
|
246
|
-
- **Philosopher's judgments** — ranked candidates with critiques
|
|
291
|
+
- **Philosopher's judgments** — ranked candidates with critiques and 6D scores
|
|
247
292
|
- **Dreamer's candidates** — the original candidate list
|
|
293
|
+
- **Philosopher's risk assessments** — falsePositiveEstimate, implementationComplexity, breakingChangeRisk per candidate
|
|
294
|
+
|
|
295
|
+
Use the risk assessments to determine which candidates require deeper contrastive analysis. High-risk candidates (high breakingChangeRisk or implementationComplexity) warrant thorough rejectedAnalysis.
|
|
248
296
|
|
|
249
297
|
## Task
|
|
250
298
|
|
|
251
299
|
Select the best candidate (Philosopher's rank 1) and synthesize it into
|
|
252
|
-
a final TrinityDraftArtifact.
|
|
300
|
+
a final TrinityDraftArtifact. Then produce a **Contrastive Analysis** that explains why the winner was chosen and what to learn from the runners-up.
|
|
253
301
|
|
|
254
302
|
## Output Format
|
|
255
303
|
|
|
@@ -271,9 +319,26 @@ You MUST respond with ONLY a valid JSON object. No markdown, no explanation, no
|
|
|
271
319
|
"candidateCount": 2,
|
|
272
320
|
"selectedCandidateIndex": 0,
|
|
273
321
|
"stageFailures": []
|
|
322
|
+
},
|
|
323
|
+
"rejectedAnalysis": {
|
|
324
|
+
"whyRejected": "<mental model that led to the rejected candidate>",
|
|
325
|
+
"warningSignals": ["<observable caution trigger 1>", "<trigger 2>"],
|
|
326
|
+
"correctiveThinking": "<correct reasoning path that should have been taken>"
|
|
327
|
+
},
|
|
328
|
+
"chosenJustification": {
|
|
329
|
+
"whyChosen": "<why this candidate was selected over others>",
|
|
330
|
+
"keyInsights": ["<transferable insight 1>", "<insight 2>", "<insight 3>"],
|
|
331
|
+
"limitations": ["<when this approach does NOT apply 1>", "<limitation 2>"]
|
|
332
|
+
},
|
|
333
|
+
"contrastiveAnalysis": {
|
|
334
|
+
"criticalDifference": "<ONE key insight distinguishing chosen from rejected>",
|
|
335
|
+
"decisionTrigger": "<When X, do Y pattern>",
|
|
336
|
+
"preventionStrategy": "<how to systematically avoid the rejected path>"
|
|
274
337
|
}
|
|
275
338
|
}
|
|
276
339
|
|
|
340
|
+
All three analysis sections (rejectedAnalysis, chosenJustification, contrastiveAnalysis) are optional but recommended. When multiple candidates were evaluated, include them to provide richer training signals.
|
|
341
|
+
|
|
277
342
|
## Validation
|
|
278
343
|
|
|
279
344
|
If you cannot synthesize an artifact:
|
|
@@ -403,6 +468,70 @@ export class TrinityRuntimeContractError extends Error {
|
|
|
403
468
|
}
|
|
404
469
|
}
|
|
405
470
|
|
|
471
|
+
// ---------------------------------------------------------------------------
|
|
472
|
+
// Reasoning Context Serialization (D-03, D-04)
|
|
473
|
+
// ---------------------------------------------------------------------------
|
|
474
|
+
|
|
475
|
+
/**
 * Build the "## Reasoning Context" prompt section handed to the Dreamer stage.
 *
 * Per-turn reasoning signals and session-level contextual factors are derived
 * from the snapshot and rendered as a markdown bullet list. Only the reasoning
 * chain and the contextual factors are serialized; decision points are
 * intentionally left out (reserved for the Phase 37 Scribe per D-04).
 *
 * @param snapshot - Session snapshot whose assistant turns and context are analysed.
 * @returns The section text (ending with a trailing newline), or `null` when no
 *   turn carries thinking content or uncertainty markers and no contextual
 *   factor flag is set.
 */
export function formatReasoningContext(snapshot: NocturnalSessionSnapshot): string | null {
  const chain = deriveReasoningChain(snapshot.assistantTurns);
  const factors = deriveContextualFactors(snapshot);

  // Turns worth reporting: captured thinking text or at least one uncertainty marker.
  const reportableTurns = chain.filter(
    turn => turn.thinkingContent || turn.uncertaintyMarkers.length > 0
  );

  // Flag → label pairs, listed in the order they are emitted.
  const factorTable: ReadonlyArray<readonly [unknown, string]> = [
    [factors.fileStructureKnown, 'File structure explored before modification'],
    [factors.errorHistoryPresent, 'Prior error history present'],
    [factors.userGuidanceAvailable, 'User guidance/corrections available'],
    [factors.timePressure, 'Time pressure detected (rapid tool calls)'],
  ];
  const activeLabels = factorTable
    .filter(([flag]) => Boolean(flag))
    .map(([, label]) => label);

  // Nothing to say: no reasoning signal and no environmental flag.
  if (reportableTurns.length === 0 && activeLabels.length === 0) {
    return null;
  }

  const lines: string[] = ['## Reasoning Context', ''];

  for (const turn of reportableTurns) {
    if (turn.thinkingContent) {
      // Thinking text is capped at 200 characters.
      lines.push(`- Turn ${turn.turnIndex}: Internal reasoning: "${turn.thinkingContent.slice(0, 200)}"`);
    }
    if (turn.uncertaintyMarkers.length > 0) {
      lines.push(`- Turn ${turn.turnIndex}: Uncertainty detected: ${turn.uncertaintyMarkers.join(', ')}`);
    }
    if (turn.confidenceSignal !== 'high') {
      lines.push(`- Turn ${turn.turnIndex}: Confidence: ${turn.confidenceSignal}`);
    }
  }

  if (activeLabels.length > 0) {
    lines.push('', 'Environmental context:');
    activeLabels.forEach(label => {
      lines.push(`- ${label}`);
    });
  }

  lines.push('');
  return lines.join('\n');
}
|
|
534
|
+
|
|
406
535
|
export class OpenClawTrinityRuntimeAdapter implements TrinityRuntimeAdapter {
|
|
407
536
|
|
|
408
537
|
private readonly api: {
|
|
@@ -478,8 +607,8 @@ export class OpenClawTrinityRuntimeAdapter implements TrinityRuntimeAdapter {
|
|
|
478
607
|
fs.rmSync(fullPath, { recursive: true, force: true });
|
|
479
608
|
}
|
|
480
609
|
}
|
|
481
|
-
} catch {
|
|
482
|
-
|
|
610
|
+
} catch (err) {
|
|
611
|
+
this.api.logger?.warn?.(`[Trinity] Failed to cleanup stale temp dirs: ${err instanceof Error ? err.message.replace(/([A-Za-z]:\\[^:\\s]+|\\\/[^\s:]+)/g, '[PATH]') : String(err)}`);
|
|
483
612
|
}
|
|
484
613
|
}
|
|
485
614
|
|
|
@@ -513,6 +642,7 @@ export class OpenClawTrinityRuntimeAdapter implements TrinityRuntimeAdapter {
|
|
|
513
642
|
* runEmbeddedPiAgent does NOT read config.agents.defaults.model —
|
|
514
643
|
* it requires explicit params.provider and params.model.
|
|
515
644
|
*/
|
|
645
|
+
// eslint-disable-next-line complexity -- complexity 12, refactor candidate
|
|
516
646
|
private resolveModel(): { provider: string; model: string } {
|
|
517
647
|
const config = this.loadFullConfig();
|
|
518
648
|
const agents = config?.agents as Record<string, unknown> | undefined;
|
|
@@ -559,6 +689,12 @@ export class OpenClawTrinityRuntimeAdapter implements TrinityRuntimeAdapter {
|
|
|
559
689
|
.join('\n');
|
|
560
690
|
}
|
|
561
691
|
|
|
692
|
+
/**
 * Force an LLM-produced score into the closed interval [0, 1].
 *
 * @param val - Raw value; anything that is not a finite number is rejected.
 * @param fallback - Returned as-is (not clamped) when `val` is rejected. Defaults to 0.
 * @returns `val` limited to [0, 1], or `fallback` for non-numeric / NaN / infinite input.
 */
private clamp01(val: unknown, fallback = 0): number {
  if (typeof val !== 'number') return fallback;
  if (!Number.isFinite(val)) return fallback;
  // `<=` (not `<`) so that -0 collapses to +0, matching Math.max(0, -0).
  if (val <= 0) return 0;
  return val >= 1 ? 1 : val;
}
|
|
697
|
+
|
|
562
698
|
private classifyRuntimeError(error: unknown): TrinityRuntimeFailureCode {
|
|
563
699
|
const detail = error instanceof Error ? error.message : String(error);
|
|
564
700
|
return /timeout/i.test(detail) ? 'runtime_timeout' : 'runtime_run_failed';
|
|
@@ -606,7 +742,7 @@ export class OpenClawTrinityRuntimeAdapter implements TrinityRuntimeAdapter {
|
|
|
606
742
|
} catch (err) {
|
|
607
743
|
return this.buildRuntimeFailureDreamerOutput(this.classifyRuntimeError(err), err);
|
|
608
744
|
} finally {
|
|
609
|
-
try { fs.unlinkSync(sessionFile); } catch {
|
|
745
|
+
try { fs.unlinkSync(sessionFile); } catch (err) { this.api.logger?.warn?.(`[Trinity] Failed to delete session file: ${sessionFile}`); }
|
|
610
746
|
}
|
|
611
747
|
}
|
|
612
748
|
|
|
@@ -650,7 +786,7 @@ export class OpenClawTrinityRuntimeAdapter implements TrinityRuntimeAdapter {
|
|
|
650
786
|
} catch (err) {
|
|
651
787
|
return this.buildRuntimeFailurePhilosopherOutput(this.classifyRuntimeError(err), err);
|
|
652
788
|
} finally {
|
|
653
|
-
try { fs.unlinkSync(sessionFile); } catch {
|
|
789
|
+
try { fs.unlinkSync(sessionFile); } catch (err) { this.api.logger?.warn?.(`[Trinity] Failed to delete session file: ${sessionFile}`); }
|
|
654
790
|
}
|
|
655
791
|
}
|
|
656
792
|
|
|
@@ -698,7 +834,7 @@ export class OpenClawTrinityRuntimeAdapter implements TrinityRuntimeAdapter {
|
|
|
698
834
|
this.recordFailure(this.classifyRuntimeError(err), err);
|
|
699
835
|
return null;
|
|
700
836
|
} finally {
|
|
701
|
-
try { fs.unlinkSync(sessionFile); } catch {
|
|
837
|
+
try { fs.unlinkSync(sessionFile); } catch (err) { this.api.logger?.warn?.(`[Trinity] Failed to delete session file: ${sessionFile}`); }
|
|
702
838
|
}
|
|
703
839
|
}
|
|
704
840
|
|
|
@@ -796,6 +932,12 @@ export class OpenClawTrinityRuntimeAdapter implements TrinityRuntimeAdapter {
|
|
|
796
932
|
sections.push('');
|
|
797
933
|
}
|
|
798
934
|
|
|
935
|
+
// ## Reasoning Context — derived signals from Phase 34 deriver module (D-03, D-04)
|
|
936
|
+
const reasoningSection = formatReasoningContext(snapshot);
|
|
937
|
+
if (reasoningSection) {
|
|
938
|
+
sections.push(reasoningSection);
|
|
939
|
+
}
|
|
940
|
+
|
|
799
941
|
sections.push(`## Task`,
|
|
800
942
|
`Analyze the above session and generate ${maxCandidates} candidate corrections.`,
|
|
801
943
|
`Each candidate must:`,
|
|
@@ -818,6 +960,11 @@ export class OpenClawTrinityRuntimeAdapter implements TrinityRuntimeAdapter {
|
|
|
818
960
|
): string {
|
|
819
961
|
const candidatesJson = JSON.stringify(dreamerOutput.candidates, null, 2);
|
|
820
962
|
|
|
963
|
+
// Build per-candidate metadata from Dreamer (risk level + strategic perspective)
|
|
964
|
+
const candidateMeta = dreamerOutput.candidates
|
|
965
|
+
.filter(c => c.riskLevel || c.strategicPerspective)
|
|
966
|
+
.map(c => `- Candidate #${c.candidateIndex}: risk=${c.riskLevel || 'N/A'}, perspective=${c.strategicPerspective || 'N/A'}`);
|
|
967
|
+
|
|
821
968
|
// Build violation summary from snapshot for Philosopher to validate candidates
|
|
822
969
|
const failures = snapshot.toolCalls
|
|
823
970
|
.filter(tc => tc.outcome === 'failure')
|
|
@@ -862,6 +1009,11 @@ export class OpenClawTrinityRuntimeAdapter implements TrinityRuntimeAdapter {
|
|
|
862
1009
|
sections.push(userCues.join('\n'));
|
|
863
1010
|
}
|
|
864
1011
|
|
|
1012
|
+
if (candidateMeta.length > 0) {
|
|
1013
|
+
sections.push(`\n### Candidate Risk Profiles (${candidateMeta.length})`);
|
|
1014
|
+
sections.push(candidateMeta.join('\n'));
|
|
1015
|
+
}
|
|
1016
|
+
|
|
865
1017
|
sections.push(
|
|
866
1018
|
``,
|
|
867
1019
|
`## Dreamer's Candidates`,
|
|
@@ -926,18 +1078,29 @@ export class OpenClawTrinityRuntimeAdapter implements TrinityRuntimeAdapter {
|
|
|
926
1078
|
sections.push(`(No specific violations found in snapshot)`);
|
|
927
1079
|
}
|
|
928
1080
|
|
|
1081
|
+
// Build risk summary from Philosopher 6D judgments for Scribe contrastive analysis
|
|
1082
|
+
const riskSummary = philosopherOutput.judgments
|
|
1083
|
+
.map(j => {
|
|
1084
|
+
const risk = j.risks ? ` [risks: fp=${j.risks.falsePositiveEstimate.toFixed(2)}, complexity=${j.risks.implementationComplexity}, breaking=${j.risks.breakingChangeRisk}]` : '';
|
|
1085
|
+
return ` - candidate[${j.candidateIndex}] (rank ${j.rank}, score ${j.score?.toFixed(2) ?? 'n/a'}): ${j.principleAligned ? 'aligned' : 'not aligned'}${risk}`;
|
|
1086
|
+
})
|
|
1087
|
+
.join('\n');
|
|
1088
|
+
|
|
929
1089
|
sections.push(
|
|
930
1090
|
``,
|
|
931
1091
|
`## Dreamer's Candidates`,
|
|
932
1092
|
candidatesJson,
|
|
933
1093
|
``,
|
|
934
|
-
`## Philosopher's Judgments`,
|
|
1094
|
+
`## Philosopher's Judgments + Risk Assessments`,
|
|
935
1095
|
judgmentsJson,
|
|
936
1096
|
``,
|
|
1097
|
+
`## Philosopher 6D Risk Summary`,
|
|
1098
|
+
`Use this to determine contrastive depth — high-risk candidates need deeper analysis:`,
|
|
1099
|
+
riskSummary,
|
|
1100
|
+
``,
|
|
937
1101
|
`## Task`,
|
|
938
1102
|
`Select the best candidate (Philosopher's rank 1) and synthesize it into a final TrinityDraftArtifact.`,
|
|
939
|
-
`
|
|
940
|
-
`are grounded in the actual session events, not just Dreamer's interpretation.`,
|
|
1103
|
+
`Then produce contrastive analysis explaining why the winner was chosen and what the rejected candidates teach us.`,
|
|
941
1104
|
``,
|
|
942
1105
|
`## CRITICAL: betterDecision Format Requirements`,
|
|
943
1106
|
`Your betterDecision MUST pass executability validation. It MUST:`,
|
|
@@ -1057,7 +1220,39 @@ export class OpenClawTrinityRuntimeAdapter implements TrinityRuntimeAdapter {
|
|
|
1057
1220
|
}
|
|
1058
1221
|
return {
|
|
1059
1222
|
valid: parsed.valid,
|
|
1060
|
-
judgments: parsed.judgments,
|
|
1223
|
+
judgments: parsed.judgments.map((j: Record<string, unknown>) => ({
|
|
1224
|
+
candidateIndex: j.candidateIndex,
|
|
1225
|
+
critique: j.critique ?? '',
|
|
1226
|
+
principleAligned: j.principleAligned ?? false,
|
|
1227
|
+
score: j.score ?? 0,
|
|
1228
|
+
rank: j.rank ?? 0,
|
|
1229
|
+
// Optional 6D scores and risk assessment (Phase 36)
|
|
1230
|
+
// Only include a dimension if the LLM actually returned a number (not undefined/null).
|
|
1231
|
+
// This preserves the distinction between "LLM returned 0" vs "LLM omitted the field."
|
|
1232
|
+
...(j.scores ? {
|
|
1233
|
+
scores: Object.fromEntries(
|
|
1234
|
+
(['principleAlignment', 'specificity', 'actionability', 'executability', 'safetyImpact', 'uxImpact'] as const)
|
|
1235
|
+
.map(dim => [dim, (j.scores as Record<string, unknown>)[dim]])
|
|
1236
|
+
.filter(([, v]) => typeof v === 'number')
|
|
1237
|
+
.map(([dim, v]) => [dim, this.clamp01(v as number)])
|
|
1238
|
+
)
|
|
1239
|
+
} : {}),
|
|
1240
|
+
...(j.risks ? (() => {
|
|
1241
|
+
const risks = j.risks as Record<string, unknown>;
|
|
1242
|
+
const fp = risks.falsePositiveEstimate;
|
|
1243
|
+
const hasFp = typeof fp === 'number';
|
|
1244
|
+
const risksObj: {
|
|
1245
|
+
falsePositiveEstimate?: number;
|
|
1246
|
+
implementationComplexity: string;
|
|
1247
|
+
breakingChangeRisk: boolean;
|
|
1248
|
+
} = {
|
|
1249
|
+
implementationComplexity: (risks.implementationComplexity as string) ?? 'medium',
|
|
1250
|
+
breakingChangeRisk: Boolean(risks.breakingChangeRisk),
|
|
1251
|
+
};
|
|
1252
|
+
if (hasFp) risksObj.falsePositiveEstimate = this.clamp01(fp as number);
|
|
1253
|
+
return { risks: risksObj };
|
|
1254
|
+
})() : {}),
|
|
1255
|
+
})),
|
|
1061
1256
|
overallAssessment: parsed.overallAssessment ?? '',
|
|
1062
1257
|
reason: parsed.reason,
|
|
1063
1258
|
generatedAt: parsed.generatedAt ?? new Date().toISOString(),
|
|
@@ -1117,6 +1312,22 @@ export class OpenClawTrinityRuntimeAdapter implements TrinityRuntimeAdapter {
|
|
|
1117
1312
|
return null;
|
|
1118
1313
|
}
|
|
1119
1314
|
|
|
1315
|
+
// Validate contrastive analysis sub-fields (H-03): only include if structure is intact
|
|
1316
|
+
const contrastiveAnalysis = parsed.contrastiveAnalysis
|
|
1317
|
+
&& typeof parsed.contrastiveAnalysis === 'object'
|
|
1318
|
+
&& typeof parsed.contrastiveAnalysis.criticalDifference === 'string'
|
|
1319
|
+
? parsed.contrastiveAnalysis : undefined;
|
|
1320
|
+
|
|
1321
|
+
const rejectedAnalysis = parsed.rejectedAnalysis
|
|
1322
|
+
&& typeof parsed.rejectedAnalysis === 'object'
|
|
1323
|
+
&& typeof parsed.rejectedAnalysis.whyRejected === 'string'
|
|
1324
|
+
? parsed.rejectedAnalysis : undefined;
|
|
1325
|
+
|
|
1326
|
+
const chosenJustification = parsed.chosenJustification
|
|
1327
|
+
&& typeof parsed.chosenJustification === 'object'
|
|
1328
|
+
&& typeof parsed.chosenJustification.whyChosen === 'string'
|
|
1329
|
+
? parsed.chosenJustification : undefined;
|
|
1330
|
+
|
|
1120
1331
|
return {
|
|
1121
1332
|
selectedCandidateIndex: parsed.selectedCandidateIndex,
|
|
1122
1333
|
badDecision: parsed.badDecision ?? '',
|
|
@@ -1135,6 +1346,9 @@ export class OpenClawTrinityRuntimeAdapter implements TrinityRuntimeAdapter {
|
|
|
1135
1346
|
selectedCandidateIndex: parsed.selectedCandidateIndex,
|
|
1136
1347
|
stageFailures: [],
|
|
1137
1348
|
},
|
|
1349
|
+
...(contrastiveAnalysis ? { contrastiveAnalysis } : {}),
|
|
1350
|
+
...(rejectedAnalysis ? { rejectedAnalysis } : {}),
|
|
1351
|
+
...(chosenJustification ? { chosenJustification } : {}),
|
|
1138
1352
|
};
|
|
1139
1353
|
} catch {
|
|
1140
1354
|
this.recordFailure('runtime_run_failed', new Error(`Scribe output JSON parse error: ${json.slice(0, 200)}`));
|
|
@@ -1255,6 +1469,10 @@ export interface DreamerCandidate {
|
|
|
1255
1469
|
rationale: string;
|
|
1256
1470
|
/** Confidence that this candidate is valid (0-1) */
|
|
1257
1471
|
confidence: number;
|
|
1472
|
+
/** Risk level of this candidate's approach -- LLM-judged per D-02 */
|
|
1473
|
+
riskLevel?: "low" | "medium" | "high";
|
|
1474
|
+
/** Which strategic perspective this candidate embodies per D-01 */
|
|
1475
|
+
strategicPerspective?: "conservative_fix" | "structural_improvement" | "paradigm_shift";
|
|
1258
1476
|
}
|
|
1259
1477
|
|
|
1260
1478
|
export interface DreamerOutput {
|
|
@@ -1272,6 +1490,24 @@ export interface DreamerOutput {
|
|
|
1272
1490
|
* Philosopher output — principle-grounded critique and ranking.
|
|
1273
1491
|
* Philosopher evaluates Dreamer's candidates and ranks them.
|
|
1274
1492
|
*/
|
|
1493
|
+
/** Per-candidate risk assessment attached to a Philosopher judgment; values originate from LLM output. */
export interface PhilosopherRiskAssessment {
|
|
1494
|
+
/** Estimated probability that this candidate is a false positive (0-1) */
|
|
1495
|
+
falsePositiveEstimate: number; // NOTE(review): the Philosopher parser shown in this diff sets this (clamped to [0, 1]) only when the LLM returned a number, so it may be absent at runtime — confirm before calling number methods on it
|
|
1496
|
+
/** How complex is this candidate to implement */
|
|
1497
|
+
implementationComplexity: 'low' | 'medium' | 'high'; // NOTE(review): the parser shown in this diff casts the raw value and falls back to 'medium' only for null/undefined; other strings are not rejected
|
|
1498
|
+
/** Whether implementing this candidate risks breaking existing functionality */
|
|
1499
|
+
breakingChangeRisk: boolean; // the parser shown in this diff coerces the raw value with Boolean(...)
|
|
1500
|
+
}
|
|
1501
|
+
|
|
1502
|
+
/**
 * Per-dimension Philosopher scores, each on a 0-1 scale.
 * NOTE(review): the parser shown in this diff clamps each value to [0, 1] and keeps only
 * dimensions the LLM returned as numbers, so individual keys may be missing at runtime.
 */
export interface Philosopher6DScores {
|
|
1503
|
+
principleAlignment: number; // does the betterDecision properly reflect the target principle?
|
|
1504
|
+
specificity: number; // is badDecision specific, and betterDecision actionable?
|
|
1505
|
+
actionability: number; // does betterDecision describe a specific next step?
|
|
1506
|
+
executability: number; // does betterDecision start with a bounded verb and reference a concrete target?
|
|
1507
|
+
safetyImpact: number; // does betterDecision reduce risk of data loss, corruption, or new failure modes?
|
|
1508
|
+
uxImpact: number; // does betterDecision reduce user frustration or improve response reliability?
|
|
1509
|
+
}
|
|
1510
|
+
|
|
1275
1511
|
export interface PhilosopherJudgment {
|
|
1276
1512
|
/** Index of the judged candidate (references DreamerCandidate.candidateIndex) */
|
|
1277
1513
|
candidateIndex: number;
|
|
@@ -1283,6 +1519,10 @@ export interface PhilosopherJudgment {
|
|
|
1283
1519
|
score: number;
|
|
1284
1520
|
/** Rank among all candidates (1 = best) */
|
|
1285
1521
|
rank: number;
|
|
1522
|
+
/** Per-dimension scores (6D evaluation) — informational, not used for tournament ranking */
|
|
1523
|
+
scores?: Philosopher6DScores;
|
|
1524
|
+
/** Risk assessment for this candidate — informational, consumed by Scribe (Phase 37) */
|
|
1525
|
+
risks?: PhilosopherRiskAssessment;
|
|
1286
1526
|
}
|
|
1287
1527
|
|
|
1288
1528
|
export interface PhilosopherOutput {
|
|
@@ -1298,6 +1538,45 @@ export interface PhilosopherOutput {
|
|
|
1298
1538
|
generatedAt: string;
|
|
1299
1539
|
}
|
|
1300
1540
|
|
|
1541
|
+
/**
|
|
1542
|
+
* Analysis of a rejected candidate — why it lost the tournament.
|
|
1543
|
+
* Informs training signal for "what to avoid".
* The Scribe parser shown in this diff keeps this object only when it is an
* object whose `whyRejected` is a string; the other fields are passed through
* without validation.
|
|
1544
|
+
*/
|
|
1545
|
+
export interface RejectedAnalysis {
|
|
1546
|
+
/** Mental model that led to the rejected candidate */
|
|
1547
|
+
whyRejected: string;
|
|
1548
|
+
/** Observable caution triggers that were missed or ignored */
|
|
1549
|
+
warningSignals: string[]; // NOTE(review): not type-checked by the visible Scribe parser — may not be an array at runtime
|
|
1550
|
+
/** Correct reasoning path that should have been taken */
|
|
1551
|
+
correctiveThinking: string; // NOTE(review): not type-checked by the visible Scribe parser — may be missing at runtime
|
|
1552
|
+
}
|
|
1553
|
+
|
|
1554
|
+
/**
|
|
1555
|
+
* Justification for the chosen candidate — why it won the tournament.
|
|
1556
|
+
* Informs training signal for "what to do".
* The Scribe parser shown in this diff keeps this object only when it is an
* object whose `whyChosen` is a string; the other fields are passed through
* without validation.
|
|
1557
|
+
*/
|
|
1558
|
+
export interface ChosenJustification {
|
|
1559
|
+
/** Why this candidate was selected over others */
|
|
1560
|
+
whyChosen: string;
|
|
1561
|
+
/** 1-3 transferable insights from this decision */
|
|
1562
|
+
keyInsights: string[]; // NOTE(review): neither the array type nor the 1-3 count is enforced by the visible Scribe parser
|
|
1563
|
+
/** When this approach does NOT apply */
|
|
1564
|
+
limitations: string[]; // NOTE(review): not type-checked by the visible Scribe parser — may not be an array at runtime
|
|
1565
|
+
}
|
|
1566
|
+
|
|
1567
|
+
/**
|
|
1568
|
+
* Contrastive analysis: key differences between chosen and rejected paths.
|
|
1569
|
+
* Synthesizes the core lesson from the tournament.
* The Scribe parser shown in this diff keeps this object only when it is an
* object whose `criticalDifference` is a string; the other fields are passed
* through without validation.
|
|
1570
|
+
*/
|
|
1571
|
+
export interface ContrastiveAnalysis {
|
|
1572
|
+
/** ONE key insight distinguishing chosen from rejected */
|
|
1573
|
+
criticalDifference: string;
|
|
1574
|
+
/** Pattern: "When X, do Y" */
|
|
1575
|
+
decisionTrigger: string; // NOTE(review): not type-checked by the visible Scribe parser — may be missing at runtime
|
|
1576
|
+
/** How to systematically avoid the rejected path */
|
|
1577
|
+
preventionStrategy: string; // NOTE(review): not type-checked by the visible Scribe parser — may be missing at runtime
|
|
1578
|
+
}
|
|
1579
|
+
|
|
1301
1580
|
/**
|
|
1302
1581
|
* Scribe output — final structured artifact draft.
|
|
1303
1582
|
* Scribe synthesizes the best candidate into an approved artifact format.
|
|
@@ -1325,6 +1604,12 @@ export interface TrinityDraftArtifact {
|
|
|
1325
1604
|
planningRatioGain?: number;
|
|
1326
1605
|
/** Optional routing context for a follow-on Artificer stage */
|
|
1327
1606
|
artificerContext?: TrinityArtificerContext;
|
|
1607
|
+
/** Contrastive analysis: chosen vs rejected reasoning paths (SCRIBE-03) */
|
|
1608
|
+
contrastiveAnalysis?: ContrastiveAnalysis;
|
|
1609
|
+
/** Analysis of the rejected candidates — why they lost the tournament (SCRIBE-01) */
|
|
1610
|
+
rejectedAnalysis?: RejectedAnalysis;
|
|
1611
|
+
/** Justification for the chosen candidate — why it won (SCRIBE-02) */
|
|
1612
|
+
chosenJustification?: ChosenJustification;
|
|
1328
1613
|
}
|
|
1329
1614
|
|
|
1330
1615
|
export interface TrinityTelemetry {
|
|
@@ -1350,6 +1635,24 @@ export interface TrinityTelemetry {
|
|
|
1350
1635
|
winnerThresholdPassed?: boolean;
|
|
1351
1636
|
/** Number of eligible candidates after threshold check (optional) */
|
|
1352
1637
|
eligibleCandidateCount?: number;
|
|
1638
|
+
/** Whether Dreamer candidates passed diversity validation (DIVER-04) */
|
|
1639
|
+
diversityCheckPassed?: boolean;
|
|
1640
|
+
/** Risk levels reported by Dreamer candidates; candidates with no riskLevel are omitted */
|
|
1641
|
+
candidateRiskLevels?: string[];
|
|
1642
|
+
/** Aggregate 6D Philosopher evaluation metrics (informational) */
|
|
1643
|
+
philosopher6D?: {
|
|
1644
|
+
/** Per-dimension average over the judgments that carry 6D scores */
|
|
1645
|
+
avgScores: {
|
|
1646
|
+
principleAlignment: number;
|
|
1647
|
+
specificity: number;
|
|
1648
|
+
actionability: number;
|
|
1649
|
+
executability: number;
|
|
1650
|
+
safetyImpact: number;
|
|
1651
|
+
uxImpact: number;
|
|
1652
|
+
};
|
|
1653
|
+
/** Count of candidates with breakingChangeRisk = true */
|
|
1654
|
+
highRiskCount: number;
|
|
1655
|
+
};
|
|
1353
1656
|
}
|
|
1354
1657
|
|
|
1355
1658
|
// ---------------------------------------------------------------------------
|
|
@@ -1396,6 +1699,7 @@ export interface TrinityResult {
|
|
|
1396
1699
|
* In production, this would call the actual Dreamer subagent.
|
|
1397
1700
|
* The stub generates plausible candidates based on snapshot signals.
|
|
1398
1701
|
*/
|
|
1702
|
+
// eslint-disable-next-line complexity -- complexity 14, refactor candidate
|
|
1399
1703
|
export function invokeStubDreamer(
|
|
1400
1704
|
snapshot: NocturnalSessionSnapshot,
|
|
1401
1705
|
principleId: string,
|
|
@@ -1421,6 +1725,8 @@ export function invokeStubDreamer(
|
|
|
1421
1725
|
betterDecision: 'Review docs/gateblocks.md and verify authorization requirements first; based on the evidence, this irreversible action must be reviewed before proceeding',
|
|
1422
1726
|
rationale: 'Respecting gate blocks prevents unintended system modifications',
|
|
1423
1727
|
confidence: 0.95,
|
|
1728
|
+
riskLevel: 'low' as const,
|
|
1729
|
+
strategicPerspective: 'conservative_fix' as const,
|
|
1424
1730
|
});
|
|
1425
1731
|
if (maxCandidates >= 2) {
|
|
1426
1732
|
candidates.push({
|
|
@@ -1429,6 +1735,8 @@ export function invokeStubDreamer(
|
|
|
1429
1735
|
betterDecision: 'Check the gatekeeper source first to diagnose the block reason; this is irreversible, so we must be certain before proceeding',
|
|
1430
1736
|
rationale: 'Understanding why a gate blocked prevents repeated blocks',
|
|
1431
1737
|
confidence: 0.85,
|
|
1738
|
+
riskLevel: 'low' as const,
|
|
1739
|
+
strategicPerspective: 'conservative_fix' as const,
|
|
1432
1740
|
});
|
|
1433
1741
|
}
|
|
1434
1742
|
if (maxCandidates >= 3) {
|
|
@@ -1438,6 +1746,8 @@ export function invokeStubDreamer(
|
|
|
1438
1746
|
betterDecision: 'Review docs/auth.md first to understand the authorization structure, then request proper review before any change',
|
|
1439
1747
|
rationale: 'Proper authorization ensures accountability and prevents unintended changes',
|
|
1440
1748
|
confidence: 0.75,
|
|
1749
|
+
riskLevel: 'low' as const,
|
|
1750
|
+
strategicPerspective: 'conservative_fix' as const,
|
|
1441
1751
|
});
|
|
1442
1752
|
}
|
|
1443
1753
|
} else if (hasPain) {
|
|
@@ -1447,6 +1757,8 @@ export function invokeStubDreamer(
|
|
|
1447
1757
|
betterDecision: 'Check logs/pain.json first to analyze pain signals; this error indicates we should stop and reconsider before proceeding',
|
|
1448
1758
|
rationale: 'Pain signals indicate accumulated friction or error conditions',
|
|
1449
1759
|
confidence: 0.90,
|
|
1760
|
+
riskLevel: 'medium' as const,
|
|
1761
|
+
strategicPerspective: 'structural_improvement' as const,
|
|
1450
1762
|
});
|
|
1451
1763
|
if (maxCandidates >= 2) {
|
|
1452
1764
|
candidates.push({
|
|
@@ -1455,6 +1767,8 @@ export function invokeStubDreamer(
|
|
|
1455
1767
|
betterDecision: 'Review src/pain-detector.ts first; based on the evidence, this indicates a deeper issue we must not ignore',
|
|
1456
1768
|
rationale: 'Addressing friction reduces error rates and improves outcomes',
|
|
1457
1769
|
confidence: 0.80,
|
|
1770
|
+
riskLevel: 'medium' as const,
|
|
1771
|
+
strategicPerspective: 'structural_improvement' as const,
|
|
1458
1772
|
});
|
|
1459
1773
|
}
|
|
1460
1774
|
if (maxCandidates >= 3) {
|
|
@@ -1464,6 +1778,8 @@ export function invokeStubDreamer(
|
|
|
1464
1778
|
betterDecision: 'Analyze logs/errors.json first to identify the failure pattern; this suggests we should stop and rethink before retrying',
|
|
1465
1779
|
rationale: 'Pattern analysis prevents recurring pain from the same source',
|
|
1466
1780
|
confidence: 0.70,
|
|
1781
|
+
riskLevel: 'medium' as const,
|
|
1782
|
+
strategicPerspective: 'structural_improvement' as const,
|
|
1467
1783
|
});
|
|
1468
1784
|
}
|
|
1469
1785
|
} else if (hasFailures) {
|
|
@@ -1473,6 +1789,8 @@ export function invokeStubDreamer(
|
|
|
1473
1789
|
betterDecision: 'Verify config.json preconditions first, based on the error in logs/failure.json, before retrying',
|
|
1474
1790
|
rationale: 'Diagnosing failures before retry prevents repeated failures',
|
|
1475
1791
|
confidence: 0.92,
|
|
1792
|
+
riskLevel: 'high' as const,
|
|
1793
|
+
strategicPerspective: 'paradigm_shift' as const,
|
|
1476
1794
|
});
|
|
1477
1795
|
if (maxCandidates >= 2) {
|
|
1478
1796
|
candidates.push({
|
|
@@ -1481,6 +1799,8 @@ export function invokeStubDreamer(
|
|
|
1481
1799
|
betterDecision: 'Check docs/debugging.md first to diagnose what failed; we must not ignore this when the action is irreversible',
|
|
1482
1800
|
rationale: 'Unaddressed failures compound and cause larger issues',
|
|
1483
1801
|
confidence: 0.85,
|
|
1802
|
+
riskLevel: 'high' as const,
|
|
1803
|
+
strategicPerspective: 'paradigm_shift' as const,
|
|
1484
1804
|
});
|
|
1485
1805
|
}
|
|
1486
1806
|
if (maxCandidates >= 3) {
|
|
@@ -1490,6 +1810,8 @@ export function invokeStubDreamer(
|
|
|
1490
1810
|
betterDecision: 'Verify src/validator.ts state first; this error indicates a deeper problem before assuming resolution',
|
|
1491
1811
|
rationale: 'Verification prevents cascading failures from unresolved issues',
|
|
1492
1812
|
confidence: 0.78,
|
|
1813
|
+
riskLevel: 'high' as const,
|
|
1814
|
+
strategicPerspective: 'paradigm_shift' as const,
|
|
1493
1815
|
});
|
|
1494
1816
|
}
|
|
1495
1817
|
} else {
|
|
@@ -1572,6 +1894,70 @@ export function invokeStubPhilosopher(
|
|
|
1572
1894
|
principleAligned = false;
|
|
1573
1895
|
}
|
|
1574
1896
|
|
|
1897
|
+
// Deterministic 6D scores based on strategic perspective (Phase 35 D-07 mapping)
|
|
1898
|
+
const perspective = candidate.strategicPerspective;
|
|
1899
|
+
let sixDScores: Philosopher6DScores;
|
|
1900
|
+
let riskAssessment: PhilosopherRiskAssessment;
|
|
1901
|
+
|
|
1902
|
+
if (perspective === 'conservative_fix') {
|
|
1903
|
+
sixDScores = {
|
|
1904
|
+
principleAlignment: 0.9,
|
|
1905
|
+
specificity: 0.8,
|
|
1906
|
+
actionability: 0.85,
|
|
1907
|
+
executability: 0.9,
|
|
1908
|
+
safetyImpact: 0.95,
|
|
1909
|
+
uxImpact: 0.7,
|
|
1910
|
+
};
|
|
1911
|
+
riskAssessment = {
|
|
1912
|
+
falsePositiveEstimate: 0.1,
|
|
1913
|
+
implementationComplexity: 'low',
|
|
1914
|
+
breakingChangeRisk: false,
|
|
1915
|
+
};
|
|
1916
|
+
} else if (perspective === 'structural_improvement') {
|
|
1917
|
+
sixDScores = {
|
|
1918
|
+
principleAlignment: 0.75,
|
|
1919
|
+
specificity: 0.7,
|
|
1920
|
+
actionability: 0.75,
|
|
1921
|
+
executability: 0.7,
|
|
1922
|
+
safetyImpact: 0.7,
|
|
1923
|
+
uxImpact: 0.8,
|
|
1924
|
+
};
|
|
1925
|
+
riskAssessment = {
|
|
1926
|
+
falsePositiveEstimate: 0.25,
|
|
1927
|
+
implementationComplexity: 'medium',
|
|
1928
|
+
breakingChangeRisk: false,
|
|
1929
|
+
};
|
|
1930
|
+
} else if (perspective === 'paradigm_shift') {
|
|
1931
|
+
sixDScores = {
|
|
1932
|
+
principleAlignment: 0.6,
|
|
1933
|
+
specificity: 0.5,
|
|
1934
|
+
actionability: 0.5,
|
|
1935
|
+
executability: 0.45,
|
|
1936
|
+
safetyImpact: 0.4,
|
|
1937
|
+
uxImpact: 0.6,
|
|
1938
|
+
};
|
|
1939
|
+
riskAssessment = {
|
|
1940
|
+
falsePositiveEstimate: 0.4,
|
|
1941
|
+
implementationComplexity: 'high',
|
|
1942
|
+
breakingChangeRisk: true,
|
|
1943
|
+
};
|
|
1944
|
+
} else {
|
|
1945
|
+
// Fallback for candidates with a missing or unrecognized strategicPerspective: derive 6D scores from the overall score
|
|
1946
|
+
sixDScores = {
|
|
1947
|
+
principleAlignment: score,
|
|
1948
|
+
specificity: score * 0.9,
|
|
1949
|
+
actionability: score * 0.85,
|
|
1950
|
+
executability: score * 0.8,
|
|
1951
|
+
safetyImpact: score * 0.7,
|
|
1952
|
+
uxImpact: score * 0.75,
|
|
1953
|
+
};
|
|
1954
|
+
riskAssessment = {
|
|
1955
|
+
falsePositiveEstimate: 0.3,
|
|
1956
|
+
implementationComplexity: 'medium',
|
|
1957
|
+
breakingChangeRisk: false,
|
|
1958
|
+
};
|
|
1959
|
+
}
|
|
1960
|
+
|
|
1575
1961
|
return {
|
|
1576
1962
|
candidateIndex: candidate.candidateIndex,
|
|
1577
1963
|
critique: `Candidate ${candidate.candidateIndex} scored ${score.toFixed(2)}. ${
|
|
@@ -1582,6 +1968,8 @@ export function invokeStubPhilosopher(
|
|
|
1582
1968
|
principleAligned,
|
|
1583
1969
|
score: Math.min(1, Math.max(0, score)),
|
|
1584
1970
|
rank: 0, // Will be set after sorting
|
|
1971
|
+
scores: sixDScores,
|
|
1972
|
+
risks: riskAssessment,
|
|
1585
1973
|
};
|
|
1586
1974
|
});
|
|
1587
1975
|
|
|
@@ -1780,6 +2168,16 @@ export async function runTrinityAsync(options: RunTrinityOptions): Promise<Trini
|
|
|
1780
2168
|
telemetry.dreamerPassed = true;
|
|
1781
2169
|
telemetry.candidateCount = dreamerOutput.candidates.length;
|
|
1782
2170
|
|
|
2171
|
+
// Diversity validation (DIVER-04): soft check, never gates pipeline
|
|
2172
|
+
const diversityResult = validateCandidateDiversity(dreamerOutput.candidates);
|
|
2173
|
+
telemetry.diversityCheckPassed = diversityResult.diversityCheckPassed;
|
|
2174
|
+
telemetry.candidateRiskLevels = dreamerOutput.candidates
|
|
2175
|
+
.map(c => c.riskLevel)
|
|
2176
|
+
.filter((r): r is "low" | "medium" | "high" => typeof r === 'string');
|
|
2177
|
+
if (!diversityResult.diversityCheckPassed) {
|
|
2178
|
+
console.warn(`[Trinity] Diversity check failed: ${diversityResult.details}`);
|
|
2179
|
+
}
|
|
2180
|
+
|
|
1783
2181
|
// Step 2: Philosopher — rank candidates via real subagent
|
|
1784
2182
|
const philosopherOutput = await adapter.invokePhilosopher(dreamerOutput, principleId, snapshot);
|
|
1785
2183
|
|
|
@@ -1794,6 +2192,21 @@ export async function runTrinityAsync(options: RunTrinityOptions): Promise<Trini
|
|
|
1794
2192
|
|
|
1795
2193
|
telemetry.philosopherPassed = true;
|
|
1796
2194
|
|
|
2195
|
+
// Aggregate 6D scores from Philosopher judgments (if available)
|
|
2196
|
+
const realJudgments6D = philosopherOutput.judgments.filter(j => j.scores);
|
|
2197
|
+
if (realJudgments6D.length > 0) {
|
|
2198
|
+
const dims = ['principleAlignment', 'specificity', 'actionability', 'executability', 'safetyImpact', 'uxImpact'] as const;
|
|
2199
|
+
const avgScores: Record<string, number> = {};
|
|
2200
|
+
for (const dim of dims) {
|
|
2201
|
+
const values = realJudgments6D.map(j => j.scores?.[dim] ?? 0);
|
|
2202
|
+
avgScores[dim] = values.reduce((a, b) => a + b, 0) / values.length;
|
|
2203
|
+
}
|
|
2204
|
+
telemetry.philosopher6D = {
|
|
2205
|
+
avgScores: avgScores as NonNullable<TrinityTelemetry['philosopher6D']>['avgScores'],
|
|
2206
|
+
highRiskCount: philosopherOutput.judgments.filter(j => j.risks?.breakingChangeRisk).length,
|
|
2207
|
+
};
|
|
2208
|
+
}
|
|
2209
|
+
|
|
1797
2210
|
// Step 3: Scribe — synthesize final artifact via real subagent
|
|
1798
2211
|
const draftArtifact = await adapter.invokeScribe(
|
|
1799
2212
|
dreamerOutput,
|
|
@@ -1837,6 +2250,7 @@ export async function runTrinityAsync(options: RunTrinityOptions): Promise<Trini
|
|
|
1837
2250
|
|
|
1838
2251
|
/**
|
|
1839
2252
|
* Internal: Run Trinity chain with stub implementations (synchronous).
|
|
2253
|
+
// eslint-disable-next-line complexity -- complexity 14, refactor candidate
|
|
1840
2254
|
*/
|
|
1841
2255
|
function runTrinityWithStubs(
|
|
1842
2256
|
snapshot: NocturnalSessionSnapshot,
|
|
@@ -1876,6 +2290,16 @@ function runTrinityWithStubs(
|
|
|
1876
2290
|
telemetry.dreamerPassed = true;
|
|
1877
2291
|
telemetry.candidateCount = dreamerOutput.candidates.length;
|
|
1878
2292
|
|
|
2293
|
+
// Diversity validation (DIVER-04): soft check, never gates pipeline
|
|
2294
|
+
const diversityResult = validateCandidateDiversity(dreamerOutput.candidates);
|
|
2295
|
+
telemetry.diversityCheckPassed = diversityResult.diversityCheckPassed;
|
|
2296
|
+
telemetry.candidateRiskLevels = dreamerOutput.candidates
|
|
2297
|
+
.map(c => c.riskLevel)
|
|
2298
|
+
.filter((r): r is "low" | "medium" | "high" => typeof r === 'string');
|
|
2299
|
+
if (!diversityResult.diversityCheckPassed) {
|
|
2300
|
+
console.warn(`[Trinity] Diversity check failed: ${diversityResult.details}`);
|
|
2301
|
+
}
|
|
2302
|
+
|
|
1879
2303
|
// Step 2: Philosopher — rank candidates (stub)
|
|
1880
2304
|
const philosopherOutput = invokeStubPhilosopher(dreamerOutput, principleId, snapshot);
|
|
1881
2305
|
|
|
@@ -1895,6 +2319,21 @@ function runTrinityWithStubs(
|
|
|
1895
2319
|
|
|
1896
2320
|
telemetry.philosopherPassed = true;
|
|
1897
2321
|
|
|
2322
|
+
// Aggregate 6D scores from Philosopher judgments (if available)
|
|
2323
|
+
const judgments6D = philosopherOutput.judgments.filter(j => j.scores);
|
|
2324
|
+
if (judgments6D.length > 0) {
|
|
2325
|
+
const dims = ['principleAlignment', 'specificity', 'actionability', 'executability', 'safetyImpact', 'uxImpact'] as const;
|
|
2326
|
+
const avgScores: Record<string, number> = {};
|
|
2327
|
+
for (const dim of dims) {
|
|
2328
|
+
const values = judgments6D.map(j => j.scores?.[dim] ?? 0);
|
|
2329
|
+
avgScores[dim] = values.reduce((a, b) => a + b, 0) / values.length;
|
|
2330
|
+
}
|
|
2331
|
+
telemetry.philosopher6D = {
|
|
2332
|
+
avgScores: avgScores as NonNullable<TrinityTelemetry['philosopher6D']>['avgScores'],
|
|
2333
|
+
highRiskCount: philosopherOutput.judgments.filter(j => j.risks?.breakingChangeRisk).length,
|
|
2334
|
+
};
|
|
2335
|
+
}
|
|
2336
|
+
|
|
1898
2337
|
// Step 3: Scribe — produce final artifact using tournament selection (stub)
|
|
1899
2338
|
const draftArtifact = invokeStubScribe(dreamerOutput, philosopherOutput, snapshot, principleId, telemetry, config);
|
|
1900
2339
|
|