principles-disciple 1.71.0 → 1.73.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/openclaw.plugin.json +10 -5
- package/package.json +17 -19
- package/scripts/acceptance-test.mjs +16 -73
- package/scripts/sync-plugin.mjs +382 -77
- package/src/commands/archive-impl.ts +2 -1
- package/src/commands/capabilities.ts +2 -2
- package/src/commands/context.ts +2 -2
- package/src/commands/disable-impl.ts +2 -1
- package/src/commands/evolution-status.ts +16 -16
- package/src/commands/export.ts +12 -67
- package/src/commands/pain.ts +91 -1
- package/src/commands/principle-rollback.ts +2 -1
- package/src/commands/promote-impl.ts +7 -43
- package/src/commands/rollback-impl.ts +2 -1
- package/src/commands/rollback.ts +2 -1
- package/src/commands/samples.ts +2 -1
- package/src/commands/thinking-os.ts +2 -1
- package/src/config/errors.ts +18 -2
- package/src/constants/diagnostician.ts +2 -2
- package/src/constants/tools.ts +2 -1
- package/src/core/__tests__/focus-history.test.ts +210 -0
- package/src/core/config.ts +1 -1
- package/src/core/confirm-first-gate.ts +255 -0
- package/src/core/correction-cue-learner.ts +2 -136
- package/src/core/correction-types.ts +16 -88
- package/src/core/dictionary.ts +19 -20
- package/src/core/empathy-keyword-matcher.ts +17 -289
- package/src/core/empathy-types.ts +18 -229
- package/src/core/event-log.ts +38 -132
- package/src/core/evolution-reducer.ts +21 -2
- package/src/core/evolution-types.ts +76 -464
- package/src/core/file-store.ts +80 -0
- package/src/core/focus-history.ts +228 -955
- package/src/core/local-worker-routing.ts +34 -314
- package/src/core/merge-gate-audit.ts +0 -195
- package/src/core/pain-diagnostic-gate.ts +154 -0
- package/src/core/pain-signal.ts +21 -138
- package/src/core/pain.ts +15 -88
- package/src/core/pd-task-reconciler.ts +26 -115
- package/src/core/pd-task-service.ts +9 -9
- package/src/core/pd-task-types.ts +23 -127
- package/src/core/principle-compiler/__tests__/compiler-replay-gate.test.ts +174 -0
- package/src/core/principle-compiler/code-validator.ts +15 -42
- package/src/core/principle-compiler/compiler.ts +100 -15
- package/src/core/principle-compiler/index.ts +5 -2
- package/src/core/principle-compiler/template-generator.ts +4 -104
- package/src/core/principle-injection.ts +10 -202
- package/src/core/principle-internalization/filesystem-lifecycle-datasource.ts +42 -0
- package/src/core/principle-internalization/lifecycle-read-model.ts +39 -242
- package/src/core/principle-internalization/principle-lifecycle-service.ts +12 -10
- package/src/core/principle-tree-ledger-adapter.ts +145 -0
- package/src/core/principle-tree-ledger.ts +8 -6
- package/src/core/reflection/reflection-context.ts +14 -109
- package/src/core/replay-engine.ts +8 -500
- package/src/core/rule-host-helpers.ts +5 -35
- package/src/core/rule-host-types.ts +10 -82
- package/src/core/rule-host.ts +6 -63
- package/src/core/runtime-v2-prompt-activation-reader.ts +231 -0
- package/src/core/session-tracker.ts +87 -101
- package/src/core/shadow-observation-registry.ts +19 -48
- package/src/core/trajectory.ts +3 -1
- package/src/core/workflow-funnel-loader.ts +62 -68
- package/src/core/workspace-context.ts +46 -0
- package/src/core/workspace-dir-service.ts +1 -1
- package/src/core/workspace-dir-validation.ts +18 -9
- package/src/hooks/AGENTS.md +1 -1
- package/src/hooks/gate-block-helper.ts +46 -44
- package/src/hooks/gate.ts +207 -7
- package/src/hooks/lifecycle.ts +30 -32
- package/src/hooks/llm.ts +60 -32
- package/src/hooks/pain.ts +297 -103
- package/src/hooks/prompt.ts +469 -339
- package/src/hooks/subagent.ts +2 -29
- package/src/i18n/commands.ts +2 -10
- package/src/index.ts +95 -85
- package/src/openclaw-sdk.ts +311 -0
- package/src/service/central-database.ts +8 -4
- package/src/service/evolution-queue-migration.ts +2 -1
- package/src/service/evolution-worker.ts +163 -1786
- package/src/service/internalization-trigger-adapter.ts +302 -0
- package/src/service/keyword-optimization-service.ts +4 -4
- package/src/service/monitoring-query-service.ts +1 -215
- package/src/service/queue-io.ts +60 -331
- package/src/service/runtime-summary-service.ts +115 -18
- package/src/service/subagent-workflow/index.ts +0 -41
- package/src/service/subagent-workflow/types.ts +9 -120
- package/src/service/subagent-workflow/workflow-store.ts +2 -119
- package/src/service/workflow-watchdog.ts +0 -43
- package/src/types/event-payload.ts +16 -74
- package/src/types/event-types.ts +39 -547
- package/src/types/hygiene-types.ts +7 -30
- package/src/types/principle-tree-schema.ts +20 -222
- package/src/types/queue.ts +15 -70
- package/src/types/runtime-summary.ts +5 -49
- package/src/utils/io.ts +10 -0
- package/src/utils/retry.ts +1 -1
- package/src/utils/shadow-fingerprint.ts +2 -2
- package/src/utils/workspace-resolver.ts +50 -0
- package/templates/langs/en/core/AGENTS.md +2 -2
- package/templates/langs/en/core/BOOT.md +1 -1
- package/templates/langs/en/core/HEARTBEAT.md +2 -2
- package/templates/langs/en/skills/ai-sprint-orchestration/references/agent-registry.json +1 -72
- package/templates/langs/en/skills/ai-sprint-orchestration/references/specs/bugfix-complex-template.json +6 -6
- package/templates/langs/en/skills/ai-sprint-orchestration/references/specs/feature-complex-template.json +6 -6
- package/templates/langs/en/skills/ai-sprint-orchestration/references/specs/workflow-validation-minimal-verify.json +2 -12
- package/templates/langs/en/skills/ai-sprint-orchestration/references/specs/workflow-validation-minimal.json +2 -12
- package/templates/langs/en/skills/ai-sprint-orchestration/runtime/.gitignore +2 -2
- package/templates/langs/en/skills/ai-sprint-orchestration/scripts/run.mjs +51 -15
- package/templates/langs/en/skills/evolve-task/SKILL.md +1 -1
- package/templates/langs/en/skills/pd-cli-operator/SKILL.md +67 -0
- package/templates/langs/en/skills/pd-diagnostician/SKILL.md +1 -1
- package/templates/langs/en/skills/pd-mentor/SKILL.md +1 -1
- package/templates/langs/en/skills/pd-pain-signal/SKILL.md +17 -39
- package/templates/langs/en/skills/pd-runtime-v2/SKILL.md +61 -0
- package/templates/langs/zh/core/AGENTS.md +2 -2
- package/templates/langs/zh/core/BOOT.md +1 -1
- package/templates/langs/zh/core/HEARTBEAT.md +2 -2
- package/templates/langs/zh/skills/ai-sprint-orchestration/references/agent-registry.json +1 -72
- package/templates/langs/zh/skills/ai-sprint-orchestration/references/specs/bugfix-complex-template.json +6 -6
- package/templates/langs/zh/skills/ai-sprint-orchestration/references/specs/feature-complex-template.json +6 -6
- package/templates/langs/zh/skills/ai-sprint-orchestration/references/specs/nocturnal-trinity-quality-enhancement.json +8 -8
- package/templates/langs/zh/skills/ai-sprint-orchestration/references/specs/workflow-validation-minimal-verify.json +2 -12
- package/templates/langs/zh/skills/ai-sprint-orchestration/references/specs/workflow-validation-minimal.json +2 -12
- package/templates/langs/zh/skills/ai-sprint-orchestration/runtime/.gitignore +2 -2
- package/templates/langs/zh/skills/ai-sprint-orchestration/scripts/run.mjs +51 -15
- package/templates/langs/zh/skills/ai-sprint-orchestration/test/run.test.mjs +21 -5
- package/templates/langs/zh/skills/evolve-task/SKILL.md +2 -2
- package/templates/langs/zh/skills/pd-cli-operator/SKILL.md +67 -0
- package/templates/langs/zh/skills/pd-diagnostician/SKILL.md +1 -1
- package/templates/langs/zh/skills/pd-mentor/SKILL.md +1 -1
- package/templates/langs/zh/skills/pd-pain-signal/SKILL.md +17 -38
- package/templates/langs/zh/skills/pd-runtime-v2/SKILL.md +61 -0
- package/tests/build-artifacts.test.ts +1 -3
- package/tests/commands/evolution-status.test.ts +0 -118
- package/tests/core/bootstrap-rules.test.ts +1 -1
- package/tests/core/config.test.ts +1 -1
- package/tests/core/event-log.test.ts +35 -0
- package/tests/core/evolution-engine.test.ts +610 -0
- package/tests/core/file-store.test.ts +102 -0
- package/tests/core/focus-history.test.ts +203 -11
- package/tests/core/merge-gate-audit.test.ts +2 -169
- package/tests/core/model-deployment-registry.test.ts +7 -1
- package/tests/core/model-training-registry.test.ts +19 -0
- package/tests/core/observability.test.ts +0 -1
- package/tests/core/pain-diagnostic-gate.test.ts +498 -0
- package/tests/core/pain.test.ts +0 -1
- package/tests/core/principle-internalization/deprecated-readiness.test.ts +2 -2
- package/tests/core/principle-internalization/lifecycle-metrics.test.ts +2 -2
- package/tests/core/principle-internalization/{internalization-routing-policy.test.ts → lifecycle-routing-policy.test.ts} +6 -6
- package/tests/core/principle-internalization/lineage-source-retired.test.ts +56 -0
- package/tests/core/principle-internalization/principle-lifecycle-service.test.ts +1 -23
- package/tests/core/principle-tree-ledger-adapter.test.ts +253 -0
- package/tests/core/reflection-context.test.ts +0 -14
- package/tests/core/replay-engine.test.ts +127 -215
- package/tests/core/rule-host-helpers.test.ts +2 -2
- package/tests/core/rule-implementation-runtime.test.ts +0 -27
- package/tests/core/workflow-funnel-loader.test.ts +162 -0
- package/tests/core/workspace-dir-validation.test.ts +8 -1
- package/tests/core-anti-growth.test.ts +192 -0
- package/tests/hook-workspace-nextaction-contract.test.ts +42 -0
- package/tests/hooks/confirm-first-gate.test.ts +333 -0
- package/tests/hooks/gate-auto-correct-shadow.test.ts +310 -0
- package/tests/hooks/gate-auto-correct.test.ts +665 -0
- package/tests/hooks/gate-rule-host-pipeline.test.ts +2 -1
- package/tests/hooks/pain.test.ts +269 -12
- package/tests/hooks/prompt-characterization.test.ts +500 -0
- package/tests/hooks/prompt-size-guard.test.ts +329 -0
- package/tests/hooks/runtime-v2-prompt-activation.test.ts +869 -0
- package/tests/index.test.ts +94 -1
- package/tests/integration/auto-entry-gate.test.ts +248 -0
- package/tests/integration/internalization-trigger-guard.test.ts +69 -0
- package/tests/integration/m8-legacy-paths.test.ts +63 -0
- package/tests/integration/runtime-v2-pain-guard.test.ts +125 -0
- package/tests/plugin-config-resolution-cutover.test.ts +359 -0
- package/tests/runtime-v2-discovery-guard.test.ts +154 -0
- package/tests/service/central-database.test.ts +457 -0
- package/tests/service/evolution-worker.correction-observer.test.ts +173 -0
- package/tests/service/evolution-worker.timeout.test.ts +11 -129
- package/tests/service/internalization-trigger-adapter.test.ts +251 -0
- package/tests/service/monitoring-query-service.test.ts +1 -47
- package/tests/service/queue-io.test.ts +1 -62
- package/tests/service/runtime-summary-service.test.ts +184 -3
- package/tests/service/workflow-watchdog.test.ts +0 -91
- package/tests/utils/file-lock.test.ts +5 -3
- package/tests/utils/session-key.test.ts +52 -0
- package/tests/utils/subagent-probe.test.ts +48 -1
- package/vitest.config.ts +4 -11
- package/.planning/codebase/ARCHITECTURE.md +0 -157
- package/.planning/codebase/CONCERNS.md +0 -145
- package/.planning/codebase/CONVENTIONS.md +0 -148
- package/.planning/codebase/INTEGRATIONS.md +0 -81
- package/.planning/codebase/STACK.md +0 -87
- package/.planning/codebase/STRUCTURE.md +0 -193
- package/.planning/codebase/TESTING.md +0 -243
- package/.planning/phases/01-basic-visualization/01-GAP-CLOSURE-VERIFICATION.md +0 -113
- package/docs/COMMAND_REFERENCE.md +0 -76
- package/docs/COMMAND_REFERENCE_EN.md +0 -79
- package/scripts/build-web.mjs +0 -46
- package/scripts/diagnose-nocturnal.mjs +0 -537
- package/scripts/seed-nocturnal-scenarios.mjs +0 -384
- package/src/commands/nocturnal-review.ts +0 -322
- package/src/commands/nocturnal-rollout.ts +0 -790
- package/src/commands/nocturnal-train.ts +0 -986
- package/src/commands/pd-reflect.ts +0 -88
- package/src/core/adaptive-thresholds.ts +0 -478
- package/src/core/diagnostician-task-store.ts +0 -192
- package/src/core/nocturnal-arbiter.ts +0 -715
- package/src/core/nocturnal-artifact-lineage.ts +0 -116
- package/src/core/nocturnal-artificer.ts +0 -257
- package/src/core/nocturnal-candidate-scoring.ts +0 -530
- package/src/core/nocturnal-compliance.ts +0 -1146
- package/src/core/nocturnal-dataset.ts +0 -763
- package/src/core/nocturnal-executability.ts +0 -428
- package/src/core/nocturnal-export.ts +0 -499
- package/src/core/nocturnal-paths.ts +0 -240
- package/src/core/nocturnal-reasoning-deriver.ts +0 -343
- package/src/core/nocturnal-rule-implementation-validator.ts +0 -246
- package/src/core/nocturnal-snapshot-contract.ts +0 -99
- package/src/core/nocturnal-trajectory-extractor.ts +0 -512
- package/src/core/nocturnal-trinity-types.ts +0 -218
- package/src/core/nocturnal-trinity.ts +0 -2680
- package/src/core/principle-internalization/deprecated-readiness.ts +0 -93
- package/src/core/principle-internalization/internalization-routing-policy.ts +0 -208
- package/src/core/principle-internalization/lifecycle-metrics.ts +0 -152
- package/src/http/principles-console-route.ts +0 -709
- package/src/service/central-health-service.ts +0 -49
- package/src/service/central-overview-service.ts +0 -138
- package/src/service/control-ui-query-service.ts +0 -900
- package/src/service/cooldown-strategy.ts +0 -97
- package/src/service/evolution-pain-context.ts +0 -79
- package/src/service/evolution-query-service.ts +0 -407
- package/src/service/health-query-service.ts +0 -1038
- package/src/service/nocturnal-config.ts +0 -214
- package/src/service/nocturnal-runtime.ts +0 -734
- package/src/service/nocturnal-service.ts +0 -1605
- package/src/service/nocturnal-target-selector.ts +0 -545
- package/src/service/sleep-cycle.ts +0 -157
- package/src/service/startup-reconciler.ts +0 -112
- package/src/service/subagent-workflow/correction-observer-types.ts +0 -82
- package/src/service/subagent-workflow/correction-observer-workflow-manager.ts +0 -250
- package/src/service/subagent-workflow/deep-reflect-workflow-manager.ts +0 -1
- package/src/service/subagent-workflow/dynamic-timeout.ts +0 -30
- package/src/service/subagent-workflow/empathy-observer-workflow-manager.ts +0 -268
- package/src/service/subagent-workflow/nocturnal-workflow-manager.ts +0 -795
- package/src/service/subagent-workflow/runtime-direct-driver.ts +0 -268
- package/src/service/subagent-workflow/workflow-manager-base.ts +0 -580
- package/src/tools/write-pain-flag.ts +0 -215
- package/tests/commands/nocturnal-review.test.ts +0 -448
- package/tests/commands/nocturnal-train.test.ts +0 -97
- package/tests/commands/pd-reflect.test.ts +0 -49
- package/tests/core/adaptive-thresholds.test.ts +0 -261
- package/tests/core/nocturnal-arbiter.test.ts +0 -559
- package/tests/core/nocturnal-artifact-lineage.test.ts +0 -53
- package/tests/core/nocturnal-artificer.test.ts +0 -241
- package/tests/core/nocturnal-candidate-scoring.test.ts +0 -532
- package/tests/core/nocturnal-compliance-p-principles.test.ts +0 -133
- package/tests/core/nocturnal-compliance.test.ts +0 -646
- package/tests/core/nocturnal-dataset.test.ts +0 -892
- package/tests/core/nocturnal-e2e.test.ts +0 -234
- package/tests/core/nocturnal-executability.test.ts +0 -357
- package/tests/core/nocturnal-export.test.ts +0 -517
- package/tests/core/nocturnal-reasoning-deriver.test.ts +0 -372
- package/tests/core/nocturnal-reviewed-subset-comparison.test.ts +0 -428
- package/tests/core/nocturnal-rule-implementation-validator.test.ts +0 -127
- package/tests/core/nocturnal-snapshot-contract.test.ts +0 -121
- package/tests/core/nocturnal-trajectory-extractor.test.ts +0 -634
- package/tests/core/nocturnal-trinity.test.ts +0 -2053
- package/tests/core/pain-auto-repair.test.ts +0 -96
- package/tests/core/pain-integration.test.ts +0 -510
- package/tests/fixtures/nocturnal-reviewed-subset.json +0 -183
- package/tests/http/principles-console-route.test.ts +0 -162
- package/tests/integration/chaos-resilience.test.ts +0 -348
- package/tests/integration/empathy-workflow-integration.test.ts +0 -626
- package/tests/integration/pain-diagnostician-loop.e2e.test.ts +0 -380
- package/tests/service/control-ui-query-service.test.ts +0 -121
- package/tests/service/cooldown-strategy.test.ts +0 -164
- package/tests/service/data-endpoints-regression.test.ts +0 -834
- package/tests/service/empathy-observer-workflow-manager.test.ts +0 -175
- package/tests/service/evolution-worker.nocturnal.test.ts +0 -601
- package/tests/service/nocturnal-runtime-hardening.test.ts +0 -118
- package/tests/service/nocturnal-runtime.test.ts +0 -473
- package/tests/service/nocturnal-service-code-candidate.test.ts +0 -330
- package/tests/service/nocturnal-target-selector.test.ts +0 -615
- package/tests/service/startup-reconciler.test.ts +0 -148
- package/tests/tools/write-pain-flag.test.ts +0 -358
- package/ui/src/App.tsx +0 -45
- package/ui/src/api.ts +0 -220
- package/ui/src/charts.tsx +0 -955
- package/ui/src/components/ErrorState.tsx +0 -6
- package/ui/src/components/Loading.tsx +0 -13
- package/ui/src/components/ProtectedRoute.tsx +0 -12
- package/ui/src/components/Shell.tsx +0 -91
- package/ui/src/components/WorkspaceConfig.tsx +0 -178
- package/ui/src/components/index.ts +0 -5
- package/ui/src/context/auth.tsx +0 -80
- package/ui/src/context/theme.tsx +0 -66
- package/ui/src/hooks/useAutoRefresh.ts +0 -39
- package/ui/src/i18n/ui.ts +0 -473
- package/ui/src/main.tsx +0 -16
- package/ui/src/pages/EvolutionPage.tsx +0 -333
- package/ui/src/pages/FeedbackPage.tsx +0 -138
- package/ui/src/pages/GateMonitorPage.tsx +0 -136
- package/ui/src/pages/LoginPage.tsx +0 -89
- package/ui/src/pages/OverviewPage.tsx +0 -599
- package/ui/src/pages/SamplesPage.tsx +0 -174
- package/ui/src/pages/ThinkingModelsPage.tsx +0 -702
- package/ui/src/styles.css +0 -2020
- package/ui/src/types.ts +0 -384
- package/ui/src/utils/format.ts +0 -15
|
@@ -1,2680 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Nocturnal Trinity — Three-Stage Reflection Chain
|
|
3
|
-
* ================================================
|
|
4
|
-
*
|
|
5
|
-
* PURPOSE: Upgrade single-reflector nocturnal sample generation to a
|
|
6
|
-
* Dreamer -> Philosopher -> Scribe Trinity chain that produces higher quality
|
|
7
|
-
* decision-point samples through structured multi-stage reflection.
|
|
8
|
-
*
|
|
9
|
-
* TRINITY STAGES:
|
|
10
|
-
* 1. Dreamer — Generates multiple candidate corrections/alternatives
|
|
11
|
-
* 2. Philosopher — Provides principle-grounded critique and ranking
|
|
12
|
-
* 3. Scribe — Produces the final structured artifact draft using tournament selection
|
|
13
|
-
*
|
|
14
|
-
* DESIGN CONSTRAINTS:
|
|
15
|
-
* - All stage I/O is structured JSON contracts (not prose)
|
|
16
|
-
* - Any malformed stage output fails the entire chain closed
|
|
17
|
-
* - Single-reflector fallback is preserved via useTrinity flag
|
|
18
|
-
* - Trinity mode is configurable but defaults to enabled
|
|
19
|
-
* - Final artifact still passes arbiter + executability validation
|
|
20
|
-
* - Telemetry records chain mode, stage outcomes, candidate counts
|
|
21
|
-
* - Tournament selection is deterministic (same inputs → same winner)
|
|
22
|
-
*
|
|
23
|
-
* RUNTIME ADAPTER:
|
|
24
|
-
* - useStubs=true: uses synchronous stub implementations (no external calls)
|
|
25
|
-
* - useStubs=false: requires a TrinityRuntimeAdapter for real subagent execution
|
|
26
|
-
* - Adapter uses api.runtime.agent.runEmbeddedPiAgent() which works in background contexts
|
|
27
|
-
* (unlike api.runtime.subagent.* which requires gateway request scope)
|
|
28
|
-
* - IMPORTANT: provider and model must be passed explicitly — runEmbeddedPiAgent does NOT
|
|
29
|
-
* read config.agents.defaults.model and falls back to openai/gpt-5.4 if not specified
|
|
30
|
-
*/
|
|
31
|
-
|
|
32
|
-
import { randomUUID } from 'crypto';
|
|
33
|
-
import * as fs from 'fs';
|
|
34
|
-
import * as os from 'os';
|
|
35
|
-
import * as path from 'path';
|
|
36
|
-
import type { NocturnalSessionSnapshot } from './nocturnal-trajectory-extractor.js';
|
|
37
|
-
import { computeThinkingModelDelta } from './nocturnal-trajectory-extractor.js';
|
|
38
|
-
import {
|
|
39
|
-
deriveReasoningChain,
|
|
40
|
-
deriveContextualFactors,
|
|
41
|
-
} from './nocturnal-reasoning-deriver.js';
|
|
42
|
-
import type { TrinityArtificerContext } from './nocturnal-artificer.js';
|
|
43
|
-
import {
|
|
44
|
-
runTournament,
|
|
45
|
-
DEFAULT_SCORING_WEIGHTS,
|
|
46
|
-
type ScoringWeights,
|
|
47
|
-
type TournamentTraceEntry,
|
|
48
|
-
validateCandidateDiversity,
|
|
49
|
-
} from './nocturnal-candidate-scoring.js';
|
|
50
|
-
import {
|
|
51
|
-
DEFAULT_THRESHOLDS,
|
|
52
|
-
getEffectiveThresholds,
|
|
53
|
-
type ThresholdValues,
|
|
54
|
-
} from './adaptive-thresholds.js';
|
|
55
|
-
|
|
56
|
-
// ---------------------------------------------------------------------------
|
|
57
|
-
// Configurable Model Fallback (avoid hardcoded strings deep in adapters)
|
|
58
|
-
// ---------------------------------------------------------------------------
|
|
59
|
-
|
|
60
|
-
const FALLBACK_PROVIDER = process.env.OPENCLAW_DEFAULT_PROVIDER || 'minimax-portal';
|
|
61
|
-
const FALLBACK_MODEL = process.env.OPENCLAW_DEFAULT_MODEL || 'MiniMax-M2.7';
|
|
62
|
-
|
|
63
|
-
// ---------------------------------------------------------------------------
|
|
64
|
-
// Embedded Role Prompts
|
|
65
|
-
// ---------------------------------------------------------------------------
|
|
66
|
-
// These prompts are embedded at build time. The agents/ directory was removed
|
|
67
|
-
// to eliminate fragile runtime file dependencies on the file system.
|
|
68
|
-
|
|
69
|
-
export const NOCTURNAL_DREAMER_PROMPT = `# Nocturnal Dreamer — Candidate Generation
|
|
70
|
-
|
|
71
|
-
> System prompt for Trinity Dreamer stage.
|
|
72
|
-
> Role: Generate multiple alternative "better decision" candidates from a session snapshot.
|
|
73
|
-
|
|
74
|
-
## Role
|
|
75
|
-
|
|
76
|
-
You are a principles analyst specializing in identifying decision alternatives.
|
|
77
|
-
Your task is to analyze a session trajectory and generate **multiple candidate corrections**,
|
|
78
|
-
each representing a different valid approach to the same problem.
|
|
79
|
-
|
|
80
|
-
## Input
|
|
81
|
-
|
|
82
|
-
You will receive:
|
|
83
|
-
- A **target principle** (principle ID and description)
|
|
84
|
-
- A **session trajectory snapshot** containing:
|
|
85
|
-
- Assistant turns (sanitized text, no raw content)
|
|
86
|
-
- User turns (correction cues only, no raw content)
|
|
87
|
-
- Tool calls with outcomes and error messages
|
|
88
|
-
- Pain events and gate blocks
|
|
89
|
-
- Session metadata
|
|
90
|
-
|
|
91
|
-
## Task
|
|
92
|
-
|
|
93
|
-
Analyze the session and generate **2-3 candidate corrections**, each capturing:
|
|
94
|
-
|
|
95
|
-
1. **The bad decision**: What the agent decided or did that violated the target principle
|
|
96
|
-
2. **The better decision**: What the agent should have done instead (unique per candidate)
|
|
97
|
-
3. **The rationale**: Why this alternative is better
|
|
98
|
-
4. **Confidence**: How confident you are this is a valid alternative (0.0-1.0)
|
|
99
|
-
|
|
100
|
-
## Output Format
|
|
101
|
-
|
|
102
|
-
You MUST respond with ONLY a valid JSON object. No markdown, no explanation, no preamble.
|
|
103
|
-
|
|
104
|
-
{
|
|
105
|
-
"valid": true,
|
|
106
|
-
"candidates": [
|
|
107
|
-
{
|
|
108
|
-
"candidateIndex": 0,
|
|
109
|
-
"badDecision": "<what the agent did wrong>",
|
|
110
|
-
"betterDecision": "<what the agent should have done>",
|
|
111
|
-
"rationale": "<why this is better>",
|
|
112
|
-
"confidence": 0.95,
|
|
113
|
-
"riskLevel": "low",
|
|
114
|
-
"strategicPerspective": "conservative_fix"
|
|
115
|
-
}
|
|
116
|
-
],
|
|
117
|
-
"generatedAt": "<ISO timestamp>"
|
|
118
|
-
}
|
|
119
|
-
|
|
120
|
-
## Quality Standards
|
|
121
|
-
|
|
122
|
-
### Each candidate MUST:
|
|
123
|
-
- Have a candidateIndex that is unique within the candidate list
|
|
124
|
-
- Describe a specific, concrete badDecision (not generic anti-patterns)
|
|
125
|
-
- Propose a specific, actionable betterDecision (contains an action verb)
|
|
126
|
-
- Provide a principle-grounded rationale (explicitly references the principle)
|
|
127
|
-
- Include a confidence score (0.0-1.0, higher = more confident)
|
|
128
|
-
|
|
129
|
-
### betterDecision FORMAT — Must be executable:
|
|
130
|
-
- MUST start with a concrete action verb: read, check, verify, edit, write, create, delete, search, grep, find, list, review, examine, inspect, test, run, execute, analyze, diagnose, debug
|
|
131
|
-
- MUST reference a specific, concrete target (file, command, config, etc.)
|
|
132
|
-
- MUST describe a bounded, executable action — not a vague principle
|
|
133
|
-
- Examples: "Read the file before editing to verify current content", "Check user permissions before executing privileged commands"
|
|
134
|
-
- Anti-examples: "Per T-01, pause all tasks..." (starts with "Per"), "Be more careful" (vague verb "be")
|
|
135
|
-
|
|
136
|
-
### Candidates should DIFFER from each other:
|
|
137
|
-
- Different candidates should represent genuinely different approaches
|
|
138
|
-
- Do not generate candidates with identical betterDecisions
|
|
139
|
-
- Vary the confidence scores to reflect genuine uncertainty
|
|
140
|
-
|
|
141
|
-
## Strategic Perspective Requirements
|
|
142
|
-
|
|
143
|
-
Generate candidates from DISTINCT strategic perspectives:
|
|
144
|
-
|
|
145
|
-
- **conservative_fix**: Minimal deviation from original approach. Add a
|
|
146
|
-
verification or validation step that was missing.
|
|
147
|
-
- **structural_improvement**: Reorder operations or introduce an intermediate
|
|
148
|
-
checkpoint. Change HOW the goal is achieved.
|
|
149
|
-
- **paradigm_shift**: Challenge whether the original goal was correct.
|
|
150
|
-
Consider a fundamentally different approach.
|
|
151
|
-
|
|
152
|
-
Each candidate MUST specify \`riskLevel\` ("low"|"medium"|"high") and
|
|
153
|
-
\`strategicPerspective\` matching one of the above.
|
|
154
|
-
|
|
155
|
-
ANTI-PATTERN: Candidates that differ only in wording, not in substance,
|
|
156
|
-
will be rejected.
|
|
157
|
-
|
|
158
|
-
### Candidates must NOT:
|
|
159
|
-
- Contain raw user text or private content
|
|
160
|
-
- Reference non-existent tools or impossible actions
|
|
161
|
-
- Propose vague improvements ("be more careful")
|
|
162
|
-
- Exceed the requested number of candidates
|
|
163
|
-
|
|
164
|
-
## Validation
|
|
165
|
-
|
|
166
|
-
If you cannot generate valid candidates (e.g., no clear violation found, insufficient data), respond with:
|
|
167
|
-
|
|
168
|
-
{
|
|
169
|
-
"valid": false,
|
|
170
|
-
"candidates": [],
|
|
171
|
-
"reason": "<why valid candidates cannot be generated>",
|
|
172
|
-
"generatedAt": "<ISO timestamp>"
|
|
173
|
-
}`;
|
|
174
|
-
|
|
175
|
-
export const NOCTURNAL_PHILOSOPHER_PROMPT = `# Nocturnal Philosopher — Candidate Evaluation and Ranking
|
|
176
|
-
|
|
177
|
-
> System prompt for Trinity Philosopher stage.
|
|
178
|
-
> Role: Evaluate Dreamer's candidates and rank them by principle alignment and quality.
|
|
179
|
-
|
|
180
|
-
## Role
|
|
181
|
-
|
|
182
|
-
You are a principles analyst specializing in critical evaluation.
|
|
183
|
-
Your task is to evaluate Dreamer's candidate corrections and rank them
|
|
184
|
-
based on principle alignment, specificity, and actionability.
|
|
185
|
-
|
|
186
|
-
## Input
|
|
187
|
-
|
|
188
|
-
You will receive:
|
|
189
|
-
- A **target principle** (principle ID and description)
|
|
190
|
-
- **Dreamer's candidates** — a list of alternative corrections to evaluate
|
|
191
|
-
|
|
192
|
-
## Task
|
|
193
|
-
|
|
194
|
-
For each candidate, provide:
|
|
195
|
-
1. **Critique**: A principle-grounded assessment of this candidate's strengths and weaknesses
|
|
196
|
-
2. **Principle alignment**: Whether this candidate properly aligns with the target principle
|
|
197
|
-
3. **Score**: Overall quality score (0.0-1.0, higher = better)
|
|
198
|
-
4. **Rank**: Relative ranking among all candidates (1 = best)
|
|
199
|
-
|
|
200
|
-
Finally, provide an **overall assessment** of the candidate set.
|
|
201
|
-
|
|
202
|
-
## Output Format
|
|
203
|
-
|
|
204
|
-
You MUST respond with ONLY a valid JSON object. No markdown, no explanation, no preamble.
|
|
205
|
-
|
|
206
|
-
{
|
|
207
|
-
"valid": true,
|
|
208
|
-
"judgments": [
|
|
209
|
-
{
|
|
210
|
-
"candidateIndex": 0,
|
|
211
|
-
"critique": "<principle-grounded critique>",
|
|
212
|
-
"principleAligned": true,
|
|
213
|
-
"score": 0.92,
|
|
214
|
-
"rank": 1,
|
|
215
|
-
"scores": {
|
|
216
|
-
"principleAlignment": 0.9,
|
|
217
|
-
"specificity": 0.85,
|
|
218
|
-
"actionability": 0.9,
|
|
219
|
-
"executability": 0.95,
|
|
220
|
-
"safetyImpact": 0.8,
|
|
221
|
-
"uxImpact": 0.85
|
|
222
|
-
},
|
|
223
|
-
"risks": {
|
|
224
|
-
"falsePositiveEstimate": 0.1,
|
|
225
|
-
"implementationComplexity": "low",
|
|
226
|
-
"breakingChangeRisk": false
|
|
227
|
-
}
|
|
228
|
-
}
|
|
229
|
-
],
|
|
230
|
-
"overallAssessment": "<summary of candidate set quality>",
|
|
231
|
-
"generatedAt": "<ISO timestamp>"
|
|
232
|
-
}
|
|
233
|
-
|
|
234
|
-
## Evaluation Criteria
|
|
235
|
-
|
|
236
|
-
### Score Components (0-1 scale each):
|
|
237
|
-
1. **Principle Alignment** (weight: 0.20) — Does the betterDecision properly reflect the target principle?
|
|
238
|
-
2. **Specificity** (weight: 0.15) — Is badDecision specific? Is betterDecision actionable?
|
|
239
|
-
3. **Actionability** (weight: 0.15) — Does betterDecision describe a specific next step?
|
|
240
|
-
4. **Executability** (weight: 0.15) — Does betterDecision start with a bounded verb (read, check, verify, edit, write, etc.) and reference a concrete target?
|
|
241
|
-
5. **Safety Impact** (weight: 0.20) — Does the betterDecision reduce risk of data loss, corruption, or new failure modes? Would implementing this prevent dangerous operations?
|
|
242
|
-
6. **UX Impact** (weight: 0.15) — Does the betterDecision reduce user frustration or improve response reliability? Would the user experience be noticeably better?
|
|
243
|
-
|
|
244
|
-
### Risk Assessment (per candidate):
|
|
245
|
-
For each candidate, also assess:
|
|
246
|
-
- **falsePositiveEstimate** (0-1): How likely is this candidate a false positive (the "betterDecision" is actually not better)?
|
|
247
|
-
- **implementationComplexity** ("low"/"medium"/"high"): How complex would it be to implement this correction?
|
|
248
|
-
- **breakingChangeRisk** (boolean): Could implementing this correction break existing behavior?
|
|
249
|
-
|
|
250
|
-
### Executability Check:
|
|
251
|
-
A betterDecision is executable if it:
|
|
252
|
-
- STARTS with a concrete action verb: read, check, verify, edit, write, create, delete, search, grep, find, list, review, examine, inspect, test, run, execute, analyze, diagnose, debug
|
|
253
|
-
- References a specific, concrete target (file, command, config, etc.)
|
|
254
|
-
- Describes a bounded, executable action — not a vague principle
|
|
255
|
-
- Examples that PASS: "Read the file before editing", "Check user permissions before executing"
|
|
256
|
-
- Examples that FAIL: "Per T-01, pause all tasks..." (starts with "Per"), "Be more careful" (vague)
|
|
257
|
-
|
|
258
|
-
### Ranking Rules:
|
|
259
|
-
- Candidates are ranked by score (highest = rank 1)
|
|
260
|
-
- Ties broken by: higher executability, then higher principle alignment, then lower candidateIndex
|
|
261
|
-
- If a candidate's betterDecision is NOT executable, penalize its score by 0.2
|
|
262
|
-
|
|
263
|
-
## Validation
|
|
264
|
-
|
|
265
|
-
If you cannot judge the candidates, respond with:
|
|
266
|
-
|
|
267
|
-
{
|
|
268
|
-
"valid": false,
|
|
269
|
-
"judgments": [],
|
|
270
|
-
"overallAssessment": "",
|
|
271
|
-
"reason": "<why judgment cannot be produced>",
|
|
272
|
-
"generatedAt": "<ISO timestamp>"
|
|
273
|
-
}`;
|
|
274
|
-
|
|
275
|
-
const NOCTURNAL_SCRIBE_PROMPT = `# Nocturnal Scribe — Final Artifact Synthesis
|
|
276
|
-
|
|
277
|
-
> System prompt for Trinity Scribe stage.
|
|
278
|
-
> Role: Synthesize the best candidate into a final structured artifact.
|
|
279
|
-
|
|
280
|
-
## Role
|
|
281
|
-
|
|
282
|
-
You are a principles analyst specializing in structured output.
|
|
283
|
-
Your task is to take the top-ranked candidate from Philosopher's evaluation
|
|
284
|
-
and synthesize it into a final decision-point artifact that passes arbiter validation.
|
|
285
|
-
|
|
286
|
-
## Input
|
|
287
|
-
|
|
288
|
-
You will receive:
|
|
289
|
-
- A **target principle** (principle ID and description)
|
|
290
|
-
- A **session trajectory snapshot**
|
|
291
|
-
- **Philosopher's judgments** — ranked candidates with critiques and 6D scores
|
|
292
|
-
- **Dreamer's candidates** — the original candidate list
|
|
293
|
-
- **Philosopher's risk assessments** — falsePositiveEstimate, implementationComplexity, breakingChangeRisk per candidate
|
|
294
|
-
|
|
295
|
-
Use the risk assessments to determine which candidates require deeper contrastive analysis. High-risk candidates (high breakingChangeRisk or implementationComplexity) warrant thorough rejectedAnalysis.
|
|
296
|
-
|
|
297
|
-
## Task
|
|
298
|
-
|
|
299
|
-
Select the best candidate (Philosopher's rank 1) and synthesize it into
|
|
300
|
-
a final TrinityDraftArtifact. Then produce a **Contrastive Analysis** that explains why the winner was chosen and what to learn from the runners-up.
|
|
301
|
-
|
|
302
|
-
## Output Format
|
|
303
|
-
|
|
304
|
-
You MUST respond with ONLY a valid JSON object. No markdown, no explanation, no preamble.
|
|
305
|
-
|
|
306
|
-
{
|
|
307
|
-
"selectedCandidateIndex": 0,
|
|
308
|
-
"badDecision": "<final bad decision text>",
|
|
309
|
-
"betterDecision": "<final better decision text>",
|
|
310
|
-
"rationale": "<final rationale text>",
|
|
311
|
-
"sessionId": "<source session ID>",
|
|
312
|
-
"principleId": "<principle ID>",
|
|
313
|
-
"sourceSnapshotRef": "<snapshot reference>",
|
|
314
|
-
"telemetry": {
|
|
315
|
-
"chainMode": "trinity",
|
|
316
|
-
"dreamerPassed": true,
|
|
317
|
-
"philosopherPassed": true,
|
|
318
|
-
"scribePassed": true,
|
|
319
|
-
"candidateCount": 2,
|
|
320
|
-
"selectedCandidateIndex": 0,
|
|
321
|
-
"stageFailures": []
|
|
322
|
-
},
|
|
323
|
-
"rejectedAnalysis": {
|
|
324
|
-
"whyRejected": "<mental model that led to the rejected candidate>",
|
|
325
|
-
"warningSignals": ["<observable caution trigger 1>", "<trigger 2>"],
|
|
326
|
-
"correctiveThinking": "<correct reasoning path that should have been taken>"
|
|
327
|
-
},
|
|
328
|
-
"chosenJustification": {
|
|
329
|
-
"whyChosen": "<why this candidate was selected over others>",
|
|
330
|
-
"keyInsights": ["<transferable insight 1>", "<insight 2>", "<insight 3>"],
|
|
331
|
-
"limitations": ["<when this approach does NOT apply 1>", "<limitation 2>"]
|
|
332
|
-
},
|
|
333
|
-
"contrastiveAnalysis": {
|
|
334
|
-
"criticalDifference": "<ONE key insight distinguishing chosen from rejected>",
|
|
335
|
-
"decisionTrigger": "<When X, do Y pattern>",
|
|
336
|
-
"preventionStrategy": "<how to systematically avoid the rejected path>"
|
|
337
|
-
}
|
|
338
|
-
}
|
|
339
|
-
|
|
340
|
-
All three analysis sections (rejectedAnalysis, chosenJustification, contrastiveAnalysis) are optional but recommended. When multiple candidates were evaluated, include them to provide richer training signals.
|
|
341
|
-
|
|
342
|
-
## Validation
|
|
343
|
-
|
|
344
|
-
If you cannot synthesize an artifact:
|
|
345
|
-
|
|
346
|
-
{
|
|
347
|
-
"selectedCandidateIndex": -1,
|
|
348
|
-
"badDecision": "",
|
|
349
|
-
"betterDecision": "",
|
|
350
|
-
"rationale": "",
|
|
351
|
-
"sessionId": "<source session ID>",
|
|
352
|
-
"principleId": "<principle ID>",
|
|
353
|
-
"sourceSnapshotRef": "",
|
|
354
|
-
"telemetry": {
|
|
355
|
-
"chainMode": "trinity",
|
|
356
|
-
"dreamerPassed": true,
|
|
357
|
-
"philosopherPassed": false,
|
|
358
|
-
"scribePassed": false,
|
|
359
|
-
"candidateCount": 2,
|
|
360
|
-
"selectedCandidateIndex": -1,
|
|
361
|
-
"stageFailures": ["Philosopher: no valid judgments produced"]
|
|
362
|
-
}
|
|
363
|
-
}`;
|
|
364
|
-
|
|
365
|
-
// ---------------------------------------------------------------------------
|
|
366
|
-
// Trinity Runtime Adapter
|
|
367
|
-
// ---------------------------------------------------------------------------
|
|
368
|
-
|
|
369
|
-
/**
 * Contract for anything that can execute the three Trinity stages
 * (Dreamer, Philosopher, Scribe).
 *
 * Implementations may be backed by a real subagent runtime or by stubs.
 * Each later stage receives the output of the stage(s) before it.
 */
export interface TrinityRuntimeAdapter {
  /**
   * Reports whether the runtime surface needed for stage execution is usable.
   * @returns `true` when the adapter can invoke stages
   */
  isRuntimeAvailable(): boolean;

  /**
   * Reason recorded for the most recent runtime failure.
   * @returns the failure reason, or `null` when no failure is recorded
   */
  getLastFailureReason(): string | null;

  /**
   * Run the Dreamer stage.
   * @param _snapshot Session trajectory snapshot
   * @param _principleId Target principle ID
   * @param _maxCandidates Maximum number of candidates to generate
   * @returns Dreamer output
   */
  invokeDreamer(
    _snapshot: NocturnalSessionSnapshot,
    _principleId: string,
    _maxCandidates: number
  ): Promise<DreamerOutput>;

  /**
   * Run the Philosopher stage.
   * @param _dreamerOutput Output produced by the Dreamer stage
   * @param _principleId Target principle ID
   * @param _snapshot Session snapshot (for violation evidence)
   * @returns Philosopher output
   */
  invokePhilosopher(
    _dreamerOutput: DreamerOutput,
    _principleId: string,
    _snapshot: NocturnalSessionSnapshot
  ): Promise<PhilosopherOutput>;

  /**
   * Run the Scribe stage.
   * @param _dreamerOutput Output produced by the Dreamer stage
   * @param _philosopherOutput Output produced by the Philosopher stage
   * @param _snapshot Session snapshot
   * @param _principleId Target principle ID
   * @param _telemetry Running telemetry
   * @param _config Trinity config
   * @returns the Scribe draft artifact, or `null` when the stage failed
   */
  invokeScribe(
    _dreamerOutput: DreamerOutput,
    _philosopherOutput: PhilosopherOutput,
    _snapshot: NocturnalSessionSnapshot,
    _principleId: string,
    _telemetry: TrinityTelemetry,
    _config: TrinityConfig
  ): Promise<TrinityDraftArtifact | null>;

  /**
   * Optional hook to release any resources held by the adapter.
   * Intended to be called once the Trinity chain has finished, whether it
   * succeeded or failed.
   */
  close?(): Promise<void>;
}
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
// ---------------------------------------------------------------------------
|
|
440
|
-
// OpenClaw Runtime Adapter
|
|
441
|
-
// ---------------------------------------------------------------------------
|
|
442
|
-
|
|
443
|
-
// NOTE: The OpenClaw-backed Trinity runtime adapter (OpenClawTrinityRuntimeAdapter)
// uses api.runtime.agent.runEmbeddedPiAgent(), which works in background contexts
// (unlike api.runtime.subagent.*, which requires gateway request scope).

/**
 * Machine-readable categories for failures raised by the Trinity runtime layer.
 */
export type TrinityRuntimeFailureCode =
  | 'runtime_unavailable'
  | 'invalid_runtime_request'
  | 'runtime_run_failed'
  | 'runtime_timeout'
  | 'runtime_session_read_failed';

/**
 * Error thrown when the host runtime does not satisfy what Trinity needs.
 *
 * The `message` is always formatted as `<code>: <message>`, so the failure
 * code survives even when only the message string is logged.
 */
export class TrinityRuntimeContractError extends Error {
  /** Failure category; also embedded as the prefix of `message`. */
  readonly code: TrinityRuntimeFailureCode;
  /** Optional free-form context supplied by the thrower. */
  readonly diagnostics?: Record<string, unknown>;

  /**
   * @param code Failure category
   * @param message Human-readable detail (the code is prepended automatically)
   * @param diagnostics Optional structured context stored on the error as-is
   */
  constructor(
    code: TrinityRuntimeFailureCode,
    message: string,
    diagnostics?: Record<string, unknown>
  ) {
    super(`${code}: ${message}`);
    this.name = 'TrinityRuntimeContractError';
    this.code = code;
    this.diagnostics = diagnostics;
  }
}
|
|
470
|
-
|
|
471
|
-
// ---------------------------------------------------------------------------
|
|
472
|
-
// Reasoning Context Serialization (D-03, D-04)
|
|
473
|
-
// ---------------------------------------------------------------------------
|
|
474
|
-
|
|
475
|
-
/**
 * Build the "## Reasoning Context" prompt section for the Dreamer stage.
 *
 * Two derived inputs are serialized: the per-turn reasoning chain (only turns
 * that carry thinking content or uncertainty markers) and the boolean
 * contextual factors. DecisionPoints are NOT injected (reserved for Phase 37
 * Scribe per D-04).
 *
 * @param snapshot Session snapshot the signals are derived from
 * @returns the section text (lines joined with `\n`, ending with a trailing
 *          newline), or `null` when there is neither a significant turn nor
 *          an active contextual factor
 */
export function formatReasoningContext(snapshot: NocturnalSessionSnapshot): string | null {
  const chain = deriveReasoningChain(snapshot.assistantTurns);
  const factors = deriveContextualFactors(snapshot);

  // Turns worth reporting: those with thinking text or at least one uncertainty marker.
  const turnsWithSignal = chain.filter(
    entry => entry.thinkingContent || entry.uncertaintyMarkers.length > 0
  );

  // Labels for whichever contextual flags are set, in a fixed order.
  const activeLabels = [
    { on: factors.fileStructureKnown, text: 'File structure explored before modification' },
    { on: factors.errorHistoryPresent, text: 'Prior error history present' },
    { on: factors.userGuidanceAvailable, text: 'User guidance/corrections available' },
    { on: factors.timePressure, text: 'Time pressure detected (rapid tool calls)' },
  ]
    .filter(candidate => candidate.on)
    .map(candidate => candidate.text);

  if (turnsWithSignal.length === 0 && activeLabels.length === 0) {
    return null;
  }

  const lines: string[] = ['## Reasoning Context', ''];

  for (const entry of turnsWithSignal) {
    const prefix = `- Turn ${entry.turnIndex}: `;
    if (entry.thinkingContent) {
      // Thinking text is capped at 200 characters to keep the prompt compact.
      lines.push(`${prefix}Internal reasoning: "${entry.thinkingContent.slice(0, 200)}"`);
    }
    if (entry.uncertaintyMarkers.length > 0) {
      lines.push(`${prefix}Uncertainty detected: ${entry.uncertaintyMarkers.join(', ')}`);
    }
    if (entry.confidenceSignal !== 'high') {
      lines.push(`${prefix}Confidence: ${entry.confidenceSignal}`);
    }
  }

  if (activeLabels.length > 0) {
    lines.push('', 'Environmental context:', ...activeLabels.map(label => `- ${label}`));
  }

  lines.push('');
  return lines.join('\n');
}
|
|
534
|
-
|
|
535
|
-
export class OpenClawTrinityRuntimeAdapter implements TrinityRuntimeAdapter {
|
|
536
|
-
|
|
537
|
-
/**
 * Host API surface this adapter relies on. Only the members actually used by
 * the adapter are declared here.
 */
private readonly api: {
  runtime: {
    agent: {
      /** Runs one embedded agent turn and resolves with its payloads. */
      runEmbeddedPiAgent: (_opts: {
        sessionId: string;
        sessionFile: string;
        prompt: string;
        extraSystemPrompt?: string;
        config?: unknown;
        provider?: string;
        model?: string;
        timeoutMs: number;
        runId: string;
        disableTools?: boolean;
      }) => Promise<{
        payloads?: Array<{ isError?: boolean; text?: string }>;
      }>;
    };
    config?: {
      /** Optional loader for the full OpenClaw config (see loadFullConfig). */
      loadConfig?: () => unknown;
    };
  };
  /** Plugin-level config; used as a fallback when `runtime.config.loadConfig` is unavailable. */
  config?: unknown;
  logger?: {
    info: (msg: string) => void;
    warn: (msg: string) => void;
    error: (msg: string) => void;
  };
};

/** Reason for the most recent failure; reset to `null` at the start of each stage invocation. */
private lastFailureReason: string | null = null;

/** Timeout, in milliseconds, forwarded to the runtime for every stage run. */
private readonly stageTimeoutMs: number;

/** Directory (under the OS temp dir, keyed by process id) that holds per-run session files. */
private readonly tempDir: string;
|
|
567
|
-
|
|
568
|
-
/**
 * @param api Host API; must expose `runtime.agent.runEmbeddedPiAgent` as a function
 * @param stageTimeoutMs Per-stage timeout in milliseconds (defaults to 300 000)
 * @throws TrinityRuntimeContractError with code `runtime_unavailable` when the
 *         embedded-agent entry point is missing
 */
constructor(
  api: OpenClawTrinityRuntimeAdapter['api'],
  stageTimeoutMs = 300_000 // 5 min — increased from 3 min to accommodate slower LLM responses
) {
  const embeddedRunner = api?.runtime?.agent?.runEmbeddedPiAgent;
  if (typeof embeddedRunner !== 'function') {
    throw new TrinityRuntimeContractError(
      'runtime_unavailable',
      'embedded runtime unavailable (missing runtime.agent.runEmbeddedPiAgent)',
    );
  }

  this.api = api;
  this.stageTimeoutMs = stageTimeoutMs;

  // Session files live in a per-process folder under the OS temp dir (cross-platform).
  const perProcessDirName = `pd-trinity-${process.pid}`;
  this.tempDir = path.join(os.tmpdir(), perProcessDirName);

  // Sweep leftovers from earlier runs that crashed before cleaning up.
  this.cleanupStaleTempDirs();
}
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
/**
 * Always reports the runtime as available: the constructor already throws
 * when `runEmbeddedPiAgent` is missing, so an instance implies availability.
 */
isRuntimeAvailable(): boolean {
  return true;
}
|
|
591
|
-
|
|
592
|
-
/**
 * @returns the failure reason currently stored on the adapter, or `null`
 *          when none is stored (each stage invocation clears it on entry)
 */
getLastFailureReason(): string | null {
  const reason = this.lastFailureReason;
  return reason;
}
|
|
595
|
-
|
|
596
|
-
/**
 * Remove `pd-trinity-*` directories left in the OS temp directory by earlier
 * runs that are no longer alive.
 *
 * Directory names end with the owning process id. A directory is kept when it
 * is this adapter's own directory or when its pid still belongs to a running
 * process, so adapters in concurrently running processes do not delete each
 * other's live session files. Entries whose suffix is not a valid pid are
 * treated as stale and removed.
 *
 * Best-effort: any failure is logged as a warning (with filesystem paths
 * redacted) and never propagated.
 */
private cleanupStaleTempDirs(): void {
  const dirPrefix = 'pd-trinity-';
  try {
    const osTempDir = os.tmpdir();
    if (!fs.existsSync(osTempDir)) return;

    const ownDirName = path.basename(this.tempDir);
    for (const entry of fs.readdirSync(osTempDir)) {
      if (!entry.startsWith(dirPrefix) || entry === ownDirName) continue;

      const ownerPid = Number(entry.slice(dirPrefix.length));
      let ownerAlive = false;
      if (Number.isInteger(ownerPid) && ownerPid > 0) {
        try {
          // Signal 0 performs an existence check without delivering a signal.
          process.kill(ownerPid, 0);
          ownerAlive = true;
        } catch (probeErr) {
          // EPERM: the process exists but belongs to someone else — still alive.
          ownerAlive = (probeErr as { code?: unknown } | null)?.code === 'EPERM';
        }
      }
      if (ownerAlive) continue;

      fs.rmSync(path.join(osTempDir, entry), { recursive: true, force: true });
    }
  } catch (err) {
    // Redact Windows (C:\...) and POSIX (/...) paths before logging.
    const detail = err instanceof Error
      ? err.message.replace(/([A-Za-z]:\\[^\s:]+|\/[^\s:]+)/g, '[PATH]')
      : String(err);
    this.api.logger?.warn?.(`[Trinity] Failed to cleanup stale temp dirs: ${detail}`);
  }
}
|
|
615
|
-
|
|
616
|
-
/**
 * Load the full OpenClaw config (including models.providers).
 *
 * Why: `this.api.config` is the plugin config, not the full OpenClaw config.
 * It does NOT contain `models.providers`, which is needed to resolve provider
 * model definitions. `api.runtime.config.loadConfig()` returns the full config.
 *
 * Fallback: when `loadConfig` is missing or throws, the plugin config is
 * returned instead (a warning is logged in the throwing case). resolveModel()
 * copes with the limited config by using its own fallback provider/model.
 *
 * @returns whatever `loadConfig()` returned, or the plugin config as fallback
 */
private loadFullConfig(): Record<string, unknown> | undefined {
  const runtimeConfig = this.api.runtime?.config;
  if (runtimeConfig && typeof runtimeConfig.loadConfig === 'function') {
    try {
      // Invoke through the owning object so an implementation that depends on
      // its `this` receiver keeps working (a detached call would lose it).
      return runtimeConfig.loadConfig() as Record<string, unknown> | undefined;
    } catch (err) {
      this.api.logger?.warn?.(`[Trinity] loadConfig() failed, falling back to plugin config: ${err instanceof Error ? err.message : String(err)}`);
    }
  }
  // Fallback: plugin config (limited — won't have models.providers).
  return this.api.config as Record<string, unknown> | undefined;
}
|
|
640
|
-
|
|
641
|
-
/**
 * Resolve the provider and model from the OpenClaw config.
 * runEmbeddedPiAgent does NOT read config.agents.defaults.model —
 * it requires explicit params.provider and params.model.
 *
 * `agents.defaults.model` may be either a `"provider/model"` string or an
 * object whose `primary` field holds such a string. The text before the first
 * `/` is the provider; everything after it (further slashes included) is the
 * model. A spec that is not a string, has no `/`, or has an empty provider or
 * model is ignored, and the module-level FALLBACK_PROVIDER / FALLBACK_MODEL
 * pair is returned after logging a warning.
 *
 * @returns the provider/model pair to pass to the runtime
 */
private resolveModel(): { provider: string; model: string } {
  const config = this.loadFullConfig();
  const agents = config?.agents as Record<string, unknown> | undefined;
  const defaults = agents?.defaults as Record<string, unknown> | undefined;
  const modelConfig = defaults?.model;

  // Normalize both supported shapes to a single candidate spec.
  let spec: unknown;
  if (typeof modelConfig === 'string') {
    spec = modelConfig;
  } else if (modelConfig !== null && typeof modelConfig === 'object') {
    spec = (modelConfig as Record<string, unknown>).primary;
  }

  if (typeof spec === 'string') {
    const slashAt = spec.indexOf('/');
    // Require non-empty text on both sides of the first slash.
    if (slashAt > 0 && slashAt < spec.length - 1) {
      return { provider: spec.slice(0, slashAt), model: spec.slice(slashAt + 1) };
    }
  }

  // Last resort: constants defined elsewhere in this module.
  this.api.logger?.warn?.(`[Trinity] Could not resolve model from config, using fallback: ${FALLBACK_PROVIDER}/${FALLBACK_MODEL}`);
  return { provider: FALLBACK_PROVIDER, model: FALLBACK_MODEL };
}
|
|
671
|
-
|
|
672
|
-
/**
 * Reserve a unique JSONL session-file path for one stage run.
 *
 * Ensures the adapter's temp directory exists and returns
 * `<tempDir>/<stage>-<uuid>.jsonl`. The file itself is NOT created here; only
 * the path is handed to runEmbeddedPiAgent.
 *
 * @param stage Stage label used as the file-name prefix
 * @returns path for the session file
 */
private createSessionFile(stage: string): string {
  // `recursive: true` makes this a no-op when the directory already exists,
  // so no separate existence check (and its check-then-act race) is needed.
  fs.mkdirSync(this.tempDir, { recursive: true });
  return path.join(this.tempDir, `${stage}-${randomUUID()}.jsonl`);
}
|
|
681
|
-
|
|
682
|
-
/**
 * Collect the usable text from a runEmbeddedPiAgent result.
 *
 * Payloads flagged `isError` are skipped, each remaining text is trimmed,
 * empty texts are dropped, and what is left is joined with newlines.
 *
 * @param result Runtime result (a missing `payloads` array counts as empty)
 * @returns the combined text, or `''` when nothing usable was returned
 */
private extractPayloadText(result: { payloads?: { isError?: boolean; text?: string }[] }): string {
  const pieces: string[] = [];
  for (const payload of result.payloads ?? []) {
    if (payload.isError) continue;
    const trimmed = payload.text?.trim() ?? '';
    if (trimmed) {
      pieces.push(trimmed);
    }
  }
  return pieces.join('\n');
}
|
|
693
|
-
|
|
694
|
-
/**
 * Clamp a value into the [0, 1] range — used for LLM-produced scores that may
 * fall outside it.
 *
 * @param val Candidate score; anything that is not a finite number is rejected
 * @param fallback Value returned for rejected input (default 0; not clamped)
 * @returns `val` limited to [0, 1], or `fallback`
 */
private clamp01(val: unknown, fallback = 0): number {
  if (typeof val === 'number' && Number.isFinite(val)) {
    const cappedAbove = Math.min(1, val);
    return Math.max(0, cappedAbove);
  }
  return fallback;
}
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
/**
 * Map a thrown value to a runtime failure code based on its message text.
 *
 * Messages mentioning a timeout in any common spelling ("timeout",
 * "timed out", "time-out", "ETIMEDOUT", ...) are classified as
 * `runtime_timeout`; everything else is `runtime_run_failed`.
 *
 * @param error Value caught from the runtime call
 * @returns the failure code to report
 */
private classifyRuntimeError(error: unknown): TrinityRuntimeFailureCode {
  const detail = error instanceof Error ? error.message : String(error);
  // "time" + optional "d" + optional space/underscore/hyphen + "out".
  const looksLikeTimeout = /timed?[\s_-]?out/i.test(detail);
  return looksLikeTimeout ? 'runtime_timeout' : 'runtime_run_failed';
}
|
|
706
|
-
|
|
707
|
-
/**
 * Promise-based delay.
 * @param ms Time to wait in milliseconds
 * @returns a promise that resolves after the timer fires
 */
private sleep(ms: number): Promise<void> {
  return new Promise<void>(wake => {
    setTimeout(wake, ms);
  });
}
|
|
710
|
-
|
|
711
|
-
/**
 * Run the Dreamer stage through the embedded agent runtime.
 *
 * Clears the stored failure reason, builds the Dreamer prompt, runs the agent
 * with tools disabled, and parses the returned text. An empty response or a
 * thrown runtime error is converted into a failure DreamerOutput instead of
 * being thrown. The per-run session file is removed afterwards in all cases.
 *
 * @param snapshot Session trajectory snapshot
 * @param principleId Target principle ID
 * @param maxCandidates Maximum number of candidates to request
 * @returns parsed Dreamer output, or a runtime-failure output
 */
async invokeDreamer(
  snapshot: NocturnalSessionSnapshot,
  principleId: string,
  maxCandidates: number
): Promise<DreamerOutput> {
  this.lastFailureReason = null;
  const runId = `dreamer-${randomUUID()}`;
  const sessionFile = this.createSessionFile('dreamer');
  const prompt = this.buildDreamerPrompt(snapshot, principleId, maxCandidates);
  const model = this.resolveModel();

  this.api.logger?.info(`[Trinity:Dreamer] Using model: ${model.provider}/${model.model}`);

  try {
    const result = await this.api.runtime.agent.runEmbeddedPiAgent({
      sessionId: runId,
      sessionFile,
      prompt,
      extraSystemPrompt: NOCTURNAL_DREAMER_PROMPT,
      config: this.loadFullConfig(),
      provider: model.provider,
      model: model.model,
      timeoutMs: this.stageTimeoutMs,
      runId,
      disableTools: true,
    });

    const outputText = this.extractPayloadText(result);
    if (!outputText) {
      return this.buildRuntimeFailureDreamerOutput(
        'runtime_session_read_failed',
        'Dreamer returned empty response',
      );
    }

    // DEBUG: Log Dreamer's actual output
    this.api.logger?.info(`[Trinity:Dreamer] Output preview: ${outputText.slice(0, 500)}`);

    return this.parseDreamerOutput(outputText);
  } catch (err) {
    return this.buildRuntimeFailureDreamerOutput(this.classifyRuntimeError(err), err);
  } finally {
    // `force: true` treats a file the runtime never wrote as already cleaned
    // up, so only genuine deletion failures produce a warning.
    try {
      fs.rmSync(sessionFile, { force: true });
    } catch {
      this.api.logger?.warn?.(`[Trinity] Failed to delete session file: ${sessionFile}`);
    }
  }
}
|
|
757
|
-
|
|
758
|
-
/**
 * Run the Philosopher stage through the embedded agent runtime.
 *
 * Clears the stored failure reason, builds the Philosopher prompt from the
 * Dreamer output and the snapshot, runs the agent with tools disabled, and
 * parses the returned text. An empty response or a thrown runtime error is
 * converted into a failure PhilosopherOutput instead of being thrown. The
 * per-run session file is removed afterwards in all cases.
 *
 * @param dreamerOutput Output of the Dreamer stage
 * @param principleId Target principle ID
 * @param snapshot Session snapshot (for violation evidence)
 * @returns parsed Philosopher output, or a runtime-failure output
 */
async invokePhilosopher(
  dreamerOutput: DreamerOutput,
  principleId: string,
  snapshot: NocturnalSessionSnapshot
): Promise<PhilosopherOutput> {
  this.lastFailureReason = null;
  const runId = `philosopher-${randomUUID()}`;
  const sessionFile = this.createSessionFile('philosopher');
  const prompt = this.buildPhilosopherPrompt(dreamerOutput, principleId, snapshot);
  const model = this.resolveModel();

  try {
    const result = await this.api.runtime.agent.runEmbeddedPiAgent({
      sessionId: runId,
      sessionFile,
      prompt,
      extraSystemPrompt: NOCTURNAL_PHILOSOPHER_PROMPT,
      config: this.loadFullConfig(),
      provider: model.provider,
      model: model.model,
      timeoutMs: this.stageTimeoutMs,
      runId,
      disableTools: true,
    });

    const outputText = this.extractPayloadText(result);
    if (!outputText) {
      return this.buildRuntimeFailurePhilosopherOutput(
        'runtime_session_read_failed',
        'Philosopher returned empty response',
      );
    }

    // DEBUG: Log Philosopher's actual output
    this.api.logger?.info(`[Trinity:Philosopher] Output preview: ${outputText.slice(0, 500)}`);

    return this.parsePhilosopherOutput(outputText);
  } catch (err) {
    return this.buildRuntimeFailurePhilosopherOutput(this.classifyRuntimeError(err), err);
  } finally {
    // `force: true` treats a file the runtime never wrote as already cleaned
    // up, so only genuine deletion failures produce a warning.
    try {
      fs.rmSync(sessionFile, { force: true });
    } catch {
      this.api.logger?.warn?.(`[Trinity] Failed to delete session file: ${sessionFile}`);
    }
  }
}
|
|
802
|
-
|
|
803
|
-
|
|
804
|
-
|
|
805
|
-
/**
 * Run the Scribe stage, retrying on transient problems.
 *
 * Up to three attempts are made (one initial run plus two retries). Each
 * attempt uses a fresh run id and session file. An attempt is retried when the
 * response is empty (after 1000 ms), when the output cannot be parsed into an
 * artifact (after 1500 ms), or when the runtime call throws (after 2000 ms).
 * The session file of every attempt is removed once that attempt finishes.
 *
 * When an attempt succeeds, the stored failure reason is cleared so that a
 * transient failure from an earlier attempt is not reported for a successful
 * invocation. NOTE(review): this assumes recordFailure (defined elsewhere) is
 * what populates `lastFailureReason` — confirm.
 *
 * @param dreamerOutput Output of the Dreamer stage
 * @param philosopherOutput Output of the Philosopher stage
 * @param snapshot Session snapshot
 * @param principleId Target principle ID
 * @param telemetry Running telemetry, forwarded to the output parser
 * @param _config Trinity config (unused by this adapter)
 * @returns the draft artifact, or `null` when every attempt failed
 */
async invokeScribe(
  dreamerOutput: DreamerOutput,
  philosopherOutput: PhilosopherOutput,
  snapshot: NocturnalSessionSnapshot,
  principleId: string,
  telemetry: TrinityTelemetry,
  _config: TrinityConfig
): Promise<TrinityDraftArtifact | null> {
  this.lastFailureReason = null;
  const prompt = this.buildScribePrompt(dreamerOutput, philosopherOutput, snapshot, principleId);
  const model = this.resolveModel();

  // Retry up to 2 times on JSON parse / missing-field errors (common LLM output issues)
  const maxAttempts = 3;
  for (let attempt = 1; attempt <= maxAttempts; attempt++) {
    const runId = `scribe-${randomUUID()}`;
    const sessionFile = this.createSessionFile('scribe');
    let retryDelayMs = 0;

    try {
      const result = await this.api.runtime.agent.runEmbeddedPiAgent({
        sessionId: runId,
        sessionFile,
        prompt,
        extraSystemPrompt: NOCTURNAL_SCRIBE_PROMPT,
        config: this.loadFullConfig(),
        provider: model.provider,
        model: model.model,
        timeoutMs: this.stageTimeoutMs,
        runId,
        disableTools: true,
      });

      const outputText = this.extractPayloadText(result);
      if (outputText) {
        // DEBUG: Log Scribe's actual output
        this.api.logger?.info(`[Trinity:Scribe] Output preview (attempt ${attempt}): ${outputText.slice(0, 800)}`);

        const artifact = this.parseScribeOutput(outputText, snapshot, principleId, telemetry);
        if (artifact) {
          // Success: drop any reason left behind by an earlier failed attempt.
          this.lastFailureReason = null;
          return artifact;
        }
        // JSON parse or missing-field error — retry
        retryDelayMs = 1500;
      } else {
        this.recordFailure('runtime_session_read_failed', 'Scribe returned empty response');
        retryDelayMs = 1000;
      }
    } catch (err) {
      this.recordFailure(this.classifyRuntimeError(err), err);
      retryDelayMs = 2000;
    } finally {
      // `force: true` treats a file the runtime never wrote as already cleaned
      // up, so only genuine deletion failures produce a warning.
      try {
        fs.rmSync(sessionFile, { force: true });
      } catch {
        this.api.logger?.warn?.(`[Trinity] Failed to delete session file: ${sessionFile}`);
      }
    }

    // Back off before the next attempt; no wait after the final one.
    if (attempt < maxAttempts) {
      await this.sleep(retryDelayMs);
    }
  }
  return null;
}
|
|
868
|
-
|
|
869
|
-
/**
 * Remove the adapter's temp directory and everything inside it.
 *
 * Best-effort: a failure is logged as a warning and never thrown. A missing
 * directory is not an error.
 *
 * NOTE(review): the directory name is derived from the process id only, so
 * several adapter instances in one process appear to share it — confirm that
 * closing one instance while another is mid-stage is not possible.
 */
async close(): Promise<void> {
  try {
    // One recursive removal handles files and any nested directories alike;
    // `force: true` makes an already-missing directory a no-op.
    fs.rmSync(this.tempDir, { recursive: true, force: true });
  } catch (err) {
    this.api.logger?.warn?.(`[Trinity] Session cleanup failed: ${String(err)}`);
  }
}
|
|
883
|
-
|
|
884
|
-
// ---------------------------------------------------------------------------
|
|
885
|
-
// Private Helper Methods
|
|
886
|
-
// ---------------------------------------------------------------------------
|
|
887
|
-
|
|
888
|
-
|
|
889
|
-
|
|
890
|
-
/**
 * Assemble the user prompt for the Dreamer stage.
 *
 * The prompt always contains the target principle, the session id and the
 * task instructions. Evidence sections are added only when they have content:
 * failed tool calls, pain events scoring 50 or more, gate blocks, the last
 * three assistant turns (each cut to 300 characters and numbered 1..3 by
 * position within that window), user corrections, and the derived
 * "## Reasoning Context" section.
 *
 * @param snapshot Session snapshot providing the evidence
 * @param principleId Target principle ID
 * @param maxCandidates Number of candidates the model is asked to generate
 * @returns the prompt text, lines joined with `\n`
 */
private buildDreamerPrompt(
  snapshot: NocturnalSessionSnapshot,
  principleId: string,
  maxCandidates: number
): string {
  // Failed tool calls, one bullet each.
  const failureLines = snapshot.toolCalls
    .filter(call => call.outcome === 'failure')
    .map(call => {
      const target = call.filePath ? ` on ${call.filePath}` : '';
      return `- ${call.toolName}${target} → FAILED: ${call.errorMessage || 'unknown error'}`;
    });

  // Pain events at or above the score threshold of 50.
  const painLines = snapshot.painEvents
    .filter(event => event.score >= 50)
    .map(event => `- Pain (score: ${event.score}): ${event.reason || 'no reason'} [source: ${event.source}]`);

  // Gate blocks.
  const blockLines = snapshot.gateBlocks
    .map(block => `- Gate blocked ${block.toolName}: ${block.reason}`);

  // Assistant decision context (last 3 turns max).
  const decisionContext = snapshot.assistantTurns
    .slice(-3)
    .map((turn, position) => `[Turn ${position + 1}] ${turn.sanitizedText.slice(0, 300)}`)
    .join('\n');

  // User correction cues (if any).
  const correctionText = snapshot.userTurns
    .filter(turn => turn.correctionDetected)
    .map(turn => `- User correction: ${turn.correctionCue || 'detected'}`)
    .join('\n');

  const out: string[] = [
    `## Target Principle`,
    `**Principle ID**: ${principleId}`,
    ``,
    `## Session Context`,
    `**Session ID**: ${snapshot.sessionId}`,
    ``,
  ];

  // Every evidence section has the same shape: heading, body, blank line.
  const appendSection = (heading: string, body: string): void => {
    out.push(heading, body, '');
  };

  if (failureLines.length > 0) {
    appendSection(`## Tool Failures (${failureLines.length})`, failureLines.join('\n'));
  }
  if (painLines.length > 0) {
    appendSection(`## Pain Signals (${painLines.length})`, painLines.join('\n'));
  }
  if (blockLines.length > 0) {
    appendSection(`## Gate Blocks (${blockLines.length})`, blockLines.join('\n'));
  }
  if (decisionContext) {
    appendSection(`## Assistant Decision Context`, decisionContext);
  }
  if (correctionText) {
    appendSection(`## User Corrections`, correctionText);
  }

  // ## Reasoning Context — derived signals from Phase 34 deriver module (D-03, D-04)
  const reasoningContext = formatReasoningContext(snapshot);
  if (reasoningContext) {
    out.push(reasoningContext);
  }

  const taskLines = [
    `## Task`,
    `Analyze the above session and generate ${maxCandidates} candidate corrections.`,
    `Each candidate must:`,
    `1. Identify a specific bad decision from the session`,
    `2. Propose a concrete better decision grounded in principle ${principleId}`,
    `3. The betterDecision MUST START with a bounded verb: read, check, verify, edit, write, create, delete, search, grep, find, list, review, examine, inspect, test, run, execute, analyze, diagnose, debug`,
    `4. Explain the rationale referencing the principle`,
    ``,
    `Respond with ONLY a valid JSON object matching the DreamerOutput contract.`,
  ];
  out.push(...taskLines);

  return out.join('\n');
}
|
|
984
|
-
|
|
985
|
-
|
|
986
|
-
|
|
987
|
-
/**
 * Assemble the prompt handed to the Philosopher stage.
 *
 * The prompt names the target principle, summarizes the session evidence
 * (failed tool calls, pain events scoring 50 or more, gate blocks, detected
 * user corrections), lists the Dreamer's per-candidate risk metadata, embeds
 * the candidates as pretty-printed JSON and closes with the evaluation task.
 * Evidence groups with no entries are left out entirely.
 *
 * @param dreamerOutput - Dreamer result whose candidates are to be judged.
 * @param principleId - Identifier of the principle the candidates must honor.
 * @param snapshot - Session snapshot supplying the violation evidence.
 * @returns The prompt text, one section per line group, joined with newlines.
 */
private buildPhilosopherPrompt(
  dreamerOutput: DreamerOutput,
  principleId: string,
  snapshot: NocturnalSessionSnapshot
): string {
  const serializedCandidates = JSON.stringify(dreamerOutput.candidates, null, 2);

  const lines = [
    `## Target Principle`,
    `**Principle ID**: ${principleId}`,
    ``,
    `## Session Violation Summary`,
    `**Session ID**: ${snapshot.sessionId}`,
  ];

  // Adds a counted sub-heading followed by its bullets; empty groups are skipped.
  const appendGroup = (heading: string, bullets: string[]): void => {
    if (bullets.length === 0) {
      return;
    }
    lines.push(`\n### ${heading} (${bullets.length})`);
    lines.push(bullets.join('\n'));
  };

  const failedCalls: string[] = [];
  for (const call of snapshot.toolCalls) {
    if (call.outcome !== 'failure') {
      continue;
    }
    const target = call.filePath ? ` on ${call.filePath}` : '';
    failedCalls.push(`- ${call.toolName}${target} → FAILED: ${call.errorMessage || 'unknown error'}`);
  }

  // Only pain events at or above the score threshold of 50 are reported.
  const painBullets: string[] = [];
  for (const pain of snapshot.painEvents) {
    if (pain.score >= 50) {
      painBullets.push(
        `- Pain (score: ${pain.score}, severity: ${pain.severity || 'N/A'}): ${pain.reason || 'no reason'} [source: ${pain.source}]`
      );
    }
  }

  const gateBullets: string[] = [];
  for (const gate of snapshot.gateBlocks) {
    gateBullets.push(`- Gate blocked ${gate.toolName}: ${gate.reason}`);
  }

  const correctionBullets: string[] = [];
  for (const turn of snapshot.userTurns) {
    if (turn.correctionDetected) {
      correctionBullets.push(`- User correction: ${turn.correctionCue || 'detected'}`);
    }
  }

  // Candidates carrying neither a risk level nor a strategic perspective are omitted.
  const riskProfiles: string[] = [];
  for (const candidate of dreamerOutput.candidates) {
    if (!candidate.riskLevel && !candidate.strategicPerspective) {
      continue;
    }
    riskProfiles.push(
      `- Candidate #${candidate.candidateIndex}: risk=${candidate.riskLevel || 'N/A'}, perspective=${candidate.strategicPerspective || 'N/A'}`
    );
  }

  appendGroup('Tool Failures', failedCalls);
  appendGroup('Pain Signals', painBullets);
  appendGroup('Gate Blocks', gateBullets);
  appendGroup('User Corrections', correctionBullets);
  appendGroup('Candidate Risk Profiles', riskProfiles);

  lines.push(
    ``,
    `## Dreamer's Candidates`,
    serializedCandidates,
    ``,
    `## Task`,
    `Evaluate each candidate against the violation summary above.`,
    `For each candidate:`,
    `1. Is the badDecision accurate — does it match the actual violations in the session?`,
    `2. Is the betterDecision specific and actionable?`,
    `3. Does the betterDecision START with a bounded verb (read, check, verify, edit, write, etc.)?`,
    `4. Does the rationale correctly reference principle ${principleId}?`,
    `5. Is the confidence score justified?`,
    ``,
    `**Penalize executability**: If betterDecision does NOT start with a bounded verb, reduce score by 0.2.`,
    ``,
    `Respond with ONLY a valid JSON object matching the PhilosopherOutput contract.`
  );

  return lines.join('\n');
}
|
|
1069
|
-
|
|
1070
|
-
|
|
1071
|
-
|
|
1072
|
-
|
|
1073
|
-
/**
 * Assemble the prompt handed to the Scribe stage.
 *
 * The prompt contains the target principle, the raw violation evidence from
 * the snapshot (failed tool calls, pain events scoring 50 or more, gate
 * blocks), the Dreamer candidates and Philosopher judgments as pretty-printed
 * JSON, a one-line-per-judgment risk summary, and the synthesis task together
 * with the betterDecision formatting rules.
 *
 * @param dreamerOutput - Dreamer result providing the candidate list.
 * @param philosopherOutput - Philosopher result providing judgments and risks.
 * @param snapshot - Session snapshot supplying the violation evidence.
 * @param principleId - Identifier of the principle being reinforced.
 * @returns The prompt text, joined with newlines.
 */
private buildScribePrompt(
  dreamerOutput: DreamerOutput,
  philosopherOutput: PhilosopherOutput,
  snapshot: NocturnalSessionSnapshot,
  principleId: string
): string {
  const candidatesJson = JSON.stringify(dreamerOutput.candidates, null, 2);
  const judgmentsJson = JSON.stringify(philosopherOutput.judgments, null, 2);

  // Build violation evidence for Scribe to ground the final artifact
  const violations: string[] = [];

  const failures = snapshot.toolCalls.filter(tc => tc.outcome === 'failure');
  for (const tc of failures) {
    violations.push(`- Tool failure: ${tc.toolName}${tc.filePath ? ` on ${tc.filePath}` : ''} → ${tc.errorMessage || 'unknown error'}`);
  }

  const pains = snapshot.painEvents.filter(pe => pe.score >= 50);
  for (const pe of pains) {
    violations.push(`- Pain signal (score: ${pe.score}): ${pe.reason || 'no reason'} [source: ${pe.source}]`);
  }

  const blocks = snapshot.gateBlocks;
  for (const gb of blocks) {
    violations.push(`- Gate blocked: ${gb.toolName} → ${gb.reason}`);
  }

  const sections = [
    `## Target Principle`,
    `**Principle ID**: ${principleId}`,
    ``,
    `## Original Violation Evidence`,
    `**Session ID**: ${snapshot.sessionId}`,
  ];

  if (violations.length > 0) {
    sections.push(violations.join('\n'));
  } else {
    sections.push(`(No specific violations found in snapshot)`);
  }

  // Judgment values originate from model output. The Philosopher parser in this
  // class leaves falsePositiveEstimate unset when the model did not return a
  // number and copies score through without a type check, so calling toFixed
  // directly can throw. Anything that is not a number is rendered as 'n/a'.
  const formatFixed = (value: unknown): string =>
    typeof value === 'number' ? value.toFixed(2) : 'n/a';

  // Build risk summary from Philosopher 6D judgments for Scribe contrastive analysis
  const riskSummary = philosopherOutput.judgments
    .map(j => {
      const risk = j.risks
        ? ` [risks: fp=${formatFixed(j.risks.falsePositiveEstimate)}, complexity=${j.risks.implementationComplexity}, breaking=${j.risks.breakingChangeRisk}]`
        : '';
      return ` - candidate[${j.candidateIndex}] (rank ${j.rank}, score ${formatFixed(j.score)}): ${j.principleAligned ? 'aligned' : 'not aligned'}${risk}`;
    })
    .join('\n');

  sections.push(
    ``,
    `## Dreamer's Candidates`,
    candidatesJson,
    ``,
    `## Philosopher's Judgments + Risk Assessments`,
    judgmentsJson,
    ``,
    `## Philosopher 6D Risk Summary`,
    `Use this to determine contrastive depth — high-risk candidates need deeper analysis:`,
    riskSummary,
    ``,
    `## Task`,
    `Select the best candidate (Philosopher's rank 1) and synthesize it into a final TrinityDraftArtifact.`,
    `Then produce contrastive analysis explaining why the winner was chosen and what the rejected candidates teach us.`,
    ``,
    `## CRITICAL: betterDecision Format Requirements`,
    `Your betterDecision MUST pass executability validation. It MUST:`,
    `1. START with a concrete action verb from this list: read, check, verify, edit, write, create, delete, search, grep, find, list, review, examine, inspect, test, run, execute, analyze, diagnose, debug`,
    `2. Reference a SPECIFIC, concrete target (file path, command name, config key, etc.)`,
    `3. Describe a BOUNDED, executable action — not a vague principle or process`,
    ``,
    `**Examples that PASS executability check**:`,
    `- "Read the file before editing to verify current content"`,
    `- "Check user permissions before executing privileged commands"`,
    `- "Verify the routing infrastructure is operational before analyzing system state"`,
    `- "Edit the config file to set timeout=30000ms"`,
    ``,
    `**Examples that FAIL executability check**:`,
    `- "Per T-01, pause all analysis tasks..." (starts with "Per", not a bounded verb)`,
    `- "The agent should have first checked..." (starts with "The", not the action verb)`,
    `- "Be more careful with routing tools" (vague verb "be")`,
    `- "Ensure proper authorization" (vague verb "ensure")`,
    ``,
    `Respond with ONLY a valid JSON object.`
  );

  return sections.join('\n');
}
|
|
1161
|
-
|
|
1162
|
-
|
|
1163
|
-
/**
 * Turn raw Dreamer stage text into a DreamerOutput.
 *
 * The text must contain JSON with a boolean `valid` and an array
 * `candidates`. Any other outcome yields an invalid output with an empty
 * candidate list and a reason describing what was wrong. A missing
 * `generatedAt` is filled with the current time.
 *
 * @param text - Raw text returned by the Dreamer stage.
 * @returns The parsed output, or an invalid placeholder carrying the reason.
 */
private parseDreamerOutput(text: string): DreamerOutput {
  // All rejection paths share this shape; only the reason differs.
  const rejected = (reason: string): DreamerOutput => ({
    valid: false,
    candidates: [],
    reason,
    generatedAt: new Date().toISOString(),
  });

  const payload = this.extractJson(text);
  if (!payload) {
    return rejected('Failed to parse Dreamer output as JSON');
  }

  try {
    const data = JSON.parse(payload);

    // Validate required structure
    if (typeof data.valid !== 'boolean') {
      return rejected('Dreamer output missing "valid" field');
    }
    if (!Array.isArray(data.candidates)) {
      return rejected('Dreamer output missing "candidates" array');
    }

    const { valid, candidates, reason, generatedAt } = data;
    return {
      valid,
      candidates,
      reason,
      generatedAt: generatedAt ?? new Date().toISOString(),
    };
  } catch {
    // Also reached when the payload is valid JSON but not an object (e.g. null).
    return rejected(`JSON parse error: ${text.slice(0, 100)}`);
  }
}
|
|
1208
|
-
|
|
1209
|
-
/**
 * Produce an invalid DreamerOutput for a runtime failure.
 *
 * The failure is recorded through recordFailure, and the reason it returns is
 * used as the output's reason.
 *
 * @param code - Failure code identifying the kind of runtime failure.
 * @param error - The thrown value or error describing the failure.
 * @returns An invalid output with no candidates.
 */
private buildRuntimeFailureDreamerOutput(
  code: TrinityRuntimeFailureCode,
  error: unknown
): DreamerOutput {
  const failed: DreamerOutput = {
    valid: false,
    candidates: [],
    reason: this.recordFailure(code, error),
    generatedAt: new Date().toISOString(),
  };
  return failed;
}
|
|
1221
|
-
|
|
1222
|
-
/**
 * Turn raw Philosopher stage text into a PhilosopherOutput.
 *
 * The text must contain JSON with a boolean `valid` and an array `judgments`.
 * Each judgment is normalized: missing critique/principleAligned/score/rank
 * get defaults, and the optional `scores` and `risks` objects are copied only
 * when present. Numeric score dimensions and the false-positive estimate are
 * passed through clamp01; non-numeric ones are dropped rather than defaulted.
 *
 * @param text - Raw text returned by the Philosopher stage.
 * @returns The parsed output, or an invalid placeholder carrying the reason.
 */
private parsePhilosopherOutput(text: string): PhilosopherOutput {
  // All rejection paths share this shape; only the reason differs.
  const rejected = (reason: string): PhilosopherOutput => ({
    valid: false,
    judgments: [],
    overallAssessment: '',
    reason,
    generatedAt: new Date().toISOString(),
  });

  const scoreDimensions = [
    'principleAlignment',
    'specificity',
    'actionability',
    'executability',
    'safetyImpact',
    'uxImpact',
  ] as const;

  const normalizeJudgment = (j: Record<string, unknown>): Record<string, unknown> => {
    const judgment: Record<string, unknown> = {
      candidateIndex: j.candidateIndex,
      critique: j.critique ?? '',
      principleAligned: j.principleAligned ?? false,
      score: j.score ?? 0,
      rank: j.rank ?? 0,
    };

    // Optional 6D scores (Phase 36). A dimension is kept only when the model
    // returned a number, so "returned 0" stays distinguishable from "omitted".
    if (j.scores) {
      const rawScores = j.scores as Record<string, unknown>;
      const scores: Record<string, number> = {};
      for (const dimension of scoreDimensions) {
        const value = rawScores[dimension];
        if (typeof value === 'number') {
          scores[dimension] = this.clamp01(value);
        }
      }
      judgment.scores = scores;
    }

    // Optional risk assessment; the false-positive estimate is set only when numeric.
    if (j.risks) {
      const rawRisks = j.risks as Record<string, unknown>;
      const risks: {
        falsePositiveEstimate?: number;
        implementationComplexity: string;
        breakingChangeRisk: boolean;
      } = {
        implementationComplexity: (rawRisks.implementationComplexity as string) ?? 'medium',
        breakingChangeRisk: Boolean(rawRisks.breakingChangeRisk),
      };
      const estimate = rawRisks.falsePositiveEstimate;
      if (typeof estimate === 'number') {
        risks.falsePositiveEstimate = this.clamp01(estimate);
      }
      judgment.risks = risks;
    }

    return judgment;
  };

  const payload = this.extractJson(text);
  if (!payload) {
    return rejected('Failed to parse Philosopher output as JSON');
  }

  try {
    const data = JSON.parse(payload);
    if (typeof data.valid !== 'boolean') {
      return rejected('Philosopher output missing "valid" field');
    }
    if (!Array.isArray(data.judgments)) {
      return rejected('Philosopher output missing "judgments" array');
    }
    return {
      valid: data.valid,
      judgments: data.judgments.map(normalizeJudgment),
      overallAssessment: data.overallAssessment ?? '',
      reason: data.reason,
      generatedAt: data.generatedAt ?? new Date().toISOString(),
    };
  } catch {
    // Also reached when a judgment entry is not an object (e.g. null).
    return rejected(`JSON parse error: ${text.slice(0, 100)}`);
  }
}
|
|
1305
|
-
|
|
1306
|
-
/**
 * Produce an invalid PhilosopherOutput for a runtime failure.
 *
 * The failure is recorded through recordFailure, and the reason it returns is
 * used as the output's reason.
 *
 * @param code - Failure code identifying the kind of runtime failure.
 * @param error - The thrown value or error describing the failure.
 * @returns An invalid output with no judgments and an empty assessment.
 */
private buildRuntimeFailurePhilosopherOutput(
  code: TrinityRuntimeFailureCode,
  error: unknown
): PhilosopherOutput {
  const failed: PhilosopherOutput = {
    valid: false,
    judgments: [],
    overallAssessment: '',
    reason: this.recordFailure(code, error),
    generatedAt: new Date().toISOString(),
  };
  return failed;
}
|
|
1319
|
-
|
|
1320
|
-
/**
 * Store a failure as `lastFailureReason` and return the stored text.
 *
 * The reason is formatted as "<code>: <detail>", where the detail is the
 * error's message for Error instances and the string form of the value
 * otherwise.
 *
 * @param code - Failure code used as the prefix of the reason.
 * @param error - The thrown value or error describing the failure.
 * @returns The reason string that was stored.
 */
private recordFailure(
  code: TrinityRuntimeFailureCode,
  error: unknown
): string {
  let detail: string;
  if (error instanceof Error) {
    detail = error.message;
  } else {
    detail = String(error);
  }

  this.lastFailureReason = `${code}: ${detail}`;
  return this.lastFailureReason;
}
|
|
1328
|
-
|
|
1329
|
-
|
|
1330
|
-
|
|
1331
|
-
/**
 * Turn raw Scribe stage text into a TrinityDraftArtifact.
 *
 * The text must contain JSON with a numeric `selectedCandidateIndex`.
 * Otherwise the failure is recorded under 'runtime_run_failed' and null is
 * returned. Missing decision and rationale strings default to '' and a
 * missing candidate count to 0. Each optional analysis section is attached
 * only when it is an object whose identifying field is a string.
 *
 * @param text - Raw text returned by the Scribe stage.
 * @param snapshot - Session snapshot; supplies the session id.
 * @param principleId - Identifier of the target principle.
 * @param _telemetry - Chain telemetry; only `usedStubs` is read here.
 * @returns The draft artifact, or null when the output is unusable.
 */
private parseScribeOutput(
  text: string,
  snapshot: NocturnalSessionSnapshot,
  principleId: string,
  _telemetry: TrinityTelemetry
): TrinityDraftArtifact | null {
  const payload = this.extractJson(text);
  if (!payload) {
    this.recordFailure('runtime_run_failed', new Error('Scribe output contains no parseable JSON'));
    return null;
  }

  try {
    const data = JSON.parse(payload);
    const winnerIndex = data.selectedCandidateIndex;
    if (typeof winnerIndex !== 'number') {
      this.recordFailure('runtime_run_failed', new Error(`Scribe output missing "selectedCandidateIndex" field: ${text.slice(0, 200)}`));
      return null;
    }

    // Validate contrastive analysis sub-fields (H-03): a section is kept only
    // when it is an object whose identifying field is a string.
    const keepIfIntact = (sectionName: string, requiredField: string) => {
      const section = data[sectionName];
      const intact =
        section && typeof section === 'object' && typeof section[requiredField] === 'string';
      return intact ? section : undefined;
    };

    const contrastiveAnalysis = keepIfIntact('contrastiveAnalysis', 'criticalDifference');
    const rejectedAnalysis = keepIfIntact('rejectedAnalysis', 'whyRejected');
    const chosenJustification = keepIfIntact('chosenJustification', 'whyChosen');

    const artifact: TrinityDraftArtifact = {
      selectedCandidateIndex: winnerIndex,
      badDecision: data.badDecision ?? '',
      betterDecision: data.betterDecision ?? '',
      rationale: data.rationale ?? '',
      sessionId: snapshot.sessionId,
      principleId,
      sourceSnapshotRef: `snapshot-${snapshot.sessionId}-${Date.now()}`,
      telemetry: {
        chainMode: 'trinity',
        usedStubs: _telemetry.usedStubs,
        dreamerPassed: true,
        philosopherPassed: true,
        scribePassed: true,
        candidateCount: data.candidateCount ?? 0,
        selectedCandidateIndex: winnerIndex,
        stageFailures: [],
      },
    };

    if (contrastiveAnalysis) {
      artifact.contrastiveAnalysis = contrastiveAnalysis;
    }
    if (rejectedAnalysis) {
      artifact.rejectedAnalysis = rejectedAnalysis;
    }
    if (chosenJustification) {
      artifact.chosenJustification = chosenJustification;
    }

    return artifact;
  } catch {
    this.recordFailure('runtime_run_failed', new Error(`Scribe output JSON parse error: ${payload.slice(0, 200)}`));
    return null;
  }
}
|
|
1394
|
-
|
|
1395
|
-
/**
 * Extract a JSON document from text that may wrap it in markdown code
 * fences or surround it with prose.
 *
 * Candidates are tried in this order and the first one that parses wins:
 *   1. the whole text;
 *   2. the first fenced code block (``` or ```json);
 *   3. the span from the first `{` to the last `}`;
 *   4. any later fenced code blocks, in order of appearance.
 *
 * Step 4 covers replies whose first fence holds something other than JSON
 * (for example a code sample) with the JSON payload in a later fence. It runs
 * last so that every input accepted by steps 1-3 yields the same result as
 * before.
 *
 * Note that any valid JSON value is accepted, not only objects; callers must
 * still validate the parsed shape.
 *
 * @param text - Raw model output.
 * @returns The substring that parses as JSON, or null when none does.
 */
private extractJson(text: string): string | null {
  const parses = (candidate: string): boolean => {
    try {
      JSON.parse(candidate);
      return true;
    } catch {
      return false;
    }
  };

  // Try direct parse first
  if (parses(text)) {
    return text;
  }

  // Collect the trimmed contents of every triple-backtick block
  const fencedBlocks = Array.from(
    text.matchAll(/```(?:json)?\s*\n?([\s\S]*?)\n?```/g),
    match => match[1].trim()
  );
  const [firstBlock, ...laterBlocks] = fencedBlocks;
  if (firstBlock !== undefined && parses(firstBlock)) {
    return firstBlock;
  }

  // Try to find first { and last } to extract JSON object
  const firstBrace = text.indexOf('{');
  const lastBrace = text.lastIndexOf('}');
  if (firstBrace !== -1 && lastBrace > firstBrace) {
    const braceSpan = text.slice(firstBrace, lastBrace + 1);
    if (parses(braceSpan)) {
      return braceSpan;
    }
  }

  // Last resort: a later fenced block may hold the payload
  return laterBlocks.find(parses) ?? null;
}
|
|
1436
|
-
}
|
|
1437
|
-
|
|
1438
|
-
// ---------------------------------------------------------------------------
|
|
1439
|
-
// Trinity Mode Configuration
|
|
1440
|
-
// ---------------------------------------------------------------------------
|
|
1441
|
-
|
|
1442
|
-
/**
 * Options controlling how the Trinity chain is executed.
 */
export interface TrinityConfig {
  /** Run the Trinity chain when true, the single-reflector path when false. Default: true */
  useTrinity: boolean;

  /** Upper bound on the number of candidates the Dreamer should generate. Default: 3 */
  maxCandidates: number;

  /**
   * Use stub stage outputs, for testing without real model calls.
   * Default: false (real subagent calls via runtimeAdapter)
   */
  useStubs: boolean;

  /**
   * Adapter that performs real subagent execution.
   * Required when useStubs is false; ignored when useStubs is true.
   * Default: undefined
   */
  runtimeAdapter?: TrinityRuntimeAdapter;

  /** Weights applied when scoring candidates in tournament selection. Default: DEFAULT_SCORING_WEIGHTS */
  scoringWeights?: ScoringWeights;

  /** Threshold values deciding tournament eligibility. Default: DEFAULT_THRESHOLDS */
  thresholds?: ThresholdValues;

  /**
   * State directory used for threshold persistence.
   * When provided, thresholds are loaded from state.
   */
  stateDir?: string;
}
|
|
1489
|
-
|
|
1490
|
-
// ---------------------------------------------------------------------------
|
|
1491
|
-
// Trinity Intermediate Contracts
|
|
1492
|
-
// ---------------------------------------------------------------------------
|
|
1493
|
-
|
|
1494
|
-
// Forward-exports from shared types module — single source of truth
|
|
1495
|
-
export type {
|
|
1496
|
-
DreamerCandidate,
|
|
1497
|
-
DreamerOutput,
|
|
1498
|
-
PhilosopherRiskAssessment,
|
|
1499
|
-
Philosopher6DScores,
|
|
1500
|
-
PhilosopherJudgment,
|
|
1501
|
-
PhilosopherOutput,
|
|
1502
|
-
} from './nocturnal-trinity-types.js';
|
|
1503
|
-
|
|
1504
|
-
// Import all types for local use in this file
|
|
1505
|
-
import type {
|
|
1506
|
-
DreamerCandidate,
|
|
1507
|
-
DreamerOutput,
|
|
1508
|
-
PhilosopherRiskAssessment,
|
|
1509
|
-
Philosopher6DScores,
|
|
1510
|
-
PhilosopherJudgment,
|
|
1511
|
-
PhilosopherOutput,
|
|
1512
|
-
} from './nocturnal-trinity-types.js';
|
|
1513
|
-
|
|
1514
|
-
/**
 * Explains why a candidate lost the tournament.
 * Serves as the training signal for "what to avoid".
 */
export interface RejectedAnalysis {
  /** The mental model that produced the rejected candidate */
  whyRejected: string;

  /** Observable caution triggers that were missed or ignored */
  warningSignals: string[];

  /** The reasoning path that should have been followed instead */
  correctiveThinking: string;
}
|
|
1526
|
-
|
|
1527
|
-
/**
 * Explains why a candidate won the tournament.
 * Serves as the training signal for "what to do".
 */
export interface ChosenJustification {
  /** The reason this candidate was preferred over the others */
  whyChosen: string;

  /** One to three transferable insights drawn from the decision */
  keyInsights: string[];

  /** Situations in which this approach does NOT apply */
  limitations: string[];
}
|
|
1539
|
-
|
|
1540
|
-
/**
 * Contrast between the chosen and the rejected paths.
 * Distills the core lesson of the tournament.
 */
export interface ContrastiveAnalysis {
  /** The ONE insight that separates the chosen path from the rejected one */
  criticalDifference: string;

  /** Trigger pattern in the form "When X, do Y" */
  decisionTrigger: string;

  /** How the rejected path can be avoided systematically */
  preventionStrategy: string;
}
|
|
1552
|
-
|
|
1553
|
-
/**
 * Final structured artifact draft produced by the Scribe stage.
 * The Scribe synthesizes the best candidate into the approved artifact format.
 */
export interface TrinityDraftArtifact {
  /** Index of the winning candidate */
  selectedCandidateIndex: number;

  /** Final wording of the bad decision */
  badDecision: string;

  /** Final wording of the better decision */
  betterDecision: string;

  /** Final rationale */
  rationale: string;

  /** Session the snapshot was taken from */
  sessionId: string;

  /** Identifier of the target principle */
  principleId: string;

  /** Reference to the snapshot that was used */
  sourceSnapshotRef: string;

  /** Telemetry describing the chain run */
  telemetry: TrinityTelemetry;

  /** Reflection quality: delta in thinking model activation (-1 to 1) */
  thinkingModelDelta?: number;

  /** Reflection quality: gain in planning ratio (-1 to 1) */
  planningRatioGain?: number;

  /** Routing context for a follow-on Artificer stage, when available */
  artificerContext?: TrinityArtificerContext;

  /** Chosen versus rejected reasoning paths (SCRIBE-03) */
  contrastiveAnalysis?: ContrastiveAnalysis;

  /** Why the rejected candidates lost the tournament (SCRIBE-01) */
  rejectedAnalysis?: RejectedAnalysis;

  /** Why the chosen candidate won (SCRIBE-02) */
  chosenJustification?: ChosenJustification;
}
|
|
1587
|
-
|
|
1588
|
-
/**
 * Telemetry describing one chain execution.
 */
export interface TrinityTelemetry {
  /** Which mode ran: the Trinity chain or the single reflector */
  chainMode: 'trinity' | 'single-reflector';

  /** Whether stub stage implementations were used */
  usedStubs: boolean;

  /** Whether the Dreamer stage passed */
  dreamerPassed: boolean;

  /** Whether the Philosopher stage passed */
  philosopherPassed: boolean;

  /** Whether the Scribe stage passed */
  scribePassed: boolean;

  /** Number of candidates generated */
  candidateCount: number;

  /** Index of the finally selected candidate */
  selectedCandidateIndex: number;

  /** Reasons for stage failures, if any occurred */
  stageFailures: string[];

  /** Tournament trace kept for explainability */
  tournamentTrace?: TournamentTraceEntry[];

  /** Aggregate score of the winner */
  winnerAggregateScore?: number;

  /** Whether the winner passed all thresholds */
  winnerThresholdPassed?: boolean;

  /** Number of candidates still eligible after the threshold check */
  eligibleCandidateCount?: number;

  /** Whether the Dreamer candidates passed diversity validation (DIVER-04) */
  diversityCheckPassed?: boolean;

  /** Risk levels assigned to the Dreamer candidates (telemetry only) */
  candidateRiskLevels?: string[];

  /** Aggregate 6D Philosopher evaluation metrics (informational) */
  philosopher6D?: {
    /** Per-dimension scores averaged across all candidates */
    avgScores: {
      principleAlignment: number;
      specificity: number;
      actionability: number;
      executability: number;
      safetyImpact: number;
      uxImpact: number;
    };

    /** Number of candidates with breakingChangeRisk = true */
    highRiskCount: number;
  };
}
|
|
1630
|
-
|
|
1631
|
-
// ---------------------------------------------------------------------------
|
|
1632
|
-
// Trinity Stage Validation
|
|
1633
|
-
// ---------------------------------------------------------------------------
|
|
1634
|
-
|
|
1635
|
-
/**
 * A validation failure reported for one Trinity stage.
 */
export interface TrinityStageFailure {
  /** The stage the failure belongs to */
  stage: 'dreamer' | 'philosopher' | 'scribe';

  /** Description of the failure */
  reason: string;
}
|
|
1642
|
-
|
|
1643
|
-
/**
 * Outcome of a Trinity chain execution.
 */
export interface TrinityResult {
  /** True when the Trinity chain completed successfully */
  success: boolean;

  /** The final draft artifact, present on success */
  artifact?: TrinityDraftArtifact;

  /** Telemetry about the chain execution */
  telemetry: TrinityTelemetry;

  /** Failures reported by individual stages, if any */
  failures: TrinityStageFailure[];

  /** True when execution fell back to the single reflector */
  fallbackOccurred: boolean;

  /** Routing context for a follow-on Artificer stage, when available */
  artificerContext?: TrinityArtificerContext;
}
|
|
1660
|
-
|
|
1661
|
-
// ---------------------------------------------------------------------------
|
|
1662
|
-
// Internal Types for Trinity Execution
|
|
1663
|
-
// ---------------------------------------------------------------------------
|
|
1664
|
-
|
|
1665
|
-
// ---------------------------------------------------------------------------
|
|
1666
|
-
// Stub Stage Implementations (Phase 2 — no real subagent calls)
|
|
1667
|
-
// ---------------------------------------------------------------------------
|
|
1668
|
-
|
|
1669
|
-
/**
 * STUB DREAMER — fabricates synthetic candidates so the chain can be tested
 * without a real Dreamer subagent.
 *
 * The strongest signal present in `snapshot.stats` picks one scenario:
 * gate blocks first, then pain events, then failures. Each scenario holds up
 * to three canned candidates; the first is always produced and the second and
 * third only when `maxCandidates` is at least 2 and 3 respectively. With no
 * signal at all the output is invalid and carries no candidates.
 *
 * When `snapshot._dataSource` is `'pain_context_fallback'` (#219/#259) every
 * candidate's rationale gets a warning suffix and its confidence is halved
 * (rounded to two decimals) so reviewers do not over-trust it.
 *
 * NOTE: the betterDecision texts include thinking model patterns so that
 * computeThinkingModelDelta > 0 (they activate T-03, T-05, T-08 respectively).
 *
 * @param snapshot - Session snapshot whose stats drive scenario selection
 * @param principleId - Target principle ID (not used by the stub)
 * @param maxCandidates - Upper bound on the number of candidates returned
 * @returns Dreamer output; `valid` is false when no candidate was produced
 */
export function invokeStubDreamer(
  snapshot: NocturnalSessionSnapshot,
  principleId: string,
  maxCandidates: number
): DreamerOutput {
  interface Draft {
    badDecision: string;
    betterDecision: string;
    rationale: string;
    confidence: number;
  }
  interface Scenario {
    riskLevel: 'low' | 'medium' | 'high';
    strategicPerspective: 'conservative_fix' | 'structural_improvement' | 'paradigm_shift';
    drafts: Draft[];
  }

  const { stats } = snapshot;
  const sawFailures = (stats.failureCount ?? 0) > 0;
  const sawPain = stats.totalPainEvents > 0;
  const sawGateBlocks = (stats.totalGateBlocks ?? 0) > 0;

  let scenario: Scenario;
  if (sawGateBlocks) {
    scenario = {
      riskLevel: 'low',
      strategicPerspective: 'conservative_fix',
      drafts: [
        {
          badDecision: 'Proceeded with a tool call despite receiving a gate block, bypassing the safety check',
          betterDecision: 'Review docs/gateblocks.md and verify authorization requirements first; based on the evidence, this irreversible action must be reviewed before proceeding',
          rationale: 'Respecting gate blocks prevents unintended system modifications',
          confidence: 0.95,
        },
        {
          badDecision: 'Retried the same operation immediately after gate block without understanding why',
          betterDecision: 'Check the gatekeeper source first to diagnose the block reason; this is irreversible, so we must be certain before proceeding',
          rationale: 'Understanding why a gate blocked prevents repeated blocks',
          confidence: 0.85,
        },
        {
          badDecision: 'Modified the target of the blocked operation to bypass the check',
          betterDecision: 'Review docs/auth.md first to understand the authorization structure, then request proper review before any change',
          rationale: 'Proper authorization ensures accountability and prevents unintended changes',
          confidence: 0.75,
        },
      ],
    };
  } else if (sawPain) {
    scenario = {
      riskLevel: 'medium',
      strategicPerspective: 'structural_improvement',
      drafts: [
        {
          badDecision: 'Continued executing operations without pausing to address accumulated pain signals',
          betterDecision: 'Check logs/pain.json first to analyze pain signals; this error indicates we should stop and reconsider before proceeding',
          rationale: 'Pain signals indicate accumulated friction or error conditions',
          confidence: 0.90,
        },
        {
          badDecision: 'Ignored warning pain events and proceeded with high-risk operations',
          betterDecision: 'Review src/pain-detector.ts first; based on the evidence, this indicates a deeper issue we must not ignore',
          rationale: 'Addressing friction reduces error rates and improves outcomes',
          confidence: 0.80,
        },
        {
          badDecision: 'Retried failing operations without analyzing why they caused pain',
          betterDecision: 'Analyze logs/errors.json first to identify the failure pattern; this suggests we should stop and rethink before retrying',
          rationale: 'Pattern analysis prevents recurring pain from the same source',
          confidence: 0.70,
        },
      ],
    };
  } else if (sawFailures) {
    scenario = {
      riskLevel: 'high',
      strategicPerspective: 'paradigm_shift',
      drafts: [
        {
          badDecision: 'Retried a failing operation without diagnosing the root cause',
          betterDecision: 'Verify config.json preconditions first, based on the error in logs/failure.json, before retrying',
          rationale: 'Diagnosing failures before retry prevents repeated failures',
          confidence: 0.92,
        },
        {
          badDecision: 'Continued to the next operation after a failure without addressing it',
          betterDecision: 'Check docs/debugging.md first to diagnose what failed; we must not ignore this when the action is irreversible',
          rationale: 'Unaddressed failures compound and cause larger issues',
          confidence: 0.85,
        },
        {
          badDecision: 'Assumed the failure was transient and retried without investigation',
          betterDecision: 'Verify src/validator.ts state first; this error indicates a deeper problem before assuming resolution',
          rationale: 'Verification prevents cascading failures from unresolved issues',
          confidence: 0.78,
        },
      ],
    };
  } else {
    // Nothing to work from — report an invalid output with no candidates
    // (a real Dreamer would also fail with no signal).
    return {
      valid: false,
      candidates: [],
      reason: 'No signal available for candidate generation (failureCount=0, painEvents=0, gateBlocks=0)',
      generatedAt: new Date().toISOString(),
    };
  }

  // #219: fallback snapshots may carry incomplete stats.
  const fromFallback = snapshot._dataSource === 'pain_context_fallback';
  const fallbackSuffix = fromFallback ? ' [fallback data: stats may be incomplete]' : '';

  // The first draft is always emitted; draft i (i >= 1) needs maxCandidates >= i + 1.
  const produced: DreamerCandidate[] = [];
  scenario.drafts.forEach((draft, position) => {
    if (position === 0 || maxCandidates >= position + 1) {
      produced.push({
        candidateIndex: position,
        badDecision: draft.badDecision,
        betterDecision: draft.betterDecision,
        rationale: draft.rationale,
        confidence: draft.confidence,
        riskLevel: scenario.riskLevel,
        strategicPerspective: scenario.strategicPerspective,
      });
    }
  });

  // Never hand back more than the caller asked for.
  const capped = produced.slice(0, Math.min(produced.length, maxCandidates));

  // #219/#259: annotate and downgrade candidates built from fallback data.
  const finalCandidates = capped.map((entry) => {
    if (!fromFallback) {
      return { ...entry, rationale: entry.rationale, confidence: entry.confidence };
    }
    return {
      ...entry,
      rationale: entry.rationale + fallbackSuffix,
      confidence: Math.round(entry.confidence * 0.5 * 100) / 100,
    };
  });

  const hasCandidates = finalCandidates.length > 0;
  return {
    valid: hasCandidates,
    candidates: finalCandidates,
    generatedAt: new Date().toISOString(),
    reason: hasCandidates ? undefined : 'No signal available for candidate generation',
  };
}
|
|
1821
|
-
|
|
1822
|
-
/**
 * STUB PHILOSOPHER — scores and ranks Dreamer candidates with fixed heuristics
 * instead of calling a real Philosopher subagent.
 *
 * Each candidate starts at its own `confidence` and is penalised when:
 *   - its rationale is shorter than 30 characters (×0.8),
 *   - its betterDecision contains none of the known action verbs (×0.85),
 *   - its badDecision contains a known generic phrase (×0.75).
 * Any penalty also marks the candidate as not principle-aligned.
 *
 * The 6D scores and risk assessment are fixed per `strategicPerspective`
 * (Phase 35 D-07 mapping); candidates without a recognised perspective get
 * values derived from the heuristic score. Judgments are returned sorted by
 * clamped score, highest first, with 1-based ranks.
 *
 * @param dreamerOutput - Candidates to judge
 * @param _principleId - Unused by the stub
 * @param _snapshot - Unused by the stub
 * @returns Philosopher output; invalid when there is nothing to judge
 */
export function invokeStubPhilosopher(
  dreamerOutput: DreamerOutput,
  _principleId: string,
  _snapshot: NocturnalSessionSnapshot
): PhilosopherOutput {
  if (!dreamerOutput.valid || dreamerOutput.candidates.length === 0) {
    return {
      valid: false,
      judgments: [],
      overallAssessment: '',
      reason: 'No candidates to judge',
      generatedAt: new Date().toISOString(),
    };
  }

  // Word lists shared by every candidate.
  const ACTION_VERBS = ['read', 'check', 'verify', 'edit', 'write', 'search', 'review', 'analyze'];
  const GENERIC_PHRASES = ['something went wrong', 'it did not work', 'mistake was made'];

  const judge = (candidate: DreamerCandidate): PhilosopherJudgment => {
    // Longer rationales tend to be more principled.
    const rationaleTooShort = candidate.rationale.length < 30;
    // betterDecision should be actionable (contain a verb).
    const proposal = candidate.betterDecision.toLowerCase();
    const isActionable = ACTION_VERBS.some((verb) => proposal.includes(verb));
    // badDecision should be specific rather than generic.
    const complaint = candidate.badDecision.toLowerCase();
    const isVague = GENERIC_PHRASES.some((phrase) => complaint.includes(phrase));

    // Penalties are applied in this fixed order.
    let score = candidate.confidence;
    if (rationaleTooShort) {
      score *= 0.8;
    }
    if (!isActionable) {
      score *= 0.85;
    }
    if (isVague) {
      score *= 0.75;
    }
    const principleAligned = !rationaleTooShort && isActionable && !isVague;

    // Deterministic 6D scores keyed on strategic perspective (Phase 35 D-07 mapping).
    let sixDScores: Philosopher6DScores;
    let riskAssessment: PhilosopherRiskAssessment;
    switch (candidate.strategicPerspective) {
      case 'conservative_fix':
        sixDScores = {
          principleAlignment: 0.9,
          specificity: 0.8,
          actionability: 0.85,
          executability: 0.9,
          safetyImpact: 0.95,
          uxImpact: 0.7,
        };
        riskAssessment = {
          falsePositiveEstimate: 0.1,
          implementationComplexity: 'low',
          breakingChangeRisk: false,
        };
        break;
      case 'structural_improvement':
        sixDScores = {
          principleAlignment: 0.75,
          specificity: 0.7,
          actionability: 0.75,
          executability: 0.7,
          safetyImpact: 0.7,
          uxImpact: 0.8,
        };
        riskAssessment = {
          falsePositiveEstimate: 0.25,
          implementationComplexity: 'medium',
          breakingChangeRisk: false,
        };
        break;
      case 'paradigm_shift':
        sixDScores = {
          principleAlignment: 0.6,
          specificity: 0.5,
          actionability: 0.5,
          executability: 0.45,
          safetyImpact: 0.4,
          uxImpact: 0.6,
        };
        riskAssessment = {
          falsePositiveEstimate: 0.4,
          implementationComplexity: 'high',
          breakingChangeRisk: true,
        };
        break;
      default:
        // Candidate carries no recognised strategicPerspective: derive from the heuristic score.
        sixDScores = {
          principleAlignment: score,
          specificity: score * 0.9,
          actionability: score * 0.85,
          executability: score * 0.8,
          safetyImpact: score * 0.7,
          uxImpact: score * 0.75,
        };
        riskAssessment = {
          falsePositiveEstimate: 0.3,
          implementationComplexity: 'medium',
          breakingChangeRisk: false,
        };
        break;
    }

    const verdict = principleAligned
      ? 'Principle-aligned with specific actionable alternative.'
      : 'May need refinement for principle alignment.';

    return {
      candidateIndex: candidate.candidateIndex,
      critique: `Candidate ${candidate.candidateIndex} scored ${score.toFixed(2)}. ${verdict}`,
      principleAligned,
      score: Math.min(1, Math.max(0, score)),
      rank: 0, // assigned once the judgments are sorted
      scores: sixDScores,
      risks: riskAssessment,
    };
  };

  const judgments: PhilosopherJudgment[] = dreamerOutput.candidates.map((candidate) => judge(candidate));

  // Highest score first, then number the ranks from 1.
  judgments.sort((left, right) => right.score - left.score);
  for (let position = 0; position < judgments.length; position += 1) {
    judgments[position].rank = position + 1;
  }

  const best = judgments[0];
  const alignmentNote = best.principleAligned ? 'Well-aligned with principle.' : 'Alignment could be improved.';

  return {
    valid: true,
    judgments,
    overallAssessment: `Best candidate is #${best.candidateIndex} with score ${best.score.toFixed(2)}. ${alignmentNote}`,
    generatedAt: new Date().toISOString(),
  };
}
|
|
1966
|
-
|
|
1967
|
-
/**
 * STUB SCRIBE — turns the tournament winner into the final draft artifact
 * instead of calling a real Scribe subagent.
 *
 * Thresholds come from `config.thresholds` when set; otherwise from
 * `getEffectiveThresholds(config.stateDir)` when a state directory is
 * configured, and from a copy of `DEFAULT_THRESHOLDS` as the last resort.
 * Scoring weights fall back to `DEFAULT_SCORING_WEIGHTS`.
 *
 * @param dreamerOutput - Candidates produced by the Dreamer stage
 * @param philosopherOutput - Judgments produced by the Philosopher stage
 * @param snapshot - Session snapshot; supplies the session ID for the artifact
 * @param principleId - Target principle ID copied onto the artifact
 * @param telemetry - Telemetry so far; a copy extended with tournament data
 *   is attached to the artifact (the input object is not mutated here)
 * @param config - Trinity configuration (thresholds, weights, state directory)
 * @returns The draft artifact, or null when either upstream stage is invalid
 *   or the tournament yields no winner
 */
export function invokeStubScribe(
  dreamerOutput: DreamerOutput,
  philosopherOutput: PhilosopherOutput,
  snapshot: NocturnalSessionSnapshot,
  principleId: string,
  telemetry: TrinityTelemetry,
  config: TrinityConfig
): TrinityDraftArtifact | null {
  const upstreamValid = dreamerOutput.valid && philosopherOutput.valid;
  if (!upstreamValid) {
    return null;
  }

  // Resolve thresholds: explicit config, then persisted state, then defaults.
  const persistedOrDefault = () =>
    config.stateDir ? getEffectiveThresholds(config.stateDir) : { ...DEFAULT_THRESHOLDS };
  const thresholds = config.thresholds ?? persistedOrDefault();
  const weights = config.scoringWeights ?? DEFAULT_SCORING_WEIGHTS;

  // Tournament selection picks the winner.
  const outcome = runTournament(
    dreamerOutput.candidates,
    philosopherOutput.judgments,
    thresholds,
    weights
  );

  const champion = outcome.winner;
  if (!outcome.success || !champion) {
    // No eligible candidate survived the tournament.
    return null;
  }

  // Copy of the incoming telemetry, extended with tournament details.
  const eligibleCount = outcome.rankedCandidates.filter((entry) => entry.thresholdPassed).length;
  const enrichedTelemetry: TrinityTelemetry = {
    ...telemetry,
    tournamentTrace: outcome.trace,
    winnerAggregateScore: champion.scores.aggregate,
    winnerThresholdPassed: champion.thresholdPassed,
    eligibleCandidateCount: eligibleCount,
  };

  const { badDecision, betterDecision, rationale } = champion.candidate;
  return {
    selectedCandidateIndex: champion.candidateIndex,
    badDecision,
    betterDecision,
    rationale,
    sessionId: snapshot.sessionId,
    principleId,
    sourceSnapshotRef: `snapshot-${snapshot.sessionId}-${Date.now()}`,
    telemetry: enrichedTelemetry,
  };
}
|
|
2026
|
-
|
|
2027
|
-
// ---------------------------------------------------------------------------
|
|
2028
|
-
// Trinity Chain Execution
|
|
2029
|
-
// ---------------------------------------------------------------------------
|
|
2030
|
-
|
|
2031
|
-
/** Inputs accepted by `runTrinity` and `runTrinityAsync`. */
export interface RunTrinityOptions {
  /** Session snapshot the Dreamer stage generates candidates from. */
  snapshot: NocturnalSessionSnapshot;
  /** ID of the principle the chain targets. */
  principleId: string;
  /** Trinity configuration (stub mode, runtime adapter, candidate limit, ...). */
  config: TrinityConfig;
}
|
|
2039
|
-
|
|
2040
|
-
/**
 * Run the Trinity chain synchronously. Only the stub path is supported here.
 *
 * With `config.useStubs` set, the call is delegated to the stub chain. Without
 * it a real subagent runtime is required, which this synchronous entry point
 * cannot drive: the error is logged via `console.error` and a failed result
 * attributed to the dreamer stage is returned. Use `runTrinityAsync` for real
 * subagent execution.
 *
 * @param options - Trinity execution options
 * @returns TrinityResult with the final artifact or failure info
 */
export function runTrinity(options: RunTrinityOptions): TrinityResult {
  const { snapshot, principleId, config } = options;

  if (!config.useStubs) {
    // Real execution needs a runtimeAdapter and is handled by runTrinityAsync.
    const errorMsg = '[Trinity] useStubs=false requires a runtimeAdapter. Use runTrinityAsync for real subagent execution.';
    const stageFailure: TrinityStageFailure = { stage: 'dreamer', reason: errorMsg };
    const failedTelemetry: TrinityTelemetry = {
      chainMode: 'trinity',
      usedStubs: false,
      dreamerPassed: false,
      philosopherPassed: false,
      scribePassed: false,
      candidateCount: 0,
      selectedCandidateIndex: -1,
      stageFailures: [`Configuration: ${errorMsg}`],
    };
    console.error(`[Trinity] ERROR: ${errorMsg}`);
    return {
      success: false,
      telemetry: failedTelemetry,
      failures: [stageFailure],
      fallbackOccurred: false,
    };
  }

  // Stub path: synchronous stub implementations.
  return runTrinityWithStubs(snapshot, principleId, config);
}
|
|
2079
|
-
|
|
2080
|
-
/**
 * Run the Trinity chain asynchronously against a real subagent runtime.
 *
 * With `config.useStubs` set, the synchronous stub chain is used instead.
 * Otherwise `config.runtimeAdapter` must be present; a missing adapter is
 * logged and reported as a dreamer-stage failure. The stages run in order —
 * Dreamer, Philosopher, Scribe — and the first one with no usable output ends
 * the run with a failed result. A synthesized artifact is finally checked by
 * `validateExtraction`; an ungrounded artifact is reported as a scribe failure.
 *
 * Once the adapter is in use, `adapter.close()` (if defined) is awaited on
 * every exit path and a rejection from it is ignored.
 *
 * @param options - Trinity execution options
 * @returns Promise of a TrinityResult with the final artifact or failure info
 */
export async function runTrinityAsync(options: RunTrinityOptions): Promise<TrinityResult> {
  const { snapshot, principleId, config } = options;

  if (config.useStubs) {
    // Stub path: reuse the synchronous stub chain.
    return runTrinityWithStubs(snapshot, principleId, config);
  }

  const adapter = config.runtimeAdapter;
  if (!adapter) {
    const errorMsg = '[Trinity] useStubs=false requires config.runtimeAdapter to be set.';
    const configTelemetry: TrinityTelemetry = {
      chainMode: 'trinity',
      usedStubs: false,
      dreamerPassed: false,
      philosopherPassed: false,
      scribePassed: false,
      candidateCount: 0,
      selectedCandidateIndex: -1,
      stageFailures: [`Configuration: ${errorMsg}`],
    };
    const configFailures: TrinityStageFailure[] = [{ stage: 'dreamer', reason: errorMsg }];
    console.error(`[Trinity] ERROR: ${errorMsg}`);
    return {
      success: false,
      telemetry: configTelemetry,
      failures: configFailures,
      fallbackOccurred: false,
    };
  }

  const telemetry: TrinityTelemetry = {
    chainMode: 'trinity',
    usedStubs: false,
    dreamerPassed: false,
    philosopherPassed: false,
    scribePassed: false,
    candidateCount: 0,
    selectedCandidateIndex: -1,
    stageFailures: [],
  };
  const failures: TrinityStageFailure[] = [];

  // Records a stage failure in both ledgers and builds the failed result.
  const bail = (failure: TrinityStageFailure, note: string): TrinityResult => {
    failures.push(failure);
    telemetry.stageFailures.push(note);
    return { success: false, telemetry, failures, fallbackOccurred: false };
  };

  try {
    // Step 1: Dreamer — generate candidates via the real subagent.
    const dreamerOutput = await adapter.invokeDreamer(snapshot, principleId, config.maxCandidates);
    if (!dreamerOutput.valid || dreamerOutput.candidates.length === 0) {
      return bail(
        { stage: 'dreamer', reason: dreamerOutput.reason ?? 'No valid candidates generated' },
        `Dreamer: ${dreamerOutput.reason ?? 'failed'}`
      );
    }
    telemetry.dreamerPassed = true;
    telemetry.candidateCount = dreamerOutput.candidates.length;

    // Diversity validation (DIVER-04): soft check, never gates the pipeline.
    const diversity = validateCandidateDiversity(dreamerOutput.candidates);
    telemetry.diversityCheckPassed = diversity.diversityCheckPassed;
    telemetry.candidateRiskLevels = dreamerOutput.candidates
      .map((candidate) => candidate.riskLevel)
      .filter((level): level is "low" | "medium" | "high" => typeof level === 'string');
    if (!diversity.diversityCheckPassed) {
      console.warn(`[Trinity] Diversity check failed: ${diversity.details}`);
    }

    // Step 2: Philosopher — rank candidates via the real subagent.
    const philosopherOutput = await adapter.invokePhilosopher(dreamerOutput, principleId, snapshot);
    if (!philosopherOutput.valid || philosopherOutput.judgments.length === 0) {
      return bail(
        { stage: 'philosopher', reason: philosopherOutput.reason ?? 'No judgments produced' },
        `Philosopher: ${philosopherOutput.reason ?? 'failed'}`
      );
    }
    telemetry.philosopherPassed = true;

    // Average the 6D scores over the judgments that carry them (if any).
    const scoredJudgments = philosopherOutput.judgments.filter((judgment) => judgment.scores);
    if (scoredJudgments.length > 0) {
      const dimensions = ['principleAlignment', 'specificity', 'actionability', 'executability', 'safetyImpact', 'uxImpact'] as const;
      const averages: Record<string, number> = {};
      for (const dimension of dimensions) {
        let total = 0;
        for (const judgment of scoredJudgments) {
          total += judgment.scores?.[dimension] ?? 0;
        }
        averages[dimension] = total / scoredJudgments.length;
      }
      telemetry.philosopher6D = {
        avgScores: averages as NonNullable<TrinityTelemetry['philosopher6D']>['avgScores'],
        highRiskCount: philosopherOutput.judgments.filter((judgment) => judgment.risks?.breakingChangeRisk).length,
      };
    }

    // Step 3: Scribe — synthesize the final artifact via the real subagent.
    const draftArtifact = await adapter.invokeScribe(
      dreamerOutput,
      philosopherOutput,
      snapshot,
      principleId,
      telemetry,
      config
    );
    if (!draftArtifact) {
      return bail(
        { stage: 'scribe', reason: 'Failed to synthesize artifact from candidates' },
        'Scribe: synthesis failed'
      );
    }
    telemetry.scribePassed = true;
    telemetry.selectedCandidateIndex = draftArtifact.selectedCandidateIndex;

    // Carry the tournament details over from the artifact's own telemetry.
    const scribeTelemetry = draftArtifact.telemetry;
    if (scribeTelemetry) {
      telemetry.tournamentTrace = scribeTelemetry.tournamentTrace;
      telemetry.winnerAggregateScore = scribeTelemetry.winnerAggregateScore;
      telemetry.winnerThresholdPassed = scribeTelemetry.winnerThresholdPassed;
      telemetry.eligibleCandidateCount = scribeTelemetry.eligibleCandidateCount;
    }

    // Hallucination detection (SDK-QUAL-02): validate extraction against the snapshot.
    const grounding = validateExtraction(draftArtifact, snapshot);
    if (!grounding.isGrounded) {
      const reason = grounding.reason ?? 'Extraction not grounded in session evidence';
      console.warn(`[Trinity] HALLUCINATION_DETECTED: ${reason}`);
      telemetry.stageFailures.push(`Hallucination: ${reason}`);
      return {
        success: false,
        telemetry,
        failures: [{ stage: 'scribe', reason }],
        fallbackOccurred: false,
      };
    }

    return {
      success: true,
      artifact: draftArtifact,
      telemetry,
      failures: [],
      fallbackOccurred: false,
      artificerContext: draftArtifact.artificerContext,
    };
  } finally {
    if (adapter.close) {
      await adapter.close().catch(() => { /* intentionally empty - adapter cleanup error ignored */ });
    }
  }
}
|
|
2242
|
-
|
|
2243
|
-
/**
 * Internal: run the Trinity chain synchronously with the stub stages.
 *
 * Runs the same Dreamer → Philosopher → Scribe sequence as the adapter-backed
 * chain, using `invokeStubDreamer`, `invokeStubPhilosopher` and
 * `invokeStubScribe`. The first stage that yields nothing usable ends the run
 * with a failed result; a synthesized artifact is then checked by
 * `validateExtraction` before success is reported.
 *
 * @param snapshot - Session snapshot driving candidate generation
 * @param principleId - Target principle ID
 * @param config - Trinity configuration (candidate limit, thresholds, ...)
 * @returns TrinityResult with the final artifact or failure info
 */
// eslint-disable-next-line complexity, @typescript-eslint/class-methods-use-this -- complexity 14, refactor candidate
function runTrinityWithStubs(
  snapshot: NocturnalSessionSnapshot,
  principleId: string,
  config: TrinityConfig
): TrinityResult {
  const telemetry: TrinityTelemetry = {
    chainMode: 'trinity',
    usedStubs: true,
    dreamerPassed: false,
    philosopherPassed: false,
    scribePassed: false,
    candidateCount: 0,
    selectedCandidateIndex: -1,
    stageFailures: [],
  };
  const failures: TrinityStageFailure[] = [];

  // Records a stage failure in both ledgers and builds the failed result.
  const abort = (failure: TrinityStageFailure, note: string): TrinityResult => {
    failures.push(failure);
    telemetry.stageFailures.push(note);
    return {
      success: false,
      telemetry,
      failures,
      fallbackOccurred: false,
    };
  };

  // Step 1: Dreamer — generate candidates (stub).
  const dreamerOutput = invokeStubDreamer(snapshot, principleId, config.maxCandidates);
  if (!dreamerOutput.valid || dreamerOutput.candidates.length === 0) {
    return abort(
      { stage: 'dreamer', reason: dreamerOutput.reason ?? 'No valid candidates generated' },
      `Dreamer: ${dreamerOutput.reason ?? 'failed'}`
    );
  }
  telemetry.dreamerPassed = true;
  telemetry.candidateCount = dreamerOutput.candidates.length;

  // Diversity validation (DIVER-04): soft check, never gates the pipeline.
  const diversity = validateCandidateDiversity(dreamerOutput.candidates);
  telemetry.diversityCheckPassed = diversity.diversityCheckPassed;
  telemetry.candidateRiskLevels = dreamerOutput.candidates
    .map((candidate) => candidate.riskLevel)
    .filter((level): level is "low" | "medium" | "high" => typeof level === 'string');
  if (!diversity.diversityCheckPassed) {
    console.warn(`[Trinity] Diversity check failed: ${diversity.details}`);
  }

  // Step 2: Philosopher — rank candidates (stub).
  const philosopherOutput = invokeStubPhilosopher(dreamerOutput, principleId, snapshot);
  if (!philosopherOutput.valid || philosopherOutput.judgments.length === 0) {
    return abort(
      { stage: 'philosopher', reason: philosopherOutput.reason ?? 'No judgments produced' },
      `Philosopher: ${philosopherOutput.reason ?? 'failed'}`
    );
  }
  telemetry.philosopherPassed = true;

  // Average the 6D scores over the judgments that carry them (if any).
  const scoredJudgments = philosopherOutput.judgments.filter((judgment) => judgment.scores);
  if (scoredJudgments.length > 0) {
    const dimensions = ['principleAlignment', 'specificity', 'actionability', 'executability', 'safetyImpact', 'uxImpact'] as const;
    const averages: Record<string, number> = {};
    for (const dimension of dimensions) {
      let total = 0;
      for (const judgment of scoredJudgments) {
        total += judgment.scores?.[dimension] ?? 0;
      }
      averages[dimension] = total / scoredJudgments.length;
    }
    telemetry.philosopher6D = {
      avgScores: averages as NonNullable<TrinityTelemetry['philosopher6D']>['avgScores'],
      highRiskCount: philosopherOutput.judgments.filter((judgment) => judgment.risks?.breakingChangeRisk).length,
    };
  }

  // Step 3: Scribe — produce the final artifact using tournament selection (stub).
  const draftArtifact = invokeStubScribe(dreamerOutput, philosopherOutput, snapshot, principleId, telemetry, config);
  if (!draftArtifact) {
    return abort(
      { stage: 'scribe', reason: 'Failed to synthesize artifact from candidates' },
      'Scribe: synthesis failed'
    );
  }
  telemetry.scribePassed = true;
  telemetry.selectedCandidateIndex = draftArtifact.selectedCandidateIndex;

  // Carry the tournament details over from the artifact's own telemetry.
  const scribeTelemetry = draftArtifact.telemetry;
  if (scribeTelemetry) {
    telemetry.tournamentTrace = scribeTelemetry.tournamentTrace;
    telemetry.winnerAggregateScore = scribeTelemetry.winnerAggregateScore;
    telemetry.winnerThresholdPassed = scribeTelemetry.winnerThresholdPassed;
    telemetry.eligibleCandidateCount = scribeTelemetry.eligibleCandidateCount;
  }

  // Hallucination detection (SDK-QUAL-02): validate extraction against the snapshot.
  const grounding = validateExtraction(draftArtifact, snapshot);
  if (!grounding.isGrounded) {
    const reason = grounding.reason ?? 'Extraction not grounded in session evidence';
    console.warn(`[Trinity] HALLUCINATION_DETECTED: ${reason}`);
    telemetry.stageFailures.push(`Hallucination: ${reason}`);
    return {
      success: false,
      telemetry,
      failures: [{ stage: 'scribe', reason }],
      fallbackOccurred: false,
    };
  }

  return {
    success: true,
    artifact: draftArtifact,
    telemetry,
    failures: [],
    fallbackOccurred: false,
    artificerContext: draftArtifact.artificerContext,
  };
}
|
|
2379
|
-
|
|
2380
|
-
// ---------------------------------------------------------------------------
|
|
2381
|
-
// Trinity Validation (for Arbiter integration)
|
|
2382
|
-
// ---------------------------------------------------------------------------
|
|
2383
|
-
|
|
2384
|
-
/**
 * Outcome of checking a Trinity draft artifact before it is handed to the
 * arbiter (see `validateDraftArtifact`).
 */
export interface DraftValidationResult {
  /** True when no rule was violated, i.e. `failures` is empty. */
  valid: boolean;
  /** One human-readable message per violated rule. */
  failures: string[];
}
|
|
2392
|
-
|
|
2393
|
-
/**
 * Validate a TrinityDraftArtifact before passing it to the arbiter.
 *
 * Rules, checked in this order (every violation is reported, not just the first):
 *   1. `badDecision` is present and not blank
 *   2. `betterDecision` is present and not blank
 *   3. `rationale` is at least 20 characters after trimming
 *   4. `principleId` is present and not blank
 *   5. `sessionId` is present and not blank
 *   6. `badDecision` and `betterDecision` differ once trimmed
 *
 * @param draft - Draft artifact to check
 * @returns `valid` flag plus the list of violated rules
 */
export function validateDraftArtifact(draft: TrinityDraftArtifact): DraftValidationResult {
  // Missing/empty value, or a string made only of whitespace.
  const isBlank = (text: string): boolean => !text || text.trim().length === 0;

  const problems: string[] = [];
  const { badDecision, betterDecision, rationale, principleId, sessionId } = draft;

  if (isBlank(badDecision)) {
    problems.push('badDecision is required and non-empty');
  }
  if (isBlank(betterDecision)) {
    problems.push('betterDecision is required and non-empty');
  }
  if (!rationale || rationale.trim().length < 20) {
    problems.push('rationale must be at least 20 characters');
  }
  if (isBlank(principleId)) {
    problems.push('principleId is required');
  }
  if (isBlank(sessionId)) {
    problems.push('sessionId is required');
  }

  // The two decisions must not be the same text; only compared when both are non-blank strings.
  const bothAreStrings = typeof badDecision === 'string' && typeof betterDecision === 'string';
  if (bothAreStrings) {
    const trimmedBad = badDecision.trim();
    const trimmedBetter = betterDecision.trim();
    if (trimmedBad.length > 0 && trimmedBetter.length > 0 && trimmedBad === trimmedBetter) {
      problems.push('badDecision and betterDecision cannot be identical');
    }
  }

  return {
    valid: problems.length === 0,
    failures: problems,
  };
}
|
|
2435
|
-
|
|
2436
|
-
// ---------------------------------------------------------------------------
|
|
2437
|
-
// Hallucination Detection (SDK-QUAL-02)
|
|
2438
|
-
// ---------------------------------------------------------------------------
|
|
2439
|
-
|
|
2440
|
-
/**
 * Outcome of checking an extraction against the evidence recorded in a
 * session snapshot.
 */
export interface HallucinationDetectionResult {
  /** True when the extraction is considered backed by real session evidence. */
  isGrounded: boolean;
  /** Categories of evidence that were present in the snapshot. */
  evidenceTypes: Array<string>;
  /** Explanation of the verdict; populated when a hallucination is detected. */
  reason?: string;
  /** Short, truncated summaries of evidence items, intended for telemetry. */
  evidencePreview: Array<string>;
}
|
|
2453
|
-
|
|
2454
|
-
/**
 * Validate that an extracted badDecision corresponds to actual events in the
 * NocturnalSessionSnapshot. This catches hallucinated extractions where the
 * Trinity chain produces a badDecision that has no grounding in real failures,
 * pain events, or gate blocks.
 *
 * Evidence sources checked:
 *   1. Failed tool calls (snapshot.toolCalls with outcome='failure')
 *   2. Pain events (snapshot.painEvents with score >= 50)
 *   3. Gate blocks (snapshot.gateBlocks)
 *   4. User corrections (snapshot.userTurns with correctionDetected=true)
 *
 * Matching is a keyword-overlap heuristic. Tool names, file paths, error
 * messages/types, pain sources/reasons and gate reasons are reduced to
 * lower-case alphanumeric tokens, and badDecision words longer than four
 * characters are compared against them (exact match, or substring match
 * against evidence tokens longer than four characters). Every evidence token
 * goes through the same normalizer as the badDecision words, so identifiers
 * containing punctuation (e.g. `apply_patch`) can match their own mention.
 *
 * The extraction is grounded when either
 *   - at least max(2, ceil(15% of badDecision tokens)) tokens matched, or
 *   - at least one matched token is longer than eight characters.
 * A snapshot with no evidence at all cannot be used for validation and is
 * treated as grounded.
 *
 * @param artifact The draft artifact produced by the Scribe stage
 * @param snapshot The session snapshot used to generate the extraction
 * @returns HallucinationDetectionResult indicating whether the extraction is grounded
 */
export function validateExtraction(
  artifact: TrinityDraftArtifact,
  snapshot: NocturnalSessionSnapshot
): HallucinationDetectionResult {
  const evidenceTypes: string[] = [];
  const evidencePreview: string[] = [];
  const evidenceTokens = new Set<string>();

  // Low-information words dropped from error messages and pain reasons.
  const stopWords = new Set([
    'with', 'from', 'that', 'this', 'which', 'been', 'have', 'were', 'they', 'their',
  ]);

  // Single normalizer shared by evidence tokens and badDecision tokens:
  // lower-case, then strip everything that is not an ASCII letter or digit.
  const normalizeEvidenceToken = (value: string): string =>
    value.toLowerCase().replace(/[^a-z0-9]/g, '');

  // Record one normalized evidence token; blank or non-string values are ignored.
  const addToken = (value: string | null | undefined): void => {
    if (typeof value !== 'string') return;
    const normalized = normalizeEvidenceToken(value);
    if (normalized.length > 0) evidenceTokens.add(normalized);
  };

  // Record every whitespace-separated word of `text` longer than three
  // characters, optionally skipping stop words.
  const addWords = (text: string, dropStopWords: boolean): void => {
    for (const word of text.toLowerCase().split(/\s+/)) {
      if (word.length <= 3) continue;
      if (dropStopWords && stopWords.has(word)) continue;
      addToken(word);
    }
  };

  // 1. Failed tool calls
  const failedToolCalls = (snapshot.toolCalls ?? []).filter(tc => tc.outcome === 'failure');
  if (failedToolCalls.length > 0) {
    evidenceTypes.push('tool_failures');
    for (const tc of failedToolCalls) {
      addToken(tc.toolName);
      if (tc.filePath) {
        // The whole path and each of its segments are usable as evidence.
        addToken(tc.filePath);
        for (const segment of tc.filePath.split(/[\\/]/)) addToken(segment);
      }
      if (tc.errorMessage) addWords(tc.errorMessage, true);
      addToken(tc.errorType);
      evidencePreview.push(`tool:${tc.toolName}${tc.filePath ? `@${tc.filePath}` : ''} -> ${tc.errorMessage ?? 'unknown'}`.slice(0, 100));
    }
  }

  // 2. Pain events (score >= 50 indicates meaningful pain)
  const significantPainEvents = (snapshot.painEvents ?? []).filter(pe => pe.score >= 50);
  if (significantPainEvents.length > 0) {
    evidenceTypes.push('pain_events');
    for (const pe of significantPainEvents) {
      addToken(pe.source);
      if (pe.reason) addWords(pe.reason, true);
      evidencePreview.push(`pain:${pe.score} [${pe.source}] ${pe.reason ?? ''}`.slice(0, 100));
    }
  }

  // 3. Gate blocks
  const gateBlocks = snapshot.gateBlocks ?? [];
  if (gateBlocks.length > 0) {
    evidenceTypes.push('gate_blocks');
    evidenceTokens.add('gate');
    evidenceTokens.add('blocked');
    for (const gb of gateBlocks) {
      addToken(gb.toolName);
      // Gate reasons are not stop-word filtered (unchanged behavior).
      if (gb.reason) addWords(gb.reason, false);
      evidencePreview.push(`gate:${gb.toolName} -> ${gb.reason}`.slice(0, 100));
    }
  }

  // 4. User corrections
  const userCorrections = (snapshot.userTurns ?? []).filter(ut => ut.correctionDetected);
  if (userCorrections.length > 0) {
    evidenceTypes.push('user_corrections');
    evidenceTokens.add('correction');
    evidenceTokens.add('wrong');
    evidenceTokens.add('incorrect');
    evidencePreview.push(`corrections:${userCorrections.length}`);
  }

  // If no evidence exists at all in the snapshot, we cannot validate.
  // Allow the extraction through — the pipeline already has guardrails for
  // empty snapshots (Dreamer returns valid:false).
  if (evidenceTypes.length === 0) {
    return {
      isGrounded: true,
      evidenceTypes: [],
      reason: undefined,
      evidencePreview: [],
    };
  }

  // A missing or non-string badDecision yields no tokens and is therefore
  // reported as ungrounded rather than throwing.
  const badDecisionText = typeof artifact.badDecision === 'string' ? artifact.badDecision : '';
  const badDecisionTokens = badDecisionText
    .split(/\s+/)
    .map(normalizeEvidenceToken)
    .filter(t => t.length > 4);

  // Substring matching is restricted to evidence tokens longer than four
  // characters so that short fragments do not match everything.
  const specificEvidence = [...evidenceTokens].filter(ev => ev.length > 4);
  const matchedTokens = badDecisionTokens.filter(
    token =>
      evidenceTokens.has(token) ||
      specificEvidence.some(ev => ev.includes(token) || token.includes(ev))
  );
  const matchCount = matchedTokens.length;

  // Heuristic: at least 2 meaningful tokens (or 15% of them, if larger) overlap.
  const minOverlap = badDecisionTokens.length > 0
    ? Math.max(1, Math.ceil(badDecisionTokens.length * 0.15))
    : 0;
  const requiredMatches = Math.max(2, minOverlap);

  // A single overlap is acceptable if the token is highly specific (length > 8).
  const hasHighlySpecificMatch = matchedTokens.some(t => t.length > 8);

  if (matchCount >= requiredMatches || hasHighlySpecificMatch) {
    return {
      isGrounded: true,
      evidenceTypes,
      evidencePreview: evidencePreview.slice(0, 5),
    };
  }

  // Hallucination detected — badDecision has no grounding in snapshot evidence
  const reason = `Hallucinated extraction: badDecision "${badDecisionText.slice(0, 80)}" has insufficient overlap with session evidence. ` +
    `Evidence types available: [${evidenceTypes.join(', ')}]. Matched tokens: [${matchedTokens.join(', ')}] (needed >= ${requiredMatches}).`;

  return {
    isGrounded: false,
    evidenceTypes,
    reason,
    evidencePreview: evidencePreview.slice(0, 5),
  };
}
|
|
2637
|
-
|
|
2638
|
-
/**
 * Build a NocturnalArtifact-compatible record from a TrinityDraftArtifact.
 *
 * Identity and decision fields are copied from the draft; a fresh
 * `artifactId` and an ISO-8601 `createdAt` timestamp are generated per call.
 */
export function draftToArtifact(draft: TrinityDraftArtifact): {
  artifactId: string;
  sessionId: string;
  principleId: string;
  sourceSnapshotRef: string;
  badDecision: string;
  betterDecision: string;
  rationale: string;
  createdAt: string;
  thinkingModelDelta?: number;
  planningRatioGain?: number;
} {
  const { badDecision, betterDecision } = draft;

  // Reflection quality metrics: use the draft's own values when present.
  const qualityMetrics = {
    thinkingModelDelta:
      draft.thinkingModelDelta ?? computeThinkingModelDelta(badDecision, betterDecision),
    // A gain needs an improved snapshot, which a Trinity draft does not carry,
    // so the fallback is 0.
    planningRatioGain: draft.planningRatioGain ?? 0,
  };

  return {
    artifactId: randomUUID(),
    sessionId: draft.sessionId,
    principleId: draft.principleId,
    sourceSnapshotRef: draft.sourceSnapshotRef,
    badDecision,
    betterDecision,
    rationale: draft.rationale,
    createdAt: new Date().toISOString(),
    ...qualityMetrics,
  };
}
|
|
2671
|
-
|
|
2672
|
-
// ---------------------------------------------------------------------------
|
|
2673
|
-
// Default Configuration
|
|
2674
|
-
// ---------------------------------------------------------------------------
|
|
2675
|
-
|
|
2676
|
-
/**
 * Baseline Trinity settings.
 * NOTE(review): how callers merge overrides onto these defaults is not
 * visible from this section — confirm at the call sites.
 */
export const DEFAULT_TRINITY_CONFIG: TrinityConfig = {
  // Trinity mode is switched on by default.
  useTrinity: true,
  // Default candidate limit.
  maxCandidates: 3,
  // Real subagent execution is the default; set useStubs=true for stub-only mode.
  useStubs: false,
};
|