principles-disciple 1.71.0 → 1.73.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (309)
  1. package/openclaw.plugin.json +10 -5
  2. package/package.json +17 -19
  3. package/scripts/acceptance-test.mjs +16 -73
  4. package/scripts/sync-plugin.mjs +382 -77
  5. package/src/commands/archive-impl.ts +2 -1
  6. package/src/commands/capabilities.ts +2 -2
  7. package/src/commands/context.ts +2 -2
  8. package/src/commands/disable-impl.ts +2 -1
  9. package/src/commands/evolution-status.ts +16 -16
  10. package/src/commands/export.ts +12 -67
  11. package/src/commands/pain.ts +91 -1
  12. package/src/commands/principle-rollback.ts +2 -1
  13. package/src/commands/promote-impl.ts +7 -43
  14. package/src/commands/rollback-impl.ts +2 -1
  15. package/src/commands/rollback.ts +2 -1
  16. package/src/commands/samples.ts +2 -1
  17. package/src/commands/thinking-os.ts +2 -1
  18. package/src/config/errors.ts +18 -2
  19. package/src/constants/diagnostician.ts +2 -2
  20. package/src/constants/tools.ts +2 -1
  21. package/src/core/__tests__/focus-history.test.ts +210 -0
  22. package/src/core/config.ts +1 -1
  23. package/src/core/confirm-first-gate.ts +255 -0
  24. package/src/core/correction-cue-learner.ts +2 -136
  25. package/src/core/correction-types.ts +16 -88
  26. package/src/core/dictionary.ts +19 -20
  27. package/src/core/empathy-keyword-matcher.ts +17 -289
  28. package/src/core/empathy-types.ts +18 -229
  29. package/src/core/event-log.ts +38 -132
  30. package/src/core/evolution-reducer.ts +21 -2
  31. package/src/core/evolution-types.ts +76 -464
  32. package/src/core/file-store.ts +80 -0
  33. package/src/core/focus-history.ts +228 -955
  34. package/src/core/local-worker-routing.ts +34 -314
  35. package/src/core/merge-gate-audit.ts +0 -195
  36. package/src/core/pain-diagnostic-gate.ts +154 -0
  37. package/src/core/pain-signal.ts +21 -138
  38. package/src/core/pain.ts +15 -88
  39. package/src/core/pd-task-reconciler.ts +26 -115
  40. package/src/core/pd-task-service.ts +9 -9
  41. package/src/core/pd-task-types.ts +23 -127
  42. package/src/core/principle-compiler/__tests__/compiler-replay-gate.test.ts +174 -0
  43. package/src/core/principle-compiler/code-validator.ts +15 -42
  44. package/src/core/principle-compiler/compiler.ts +100 -15
  45. package/src/core/principle-compiler/index.ts +5 -2
  46. package/src/core/principle-compiler/template-generator.ts +4 -104
  47. package/src/core/principle-injection.ts +10 -202
  48. package/src/core/principle-internalization/filesystem-lifecycle-datasource.ts +42 -0
  49. package/src/core/principle-internalization/lifecycle-read-model.ts +39 -242
  50. package/src/core/principle-internalization/principle-lifecycle-service.ts +12 -10
  51. package/src/core/principle-tree-ledger-adapter.ts +145 -0
  52. package/src/core/principle-tree-ledger.ts +8 -6
  53. package/src/core/reflection/reflection-context.ts +14 -109
  54. package/src/core/replay-engine.ts +8 -500
  55. package/src/core/rule-host-helpers.ts +5 -35
  56. package/src/core/rule-host-types.ts +10 -82
  57. package/src/core/rule-host.ts +6 -63
  58. package/src/core/runtime-v2-prompt-activation-reader.ts +231 -0
  59. package/src/core/session-tracker.ts +87 -101
  60. package/src/core/shadow-observation-registry.ts +19 -48
  61. package/src/core/trajectory.ts +3 -1
  62. package/src/core/workflow-funnel-loader.ts +62 -68
  63. package/src/core/workspace-context.ts +46 -0
  64. package/src/core/workspace-dir-service.ts +1 -1
  65. package/src/core/workspace-dir-validation.ts +18 -9
  66. package/src/hooks/AGENTS.md +1 -1
  67. package/src/hooks/gate-block-helper.ts +46 -44
  68. package/src/hooks/gate.ts +207 -7
  69. package/src/hooks/lifecycle.ts +30 -32
  70. package/src/hooks/llm.ts +60 -32
  71. package/src/hooks/pain.ts +297 -103
  72. package/src/hooks/prompt.ts +469 -339
  73. package/src/hooks/subagent.ts +2 -29
  74. package/src/i18n/commands.ts +2 -10
  75. package/src/index.ts +95 -85
  76. package/src/openclaw-sdk.ts +311 -0
  77. package/src/service/central-database.ts +8 -4
  78. package/src/service/evolution-queue-migration.ts +2 -1
  79. package/src/service/evolution-worker.ts +163 -1786
  80. package/src/service/internalization-trigger-adapter.ts +302 -0
  81. package/src/service/keyword-optimization-service.ts +4 -4
  82. package/src/service/monitoring-query-service.ts +1 -215
  83. package/src/service/queue-io.ts +60 -331
  84. package/src/service/runtime-summary-service.ts +115 -18
  85. package/src/service/subagent-workflow/index.ts +0 -41
  86. package/src/service/subagent-workflow/types.ts +9 -120
  87. package/src/service/subagent-workflow/workflow-store.ts +2 -119
  88. package/src/service/workflow-watchdog.ts +0 -43
  89. package/src/types/event-payload.ts +16 -74
  90. package/src/types/event-types.ts +39 -547
  91. package/src/types/hygiene-types.ts +7 -30
  92. package/src/types/principle-tree-schema.ts +20 -222
  93. package/src/types/queue.ts +15 -70
  94. package/src/types/runtime-summary.ts +5 -49
  95. package/src/utils/io.ts +10 -0
  96. package/src/utils/retry.ts +1 -1
  97. package/src/utils/shadow-fingerprint.ts +2 -2
  98. package/src/utils/workspace-resolver.ts +50 -0
  99. package/templates/langs/en/core/AGENTS.md +2 -2
  100. package/templates/langs/en/core/BOOT.md +1 -1
  101. package/templates/langs/en/core/HEARTBEAT.md +2 -2
  102. package/templates/langs/en/skills/ai-sprint-orchestration/references/agent-registry.json +1 -72
  103. package/templates/langs/en/skills/ai-sprint-orchestration/references/specs/bugfix-complex-template.json +6 -6
  104. package/templates/langs/en/skills/ai-sprint-orchestration/references/specs/feature-complex-template.json +6 -6
  105. package/templates/langs/en/skills/ai-sprint-orchestration/references/specs/workflow-validation-minimal-verify.json +2 -12
  106. package/templates/langs/en/skills/ai-sprint-orchestration/references/specs/workflow-validation-minimal.json +2 -12
  107. package/templates/langs/en/skills/ai-sprint-orchestration/runtime/.gitignore +2 -2
  108. package/templates/langs/en/skills/ai-sprint-orchestration/scripts/run.mjs +51 -15
  109. package/templates/langs/en/skills/evolve-task/SKILL.md +1 -1
  110. package/templates/langs/en/skills/pd-cli-operator/SKILL.md +67 -0
  111. package/templates/langs/en/skills/pd-diagnostician/SKILL.md +1 -1
  112. package/templates/langs/en/skills/pd-mentor/SKILL.md +1 -1
  113. package/templates/langs/en/skills/pd-pain-signal/SKILL.md +17 -39
  114. package/templates/langs/en/skills/pd-runtime-v2/SKILL.md +61 -0
  115. package/templates/langs/zh/core/AGENTS.md +2 -2
  116. package/templates/langs/zh/core/BOOT.md +1 -1
  117. package/templates/langs/zh/core/HEARTBEAT.md +2 -2
  118. package/templates/langs/zh/skills/ai-sprint-orchestration/references/agent-registry.json +1 -72
  119. package/templates/langs/zh/skills/ai-sprint-orchestration/references/specs/bugfix-complex-template.json +6 -6
  120. package/templates/langs/zh/skills/ai-sprint-orchestration/references/specs/feature-complex-template.json +6 -6
  121. package/templates/langs/zh/skills/ai-sprint-orchestration/references/specs/nocturnal-trinity-quality-enhancement.json +8 -8
  122. package/templates/langs/zh/skills/ai-sprint-orchestration/references/specs/workflow-validation-minimal-verify.json +2 -12
  123. package/templates/langs/zh/skills/ai-sprint-orchestration/references/specs/workflow-validation-minimal.json +2 -12
  124. package/templates/langs/zh/skills/ai-sprint-orchestration/runtime/.gitignore +2 -2
  125. package/templates/langs/zh/skills/ai-sprint-orchestration/scripts/run.mjs +51 -15
  126. package/templates/langs/zh/skills/ai-sprint-orchestration/test/run.test.mjs +21 -5
  127. package/templates/langs/zh/skills/evolve-task/SKILL.md +2 -2
  128. package/templates/langs/zh/skills/pd-cli-operator/SKILL.md +67 -0
  129. package/templates/langs/zh/skills/pd-diagnostician/SKILL.md +1 -1
  130. package/templates/langs/zh/skills/pd-mentor/SKILL.md +1 -1
  131. package/templates/langs/zh/skills/pd-pain-signal/SKILL.md +17 -38
  132. package/templates/langs/zh/skills/pd-runtime-v2/SKILL.md +61 -0
  133. package/tests/build-artifacts.test.ts +1 -3
  134. package/tests/commands/evolution-status.test.ts +0 -118
  135. package/tests/core/bootstrap-rules.test.ts +1 -1
  136. package/tests/core/config.test.ts +1 -1
  137. package/tests/core/event-log.test.ts +35 -0
  138. package/tests/core/evolution-engine.test.ts +610 -0
  139. package/tests/core/file-store.test.ts +102 -0
  140. package/tests/core/focus-history.test.ts +203 -11
  141. package/tests/core/merge-gate-audit.test.ts +2 -169
  142. package/tests/core/model-deployment-registry.test.ts +7 -1
  143. package/tests/core/model-training-registry.test.ts +19 -0
  144. package/tests/core/observability.test.ts +0 -1
  145. package/tests/core/pain-diagnostic-gate.test.ts +498 -0
  146. package/tests/core/pain.test.ts +0 -1
  147. package/tests/core/principle-internalization/deprecated-readiness.test.ts +2 -2
  148. package/tests/core/principle-internalization/lifecycle-metrics.test.ts +2 -2
  149. package/tests/core/principle-internalization/{internalization-routing-policy.test.ts → lifecycle-routing-policy.test.ts} +6 -6
  150. package/tests/core/principle-internalization/lineage-source-retired.test.ts +56 -0
  151. package/tests/core/principle-internalization/principle-lifecycle-service.test.ts +1 -23
  152. package/tests/core/principle-tree-ledger-adapter.test.ts +253 -0
  153. package/tests/core/reflection-context.test.ts +0 -14
  154. package/tests/core/replay-engine.test.ts +127 -215
  155. package/tests/core/rule-host-helpers.test.ts +2 -2
  156. package/tests/core/rule-implementation-runtime.test.ts +0 -27
  157. package/tests/core/workflow-funnel-loader.test.ts +162 -0
  158. package/tests/core/workspace-dir-validation.test.ts +8 -1
  159. package/tests/core-anti-growth.test.ts +192 -0
  160. package/tests/hook-workspace-nextaction-contract.test.ts +42 -0
  161. package/tests/hooks/confirm-first-gate.test.ts +333 -0
  162. package/tests/hooks/gate-auto-correct-shadow.test.ts +310 -0
  163. package/tests/hooks/gate-auto-correct.test.ts +665 -0
  164. package/tests/hooks/gate-rule-host-pipeline.test.ts +2 -1
  165. package/tests/hooks/pain.test.ts +269 -12
  166. package/tests/hooks/prompt-characterization.test.ts +500 -0
  167. package/tests/hooks/prompt-size-guard.test.ts +329 -0
  168. package/tests/hooks/runtime-v2-prompt-activation.test.ts +869 -0
  169. package/tests/index.test.ts +94 -1
  170. package/tests/integration/auto-entry-gate.test.ts +248 -0
  171. package/tests/integration/internalization-trigger-guard.test.ts +69 -0
  172. package/tests/integration/m8-legacy-paths.test.ts +63 -0
  173. package/tests/integration/runtime-v2-pain-guard.test.ts +125 -0
  174. package/tests/plugin-config-resolution-cutover.test.ts +359 -0
  175. package/tests/runtime-v2-discovery-guard.test.ts +154 -0
  176. package/tests/service/central-database.test.ts +457 -0
  177. package/tests/service/evolution-worker.correction-observer.test.ts +173 -0
  178. package/tests/service/evolution-worker.timeout.test.ts +11 -129
  179. package/tests/service/internalization-trigger-adapter.test.ts +251 -0
  180. package/tests/service/monitoring-query-service.test.ts +1 -47
  181. package/tests/service/queue-io.test.ts +1 -62
  182. package/tests/service/runtime-summary-service.test.ts +184 -3
  183. package/tests/service/workflow-watchdog.test.ts +0 -91
  184. package/tests/utils/file-lock.test.ts +5 -3
  185. package/tests/utils/session-key.test.ts +52 -0
  186. package/tests/utils/subagent-probe.test.ts +48 -1
  187. package/vitest.config.ts +4 -11
  188. package/.planning/codebase/ARCHITECTURE.md +0 -157
  189. package/.planning/codebase/CONCERNS.md +0 -145
  190. package/.planning/codebase/CONVENTIONS.md +0 -148
  191. package/.planning/codebase/INTEGRATIONS.md +0 -81
  192. package/.planning/codebase/STACK.md +0 -87
  193. package/.planning/codebase/STRUCTURE.md +0 -193
  194. package/.planning/codebase/TESTING.md +0 -243
  195. package/.planning/phases/01-basic-visualization/01-GAP-CLOSURE-VERIFICATION.md +0 -113
  196. package/docs/COMMAND_REFERENCE.md +0 -76
  197. package/docs/COMMAND_REFERENCE_EN.md +0 -79
  198. package/scripts/build-web.mjs +0 -46
  199. package/scripts/diagnose-nocturnal.mjs +0 -537
  200. package/scripts/seed-nocturnal-scenarios.mjs +0 -384
  201. package/src/commands/nocturnal-review.ts +0 -322
  202. package/src/commands/nocturnal-rollout.ts +0 -790
  203. package/src/commands/nocturnal-train.ts +0 -986
  204. package/src/commands/pd-reflect.ts +0 -88
  205. package/src/core/adaptive-thresholds.ts +0 -478
  206. package/src/core/diagnostician-task-store.ts +0 -192
  207. package/src/core/nocturnal-arbiter.ts +0 -715
  208. package/src/core/nocturnal-artifact-lineage.ts +0 -116
  209. package/src/core/nocturnal-artificer.ts +0 -257
  210. package/src/core/nocturnal-candidate-scoring.ts +0 -530
  211. package/src/core/nocturnal-compliance.ts +0 -1146
  212. package/src/core/nocturnal-dataset.ts +0 -763
  213. package/src/core/nocturnal-executability.ts +0 -428
  214. package/src/core/nocturnal-export.ts +0 -499
  215. package/src/core/nocturnal-paths.ts +0 -240
  216. package/src/core/nocturnal-reasoning-deriver.ts +0 -343
  217. package/src/core/nocturnal-rule-implementation-validator.ts +0 -246
  218. package/src/core/nocturnal-snapshot-contract.ts +0 -99
  219. package/src/core/nocturnal-trajectory-extractor.ts +0 -512
  220. package/src/core/nocturnal-trinity-types.ts +0 -218
  221. package/src/core/nocturnal-trinity.ts +0 -2680
  222. package/src/core/principle-internalization/deprecated-readiness.ts +0 -93
  223. package/src/core/principle-internalization/internalization-routing-policy.ts +0 -208
  224. package/src/core/principle-internalization/lifecycle-metrics.ts +0 -152
  225. package/src/http/principles-console-route.ts +0 -709
  226. package/src/service/central-health-service.ts +0 -49
  227. package/src/service/central-overview-service.ts +0 -138
  228. package/src/service/control-ui-query-service.ts +0 -900
  229. package/src/service/cooldown-strategy.ts +0 -97
  230. package/src/service/evolution-pain-context.ts +0 -79
  231. package/src/service/evolution-query-service.ts +0 -407
  232. package/src/service/health-query-service.ts +0 -1038
  233. package/src/service/nocturnal-config.ts +0 -214
  234. package/src/service/nocturnal-runtime.ts +0 -734
  235. package/src/service/nocturnal-service.ts +0 -1605
  236. package/src/service/nocturnal-target-selector.ts +0 -545
  237. package/src/service/sleep-cycle.ts +0 -157
  238. package/src/service/startup-reconciler.ts +0 -112
  239. package/src/service/subagent-workflow/correction-observer-types.ts +0 -82
  240. package/src/service/subagent-workflow/correction-observer-workflow-manager.ts +0 -250
  241. package/src/service/subagent-workflow/deep-reflect-workflow-manager.ts +0 -1
  242. package/src/service/subagent-workflow/dynamic-timeout.ts +0 -30
  243. package/src/service/subagent-workflow/empathy-observer-workflow-manager.ts +0 -268
  244. package/src/service/subagent-workflow/nocturnal-workflow-manager.ts +0 -795
  245. package/src/service/subagent-workflow/runtime-direct-driver.ts +0 -268
  246. package/src/service/subagent-workflow/workflow-manager-base.ts +0 -580
  247. package/src/tools/write-pain-flag.ts +0 -215
  248. package/tests/commands/nocturnal-review.test.ts +0 -448
  249. package/tests/commands/nocturnal-train.test.ts +0 -97
  250. package/tests/commands/pd-reflect.test.ts +0 -49
  251. package/tests/core/adaptive-thresholds.test.ts +0 -261
  252. package/tests/core/nocturnal-arbiter.test.ts +0 -559
  253. package/tests/core/nocturnal-artifact-lineage.test.ts +0 -53
  254. package/tests/core/nocturnal-artificer.test.ts +0 -241
  255. package/tests/core/nocturnal-candidate-scoring.test.ts +0 -532
  256. package/tests/core/nocturnal-compliance-p-principles.test.ts +0 -133
  257. package/tests/core/nocturnal-compliance.test.ts +0 -646
  258. package/tests/core/nocturnal-dataset.test.ts +0 -892
  259. package/tests/core/nocturnal-e2e.test.ts +0 -234
  260. package/tests/core/nocturnal-executability.test.ts +0 -357
  261. package/tests/core/nocturnal-export.test.ts +0 -517
  262. package/tests/core/nocturnal-reasoning-deriver.test.ts +0 -372
  263. package/tests/core/nocturnal-reviewed-subset-comparison.test.ts +0 -428
  264. package/tests/core/nocturnal-rule-implementation-validator.test.ts +0 -127
  265. package/tests/core/nocturnal-snapshot-contract.test.ts +0 -121
  266. package/tests/core/nocturnal-trajectory-extractor.test.ts +0 -634
  267. package/tests/core/nocturnal-trinity.test.ts +0 -2053
  268. package/tests/core/pain-auto-repair.test.ts +0 -96
  269. package/tests/core/pain-integration.test.ts +0 -510
  270. package/tests/fixtures/nocturnal-reviewed-subset.json +0 -183
  271. package/tests/http/principles-console-route.test.ts +0 -162
  272. package/tests/integration/chaos-resilience.test.ts +0 -348
  273. package/tests/integration/empathy-workflow-integration.test.ts +0 -626
  274. package/tests/integration/pain-diagnostician-loop.e2e.test.ts +0 -380
  275. package/tests/service/control-ui-query-service.test.ts +0 -121
  276. package/tests/service/cooldown-strategy.test.ts +0 -164
  277. package/tests/service/data-endpoints-regression.test.ts +0 -834
  278. package/tests/service/empathy-observer-workflow-manager.test.ts +0 -175
  279. package/tests/service/evolution-worker.nocturnal.test.ts +0 -601
  280. package/tests/service/nocturnal-runtime-hardening.test.ts +0 -118
  281. package/tests/service/nocturnal-runtime.test.ts +0 -473
  282. package/tests/service/nocturnal-service-code-candidate.test.ts +0 -330
  283. package/tests/service/nocturnal-target-selector.test.ts +0 -615
  284. package/tests/service/startup-reconciler.test.ts +0 -148
  285. package/tests/tools/write-pain-flag.test.ts +0 -358
  286. package/ui/src/App.tsx +0 -45
  287. package/ui/src/api.ts +0 -220
  288. package/ui/src/charts.tsx +0 -955
  289. package/ui/src/components/ErrorState.tsx +0 -6
  290. package/ui/src/components/Loading.tsx +0 -13
  291. package/ui/src/components/ProtectedRoute.tsx +0 -12
  292. package/ui/src/components/Shell.tsx +0 -91
  293. package/ui/src/components/WorkspaceConfig.tsx +0 -178
  294. package/ui/src/components/index.ts +0 -5
  295. package/ui/src/context/auth.tsx +0 -80
  296. package/ui/src/context/theme.tsx +0 -66
  297. package/ui/src/hooks/useAutoRefresh.ts +0 -39
  298. package/ui/src/i18n/ui.ts +0 -473
  299. package/ui/src/main.tsx +0 -16
  300. package/ui/src/pages/EvolutionPage.tsx +0 -333
  301. package/ui/src/pages/FeedbackPage.tsx +0 -138
  302. package/ui/src/pages/GateMonitorPage.tsx +0 -136
  303. package/ui/src/pages/LoginPage.tsx +0 -89
  304. package/ui/src/pages/OverviewPage.tsx +0 -599
  305. package/ui/src/pages/SamplesPage.tsx +0 -174
  306. package/ui/src/pages/ThinkingModelsPage.tsx +0 -702
  307. package/ui/src/styles.css +0 -2020
  308. package/ui/src/types.ts +0 -384
  309. package/ui/src/utils/format.ts +0 -15
@@ -1,2680 +0,0 @@
1
- /**
2
- * Nocturnal Trinity — Three-Stage Reflection Chain
3
- * ================================================
4
- *
5
- * PURPOSE: Upgrade single-reflector nocturnal sample generation to a
6
- * Dreamer -> Philosopher -> Scribe Trinity chain that produces higher quality
7
- * decision-point samples through structured multi-stage reflection.
8
- *
9
- * TRINITY STAGES:
10
- * 1. Dreamer — Generates multiple candidate corrections/alternatives
11
- * 2. Philosopher — Provides principle-grounded critique and ranking
12
- * 3. Scribe — Produces the final structured artifact draft using tournament selection
13
- *
14
- * DESIGN CONSTRAINTS:
15
- * - All stage I/O is structured JSON contracts (not prose)
16
- * - Any malformed stage output fails the entire chain closed
17
- * - Single-reflector fallback is preserved via useTrinity flag
18
- * - Trinity mode is configurable but defaults to enabled
19
- * - Final artifact still passes arbiter + executability validation
20
- * - Telemetry records chain mode, stage outcomes, candidate counts
21
- * - Tournament selection is deterministic (same inputs → same winner)
22
- *
23
- * RUNTIME ADAPTER:
24
- * - useStubs=true: uses synchronous stub implementations (no external calls)
25
- * - useStubs=false: requires a TrinityRuntimeAdapter for real subagent execution
26
- * - Adapter uses api.runtime.agent.runEmbeddedPiAgent() which works in background contexts
27
- * (unlike api.runtime.subagent.* which requires gateway request scope)
28
- * - IMPORTANT: provider and model must be passed explicitly — runEmbeddedPiAgent does NOT
29
- * read config.agents.defaults.model and falls back to openai/gpt-5.4 if not specified
30
- */
31
-
32
- import { randomUUID } from 'crypto';
33
- import * as fs from 'fs';
34
- import * as os from 'os';
35
- import * as path from 'path';
36
- import type { NocturnalSessionSnapshot } from './nocturnal-trajectory-extractor.js';
37
- import { computeThinkingModelDelta } from './nocturnal-trajectory-extractor.js';
38
- import {
39
- deriveReasoningChain,
40
- deriveContextualFactors,
41
- } from './nocturnal-reasoning-deriver.js';
42
- import type { TrinityArtificerContext } from './nocturnal-artificer.js';
43
- import {
44
- runTournament,
45
- DEFAULT_SCORING_WEIGHTS,
46
- type ScoringWeights,
47
- type TournamentTraceEntry,
48
- validateCandidateDiversity,
49
- } from './nocturnal-candidate-scoring.js';
50
- import {
51
- DEFAULT_THRESHOLDS,
52
- getEffectiveThresholds,
53
- type ThresholdValues,
54
- } from './adaptive-thresholds.js';
55
-
56
- // ---------------------------------------------------------------------------
57
- // Configurable Model Fallback (avoid hardcoded strings deep in adapters)
58
- // ---------------------------------------------------------------------------
59
-
60
- const FALLBACK_PROVIDER = process.env.OPENCLAW_DEFAULT_PROVIDER || 'minimax-portal';
61
- const FALLBACK_MODEL = process.env.OPENCLAW_DEFAULT_MODEL || 'MiniMax-M2.7';
62
-
63
- // ---------------------------------------------------------------------------
64
- // Embedded Role Prompts
65
- // ---------------------------------------------------------------------------
66
- // These prompts are embedded at build time. The agents/ directory was removed
67
- // to eliminate fragile runtime file dependencies on the file system.
68
-
69
- export const NOCTURNAL_DREAMER_PROMPT = `# Nocturnal Dreamer — Candidate Generation
70
-
71
- > System prompt for Trinity Dreamer stage.
72
- > Role: Generate multiple alternative "better decision" candidates from a session snapshot.
73
-
74
- ## Role
75
-
76
- You are a principles analyst specializing in identifying decision alternatives.
77
- Your task is to analyze a session trajectory and generate **multiple candidate corrections**,
78
- each representing a different valid approach to the same problem.
79
-
80
- ## Input
81
-
82
- You will receive:
83
- - A **target principle** (principle ID and description)
84
- - A **session trajectory snapshot** containing:
85
- - Assistant turns (sanitized text, no raw content)
86
- - User turns (correction cues only, no raw content)
87
- - Tool calls with outcomes and error messages
88
- - Pain events and gate blocks
89
- - Session metadata
90
-
91
- ## Task
92
-
93
- Analyze the session and generate **2-3 candidate corrections**, each capturing:
94
-
95
- 1. **The bad decision**: What the agent decided or did that violated the target principle
96
- 2. **The better decision**: What the agent should have done instead (unique per candidate)
97
- 3. **The rationale**: Why this alternative is better
98
- 4. **Confidence**: How confident you are this is a valid alternative (0.0-1.0)
99
-
100
- ## Output Format
101
-
102
- You MUST respond with ONLY a valid JSON object. No markdown, no explanation, no preamble.
103
-
104
- {
105
- "valid": true,
106
- "candidates": [
107
- {
108
- "candidateIndex": 0,
109
- "badDecision": "<what the agent did wrong>",
110
- "betterDecision": "<what the agent should have done>",
111
- "rationale": "<why this is better>",
112
- "confidence": 0.95,
113
- "riskLevel": "low",
114
- "strategicPerspective": "conservative_fix"
115
- }
116
- ],
117
- "generatedAt": "<ISO timestamp>"
118
- }
119
-
120
- ## Quality Standards
121
-
122
- ### Each candidate MUST:
123
- - Have a candidateIndex that is unique within the candidate list
124
- - Describe a specific, concrete badDecision (not generic anti-patterns)
125
- - Propose a specific, actionable betterDecision (contains an action verb)
126
- - Provide a principle-grounded rationale (explicitly references the principle)
127
- - Include a confidence score (0.0-1.0, higher = more confident)
128
-
129
- ### betterDecision FORMAT — Must be executable:
130
- - MUST start with a concrete action verb: read, check, verify, edit, write, create, delete, search, grep, find, list, review, examine, inspect, test, run, execute, analyze, diagnose, debug
131
- - MUST reference a specific, concrete target (file, command, config, etc.)
132
- - MUST describe a bounded, executable action — not a vague principle
133
- - Examples: "Read the file before editing to verify current content", "Check user permissions before executing privileged commands"
134
- - Anti-examples: "Per T-01, pause all tasks..." (starts with "Per"), "Be more careful" (vague verb "be")
135
-
136
- ### Candidates should DIFFER from each other:
137
- - Different candidates should represent genuinely different approaches
138
- - Do not generate candidates with identical betterDecisions
139
- - Vary the confidence scores to reflect genuine uncertainty
140
-
141
- ## Strategic Perspective Requirements
142
-
143
- Generate candidates from DISTINCT strategic perspectives:
144
-
145
- - **conservative_fix**: Minimal deviation from original approach. Add a
146
- verification or validation step that was missing.
147
- - **structural_improvement**: Reorder operations or introduce an intermediate
148
- checkpoint. Change HOW the goal is achieved.
149
- - **paradigm_shift**: Challenge whether the original goal was correct.
150
- Consider a fundamentally different approach.
151
-
152
- Each candidate MUST specify \`riskLevel\` ("low"|"medium"|"high") and
153
- \`strategicPerspective\` matching one of the above.
154
-
155
- ANTI-PATTERN: Candidates that differ only in wording, not in substance,
156
- will be rejected.
157
-
158
- ### Candidates must NOT:
159
- - Contain raw user text or private content
160
- - Reference non-existent tools or impossible actions
161
- - Propose vague improvements ("be more careful")
162
- - Exceed the requested number of candidates
163
-
164
- ## Validation
165
-
166
- If you cannot generate valid candidates (e.g., no clear violation found, insufficient data), respond with:
167
-
168
- {
169
- "valid": false,
170
- "candidates": [],
171
- "reason": "<why valid candidates cannot be generated>",
172
- "generatedAt": "<ISO timestamp>"
173
- }`;
174
-
175
- export const NOCTURNAL_PHILOSOPHER_PROMPT = `# Nocturnal Philosopher — Candidate Evaluation and Ranking
176
-
177
- > System prompt for Trinity Philosopher stage.
178
- > Role: Evaluate Dreamer's candidates and rank them by principle alignment and quality.
179
-
180
- ## Role
181
-
182
- You are a principles analyst specializing in critical evaluation.
183
- Your task is to evaluate Dreamer's candidate corrections and rank them
184
- based on principle alignment, specificity, and actionability.
185
-
186
- ## Input
187
-
188
- You will receive:
189
- - A **target principle** (principle ID and description)
190
- - **Dreamer's candidates** — a list of alternative corrections to evaluate
191
-
192
- ## Task
193
-
194
- For each candidate, provide:
195
- 1. **Critique**: A principle-grounded assessment of this candidate's strengths and weaknesses
196
- 2. **Principle alignment**: Whether this candidate properly aligns with the target principle
197
- 3. **Score**: Overall quality score (0.0-1.0, higher = better)
198
- 4. **Rank**: Relative ranking among all candidates (1 = best)
199
-
200
- Finally, provide an **overall assessment** of the candidate set.
201
-
202
- ## Output Format
203
-
204
- You MUST respond with ONLY a valid JSON object. No markdown, no explanation, no preamble.
205
-
206
- {
207
- "valid": true,
208
- "judgments": [
209
- {
210
- "candidateIndex": 0,
211
- "critique": "<principle-grounded critique>",
212
- "principleAligned": true,
213
- "score": 0.92,
214
- "rank": 1,
215
- "scores": {
216
- "principleAlignment": 0.9,
217
- "specificity": 0.85,
218
- "actionability": 0.9,
219
- "executability": 0.95,
220
- "safetyImpact": 0.8,
221
- "uxImpact": 0.85
222
- },
223
- "risks": {
224
- "falsePositiveEstimate": 0.1,
225
- "implementationComplexity": "low",
226
- "breakingChangeRisk": false
227
- }
228
- }
229
- ],
230
- "overallAssessment": "<summary of candidate set quality>",
231
- "generatedAt": "<ISO timestamp>"
232
- }
233
-
234
- ## Evaluation Criteria
235
-
236
- ### Score Components (0-1 scale each):
237
- 1. **Principle Alignment** (weight: 0.20) — Does the betterDecision properly reflect the target principle?
238
- 2. **Specificity** (weight: 0.15) — Is badDecision specific? Is betterDecision actionable?
239
- 3. **Actionability** (weight: 0.15) — Does betterDecision describe a specific next step?
240
- 4. **Executability** (weight: 0.15) — Does betterDecision start with a bounded verb (read, check, verify, edit, write, etc.) and reference a concrete target?
241
- 5. **Safety Impact** (weight: 0.20) — Does the betterDecision reduce risk of data loss, corruption, or new failure modes? Would implementing this prevent dangerous operations?
242
- 6. **UX Impact** (weight: 0.15) — Does the betterDecision reduce user frustration or improve response reliability? Would the user experience be noticeably better?
243
-
244
- ### Risk Assessment (per candidate):
245
- For each candidate, also assess:
246
- - **falsePositiveEstimate** (0-1): How likely is this candidate a false positive (the "betterDecision" is actually not better)?
247
- - **implementationComplexity** ("low"/"medium"/"high"): How complex would it be to implement this correction?
248
- - **breakingChangeRisk** (boolean): Could implementing this correction break existing behavior?
249
-
250
- ### Executability Check:
251
- A betterDecision is executable if it:
252
- - STARTS with a concrete action verb: read, check, verify, edit, write, create, delete, search, grep, find, list, review, examine, inspect, test, run, execute, analyze, diagnose, debug
253
- - References a specific, concrete target (file, command, config, etc.)
254
- - Describes a bounded, executable action — not a vague principle
255
- - Examples that PASS: "Read the file before editing", "Check user permissions before executing"
256
- - Examples that FAIL: "Per T-01, pause all tasks..." (starts with "Per"), "Be more careful" (vague)
257
-
258
- ### Ranking Rules:
259
- - Candidates are ranked by score (highest = rank 1)
260
- - Ties broken by: higher executability, then higher principle alignment, then lower candidateIndex
261
- - If a candidate's betterDecision is NOT executable, penalize its score by 0.2
262
-
263
- ## Validation
264
-
265
- If you cannot judge the candidates, respond with:
266
-
267
- {
268
- "valid": false,
269
- "judgments": [],
270
- "overallAssessment": "",
271
- "reason": "<why judgment cannot be produced>",
272
- "generatedAt": "<ISO timestamp>"
273
- }`;
274
-
275
/**
 * System prompt for the Scribe stage of the Trinity chain.
 *
 * Passed as `extraSystemPrompt` by `invokeScribe`. It tells the model to pick
 * the Philosopher's rank-1 candidate and answer with a single JSON object;
 * the second JSON example is the shape to return when no artifact can be
 * synthesized (`selectedCandidateIndex: -1`).
 *
 * This is runtime text sent to the model — edit wording deliberately.
 */
const NOCTURNAL_SCRIBE_PROMPT = `# Nocturnal Scribe — Final Artifact Synthesis

> System prompt for Trinity Scribe stage.
> Role: Synthesize the best candidate into a final structured artifact.

## Role

You are a principles analyst specializing in structured output.
Your task is to take the top-ranked candidate from Philosopher's evaluation
and synthesize it into a final decision-point artifact that passes arbiter validation.

## Input

You will receive:
- A **target principle** (principle ID and description)
- A **session trajectory snapshot**
- **Philosopher's judgments** — ranked candidates with critiques and 6D scores
- **Dreamer's candidates** — the original candidate list
- **Philosopher's risk assessments** — falsePositiveEstimate, implementationComplexity, breakingChangeRisk per candidate

Use the risk assessments to determine which candidates require deeper contrastive analysis. High-risk candidates (high breakingChangeRisk or implementationComplexity) warrant thorough rejectedAnalysis.

## Task

Select the best candidate (Philosopher's rank 1) and synthesize it into
a final TrinityDraftArtifact. Then produce a **Contrastive Analysis** that explains why the winner was chosen and what to learn from the runners-up.

## Output Format

You MUST respond with ONLY a valid JSON object. No markdown, no explanation, no preamble.

{
"selectedCandidateIndex": 0,
"badDecision": "<final bad decision text>",
"betterDecision": "<final better decision text>",
"rationale": "<final rationale text>",
"sessionId": "<source session ID>",
"principleId": "<principle ID>",
"sourceSnapshotRef": "<snapshot reference>",
"telemetry": {
"chainMode": "trinity",
"dreamerPassed": true,
"philosopherPassed": true,
"scribePassed": true,
"candidateCount": 2,
"selectedCandidateIndex": 0,
"stageFailures": []
},
"rejectedAnalysis": {
"whyRejected": "<mental model that led to the rejected candidate>",
"warningSignals": ["<observable caution trigger 1>", "<trigger 2>"],
"correctiveThinking": "<correct reasoning path that should have been taken>"
},
"chosenJustification": {
"whyChosen": "<why this candidate was selected over others>",
"keyInsights": ["<transferable insight 1>", "<insight 2>", "<insight 3>"],
"limitations": ["<when this approach does NOT apply 1>", "<limitation 2>"]
},
"contrastiveAnalysis": {
"criticalDifference": "<ONE key insight distinguishing chosen from rejected>",
"decisionTrigger": "<When X, do Y pattern>",
"preventionStrategy": "<how to systematically avoid the rejected path>"
}
}

All three analysis sections (rejectedAnalysis, chosenJustification, contrastiveAnalysis) are optional but recommended. When multiple candidates were evaluated, include them to provide richer training signals.

## Validation

If you cannot synthesize an artifact:

{
"selectedCandidateIndex": -1,
"badDecision": "",
"betterDecision": "",
"rationale": "",
"sessionId": "<source session ID>",
"principleId": "<principle ID>",
"sourceSnapshotRef": "",
"telemetry": {
"chainMode": "trinity",
"dreamerPassed": true,
"philosopherPassed": false,
"scribePassed": false,
"candidateCount": 2,
"selectedCandidateIndex": -1,
"stageFailures": ["Philosopher: no valid judgments produced"]
}
}`;
364
-
365
- // ---------------------------------------------------------------------------
366
- // Trinity Runtime Adapter
367
- // ---------------------------------------------------------------------------
368
-
369
/**
 * Interface for Trinity stage invocation.
 * Implementations can use real subagent runtimes or stubs.
 *
 * The stage methods are chained by their types: the Dreamer output is an
 * input to the Philosopher, and both outputs are inputs to the Scribe.
 */

export interface TrinityRuntimeAdapter {
  /**
   * Check if the runtime surface is available for Trinity stage execution.
   * @returns true if the adapter can invoke stages
   */
  isRuntimeAvailable(): boolean;

  /**
   * Get the reason for the last runtime failure, or null if no failure.
   * @returns Failure description, or null when nothing has failed
   */
  getLastFailureReason(): string | null;

  /**
   * Invoke the Dreamer stage.
   * @param _snapshot Session trajectory snapshot
   * @param _principleId Target principle ID
   * @param _maxCandidates Maximum number of candidates to generate
   * @returns Dreamer output JSON
   */
  invokeDreamer(
    _snapshot: NocturnalSessionSnapshot,
    _principleId: string,
    _maxCandidates: number
  ): Promise<DreamerOutput>;

  /**
   * Invoke the Philosopher stage.
   * @param _dreamerOutput Dreamer's output
   * @param _principleId Target principle ID
   * @param _snapshot Session snapshot (for violation evidence)
   * @returns Philosopher output JSON
   */
  invokePhilosopher(
    _dreamerOutput: DreamerOutput,
    _principleId: string,
    _snapshot: NocturnalSessionSnapshot
  ): Promise<PhilosopherOutput>;

  /**
   * Invoke the Scribe stage.
   * @param _dreamerOutput Dreamer's output
   * @param _philosopherOutput Philosopher's output
   * @param _snapshot Session snapshot
   * @param _principleId Target principle ID
   * @param _telemetry Running telemetry
   * @param _config Trinity config
   * @returns Scribe draft artifact or null if failed
   */
  invokeScribe(
    _dreamerOutput: DreamerOutput,
    _philosopherOutput: PhilosopherOutput,
    _snapshot: NocturnalSessionSnapshot,
    _principleId: string,
    _telemetry: TrinityTelemetry,
    _config: TrinityConfig
  ): Promise<TrinityDraftArtifact | null>;

  /**
   * Clean up any resources used by the adapter.
   * Called after Trinity chain completes (success or failure).
   * Optional: adapters that hold no resources may omit it.
   */
  close?(): Promise<void>;
}
437
-
438
-
439
- // ---------------------------------------------------------------------------
440
- // OpenClaw Runtime Adapter
441
- // ---------------------------------------------------------------------------
442
-
443
/**
 * Machine-readable failure categories reported by a Trinity runtime adapter.
 *
 * Context: the OpenClaw-backed adapter uses
 * api.runtime.agent.runEmbeddedPiAgent(), which works in background contexts
 * (unlike api.runtime.subagent.* which requires gateway request scope).
 * That adapter reports `runtime_unavailable` when the embedded runtime is
 * missing, `runtime_timeout` / `runtime_run_failed` for errors thrown by a
 * run, and `runtime_session_read_failed` when a stage returns no text.
 */
export type TrinityRuntimeFailureCode =
  | 'runtime_unavailable'
  | 'invalid_runtime_request'
  | 'runtime_run_failed'
  | 'runtime_timeout'
  | 'runtime_session_read_failed';

/**
 * Error carrying a {@link TrinityRuntimeFailureCode} plus optional diagnostics.
 *
 * The resulting `message` is always `"<code>: <message>"`, so the code is
 * visible even where only the message string is logged.
 */
export class TrinityRuntimeContractError extends Error {
  /** Failure category; also embedded as the prefix of `message`. */
  readonly code: TrinityRuntimeFailureCode;
  /** Free-form structured context supplied by the thrower, if any. */
  readonly diagnostics?: Record<string, unknown>;

  /**
   * @param code Failure category
   * @param message Human-readable detail (without the code prefix)
   * @param diagnostics Optional structured context for debugging
   */
  constructor(
    code: TrinityRuntimeFailureCode,
    message: string,
    diagnostics?: Record<string, unknown>
  ) {
    super(`${code}: ${message}`);
    // Restore the prototype chain so `instanceof TrinityRuntimeContractError`
    // keeps working when the build down-levels classes to ES5, where
    // subclasses of built-ins such as Error otherwise lose their prototype.
    Object.setPrototypeOf(this, new.target.prototype);
    this.name = 'TrinityRuntimeContractError';
    this.code = code;
    this.diagnostics = diagnostics;
  }
}
470
-
471
- // ---------------------------------------------------------------------------
472
- // Reasoning Context Serialization (D-03, D-04)
473
- // ---------------------------------------------------------------------------
474
-
475
/**
 * Render the "## Reasoning Context" prompt section consumed by Dreamer.
 *
 * The section lists, per assistant turn that carries thinking content or
 * uncertainty markers, the (truncated) reasoning, the markers, and any
 * non-"high" confidence signal, followed by the environmental factors that
 * are set on the snapshot.
 *
 * Only reasoningChain + contextualFactors are serialized.
 * DecisionPoints are NOT injected (reserved for Phase 37 Scribe per D-04).
 *
 * @param snapshot Session snapshot the signals are derived from
 * @returns The section text (ending in a newline), or null when there is
 *          neither a significant turn nor any environmental factor
 */
export function formatReasoningContext(snapshot: NocturnalSessionSnapshot): string | null {
  const chain = deriveReasoningChain(snapshot.assistantTurns);
  const factors = deriveContextualFactors(snapshot);

  // Turns worth reporting: anything with thinking text or uncertainty markers.
  const notableSignals = chain.filter(
    entry => entry.thinkingContent || entry.uncertaintyMarkers.length > 0
  );

  // Human-readable labels for every environmental factor that is set.
  const environmentNotes: string[] = [];
  if (factors.fileStructureKnown) {
    environmentNotes.push('File structure explored before modification');
  }
  if (factors.errorHistoryPresent) {
    environmentNotes.push('Prior error history present');
  }
  if (factors.userGuidanceAvailable) {
    environmentNotes.push('User guidance/corrections available');
  }
  if (factors.timePressure) {
    environmentNotes.push('Time pressure detected (rapid tool calls)');
  }

  // Nothing to say: let the caller omit the section entirely.
  if (notableSignals.length === 0 && environmentNotes.length === 0) {
    return null;
  }

  const lines: string[] = ['## Reasoning Context', ''];

  for (const entry of notableSignals) {
    const { turnIndex } = entry;
    if (entry.thinkingContent) {
      // Cap quoted reasoning at 200 characters to keep the prompt compact.
      const excerpt = entry.thinkingContent.slice(0, 200);
      lines.push(`- Turn ${turnIndex}: Internal reasoning: "${excerpt}"`);
    }
    if (entry.uncertaintyMarkers.length > 0) {
      const markers = entry.uncertaintyMarkers.join(', ');
      lines.push(`- Turn ${turnIndex}: Uncertainty detected: ${markers}`);
    }
    if (entry.confidenceSignal !== 'high') {
      lines.push(`- Turn ${turnIndex}: Confidence: ${entry.confidenceSignal}`);
    }
  }

  if (environmentNotes.length > 0) {
    lines.push('', 'Environmental context:');
    lines.push(...environmentNotes.map(note => `- ${note}`));
  }

  lines.push('');
  return lines.join('\n');
}
534
-
535
- export class OpenClawTrinityRuntimeAdapter implements TrinityRuntimeAdapter {
536
-
537
/**
 * Structural slice of the host plugin API that this adapter relies on.
 * Only `runtime.agent.runEmbeddedPiAgent` is mandatory; the constructor
 * rejects an `api` without it.
 */
private readonly api: {
  runtime: {
    agent: {
      /** Runs one embedded agent turn; the adapter reads text from `payloads`. */
      runEmbeddedPiAgent: (_opts: {
        sessionId: string;
        sessionFile: string;
        prompt: string;
        extraSystemPrompt?: string;
        config?: unknown;
        provider?: string;
        model?: string;
        timeoutMs: number;
        runId: string;
        disableTools?: boolean;
      }) => Promise<{
        payloads?: { isError?: boolean; text?: string }[];
      }>;
    };
    config?: {
      /** Loader for the full host config; preferred over `api.config` by loadFullConfig(). */
      loadConfig?: () => unknown;
    };
  };
  /** Plugin-level config; used by loadFullConfig() as the fallback source. */
  config?: unknown;
  /** Optional logger; every call site guards with `?.`. */
  logger?: { info: (msg: string) => void; warn: (msg: string) => void; error: (msg: string) => void };
};
/** Value returned by getLastFailureReason(); reset to null at the start of every invoke* call. */
private lastFailureReason: string | null = null;


/** Per-stage budget, forwarded as `timeoutMs` to runEmbeddedPiAgent. */
private readonly stageTimeoutMs: number;
/** Per-process directory (`pd-trinity-<pid>` under the OS temp dir) holding session file paths. */
private readonly tempDir: string;
567
-
568
/**
 * Bind the adapter to a host API and prepare its per-process temp directory path.
 *
 * @param api Host API; must expose `runtime.agent.runEmbeddedPiAgent`
 * @param stageTimeoutMs Timeout applied to each stage run, in milliseconds
 * @throws TrinityRuntimeContractError (`runtime_unavailable`) when the
 *         embedded runtime entry point is not a function
 */
constructor(
  api: OpenClawTrinityRuntimeAdapter['api'],
  stageTimeoutMs = 300_000 // 5 min — increased from 3 min to accommodate slower LLM responses
) {
  const embeddedRunner = api?.runtime?.agent?.runEmbeddedPiAgent;
  const runtimePresent = typeof embeddedRunner === 'function';
  if (!runtimePresent) {
    throw new TrinityRuntimeContractError(
      'runtime_unavailable',
      'embedded runtime unavailable (missing runtime.agent.runEmbeddedPiAgent)',
    );
  }

  this.api = api;
  this.stageTimeoutMs = stageTimeoutMs;

  // Session files live in a pid-scoped folder inside the platform temp dir.
  const scopedDirName = `pd-trinity-${process.pid}`;
  this.tempDir = path.join(os.tmpdir(), scopedDirName);

  // Sweep leftovers from earlier runs before this instance starts working.
  this.cleanupStaleTempDirs();
}
586
-
587
-
588
/**
 * Report whether stages can be invoked.
 *
 * Always true for this adapter: the constructor throws when
 * `runtime.agent.runEmbeddedPiAgent` is missing, so an instance only exists
 * when the runtime entry point was present at construction time.
 */
isRuntimeAvailable(): boolean {
  return true;
}
591
-
592
/**
 * Return the failure reason recorded for the most recent stage invocation.
 *
 * @returns The stored reason, or null — every invoke* method clears the
 *          value before it starts.
 */
getLastFailureReason(): string | null {
  return this.lastFailureReason;
}
595
-
596
/**
 * Clean up temp directories from previous crashed runs.
 * Matches pattern pd-trinity-* in the OS temp directory.
 *
 * Directories are named `pd-trinity-<pid>`. An entry is removed only when it
 * is not this instance's own directory AND its owning process is no longer
 * running, so concurrently running adapters in other processes keep their
 * session files. Entries whose suffix is not a pid are treated as stale.
 *
 * Best-effort: failures are logged (with file-system paths redacted) and
 * never thrown; a failure on one entry does not stop the sweep.
 */
private cleanupStaleTempDirs(): void {
  const dirPrefix = 'pd-trinity-';
  const ownDirName = path.basename(this.tempDir);

  // Redact absolute Windows (C:\...) and POSIX (/...) paths before logging.
  const describeError = (err: unknown): string =>
    err instanceof Error
      ? err.message.replace(/([A-Za-z]:\\[^\s:]+|\/[^\s:]+)/g, '[PATH]')
      : String(err);

  const warn = (err: unknown): void => {
    this.api.logger?.warn?.(`[Trinity] Failed to cleanup stale temp dirs: ${describeError(err)}`);
  };

  // Signal 0 performs an existence check without delivering a signal.
  const isProcessAlive = (pid: number): boolean => {
    try {
      process.kill(pid, 0);
      return true;
    } catch (err) {
      // EPERM means the process exists but belongs to someone else.
      return typeof err === 'object' && err !== null && (err as { code?: unknown }).code === 'EPERM';
    }
  };

  try {
    const osTempDir = os.tmpdir();
    if (!fs.existsSync(osTempDir)) return;

    for (const entry of fs.readdirSync(osTempDir)) {
      if (!entry.startsWith(dirPrefix) || entry === ownDirName) continue;

      const suffix = entry.slice(dirPrefix.length);
      const ownerPid = /^\d+$/.test(suffix) ? Number(suffix) : Number.NaN;
      if (Number.isSafeInteger(ownerPid) && ownerPid > 0 && isProcessAlive(ownerPid)) {
        continue; // still in use by a live process
      }

      try {
        fs.rmSync(path.join(osTempDir, entry), { recursive: true, force: true });
      } catch (err) {
        warn(err);
      }
    }
  } catch (err) {
    warn(err);
  }
}
615
-
616
/**
 * Load the full OpenClaw config (including models.providers).
 *
 * Why: `this.api.config` is the plugin config, not the full OpenClaw config.
 * It does NOT contain `models.providers`, which is needed to resolve provider
 * model definitions. `api.runtime.config.loadConfig()` returns the full config.
 *
 * Fallback: if loadConfig() is unavailable or throws, the plugin config is
 * returned instead. resolveModel() copes with that limited config by falling
 * back to FALLBACK_PROVIDER / FALLBACK_MODEL.
 *
 * @returns The loaded config object, or undefined when none is available
 */
private loadFullConfig(): Record<string, unknown> | undefined {
  // Try runtime.config.loadConfig() first (available in native plugin context)
  const runtimeConfig = this.api.runtime?.config;
  if (runtimeConfig && typeof runtimeConfig.loadConfig === 'function') {
    try {
      // Call through the owning object so `this` stays bound in case
      // loadConfig is implemented as a method rather than a free function.
      return runtimeConfig.loadConfig() as Record<string, unknown> | undefined;
    } catch (err) {
      this.api.logger?.warn?.(`[Trinity] loadConfig() failed, falling back to plugin config: ${err instanceof Error ? err.message : String(err)}`);
    }
  }
  // Fallback: plugin config (limited — won't have models.providers)
  return this.api.config as Record<string, unknown> | undefined;
}
640
-
641
/**
 * Resolve the provider and model from the OpenClaw config.
 * runEmbeddedPiAgent does NOT read config.agents.defaults.model —
 * it requires explicit params.provider and params.model.
 *
 * `agents.defaults.model` may be either a `"provider/model"` string or an
 * object whose `primary` field holds such a string. The first `/` separates
 * provider from model, so model IDs may themselves contain slashes.
 * Anything else (missing value, non-string `primary`, empty provider or
 * model part) falls through to the module-level fallback pair.
 *
 * @returns Provider and model to pass to runEmbeddedPiAgent
 */
private resolveModel(): { provider: string; model: string } {
  // Split "provider/model" on the first slash; reject malformed references.
  const parseModelRef = (ref: unknown): { provider: string; model: string } | null => {
    if (typeof ref !== 'string') return null;
    const slashAt = ref.indexOf('/');
    if (slashAt <= 0 || slashAt === ref.length - 1) return null;
    return { provider: ref.slice(0, slashAt), model: ref.slice(slashAt + 1) };
  };

  const config = this.loadFullConfig();
  const agents = config?.agents as Record<string, unknown> | undefined;
  const defaults = agents?.defaults as Record<string, unknown> | undefined;
  const modelConfig = defaults?.model;

  // Object form carries the reference under `primary`; string form is the reference itself.
  const modelRef =
    modelConfig !== null && typeof modelConfig === 'object'
      ? (modelConfig as Record<string, unknown>).primary
      : modelConfig;

  const resolved = parseModelRef(modelRef);
  if (resolved) return resolved;

  // Last resort: fallback pair defined elsewhere in this module
  this.api.logger?.warn?.(`[Trinity] Could not resolve model from config, using fallback: ${FALLBACK_PROVIDER}/${FALLBACK_MODEL}`);
  return { provider: FALLBACK_PROVIDER, model: FALLBACK_MODEL };
}
671
-
672
/**
 * Reserve a unique `.jsonl` session file path for one stage run.
 *
 * Ensures the adapter's temp directory exists and returns
 * `<tempDir>/<stage>-<uuid>.jsonl`. The file itself is not written here.
 *
 * @param stage Stage label used as the file name prefix
 * @returns Absolute path for the session file
 */
private createSessionFile(stage: string): string {
  const dirExists = fs.existsSync(this.tempDir);
  if (!dirExists) {
    fs.mkdirSync(this.tempDir, { recursive: true });
  }

  const fileName = `${stage}-${randomUUID()}.jsonl`;
  return path.join(this.tempDir, fileName);
}
681
-
682
/**
 * Collect the usable text from a runEmbeddedPiAgent result.
 *
 * Error payloads are skipped, each remaining text is trimmed, empty texts
 * are dropped, and the rest is joined with newlines.
 *
 * @param result Runtime result, possibly without `payloads`
 * @returns Joined text, or an empty string when nothing usable was returned
 */
private extractPayloadText(result: { payloads?: { isError?: boolean; text?: string }[] }): string {
  const pieces: string[] = [];
  for (const payload of result.payloads ?? []) {
    if (payload.isError) {
      continue;
    }
    const trimmed = payload.text?.trim() ?? '';
    if (trimmed) {
      pieces.push(trimmed);
    }
  }
  return pieces.join('\n');
}
693
-
694
/**
 * Clamp a value to the [0, 1] range — used for LLM-produced scores that may
 * be out of range.
 *
 * @param val Candidate score of unknown type
 * @param fallback Value returned when `val` is not a finite number
 * @returns `val` limited to [0, 1], or `fallback`
 */
private clamp01(val: unknown, fallback = 0): number {
  const usable = typeof val === 'number' && Number.isFinite(val);
  if (!usable) {
    return fallback;
  }
  const cappedAtOne = Math.min(1, val as number);
  return Math.max(0, cappedAtOne);
}
700
-
701
-
702
/**
 * Map a thrown value to a failure code.
 *
 * @param error Value caught from a stage run
 * @returns `runtime_timeout` when the error text contains "timeout"
 *          (case-insensitive), otherwise `runtime_run_failed`
 */
private classifyRuntimeError(error: unknown): TrinityRuntimeFailureCode {
  let detail: string;
  if (error instanceof Error) {
    detail = error.message;
  } else {
    detail = String(error);
  }

  if (/timeout/i.test(detail)) {
    return 'runtime_timeout';
  }
  return 'runtime_run_failed';
}
706
-
707
/**
 * Resolve after a delay.
 *
 * @param ms Delay in milliseconds
 */
private sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
710
-
711
/**
 * Run the Dreamer stage once and parse its output.
 *
 * Runtime errors and empty responses do not propagate: they are converted
 * into a DreamerOutput via buildRuntimeFailureDreamerOutput. The session
 * file path is removed afterwards regardless of outcome.
 *
 * @param snapshot Session trajectory snapshot
 * @param principleId Target principle ID
 * @param maxCandidates Number of candidates requested in the prompt
 * @returns Parsed Dreamer output, or a failure-shaped output
 */
async invokeDreamer(
  snapshot: NocturnalSessionSnapshot,
  principleId: string,
  maxCandidates: number
): Promise<DreamerOutput> {
  this.lastFailureReason = null;

  const runId = `dreamer-${randomUUID()}`;
  const sessionFile = this.createSessionFile('dreamer');
  const prompt = this.buildDreamerPrompt(snapshot, principleId, maxCandidates);
  const target = this.resolveModel();

  this.api.logger?.info(`[Trinity:Dreamer] Using model: ${target.provider}/${target.model}`);

  try {
    const result = await this.api.runtime.agent.runEmbeddedPiAgent({
      sessionId: runId,
      sessionFile,
      prompt,
      extraSystemPrompt: NOCTURNAL_DREAMER_PROMPT,
      config: this.loadFullConfig(),
      provider: target.provider,
      model: target.model,
      timeoutMs: this.stageTimeoutMs,
      runId,
      disableTools: true,
    });

    const outputText = this.extractPayloadText(result);
    if (outputText) {
      // DEBUG: Log Dreamer's actual output
      this.api.logger?.info(`[Trinity:Dreamer] Output preview: ${outputText.slice(0, 500)}`);
      return this.parseDreamerOutput(outputText);
    }

    return this.buildRuntimeFailureDreamerOutput(
      'runtime_session_read_failed',
      'Dreamer returned empty response',
    );
  } catch (err) {
    return this.buildRuntimeFailureDreamerOutput(this.classifyRuntimeError(err), err);
  } finally {
    // Best-effort removal of the session file; a failure is only logged.
    try {
      fs.unlinkSync(sessionFile);
    } catch {
      this.api.logger?.warn?.(`[Trinity] Failed to delete session file: ${sessionFile}`);
    }
  }
}
757
-
758
/**
 * Run the Philosopher stage once and parse its output.
 *
 * Runtime errors and empty responses do not propagate: they are converted
 * into a PhilosopherOutput via buildRuntimeFailurePhilosopherOutput. The
 * session file path is removed afterwards regardless of outcome.
 *
 * @param dreamerOutput Dreamer's output (candidates to judge)
 * @param principleId Target principle ID
 * @param snapshot Session snapshot (for violation evidence)
 * @returns Parsed Philosopher output, or a failure-shaped output
 */
async invokePhilosopher(
  dreamerOutput: DreamerOutput,
  principleId: string,
  snapshot: NocturnalSessionSnapshot
): Promise<PhilosopherOutput> {
  this.lastFailureReason = null;

  const runId = `philosopher-${randomUUID()}`;
  const sessionFile = this.createSessionFile('philosopher');
  const prompt = this.buildPhilosopherPrompt(dreamerOutput, principleId, snapshot);
  const target = this.resolveModel();

  try {
    const result = await this.api.runtime.agent.runEmbeddedPiAgent({
      sessionId: runId,
      sessionFile,
      prompt,
      extraSystemPrompt: NOCTURNAL_PHILOSOPHER_PROMPT,
      config: this.loadFullConfig(),
      provider: target.provider,
      model: target.model,
      timeoutMs: this.stageTimeoutMs,
      runId,
      disableTools: true,
    });

    const outputText = this.extractPayloadText(result);
    if (outputText) {
      // DEBUG: Log Philosopher's actual output
      this.api.logger?.info(`[Trinity:Philosopher] Output preview: ${outputText.slice(0, 500)}`);
      return this.parsePhilosopherOutput(outputText);
    }

    return this.buildRuntimeFailurePhilosopherOutput(
      'runtime_session_read_failed',
      'Philosopher returned empty response',
    );
  } catch (err) {
    return this.buildRuntimeFailurePhilosopherOutput(this.classifyRuntimeError(err), err);
  } finally {
    // Best-effort removal of the session file; a failure is only logged.
    try {
      fs.unlinkSync(sessionFile);
    } catch {
      this.api.logger?.warn?.(`[Trinity] Failed to delete session file: ${sessionFile}`);
    }
  }
}
802
-
803
-
804
-
805
/**
 * Run the Scribe stage, retrying on unusable output.
 *
 * Up to three attempts are made. An attempt is retried when the runtime
 * throws (after 2000 ms), returns no text (after 1000 ms), or returns text
 * that parseScribeOutput rejects (after 1500 ms). Each attempt uses a fresh
 * run ID and session file path, which is removed when the attempt ends.
 *
 * @param dreamerOutput Dreamer's output
 * @param philosopherOutput Philosopher's output
 * @param snapshot Session snapshot
 * @param principleId Target principle ID
 * @param telemetry Running telemetry, forwarded to parseScribeOutput
 * @param _config Trinity config (unused by this adapter)
 * @returns The parsed draft artifact, or null once every attempt has failed
 */
async invokeScribe(
  dreamerOutput: DreamerOutput,
  philosopherOutput: PhilosopherOutput,
  snapshot: NocturnalSessionSnapshot,
  principleId: string,
  telemetry: TrinityTelemetry,

  _config: TrinityConfig
): Promise<TrinityDraftArtifact | null> {
  this.lastFailureReason = null;
  const prompt = this.buildScribePrompt(dreamerOutput, philosopherOutput, snapshot, principleId);
  const target = this.resolveModel();

  // Retry up to 2 times on JSON parse / missing-field errors (common LLM output issues)
  const maxAttempts = 3;
  for (let attempt = 1; attempt <= maxAttempts; attempt += 1) {
    const retriesLeft = attempt < maxAttempts;
    const runId = `scribe-${randomUUID()}`;
    const sessionFile = this.createSessionFile('scribe');

    try {
      const result = await this.api.runtime.agent.runEmbeddedPiAgent({
        sessionId: runId,
        sessionFile,
        prompt,
        extraSystemPrompt: NOCTURNAL_SCRIBE_PROMPT,
        config: this.loadFullConfig(),
        provider: target.provider,
        model: target.model,
        timeoutMs: this.stageTimeoutMs,
        runId,
        disableTools: true,
      });

      const outputText = this.extractPayloadText(result);
      if (!outputText) {
        this.recordFailure('runtime_session_read_failed', 'Scribe returned empty response');
        if (!retriesLeft) {
          return null;
        }
        await this.sleep(1000);
        continue;
      }

      // DEBUG: Log Scribe's actual output
      this.api.logger?.info(`[Trinity:Scribe] Output preview (attempt ${attempt}): ${outputText.slice(0, 800)}`);

      const artifact = this.parseScribeOutput(outputText, snapshot, principleId, telemetry);
      if (artifact) {
        return artifact;
      }

      // JSON parse or missing-field error — retry
      if (!retriesLeft) {
        return null;
      }
      await this.sleep(1500);
    } catch (err) {
      this.recordFailure(this.classifyRuntimeError(err), err);
      if (!retriesLeft) {
        return null;
      }
      await this.sleep(2000);
    } finally {
      // Best-effort removal of this attempt's session file; a failure is only logged.
      try {
        fs.unlinkSync(sessionFile);
      } catch {
        this.api.logger?.warn?.(`[Trinity] Failed to delete session file: ${sessionFile}`);
      }
    }
  }
  return null;
}
868
-
869
/**
 * Remove this adapter's temp directory and everything inside it.
 *
 * Best-effort: a missing directory is not an error (`force: true`), and any
 * other failure is logged as a warning instead of being thrown.
 */
async close(): Promise<void> {
  try {
    // One recursive removal handles files AND nested directories; unlinking
    // entries one by one first would throw on a subdirectory and leave the
    // whole directory behind.
    fs.rmSync(this.tempDir, { recursive: true, force: true });
  } catch (err) {
    this.api.logger?.warn?.(`[Trinity] Session cleanup failed: ${String(err)}`);
  }
}
883
-
884
- // ---------------------------------------------------------------------------
885
- // Private Helper Methods
886
- // ---------------------------------------------------------------------------
887
-
888
-
889
-
890
/**
 * Assemble the user prompt for the Dreamer stage.
 *
 * Layout: target principle, session ID, then one optional section each for
 * failed tool calls, pain events with score >= 50, gate blocks, the last
 * three assistant turns (300 characters each), user corrections and the
 * derived reasoning context, followed by the task instructions.
 * Sections with no content are omitted.
 *
 * @param snapshot Session trajectory snapshot
 * @param principleId Target principle ID
 * @param maxCandidates Number of candidates the model is asked to produce
 * @returns Prompt text, lines joined with "\n"
 */
private buildDreamerPrompt(
  snapshot: NocturnalSessionSnapshot,
  principleId: string,
  maxCandidates: number
): string {
  // Failed tool calls, one bullet each
  const failureLines = snapshot.toolCalls
    .filter(call => call.outcome === 'failure')
    .map(call => {
      const target = call.filePath ? ` on ${call.filePath}` : '';
      return `- ${call.toolName}${target} → FAILED: ${call.errorMessage || 'unknown error'}`;
    });

  // Pain events at or above the reporting threshold
  const painLines = snapshot.painEvents
    .filter(event => event.score >= 50)
    .map(event => `- Pain (score: ${event.score}): ${event.reason || 'no reason'} [source: ${event.source}]`);

  // Gate blocks
  const blockLines = snapshot.gateBlocks
    .map(block => `- Gate blocked ${block.toolName}: ${block.reason}`);

  // Assistant decision context (last 3 turns max), numbered within the excerpt
  const turnLines = snapshot.assistantTurns
    .slice(-3)
    .map((turn, idx) => `[Turn ${idx+1}] ${turn.sanitizedText.slice(0, 300)}`);

  // User correction cues (if any)
  const cueLines = snapshot.userTurns
    .filter(turn => turn.correctionDetected)
    .map(turn => `- User correction: ${turn.correctionCue || 'detected'}`);

  const out: string[] = [
    `## Target Principle`,
    `**Principle ID**: ${principleId}`,
    ``,
    `## Session Context`,
    `**Session ID**: ${snapshot.sessionId}`,
    ``,
  ];

  // Emit heading + body + blank line, but only for non-empty bodies.
  const addSection = (heading: string, body: string[]): void => {
    if (body.length === 0) return;
    out.push(heading, body.join('\n'), '');
  };

  addSection(`## Tool Failures (${failureLines.length})`, failureLines);
  addSection(`## Pain Signals (${painLines.length})`, painLines);
  addSection(`## Gate Blocks (${blockLines.length})`, blockLines);
  addSection(`## Assistant Decision Context`, turnLines);
  addSection(`## User Corrections`, cueLines);

  // ## Reasoning Context — derived signals from Phase 34 deriver module (D-03, D-04)
  const reasoningSection = formatReasoningContext(snapshot);
  if (reasoningSection) {
    out.push(reasoningSection);
  }

  out.push(
    `## Task`,
    `Analyze the above session and generate ${maxCandidates} candidate corrections.`,
    `Each candidate must:`,
    `1. Identify a specific bad decision from the session`,
    `2. Propose a concrete better decision grounded in principle ${principleId}`,
    `3. The betterDecision MUST START with a bounded verb: read, check, verify, edit, write, create, delete, search, grep, find, list, review, examine, inspect, test, run, execute, analyze, diagnose, debug`,
    `4. Explain the rationale referencing the principle`,
    ``,
    `Respond with ONLY a valid JSON object matching the DreamerOutput contract.`
  );

  return out.join('\n');
}
984
-
985
-
986
-
987
/**
 * Assemble the user prompt for the Philosopher stage.
 *
 * Layout: target principle, a violation summary built from the snapshot
 * (failed tool calls, pain events with score >= 50, gate blocks, user
 * corrections), per-candidate risk profiles when Dreamer supplied them, the
 * candidates as pretty-printed JSON, and the evaluation instructions.
 * Sub-sections with no entries are omitted.
 *
 * @param dreamerOutput Dreamer's output (candidates to evaluate)
 * @param principleId Target principle ID
 * @param snapshot Session snapshot used as violation evidence
 * @returns Prompt text, lines joined with "\n"
 */
private buildPhilosopherPrompt(
  dreamerOutput: DreamerOutput,
  principleId: string,
  snapshot: NocturnalSessionSnapshot
): string {
  const candidatesJson = JSON.stringify(dreamerOutput.candidates, null, 2);

  // Per-candidate metadata from Dreamer (risk level + strategic perspective)
  const profileLines = dreamerOutput.candidates
    .filter(cand => cand.riskLevel || cand.strategicPerspective)
    .map(cand => `- Candidate #${cand.candidateIndex}: risk=${cand.riskLevel || 'N/A'}, perspective=${cand.strategicPerspective || 'N/A'}`);

  // Violation evidence from the snapshot, used to validate candidates
  const failureLines = snapshot.toolCalls
    .filter(call => call.outcome === 'failure')
    .map(call => {
      const target = call.filePath ? ` on ${call.filePath}` : '';
      return `- ${call.toolName}${target} → FAILED: ${call.errorMessage || 'unknown error'}`;
    });

  const painLines = snapshot.painEvents
    .filter(event => event.score >= 50)
    .map(event => `- Pain (score: ${event.score}, severity: ${event.severity || 'N/A'}): ${event.reason || 'no reason'} [source: ${event.source}]`);

  const blockLines = snapshot.gateBlocks
    .map(block => `- Gate blocked ${block.toolName}: ${block.reason}`);

  const cueLines = snapshot.userTurns
    .filter(turn => turn.correctionDetected)
    .map(turn => `- User correction: ${turn.correctionCue || 'detected'}`);

  const out: string[] = [
    `## Target Principle`,
    `**Principle ID**: ${principleId}`,
    ``,
    `## Session Violation Summary`,
    `**Session ID**: ${snapshot.sessionId}`,
  ];

  // Emit a counted "###" sub-section (preceded by a blank line) for non-empty lists.
  const addSubsection = (title: string, body: string[]): void => {
    if (body.length === 0) return;
    out.push(`\n### ${title} (${body.length})`, body.join('\n'));
  };

  addSubsection('Tool Failures', failureLines);
  addSubsection('Pain Signals', painLines);
  addSubsection('Gate Blocks', blockLines);
  addSubsection('User Corrections', cueLines);
  addSubsection('Candidate Risk Profiles', profileLines);

  out.push(
    ``,
    `## Dreamer's Candidates`,
    candidatesJson,
    ``,
    `## Task`,
    `Evaluate each candidate against the violation summary above.`,
    `For each candidate:`,
    `1. Is the badDecision accurate — does it match the actual violations in the session?`,
    `2. Is the betterDecision specific and actionable?`,
    `3. Does the betterDecision START with a bounded verb (read, check, verify, edit, write, etc.)?`,
    `4. Does the rationale correctly reference principle ${principleId}?`,
    `5. Is the confidence score justified?`,
    ``,
    `**Penalize executability**: If betterDecision does NOT start with a bounded verb, reduce score by 0.2.`,
    ``,
    `Respond with ONLY a valid JSON object matching the PhilosopherOutput contract.`
  );

  return out.join('\n');
}
1069
-
1070
-
1071
-
1072
-
1073
/**
 * Assemble the user prompt for the Scribe stage.
 *
 * Layout: target principle, the original violation evidence (failed tool
 * calls, pain events with score >= 50, gate blocks — or a placeholder line
 * when there is none), Dreamer's candidates and Philosopher's judgments as
 * pretty-printed JSON, a one-line-per-judgment risk summary, and the task
 * plus betterDecision formatting rules.
 *
 * @param dreamerOutput Dreamer's output
 * @param philosopherOutput Philosopher's output
 * @param snapshot Session snapshot used as violation evidence
 * @param principleId Target principle ID
 * @returns Prompt text, lines joined with "\n"
 */
private buildScribePrompt(
  dreamerOutput: DreamerOutput,
  philosopherOutput: PhilosopherOutput,
  snapshot: NocturnalSessionSnapshot,
  principleId: string
): string {
  const candidatesJson = JSON.stringify(dreamerOutput.candidates, null, 2);
  const judgmentsJson = JSON.stringify(philosopherOutput.judgments, null, 2);

  // Violation evidence so Scribe can ground the final artifact:
  // tool failures first, then pain signals, then gate blocks.
  const violations: string[] = [
    ...snapshot.toolCalls
      .filter(call => call.outcome === 'failure')
      .map(call => `- Tool failure: ${call.toolName}${call.filePath ? ` on ${call.filePath}` : ''} → ${call.errorMessage || 'unknown error'}`),
    ...snapshot.painEvents
      .filter(event => event.score >= 50)
      .map(event => `- Pain signal (score: ${event.score}): ${event.reason || 'no reason'} [source: ${event.source}]`),
    ...snapshot.gateBlocks
      .map(block => `- Gate blocked: ${block.toolName} → ${block.reason}`),
  ];

  const out: string[] = [
    `## Target Principle`,
    `**Principle ID**: ${principleId}`,
    ``,
    `## Original Violation Evidence`,
    `**Session ID**: ${snapshot.sessionId}`,
  ];

  out.push(
    violations.length > 0
      ? violations.join('\n')
      : `(No specific violations found in snapshot)`
  );

  // Risk summary from Philosopher 6D judgments for Scribe contrastive analysis
  const summaryLines = philosopherOutput.judgments.map(judgment => {
    const riskNote = judgment.risks
      ? ` [risks: fp=${judgment.risks.falsePositiveEstimate.toFixed(2)}, complexity=${judgment.risks.implementationComplexity}, breaking=${judgment.risks.breakingChangeRisk}]`
      : '';
    const scoreText = judgment.score?.toFixed(2) ?? 'n/a';
    const alignment = judgment.principleAligned ? 'aligned' : 'not aligned';
    return ` - candidate[${judgment.candidateIndex}] (rank ${judgment.rank}, score ${scoreText}): ${alignment}${riskNote}`;
  });
  const riskSummary = summaryLines.join('\n');

  out.push(
    ``,
    `## Dreamer's Candidates`,
    candidatesJson,
    ``,
    `## Philosopher's Judgments + Risk Assessments`,
    judgmentsJson,
    ``,
    `## Philosopher 6D Risk Summary`,
    `Use this to determine contrastive depth — high-risk candidates need deeper analysis:`,
    riskSummary,
    ``,
    `## Task`,
    `Select the best candidate (Philosopher's rank 1) and synthesize it into a final TrinityDraftArtifact.`,
    `Then produce contrastive analysis explaining why the winner was chosen and what the rejected candidates teach us.`,
    ``,
    `## CRITICAL: betterDecision Format Requirements`,
    `Your betterDecision MUST pass executability validation. It MUST:`,
    `1. START with a concrete action verb from this list: read, check, verify, edit, write, create, delete, search, grep, find, list, review, examine, inspect, test, run, execute, analyze, diagnose, debug`,
    `2. Reference a SPECIFIC, concrete target (file path, command name, config key, etc.)`,
    `3. Describe a BOUNDED, executable action — not a vague principle or process`,
    ``,
    `**Examples that PASS executability check**:`,
    `- "Read the file before editing to verify current content"`,
    `- "Check user permissions before executing privileged commands"`,
    `- "Verify the routing infrastructure is operational before analyzing system state"`,
    `- "Edit the config file to set timeout=30000ms"`,
    ``,
    `**Examples that FAIL executability check**:`,
    `- "Per T-01, pause all analysis tasks..." (starts with "Per", not a bounded verb)`,
    `- "The agent should have first checked..." (starts with "The", not the action verb)`,
    `- "Be more careful with routing tools" (vague verb "be")`,
    `- "Ensure proper authorization" (vague verb "ensure")`,
    ``,
    `Respond with ONLY a valid JSON object.`
  );

  return out.join('\n');
}
1161
-
1162
-
1163
- private parseDreamerOutput(text: string): DreamerOutput {
1164
- const json = this.extractJson(text);
1165
- if (!json) {
1166
- return {
1167
- valid: false,
1168
- candidates: [],
1169
- reason: 'Failed to parse Dreamer output as JSON',
1170
- generatedAt: new Date().toISOString(),
1171
- };
1172
- }
1173
-
1174
- try {
1175
- const parsed = JSON.parse(json);
1176
- // Validate required structure
1177
- if (typeof parsed.valid !== 'boolean') {
1178
- return {
1179
- valid: false,
1180
- candidates: [],
1181
- reason: 'Dreamer output missing "valid" field',
1182
- generatedAt: new Date().toISOString(),
1183
- };
1184
- }
1185
- if (!Array.isArray(parsed.candidates)) {
1186
- return {
1187
- valid: false,
1188
- candidates: [],
1189
- reason: 'Dreamer output missing "candidates" array',
1190
- generatedAt: new Date().toISOString(),
1191
- };
1192
- }
1193
- return {
1194
- valid: parsed.valid,
1195
- candidates: parsed.candidates,
1196
- reason: parsed.reason,
1197
- generatedAt: parsed.generatedAt ?? new Date().toISOString(),
1198
- };
1199
- } catch {
1200
- return {
1201
- valid: false,
1202
- candidates: [],
1203
- reason: `JSON parse error: ${text.slice(0, 100)}`,
1204
- generatedAt: new Date().toISOString(),
1205
- };
1206
- }
1207
- }
1208
-
1209
- private buildRuntimeFailureDreamerOutput(
1210
- code: TrinityRuntimeFailureCode,
1211
- error: unknown
1212
- ): DreamerOutput {
1213
- const reason = this.recordFailure(code, error);
1214
- return {
1215
- valid: false,
1216
- candidates: [],
1217
- reason,
1218
- generatedAt: new Date().toISOString(),
1219
- };
1220
- }
1221
-
1222
- private parsePhilosopherOutput(text: string): PhilosopherOutput {
1223
- const json = this.extractJson(text);
1224
- if (!json) {
1225
- return {
1226
- valid: false,
1227
- judgments: [],
1228
- overallAssessment: '',
1229
- reason: 'Failed to parse Philosopher output as JSON',
1230
- generatedAt: new Date().toISOString(),
1231
- };
1232
- }
1233
-
1234
- try {
1235
- const parsed = JSON.parse(json);
1236
- if (typeof parsed.valid !== 'boolean') {
1237
- return {
1238
- valid: false,
1239
- judgments: [],
1240
- overallAssessment: '',
1241
- reason: 'Philosopher output missing "valid" field',
1242
- generatedAt: new Date().toISOString(),
1243
- };
1244
- }
1245
- if (!Array.isArray(parsed.judgments)) {
1246
- return {
1247
- valid: false,
1248
- judgments: [],
1249
- overallAssessment: '',
1250
- reason: 'Philosopher output missing "judgments" array',
1251
- generatedAt: new Date().toISOString(),
1252
- };
1253
- }
1254
- return {
1255
- valid: parsed.valid,
1256
- judgments: parsed.judgments.map((j: Record<string, unknown>) => ({
1257
- candidateIndex: j.candidateIndex,
1258
- critique: j.critique ?? '',
1259
- principleAligned: j.principleAligned ?? false,
1260
- score: j.score ?? 0,
1261
- rank: j.rank ?? 0,
1262
- // Optional 6D scores and risk assessment (Phase 36)
1263
- // Only include a dimension if the LLM actually returned a number (not undefined/null).
1264
- // This preserves the distinction between "LLM returned 0" vs "LLM omitted the field."
1265
- ...(j.scores ? {
1266
- scores: Object.fromEntries(
1267
- (['principleAlignment', 'specificity', 'actionability', 'executability', 'safetyImpact', 'uxImpact'] as const)
1268
- .map(dim => [dim, (j.scores as Record<string, unknown>)[dim]])
1269
- .filter(([, v]) => typeof v === 'number')
1270
- .map(([dim, v]) => [dim, this.clamp01(v as number)])
1271
- )
1272
- } : {}),
1273
- ...(j.risks ? (() => {
1274
- const risks = j.risks as Record<string, unknown>;
1275
- const fp = risks.falsePositiveEstimate;
1276
- const hasFp = typeof fp === 'number';
1277
- const risksObj: {
1278
- falsePositiveEstimate?: number;
1279
- implementationComplexity: string;
1280
- breakingChangeRisk: boolean;
1281
-
1282
- } = {
1283
- implementationComplexity: (risks.implementationComplexity as string) ?? 'medium',
1284
- breakingChangeRisk: Boolean(risks.breakingChangeRisk),
1285
- };
1286
-
1287
- if (hasFp) risksObj.falsePositiveEstimate = this.clamp01(fp as number);
1288
- return { risks: risksObj };
1289
- })() : {}),
1290
- })),
1291
- overallAssessment: parsed.overallAssessment ?? '',
1292
- reason: parsed.reason,
1293
- generatedAt: parsed.generatedAt ?? new Date().toISOString(),
1294
- };
1295
- } catch {
1296
- return {
1297
- valid: false,
1298
- judgments: [],
1299
- overallAssessment: '',
1300
- reason: `JSON parse error: ${text.slice(0, 100)}`,
1301
- generatedAt: new Date().toISOString(),
1302
- };
1303
- }
1304
- }
1305
-
1306
- private buildRuntimeFailurePhilosopherOutput(
1307
- code: TrinityRuntimeFailureCode,
1308
- error: unknown
1309
- ): PhilosopherOutput {
1310
- const reason = this.recordFailure(code, error);
1311
- return {
1312
- valid: false,
1313
- judgments: [],
1314
- overallAssessment: '',
1315
- reason,
1316
- generatedAt: new Date().toISOString(),
1317
- };
1318
- }
1319
-
1320
- private recordFailure(
1321
- code: TrinityRuntimeFailureCode,
1322
- error: unknown
1323
- ): string {
1324
- const detail = error instanceof Error ? error.message : String(error);
1325
- this.lastFailureReason = `${code}: ${detail}`;
1326
- return this.lastFailureReason;
1327
- }
1328
-
1329
-
1330
-
1331
- private parseScribeOutput(
1332
- text: string,
1333
- snapshot: NocturnalSessionSnapshot,
1334
- principleId: string,
1335
-
1336
- _telemetry: TrinityTelemetry
1337
- ): TrinityDraftArtifact | null {
1338
- const json = this.extractJson(text);
1339
- if (!json) {
1340
- this.recordFailure('runtime_run_failed', new Error('Scribe output contains no parseable JSON'));
1341
- return null;
1342
- }
1343
-
1344
- try {
1345
- const parsed = JSON.parse(json);
1346
- if (typeof parsed.selectedCandidateIndex !== 'number') {
1347
- this.recordFailure('runtime_run_failed', new Error(`Scribe output missing "selectedCandidateIndex" field: ${text.slice(0, 200)}`));
1348
- return null;
1349
- }
1350
-
1351
- // Validate contrastive analysis sub-fields (H-03): only include if structure is intact
1352
- const contrastiveAnalysis = parsed.contrastiveAnalysis
1353
- && typeof parsed.contrastiveAnalysis === 'object'
1354
- && typeof parsed.contrastiveAnalysis.criticalDifference === 'string'
1355
- ? parsed.contrastiveAnalysis : undefined;
1356
-
1357
- const rejectedAnalysis = parsed.rejectedAnalysis
1358
- && typeof parsed.rejectedAnalysis === 'object'
1359
- && typeof parsed.rejectedAnalysis.whyRejected === 'string'
1360
- ? parsed.rejectedAnalysis : undefined;
1361
-
1362
- const chosenJustification = parsed.chosenJustification
1363
- && typeof parsed.chosenJustification === 'object'
1364
- && typeof parsed.chosenJustification.whyChosen === 'string'
1365
- ? parsed.chosenJustification : undefined;
1366
-
1367
- return {
1368
- selectedCandidateIndex: parsed.selectedCandidateIndex,
1369
- badDecision: parsed.badDecision ?? '',
1370
- betterDecision: parsed.betterDecision ?? '',
1371
- rationale: parsed.rationale ?? '',
1372
- sessionId: snapshot.sessionId,
1373
- principleId,
1374
- sourceSnapshotRef: `snapshot-${snapshot.sessionId}-${Date.now()}`,
1375
- telemetry: {
1376
- chainMode: 'trinity',
1377
- usedStubs: _telemetry.usedStubs,
1378
- dreamerPassed: true,
1379
- philosopherPassed: true,
1380
- scribePassed: true,
1381
- candidateCount: parsed.candidateCount ?? 0,
1382
- selectedCandidateIndex: parsed.selectedCandidateIndex,
1383
- stageFailures: [],
1384
- },
1385
- ...(contrastiveAnalysis ? { contrastiveAnalysis } : {}),
1386
- ...(rejectedAnalysis ? { rejectedAnalysis } : {}),
1387
- ...(chosenJustification ? { chosenJustification } : {}),
1388
- };
1389
- } catch {
1390
- this.recordFailure('runtime_run_failed', new Error(`Scribe output JSON parse error: ${json.slice(0, 200)}`));
1391
- return null;
1392
- }
1393
- }
1394
-
1395
- /**
1396
- * Extract JSON object from text that may contain markdown code blocks.
1397
- */
1398
-
1399
-
1400
- private extractJson(text: string): string | null {
1401
- // Try direct parse first
1402
- try {
1403
- JSON.parse(text);
1404
- return text;
1405
- } catch {
1406
- // Try extracting from markdown code blocks
1407
- }
1408
-
1409
- // Match triple-backtick JSON blocks
1410
- const codeBlockMatch = /```(?:json)?\s*\n?([\s\S]*?)\n?```/.exec(text);
1411
- if (codeBlockMatch) {
1412
- const extracted = codeBlockMatch[1].trim();
1413
- try {
1414
- JSON.parse(extracted);
1415
- return extracted;
1416
- } catch {
1417
- // Not valid JSON
1418
- }
1419
- }
1420
-
1421
- // Try to find first { and last } to extract JSON object
1422
- const firstBrace = text.indexOf('{');
1423
- const lastBrace = text.lastIndexOf('}');
1424
- if (firstBrace !== -1 && lastBrace !== -1 && lastBrace > firstBrace) {
1425
- const extracted = text.slice(firstBrace, lastBrace + 1);
1426
- try {
1427
- JSON.parse(extracted);
1428
- return extracted;
1429
- } catch {
1430
- // Not valid JSON
1431
- }
1432
- }
1433
-
1434
- return null;
1435
- }
1436
- }
1437
-
1438
- // ---------------------------------------------------------------------------
1439
- // Trinity Mode Configuration
1440
- // ---------------------------------------------------------------------------
1441
-
1442
- /**
1443
- * Configuration for Trinity chain execution.
1444
- */
1445
- export interface TrinityConfig {
1446
- /**
1447
- * Whether to use Trinity chain (true) or single-reflector (false).
1448
- * Default: true
1449
- */
1450
- useTrinity: boolean;
1451
-
1452
- /**
1453
- * Maximum candidates Dreamer should generate.
1454
- * Default: 3
1455
- */
1456
- maxCandidates: number;
1457
-
1458
- /**
1459
- * Whether to use stub stage outputs (for testing without real model calls).
1460
- * Default: false (real subagent calls via runtimeAdapter)
1461
- */
1462
- useStubs: boolean;
1463
-
1464
- /**
1465
- * Runtime adapter for real subagent execution.
1466
- * Required when useStubs is false. Ignored when useStubs is true.
1467
- * Default: undefined
1468
- */
1469
- runtimeAdapter?: TrinityRuntimeAdapter;
1470
-
1471
- /**
1472
- * Scoring weights for tournament selection.
1473
- * Default: DEFAULT_SCORING_WEIGHTS
1474
- */
1475
- scoringWeights?: ScoringWeights;
1476
-
1477
- /**
1478
- * Threshold values for tournament eligibility.
1479
- * Default: DEFAULT_THRESHOLDS
1480
- */
1481
- thresholds?: ThresholdValues;
1482
-
1483
- /**
1484
- * State directory for threshold persistence.
1485
- * If provided, thresholds will be loaded from state.
1486
- */
1487
- stateDir?: string;
1488
- }
1489
-
1490
- // ---------------------------------------------------------------------------
1491
- // Trinity Intermediate Contracts
1492
- // ---------------------------------------------------------------------------
1493
-
1494
- // Forward-exports from shared types module — single source of truth
1495
- export type {
1496
- DreamerCandidate,
1497
- DreamerOutput,
1498
- PhilosopherRiskAssessment,
1499
- Philosopher6DScores,
1500
- PhilosopherJudgment,
1501
- PhilosopherOutput,
1502
- } from './nocturnal-trinity-types.js';
1503
-
1504
- // Import all types for local use in this file
1505
- import type {
1506
- DreamerCandidate,
1507
- DreamerOutput,
1508
- PhilosopherRiskAssessment,
1509
- Philosopher6DScores,
1510
- PhilosopherJudgment,
1511
- PhilosopherOutput,
1512
- } from './nocturnal-trinity-types.js';
1513
-
1514
- /**
1515
- * Analysis of a rejected candidate — why it lost the tournament.
1516
- * Informs training signal for "what to avoid".
1517
- */
1518
- export interface RejectedAnalysis {
1519
- /** Mental model that led to the rejected candidate */
1520
- whyRejected: string;
1521
- /** Observable caution triggers that were missed or ignored */
1522
- warningSignals: string[];
1523
- /** Correct reasoning path that should have been taken */
1524
- correctiveThinking: string;
1525
- }
1526
-
1527
- /**
1528
- * Justification for the chosen candidate — why it won the tournament.
1529
- * Informs training signal for "what to do".
1530
- */
1531
- export interface ChosenJustification {
1532
- /** Why this candidate was selected over others */
1533
- whyChosen: string;
1534
- /** 1-3 transferable insights from this decision */
1535
- keyInsights: string[];
1536
- /** When this approach does NOT apply */
1537
- limitations: string[];
1538
- }
1539
-
1540
- /**
1541
- * Contrastive analysis: key differences between chosen and rejected paths.
1542
- * Synthesizes the core lesson from the tournament.
1543
- */
1544
- export interface ContrastiveAnalysis {
1545
- /** ONE key insight distinguishing chosen from rejected */
1546
- criticalDifference: string;
1547
- /** Pattern: "When X, do Y" */
1548
- decisionTrigger: string;
1549
- /** How to systematically avoid the rejected path */
1550
- preventionStrategy: string;
1551
- }
1552
-
1553
- /**
1554
- * Scribe output — final structured artifact draft.
1555
- * Scribe synthesizes the best candidate into an approved artifact format.
1556
- */
1557
- export interface TrinityDraftArtifact {
1558
- /** The selected winning candidate index */
1559
- selectedCandidateIndex: number;
1560
- /** The final badDecision */
1561
- badDecision: string;
1562
- /** The final betterDecision */
1563
- betterDecision: string;
1564
- /** The final rationale */
1565
- rationale: string;
1566
- /** Source session from snapshot */
1567
- sessionId: string;
1568
- /** Target principle ID */
1569
- principleId: string;
1570
- /** Reference to snapshot used */
1571
- sourceSnapshotRef: string;
1572
- /** Chain telemetry */
1573
- telemetry: TrinityTelemetry;
1574
- /** Reflection quality: delta in thinking model activation (-1 to 1) */
1575
- thinkingModelDelta?: number;
1576
- /** Reflection quality: gain in planning ratio (-1 to 1) */
1577
- planningRatioGain?: number;
1578
- /** Optional routing context for a follow-on Artificer stage */
1579
- artificerContext?: TrinityArtificerContext;
1580
- /** Contrastive analysis: chosen vs rejected reasoning paths (SCRIBE-03) */
1581
- contrastiveAnalysis?: ContrastiveAnalysis;
1582
- /** Analysis of the rejected candidates — why they lost the tournament (SCRIBE-01) */
1583
- rejectedAnalysis?: RejectedAnalysis;
1584
- /** Justification for the chosen candidate — why it won (SCRIBE-02) */
1585
- chosenJustification?: ChosenJustification;
1586
- }
1587
-
1588
- export interface TrinityTelemetry {
1589
- /** Whether Trinity or single-reflector was used */
1590
- chainMode: 'trinity' | 'single-reflector';
1591
- /** Whether stub implementations were used (always true in Phase 8) */
1592
- usedStubs: boolean;
1593
- /** Whether each stage passed */
1594
- dreamerPassed: boolean;
1595
- philosopherPassed: boolean;
1596
- scribePassed: boolean;
1597
- /** Number of candidates generated */
1598
- candidateCount: number;
1599
- /** Final selected candidate index */
1600
- selectedCandidateIndex: number;
1601
- /** Stage failure reasons (if any) */
1602
- stageFailures: string[];
1603
- /** Tournament trace for explainability (optional) */
1604
- tournamentTrace?: TournamentTraceEntry[];
1605
- /** Winner aggregate score (optional) */
1606
- winnerAggregateScore?: number;
1607
- /** Whether winner passed all thresholds (optional) */
1608
- winnerThresholdPassed?: boolean;
1609
- /** Number of eligible candidates after threshold check (optional) */
1610
- eligibleCandidateCount?: number;
1611
- /** Whether Dreamer candidates passed diversity validation (DIVER-04) */
1612
- diversityCheckPassed?: boolean;
1613
- /** Risk levels assigned to Dreamer candidates (for telemetry) */
1614
- candidateRiskLevels?: string[];
1615
- /** Aggregate 6D Philosopher evaluation metrics (informational) */
1616
- philosopher6D?: {
1617
- /** Average scores across all candidates per dimension */
1618
- avgScores: {
1619
- principleAlignment: number;
1620
- specificity: number;
1621
- actionability: number;
1622
- executability: number;
1623
- safetyImpact: number;
1624
- uxImpact: number;
1625
- };
1626
- /** Count of candidates with breakingChangeRisk = true */
1627
- highRiskCount: number;
1628
- };
1629
- }
1630
-
1631
- // ---------------------------------------------------------------------------
1632
- // Trinity Stage Validation
1633
- // ---------------------------------------------------------------------------
1634
-
1635
- /**
1636
- * Validation failure for a Trinity stage.
1637
- */
1638
- export interface TrinityStageFailure {
1639
- stage: 'dreamer' | 'philosopher' | 'scribe';
1640
- reason: string;
1641
- }
1642
-
1643
- /**
1644
- * Result of Trinity chain execution.
1645
- */
1646
- export interface TrinityResult {
1647
- /** Whether Trinity chain completed successfully */
1648
- success: boolean;
1649
- /** The final draft artifact (if success) */
1650
- artifact?: TrinityDraftArtifact;
1651
- /** Telemetry about the chain execution */
1652
- telemetry: TrinityTelemetry;
1653
- /** Stage failures (if any) */
1654
- failures: TrinityStageFailure[];
1655
- /** Whether fallback to single-reflector occurred */
1656
- fallbackOccurred: boolean;
1657
- /** Optional routing context for a follow-on Artificer stage */
1658
- artificerContext?: TrinityArtificerContext;
1659
- }
1660
-
1661
- // ---------------------------------------------------------------------------
1662
- // Internal Types for Trinity Execution
1663
- // ---------------------------------------------------------------------------
1664
-
1665
- // ---------------------------------------------------------------------------
1666
- // Stub Stage Implementations (Phase 2 — no real subagent calls)
1667
- // ---------------------------------------------------------------------------
1668
-
1669
- /**
1670
- * STUB DREAMER — generates synthetic candidates for testing.
1671
- *
1672
- * In production, this would call the actual Dreamer subagent.
1673
- * The stub generates plausible candidates based on snapshot signals.
1674
- */
1675
-
1676
- export function invokeStubDreamer(
1677
- snapshot: NocturnalSessionSnapshot,
1678
- principleId: string,
1679
- maxCandidates: number
1680
- ): DreamerOutput {
1681
- const hasFailures = (snapshot.stats.failureCount ?? 0) > 0;
1682
- const hasPain = snapshot.stats.totalPainEvents > 0;
1683
- const hasGateBlocks = (snapshot.stats.totalGateBlocks ?? 0) > 0;
1684
-
1685
- // #219: Detect fallback data source - stats may be incomplete
1686
- const isFallback = snapshot._dataSource === 'pain_context_fallback';
1687
- const fallbackWarning = isFallback ? ' [fallback data: stats may be incomplete]' : '';
1688
-
1689
- const candidates: DreamerCandidate[] = [];
1690
-
1691
- // Generate candidates based on available signals
1692
- // NOTE: betterDecision includes thinking model patterns so computeThinkingModelDelta > 0
1693
- // (these activate T-03, T-05, T-08 patterns respectively)
1694
- if (hasGateBlocks) {
1695
- candidates.push({
1696
- candidateIndex: 0,
1697
- badDecision: 'Proceeded with a tool call despite receiving a gate block, bypassing the safety check',
1698
- betterDecision: 'Review docs/gateblocks.md and verify authorization requirements first; based on the evidence, this irreversible action must be reviewed before proceeding',
1699
- rationale: 'Respecting gate blocks prevents unintended system modifications',
1700
- confidence: 0.95,
1701
- riskLevel: 'low' as const,
1702
- strategicPerspective: 'conservative_fix' as const,
1703
- });
1704
- if (maxCandidates >= 2) {
1705
- candidates.push({
1706
- candidateIndex: 1,
1707
- badDecision: 'Retried the same operation immediately after gate block without understanding why',
1708
- betterDecision: 'Check the gatekeeper source first to diagnose the block reason; this is irreversible, so we must be certain before proceeding',
1709
- rationale: 'Understanding why a gate blocked prevents repeated blocks',
1710
- confidence: 0.85,
1711
- riskLevel: 'low' as const,
1712
- strategicPerspective: 'conservative_fix' as const,
1713
- });
1714
- }
1715
- if (maxCandidates >= 3) {
1716
- candidates.push({
1717
- candidateIndex: 2,
1718
- badDecision: 'Modified the target of the blocked operation to bypass the check',
1719
- betterDecision: 'Review docs/auth.md first to understand the authorization structure, then request proper review before any change',
1720
- rationale: 'Proper authorization ensures accountability and prevents unintended changes',
1721
- confidence: 0.75,
1722
- riskLevel: 'low' as const,
1723
- strategicPerspective: 'conservative_fix' as const,
1724
- });
1725
- }
1726
- } else if (hasPain) {
1727
- candidates.push({
1728
- candidateIndex: 0,
1729
- badDecision: 'Continued executing operations without pausing to address accumulated pain signals',
1730
- betterDecision: 'Check logs/pain.json first to analyze pain signals; this error indicates we should stop and reconsider before proceeding',
1731
- rationale: 'Pain signals indicate accumulated friction or error conditions',
1732
- confidence: 0.90,
1733
- riskLevel: 'medium' as const,
1734
- strategicPerspective: 'structural_improvement' as const,
1735
- });
1736
- if (maxCandidates >= 2) {
1737
- candidates.push({
1738
- candidateIndex: 1,
1739
- badDecision: 'Ignored warning pain events and proceeded with high-risk operations',
1740
- betterDecision: 'Review src/pain-detector.ts first; based on the evidence, this indicates a deeper issue we must not ignore',
1741
- rationale: 'Addressing friction reduces error rates and improves outcomes',
1742
- confidence: 0.80,
1743
- riskLevel: 'medium' as const,
1744
- strategicPerspective: 'structural_improvement' as const,
1745
- });
1746
- }
1747
- if (maxCandidates >= 3) {
1748
- candidates.push({
1749
- candidateIndex: 2,
1750
- badDecision: 'Retried failing operations without analyzing why they caused pain',
1751
- betterDecision: 'Analyze logs/errors.json first to identify the failure pattern; this suggests we should stop and rethink before retrying',
1752
- rationale: 'Pattern analysis prevents recurring pain from the same source',
1753
- confidence: 0.70,
1754
- riskLevel: 'medium' as const,
1755
- strategicPerspective: 'structural_improvement' as const,
1756
- });
1757
- }
1758
- } else if (hasFailures) {
1759
- candidates.push({
1760
- candidateIndex: 0,
1761
- badDecision: 'Retried a failing operation without diagnosing the root cause',
1762
- betterDecision: 'Verify config.json preconditions first, based on the error in logs/failure.json, before retrying',
1763
- rationale: 'Diagnosing failures before retry prevents repeated failures',
1764
- confidence: 0.92,
1765
- riskLevel: 'high' as const,
1766
- strategicPerspective: 'paradigm_shift' as const,
1767
- });
1768
- if (maxCandidates >= 2) {
1769
- candidates.push({
1770
- candidateIndex: 1,
1771
- badDecision: 'Continued to the next operation after a failure without addressing it',
1772
- betterDecision: 'Check docs/debugging.md first to diagnose what failed; we must not ignore this when the action is irreversible',
1773
- rationale: 'Unaddressed failures compound and cause larger issues',
1774
- confidence: 0.85,
1775
- riskLevel: 'high' as const,
1776
- strategicPerspective: 'paradigm_shift' as const,
1777
- });
1778
- }
1779
- if (maxCandidates >= 3) {
1780
- candidates.push({
1781
- candidateIndex: 2,
1782
- badDecision: 'Assumed the failure was transient and retried without investigation',
1783
- betterDecision: 'Verify src/validator.ts state first; this error indicates a deeper problem before assuming resolution',
1784
- rationale: 'Verification prevents cascading failures from unresolved issues',
1785
- confidence: 0.78,
1786
- riskLevel: 'high' as const,
1787
- strategicPerspective: 'paradigm_shift' as const,
1788
- });
1789
- }
1790
- } else {
1791
- // No signal available - cannot generate meaningful candidates
1792
- // Return empty candidates array to trigger invalid output
1793
- // (Real Dreamer would also fail with no signal)
1794
- return {
1795
- valid: false,
1796
- candidates: [],
1797
- reason: 'No signal available for candidate generation (failureCount=0, painEvents=0, gateBlocks=0)',
1798
- generatedAt: new Date().toISOString(),
1799
- };
1800
- }
1801
-
1802
- // Ensure we don't exceed maxCandidates
1803
- const limitedCandidates = candidates.slice(0, Math.min(candidates.length, maxCandidates));
1804
-
1805
- // #219/#259: Annotate and downgrade confidence if data source is fallback
1806
- // Fallback data is incomplete (trajectory DB unavailable) — reduce confidence
1807
- // so reviewers don't over-trust low-quality candidates.
1808
- const annotatedCandidates = limitedCandidates.map((c) => ({
1809
- ...c,
1810
- rationale: isFallback ? c.rationale + fallbackWarning : c.rationale,
1811
- confidence: isFallback ? Math.round(c.confidence * 0.5 * 100) / 100 : c.confidence,
1812
- }));
1813
-
1814
- return {
1815
- valid: annotatedCandidates.length > 0,
1816
- candidates: annotatedCandidates,
1817
- generatedAt: new Date().toISOString(),
1818
- reason: annotatedCandidates.length === 0 ? 'No signal available for candidate generation' : undefined,
1819
- };
1820
- }
1821
-
1822
- /**
1823
- * STUB PHILOSOPHER — ranks candidates based on simple heuristics.
1824
- *
1825
- * In production, this would call the actual Philosopher subagent.
1826
- * The stub applies principle alignment heuristics.
1827
- */
1828
- export function invokeStubPhilosopher(
1829
- dreamerOutput: DreamerOutput,
1830
- _principleId: string,
1831
- _snapshot: NocturnalSessionSnapshot
1832
- ): PhilosopherOutput {
1833
- if (!dreamerOutput.valid || dreamerOutput.candidates.length === 0) {
1834
- return {
1835
- valid: false,
1836
- judgments: [],
1837
- overallAssessment: '',
1838
- reason: 'No candidates to judge',
1839
- generatedAt: new Date().toISOString(),
1840
- };
1841
- }
1842
-
1843
- // Simple heuristic scoring based on candidate structure
1844
- const judgments: PhilosopherJudgment[] = dreamerOutput.candidates.map((candidate) => {
1845
- let principleAligned = true;
1846
- let score = candidate.confidence;
1847
-
1848
- // Heuristic: longer rationales tend to be more principled
1849
- if (candidate.rationale.length < 30) {
1850
- score *= 0.8;
1851
- principleAligned = false;
1852
- }
1853
-
1854
- // Heuristic: betterDecision should be actionable (contain verbs)
1855
- const actionableVerbs = ['read', 'check', 'verify', 'edit', 'write', 'search', 'review', 'analyze'];
1856
- const hasActionable = actionableVerbs.some((v) => candidate.betterDecision.toLowerCase().includes(v));
1857
- if (!hasActionable) {
1858
- score *= 0.85;
1859
- principleAligned = false;
1860
- }
1861
-
1862
- // Heuristic: badDecision should be specific (not generic)
1863
- const genericPatterns = ['something went wrong', 'it did not work', 'mistake was made'];
1864
- const isGeneric = genericPatterns.some((p) => candidate.badDecision.toLowerCase().includes(p));
1865
- if (isGeneric) {
1866
- score *= 0.75;
1867
- principleAligned = false;
1868
- }
1869
-
1870
- // Deterministic 6D scores based on strategic perspective (Phase 35 D-07 mapping)
1871
- const perspective = candidate.strategicPerspective;
1872
-
1873
- let sixDScores: Philosopher6DScores;
1874
-
1875
- let riskAssessment: PhilosopherRiskAssessment;
1876
-
1877
- if (perspective === 'conservative_fix') {
1878
- sixDScores = {
1879
- principleAlignment: 0.9,
1880
- specificity: 0.8,
1881
- actionability: 0.85,
1882
- executability: 0.9,
1883
- safetyImpact: 0.95,
1884
- uxImpact: 0.7,
1885
- };
1886
- riskAssessment = {
1887
- falsePositiveEstimate: 0.1,
1888
- implementationComplexity: 'low',
1889
- breakingChangeRisk: false,
1890
- };
1891
- } else if (perspective === 'structural_improvement') {
1892
- sixDScores = {
1893
- principleAlignment: 0.75,
1894
- specificity: 0.7,
1895
- actionability: 0.75,
1896
- executability: 0.7,
1897
- safetyImpact: 0.7,
1898
- uxImpact: 0.8,
1899
- };
1900
- riskAssessment = {
1901
- falsePositiveEstimate: 0.25,
1902
- implementationComplexity: 'medium',
1903
- breakingChangeRisk: false,
1904
- };
1905
- } else if (perspective === 'paradigm_shift') {
1906
- sixDScores = {
1907
- principleAlignment: 0.6,
1908
- specificity: 0.5,
1909
- actionability: 0.5,
1910
- executability: 0.45,
1911
- safetyImpact: 0.4,
1912
- uxImpact: 0.6,
1913
- };
1914
- riskAssessment = {
1915
- falsePositiveEstimate: 0.4,
1916
- implementationComplexity: 'high',
1917
- breakingChangeRisk: true,
1918
- };
1919
- } else {
1920
- // Fallback for candidates without strategicPerspective
1921
- sixDScores = {
1922
- principleAlignment: score,
1923
- specificity: score * 0.9,
1924
- actionability: score * 0.85,
1925
- executability: score * 0.8,
1926
- safetyImpact: score * 0.7,
1927
- uxImpact: score * 0.75,
1928
- };
1929
- riskAssessment = {
1930
- falsePositiveEstimate: 0.3,
1931
- implementationComplexity: 'medium',
1932
- breakingChangeRisk: false,
1933
- };
1934
- }
1935
-
1936
- return {
1937
- candidateIndex: candidate.candidateIndex,
1938
- critique: `Candidate ${candidate.candidateIndex} scored ${score.toFixed(2)}. ${
1939
- principleAligned
1940
- ? 'Principle-aligned with specific actionable alternative.'
1941
- : 'May need refinement for principle alignment.'
1942
- }`,
1943
- principleAligned,
1944
- score: Math.min(1, Math.max(0, score)),
1945
- rank: 0, // Will be set after sorting
1946
- scores: sixDScores,
1947
- risks: riskAssessment,
1948
- };
1949
- });
1950
-
1951
- // Sort by score descending and assign ranks
1952
- judgments.sort((a, b) => b.score - a.score);
1953
- judgments.forEach((j, idx) => {
1954
- j.rank = idx + 1;
1955
- });
1956
-
1957
- const [topJudgment] = judgments;
1958
-
1959
- return {
1960
- valid: true,
1961
- judgments,
1962
- overallAssessment: `Best candidate is #${topJudgment.candidateIndex} with score ${topJudgment.score.toFixed(2)}. ${topJudgment.principleAligned ? 'Well-aligned with principle.' : 'Alignment could be improved.'}`,
1963
- generatedAt: new Date().toISOString(),
1964
- };
1965
- }
1966
-
1967
- /**
1968
- * STUB SCRIBE — synthesizes best candidate into final artifact using tournament selection.
1969
- *
1970
- * In production, this would call the actual Scribe subagent.
1971
- * The stub uses tournament selection (scoring + thresholds) to pick the winner.
1972
- */
1973
-
1974
-
1975
- export function invokeStubScribe(
1976
- dreamerOutput: DreamerOutput,
1977
- philosopherOutput: PhilosopherOutput,
1978
- snapshot: NocturnalSessionSnapshot,
1979
- principleId: string,
1980
- telemetry: TrinityTelemetry,
1981
- config: TrinityConfig
1982
- ): TrinityDraftArtifact | null {
1983
- if (!dreamerOutput.valid || !philosopherOutput.valid) {
1984
- return null;
1985
- }
1986
-
1987
- // Get thresholds (from config or state, or defaults)
1988
- const thresholds = config.thresholds ?? (config.stateDir ? getEffectiveThresholds(config.stateDir) : { ...DEFAULT_THRESHOLDS });
1989
- const weights = config.scoringWeights ?? DEFAULT_SCORING_WEIGHTS;
1990
-
1991
- // Run tournament selection
1992
- const tournamentResult = runTournament(
1993
- dreamerOutput.candidates,
1994
- philosopherOutput.judgments,
1995
- thresholds,
1996
- weights
1997
- );
1998
-
1999
- if (!tournamentResult.success || !tournamentResult.winner) {
2000
- // Tournament failed — no eligible candidate
2001
- return null;
2002
- }
2003
-
2004
- const {winner} = tournamentResult;
2005
-
2006
- // Update telemetry with tournament info
2007
- const updatedTelemetry: TrinityTelemetry = {
2008
- ...telemetry,
2009
- tournamentTrace: tournamentResult.trace,
2010
- winnerAggregateScore: winner.scores.aggregate,
2011
- winnerThresholdPassed: winner.thresholdPassed,
2012
- eligibleCandidateCount: tournamentResult.rankedCandidates.filter((c) => c.thresholdPassed).length,
2013
- };
2014
-
2015
- return {
2016
- selectedCandidateIndex: winner.candidateIndex,
2017
- badDecision: winner.candidate.badDecision,
2018
- betterDecision: winner.candidate.betterDecision,
2019
- rationale: winner.candidate.rationale,
2020
- sessionId: snapshot.sessionId,
2021
- principleId,
2022
- sourceSnapshotRef: `snapshot-${snapshot.sessionId}-${Date.now()}`,
2023
- telemetry: updatedTelemetry,
2024
- };
2025
- }
2026
-
2027
- // ---------------------------------------------------------------------------
2028
- // Trinity Chain Execution
2029
- // ---------------------------------------------------------------------------
2030
-
2031
- export interface RunTrinityOptions {
2032
- /** Snapshot to generate candidates from */
2033
- snapshot: NocturnalSessionSnapshot;
2034
- /** Target principle ID */
2035
- principleId: string;
2036
- /** Trinity configuration */
2037
- config: TrinityConfig;
2038
- }
2039
-
2040
- /**
2041
- * Execute the Trinity chain using stubs (synchronous).
2042
- * Use runTrinityAsync for real subagent execution via runtime adapter.
2043
- *
2044
- * @param options - Trinity execution options
2045
- * @returns TrinityResult with final artifact or failure info
2046
- */
2047
- export function runTrinity(options: RunTrinityOptions): TrinityResult {
2048
- const { snapshot, principleId, config } = options;
2049
-
2050
- // Stub path: use synchronous stub implementations
2051
- if (config.useStubs) {
2052
-
2053
-
2054
- return runTrinityWithStubs(snapshot, principleId, config);
2055
- }
2056
-
2057
- // Real execution path: requires runtimeAdapter
2058
- // This is handled asynchronously in runTrinityAsync
2059
- const errorMsg = '[Trinity] useStubs=false requires a runtimeAdapter. Use runTrinityAsync for real subagent execution.';
2060
- const failures: TrinityStageFailure[] = [{ stage: 'dreamer', reason: errorMsg }];
2061
- const telemetry: TrinityTelemetry = {
2062
- chainMode: 'trinity',
2063
- usedStubs: false,
2064
- dreamerPassed: false,
2065
- philosopherPassed: false,
2066
- scribePassed: false,
2067
- candidateCount: 0,
2068
- selectedCandidateIndex: -1,
2069
- stageFailures: [`Configuration: ${errorMsg}`],
2070
- };
2071
- console.error(`[Trinity] ERROR: ${errorMsg}`);
2072
- return {
2073
- success: false,
2074
- telemetry,
2075
- failures,
2076
- fallbackOccurred: false,
2077
- };
2078
- }
2079
-
2080
- /**
2081
- * Execute the Trinity chain with real subagent runtime (asynchronous).
2082
- * Requires config.runtimeAdapter to be set.
2083
- *
2084
- * @param options - Trinity execution options
2085
- * @returns Promise<TrinityResult> with final artifact or failure info
2086
- */
2087
- export async function runTrinityAsync(options: RunTrinityOptions): Promise<TrinityResult> {
2088
- const { snapshot, principleId, config } = options;
2089
-
2090
- if (config.useStubs) {
2091
- // Stub path: use synchronous stubs
2092
-
2093
-
2094
- return runTrinityWithStubs(snapshot, principleId, config);
2095
- }
2096
-
2097
- if (!config.runtimeAdapter) {
2098
- const errorMsg = '[Trinity] useStubs=false requires config.runtimeAdapter to be set.';
2099
- const failures: TrinityStageFailure[] = [{ stage: 'dreamer', reason: errorMsg }];
2100
- const telemetry: TrinityTelemetry = {
2101
- chainMode: 'trinity',
2102
- usedStubs: false,
2103
- dreamerPassed: false,
2104
- philosopherPassed: false,
2105
- scribePassed: false,
2106
- candidateCount: 0,
2107
- selectedCandidateIndex: -1,
2108
- stageFailures: [`Configuration: ${errorMsg}`],
2109
- };
2110
- console.error(`[Trinity] ERROR: ${errorMsg}`);
2111
- return {
2112
- success: false,
2113
- telemetry,
2114
- failures,
2115
- fallbackOccurred: false,
2116
- };
2117
- }
2118
-
2119
- const adapter = config.runtimeAdapter;
2120
- const telemetry: TrinityTelemetry = {
2121
- chainMode: 'trinity',
2122
- usedStubs: false,
2123
- dreamerPassed: false,
2124
- philosopherPassed: false,
2125
- scribePassed: false,
2126
- candidateCount: 0,
2127
- selectedCandidateIndex: -1,
2128
- stageFailures: [],
2129
- };
2130
-
2131
- const failures: TrinityStageFailure[] = [];
2132
-
2133
- try {
2134
- // Step 1: Dreamer — generate candidates via real subagent
2135
- const dreamerOutput = await adapter.invokeDreamer(snapshot, principleId, config.maxCandidates);
2136
-
2137
- if (!dreamerOutput.valid || dreamerOutput.candidates.length === 0) {
2138
- failures.push({
2139
- stage: 'dreamer',
2140
- reason: dreamerOutput.reason ?? 'No valid candidates generated',
2141
- });
2142
- telemetry.stageFailures.push(`Dreamer: ${dreamerOutput.reason ?? 'failed'}`);
2143
- return { success: false, telemetry, failures, fallbackOccurred: false };
2144
- }
2145
-
2146
- telemetry.dreamerPassed = true;
2147
- telemetry.candidateCount = dreamerOutput.candidates.length;
2148
-
2149
- // Diversity validation (DIVER-04): soft check, never gates pipeline
2150
- const diversityResult = validateCandidateDiversity(dreamerOutput.candidates);
2151
- telemetry.diversityCheckPassed = diversityResult.diversityCheckPassed;
2152
- telemetry.candidateRiskLevels = dreamerOutput.candidates
2153
- .map(c => c.riskLevel)
2154
- .filter((r): r is "low" | "medium" | "high" => typeof r === 'string');
2155
- if (!diversityResult.diversityCheckPassed) {
2156
- console.warn(`[Trinity] Diversity check failed: ${diversityResult.details}`);
2157
- }
2158
-
2159
- // Step 2: Philosopher — rank candidates via real subagent
2160
- const philosopherOutput = await adapter.invokePhilosopher(dreamerOutput, principleId, snapshot);
2161
-
2162
- if (!philosopherOutput.valid || philosopherOutput.judgments.length === 0) {
2163
- failures.push({
2164
- stage: 'philosopher',
2165
- reason: philosopherOutput.reason ?? 'No judgments produced',
2166
- });
2167
- telemetry.stageFailures.push(`Philosopher: ${philosopherOutput.reason ?? 'failed'}`);
2168
- return { success: false, telemetry, failures, fallbackOccurred: false };
2169
- }
2170
-
2171
- telemetry.philosopherPassed = true;
2172
-
2173
- // Aggregate 6D scores from Philosopher judgments (if available)
2174
- const realJudgments6D = philosopherOutput.judgments.filter(j => j.scores);
2175
- if (realJudgments6D.length > 0) {
2176
- const dims = ['principleAlignment', 'specificity', 'actionability', 'executability', 'safetyImpact', 'uxImpact'] as const;
2177
- const avgScores: Record<string, number> = {};
2178
- for (const dim of dims) {
2179
- const values = realJudgments6D.map(j => j.scores?.[dim] ?? 0);
2180
- avgScores[dim] = values.reduce((a, b) => a + b, 0) / values.length;
2181
- }
2182
- telemetry.philosopher6D = {
2183
- avgScores: avgScores as NonNullable<TrinityTelemetry['philosopher6D']>['avgScores'],
2184
- highRiskCount: philosopherOutput.judgments.filter(j => j.risks?.breakingChangeRisk).length,
2185
- };
2186
- }
2187
-
2188
- // Step 3: Scribe — synthesize final artifact via real subagent
2189
- const draftArtifact = await adapter.invokeScribe(
2190
- dreamerOutput,
2191
- philosopherOutput,
2192
- snapshot,
2193
- principleId,
2194
- telemetry,
2195
- config
2196
- );
2197
-
2198
- if (!draftArtifact) {
2199
- failures.push({ stage: 'scribe', reason: 'Failed to synthesize artifact from candidates' });
2200
- telemetry.stageFailures.push('Scribe: synthesis failed');
2201
- return { success: false, telemetry, failures, fallbackOccurred: false };
2202
- }
2203
-
2204
- telemetry.scribePassed = true;
2205
- telemetry.selectedCandidateIndex = draftArtifact.selectedCandidateIndex;
2206
-
2207
- if (draftArtifact.telemetry) {
2208
- telemetry.tournamentTrace = draftArtifact.telemetry.tournamentTrace;
2209
- telemetry.winnerAggregateScore = draftArtifact.telemetry.winnerAggregateScore;
2210
- telemetry.winnerThresholdPassed = draftArtifact.telemetry.winnerThresholdPassed;
2211
- telemetry.eligibleCandidateCount = draftArtifact.telemetry.eligibleCandidateCount;
2212
- }
2213
-
2214
- // Hallucination detection (SDK-QUAL-02): validate extraction against snapshot
2215
- const hallucinationResult = validateExtraction(draftArtifact, snapshot);
2216
- if (!hallucinationResult.isGrounded) {
2217
- const reason = hallucinationResult.reason ?? 'Extraction not grounded in session evidence';
2218
- console.warn(`[Trinity] HALLUCINATION_DETECTED: ${reason}`);
2219
- telemetry.stageFailures.push(`Hallucination: ${reason}`);
2220
- return {
2221
- success: false,
2222
- telemetry,
2223
- failures: [{ stage: 'scribe', reason }],
2224
- fallbackOccurred: false,
2225
- };
2226
- }
2227
-
2228
- return {
2229
- success: true,
2230
- artifact: draftArtifact,
2231
- telemetry,
2232
- failures: [],
2233
- fallbackOccurred: false,
2234
- artificerContext: draftArtifact.artificerContext,
2235
- };
2236
- } finally {
2237
- if (adapter.close) {
2238
- await adapter.close().catch(() => { /* intentionally empty - adapter cleanup error ignored */ });
2239
- }
2240
- }
2241
- }
2242
-
2243
- /**
2244
- * Internal: Run Trinity chain with stub implementations (synchronous).
2245
- // eslint-disable-next-line complexity, @typescript-eslint/class-methods-use-this -- complexity 14, refactor candidate
2246
- */
2247
- function runTrinityWithStubs(
2248
- snapshot: NocturnalSessionSnapshot,
2249
- principleId: string,
2250
- config: TrinityConfig
2251
- ): TrinityResult {
2252
- const telemetry: TrinityTelemetry = {
2253
- chainMode: 'trinity',
2254
- usedStubs: true,
2255
- dreamerPassed: false,
2256
- philosopherPassed: false,
2257
- scribePassed: false,
2258
- candidateCount: 0,
2259
- selectedCandidateIndex: -1,
2260
- stageFailures: [],
2261
- };
2262
-
2263
- const failures: TrinityStageFailure[] = [];
2264
-
2265
- // Step 1: Dreamer — generate candidates (stub)
2266
- const dreamerOutput = invokeStubDreamer(snapshot, principleId, config.maxCandidates);
2267
-
2268
- if (!dreamerOutput.valid || dreamerOutput.candidates.length === 0) {
2269
- failures.push({
2270
- stage: 'dreamer',
2271
- reason: dreamerOutput.reason ?? 'No valid candidates generated',
2272
- });
2273
- telemetry.stageFailures.push(`Dreamer: ${dreamerOutput.reason ?? 'failed'}`);
2274
- return {
2275
- success: false,
2276
- telemetry,
2277
- failures,
2278
- fallbackOccurred: false,
2279
- };
2280
- }
2281
-
2282
- telemetry.dreamerPassed = true;
2283
- telemetry.candidateCount = dreamerOutput.candidates.length;
2284
-
2285
- // Diversity validation (DIVER-04): soft check, never gates pipeline
2286
- const diversityResult = validateCandidateDiversity(dreamerOutput.candidates);
2287
- telemetry.diversityCheckPassed = diversityResult.diversityCheckPassed;
2288
- telemetry.candidateRiskLevels = dreamerOutput.candidates
2289
- .map(c => c.riskLevel)
2290
- .filter((r): r is "low" | "medium" | "high" => typeof r === 'string');
2291
- if (!diversityResult.diversityCheckPassed) {
2292
- console.warn(`[Trinity] Diversity check failed: ${diversityResult.details}`);
2293
- }
2294
-
2295
- // Step 2: Philosopher — rank candidates (stub)
2296
- const philosopherOutput = invokeStubPhilosopher(dreamerOutput, principleId, snapshot);
2297
-
2298
- if (!philosopherOutput.valid || philosopherOutput.judgments.length === 0) {
2299
- failures.push({
2300
- stage: 'philosopher',
2301
- reason: philosopherOutput.reason ?? 'No judgments produced',
2302
- });
2303
- telemetry.stageFailures.push(`Philosopher: ${philosopherOutput.reason ?? 'failed'}`);
2304
- return {
2305
- success: false,
2306
- telemetry,
2307
- failures,
2308
- fallbackOccurred: false,
2309
- };
2310
- }
2311
-
2312
- telemetry.philosopherPassed = true;
2313
-
2314
- // Aggregate 6D scores from Philosopher judgments (if available)
2315
- const judgments6D = philosopherOutput.judgments.filter(j => j.scores);
2316
- if (judgments6D.length > 0) {
2317
- const dims = ['principleAlignment', 'specificity', 'actionability', 'executability', 'safetyImpact', 'uxImpact'] as const;
2318
- const avgScores: Record<string, number> = {};
2319
- for (const dim of dims) {
2320
- const values = judgments6D.map(j => j.scores?.[dim] ?? 0);
2321
- avgScores[dim] = values.reduce((a, b) => a + b, 0) / values.length;
2322
- }
2323
- telemetry.philosopher6D = {
2324
- avgScores: avgScores as NonNullable<TrinityTelemetry['philosopher6D']>['avgScores'],
2325
- highRiskCount: philosopherOutput.judgments.filter(j => j.risks?.breakingChangeRisk).length,
2326
- };
2327
- }
2328
-
2329
- // Step 3: Scribe — produce final artifact using tournament selection (stub)
2330
- const draftArtifact = invokeStubScribe(dreamerOutput, philosopherOutput, snapshot, principleId, telemetry, config);
2331
-
2332
- if (!draftArtifact) {
2333
- failures.push({
2334
- stage: 'scribe',
2335
- reason: 'Failed to synthesize artifact from candidates',
2336
- });
2337
- telemetry.stageFailures.push('Scribe: synthesis failed');
2338
- return {
2339
- success: false,
2340
- telemetry,
2341
- failures,
2342
- fallbackOccurred: false,
2343
- };
2344
- }
2345
-
2346
- telemetry.scribePassed = true;
2347
- telemetry.selectedCandidateIndex = draftArtifact.selectedCandidateIndex;
2348
-
2349
- if (draftArtifact.telemetry) {
2350
- telemetry.tournamentTrace = draftArtifact.telemetry.tournamentTrace;
2351
- telemetry.winnerAggregateScore = draftArtifact.telemetry.winnerAggregateScore;
2352
- telemetry.winnerThresholdPassed = draftArtifact.telemetry.winnerThresholdPassed;
2353
- telemetry.eligibleCandidateCount = draftArtifact.telemetry.eligibleCandidateCount;
2354
- }
2355
-
2356
- // Hallucination detection (SDK-QUAL-02): validate extraction against snapshot
2357
- const hallucinationResult = validateExtraction(draftArtifact, snapshot);
2358
- if (!hallucinationResult.isGrounded) {
2359
- const reason = hallucinationResult.reason ?? 'Extraction not grounded in session evidence';
2360
- console.warn(`[Trinity] HALLUCINATION_DETECTED: ${reason}`);
2361
- telemetry.stageFailures.push(`Hallucination: ${reason}`);
2362
- return {
2363
- success: false,
2364
- telemetry,
2365
- failures: [{ stage: 'scribe', reason }],
2366
- fallbackOccurred: false,
2367
- };
2368
- }
2369
-
2370
- return {
2371
- success: true,
2372
- artifact: draftArtifact,
2373
- telemetry,
2374
- failures: [],
2375
- fallbackOccurred: false,
2376
- artificerContext: draftArtifact.artificerContext,
2377
- };
2378
- }
2379
-
2380
- // ---------------------------------------------------------------------------
2381
- // Trinity Validation (for Arbiter integration)
2382
- // ---------------------------------------------------------------------------
2383
-
2384
- /**
2385
- * Validate that a Trinity draft artifact can pass final arbiter validation.
2386
- * This checks the draft against the same rules as single-reflector artifacts.
2387
- */
2388
- export interface DraftValidationResult {
2389
- valid: boolean;
2390
- failures: string[];
2391
- }
2392
-
2393
- /**
2394
- * Validate a TrinityDraftArtifact before passing to arbiter.
2395
- */
2396
- export function validateDraftArtifact(draft: TrinityDraftArtifact): DraftValidationResult {
2397
- const failures: string[] = [];
2398
-
2399
- if (!draft.badDecision || draft.badDecision.trim().length === 0) {
2400
- failures.push('badDecision is required and non-empty');
2401
- }
2402
-
2403
- if (!draft.betterDecision || draft.betterDecision.trim().length === 0) {
2404
- failures.push('betterDecision is required and non-empty');
2405
- }
2406
-
2407
- if (!draft.rationale || draft.rationale.trim().length < 20) {
2408
- failures.push('rationale must be at least 20 characters');
2409
- }
2410
-
2411
- if (!draft.principleId || draft.principleId.trim().length === 0) {
2412
- failures.push('principleId is required');
2413
- }
2414
-
2415
- if (!draft.sessionId || draft.sessionId.trim().length === 0) {
2416
- failures.push('sessionId is required');
2417
- }
2418
-
2419
- // badDecision should not be identical to betterDecision
2420
- if (
2421
- typeof draft.badDecision === 'string' &&
2422
- typeof draft.betterDecision === 'string' &&
2423
- draft.badDecision.trim().length > 0 &&
2424
- draft.betterDecision.trim().length > 0 &&
2425
- draft.badDecision.trim() === draft.betterDecision.trim()
2426
- ) {
2427
- failures.push('badDecision and betterDecision cannot be identical');
2428
- }
2429
-
2430
- return {
2431
- valid: failures.length === 0,
2432
- failures,
2433
- };
2434
- }
2435
-
2436
- // ---------------------------------------------------------------------------
2437
- // Hallucination Detection (SDK-QUAL-02)
2438
- // ---------------------------------------------------------------------------
2439
-
2440
- /**
2441
- * Result of hallucination validation against session snapshot evidence.
2442
- */
2443
- export interface HallucinationDetectionResult {
2444
- /** Whether the extraction is grounded in real session evidence */
2445
- isGrounded: boolean;
2446
- /** List of evidence types found in the snapshot supporting the extraction */
2447
- evidenceTypes: string[];
2448
- /** Detailed reason if hallucination is detected */
2449
- reason?: string;
2450
- /** Matching evidence items for telemetry (truncated for safety) */
2451
- evidencePreview: string[];
2452
- }
2453
-
2454
- /**
2455
- * Validate that an extracted badDecision corresponds to actual events in the
2456
- * NocturnalSessionSnapshot. This catches hallucinated extractions where the
2457
- * Trinity chain produces a badDecision that has no grounding in real failures,
2458
- * pain events, or gate blocks.
2459
- *
2460
- * Evidence sources checked:
2461
- * 1. Failed tool calls (snapshot.toolCalls with outcome='failure')
2462
- * 2. Pain events (snapshot.painEvents with score >= 50)
2463
- * 3. Gate blocks (snapshot.gateBlocks)
2464
- * 4. User corrections (snapshot.userTurns with correctionDetected=true)
2465
- *
2466
- * The function uses keyword overlap heuristics: it extracts tool names, file
2467
- * paths, error messages, and pain reasons from the snapshot and checks if the
2468
- * badDecision text overlaps meaningfully with any of them.
2469
- *
2470
- * @param artifact The draft artifact produced by the Scribe stage
2471
- * @param snapshot The session snapshot used to generate the extraction
2472
- * @returns HallucinationDetectionResult indicating whether the extraction is grounded
2473
- */
2474
- export function validateExtraction(
2475
- artifact: TrinityDraftArtifact,
2476
- snapshot: NocturnalSessionSnapshot
2477
- ): HallucinationDetectionResult {
2478
- const evidenceTypes: string[] = [];
2479
- const evidencePreview: string[] = [];
2480
-
2481
- // Shared token normalizer: lowercase + strip punctuation, same as badDecisionTokens
2482
- const normalizeEvidenceToken = (value: string): string =>
2483
- value.toLowerCase().replace(/[^a-z0-9]/g, '');
2484
-
2485
- // Build a set of evidence tokens from the snapshot
2486
- const evidenceTokens = new Set<string>();
2487
- const badDecisionLower = artifact.badDecision.toLowerCase();
2488
-
2489
- // 1. Failed tool calls
2490
- const failedToolCalls = (snapshot.toolCalls ?? []).filter(tc => tc.outcome === 'failure');
2491
- if (failedToolCalls.length > 0) {
2492
- evidenceTypes.push('tool_failures');
2493
- for (const tc of failedToolCalls) {
2494
- // Extract tool name tokens
2495
- evidenceTokens.add(tc.toolName.toLowerCase());
2496
- if (tc.filePath) {
2497
- // Extract all path segments and normalize each for matching
2498
- const rawPathParts = [tc.filePath, ...tc.filePath.split(/[\\/]/)];
2499
- for (const part of rawPathParts) {
2500
- const normalized = normalizeEvidenceToken(part);
2501
- if (normalized.length > 0) evidenceTokens.add(normalized);
2502
- }
2503
- }
2504
- if (tc.errorMessage) {
2505
- // Extract key words from error messages (filter stop words)
2506
- const errorWords = tc.errorMessage.toLowerCase().split(/\s+/)
2507
- .filter(w => w.length > 3 && !['with', 'from', 'that', 'this', 'which', 'been', 'have', 'were', 'they', 'their'].includes(w));
2508
- for (const w of errorWords) {
2509
- const normalized = normalizeEvidenceToken(w);
2510
- if (normalized.length > 0) evidenceTokens.add(normalized);
2511
- }
2512
- }
2513
- if (tc.errorType) evidenceTokens.add(tc.errorType.toLowerCase());
2514
- evidencePreview.push(`tool:${tc.toolName}${tc.filePath ? `@${tc.filePath}` : ''} -> ${tc.errorMessage ?? 'unknown'}`.slice(0, 100));
2515
- }
2516
- }
2517
-
2518
- // 2. Pain events (score >= 50 indicates meaningful pain)
2519
- const significantPainEvents = (snapshot.painEvents ?? []).filter(pe => pe.score >= 50);
2520
- if (significantPainEvents.length > 0) {
2521
- evidenceTypes.push('pain_events');
2522
- for (const pe of significantPainEvents) {
2523
- evidenceTokens.add(pe.source.toLowerCase());
2524
- if (pe.reason) {
2525
- const painWords = pe.reason.toLowerCase().split(/\s+/)
2526
- .filter(w => w.length > 3 && !['with', 'from', 'that', 'this', 'which', 'been', 'have', 'were', 'they', 'their'].includes(w));
2527
- for (const w of painWords) {
2528
- const normalized = normalizeEvidenceToken(w);
2529
- if (normalized.length > 0) evidenceTokens.add(normalized);
2530
- }
2531
- }
2532
- evidencePreview.push(`pain:${pe.score} [${pe.source}] ${pe.reason ?? ''}`.slice(0, 100));
2533
- }
2534
- }
2535
-
2536
- // 3. Gate blocks
2537
- if ((snapshot.gateBlocks ?? []).length > 0) {
2538
- evidenceTypes.push('gate_blocks');
2539
- for (const gb of snapshot.gateBlocks) {
2540
- evidenceTokens.add(gb.toolName.toLowerCase());
2541
- evidenceTokens.add('gate');
2542
- evidenceTokens.add('blocked');
2543
- if (gb.reason) {
2544
- const blockWords = gb.reason.toLowerCase().split(/\s+/)
2545
- .filter(w => w.length > 3);
2546
- for (const w of blockWords) {
2547
- const normalized = normalizeEvidenceToken(w);
2548
- if (normalized.length > 0) evidenceTokens.add(normalized);
2549
- }
2550
- }
2551
- evidencePreview.push(`gate:${gb.toolName} -> ${gb.reason}`.slice(0, 100));
2552
- }
2553
- }
2554
-
2555
- // 4. User corrections
2556
- const userCorrections = (snapshot.userTurns ?? []).filter(ut => ut.correctionDetected);
2557
- if (userCorrections.length > 0) {
2558
- evidenceTypes.push('user_corrections');
2559
- evidenceTokens.add('correction');
2560
- evidenceTokens.add('wrong');
2561
- evidenceTokens.add('incorrect');
2562
- evidencePreview.push(`corrections:${userCorrections.length}`);
2563
- }
2564
-
2565
- // If no evidence exists at all in the snapshot, we cannot validate.
2566
- // Allow the extraction through — the pipeline already has guardrails for
2567
- // empty snapshots (Dreamer returns valid:false).
2568
- if (evidenceTypes.length === 0) {
2569
- return {
2570
- isGrounded: true,
2571
- evidenceTypes: [],
2572
- reason: undefined,
2573
- evidencePreview: [],
2574
- };
2575
- }
2576
-
2577
- // Check for overlap between badDecision text and evidence tokens
2578
- // We look for meaningful keyword matches (tokens of length > 4)
2579
- const badDecisionTokens = badDecisionLower.split(/\s+/)
2580
- .map(t => t.replace(/[^a-z0-9]/g, ''))
2581
- .filter(t => t.length > 4);
2582
-
2583
- let matchCount = 0;
2584
- const matchedTokens: string[] = [];
2585
- for (const token of badDecisionTokens) {
2586
- // Direct match
2587
- if (evidenceTokens.has(token)) {
2588
- matchCount++;
2589
- matchedTokens.push(token);
2590
- continue;
2591
- }
2592
- // Partial match: check if any evidence token contains this token or vice versa
2593
- for (const evToken of evidenceTokens) {
2594
- if (evToken.length > 4 && (evToken.includes(token) || token.includes(evToken))) {
2595
- matchCount++;
2596
- matchedTokens.push(token);
2597
- break;
2598
- }
2599
- }
2600
- }
2601
-
2602
- // Heuristic: if at least 2 meaningful tokens overlap, consider grounded
2603
- // Single overlap is acceptable if the token is highly specific (length > 8)
2604
- const minOverlap = badDecisionTokens.length > 0
2605
- ? Math.max(1, Math.ceil(badDecisionTokens.length * 0.15))
2606
- : 0;
2607
-
2608
- if (matchCount >= Math.max(2, minOverlap)) {
2609
- return {
2610
- isGrounded: true,
2611
- evidenceTypes,
2612
- evidencePreview: evidencePreview.slice(0, 5),
2613
- };
2614
- }
2615
-
2616
- // Also check for at least one highly-specific match (length > 8)
2617
- const hasHighlySpecificMatch = matchedTokens.some(t => t.length > 8);
2618
- if (hasHighlySpecificMatch) {
2619
- return {
2620
- isGrounded: true,
2621
- evidenceTypes,
2622
- evidencePreview: evidencePreview.slice(0, 5),
2623
- };
2624
- }
2625
-
2626
- // Hallucination detected — badDecision has no grounding in snapshot evidence
2627
- const reason = `Hallucinated extraction: badDecision "${artifact.badDecision.slice(0, 80)}" has insufficient overlap with session evidence. ` +
2628
- `Evidence types available: [${evidenceTypes.join(', ')}]. Matched tokens: [${matchedTokens.join(', ')}] (needed >= ${Math.max(2, minOverlap)}).`;
2629
-
2630
- return {
2631
- isGrounded: false,
2632
- evidenceTypes,
2633
- reason,
2634
- evidencePreview: evidencePreview.slice(0, 5),
2635
- };
2636
- }
2637
-
2638
- /**
2639
- * Convert a TrinityDraftArtifact to a NocturnalArtifact-compatible structure.
2640
- */
2641
- export function draftToArtifact(draft: TrinityDraftArtifact): {
2642
- artifactId: string;
2643
- sessionId: string;
2644
- principleId: string;
2645
- sourceSnapshotRef: string;
2646
- badDecision: string;
2647
- betterDecision: string;
2648
- rationale: string;
2649
- createdAt: string;
2650
- thinkingModelDelta?: number;
2651
- planningRatioGain?: number;
2652
- } {
2653
- // Compute reflection quality metrics
2654
- const thinkingModelDelta = draft.thinkingModelDelta ?? computeThinkingModelDelta(draft.badDecision, draft.betterDecision);
2655
- // planningRatioGain requires an improved snapshot — Trinity draft doesn't have one, so default to 0
2656
- const planningRatioGain = draft.planningRatioGain ?? 0;
2657
-
2658
- return {
2659
- artifactId: randomUUID(),
2660
- sessionId: draft.sessionId,
2661
- principleId: draft.principleId,
2662
- sourceSnapshotRef: draft.sourceSnapshotRef,
2663
- badDecision: draft.badDecision,
2664
- betterDecision: draft.betterDecision,
2665
- rationale: draft.rationale,
2666
- createdAt: new Date().toISOString(),
2667
- thinkingModelDelta,
2668
- planningRatioGain,
2669
- };
2670
- }
2671
-
2672
- // ---------------------------------------------------------------------------
2673
- // Default Configuration
2674
- // ---------------------------------------------------------------------------
2675
-
2676
- export const DEFAULT_TRINITY_CONFIG: TrinityConfig = {
2677
- useTrinity: true,
2678
- maxCandidates: 3,
2679
- useStubs: false, // Real subagent execution is the default; set useStubs=true for stub-only mode
2680
- };