principles-disciple 1.72.0 → 1.74.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (319) hide show
  1. package/INSTALL.md +1 -3
  2. package/openclaw.plugin.json +10 -5
  3. package/package.json +17 -19
  4. package/scripts/acceptance-test.mjs +16 -73
  5. package/scripts/sync-plugin.mjs +382 -77
  6. package/src/commands/archive-impl.ts +2 -1
  7. package/src/commands/capabilities.ts +2 -2
  8. package/src/commands/context.ts +2 -2
  9. package/src/commands/disable-impl.ts +2 -1
  10. package/src/commands/evolution-status.ts +16 -16
  11. package/src/commands/export.ts +12 -67
  12. package/src/commands/pain.ts +91 -1
  13. package/src/commands/principle-rollback.ts +2 -1
  14. package/src/commands/promote-impl.ts +7 -43
  15. package/src/commands/rollback-impl.ts +2 -1
  16. package/src/commands/rollback.ts +2 -1
  17. package/src/commands/samples.ts +2 -1
  18. package/src/commands/thinking-os.ts +2 -1
  19. package/src/config/errors.ts +18 -2
  20. package/src/constants/diagnostician.ts +2 -2
  21. package/src/constants/tools.ts +2 -1
  22. package/src/core/__tests__/focus-history.test.ts +210 -0
  23. package/src/core/config.ts +1 -1
  24. package/src/core/correction-cue-learner.ts +2 -136
  25. package/src/core/correction-types.ts +16 -88
  26. package/src/core/dictionary.ts +19 -20
  27. package/src/core/empathy-keyword-matcher.ts +17 -289
  28. package/src/core/empathy-types.ts +18 -229
  29. package/src/core/event-log.ts +29 -132
  30. package/src/core/evolution-reducer.ts +21 -2
  31. package/src/core/evolution-types.ts +76 -464
  32. package/src/core/file-store.ts +80 -0
  33. package/src/core/focus-history.ts +228 -955
  34. package/src/core/local-worker-routing.ts +34 -314
  35. package/src/core/merge-gate-audit.ts +0 -195
  36. package/src/core/migration.ts +0 -1
  37. package/src/core/pain-diagnostic-gate.ts +154 -0
  38. package/src/core/pain-signal.ts +21 -138
  39. package/src/core/pain.ts +15 -88
  40. package/src/core/path-resolver.ts +0 -1
  41. package/src/core/paths.ts +0 -1
  42. package/src/core/pd-task-reconciler.ts +26 -115
  43. package/src/core/pd-task-service.ts +9 -9
  44. package/src/core/pd-task-types.ts +23 -127
  45. package/src/core/principle-compiler/__tests__/compiler-replay-gate.test.ts +174 -0
  46. package/src/core/principle-compiler/code-validator.ts +15 -42
  47. package/src/core/principle-compiler/compiler.ts +100 -15
  48. package/src/core/principle-compiler/index.ts +5 -2
  49. package/src/core/principle-compiler/template-generator.ts +4 -104
  50. package/src/core/principle-injection.ts +10 -202
  51. package/src/core/principle-internalization/filesystem-lifecycle-datasource.ts +42 -0
  52. package/src/core/principle-internalization/lifecycle-read-model.ts +39 -242
  53. package/src/core/principle-internalization/principle-lifecycle-service.ts +12 -10
  54. package/src/core/principle-tree-ledger-adapter.ts +145 -0
  55. package/src/core/principle-tree-ledger.ts +8 -6
  56. package/src/core/reflection/reflection-context.ts +14 -109
  57. package/src/core/replay-engine.ts +8 -500
  58. package/src/core/rule-host-helpers.ts +5 -35
  59. package/src/core/rule-host-types.ts +10 -82
  60. package/src/core/rule-host.ts +6 -63
  61. package/src/core/runtime-v2-prompt-activation-reader.ts +231 -0
  62. package/src/core/session-tracker.ts +87 -101
  63. package/src/core/shadow-observation-registry.ts +19 -48
  64. package/src/core/trajectory.ts +3 -1
  65. package/src/core/workflow-funnel-loader.ts +62 -68
  66. package/src/core/workspace-context.ts +46 -0
  67. package/src/core/workspace-dir-service.ts +1 -1
  68. package/src/core/workspace-dir-validation.ts +18 -9
  69. package/src/hooks/AGENTS.md +1 -1
  70. package/src/hooks/gate-block-helper.ts +71 -64
  71. package/src/hooks/gate.ts +183 -31
  72. package/src/hooks/lifecycle.ts +30 -32
  73. package/src/hooks/llm.ts +60 -32
  74. package/src/hooks/pain.ts +297 -103
  75. package/src/hooks/prompt.ts +400 -440
  76. package/src/hooks/subagent.ts +2 -29
  77. package/src/i18n/commands.ts +2 -10
  78. package/src/index.ts +95 -85
  79. package/src/openclaw-sdk.ts +311 -0
  80. package/src/service/central-database.ts +8 -4
  81. package/src/service/evolution-queue-migration.ts +2 -1
  82. package/src/service/evolution-worker.ts +163 -1786
  83. package/src/service/internalization-trigger-adapter.ts +302 -0
  84. package/src/service/keyword-optimization-service.ts +4 -4
  85. package/src/service/monitoring-query-service.ts +1 -215
  86. package/src/service/queue-io.ts +60 -331
  87. package/src/service/runtime-summary-service.ts +59 -16
  88. package/src/service/subagent-workflow/index.ts +0 -41
  89. package/src/service/subagent-workflow/types.ts +9 -120
  90. package/src/service/subagent-workflow/workflow-store.ts +2 -119
  91. package/src/service/workflow-watchdog.ts +0 -43
  92. package/src/types/event-payload.ts +16 -74
  93. package/src/types/event-types.ts +38 -547
  94. package/src/types/hygiene-types.ts +7 -30
  95. package/src/types/principle-tree-schema.ts +20 -222
  96. package/src/types/queue.ts +15 -70
  97. package/src/types/runtime-summary.ts +5 -49
  98. package/src/utils/io.ts +8 -20
  99. package/src/utils/retry.ts +1 -1
  100. package/src/utils/shadow-fingerprint.ts +2 -2
  101. package/src/utils/workspace-resolver.ts +50 -0
  102. package/templates/langs/en/core/AGENTS.md +7 -7
  103. package/templates/langs/en/core/BOOT.md +1 -1
  104. package/templates/langs/en/core/HEARTBEAT.md +2 -2
  105. package/templates/langs/en/principles/THINKING_OS.md +3 -2
  106. package/templates/langs/en/skills/ai-sprint-orchestration/references/agent-registry.json +1 -72
  107. package/templates/langs/en/skills/ai-sprint-orchestration/references/specs/bugfix-complex-template.json +6 -6
  108. package/templates/langs/en/skills/ai-sprint-orchestration/references/specs/feature-complex-template.json +6 -6
  109. package/templates/langs/en/skills/ai-sprint-orchestration/references/specs/workflow-validation-minimal-verify.json +2 -12
  110. package/templates/langs/en/skills/ai-sprint-orchestration/references/specs/workflow-validation-minimal.json +2 -12
  111. package/templates/langs/en/skills/ai-sprint-orchestration/scripts/run.mjs +51 -15
  112. package/templates/langs/en/skills/evolve-task/SKILL.md +3 -3
  113. package/templates/langs/en/skills/pd-cli-operator/SKILL.md +67 -0
  114. package/templates/langs/en/skills/pd-diagnostician/SKILL.md +1 -1
  115. package/templates/langs/en/skills/pd-mentor/SKILL.md +2 -3
  116. package/templates/langs/en/skills/pd-pain-signal/SKILL.md +17 -39
  117. package/templates/langs/en/skills/pd-runtime-v2/SKILL.md +61 -0
  118. package/templates/langs/zh/core/AGENTS.md +7 -7
  119. package/templates/langs/zh/core/BOOT.md +1 -1
  120. package/templates/langs/zh/core/HEARTBEAT.md +2 -2
  121. package/templates/langs/zh/principles/THINKING_OS.md +3 -2
  122. package/templates/langs/zh/skills/ai-sprint-orchestration/references/agent-registry.json +1 -72
  123. package/templates/langs/zh/skills/ai-sprint-orchestration/references/specs/bugfix-complex-template.json +6 -6
  124. package/templates/langs/zh/skills/ai-sprint-orchestration/references/specs/feature-complex-template.json +6 -6
  125. package/templates/langs/zh/skills/ai-sprint-orchestration/references/specs/nocturnal-trinity-quality-enhancement.json +8 -8
  126. package/templates/langs/zh/skills/ai-sprint-orchestration/references/specs/workflow-validation-minimal-verify.json +2 -12
  127. package/templates/langs/zh/skills/ai-sprint-orchestration/references/specs/workflow-validation-minimal.json +2 -12
  128. package/templates/langs/zh/skills/ai-sprint-orchestration/scripts/run.mjs +51 -15
  129. package/templates/langs/zh/skills/ai-sprint-orchestration/test/run.test.mjs +21 -5
  130. package/templates/langs/zh/skills/evolve-task/SKILL.md +4 -4
  131. package/templates/langs/zh/skills/pd-cli-operator/SKILL.md +67 -0
  132. package/templates/langs/zh/skills/pd-diagnostician/SKILL.md +1 -1
  133. package/templates/langs/zh/skills/pd-mentor/SKILL.md +2 -3
  134. package/templates/langs/zh/skills/pd-pain-signal/SKILL.md +17 -38
  135. package/templates/langs/zh/skills/pd-runtime-v2/SKILL.md +61 -0
  136. package/tests/build-artifacts.test.ts +1 -3
  137. package/tests/commands/evolution-status.test.ts +0 -118
  138. package/tests/core/bootstrap-rules.test.ts +1 -1
  139. package/tests/core/config.test.ts +1 -1
  140. package/tests/core/event-log.test.ts +35 -0
  141. package/tests/core/evolution-engine.test.ts +610 -0
  142. package/tests/core/file-store.test.ts +102 -0
  143. package/tests/core/focus-history.test.ts +203 -11
  144. package/tests/core/merge-gate-audit.test.ts +2 -169
  145. package/tests/core/migration.test.ts +7 -7
  146. package/tests/core/model-deployment-registry.test.ts +7 -1
  147. package/tests/core/model-training-registry.test.ts +19 -0
  148. package/tests/core/observability.test.ts +0 -1
  149. package/tests/core/pain-diagnostic-gate.test.ts +498 -0
  150. package/tests/core/pain.test.ts +0 -1
  151. package/tests/core/path-resolver.test.ts +1 -1
  152. package/tests/core/paths-refactor.test.ts +0 -22
  153. package/tests/core/principle-internalization/deprecated-readiness.test.ts +2 -2
  154. package/tests/core/principle-internalization/lifecycle-metrics.test.ts +2 -2
  155. package/tests/core/principle-internalization/{internalization-routing-policy.test.ts → lifecycle-routing-policy.test.ts} +6 -6
  156. package/tests/core/principle-internalization/lineage-source-retired.test.ts +56 -0
  157. package/tests/core/principle-internalization/principle-lifecycle-service.test.ts +1 -23
  158. package/tests/core/principle-tree-ledger-adapter.test.ts +253 -0
  159. package/tests/core/reflection-context.test.ts +0 -14
  160. package/tests/core/replay-engine.test.ts +127 -215
  161. package/tests/core/rule-host-helpers.test.ts +2 -2
  162. package/tests/core/rule-implementation-runtime.test.ts +0 -27
  163. package/tests/core/workflow-funnel-loader.test.ts +162 -0
  164. package/tests/core/workspace-context.test.ts +2 -2
  165. package/tests/core/workspace-dir-validation.test.ts +8 -1
  166. package/tests/core-anti-growth.test.ts +191 -0
  167. package/tests/hook-workspace-nextaction-contract.test.ts +42 -0
  168. package/tests/hooks/confirm-first-removal.test.ts +188 -0
  169. package/tests/hooks/gate-auto-correct-shadow.test.ts +310 -0
  170. package/tests/hooks/gate-auto-correct.test.ts +665 -0
  171. package/tests/hooks/gate-no-path-write-tool.test.ts +172 -0
  172. package/tests/hooks/gate-rule-host-pipeline.test.ts +2 -1
  173. package/tests/hooks/pain.test.ts +269 -12
  174. package/tests/hooks/prompt-characterization.test.ts +500 -0
  175. package/tests/hooks/prompt-size-guard.test.ts +32 -17
  176. package/tests/hooks/runtime-v2-prompt-activation.test.ts +869 -0
  177. package/tests/index.test.ts +94 -1
  178. package/tests/integration/auto-entry-gate.test.ts +248 -0
  179. package/tests/integration/internalization-trigger-guard.test.ts +69 -0
  180. package/tests/integration/m8-legacy-paths.test.ts +63 -0
  181. package/tests/integration/runtime-v2-pain-guard.test.ts +125 -0
  182. package/tests/plugin-config-resolution-cutover.test.ts +359 -0
  183. package/tests/runtime-v2-discovery-guard.test.ts +154 -0
  184. package/tests/service/central-database.test.ts +457 -0
  185. package/tests/service/evolution-worker.correction-observer.test.ts +173 -0
  186. package/tests/service/evolution-worker.timeout.test.ts +11 -129
  187. package/tests/service/internalization-trigger-adapter.test.ts +251 -0
  188. package/tests/service/monitoring-query-service.test.ts +1 -47
  189. package/tests/service/queue-io.test.ts +1 -62
  190. package/tests/service/runtime-summary-service.test.ts +3 -1
  191. package/tests/service/workflow-watchdog.test.ts +0 -91
  192. package/tests/utils/file-lock.test.ts +5 -3
  193. package/tests/utils/session-key.test.ts +52 -0
  194. package/tests/utils/subagent-probe.test.ts +48 -1
  195. package/vitest.config.ts +4 -11
  196. package/.planning/codebase/ARCHITECTURE.md +0 -157
  197. package/.planning/codebase/CONCERNS.md +0 -145
  198. package/.planning/codebase/CONVENTIONS.md +0 -148
  199. package/.planning/codebase/INTEGRATIONS.md +0 -81
  200. package/.planning/codebase/STACK.md +0 -87
  201. package/.planning/codebase/STRUCTURE.md +0 -193
  202. package/.planning/codebase/TESTING.md +0 -243
  203. package/.planning/phases/01-basic-visualization/01-GAP-CLOSURE-VERIFICATION.md +0 -113
  204. package/docs/COMMAND_REFERENCE.md +0 -76
  205. package/docs/COMMAND_REFERENCE_EN.md +0 -79
  206. package/scripts/build-web.mjs +0 -46
  207. package/scripts/diagnose-nocturnal.mjs +0 -537
  208. package/scripts/seed-nocturnal-scenarios.mjs +0 -384
  209. package/src/commands/nocturnal-review.ts +0 -322
  210. package/src/commands/nocturnal-rollout.ts +0 -790
  211. package/src/commands/nocturnal-train.ts +0 -986
  212. package/src/commands/pd-reflect.ts +0 -88
  213. package/src/core/adaptive-thresholds.ts +0 -478
  214. package/src/core/diagnostician-task-store.ts +0 -192
  215. package/src/core/nocturnal-arbiter.ts +0 -715
  216. package/src/core/nocturnal-artifact-lineage.ts +0 -116
  217. package/src/core/nocturnal-artificer.ts +0 -257
  218. package/src/core/nocturnal-candidate-scoring.ts +0 -530
  219. package/src/core/nocturnal-compliance.ts +0 -1146
  220. package/src/core/nocturnal-dataset.ts +0 -763
  221. package/src/core/nocturnal-executability.ts +0 -428
  222. package/src/core/nocturnal-export.ts +0 -499
  223. package/src/core/nocturnal-paths.ts +0 -240
  224. package/src/core/nocturnal-reasoning-deriver.ts +0 -343
  225. package/src/core/nocturnal-rule-implementation-validator.ts +0 -246
  226. package/src/core/nocturnal-snapshot-contract.ts +0 -99
  227. package/src/core/nocturnal-trajectory-extractor.ts +0 -512
  228. package/src/core/nocturnal-trinity-types.ts +0 -218
  229. package/src/core/nocturnal-trinity.ts +0 -2680
  230. package/src/core/principle-internalization/deprecated-readiness.ts +0 -93
  231. package/src/core/principle-internalization/internalization-routing-policy.ts +0 -208
  232. package/src/core/principle-internalization/lifecycle-metrics.ts +0 -152
  233. package/src/http/principles-console-route.ts +0 -709
  234. package/src/service/central-health-service.ts +0 -49
  235. package/src/service/central-overview-service.ts +0 -138
  236. package/src/service/control-ui-query-service.ts +0 -900
  237. package/src/service/cooldown-strategy.ts +0 -97
  238. package/src/service/evolution-pain-context.ts +0 -79
  239. package/src/service/evolution-query-service.ts +0 -407
  240. package/src/service/health-query-service.ts +0 -1038
  241. package/src/service/nocturnal-config.ts +0 -214
  242. package/src/service/nocturnal-runtime.ts +0 -734
  243. package/src/service/nocturnal-service.ts +0 -1605
  244. package/src/service/nocturnal-target-selector.ts +0 -545
  245. package/src/service/sleep-cycle.ts +0 -157
  246. package/src/service/startup-reconciler.ts +0 -112
  247. package/src/service/subagent-workflow/correction-observer-types.ts +0 -82
  248. package/src/service/subagent-workflow/correction-observer-workflow-manager.ts +0 -250
  249. package/src/service/subagent-workflow/deep-reflect-workflow-manager.ts +0 -1
  250. package/src/service/subagent-workflow/dynamic-timeout.ts +0 -30
  251. package/src/service/subagent-workflow/empathy-observer-workflow-manager.ts +0 -268
  252. package/src/service/subagent-workflow/nocturnal-workflow-manager.ts +0 -795
  253. package/src/service/subagent-workflow/runtime-direct-driver.ts +0 -268
  254. package/src/service/subagent-workflow/workflow-manager-base.ts +0 -580
  255. package/src/tools/write-pain-flag.ts +0 -215
  256. package/templates/langs/en/skills/plan-script/SKILL.md +0 -32
  257. package/templates/langs/zh/skills/plan-script/SKILL.md +0 -32
  258. package/tests/commands/nocturnal-review.test.ts +0 -448
  259. package/tests/commands/nocturnal-train.test.ts +0 -97
  260. package/tests/commands/pd-reflect.test.ts +0 -49
  261. package/tests/core/adaptive-thresholds.test.ts +0 -261
  262. package/tests/core/nocturnal-arbiter.test.ts +0 -559
  263. package/tests/core/nocturnal-artifact-lineage.test.ts +0 -53
  264. package/tests/core/nocturnal-artificer.test.ts +0 -241
  265. package/tests/core/nocturnal-candidate-scoring.test.ts +0 -532
  266. package/tests/core/nocturnal-compliance-p-principles.test.ts +0 -133
  267. package/tests/core/nocturnal-compliance.test.ts +0 -646
  268. package/tests/core/nocturnal-dataset.test.ts +0 -892
  269. package/tests/core/nocturnal-e2e.test.ts +0 -234
  270. package/tests/core/nocturnal-executability.test.ts +0 -357
  271. package/tests/core/nocturnal-export.test.ts +0 -517
  272. package/tests/core/nocturnal-reasoning-deriver.test.ts +0 -372
  273. package/tests/core/nocturnal-reviewed-subset-comparison.test.ts +0 -428
  274. package/tests/core/nocturnal-rule-implementation-validator.test.ts +0 -127
  275. package/tests/core/nocturnal-snapshot-contract.test.ts +0 -121
  276. package/tests/core/nocturnal-trajectory-extractor.test.ts +0 -634
  277. package/tests/core/nocturnal-trinity.test.ts +0 -2053
  278. package/tests/core/pain-auto-repair.test.ts +0 -96
  279. package/tests/core/pain-integration.test.ts +0 -510
  280. package/tests/fixtures/nocturnal-reviewed-subset.json +0 -183
  281. package/tests/http/principles-console-route.test.ts +0 -162
  282. package/tests/integration/chaos-resilience.test.ts +0 -348
  283. package/tests/integration/empathy-workflow-integration.test.ts +0 -626
  284. package/tests/integration/pain-diagnostician-loop.e2e.test.ts +0 -380
  285. package/tests/service/control-ui-query-service.test.ts +0 -121
  286. package/tests/service/cooldown-strategy.test.ts +0 -164
  287. package/tests/service/data-endpoints-regression.test.ts +0 -834
  288. package/tests/service/empathy-observer-workflow-manager.test.ts +0 -175
  289. package/tests/service/evolution-worker.nocturnal.test.ts +0 -601
  290. package/tests/service/nocturnal-runtime-hardening.test.ts +0 -118
  291. package/tests/service/nocturnal-runtime.test.ts +0 -473
  292. package/tests/service/nocturnal-service-code-candidate.test.ts +0 -330
  293. package/tests/service/nocturnal-target-selector.test.ts +0 -615
  294. package/tests/service/startup-reconciler.test.ts +0 -148
  295. package/tests/tools/write-pain-flag.test.ts +0 -358
  296. package/ui/src/App.tsx +0 -45
  297. package/ui/src/api.ts +0 -220
  298. package/ui/src/charts.tsx +0 -955
  299. package/ui/src/components/ErrorState.tsx +0 -6
  300. package/ui/src/components/Loading.tsx +0 -13
  301. package/ui/src/components/ProtectedRoute.tsx +0 -12
  302. package/ui/src/components/Shell.tsx +0 -91
  303. package/ui/src/components/WorkspaceConfig.tsx +0 -178
  304. package/ui/src/components/index.ts +0 -5
  305. package/ui/src/context/auth.tsx +0 -80
  306. package/ui/src/context/theme.tsx +0 -66
  307. package/ui/src/hooks/useAutoRefresh.ts +0 -39
  308. package/ui/src/i18n/ui.ts +0 -473
  309. package/ui/src/main.tsx +0 -16
  310. package/ui/src/pages/EvolutionPage.tsx +0 -333
  311. package/ui/src/pages/FeedbackPage.tsx +0 -138
  312. package/ui/src/pages/GateMonitorPage.tsx +0 -136
  313. package/ui/src/pages/LoginPage.tsx +0 -89
  314. package/ui/src/pages/OverviewPage.tsx +0 -599
  315. package/ui/src/pages/SamplesPage.tsx +0 -174
  316. package/ui/src/pages/ThinkingModelsPage.tsx +0 -702
  317. package/ui/src/styles.css +0 -2020
  318. package/ui/src/types.ts +0 -384
  319. package/ui/src/utils/format.ts +0 -15
@@ -1,2053 +0,0 @@
1
- import { describe, it, expect, vi } from 'vitest';
2
- import {
3
- runTrinity,
4
- runTrinityAsync,
5
- validateDraftArtifact,
6
- draftToArtifact,
7
- DEFAULT_TRINITY_CONFIG,
8
- OpenClawTrinityRuntimeAdapter,
9
- TrinityRuntimeContractError,
10
- NOCTURNAL_DREAMER_PROMPT,
11
- NOCTURNAL_PHILOSOPHER_PROMPT,
12
- formatReasoningContext,
13
- invokeStubDreamer,
14
- invokeStubPhilosopher,
15
- validateExtraction,
16
- type TrinityConfig,
17
- type DreamerOutput,
18
- type DreamerCandidate,
19
- type PhilosopherOutput,
20
- type PhilosopherJudgment,
21
- type TrinityDraftArtifact,
22
- type TrinityRuntimeAdapter,
23
- type TrinityTelemetry,
24
- type RejectedAnalysis,
25
- type ChosenJustification,
26
- type ContrastiveAnalysis,
27
- } from '../../src/core/nocturnal-trinity.js';
28
- import {
29
- validateDreamerOutput,
30
- validatePhilosopherOutput,
31
- validateTrinityDraft,
32
- } from '../../src/core/nocturnal-arbiter.js';
33
-
34
- // ---------------------------------------------------------------------------
35
- // Test Fixtures
36
- // ---------------------------------------------------------------------------
37
-
38
- function makeSnapshot(overrides: Partial<{
39
- failureCount: number;
40
- totalPainEvents: number;
41
- totalGateBlocks: number;
42
- }> = {}) {
43
- return {
44
- sessionId: 'session-test-123',
45
- startedAt: '2026-04-12T00:00:00.000Z',
46
- updatedAt: '2026-04-12T00:05:00.000Z',
47
- assistantTurns: [],
48
- userTurns: [],
49
- toolCalls: [],
50
- painEvents: [],
51
- gateBlocks: [],
52
- stats: {
53
- failureCount: overrides.failureCount ?? 0,
54
- totalPainEvents: overrides.totalPainEvents ?? 0,
55
- totalGateBlocks: overrides.totalGateBlocks ?? 0,
56
- totalAssistantTurns: 5,
57
- totalToolCalls: 10,
58
- },
59
- };
60
- }
61
-
62
- // ---------------------------------------------------------------------------
63
- // Tests: validateDreamerOutput
64
- // ---------------------------------------------------------------------------
65
-
66
- describe('validateDreamerOutput', () => {
67
- it('passes a valid Dreamer output with candidates', () => {
68
- const output = {
69
- valid: true,
70
- candidates: [
71
- {
72
- candidateIndex: 0,
73
- badDecision: 'Did something wrong',
74
- betterDecision: 'Do it right',
75
- rationale: 'Because the principle says so',
76
- confidence: 0.9,
77
- },
78
- ],
79
- generatedAt: '2026-03-27T12:00:00.000Z',
80
- };
81
- const result = validateDreamerOutput(output);
82
- expect(result.valid).toBe(true);
83
- expect(result.failures).toHaveLength(0);
84
- });
85
-
86
- it('passes a valid Dreamer output with multiple candidates', () => {
87
- const output = {
88
- valid: true,
89
- candidates: [
90
- {
91
- candidateIndex: 0,
92
- badDecision: 'Did something wrong',
93
- betterDecision: 'Do it right',
94
- rationale: 'Because the principle says so',
95
- confidence: 0.9,
96
- },
97
- {
98
- candidateIndex: 1,
99
- badDecision: 'Did another wrong thing',
100
- betterDecision: 'Do it differently',
101
- rationale: 'Alternative approach is better',
102
- confidence: 0.8,
103
- },
104
- ],
105
- generatedAt: '2026-03-27T12:00:00.000Z',
106
- };
107
- const result = validateDreamerOutput(output);
108
- expect(result.valid).toBe(true);
109
- expect(result.failures).toHaveLength(0);
110
- });
111
-
112
- it('rejects Dreamer output marked invalid', () => {
113
- const output = {
114
- valid: false,
115
- candidates: [],
116
- reason: 'No signal found',
117
- generatedAt: '2026-03-27T12:00:00.000Z',
118
- };
119
- const result = validateDreamerOutput(output);
120
- expect(result.valid).toBe(false);
121
- expect(result.failures.some(f => f.includes('marked invalid'))).toBe(true);
122
- });
123
-
124
- it('rejects Dreamer output marked invalid without reason', () => {
125
- const output = {
126
- valid: false,
127
- candidates: [],
128
- generatedAt: '2026-03-27T12:00:00.000Z',
129
- };
130
- const result = validateDreamerOutput(output);
131
- expect(result.valid).toBe(false);
132
- });
133
-
134
- it('rejects Dreamer output without candidates array', () => {
135
- const output = {
136
- valid: true,
137
- generatedAt: '2026-03-27T12:00:00.000Z',
138
- };
139
- const result = validateDreamerOutput(output);
140
- expect(result.valid).toBe(false);
141
- expect(result.failures.some(f => f.includes('candidates array'))).toBe(true);
142
- });
143
-
144
- it('rejects Dreamer candidate missing required fields', () => {
145
- const output = {
146
- valid: true,
147
- candidates: [
148
- {
149
- candidateIndex: 0,
150
- badDecision: 'Has badDecision but missing betterDecision',
151
- // missing: betterDecision, rationale, confidence
152
- },
153
- ],
154
- generatedAt: '2026-03-27T12:00:00.000Z',
155
- };
156
- const result = validateDreamerOutput(output);
157
- expect(result.valid).toBe(false);
158
- expect(result.failures.some(f => f.includes('betterDecision'))).toBe(true);
159
- expect(result.failures.some(f => f.includes('rationale'))).toBe(true);
160
- expect(result.failures.some(f => f.includes('confidence'))).toBe(true);
161
- });
162
-
163
- it('rejects Dreamer candidate with invalid confidence (out of range)', () => {
164
- const output = {
165
- valid: true,
166
- candidates: [
167
- {
168
- candidateIndex: 0,
169
- badDecision: 'Wrong',
170
- betterDecision: 'Right',
171
- rationale: 'Because',
172
- confidence: 1.5, // out of range
173
- },
174
- ],
175
- generatedAt: '2026-03-27T12:00:00.000Z',
176
- };
177
- const result = validateDreamerOutput(output);
178
- expect(result.valid).toBe(false);
179
- expect(result.failures.some(f => f.includes('confidence'))).toBe(true);
180
- });
181
-
182
- it('rejects Dreamer candidate with duplicate candidateIndex', () => {
183
- const output = {
184
- valid: true,
185
- candidates: [
186
- {
187
- candidateIndex: 0,
188
- badDecision: 'Wrong 1',
189
- betterDecision: 'Right 1',
190
- rationale: 'Because 1',
191
- confidence: 0.9,
192
- },
193
- {
194
- candidateIndex: 0, // duplicate
195
- badDecision: 'Wrong 2',
196
- betterDecision: 'Right 2',
197
- rationale: 'Because 2',
198
- confidence: 0.8,
199
- },
200
- ],
201
- generatedAt: '2026-03-27T12:00:00.000Z',
202
- };
203
- const result = validateDreamerOutput(output);
204
- expect(result.valid).toBe(false);
205
- expect(result.failures.some(f => f.includes('duplicate'))).toBe(true);
206
- });
207
-
208
- it('rejects Dreamer candidate with identical badDecision and betterDecision', () => {
209
- const output = {
210
- valid: true,
211
- candidates: [
212
- {
213
- candidateIndex: 0,
214
- badDecision: 'Do the same thing',
215
- betterDecision: 'Do the same thing', // identical
216
- rationale: 'Because it is correct',
217
- confidence: 0.9,
218
- },
219
- ],
220
- generatedAt: '2026-03-27T12:00:00.000Z',
221
- };
222
- const result = validateDreamerOutput(output);
223
- expect(result.valid).toBe(false);
224
- expect(result.failures.some(f => f.includes('identical'))).toBe(true);
225
- });
226
-
227
- it('rejects Dreamer output missing generatedAt', () => {
228
- const output = {
229
- valid: true,
230
- candidates: [
231
- {
232
- candidateIndex: 0,
233
- badDecision: 'Wrong',
234
- betterDecision: 'Right',
235
- rationale: 'Because',
236
- confidence: 0.9,
237
- },
238
- ],
239
- // missing generatedAt
240
- };
241
- const result = validateDreamerOutput(output);
242
- expect(result.valid).toBe(false);
243
- expect(result.failures.some(f => f.includes('generatedAt'))).toBe(true);
244
- });
245
-
246
- it('rejects non-object input', () => {
247
- const result = validateDreamerOutput(null);
248
- expect(result.valid).toBe(false);
249
- });
250
-
251
- it('rejects string input', () => {
252
- const result = validateDreamerOutput('not an object');
253
- expect(result.valid).toBe(false);
254
- });
255
- });
256
-
257
- describe('OpenClawTrinityRuntimeAdapter contract hardening', () => {
258
- function makeRuntimeApi(overrides: Partial<any> = {}) {
259
- return {
260
- runtime: {
261
- agent: {
262
- runEmbeddedPiAgent: vi.fn().mockResolvedValue({
263
- payloads: [
264
- { text: '{"valid":true,"candidates":[],"generatedAt":"2026-04-12T00:00:00.000Z"}' },
265
- ],
266
- }),
267
- },
268
- config: {
269
- loadConfig: vi.fn().mockReturnValue({
270
- agents: {
271
- defaults: {
272
- model: 'openai/gpt-5.4',
273
- },
274
- },
275
- }),
276
- },
277
- ...overrides.runtime,
278
- },
279
- logger: {
280
- info: vi.fn(),
281
- warn: vi.fn(),
282
- error: vi.fn(),
283
- },
284
- };
285
- }
286
-
287
- it('rejects missing runtime.agent.runEmbeddedPiAgent contract explicitly', () => {
288
- expect(() => new OpenClawTrinityRuntimeAdapter({ runtime: {} } as any)).toThrow(TrinityRuntimeContractError);
289
- expect(() => new OpenClawTrinityRuntimeAdapter({ runtime: {} } as any)).toThrow(/runtime_unavailable/);
290
- });
291
-
292
- it('passes explicit provider/model overrides into runtime.agent.runEmbeddedPiAgent', async () => {
293
- const api = makeRuntimeApi();
294
- const adapter = new OpenClawTrinityRuntimeAdapter(api as any);
295
-
296
- await adapter.invokeDreamer(makeSnapshot({ failureCount: 1 }) as any, 'T-08', 2);
297
-
298
- expect(api.runtime.agent.runEmbeddedPiAgent).toHaveBeenCalledWith(
299
- expect.objectContaining({
300
- provider: 'openai',
301
- model: 'gpt-5.4',
302
- }),
303
- );
304
- });
305
-
306
- it('returns stable failure classes when runtime invocation fails', async () => {
307
- const api = makeRuntimeApi({
308
- runtime: {
309
- agent: {
310
- runEmbeddedPiAgent: vi.fn().mockRejectedValue(new Error('gateway unavailable')),
311
- },
312
- },
313
- });
314
- const adapter = new OpenClawTrinityRuntimeAdapter(api as any);
315
-
316
- const result = await adapter.invokeDreamer(makeSnapshot({ failureCount: 1 }) as any, 'T-08', 2);
317
-
318
- expect(result.valid).toBe(false);
319
- expect(result.reason).toContain('runtime_run_failed');
320
- expect(adapter.getLastFailureReason()).toContain('runtime_run_failed');
321
- });
322
- });
323
-
324
- // ---------------------------------------------------------------------------
325
- // Tests: validatePhilosopherOutput
326
- // ---------------------------------------------------------------------------
327
-
328
describe('validatePhilosopherOutput', () => {
  /** Timestamp shared by every fixture in this suite. */
  const GENERATED_AT = '2026-03-27T12:00:00.000Z';

  /** Builds a well-formed judgment; `patch` swaps in the fields under test. */
  const judgment = (patch: Record<string, unknown> = {}): Record<string, unknown> => ({
    candidateIndex: 0,
    critique: 'Good',
    principleAligned: true,
    score: 0.9,
    rank: 1,
    ...patch,
  });

  /**
   * Asserts that `output` is rejected. When `needle` is given, additionally
   * asserts that at least one reported failure message contains it.
   */
  const expectRejected = (output: Record<string, unknown>, needle?: string): void => {
    const verdict = validatePhilosopherOutput(output);
    expect(verdict.valid).toBe(false);
    if (needle !== undefined) {
      expect(verdict.failures.some(message => message.includes(needle))).toBe(true);
    }
  };

  it('passes a valid Philosopher output', () => {
    const verdict = validatePhilosopherOutput({
      valid: true,
      judgments: [judgment({ critique: 'Strong alignment', score: 0.92 })],
      overallAssessment: 'Good candidate set',
      generatedAt: GENERATED_AT,
    });
    expect(verdict.valid).toBe(true);
    expect(verdict.failures).toHaveLength(0);
  });

  it('rejects Philosopher output marked invalid', () => {
    expectRejected({
      valid: false,
      judgments: [],
      reason: 'No candidates to judge',
      generatedAt: GENERATED_AT,
    });
  });

  it('rejects Philosopher output without judgments array', () => {
    expectRejected(
      {
        valid: true,
        overallAssessment: 'Good',
        generatedAt: GENERATED_AT,
      },
      'judgments array',
    );
  });

  it('rejects Philosopher judgment missing required fields', () => {
    expectRejected({
      valid: true,
      // The single judgment lacks critique, principleAligned, score and rank.
      judgments: [{ candidateIndex: 0 }],
      overallAssessment: 'Good',
      generatedAt: GENERATED_AT,
    });
  });

  it('rejects Philosopher judgment with invalid score (out of range)', () => {
    expectRejected(
      {
        valid: true,
        judgments: [judgment({ score: 1.5 })], // out of range
        overallAssessment: 'Good',
        generatedAt: GENERATED_AT,
      },
      'score',
    );
  });

  it('rejects Philosopher judgment with invalid rank (must be >= 1)', () => {
    expectRejected(
      {
        valid: true,
        judgments: [judgment({ rank: 0 })], // invalid
        overallAssessment: 'Good',
        generatedAt: GENERATED_AT,
      },
      'rank',
    );
  });

  it('rejects Philosopher output with non-sequential ranks', () => {
    expectRejected(
      {
        valid: true,
        judgments: [
          judgment(),
          // rank should be 2 to follow the first judgment
          judgment({ candidateIndex: 1, critique: 'Also good', score: 0.8, rank: 3 }),
        ],
        overallAssessment: 'Good',
        generatedAt: GENERATED_AT,
      },
      'sequential ranks',
    );
  });

  it('rejects Philosopher output missing overallAssessment', () => {
    expectRejected(
      {
        valid: true,
        judgments: [judgment()],
        // overallAssessment deliberately left out
        generatedAt: GENERATED_AT,
      },
      'overallAssessment',
    );
  });
});
474
-
475
- // ---------------------------------------------------------------------------
476
- // Tests: validateTrinityDraft
477
- // ---------------------------------------------------------------------------
478
-
479
describe('validateTrinityDraft', () => {
  /** Returns a fresh telemetry record; `chainMode` is the only field tests vary. */
  const telemetryWith = (chainMode: string): Record<string, unknown> => ({
    chainMode,
    dreamerPassed: true,
    philosopherPassed: true,
    scribePassed: true,
    candidateCount: 3,
    selectedCandidateIndex: 0,
    stageFailures: [],
  });

  /** Returns a draft that passes validation, with `patch` applied on top. */
  const buildDraft = (patch: Record<string, unknown> = {}): Record<string, unknown> => ({
    selectedCandidateIndex: 0,
    badDecision: 'Did something wrong',
    betterDecision: 'Do it right',
    rationale: 'Because the principle says so and this is the right approach',
    sessionId: 'session-test-123',
    principleId: 'T-01',
    sourceSnapshotRef: 'snapshot-test-001',
    telemetry: telemetryWith('trinity'),
    ...patch,
  });

  /** Asserts `draft` is rejected with a failure message that contains `needle`. */
  const expectRejected = (draft: Record<string, unknown>, needle: string): void => {
    const verdict = validateTrinityDraft(draft);
    expect(verdict.valid).toBe(false);
    expect(verdict.failures.some(message => message.includes(needle))).toBe(true);
  };

  it('passes a valid Trinity draft artifact', () => {
    const verdict = validateTrinityDraft(buildDraft());
    expect(verdict.valid).toBe(true);
    expect(verdict.failures).toHaveLength(0);
  });

  it('rejects draft with missing badDecision', () => {
    // Strip the key entirely rather than setting it to undefined.
    const { badDecision: _removed, ...withoutBadDecision } = buildDraft();
    expectRejected(withoutBadDecision, 'badDecision');
  });

  it('rejects draft with empty badDecision', () => {
    expectRejected(buildDraft({ badDecision: ' ' }), 'badDecision');
  });

  it('rejects draft with short rationale (< 20 chars)', () => {
    expectRejected(buildDraft({ rationale: 'Too short' }), 'rationale');
  });

  it('rejects draft with identical badDecision and betterDecision', () => {
    const sameText = 'Same thing';
    expectRejected(buildDraft({ badDecision: sameText, betterDecision: sameText }), 'identical');
  });

  it('rejects draft with invalid telemetry', () => {
    expectRejected(buildDraft({ telemetry: null }), 'telemetry');
  });

  it('rejects draft with invalid chainMode in telemetry', () => {
    // chainMode must be 'trinity' or 'single-reflector'
    expectRejected(buildDraft({ telemetry: telemetryWith('invalid-mode') }), 'chainMode');
  });
});
565
-
566
- // ---------------------------------------------------------------------------
567
- // Tests: runTrinity — successful path
568
- // ---------------------------------------------------------------------------
569
-
570
describe('runTrinity', () => {
  /** Stub-backed Trinity config; only the candidate cap varies between cases. */
  const stubConfig = (maxCandidates = 3): TrinityConfig => ({
    useTrinity: true,
    maxCandidates,
    useStubs: true, // Use stub implementations
  });

  it('produces a successful Trinity result with valid snapshot (failure signal)', () => {
    const outcome = runTrinity({
      snapshot: makeSnapshot({ failureCount: 2 }),
      principleId: 'T-08',
      config: stubConfig(),
    });
    const { telemetry } = outcome;

    expect(outcome.success).toBe(true);
    expect(outcome.artifact).toBeDefined();
    expect(telemetry.chainMode).toBe('trinity');
    expect(telemetry.dreamerPassed).toBe(true);
    expect(telemetry.philosopherPassed).toBe(true);
    expect(telemetry.scribePassed).toBe(true);
    expect(telemetry.candidateCount).toBeGreaterThan(0);
    expect(telemetry.selectedCandidateIndex).toBeGreaterThanOrEqual(0);
    expect(outcome.failures).toHaveLength(0);
    expect(outcome.fallbackOccurred).toBe(false);
  });

  it('produces a successful Trinity result with pain signal', () => {
    const outcome = runTrinity({
      snapshot: makeSnapshot({ totalPainEvents: 3 }),
      principleId: 'T-08',
      config: stubConfig(),
    });

    expect(outcome.success).toBe(true);
    expect(outcome.artifact).toBeDefined();
  });

  it('produces a successful Trinity result with gate block signal', () => {
    const outcome = runTrinity({
      snapshot: makeSnapshot({ totalGateBlocks: 1 }),
      principleId: 'T-03',
      config: stubConfig(),
    });

    expect(outcome.success).toBe(true);
    expect(outcome.artifact).toBeDefined();
  });

  it('respects maxCandidates config', () => {
    const cap = 2;
    const outcome = runTrinity({
      snapshot: makeSnapshot({ failureCount: 5 }),
      principleId: 'T-08',
      config: stubConfig(cap),
    });

    expect(outcome.success).toBe(true);
    expect(outcome.telemetry.candidateCount).toBeLessThanOrEqual(2);
  });
});
635
-
636
- // ---------------------------------------------------------------------------
637
- // Tests: runTrinity — failure paths
638
- // ---------------------------------------------------------------------------
639
-
640
describe('runTrinity — failure paths', () => {
  it('fails when snapshot has no signal and generates no candidates', () => {
    // Every stat is zero, so the stub has no signal to generate candidates from.
    const silentSnapshot = makeSnapshot({
      failureCount: 0,
      totalPainEvents: 0,
      totalGateBlocks: 0,
    });
    const stubbed: TrinityConfig = { useTrinity: true, maxCandidates: 3, useStubs: true };

    const { success, failures, telemetry } = runTrinity({
      snapshot: silentSnapshot,
      principleId: 'T-08',
      config: stubbed,
    });

    expect(success).toBe(false);
    expect(failures.length).toBeGreaterThan(0);
    expect(failures[0].stage).toBe('dreamer');
    expect(telemetry.dreamerPassed).toBe(false);
  });
});
662
-
663
- // ---------------------------------------------------------------------------
664
- // Tests: validateDraftArtifact
665
- // ---------------------------------------------------------------------------
666
-
667
describe('validateDraftArtifact', () => {
  /** Returns a fresh artifact that passes validation, with `patch` applied on top. */
  const buildArtifact = (patch: Partial<TrinityDraftArtifact> = {}): TrinityDraftArtifact => ({
    selectedCandidateIndex: 0,
    badDecision: 'Did something wrong',
    betterDecision: 'Do it right',
    rationale: 'Because the principle says so and this is the correct approach',
    sessionId: 'session-test-123',
    principleId: 'T-01',
    sourceSnapshotRef: 'snapshot-test-001',
    telemetry: {
      chainMode: 'trinity',
      dreamerPassed: true,
      philosopherPassed: true,
      scribePassed: true,
      candidateCount: 3,
      selectedCandidateIndex: 0,
      stageFailures: [],
    },
    ...patch,
  });

  it('passes a valid TrinityDraftArtifact', () => {
    const verdict = validateDraftArtifact(buildArtifact());
    expect(verdict.valid).toBe(true);
    expect(verdict.failures).toHaveLength(0);
  });

  it('rejects artifact with missing badDecision', () => {
    const incomplete = buildArtifact();
    // Remove the key itself; the static type marks it as required.
    Reflect.deleteProperty(incomplete, 'badDecision');
    expect(validateDraftArtifact(incomplete).valid).toBe(false);
  });

  it('rejects artifact with empty betterDecision', () => {
    const blank = buildArtifact({ betterDecision: ' ' });
    expect(validateDraftArtifact(blank).valid).toBe(false);
  });

  it('rejects artifact with short rationale', () => {
    const terse = buildArtifact({ rationale: 'Too short' });
    expect(validateDraftArtifact(terse).valid).toBe(false);
  });

  it('rejects artifact with identical badDecision and betterDecision', () => {
    const verdict = validateDraftArtifact(
      buildArtifact({ badDecision: 'Same', betterDecision: 'Same' }),
    );
    expect(verdict.valid).toBe(false);
    expect(verdict.failures.some(message => message.includes('identical'))).toBe(true);
  });
});
726
-
727
- // ---------------------------------------------------------------------------
728
- // Tests: draftToArtifact
729
- // ---------------------------------------------------------------------------
730
-
731
describe('draftToArtifact', () => {
  it('converts TrinityDraftArtifact to NocturnalArtifact-compatible structure', () => {
    // Values the conversion is expected to carry over verbatim.
    const sessionId = 'session-test-123';
    const principleId = 'T-01';
    const badDecision = 'Did something wrong';
    const betterDecision = 'Do it right';
    const rationale = 'Because the principle says so';
    const sourceSnapshotRef = 'snapshot-test-001';

    const source: TrinityDraftArtifact = {
      selectedCandidateIndex: 1,
      badDecision,
      betterDecision,
      rationale,
      sessionId,
      principleId,
      sourceSnapshotRef,
      telemetry: {
        chainMode: 'trinity',
        dreamerPassed: true,
        philosopherPassed: true,
        scribePassed: true,
        candidateCount: 3,
        selectedCandidateIndex: 1,
        stageFailures: [],
      },
    };

    const converted = draftToArtifact(source);

    expect(converted.artifactId).toBeDefined(); // Generated UUID
    expect(converted.sessionId).toBe(sessionId);
    expect(converted.principleId).toBe(principleId);
    expect(converted.badDecision).toBe(badDecision);
    expect(converted.betterDecision).toBe(betterDecision);
    expect(converted.rationale).toBe(rationale);
    expect(converted.sourceSnapshotRef).toBe(sourceSnapshotRef);
    expect(converted.createdAt).toBeDefined(); // Current timestamp
  });
});
764
-
765
- // ---------------------------------------------------------------------------
766
- // Tests: DEFAULT_TRINITY_CONFIG
767
- // ---------------------------------------------------------------------------
768
-
769
describe('DEFAULT_TRINITY_CONFIG', () => {
  it('has sensible defaults', () => {
    const { useTrinity, maxCandidates, useStubs } = DEFAULT_TRINITY_CONFIG;

    expect(useTrinity).toBe(true);
    expect(maxCandidates).toBe(3);
    // real subagent execution is now the default
    expect(useStubs).toBe(false);
  });
});
776
-
777
- // ---------------------------------------------------------------------------
778
- // Tests: runTrinity — useStubs=false without adapter (sync failure)
779
- // ---------------------------------------------------------------------------
780
-
781
describe('runTrinity — useStubs=false without adapter', () => {
  it('fails with clear error when useStubs=false but no runtimeAdapter provided', () => {
    // Stubs are disabled and no runtimeAdapter is supplied.
    const adapterless: TrinityConfig = { useTrinity: true, maxCandidates: 3, useStubs: false };

    const { success, failures, telemetry } = runTrinity({
      snapshot: makeSnapshot({ failureCount: 2 }),
      principleId: 'T-08',
      config: adapterless,
    });

    expect(success).toBe(false);
    expect(failures.length).toBeGreaterThan(0);
    expect(failures[0].stage).toBe('dreamer');
    expect(failures[0].reason).toContain('runtimeAdapter');
    expect(telemetry.usedStubs).toBe(false);
    expect(telemetry.dreamerPassed).toBe(false);
  });
});
800
-
801
- // ---------------------------------------------------------------------------
802
- // Tests: runTrinityAsync — with mock runtime adapter
803
- // ---------------------------------------------------------------------------
804
-
805
describe('runTrinityAsync — with mock runtime adapter', () => {
  /**
   * Builds a runtime adapter whose stage methods are vitest mocks.
   *
   * Each stage resolves to the matching override when one is supplied and to a
   * well-formed default otherwise. Passing `scribeArtifact: null` makes the
   * Scribe mock resolve to null rather than falling back to the default.
   */
  function makeMockAdapter(overrides: Partial<{
    dreamerOutput: DreamerOutput;
    philosopherOutput: PhilosopherOutput;
    scribeArtifact: TrinityDraftArtifact | null;
    closeCalled: boolean;
  }> = {}): TrinityRuntimeAdapter & { closeCalled: boolean } {
    const fallbackDreamer: DreamerOutput = {
      valid: true,
      candidates: [
        {
          candidateIndex: 0,
          badDecision: 'Did something wrong',
          betterDecision: 'Do it right',
          rationale: 'Because the principle says so',
          confidence: 0.9,
        },
      ],
      generatedAt: new Date().toISOString(),
    };

    const fallbackPhilosopher: PhilosopherOutput = {
      valid: true,
      judgments: [
        {
          candidateIndex: 0,
          critique: 'Good alignment',
          principleAligned: true,
          score: 0.92,
          rank: 1,
        },
      ],
      overallAssessment: 'Good candidate',
      generatedAt: new Date().toISOString(),
    };

    const fallbackScribe: TrinityDraftArtifact = {
      selectedCandidateIndex: 0,
      badDecision: 'Did something wrong',
      betterDecision: 'Do it right',
      rationale: 'Because the principle says so and this is the right approach',
      sessionId: 'session-test-123',
      principleId: 'T-01',
      sourceSnapshotRef: 'snapshot-test-001',
      telemetry: {
        chainMode: 'trinity',
        usedStubs: false,
        dreamerPassed: true,
        philosopherPassed: true,
        scribePassed: true,
        candidateCount: 1,
        selectedCandidateIndex: 0,
        stageFailures: [],
      },
    };

    const dreamerResult = overrides.dreamerOutput ?? fallbackDreamer;
    const philosopherResult = overrides.philosopherOutput ?? fallbackPhilosopher;
    // Only `undefined` selects the default; an explicit null is passed through.
    const scribeResult =
      overrides.scribeArtifact === undefined ? fallbackScribe : overrides.scribeArtifact;

    return {
      closeCalled: overrides.closeCalled ?? false,
      invokeDreamer: vi.fn().mockResolvedValue(dreamerResult),
      invokePhilosopher: vi.fn().mockResolvedValue(philosopherResult),
      invokeScribe: vi.fn().mockResolvedValue(scribeResult),
      close: vi.fn().mockResolvedValue(undefined),
    } as unknown as TrinityRuntimeAdapter & { closeCalled: boolean; invokeDreamer: ReturnType<typeof vi.fn>; invokePhilosopher: ReturnType<typeof vi.fn>; invokeScribe: ReturnType<typeof vi.fn> };
  }

  /** Runs the async chain for principle T-08 with stubs disabled and `adapter` attached. */
  const runAgainst = async (adapter: TrinityRuntimeAdapter) => {
    const snapshot = makeSnapshot({ failureCount: 2 });
    const config: TrinityConfig = {
      useTrinity: true,
      maxCandidates: 3,
      useStubs: false,
      runtimeAdapter: adapter,
    };
    const result = await runTrinityAsync({ snapshot, principleId: 'T-08', config });
    return { snapshot, result };
  };

  /** Current time as an ISO string, for ad-hoc stage outputs. */
  const isoNow = (): string => new Date().toISOString();

  it('uses runtime adapter when useStubs=false with adapter provided', async () => {
    const adapter = makeMockAdapter();

    const { snapshot, result } = await runAgainst(adapter);

    expect(result.success).toBe(true);
    expect(adapter.invokeDreamer).toHaveBeenCalledWith(snapshot, 'T-08', 3);
    expect(adapter.invokePhilosopher).toHaveBeenCalled();
    expect(adapter.invokeScribe).toHaveBeenCalled();
    expect(result.telemetry.usedStubs).toBe(false);
    expect(result.telemetry.dreamerPassed).toBe(true);
    expect(result.telemetry.philosopherPassed).toBe(true);
    expect(result.telemetry.scribePassed).toBe(true);
  });

  it('fails closed when Dreamer stage returns invalid output', async () => {
    const adapter = makeMockAdapter({
      dreamerOutput: { valid: false, candidates: [], reason: 'No signal found', generatedAt: isoNow() },
    });

    const { result } = await runAgainst(adapter);

    expect(result.success).toBe(false);
    expect(result.failures.length).toBeGreaterThan(0);
    expect(result.failures[0].stage).toBe('dreamer');
    expect(result.telemetry.dreamerPassed).toBe(false);
    expect(result.telemetry.philosopherPassed).toBe(false);
    expect(result.telemetry.scribePassed).toBe(false);
  });

  it('fails closed when Philosopher stage returns invalid output', async () => {
    const adapter = makeMockAdapter({
      philosopherOutput: { valid: false, judgments: [], overallAssessment: '', reason: 'No candidates', generatedAt: isoNow() },
    });

    const { result } = await runAgainst(adapter);
    const failedStages = result.failures.map(failure => failure.stage);

    expect(result.success).toBe(false);
    expect(failedStages.some(stage => stage === 'dreamer')).toBe(false); // Dreamer passed
    expect(failedStages.some(stage => stage === 'philosopher')).toBe(true);
    expect(result.telemetry.dreamerPassed).toBe(true);
    expect(result.telemetry.philosopherPassed).toBe(false);
  });

  it('fails closed when Scribe stage returns null', async () => {
    const adapter = makeMockAdapter({ scribeArtifact: null });

    const { result } = await runAgainst(adapter);

    expect(result.success).toBe(false);
    expect(result.failures.some(failure => failure.stage === 'scribe')).toBe(true);
    expect(result.telemetry.dreamerPassed).toBe(true);
    expect(result.telemetry.philosopherPassed).toBe(true);
    expect(result.telemetry.scribePassed).toBe(false);
  });

  it('calls adapter.close() after successful execution', async () => {
    const adapter = makeMockAdapter();

    await runAgainst(adapter);

    expect(adapter.close).toHaveBeenCalled();
  });

  it('calls adapter.close() even when execution fails', async () => {
    const adapter = makeMockAdapter({
      dreamerOutput: { valid: false, candidates: [], reason: 'No signal', generatedAt: isoNow() },
    });

    await runAgainst(adapter);

    expect(adapter.close).toHaveBeenCalled();
  });

  it('produces artifact compatible with draftToArtifact', async () => {
    const { result } = await runAgainst(makeMockAdapter());

    expect(result.success).toBe(true);
    expect(result.artifact).toBeDefined();
    const converted = draftToArtifact(result.artifact!);
    expect(converted.artifactId).toBeDefined();
    expect(converted.sessionId).toBe('session-test-123');
    expect(converted.principleId).toBe('T-01');
    expect(converted.badDecision).toBeDefined();
    expect(converted.betterDecision).toBeDefined();
  });
});
1010
-
1011
- // ---------------------------------------------------------------------------
1012
- // Tests: runTrinityAsync — useStubs=true still uses stubs
1013
- // ---------------------------------------------------------------------------
1014
-
1015
describe('runTrinityAsync — useStubs=true uses synchronous stubs', () => {
  it('still uses stub implementations when useStubs=true even with adapter', async () => {
    const timestamp = (): string => new Date().toISOString();
    // This adapter is attached to the config, but the test expects it never to be called.
    const ignoredAdapter = {
      invokeDreamer: vi.fn().mockResolvedValue({ valid: true, candidates: [], generatedAt: timestamp() }),
      invokePhilosopher: vi.fn().mockResolvedValue({ valid: true, judgments: [], overallAssessment: '', generatedAt: timestamp() }),
      invokeScribe: vi.fn().mockResolvedValue(null),
    };

    // With stubs, adapter is ignored - stub produces success with failureCount signal
    const outcome = await runTrinityAsync({
      snapshot: makeSnapshot({ failureCount: 2 }),
      principleId: 'T-08',
      config: {
        useTrinity: true,
        maxCandidates: 3,
        useStubs: true, // Explicitly use stubs
        runtimeAdapter: ignoredAdapter as unknown as TrinityRuntimeAdapter,
      },
    });

    expect(outcome.success).toBe(true); // Stub succeeds because snapshot has failureCount
    expect(ignoredAdapter.invokeDreamer).not.toHaveBeenCalled(); // Adapter NOT called
    expect(ignoredAdapter.invokePhilosopher).not.toHaveBeenCalled();
    expect(ignoredAdapter.invokeScribe).not.toHaveBeenCalled();
  });
});
1039
-
1040
- // ---------------------------------------------------------------------------
1041
- // Tests: NOCTURNAL_DREAMER_PROMPT — strategic perspective requirements (Task 1)
1042
- // ---------------------------------------------------------------------------
1043
-
1044
describe('NOCTURNAL_DREAMER_PROMPT — strategic perspective requirements', () => {
  /** Asserts that the Dreamer prompt text contains `fragment` verbatim. */
  const expectPromptToMention = (fragment: string): void => {
    expect(NOCTURNAL_DREAMER_PROMPT).toContain(fragment);
  };

  it('contains "## Strategic Perspective Requirements" section', () => {
    expectPromptToMention('## Strategic Perspective Requirements');
  });

  it('mentions all three strategic perspectives', () => {
    for (const perspective of ['conservative_fix', 'structural_improvement', 'paradigm_shift']) {
      expectPromptToMention(perspective);
    }
  });

  it('contains ANTI-PATTERN warning', () => {
    expectPromptToMention('ANTI-PATTERN');
  });

  it('references riskLevel as required candidate field', () => {
    expectPromptToMention('riskLevel');
  });

  it('references strategicPerspective as required candidate field', () => {
    expectPromptToMention('strategicPerspective');
  });
});
1067
-
1068
- // ---------------------------------------------------------------------------
1069
- // Tests: DreamerCandidate interface — optional fields (Task 1)
1070
- // ---------------------------------------------------------------------------
1071
-
1072
describe('DreamerCandidate interface — optional fields', () => {
  // These cases mainly exercise the type: each literal must compile as a
  // DreamerCandidate, and the runtime checks confirm the assigned values.

  it('accepts a candidate with riskLevel and strategicPerspective', () => {
    const full: DreamerCandidate = {
      candidateIndex: 0,
      badDecision: 'Did something wrong',
      betterDecision: 'Do it right',
      rationale: 'Because the principle says so',
      confidence: 0.9,
      riskLevel: 'medium',
      strategicPerspective: 'structural_improvement',
    };
    const { riskLevel, strategicPerspective } = full;
    expect(riskLevel).toBe('medium');
    expect(strategicPerspective).toBe('structural_improvement');
  });

  it('accepts a candidate without riskLevel or strategicPerspective (backward compat)', () => {
    const legacy: DreamerCandidate = {
      candidateIndex: 0,
      badDecision: 'Did something wrong',
      betterDecision: 'Do it right',
      rationale: 'Because the principle says so',
      confidence: 0.9,
    };
    const { riskLevel, strategicPerspective } = legacy;
    expect(riskLevel).toBeUndefined();
    expect(strategicPerspective).toBeUndefined();
  });

  it('accepts all valid riskLevel values', () => {
    (['low', 'medium', 'high'] as const).forEach(level => {
      const sample: DreamerCandidate = {
        candidateIndex: 0,
        badDecision: 'Wrong',
        betterDecision: 'Right',
        rationale: 'Because',
        confidence: 0.8,
        riskLevel: level,
      };
      expect(sample.riskLevel).toBe(level);
    });
  });

  it('accepts all valid strategicPerspective values', () => {
    (['conservative_fix', 'structural_improvement', 'paradigm_shift'] as const).forEach(perspective => {
      const sample: DreamerCandidate = {
        candidateIndex: 0,
        badDecision: 'Wrong',
        betterDecision: 'Right',
        rationale: 'Because',
        confidence: 0.8,
        strategicPerspective: perspective,
      };
      expect(sample.strategicPerspective).toBe(perspective);
    });
  });
});
1133
-
1134
- // ---------------------------------------------------------------------------
1135
- // Tests: buildDreamerPrompt — reasoning context injection (Task 2)
1136
- // ---------------------------------------------------------------------------
1137
-
1138
describe('buildDreamerPrompt — reasoning context injection', () => {
  /**
   * Minimal session snapshot for the reasoning-context cases. Omitted turn and
   * tool-call lists default to empty; the two stats totals are the lengths of
   * the supplied lists.
   */
  function makeReasoningSnapshot({
    assistantTurns = [],
    toolCalls = [],
    userTurns = [],
  }: {
    assistantTurns?: any[];
    toolCalls?: any[];
    userTurns?: any[];
  } = {}) {
    return {
      sessionId: 'session-reasoning-test',
      startedAt: '2026-04-13T00:00:00.000Z',
      updatedAt: '2026-04-13T00:05:00.000Z',
      assistantTurns,
      userTurns,
      toolCalls,
      painEvents: [],
      gateBlocks: [],
      stats: {
        failureCount: 0,
        totalPainEvents: 0,
        totalGateBlocks: 0,
        totalAssistantTurns: assistantTurns.length,
        totalToolCalls: toolCalls.length,
      },
    };
  }

  /** A single assistant turn at index 0 carrying `sanitizedText`. */
  const assistantTurn = (sanitizedText: string) => ({
    turnIndex: 0,
    sanitizedText,
    createdAt: '2026-04-13T00:01:00.000Z',
  });

  /** A successful tool call record for `toolName` at `createdAt`. */
  const successfulCall = (toolName: string, createdAt: string) => ({
    toolName,
    outcome: 'success',
    createdAt,
  });

  it('injects ## Reasoning Context section when assistant turns have thinking content', () => {
    const snapshot = makeReasoningSnapshot({
      assistantTurns: [
        assistantTurn('<thinking>I need to consider the implications carefully</thinking>'),
      ],
    });

    expect(formatReasoningContext(snapshot as any)).toContain('## Reasoning Context');
  });

  it('includes uncertainty markers in reasoning context', () => {
    const snapshot = makeReasoningSnapshot({
      assistantTurns: [
        assistantTurn('let me verify this first before proceeding with the change'),
      ],
    });

    expect(formatReasoningContext(snapshot as any)).toContain('Uncertainty detected');
  });

  it('includes confidence signal when not high', () => {
    const snapshot = makeReasoningSnapshot({
      assistantTurns: [
        assistantTurn('I should probably check this more thoroughly before continuing'),
      ],
    });

    // Low or medium confidence should be shown
    expect(formatReasoningContext(snapshot as any)).toMatch(/Confidence:\s*(low|medium)/);
  });

  it('includes contextual factors when present', () => {
    const snapshot = makeReasoningSnapshot({
      assistantTurns: [],
      toolCalls: [
        successfulCall('Read', '2026-04-13T00:01:00.000Z'),
        successfulCall('Edit', '2026-04-13T00:02:00.000Z'),
      ],
    });

    expect(formatReasoningContext(snapshot as any)).toContain('File structure explored');
  });

  it('omits ## Reasoning Context when no reasoning signals exist', () => {
    const snapshot = makeReasoningSnapshot({
      assistantTurns: [],
      toolCalls: [successfulCall('Edit', '2026-04-13T00:01:00.000Z')],
    });

    expect(formatReasoningContext(snapshot as any)).toBeNull();
  });

  it('does not inject decisionPoints', () => {
    const snapshot = makeReasoningSnapshot({
      assistantTurns: [assistantTurn('<thinking>some thought</thinking>')],
    });

    const context = formatReasoningContext(snapshot as any);
    expect(context).not.toContain('decisionPoint');
    expect(context).not.toContain('DecisionPoint');
  });
});
1251
-
1252
- // ---------------------------------------------------------------------------
1253
- // Tests: invokeStubDreamer — risk level and perspective mapping (D-07)
1254
- // ---------------------------------------------------------------------------
1255
-
1256
- describe('invokeStubDreamer — risk level and perspective mapping (D-07)', () => {
1257
- it('gateBlocks candidates get conservative_fix/low', () => {
1258
- const snapshot = makeSnapshot({ totalGateBlocks: 2 });
1259
- const output = invokeStubDreamer(snapshot as any, 'T-03', 3);
1260
- expect(output.valid).toBe(true);
1261
- expect(output.candidates.length).toBeGreaterThan(0);
1262
- for (const candidate of output.candidates) {
1263
- expect(candidate.riskLevel).toBe('low');
1264
- expect(candidate.strategicPerspective).toBe('conservative_fix');
1265
- }
1266
- });
1267
-
1268
- it('pain candidates get structural_improvement/medium', () => {
1269
- const snapshot = makeSnapshot({ totalPainEvents: 3 });
1270
- const output = invokeStubDreamer(snapshot as any, 'T-08', 3);
1271
- expect(output.valid).toBe(true);
1272
- expect(output.candidates.length).toBeGreaterThan(0);
1273
- for (const candidate of output.candidates) {
1274
- expect(candidate.riskLevel).toBe('medium');
1275
- expect(candidate.strategicPerspective).toBe('structural_improvement');
1276
- }
1277
- });
1278
-
1279
- it('failure candidates get paradigm_shift/high', () => {
1280
- const snapshot = makeSnapshot({ failureCount: 2 });
1281
- const output = invokeStubDreamer(snapshot as any, 'T-08', 3);
1282
- expect(output.valid).toBe(true);
1283
- expect(output.candidates.length).toBeGreaterThan(0);
1284
- for (const candidate of output.candidates) {
1285
- expect(candidate.riskLevel).toBe('high');
1286
- expect(candidate.strategicPerspective).toBe('paradigm_shift');
1287
- }
1288
- });
1289
- });
1290
-
1291
- // ---------------------------------------------------------------------------
1292
- // Tests: runTrinity — diversity telemetry (DIVER-04)
1293
- // ---------------------------------------------------------------------------
1294
-
1295
- describe('runTrinity — diversity telemetry (DIVER-04)', () => {
1296
- it('emits diversityCheckPassed=false when stub candidates all have same risk level', () => {
1297
- // Failure signal produces all paradigm_shift/high candidates → not diverse
1298
- const snapshot = makeSnapshot({ failureCount: 2 });
1299
- const config: TrinityConfig = {
1300
- useTrinity: true,
1301
- maxCandidates: 3,
1302
- useStubs: true,
1303
- };
1304
-
1305
- const result = runTrinity({ snapshot: snapshot as any, principleId: 'T-08', config });
1306
-
1307
- expect(result.success).toBe(true);
1308
- expect(result.telemetry.diversityCheckPassed).toBe(false);
1309
- });
1310
-
1311
- it('emits candidateRiskLevels array matching stub mapping', () => {
1312
- const snapshot = makeSnapshot({ failureCount: 2 });
1313
- const config: TrinityConfig = {
1314
- useTrinity: true,
1315
- maxCandidates: 3,
1316
- useStubs: true,
1317
- };
1318
-
1319
- const result = runTrinity({ snapshot: snapshot as any, principleId: 'T-08', config });
1320
-
1321
- expect(result.success).toBe(true);
1322
- expect(result.telemetry.candidateRiskLevels).toBeDefined();
1323
- expect(result.telemetry.candidateRiskLevels!.length).toBeGreaterThan(0);
1324
- // All failure stub candidates should be 'high'
1325
- for (const level of result.telemetry.candidateRiskLevels!) {
1326
- expect(level).toBe('high');
1327
- }
1328
- });
1329
-
1330
- it('pipeline completes even when diversity check fails (soft enforcement)', () => {
1331
- // Failure signal: all candidates have same risk → diversity fails
1332
- const snapshot = makeSnapshot({ failureCount: 2 });
1333
- const config: TrinityConfig = {
1334
- useTrinity: true,
1335
- maxCandidates: 3,
1336
- useStubs: true,
1337
- };
1338
-
1339
- const result = runTrinity({ snapshot: snapshot as any, principleId: 'T-08', config });
1340
-
1341
- expect(result.telemetry.diversityCheckPassed).toBe(false);
1342
- expect(result.success).toBe(true);
1343
- expect(result.artifact).toBeDefined();
1344
- });
1345
- });
1346
-
1347
- // ---------------------------------------------------------------------------
1348
- // Tests: TrinityTelemetry — diversity fields
1349
- // ---------------------------------------------------------------------------
1350
-
1351
- describe('TrinityTelemetry — diversity fields', () => {
1352
- it('accepts optional diversityCheckPassed field', () => {
1353
- const telemetry: TrinityTelemetry = {
1354
- chainMode: 'trinity',
1355
- usedStubs: true,
1356
- dreamerPassed: true,
1357
- philosopherPassed: true,
1358
- scribePassed: true,
1359
- candidateCount: 2,
1360
- selectedCandidateIndex: 0,
1361
- stageFailures: [],
1362
- diversityCheckPassed: true,
1363
- };
1364
- expect(telemetry.diversityCheckPassed).toBe(true);
1365
- });
1366
-
1367
- it('accepts optional candidateRiskLevels field', () => {
1368
- const telemetry: TrinityTelemetry = {
1369
- chainMode: 'trinity',
1370
- usedStubs: true,
1371
- dreamerPassed: true,
1372
- philosopherPassed: true,
1373
- scribePassed: true,
1374
- candidateCount: 2,
1375
- selectedCandidateIndex: 0,
1376
- stageFailures: [],
1377
- candidateRiskLevels: ['low', 'high'],
1378
- };
1379
- expect(telemetry.candidateRiskLevels).toEqual(['low', 'high']);
1380
- });
1381
- });
1382
-
1383
- // ---------------------------------------------------------------------------
1384
- // Tests: Philosopher 6D Evaluation (PHILO-01)
1385
- // ---------------------------------------------------------------------------
1386
-
1387
- describe('Philosopher 6D Evaluation (PHILO-01)', () => {
1388
- it('NOCTURNAL_PHILOSOPHER_PROMPT contains 6 dimensions with calibrated weights', () => {
1389
- expect(NOCTURNAL_PHILOSOPHER_PROMPT).toContain('Safety Impact');
1390
- expect(NOCTURNAL_PHILOSOPHER_PROMPT).toContain('UX Impact');
1391
- expect(NOCTURNAL_PHILOSOPHER_PROMPT).toContain('(weight: 0.20)'); // Principle Alignment
1392
- expect(NOCTURNAL_PHILOSOPHER_PROMPT).toContain('(weight: 0.15)'); // Specificity
1393
- expect(NOCTURNAL_PHILOSOPHER_PROMPT).toContain('(weight: 0.15)'); // Actionability
1394
- expect(NOCTURNAL_PHILOSOPHER_PROMPT).toContain('(weight: 0.15)'); // Executability
1395
- });
1396
-
1397
- it('prompt output format includes scores and risks objects', () => {
1398
- expect(NOCTURNAL_PHILOSOPHER_PROMPT).toContain('"scores"');
1399
- expect(NOCTURNAL_PHILOSOPHER_PROMPT).toContain('"principleAlignment"');
1400
- expect(NOCTURNAL_PHILOSOPHER_PROMPT).toContain('"safetyImpact"');
1401
- expect(NOCTURNAL_PHILOSOPHER_PROMPT).toContain('"uxImpact"');
1402
- expect(NOCTURNAL_PHILOSOPHER_PROMPT).toContain('"risks"');
1403
- expect(NOCTURNAL_PHILOSOPHER_PROMPT).toContain('"falsePositiveEstimate"');
1404
- expect(NOCTURNAL_PHILOSOPHER_PROMPT).toContain('"implementationComplexity"');
1405
- expect(NOCTURNAL_PHILOSOPHER_PROMPT).toContain('"breakingChangeRisk"');
1406
- });
1407
- });
1408
-
1409
- // ---------------------------------------------------------------------------
1410
- // Tests: Philosopher Risk Assessment (PHILO-02)
1411
- // ---------------------------------------------------------------------------
1412
-
1413
- describe('Philosopher Risk Assessment (PHILO-02)', () => {
1414
- it('invokeStubPhilosopher produces risk assessment per candidate', () => {
1415
- const dreamerOutput: DreamerOutput = {
1416
- valid: true,
1417
- candidates: [
1418
- {
1419
- candidateIndex: 0,
1420
- badDecision: 'Did something wrong',
1421
- betterDecision: 'Read the file before editing to verify content',
1422
- rationale: 'A good rationale that explains why this is better',
1423
- confidence: 0.9,
1424
- riskLevel: 'low',
1425
- strategicPerspective: 'conservative_fix',
1426
- },
1427
- {
1428
- candidateIndex: 1,
1429
- badDecision: 'Ignored error messages',
1430
- betterDecision: 'Challenge the original approach entirely',
1431
- rationale: 'A paradigm shift rationale for fundamentally different approach',
1432
- confidence: 0.6,
1433
- riskLevel: 'high',
1434
- strategicPerspective: 'paradigm_shift',
1435
- },
1436
- ],
1437
- generatedAt: new Date().toISOString(),
1438
- };
1439
- const result = invokeStubPhilosopher(dreamerOutput, 'T-01', makeSnapshot() as any);
1440
- expect(result.valid).toBe(true);
1441
- for (const j of result.judgments) {
1442
- expect(j.risks).toBeDefined();
1443
- expect(j.risks!.falsePositiveEstimate).toBeGreaterThanOrEqual(0);
1444
- expect(j.risks!.falsePositiveEstimate).toBeLessThanOrEqual(1);
1445
- expect(['low', 'medium', 'high']).toContain(j.risks!.implementationComplexity);
1446
- expect(typeof j.risks!.breakingChangeRisk).toBe('boolean');
1447
- }
1448
- });
1449
- });
1450
-
1451
- // ---------------------------------------------------------------------------
1452
- // Tests: Philosopher Backward Compatibility (PHILO-03)
1453
- // ---------------------------------------------------------------------------
1454
-
1455
- describe('Philosopher Backward Compatibility (PHILO-03)', () => {
1456
- it('PhilosopherJudgment without scores/risks is valid', () => {
1457
- const judgment: PhilosopherJudgment = {
1458
- candidateIndex: 0,
1459
- critique: 'test',
1460
- principleAligned: true,
1461
- score: 0.8,
1462
- rank: 1,
1463
- };
1464
- expect(judgment.score).toBe(0.8);
1465
- expect(judgment.scores).toBeUndefined();
1466
- expect(judgment.risks).toBeUndefined();
1467
- });
1468
-
1469
- it('runTrinity produces output with 6D scores when candidates have strategicPerspective', () => {
1470
- const snapshot = makeSnapshot({ failureCount: 2, totalPainEvents: 1 });
1471
- const config: TrinityConfig = {
1472
- useTrinity: true,
1473
- maxCandidates: 3,
1474
- useStubs: true,
1475
- };
1476
-
1477
- const result = runTrinity({ snapshot: snapshot as any, principleId: 'T-08', config });
1478
- expect(result.success).toBe(true);
1479
- expect(result.artifact).toBeDefined();
1480
-
1481
- // The stub philosopher should produce 6D scores for stub candidates
1482
- // (stub dreamer assigns strategicPerspective based on principleId)
1483
- if (result.telemetry.philosopher6D) {
1484
- const avgScores = result.telemetry.philosopher6D.avgScores;
1485
- expect(typeof avgScores.principleAlignment).toBe('number');
1486
- expect(typeof avgScores.specificity).toBe('number');
1487
- expect(typeof avgScores.actionability).toBe('number');
1488
- expect(typeof avgScores.executability).toBe('number');
1489
- expect(typeof avgScores.safetyImpact).toBe('number');
1490
- expect(typeof avgScores.uxImpact).toBe('number');
1491
- }
1492
- });
1493
- });
1494
-
1495
- // ---------------------------------------------------------------------------
1496
- // Tests: Stub Philosopher 6D Scoring (D-09)
1497
- // ---------------------------------------------------------------------------
1498
-
1499
- describe('Stub Philosopher 6D Scoring (D-09)', () => {
1500
- it('conservative_fix candidates get high principleAlignment and low risk', () => {
1501
- const dreamerOutput: DreamerOutput = {
1502
- valid: true,
1503
- candidates: [
1504
- {
1505
- candidateIndex: 0,
1506
- badDecision: 'Did something wrong',
1507
- betterDecision: 'Read the file before editing to verify current content',
1508
- rationale: 'Following T-01 requires verifying content before making changes',
1509
- confidence: 0.9,
1510
- riskLevel: 'low',
1511
- strategicPerspective: 'conservative_fix',
1512
- },
1513
- ],
1514
- generatedAt: new Date().toISOString(),
1515
- };
1516
- const result = invokeStubPhilosopher(dreamerOutput, 'T-01', makeSnapshot() as any);
1517
- expect(result.valid).toBe(true);
1518
- const j = result.judgments[0];
1519
- expect(j.scores).toBeDefined();
1520
- expect(j.scores!.principleAlignment).toBeGreaterThanOrEqual(0.9);
1521
- expect(j.scores!.safetyImpact).toBeGreaterThanOrEqual(0.9);
1522
- expect(j.risks).toBeDefined();
1523
- expect(j.risks!.breakingChangeRisk).toBe(false);
1524
- expect(j.risks!.implementationComplexity).toBe('low');
1525
- });
1526
-
1527
- it('paradigm_shift candidates get high breakingChangeRisk', () => {
1528
- const dreamerOutput: DreamerOutput = {
1529
- valid: true,
1530
- candidates: [
1531
- {
1532
- candidateIndex: 0,
1533
- badDecision: 'Ignored all errors',
1534
- betterDecision: 'Challenge the entire approach and redesign from scratch',
1535
- rationale: 'A paradigm shift rationale for a fundamentally different approach',
1536
- confidence: 0.5,
1537
- riskLevel: 'high',
1538
- strategicPerspective: 'paradigm_shift',
1539
- },
1540
- ],
1541
- generatedAt: new Date().toISOString(),
1542
- };
1543
- const result = invokeStubPhilosopher(dreamerOutput, 'T-08', makeSnapshot() as any);
1544
- expect(result.valid).toBe(true);
1545
- const j = result.judgments[0];
1546
- expect(j.scores).toBeDefined();
1547
- expect(j.scores!.safetyImpact).toBeLessThan(0.5);
1548
- expect(j.risks).toBeDefined();
1549
- expect(j.risks!.breakingChangeRisk).toBe(true);
1550
- expect(j.risks!.implementationComplexity).toBe('high');
1551
- });
1552
-
1553
- it('structural_improvement candidates get medium across all dimensions', () => {
1554
- const dreamerOutput: DreamerOutput = {
1555
- valid: true,
1556
- candidates: [
1557
- {
1558
- candidateIndex: 0,
1559
- badDecision: 'Rushed through steps',
1560
- betterDecision: 'Reorder operations and introduce an intermediate checkpoint',
1561
- rationale: 'Structural improvement rationale to reorder operations properly',
1562
- confidence: 0.7,
1563
- riskLevel: 'medium',
1564
- strategicPerspective: 'structural_improvement',
1565
- },
1566
- ],
1567
- generatedAt: new Date().toISOString(),
1568
- };
1569
- const result = invokeStubPhilosopher(dreamerOutput, 'T-03', makeSnapshot() as any);
1570
- expect(result.valid).toBe(true);
1571
- const j = result.judgments[0];
1572
- expect(j.scores).toBeDefined();
1573
- // Medium scores should be between conservative and paradigm
1574
- expect(j.scores!.principleAlignment).toBeGreaterThanOrEqual(0.7);
1575
- expect(j.scores!.principleAlignment).toBeLessThanOrEqual(0.8);
1576
- expect(j.risks).toBeDefined();
1577
- expect(j.risks!.breakingChangeRisk).toBe(false);
1578
- expect(j.risks!.implementationComplexity).toBe('medium');
1579
- });
1580
- });
1581
-
1582
- // ---------------------------------------------------------------------------
1583
- // Tests: TrinityTelemetry — philosopher6D field
1584
- // ---------------------------------------------------------------------------
1585
-
1586
- describe('TrinityTelemetry — philosopher6D field', () => {
1587
- it('accepts optional philosopher6D field', () => {
1588
- const telemetry: TrinityTelemetry = {
1589
- chainMode: 'trinity',
1590
- usedStubs: true,
1591
- dreamerPassed: true,
1592
- philosopherPassed: true,
1593
- scribePassed: true,
1594
- candidateCount: 2,
1595
- selectedCandidateIndex: 0,
1596
- stageFailures: [],
1597
- philosopher6D: {
1598
- avgScores: {
1599
- principleAlignment: 0.85,
1600
- specificity: 0.75,
1601
- actionability: 0.8,
1602
- executability: 0.78,
1603
- safetyImpact: 0.7,
1604
- uxImpact: 0.72,
1605
- },
1606
- highRiskCount: 1,
1607
- },
1608
- };
1609
- expect(telemetry.philosopher6D).toBeDefined();
1610
- expect(telemetry.philosopher6D!.avgScores.principleAlignment).toBe(0.85);
1611
- expect(telemetry.philosopher6D!.highRiskCount).toBe(1);
1612
- });
1613
- });
1614
-
1615
- // ---------------------------------------------------------------------------
1616
- // Tests: Scribe Contrastive Analysis (SCRIBE-01, SCRIBE-02, SCRIBE-03)
1617
- // ---------------------------------------------------------------------------
1618
-
1619
- describe('Scribe Contrastive Analysis (SCRIBE-01, SCRIBE-02, SCRIBE-03)', () => {
1620
- function makeValidArtifact(overrides: Record<string, unknown> = {}): TrinityDraftArtifact {
1621
- return {
1622
- selectedCandidateIndex: 0,
1623
- badDecision: 'Did something wrong',
1624
- betterDecision: 'Do it right',
1625
- rationale: 'Because the principle says so and this is the right approach',
1626
- sessionId: 'session-test-123',
1627
- principleId: 'T-01',
1628
- sourceSnapshotRef: 'snapshot-test-001',
1629
- telemetry: {
1630
- chainMode: 'trinity',
1631
- usedStubs: false,
1632
- dreamerPassed: true,
1633
- philosopherPassed: true,
1634
- scribePassed: true,
1635
- candidateCount: 2,
1636
- selectedCandidateIndex: 0,
1637
- stageFailures: [],
1638
- },
1639
- ...overrides,
1640
- };
1641
- }
1642
-
1643
- it('TrinityDraftArtifact accepts optional rejectedAnalysis fields (SCRIBE-01)', () => {
1644
- const artifact = makeValidArtifact({
1645
- rejectedAnalysis: {
1646
- whyRejected: 'Lower alignment score',
1647
- warningSignals: ['missed pain signal', 'ignored gate block'],
1648
- correctiveThinking: 'Should have verified the routing state before proceeding',
1649
- },
1650
- } as Record<string, unknown>);
1651
- expect(artifact.rejectedAnalysis).toBeDefined();
1652
- expect(artifact.rejectedAnalysis!.whyRejected).toBe('Lower alignment score');
1653
- expect(artifact.rejectedAnalysis!.warningSignals).toHaveLength(2);
1654
- expect(artifact.rejectedAnalysis!.correctiveThinking).toContain('Should have');
1655
- });
1656
-
1657
- it('TrinityDraftArtifact accepts optional chosenJustification fields (SCRIBE-02)', () => {
1658
- const artifact = makeValidArtifact({
1659
- chosenJustification: {
1660
- whyChosen: 'Highest 6D composite score and low breakingChangeRisk',
1661
- keyInsights: ['Verify routing state before file operations', 'Check pain signals early'],
1662
- limitations: ['Does not apply when session has no pain history', 'Less relevant for conservative fixes'],
1663
- },
1664
- } as Record<string, unknown>);
1665
- expect(artifact.chosenJustification).toBeDefined();
1666
- expect(artifact.chosenJustification!.whyChosen).toContain('Highest');
1667
- expect(artifact.chosenJustification!.keyInsights).toHaveLength(2);
1668
- expect(artifact.chosenJustification!.limitations).toHaveLength(2);
1669
- });
1670
-
1671
- it('TrinityDraftArtifact accepts optional contrastiveAnalysis fields (SCRIBE-03)', () => {
1672
- const artifact = makeValidArtifact({
1673
- contrastiveAnalysis: {
1674
- criticalDifference: 'Winner checked routing state; loser proceeded without verification',
1675
- decisionTrigger: 'When session has pain events and gate blocks, verify infrastructure before file operations',
1676
- preventionStrategy: 'Add a pre-flight check: read the routing status and confirm no pending failures',
1677
- },
1678
- } as Record<string, unknown>);
1679
- expect(artifact.contrastiveAnalysis).toBeDefined();
1680
- expect(artifact.contrastiveAnalysis!.criticalDifference).toContain('routing state');
1681
- expect(artifact.contrastiveAnalysis!.decisionTrigger).toContain('When');
1682
- expect(artifact.contrastiveAnalysis!.preventionStrategy).toContain('pre-flight');
1683
- });
1684
-
1685
- it('validateDraftArtifact passes when all three analysis sections are present', () => {
1686
- const artifact = makeValidArtifact({
1687
- rejectedAnalysis: {
1688
- whyRejected: 'Lower score',
1689
- warningSignals: ['missed signal'],
1690
- correctiveThinking: 'Should have checked',
1691
- },
1692
- chosenJustification: {
1693
- whyChosen: 'Best score',
1694
- keyInsights: ['insight 1'],
1695
- limitations: ['limitation 1'],
1696
- },
1697
- contrastiveAnalysis: {
1698
- criticalDifference: 'key difference',
1699
- decisionTrigger: 'When X, do Y',
1700
- preventionStrategy: 'avoid the rejected path',
1701
- },
1702
- } as Record<string, unknown>);
1703
- const result = validateDraftArtifact(artifact);
1704
- expect(result.valid).toBe(true);
1705
- expect(result.failures).toHaveLength(0);
1706
- });
1707
-
1708
- it('RejectedAnalysis interface accepts all required fields', () => {
1709
- const analysis: RejectedAnalysis = {
1710
- whyRejected: 'test reason',
1711
- warningSignals: ['signal 1', 'signal 2'],
1712
- correctiveThinking: 'correct path',
1713
- };
1714
- expect(analysis.whyRejected).toBe('test reason');
1715
- expect(analysis.warningSignals).toHaveLength(2);
1716
- expect(analysis.correctiveThinking).toBe('correct path');
1717
- });
1718
-
1719
- it('ChosenJustification interface accepts all required fields', () => {
1720
- const justification: ChosenJustification = {
1721
- whyChosen: 'test reason',
1722
- keyInsights: ['insight 1', 'insight 2', 'insight 3'],
1723
- limitations: ['limitation 1'],
1724
- };
1725
- expect(justification.whyChosen).toBe('test reason');
1726
- expect(justification.keyInsights).toHaveLength(3);
1727
- expect(justification.limitations).toHaveLength(1);
1728
- });
1729
-
1730
- it('ContrastiveAnalysis interface accepts all required fields', () => {
1731
- const analysis: ContrastiveAnalysis = {
1732
- criticalDifference: 'key insight',
1733
- decisionTrigger: 'When X, do Y',
1734
- preventionStrategy: 'avoid the rejected path',
1735
- };
1736
- expect(analysis.criticalDifference).toBe('key insight');
1737
- expect(analysis.decisionTrigger).toBe('When X, do Y');
1738
- expect(analysis.preventionStrategy).toBe('avoid the rejected path');
1739
- });
1740
- });
1741
-
1742
- // ---------------------------------------------------------------------------
1743
- // Tests: Scribe Backward Compatibility (SCRIBE-04)
1744
- // ---------------------------------------------------------------------------
1745
-
1746
- describe('Scribe Backward Compatibility (SCRIBE-04)', () => {
1747
- function makeValidArtifact(): TrinityDraftArtifact {
1748
- return {
1749
- selectedCandidateIndex: 0,
1750
- badDecision: 'Did something wrong',
1751
- betterDecision: 'Do it right',
1752
- rationale: 'Because the principle says so and this is the right approach',
1753
- sessionId: 'session-test-123',
1754
- principleId: 'T-01',
1755
- sourceSnapshotRef: 'snapshot-test-001',
1756
- telemetry: {
1757
- chainMode: 'trinity',
1758
- usedStubs: false,
1759
- dreamerPassed: true,
1760
- philosopherPassed: true,
1761
- scribePassed: true,
1762
- candidateCount: 2,
1763
- selectedCandidateIndex: 0,
1764
- stageFailures: [],
1765
- },
1766
- };
1767
- }
1768
-
1769
- it('TrinityDraftArtifact without contrastiveAnalysis fields is valid', () => {
1770
- const artifact = makeValidArtifact();
1771
- expect(artifact.contrastiveAnalysis).toBeUndefined();
1772
- expect(artifact.rejectedAnalysis).toBeUndefined();
1773
- expect(artifact.chosenJustification).toBeUndefined();
1774
- const result = validateDraftArtifact(artifact);
1775
- expect(result.valid).toBe(true);
1776
- expect(result.failures).toHaveLength(0);
1777
- });
1778
-
1779
- it('artifact without new fields produces identical output via draftToArtifact', () => {
1780
- const artifact = makeValidArtifact();
1781
- const nocturnalArtifact = draftToArtifact(artifact);
1782
- expect(nocturnalArtifact.badDecision).toBe('Did something wrong');
1783
- expect(nocturnalArtifact.betterDecision).toBe('Do it right');
1784
- expect(nocturnalArtifact.principleId).toBe('T-01');
1785
- });
1786
-
1787
- it('runTrinity produces artifact without contrastiveAnalysis when useStubs=true', () => {
1788
- const snapshot = {
1789
- sessionId: 'session-backward-compat',
1790
- startedAt: '2026-04-13T00:00:00.000Z',
1791
- updatedAt: '2026-04-13T00:05:00.000Z',
1792
- assistantTurns: [],
1793
- userTurns: [],
1794
- toolCalls: [],
1795
- painEvents: [],
1796
- gateBlocks: [],
1797
- stats: {
1798
- failureCount: 1,
1799
- totalPainEvents: 0,
1800
- totalGateBlocks: 0,
1801
- totalAssistantTurns: 5,
1802
- totalToolCalls: 10,
1803
- },
1804
- };
1805
- const config: TrinityConfig = {
1806
- useTrinity: true,
1807
- maxCandidates: 3,
1808
- useStubs: true,
1809
- };
1810
-
1811
- const result = runTrinity({ snapshot: snapshot as any, principleId: 'T-08', config });
1812
- expect(result.success).toBe(true);
1813
- expect(result.artifact).toBeDefined();
1814
- expect(result.artifact!.contrastiveAnalysis).toBeUndefined();
1815
- expect(result.artifact!.rejectedAnalysis).toBeUndefined();
1816
- expect(result.artifact!.chosenJustification).toBeUndefined();
1817
- });
1818
- });
1819
-
1820
- // ---------------------------------------------------------------------------
1821
- // Tests: validateExtraction — Hallucination Detection (SDK-QUAL-02)
1822
- // ---------------------------------------------------------------------------
1823
-
1824
- describe('validateExtraction — Hallucination Detection (SDK-QUAL-02)', () => {
1825
- function makeArtifact(badDecision: string, overrides: Record<string, unknown> = {}): TrinityDraftArtifact {
1826
- return {
1827
- selectedCandidateIndex: 0,
1828
- badDecision,
1829
- betterDecision: 'Do it right instead',
1830
- rationale: 'Because the principle says so and this is the correct approach',
1831
- sessionId: 'session-test-123',
1832
- principleId: 'T-01',
1833
- sourceSnapshotRef: 'snapshot-test-001',
1834
- telemetry: {
1835
- chainMode: 'trinity',
1836
- usedStubs: true,
1837
- dreamerPassed: true,
1838
- philosopherPassed: true,
1839
- scribePassed: true,
1840
- candidateCount: 1,
1841
- selectedCandidateIndex: 0,
1842
- stageFailures: [],
1843
- },
1844
- ...overrides,
1845
- };
1846
- }
1847
-
1848
- function makeSnapshotWithEvidence(overrides: {
1849
- failedToolCalls?: Array<{ toolName: string; filePath?: string; errorMessage?: string }>;
1850
- painEvents?: Array<{ source: string; score: number; reason?: string }>;
1851
- gateBlocks?: Array<{ toolName: string; reason: string }>;
1852
- userCorrections?: number;
1853
- } = {}) {
1854
- const toolCalls = (overrides.failedToolCalls ?? []).map(tc => ({
1855
- toolName: tc.toolName,
1856
- outcome: 'failure' as const,
1857
- filePath: tc.filePath ?? null,
1858
- durationMs: null,
1859
- exitCode: 1,
1860
- errorType: 'runtime_error',
1861
- errorMessage: tc.errorMessage ?? 'unknown error',
1862
- createdAt: '2026-04-17T00:00:00.000Z',
1863
- }));
1864
-
1865
- const painEvents = (overrides.painEvents ?? []).map(pe => ({
1866
- source: pe.source,
1867
- score: pe.score,
1868
- severity: 'medium' as const,
1869
- reason: pe.reason ?? null,
1870
- createdAt: '2026-04-17T00:00:00.000Z',
1871
- }));
1872
-
1873
- const gateBlocks = (overrides.gateBlocks ?? []).map(gb => ({
1874
- toolName: gb.toolName,
1875
- filePath: null,
1876
- reason: gb.reason,
1877
- planStatus: null,
1878
- createdAt: '2026-04-17T00:00:00.000Z',
1879
- }));
1880
-
1881
- const userTurns = Array.from({ length: overrides.userCorrections ?? 0 }, (_, i) => ({
1882
- turnIndex: i,
1883
- correctionDetected: true,
1884
- correctionCue: 'wrong approach',
1885
- createdAt: '2026-04-17T00:00:00.000Z',
1886
- }));
1887
-
1888
- return {
1889
- sessionId: 'session-test-123',
1890
- startedAt: '2026-04-17T00:00:00.000Z',
1891
- updatedAt: '2026-04-17T00:05:00.000Z',
1892
- assistantTurns: [],
1893
- userTurns,
1894
- toolCalls: toolCalls,
1895
- painEvents,
1896
- gateBlocks,
1897
- stats: {
1898
- failureCount: toolCalls.length,
1899
- totalPainEvents: painEvents.length,
1900
- totalGateBlocks: gateBlocks.length,
1901
- totalAssistantTurns: 5,
1902
- totalToolCalls: 10,
1903
- },
1904
- };
1905
- }
1906
-
1907
- it('passes when badDecision references a tool failure from the snapshot', () => {
1908
- const snapshot = makeSnapshotWithEvidence({
1909
- failedToolCalls: [{ toolName: 'Edit', filePath: 'src/config.ts', errorMessage: 'permission denied' }],
1910
- });
1911
- const artifact = makeArtifact('Proceeded with Edit on src/config.ts without checking permission');
1912
-
1913
- const result = validateExtraction(artifact, snapshot as any);
1914
-
1915
- expect(result.isGrounded).toBe(true);
1916
- expect(result.evidenceTypes).toContain('tool_failures');
1917
- });
1918
-
1919
- it('passes when badDecision references a pain event from the snapshot', () => {
1920
- const snapshot = makeSnapshotWithEvidence({
1921
- painEvents: [{ source: 'gate', score: 70, reason: 'accumulated friction from repeated file operation failures' }],
1922
- });
1923
- const artifact = makeArtifact('Ignored accumulated friction from file operations');
1924
-
1925
- const result = validateExtraction(artifact, snapshot as any);
1926
-
1927
- expect(result.isGrounded).toBe(true);
1928
- expect(result.evidenceTypes).toContain('pain_events');
1929
- });
1930
-
1931
- it('passes when badDecision references a gate block from the snapshot', () => {
1932
- const snapshot = makeSnapshotWithEvidence({
1933
- gateBlocks: [{ toolName: 'Bash', reason: 'destructive command blocked by safety gate' }],
1934
- });
1935
- const artifact = makeArtifact('Attempted to execute a destructive Bash command that was blocked by the gate');
1936
-
1937
- const result = validateExtraction(artifact, snapshot as any);
1938
-
1939
- expect(result.isGrounded).toBe(true);
1940
- expect(result.evidenceTypes).toContain('gate_blocks');
1941
- });
1942
-
1943
- it('passes when badDecision references user corrections', () => {
1944
- const snapshot = makeSnapshotWithEvidence({
1945
- userCorrections: 2,
1946
- });
1947
- const artifact = makeArtifact('Continued with the wrong approach despite user corrections');
1948
-
1949
- const result = validateExtraction(artifact, snapshot as any);
1950
-
1951
- expect(result.isGrounded).toBe(true);
1952
- expect(result.evidenceTypes).toContain('user_corrections');
1953
- });
1954
-
1955
- it('detects hallucination when badDecision has no overlap with snapshot evidence', () => {
1956
- const snapshot = makeSnapshotWithEvidence({
1957
- failedToolCalls: [{ toolName: 'Read', filePath: 'package.json', errorMessage: 'file not found' }],
1958
- });
1959
- const artifact = makeArtifact('Deployed production database without running migration scripts first');
1960
-
1961
- const result = validateExtraction(artifact, snapshot as any);
1962
-
1963
- expect(result.isGrounded).toBe(false);
1964
- expect(result.reason).toContain('Hallucinated extraction');
1965
- });
1966
-
1967
- it('passes when snapshot has no evidence at all (no signal to validate against)', () => {
1968
- const snapshot = makeSnapshotWithEvidence();
1969
- const artifact = makeArtifact('Made an incorrect decision during the session');
1970
-
1971
- const result = validateExtraction(artifact, snapshot as any);
1972
-
1973
- // No evidence means we cannot validate -- allow through
1974
- expect(result.isGrounded).toBe(true);
1975
- expect(result.evidenceTypes).toHaveLength(0);
1976
- });
1977
-
1978
- it('provides evidence preview for telemetry', () => {
1979
- const snapshot = makeSnapshotWithEvidence({
1980
- failedToolCalls: [{ toolName: 'Write', filePath: 'output.log', errorMessage: 'permission denied for write operation' }],
1981
- painEvents: [{ source: 'hook', score: 80, reason: 'repeated permission denied failures during write operation' }],
1982
- });
1983
- const artifact = makeArtifact('Proceeded with write operation on output.log despite permission denied error');
1984
-
1985
- const result = validateExtraction(artifact, snapshot as any);
1986
-
1987
- expect(result.isGrounded).toBe(true);
1988
- expect(result.evidencePreview.length).toBeGreaterThan(0);
1989
- expect(result.evidenceTypes).toContain('tool_failures');
1990
- expect(result.evidenceTypes).toContain('pain_events');
1991
- });
1992
-
1993
- it('detects hallucination with unrelated but specific badDecision text', () => {
1994
- const snapshot = makeSnapshotWithEvidence({
1995
- painEvents: [{ source: 'gate', score: 60, reason: 'rate limit exceeded for API calls' }],
1996
- });
1997
- const artifact = makeArtifact('Deleted the primary database without creating a backup first');
1998
-
1999
- const result = validateExtraction(artifact, snapshot as any);
2000
-
2001
- expect(result.isGrounded).toBe(false);
2002
- });
2003
-
2004
- it('runTrinity stub path fails when hallucination is detected', () => {
2005
- // Create a snapshot with failure signals so stub candidates are generated
2006
- // but override the tool calls to be something completely unrelated to what
2007
- // the stub Dreamer generates (which mentions "failing operation")
2008
- const snapshot = {
2009
- sessionId: 'session-hallucination-test',
2010
- startedAt: '2026-04-17T00:00:00.000Z',
2011
- updatedAt: '2026-04-17T00:05:00.000Z',
2012
- assistantTurns: [],
2013
- userTurns: [],
2014
- toolCalls: [
2015
- {
2016
- toolName: 'Grep',
2017
- outcome: 'failure' as const,
2018
- filePath: null,
2019
- durationMs: null,
2020
- exitCode: 1,
2021
- errorType: 'timeout',
2022
- errorMessage: 'search timed out after 30 seconds',
2023
- createdAt: '2026-04-17T00:00:00.000Z',
2024
- },
2025
- ],
2026
- painEvents: [],
2027
- gateBlocks: [],
2028
- stats: {
2029
- failureCount: 1,
2030
- totalPainEvents: 0,
2031
- totalGateBlocks: 0,
2032
- totalAssistantTurns: 2,
2033
- totalToolCalls: 1,
2034
- },
2035
- };
2036
-
2037
- const config: TrinityConfig = {
2038
- useTrinity: true,
2039
- maxCandidates: 3,
2040
- useStubs: true,
2041
- };
2042
-
2043
- const result = runTrinity({ snapshot: snapshot as any, principleId: 'T-08', config });
2044
-
2045
- // The stub Dreamer generates candidates mentioning "failing operation" and "config.json"
2046
- // The snapshot has a Grep failure with "search timed out"
2047
- // With the normalized token matching: badDecisionTokens = {retry,faili,oper,diagnos,root,caus}
2048
- // and evidenceTokens = {search,timed,after,seconds,timedout} — no overlap → extraction fails
2049
- // So result.success must be false with a Hallucinated failure.
2050
- expect(result.success).toBe(false);
2051
- expect(result.failures.some(f => f.reason?.includes('Hallucinated'))).toBe(true);
2052
- });
2053
- });