principles-disciple 1.71.0 → 1.73.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (309) hide show
  1. package/openclaw.plugin.json +10 -5
  2. package/package.json +17 -19
  3. package/scripts/acceptance-test.mjs +16 -73
  4. package/scripts/sync-plugin.mjs +382 -77
  5. package/src/commands/archive-impl.ts +2 -1
  6. package/src/commands/capabilities.ts +2 -2
  7. package/src/commands/context.ts +2 -2
  8. package/src/commands/disable-impl.ts +2 -1
  9. package/src/commands/evolution-status.ts +16 -16
  10. package/src/commands/export.ts +12 -67
  11. package/src/commands/pain.ts +91 -1
  12. package/src/commands/principle-rollback.ts +2 -1
  13. package/src/commands/promote-impl.ts +7 -43
  14. package/src/commands/rollback-impl.ts +2 -1
  15. package/src/commands/rollback.ts +2 -1
  16. package/src/commands/samples.ts +2 -1
  17. package/src/commands/thinking-os.ts +2 -1
  18. package/src/config/errors.ts +18 -2
  19. package/src/constants/diagnostician.ts +2 -2
  20. package/src/constants/tools.ts +2 -1
  21. package/src/core/__tests__/focus-history.test.ts +210 -0
  22. package/src/core/config.ts +1 -1
  23. package/src/core/confirm-first-gate.ts +255 -0
  24. package/src/core/correction-cue-learner.ts +2 -136
  25. package/src/core/correction-types.ts +16 -88
  26. package/src/core/dictionary.ts +19 -20
  27. package/src/core/empathy-keyword-matcher.ts +17 -289
  28. package/src/core/empathy-types.ts +18 -229
  29. package/src/core/event-log.ts +38 -132
  30. package/src/core/evolution-reducer.ts +21 -2
  31. package/src/core/evolution-types.ts +76 -464
  32. package/src/core/file-store.ts +80 -0
  33. package/src/core/focus-history.ts +228 -955
  34. package/src/core/local-worker-routing.ts +34 -314
  35. package/src/core/merge-gate-audit.ts +0 -195
  36. package/src/core/pain-diagnostic-gate.ts +154 -0
  37. package/src/core/pain-signal.ts +21 -138
  38. package/src/core/pain.ts +15 -88
  39. package/src/core/pd-task-reconciler.ts +26 -115
  40. package/src/core/pd-task-service.ts +9 -9
  41. package/src/core/pd-task-types.ts +23 -127
  42. package/src/core/principle-compiler/__tests__/compiler-replay-gate.test.ts +174 -0
  43. package/src/core/principle-compiler/code-validator.ts +15 -42
  44. package/src/core/principle-compiler/compiler.ts +100 -15
  45. package/src/core/principle-compiler/index.ts +5 -2
  46. package/src/core/principle-compiler/template-generator.ts +4 -104
  47. package/src/core/principle-injection.ts +10 -202
  48. package/src/core/principle-internalization/filesystem-lifecycle-datasource.ts +42 -0
  49. package/src/core/principle-internalization/lifecycle-read-model.ts +39 -242
  50. package/src/core/principle-internalization/principle-lifecycle-service.ts +12 -10
  51. package/src/core/principle-tree-ledger-adapter.ts +145 -0
  52. package/src/core/principle-tree-ledger.ts +8 -6
  53. package/src/core/reflection/reflection-context.ts +14 -109
  54. package/src/core/replay-engine.ts +8 -500
  55. package/src/core/rule-host-helpers.ts +5 -35
  56. package/src/core/rule-host-types.ts +10 -82
  57. package/src/core/rule-host.ts +6 -63
  58. package/src/core/runtime-v2-prompt-activation-reader.ts +231 -0
  59. package/src/core/session-tracker.ts +87 -101
  60. package/src/core/shadow-observation-registry.ts +19 -48
  61. package/src/core/trajectory.ts +3 -1
  62. package/src/core/workflow-funnel-loader.ts +62 -68
  63. package/src/core/workspace-context.ts +46 -0
  64. package/src/core/workspace-dir-service.ts +1 -1
  65. package/src/core/workspace-dir-validation.ts +18 -9
  66. package/src/hooks/AGENTS.md +1 -1
  67. package/src/hooks/gate-block-helper.ts +46 -44
  68. package/src/hooks/gate.ts +207 -7
  69. package/src/hooks/lifecycle.ts +30 -32
  70. package/src/hooks/llm.ts +60 -32
  71. package/src/hooks/pain.ts +297 -103
  72. package/src/hooks/prompt.ts +469 -339
  73. package/src/hooks/subagent.ts +2 -29
  74. package/src/i18n/commands.ts +2 -10
  75. package/src/index.ts +95 -85
  76. package/src/openclaw-sdk.ts +311 -0
  77. package/src/service/central-database.ts +8 -4
  78. package/src/service/evolution-queue-migration.ts +2 -1
  79. package/src/service/evolution-worker.ts +163 -1786
  80. package/src/service/internalization-trigger-adapter.ts +302 -0
  81. package/src/service/keyword-optimization-service.ts +4 -4
  82. package/src/service/monitoring-query-service.ts +1 -215
  83. package/src/service/queue-io.ts +60 -331
  84. package/src/service/runtime-summary-service.ts +115 -18
  85. package/src/service/subagent-workflow/index.ts +0 -41
  86. package/src/service/subagent-workflow/types.ts +9 -120
  87. package/src/service/subagent-workflow/workflow-store.ts +2 -119
  88. package/src/service/workflow-watchdog.ts +0 -43
  89. package/src/types/event-payload.ts +16 -74
  90. package/src/types/event-types.ts +39 -547
  91. package/src/types/hygiene-types.ts +7 -30
  92. package/src/types/principle-tree-schema.ts +20 -222
  93. package/src/types/queue.ts +15 -70
  94. package/src/types/runtime-summary.ts +5 -49
  95. package/src/utils/io.ts +10 -0
  96. package/src/utils/retry.ts +1 -1
  97. package/src/utils/shadow-fingerprint.ts +2 -2
  98. package/src/utils/workspace-resolver.ts +50 -0
  99. package/templates/langs/en/core/AGENTS.md +2 -2
  100. package/templates/langs/en/core/BOOT.md +1 -1
  101. package/templates/langs/en/core/HEARTBEAT.md +2 -2
  102. package/templates/langs/en/skills/ai-sprint-orchestration/references/agent-registry.json +1 -72
  103. package/templates/langs/en/skills/ai-sprint-orchestration/references/specs/bugfix-complex-template.json +6 -6
  104. package/templates/langs/en/skills/ai-sprint-orchestration/references/specs/feature-complex-template.json +6 -6
  105. package/templates/langs/en/skills/ai-sprint-orchestration/references/specs/workflow-validation-minimal-verify.json +2 -12
  106. package/templates/langs/en/skills/ai-sprint-orchestration/references/specs/workflow-validation-minimal.json +2 -12
  107. package/templates/langs/en/skills/ai-sprint-orchestration/runtime/.gitignore +2 -2
  108. package/templates/langs/en/skills/ai-sprint-orchestration/scripts/run.mjs +51 -15
  109. package/templates/langs/en/skills/evolve-task/SKILL.md +1 -1
  110. package/templates/langs/en/skills/pd-cli-operator/SKILL.md +67 -0
  111. package/templates/langs/en/skills/pd-diagnostician/SKILL.md +1 -1
  112. package/templates/langs/en/skills/pd-mentor/SKILL.md +1 -1
  113. package/templates/langs/en/skills/pd-pain-signal/SKILL.md +17 -39
  114. package/templates/langs/en/skills/pd-runtime-v2/SKILL.md +61 -0
  115. package/templates/langs/zh/core/AGENTS.md +2 -2
  116. package/templates/langs/zh/core/BOOT.md +1 -1
  117. package/templates/langs/zh/core/HEARTBEAT.md +2 -2
  118. package/templates/langs/zh/skills/ai-sprint-orchestration/references/agent-registry.json +1 -72
  119. package/templates/langs/zh/skills/ai-sprint-orchestration/references/specs/bugfix-complex-template.json +6 -6
  120. package/templates/langs/zh/skills/ai-sprint-orchestration/references/specs/feature-complex-template.json +6 -6
  121. package/templates/langs/zh/skills/ai-sprint-orchestration/references/specs/nocturnal-trinity-quality-enhancement.json +8 -8
  122. package/templates/langs/zh/skills/ai-sprint-orchestration/references/specs/workflow-validation-minimal-verify.json +2 -12
  123. package/templates/langs/zh/skills/ai-sprint-orchestration/references/specs/workflow-validation-minimal.json +2 -12
  124. package/templates/langs/zh/skills/ai-sprint-orchestration/runtime/.gitignore +2 -2
  125. package/templates/langs/zh/skills/ai-sprint-orchestration/scripts/run.mjs +51 -15
  126. package/templates/langs/zh/skills/ai-sprint-orchestration/test/run.test.mjs +21 -5
  127. package/templates/langs/zh/skills/evolve-task/SKILL.md +2 -2
  128. package/templates/langs/zh/skills/pd-cli-operator/SKILL.md +67 -0
  129. package/templates/langs/zh/skills/pd-diagnostician/SKILL.md +1 -1
  130. package/templates/langs/zh/skills/pd-mentor/SKILL.md +1 -1
  131. package/templates/langs/zh/skills/pd-pain-signal/SKILL.md +17 -38
  132. package/templates/langs/zh/skills/pd-runtime-v2/SKILL.md +61 -0
  133. package/tests/build-artifacts.test.ts +1 -3
  134. package/tests/commands/evolution-status.test.ts +0 -118
  135. package/tests/core/bootstrap-rules.test.ts +1 -1
  136. package/tests/core/config.test.ts +1 -1
  137. package/tests/core/event-log.test.ts +35 -0
  138. package/tests/core/evolution-engine.test.ts +610 -0
  139. package/tests/core/file-store.test.ts +102 -0
  140. package/tests/core/focus-history.test.ts +203 -11
  141. package/tests/core/merge-gate-audit.test.ts +2 -169
  142. package/tests/core/model-deployment-registry.test.ts +7 -1
  143. package/tests/core/model-training-registry.test.ts +19 -0
  144. package/tests/core/observability.test.ts +0 -1
  145. package/tests/core/pain-diagnostic-gate.test.ts +498 -0
  146. package/tests/core/pain.test.ts +0 -1
  147. package/tests/core/principle-internalization/deprecated-readiness.test.ts +2 -2
  148. package/tests/core/principle-internalization/lifecycle-metrics.test.ts +2 -2
  149. package/tests/core/principle-internalization/{internalization-routing-policy.test.ts → lifecycle-routing-policy.test.ts} +6 -6
  150. package/tests/core/principle-internalization/lineage-source-retired.test.ts +56 -0
  151. package/tests/core/principle-internalization/principle-lifecycle-service.test.ts +1 -23
  152. package/tests/core/principle-tree-ledger-adapter.test.ts +253 -0
  153. package/tests/core/reflection-context.test.ts +0 -14
  154. package/tests/core/replay-engine.test.ts +127 -215
  155. package/tests/core/rule-host-helpers.test.ts +2 -2
  156. package/tests/core/rule-implementation-runtime.test.ts +0 -27
  157. package/tests/core/workflow-funnel-loader.test.ts +162 -0
  158. package/tests/core/workspace-dir-validation.test.ts +8 -1
  159. package/tests/core-anti-growth.test.ts +192 -0
  160. package/tests/hook-workspace-nextaction-contract.test.ts +42 -0
  161. package/tests/hooks/confirm-first-gate.test.ts +333 -0
  162. package/tests/hooks/gate-auto-correct-shadow.test.ts +310 -0
  163. package/tests/hooks/gate-auto-correct.test.ts +665 -0
  164. package/tests/hooks/gate-rule-host-pipeline.test.ts +2 -1
  165. package/tests/hooks/pain.test.ts +269 -12
  166. package/tests/hooks/prompt-characterization.test.ts +500 -0
  167. package/tests/hooks/prompt-size-guard.test.ts +329 -0
  168. package/tests/hooks/runtime-v2-prompt-activation.test.ts +869 -0
  169. package/tests/index.test.ts +94 -1
  170. package/tests/integration/auto-entry-gate.test.ts +248 -0
  171. package/tests/integration/internalization-trigger-guard.test.ts +69 -0
  172. package/tests/integration/m8-legacy-paths.test.ts +63 -0
  173. package/tests/integration/runtime-v2-pain-guard.test.ts +125 -0
  174. package/tests/plugin-config-resolution-cutover.test.ts +359 -0
  175. package/tests/runtime-v2-discovery-guard.test.ts +154 -0
  176. package/tests/service/central-database.test.ts +457 -0
  177. package/tests/service/evolution-worker.correction-observer.test.ts +173 -0
  178. package/tests/service/evolution-worker.timeout.test.ts +11 -129
  179. package/tests/service/internalization-trigger-adapter.test.ts +251 -0
  180. package/tests/service/monitoring-query-service.test.ts +1 -47
  181. package/tests/service/queue-io.test.ts +1 -62
  182. package/tests/service/runtime-summary-service.test.ts +184 -3
  183. package/tests/service/workflow-watchdog.test.ts +0 -91
  184. package/tests/utils/file-lock.test.ts +5 -3
  185. package/tests/utils/session-key.test.ts +52 -0
  186. package/tests/utils/subagent-probe.test.ts +48 -1
  187. package/vitest.config.ts +4 -11
  188. package/.planning/codebase/ARCHITECTURE.md +0 -157
  189. package/.planning/codebase/CONCERNS.md +0 -145
  190. package/.planning/codebase/CONVENTIONS.md +0 -148
  191. package/.planning/codebase/INTEGRATIONS.md +0 -81
  192. package/.planning/codebase/STACK.md +0 -87
  193. package/.planning/codebase/STRUCTURE.md +0 -193
  194. package/.planning/codebase/TESTING.md +0 -243
  195. package/.planning/phases/01-basic-visualization/01-GAP-CLOSURE-VERIFICATION.md +0 -113
  196. package/docs/COMMAND_REFERENCE.md +0 -76
  197. package/docs/COMMAND_REFERENCE_EN.md +0 -79
  198. package/scripts/build-web.mjs +0 -46
  199. package/scripts/diagnose-nocturnal.mjs +0 -537
  200. package/scripts/seed-nocturnal-scenarios.mjs +0 -384
  201. package/src/commands/nocturnal-review.ts +0 -322
  202. package/src/commands/nocturnal-rollout.ts +0 -790
  203. package/src/commands/nocturnal-train.ts +0 -986
  204. package/src/commands/pd-reflect.ts +0 -88
  205. package/src/core/adaptive-thresholds.ts +0 -478
  206. package/src/core/diagnostician-task-store.ts +0 -192
  207. package/src/core/nocturnal-arbiter.ts +0 -715
  208. package/src/core/nocturnal-artifact-lineage.ts +0 -116
  209. package/src/core/nocturnal-artificer.ts +0 -257
  210. package/src/core/nocturnal-candidate-scoring.ts +0 -530
  211. package/src/core/nocturnal-compliance.ts +0 -1146
  212. package/src/core/nocturnal-dataset.ts +0 -763
  213. package/src/core/nocturnal-executability.ts +0 -428
  214. package/src/core/nocturnal-export.ts +0 -499
  215. package/src/core/nocturnal-paths.ts +0 -240
  216. package/src/core/nocturnal-reasoning-deriver.ts +0 -343
  217. package/src/core/nocturnal-rule-implementation-validator.ts +0 -246
  218. package/src/core/nocturnal-snapshot-contract.ts +0 -99
  219. package/src/core/nocturnal-trajectory-extractor.ts +0 -512
  220. package/src/core/nocturnal-trinity-types.ts +0 -218
  221. package/src/core/nocturnal-trinity.ts +0 -2680
  222. package/src/core/principle-internalization/deprecated-readiness.ts +0 -93
  223. package/src/core/principle-internalization/internalization-routing-policy.ts +0 -208
  224. package/src/core/principle-internalization/lifecycle-metrics.ts +0 -152
  225. package/src/http/principles-console-route.ts +0 -709
  226. package/src/service/central-health-service.ts +0 -49
  227. package/src/service/central-overview-service.ts +0 -138
  228. package/src/service/control-ui-query-service.ts +0 -900
  229. package/src/service/cooldown-strategy.ts +0 -97
  230. package/src/service/evolution-pain-context.ts +0 -79
  231. package/src/service/evolution-query-service.ts +0 -407
  232. package/src/service/health-query-service.ts +0 -1038
  233. package/src/service/nocturnal-config.ts +0 -214
  234. package/src/service/nocturnal-runtime.ts +0 -734
  235. package/src/service/nocturnal-service.ts +0 -1605
  236. package/src/service/nocturnal-target-selector.ts +0 -545
  237. package/src/service/sleep-cycle.ts +0 -157
  238. package/src/service/startup-reconciler.ts +0 -112
  239. package/src/service/subagent-workflow/correction-observer-types.ts +0 -82
  240. package/src/service/subagent-workflow/correction-observer-workflow-manager.ts +0 -250
  241. package/src/service/subagent-workflow/deep-reflect-workflow-manager.ts +0 -1
  242. package/src/service/subagent-workflow/dynamic-timeout.ts +0 -30
  243. package/src/service/subagent-workflow/empathy-observer-workflow-manager.ts +0 -268
  244. package/src/service/subagent-workflow/nocturnal-workflow-manager.ts +0 -795
  245. package/src/service/subagent-workflow/runtime-direct-driver.ts +0 -268
  246. package/src/service/subagent-workflow/workflow-manager-base.ts +0 -580
  247. package/src/tools/write-pain-flag.ts +0 -215
  248. package/tests/commands/nocturnal-review.test.ts +0 -448
  249. package/tests/commands/nocturnal-train.test.ts +0 -97
  250. package/tests/commands/pd-reflect.test.ts +0 -49
  251. package/tests/core/adaptive-thresholds.test.ts +0 -261
  252. package/tests/core/nocturnal-arbiter.test.ts +0 -559
  253. package/tests/core/nocturnal-artifact-lineage.test.ts +0 -53
  254. package/tests/core/nocturnal-artificer.test.ts +0 -241
  255. package/tests/core/nocturnal-candidate-scoring.test.ts +0 -532
  256. package/tests/core/nocturnal-compliance-p-principles.test.ts +0 -133
  257. package/tests/core/nocturnal-compliance.test.ts +0 -646
  258. package/tests/core/nocturnal-dataset.test.ts +0 -892
  259. package/tests/core/nocturnal-e2e.test.ts +0 -234
  260. package/tests/core/nocturnal-executability.test.ts +0 -357
  261. package/tests/core/nocturnal-export.test.ts +0 -517
  262. package/tests/core/nocturnal-reasoning-deriver.test.ts +0 -372
  263. package/tests/core/nocturnal-reviewed-subset-comparison.test.ts +0 -428
  264. package/tests/core/nocturnal-rule-implementation-validator.test.ts +0 -127
  265. package/tests/core/nocturnal-snapshot-contract.test.ts +0 -121
  266. package/tests/core/nocturnal-trajectory-extractor.test.ts +0 -634
  267. package/tests/core/nocturnal-trinity.test.ts +0 -2053
  268. package/tests/core/pain-auto-repair.test.ts +0 -96
  269. package/tests/core/pain-integration.test.ts +0 -510
  270. package/tests/fixtures/nocturnal-reviewed-subset.json +0 -183
  271. package/tests/http/principles-console-route.test.ts +0 -162
  272. package/tests/integration/chaos-resilience.test.ts +0 -348
  273. package/tests/integration/empathy-workflow-integration.test.ts +0 -626
  274. package/tests/integration/pain-diagnostician-loop.e2e.test.ts +0 -380
  275. package/tests/service/control-ui-query-service.test.ts +0 -121
  276. package/tests/service/cooldown-strategy.test.ts +0 -164
  277. package/tests/service/data-endpoints-regression.test.ts +0 -834
  278. package/tests/service/empathy-observer-workflow-manager.test.ts +0 -175
  279. package/tests/service/evolution-worker.nocturnal.test.ts +0 -601
  280. package/tests/service/nocturnal-runtime-hardening.test.ts +0 -118
  281. package/tests/service/nocturnal-runtime.test.ts +0 -473
  282. package/tests/service/nocturnal-service-code-candidate.test.ts +0 -330
  283. package/tests/service/nocturnal-target-selector.test.ts +0 -615
  284. package/tests/service/startup-reconciler.test.ts +0 -148
  285. package/tests/tools/write-pain-flag.test.ts +0 -358
  286. package/ui/src/App.tsx +0 -45
  287. package/ui/src/api.ts +0 -220
  288. package/ui/src/charts.tsx +0 -955
  289. package/ui/src/components/ErrorState.tsx +0 -6
  290. package/ui/src/components/Loading.tsx +0 -13
  291. package/ui/src/components/ProtectedRoute.tsx +0 -12
  292. package/ui/src/components/Shell.tsx +0 -91
  293. package/ui/src/components/WorkspaceConfig.tsx +0 -178
  294. package/ui/src/components/index.ts +0 -5
  295. package/ui/src/context/auth.tsx +0 -80
  296. package/ui/src/context/theme.tsx +0 -66
  297. package/ui/src/hooks/useAutoRefresh.ts +0 -39
  298. package/ui/src/i18n/ui.ts +0 -473
  299. package/ui/src/main.tsx +0 -16
  300. package/ui/src/pages/EvolutionPage.tsx +0 -333
  301. package/ui/src/pages/FeedbackPage.tsx +0 -138
  302. package/ui/src/pages/GateMonitorPage.tsx +0 -136
  303. package/ui/src/pages/LoginPage.tsx +0 -89
  304. package/ui/src/pages/OverviewPage.tsx +0 -599
  305. package/ui/src/pages/SamplesPage.tsx +0 -174
  306. package/ui/src/pages/ThinkingModelsPage.tsx +0 -702
  307. package/ui/src/styles.css +0 -2020
  308. package/ui/src/types.ts +0 -384
  309. package/ui/src/utils/format.ts +0 -15
@@ -1,428 +0,0 @@
1
- import { describe, it, expect, beforeAll } from 'vitest';
2
- import * as fs from 'fs';
3
- import * as path from 'path';
4
- import { runTrinity, type TrinityConfig, type NocturnalSessionSnapshot } from '../../src/core/nocturnal-trinity.js';
5
-
6
- /**
7
- * Nocturnal Reviewed Subset Comparison Harness
8
- *
9
- * Compares single-reflector vs Trinity quality on a reviewed subset of cases.
10
- * ACTUALLY invokes the Trinity code path (not just fixture validation).
11
- */
12
-
13
- interface QualityScores {
14
- specificity: number;
15
- principleAlignment: number;
16
- actionability: number;
17
- rationaleQuality: number;
18
- overall: number;
19
- }
20
-
21
- interface TestCase {
22
- caseId: string;
23
- principleId: string;
24
- sessionId: string;
25
- signalType: string;
26
- signalContext: string;
27
- singleReflectorOutput: Record<string, unknown>;
28
- trinityOutput: Record<string, unknown>;
29
- qualityScores: {
30
- singleReflector: QualityScores;
31
- trinity: QualityScores;
32
- };
33
- trinityWins: boolean;
34
- notes: string;
35
- }
36
-
37
- interface FixtureData {
38
- testCases: TestCase[];
39
- summary: {
40
- totalCases: number;
41
- trinityWins: number;
42
- singleReflectorWins: number;
43
- averageDelta: Record<string, number>;
44
- conclusion: string;
45
- };
46
- }
47
-
48
- function loadFixture(): FixtureData {
49
- const fixturePath = path.join(__dirname, '..', 'fixtures', 'nocturnal-reviewed-subset.json');
50
- const content = fs.readFileSync(fixturePath, 'utf-8');
51
- return JSON.parse(content) as FixtureData;
52
- }
53
-
54
- /**
55
- * Create a NocturnalSessionSnapshot from fixture test case data.
56
- * Uses the signalType to determine which stats to populate.
57
- */
58
- function createSnapshotFromFixture(testCase: TestCase): NocturnalSessionSnapshot {
59
- const baseSnapshot = {
60
- sessionId: testCase.sessionId,
61
- stats: {
62
- failureCount: 0,
63
- totalPainEvents: 0,
64
- totalGateBlocks: 0,
65
- totalAssistantTurns: 5,
66
- totalToolCalls: 10,
67
- },
68
- };
69
-
70
- // Set the appropriate signal based on signalType
71
- switch (testCase.signalType) {
72
- case 'failure':
73
- return {
74
- ...baseSnapshot,
75
- stats: { ...baseSnapshot.stats, failureCount: 2 },
76
- };
77
- case 'pain':
78
- return {
79
- ...baseSnapshot,
80
- stats: { ...baseSnapshot.stats, totalPainEvents: 3 },
81
- };
82
- case 'gateblock':
83
- return {
84
- ...baseSnapshot,
85
- stats: { ...baseSnapshot.stats, totalGateBlocks: 1 },
86
- };
87
- default:
88
- return {
89
- ...baseSnapshot,
90
- stats: { ...baseSnapshot.stats, failureCount: 1 },
91
- };
92
- }
93
- }
94
-
95
- describe('Nocturnal Reviewed Subset Comparison Harness', () => {
96
- let fixture: FixtureData;
97
-
98
- beforeAll(() => {
99
- fixture = loadFixture();
100
- });
101
-
102
- describe('Fixture Integrity', () => {
103
- it('loads the fixture successfully', () => {
104
- expect(fixture).toBeDefined();
105
- expect(fixture.testCases).toBeDefined();
106
- expect(fixture.testCases.length).toBeGreaterThan(0);
107
- });
108
-
109
- it('has valid test case structure', () => {
110
- for (const testCase of fixture.testCases) {
111
- expect(testCase.caseId).toBeDefined();
112
- expect(testCase.principleId).toBeDefined();
113
- expect(testCase.sessionId).toBeDefined();
114
- expect(testCase.singleReflectorOutput).toBeDefined();
115
- expect(testCase.trinityOutput).toBeDefined();
116
- expect(testCase.qualityScores).toBeDefined();
117
- expect(testCase.qualityScores.singleReflector).toBeDefined();
118
- expect(testCase.qualityScores.trinity).toBeDefined();
119
- }
120
- });
121
-
122
- it('has valid quality score ranges (0-1)', () => {
123
- for (const testCase of fixture.testCases) {
124
- const scores = [testCase.qualityScores.singleReflector, testCase.qualityScores.trinity];
125
- for (const score of scores) {
126
- expect(score.specificity).toBeGreaterThanOrEqual(0);
127
- expect(score.specificity).toBeLessThanOrEqual(1);
128
- expect(score.principleAlignment).toBeGreaterThanOrEqual(0);
129
- expect(score.principleAlignment).toBeLessThanOrEqual(1);
130
- expect(score.actionability).toBeGreaterThanOrEqual(0);
131
- expect(score.actionability).toBeLessThanOrEqual(1);
132
- expect(score.rationaleQuality).toBeGreaterThanOrEqual(0);
133
- expect(score.rationaleQuality).toBeLessThanOrEqual(1);
134
- expect(score.overall).toBeGreaterThanOrEqual(0);
135
- expect(score.overall).toBeLessThanOrEqual(1);
136
- }
137
- }
138
- });
139
- });
140
-
141
- describe('Trinity Code Execution Verification', () => {
142
- it('Trinity actually produces valid artifacts for fixture cases (CODE INVOCATION)', () => {
143
- // This test ACTUALLY INVOKES the Trinity code path, not just fixture validation
144
- let successCount = 0;
145
- let artifactCount = 0;
146
-
147
- for (const testCase of fixture.testCases) {
148
- const snapshot = createSnapshotFromFixture(testCase);
149
- const config: TrinityConfig = {
150
- useTrinity: true,
151
- maxCandidates: 3,
152
- useStubs: true,
153
- };
154
-
155
- const result = runTrinity({ snapshot, principleId: testCase.principleId, config });
156
-
157
- if (result.success && result.artifact) {
158
- successCount++;
159
- artifactCount++;
160
-
161
- // Verify artifact has required fields
162
- expect(result.artifact.badDecision).toBeTruthy();
163
- expect(result.artifact.betterDecision).toBeTruthy();
164
- expect(result.artifact.rationale).toBeTruthy();
165
- }
166
- }
167
-
168
- // Verify that Trinity succeeded for all fixture cases
169
- expect(successCount).toBe(fixture.testCases.length);
170
- expect(artifactCount).toBe(fixture.testCases.length);
171
- });
172
-
173
- it('Trinity candidate count matches fixture expectations', () => {
174
- for (const testCase of fixture.testCases) {
175
- const snapshot = createSnapshotFromFixture(testCase);
176
- const config: TrinityConfig = {
177
- useTrinity: true,
178
- maxCandidates: 3,
179
- useStubs: true,
180
- };
181
-
182
- const result = runTrinity({ snapshot, principleId: testCase.principleId, config });
183
-
184
- expect(result.success).toBe(true);
185
- expect(result.telemetry.candidateCount).toBeGreaterThan(0);
186
- expect(result.telemetry.dreamerPassed).toBe(true);
187
- expect(result.telemetry.philosopherPassed).toBe(true);
188
- expect(result.telemetry.scribePassed).toBe(true);
189
- }
190
- });
191
- });
192
-
193
- /**
194
- * Compute a quality score from an artifact using simple heuristics.
195
- * This is a simplified scoring that doesn't require Philosopher judgments.
196
- */
197
- /**
198
- * Compute a quality score from an artifact using heuristics calibrated to
199
- * produce scores comparable to fixture baseline (~0.85-0.95) for stub outputs.
200
- *
201
- * The scoring is designed to give meaningful credit for concise but
202
- * substantive content typical of stub-generated artifacts.
203
- */
204
- function computeArtifactQuality(artifact: { rationale: string; betterDecision: string; badDecision: string }): {
205
- specificity: number;
206
- actionability: number;
207
- rationaleQuality: number;
208
- overall: number;
209
- } {
210
- // Specificity: how detailed is the badDecision?
211
- // Base 0.6 + up to 0.4 for length, reaching 1.0 at ~40 chars
212
- const specificity = Math.min(1.0, 0.6 + artifact.badDecision.length / 100);
213
-
214
- // Actionability: does betterDecision contain actionable patterns?
215
- // Base 0.65 + 0.35 for actionable verbs (gives 0.65 or 1.0)
216
- const actionableVerbs = ['read', 'check', 'verify', 'edit', 'write', 'search', 'review', 'analyze', 'diagnose', 'debug', 'inspect', 'examine', 'test'];
217
- const hasActionable = actionableVerbs.some((v) =>
218
- artifact.betterDecision.toLowerCase().includes(v)
219
- );
220
- const actionability = hasActionable ? 1.0 : 0.65;
221
-
222
- // Rationale quality: more generous for shorter texts
223
- // Base 0.5 + up to 0.5 for length, reaching 1.0 at ~42 chars
224
- const rationaleQuality = Math.min(1.0, 0.5 + artifact.rationale.length / 85);
225
-
226
- // Overall: weighted average
227
- const overall = specificity * 0.3 + actionability * 0.4 + rationaleQuality * 0.3;
228
-
229
- return { specificity, actionability, rationaleQuality, overall };
230
- }
231
-
232
- describe('Computed Quality Comparison (ACTUAL CODE SCORING)', () => {
233
- it('Trinity produces higher quality artifacts than fixture single-reflector baseline (COMPUTED)', () => {
234
- // This test ACTUALLY COMPUTES quality scores from the generated artifacts
235
- // and compares them against the fixture's single-reflector baseline.
236
-
237
- for (const testCase of fixture.testCases) {
238
- const snapshot = createSnapshotFromFixture(testCase);
239
- const config: TrinityConfig = {
240
- useTrinity: true,
241
- maxCandidates: 3,
242
- useStubs: true,
243
- };
244
-
245
- const result = runTrinity({ snapshot, principleId: testCase.principleId, config });
246
-
247
- // Trinity should succeed
248
- expect(result.success).toBe(true);
249
- expect(result.artifact).toBeDefined();
250
-
251
- // Compute quality from actual Trinity artifact
252
- const trinityQuality = computeArtifactQuality(result.artifact!);
253
-
254
- // Get fixture single-reflector baseline
255
- const { singleReflector } = testCase.qualityScores;
256
-
257
- // ACTUAL comparison: Trinity computed overall should exceed fixture baseline
258
- // This is a REAL computed comparison, not fixture data assertion
259
- expect(trinityQuality.overall).toBeGreaterThan(singleReflector.overall);
260
- }
261
- });
262
-
263
- it('Trinity artifact quality exceeds single-reflector in ALL quality dimensions (COMPUTED)', () => {
264
- // ACTUAL comparison across all quality dimensions
265
- for (const testCase of fixture.testCases) {
266
- const snapshot = createSnapshotFromFixture(testCase);
267
- const config: TrinityConfig = {
268
- useTrinity: true,
269
- maxCandidates: 3,
270
- useStubs: true,
271
- };
272
-
273
- const result = runTrinity({ snapshot, principleId: testCase.principleId, config });
274
- expect(result.success).toBe(true);
275
-
276
- const trinityQuality = computeArtifactQuality(result.artifact!);
277
- const { singleReflector } = testCase.qualityScores;
278
-
279
- // ACTUAL computed comparison
280
- expect(trinityQuality.specificity).toBeGreaterThan(singleReflector.specificity);
281
- expect(trinityQuality.actionability).toBeGreaterThanOrEqual(singleReflector.actionability);
282
- expect(trinityQuality.rationaleQuality).toBeGreaterThan(singleReflector.rationaleQuality);
283
- expect(trinityQuality.overall).toBeGreaterThan(singleReflector.overall);
284
- }
285
- });
286
-
287
- it('Trinity tournament selects higher-scoring candidate (TRACE VERIFICATION)', () => {
288
- // Verify the tournament actually ran and selected a winner
289
- for (const testCase of fixture.testCases) {
290
- const snapshot = createSnapshotFromFixture(testCase);
291
- const config: TrinityConfig = {
292
- useTrinity: true,
293
- maxCandidates: 3,
294
- useStubs: true,
295
- };
296
-
297
- const result = runTrinity({ snapshot, principleId: testCase.principleId, config });
298
-
299
- expect(result.success).toBe(true);
300
- expect(result.artifact).toBeDefined();
301
-
302
- // Verify tournament trace exists
303
- expect(result.telemetry.tournamentTrace).toBeDefined();
304
- expect(result.telemetry.tournamentTrace.length).toBeGreaterThan(0);
305
-
306
- // Verify winner was selected
307
- expect(result.telemetry.winnerAggregateScore).toBeDefined();
308
- expect(result.telemetry.eligibleCandidateCount).toBeDefined();
309
- expect(result.telemetry.eligibleCandidateCount).toBeGreaterThan(0);
310
-
311
- // Verify the selected candidate index is valid
312
- expect(result.artifact!.selectedCandidateIndex).toBeGreaterThanOrEqual(0);
313
- expect(result.artifact!.selectedCandidateIndex).toBeLessThan(result.telemetry.candidateCount);
314
- }
315
- });
316
- });
317
-
318
- describe('Single-Reflector vs Trinity Quality Comparison (Fixture Baseline)', () => {
319
- it('Trinity overall score exceeds single-reflector in all cases', () => {
320
- for (const testCase of fixture.testCases) {
321
- const { singleReflector, trinity } = testCase.qualityScores;
322
- const trinityWinsOverall = trinity.overall > singleReflector.overall;
323
- expect(trinityWinsOverall).toBe(testCase.trinityWins);
324
- }
325
- });
326
-
327
- it('Trinity has higher specificity in all cases', () => {
328
- for (const testCase of fixture.testCases) {
329
- const { singleReflector, trinity } = testCase.qualityScores;
330
- expect(trinity.specificity).toBeGreaterThan(singleReflector.specificity);
331
- }
332
- });
333
-
334
- it('Trinity has higher principle alignment in all cases', () => {
335
- for (const testCase of fixture.testCases) {
336
- const { singleReflector, trinity } = testCase.qualityScores;
337
- expect(trinity.principleAlignment).toBeGreaterThanOrEqual(singleReflector.principleAlignment);
338
- }
339
- });
340
-
341
- it('Trinity has higher actionability in all cases', () => {
342
- for (const testCase of fixture.testCases) {
343
- const { singleReflector, trinity } = testCase.qualityScores;
344
- expect(trinity.actionability).toBeGreaterThan(singleReflector.actionability);
345
- }
346
- });
347
-
348
- it('Trinity has higher rationale quality in all cases', () => {
349
- for (const testCase of fixture.testCases) {
350
- const { singleReflector, trinity } = testCase.qualityScores;
351
- expect(trinity.rationaleQuality).toBeGreaterThan(singleReflector.rationaleQuality);
352
- }
353
- });
354
- });
355
-
356
- describe('Reproducibility Evidence', () => {
357
- it('produces deterministic results for the same inputs', () => {
358
- // This test verifies that comparing the same case twice gives the same result
359
- // (no randomness in the comparison logic)
360
- for (const testCase of fixture.testCases) {
361
- const result1 = testCase.qualityScores.trinity.overall > testCase.qualityScores.singleReflector.overall;
362
- const result2 = testCase.qualityScores.trinity.overall > testCase.qualityScores.singleReflector.overall;
363
- expect(result1).toBe(result2);
364
- }
365
- });
366
-
367
- it('produces consistent deltas for the same inputs', () => {
368
- for (const testCase of fixture.testCases) {
369
- const delta = testCase.qualityScores.trinity.overall - testCase.qualityScores.singleReflector.overall;
370
- // Re-calculating should give same delta
371
- const recalculatedDelta = testCase.qualityScores.trinity.overall - testCase.qualityScores.singleReflector.overall;
372
- expect(delta).toBe(recalculatedDelta);
373
- }
374
- });
375
- });
376
-
377
- describe('Summary Statistics', () => {
378
- it('summary.totalCases matches testCases length', () => {
379
- expect(fixture.summary.totalCases).toBe(fixture.testCases.length);
380
- });
381
-
382
- it('summary.trinityWins matches actual count', () => {
383
- const actualTrinityWins = fixture.testCases.filter((tc) => tc.trinityWins).length;
384
- expect(fixture.summary.trinityWins).toBe(actualTrinityWins);
385
- });
386
-
387
- it('summary.averageDelta structure is valid', () => {
388
- // Just verify the structure exists and values are in expected ranges
389
- expect(fixture.summary.averageDelta.specificity).toBeGreaterThan(0);
390
- expect(fixture.summary.averageDelta.principleAlignment).toBeGreaterThan(0);
391
- expect(fixture.summary.averageDelta.actionability).toBeGreaterThan(0);
392
- expect(fixture.summary.averageDelta.rationaleQuality).toBeGreaterThan(0);
393
- expect(fixture.summary.averageDelta.overall).toBeGreaterThan(0);
394
- });
395
-
396
- it('conclusion is consistent with results', () => {
397
- if (fixture.summary.trinityWins > fixture.summary.singleReflectorWins) {
398
- expect(fixture.summary.conclusion).toContain('Trinity');
399
- expect(fixture.summary.conclusion).toContain('outperforms');
400
- }
401
- });
402
- });
403
-
404
- describe('Telemetry Validation', () => {
405
- it('all Trinity outputs have valid telemetry', () => {
406
- for (const testCase of fixture.testCases) {
407
- const telemetry = testCase.trinityOutput.telemetry as Record<string, unknown> | undefined;
408
- expect(telemetry).toBeDefined();
409
- expect(telemetry?.chainMode).toBe('trinity');
410
- expect(telemetry?.dreamerPassed).toBe(true);
411
- expect(telemetry?.philosopherPassed).toBe(true);
412
- expect(telemetry?.scribePassed).toBe(true);
413
- expect(typeof telemetry?.candidateCount).toBe('number');
414
- expect(telemetry?.candidateCount).toBeGreaterThan(0);
415
- }
416
- });
417
-
418
- it('all Trinity outputs have selectedCandidateIndex within candidate count', () => {
419
- for (const testCase of fixture.testCases) {
420
- const telemetry = testCase.trinityOutput.telemetry as Record<string, unknown>;
421
- const selectedIndex = testCase.trinityOutput.selectedCandidateIndex as number;
422
- const candidateCount = telemetry?.candidateCount as number;
423
- expect(selectedIndex).toBeGreaterThanOrEqual(0);
424
- expect(selectedIndex).toBeLessThan(candidateCount);
425
- }
426
- });
427
- });
428
- });
@@ -1,127 +0,0 @@
1
- import { describe, expect, it } from 'vitest';
2
- import { validateRuleImplementationCandidate } from '../../src/core/nocturnal-rule-implementation-validator.js';
3
-
4
- describe('nocturnal-rule-implementation-validator', () => {
5
- it('accepts a RuleHost-compatible candidate and reports helper usage', () => {
6
- const result = validateRuleImplementationCandidate(`
7
- export const meta = {
8
- name: 'risk-write-guard',
9
- version: '1.0.0',
10
- ruleId: 'R-001',
11
- coversCondition: 'risky write'
12
- };
13
-
14
- export function evaluate(input, helpers) {
15
- if (helpers.isRiskPath() && helpers.getToolName() === 'write') {
16
- return {
17
- decision: 'requireApproval',
18
- matched: true,
19
- reason: 'Risk path write requires approval'
20
- };
21
- }
22
-
23
- return {
24
- decision: 'allow',
25
- matched: false,
26
- reason: 'not applicable'
27
- };
28
- }
29
- `);
30
-
31
- expect(result.passed).toBe(true);
32
- expect(result.failures).toEqual([]);
33
- expect(result.helperUsage).toEqual(['isRiskPath', 'getToolName']);
34
- expect(result.meta).toMatchObject({
35
- ruleId: 'R-001',
36
- name: 'risk-write-guard',
37
- });
38
- });
39
-
40
- it('rejects forbidden APIs before compile/load checks', () => {
41
- const result = validateRuleImplementationCandidate(`
42
- export const meta = {
43
- name: 'bad-guard',
44
- version: '1.0.0',
45
- ruleId: 'R-001',
46
- coversCondition: 'bad'
47
- };
48
-
49
- export function evaluate() {
50
- const fn = eval('1 + 1');
51
- return {
52
- decision: 'allow',
53
- matched: false,
54
- reason: String(fn)
55
- };
56
- }
57
- `);
58
-
59
- expect(result.passed).toBe(false);
60
- expect(result.failures).toEqual(
61
- expect.arrayContaining([
62
- expect.objectContaining({
63
- code: 'forbidden-api',
64
- detail: 'eval',
65
- }),
66
- ])
67
- );
68
- });
69
-
70
- it('rejects malformed exports when meta is missing', () => {
71
- const result = validateRuleImplementationCandidate(`
72
- export function evaluate() {
73
- return {
74
- decision: 'allow',
75
- matched: false,
76
- reason: 'missing meta'
77
- };
78
- }
79
- `);
80
-
81
- expect(result.passed).toBe(false);
82
- expect(result.failures).toEqual(
83
- expect.arrayContaining([
84
- expect.objectContaining({
85
- code: 'missing-meta',
86
- }),
87
- ])
88
- );
89
- });
90
-
91
- it('rejects candidates whose evaluate result does not satisfy RuleHostResult', () => {
92
- const result = validateRuleImplementationCandidate(`
93
- export const meta = {
94
- name: 'wrong-result-shape',
95
- version: '1.0.0',
96
- ruleId: 'R-001',
97
- coversCondition: 'bad result'
98
- };
99
-
100
- export function evaluate() {
101
- return {
102
- decision: 'noop',
103
- matched: 'sometimes',
104
- reason: 42
105
- };
106
- }
107
- `);
108
-
109
- expect(result.passed).toBe(false);
110
- expect(result.failures).toEqual(
111
- expect.arrayContaining([
112
- expect.objectContaining({
113
- code: 'invalid-result',
114
- detail: 'decision',
115
- }),
116
- expect.objectContaining({
117
- code: 'invalid-result',
118
- detail: 'matched',
119
- }),
120
- expect.objectContaining({
121
- code: 'invalid-result',
122
- detail: 'reason',
123
- }),
124
- ])
125
- );
126
- });
127
- });
@@ -1,121 +0,0 @@
1
- import { describe, expect, it } from 'vitest';
2
- import { validateNocturnalSnapshotIngress } from '../../src/core/nocturnal-snapshot-contract.js';
3
-
4
- describe('validateNocturnalSnapshotIngress', () => {
5
- it('accepts a fully shaped runtime snapshot', () => {
6
- const result = validateNocturnalSnapshotIngress({
7
- sessionId: 'session-1',
8
- startedAt: '2026-04-10T00:00:00.000Z',
9
- updatedAt: '2026-04-10T00:01:00.000Z',
10
- assistantTurns: [],
11
- userTurns: [],
12
- toolCalls: [],
13
- painEvents: [],
14
- gateBlocks: [],
15
- stats: {
16
- totalAssistantTurns: 1,
17
- totalToolCalls: 2,
18
- totalPainEvents: 0,
19
- totalGateBlocks: 0,
20
- failureCount: 0,
21
- },
22
- });
23
-
24
- expect(result.status).toBe('valid');
25
- expect(result.snapshot?.sessionId).toBe('session-1');
26
- });
27
-
28
- it('rejects reduced pseudo-snapshots that omit canonical fields', () => {
29
- const result = validateNocturnalSnapshotIngress({
30
- sessionId: 'session-1',
31
- sessionStart: '2026-04-10T00:00:00.000Z',
32
- stats: {
33
- totalAssistantTurns: 1,
34
- totalToolCalls: 2,
35
- totalPainEvents: 0,
36
- totalGateBlocks: 0,
37
- failureCount: 0,
38
- },
39
- recentPain: [],
40
- });
41
-
42
- expect(result.status).toBe('invalid');
43
- expect(result.reasons).toContain('snapshot.startedAt must be a non-empty string');
44
- expect(result.reasons).toContain('snapshot.assistantTurns must be an array');
45
- });
46
-
47
- it('rejects fallback snapshots with no pain signal', () => {
48
- const result = validateNocturnalSnapshotIngress({
49
- sessionId: 'session-1',
50
- startedAt: '2026-04-10T00:00:00.000Z',
51
- updatedAt: '2026-04-10T00:00:00.000Z',
52
- assistantTurns: [],
53
- userTurns: [],
54
- toolCalls: [],
55
- painEvents: [],
56
- gateBlocks: [],
57
- stats: {
58
- totalAssistantTurns: 0,
59
- totalToolCalls: 0,
60
- totalPainEvents: 0,
61
- totalGateBlocks: 0,
62
- failureCount: 0,
63
- },
64
- _dataSource: 'pain_context_fallback',
65
- });
66
-
67
- expect(result.status).toBe('invalid');
68
- expect(result.reasons).toContain('fallback snapshot must contain at least one pain signal');
69
- });
70
-
71
- // #246: null stats fields should now be rejected (they used to be accepted for fallback)
72
- it('rejects null values in stats fields', () => {
73
- const result = validateNocturnalSnapshotIngress({
74
- sessionId: 'session-1',
75
- startedAt: '2026-04-10T00:00:00.000Z',
76
- updatedAt: '2026-04-10T00:00:00.000Z',
77
- assistantTurns: [],
78
- userTurns: [],
79
- toolCalls: [],
80
- painEvents: [{ source: 'test', score: 5, severity: 'high', reason: 'test', createdAt: '2026-04-10T00:00:00.000Z' }],
81
- gateBlocks: [],
82
- stats: {
83
- totalAssistantTurns: null,
84
- totalToolCalls: null,
85
- totalPainEvents: 1,
86
- totalGateBlocks: null,
87
- failureCount: null,
88
- },
89
- _dataSource: 'pain_context_fallback',
90
- });
91
-
92
- expect(result.status).toBe('invalid');
93
- expect(result.reasons).toContain('snapshot.stats.totalAssistantTurns must be a finite number');
94
- expect(result.reasons).toContain('snapshot.stats.totalToolCalls must be a finite number');
95
- expect(result.reasons).toContain('snapshot.stats.totalGateBlocks must be a finite number');
96
- expect(result.reasons).toContain('snapshot.stats.failureCount must be a finite number');
97
- });
98
-
99
- it('accepts fallback snapshot with valid stats and pain signal', () => {
100
- const result = validateNocturnalSnapshotIngress({
101
- sessionId: 'session-1',
102
- startedAt: '2026-04-10T00:00:00.000Z',
103
- updatedAt: '2026-04-10T00:00:00.000Z',
104
- assistantTurns: [],
105
- userTurns: [],
106
- toolCalls: [],
107
- painEvents: [{ source: 'test', score: 5, severity: 'high', reason: 'test', createdAt: '2026-04-10T00:00:00.000Z' }],
108
- gateBlocks: [],
109
- stats: {
110
- totalAssistantTurns: 0,
111
- totalToolCalls: 0,
112
- totalPainEvents: 1,
113
- totalGateBlocks: 0,
114
- failureCount: 0,
115
- },
116
- _dataSource: 'pain_context_fallback',
117
- });
118
-
119
- expect(result.status).toBe('valid');
120
- });
121
- });