principles-disciple 1.71.0 → 1.73.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (309) hide show
  1. package/openclaw.plugin.json +10 -5
  2. package/package.json +17 -19
  3. package/scripts/acceptance-test.mjs +16 -73
  4. package/scripts/sync-plugin.mjs +382 -77
  5. package/src/commands/archive-impl.ts +2 -1
  6. package/src/commands/capabilities.ts +2 -2
  7. package/src/commands/context.ts +2 -2
  8. package/src/commands/disable-impl.ts +2 -1
  9. package/src/commands/evolution-status.ts +16 -16
  10. package/src/commands/export.ts +12 -67
  11. package/src/commands/pain.ts +91 -1
  12. package/src/commands/principle-rollback.ts +2 -1
  13. package/src/commands/promote-impl.ts +7 -43
  14. package/src/commands/rollback-impl.ts +2 -1
  15. package/src/commands/rollback.ts +2 -1
  16. package/src/commands/samples.ts +2 -1
  17. package/src/commands/thinking-os.ts +2 -1
  18. package/src/config/errors.ts +18 -2
  19. package/src/constants/diagnostician.ts +2 -2
  20. package/src/constants/tools.ts +2 -1
  21. package/src/core/__tests__/focus-history.test.ts +210 -0
  22. package/src/core/config.ts +1 -1
  23. package/src/core/confirm-first-gate.ts +255 -0
  24. package/src/core/correction-cue-learner.ts +2 -136
  25. package/src/core/correction-types.ts +16 -88
  26. package/src/core/dictionary.ts +19 -20
  27. package/src/core/empathy-keyword-matcher.ts +17 -289
  28. package/src/core/empathy-types.ts +18 -229
  29. package/src/core/event-log.ts +38 -132
  30. package/src/core/evolution-reducer.ts +21 -2
  31. package/src/core/evolution-types.ts +76 -464
  32. package/src/core/file-store.ts +80 -0
  33. package/src/core/focus-history.ts +228 -955
  34. package/src/core/local-worker-routing.ts +34 -314
  35. package/src/core/merge-gate-audit.ts +0 -195
  36. package/src/core/pain-diagnostic-gate.ts +154 -0
  37. package/src/core/pain-signal.ts +21 -138
  38. package/src/core/pain.ts +15 -88
  39. package/src/core/pd-task-reconciler.ts +26 -115
  40. package/src/core/pd-task-service.ts +9 -9
  41. package/src/core/pd-task-types.ts +23 -127
  42. package/src/core/principle-compiler/__tests__/compiler-replay-gate.test.ts +174 -0
  43. package/src/core/principle-compiler/code-validator.ts +15 -42
  44. package/src/core/principle-compiler/compiler.ts +100 -15
  45. package/src/core/principle-compiler/index.ts +5 -2
  46. package/src/core/principle-compiler/template-generator.ts +4 -104
  47. package/src/core/principle-injection.ts +10 -202
  48. package/src/core/principle-internalization/filesystem-lifecycle-datasource.ts +42 -0
  49. package/src/core/principle-internalization/lifecycle-read-model.ts +39 -242
  50. package/src/core/principle-internalization/principle-lifecycle-service.ts +12 -10
  51. package/src/core/principle-tree-ledger-adapter.ts +145 -0
  52. package/src/core/principle-tree-ledger.ts +8 -6
  53. package/src/core/reflection/reflection-context.ts +14 -109
  54. package/src/core/replay-engine.ts +8 -500
  55. package/src/core/rule-host-helpers.ts +5 -35
  56. package/src/core/rule-host-types.ts +10 -82
  57. package/src/core/rule-host.ts +6 -63
  58. package/src/core/runtime-v2-prompt-activation-reader.ts +231 -0
  59. package/src/core/session-tracker.ts +87 -101
  60. package/src/core/shadow-observation-registry.ts +19 -48
  61. package/src/core/trajectory.ts +3 -1
  62. package/src/core/workflow-funnel-loader.ts +62 -68
  63. package/src/core/workspace-context.ts +46 -0
  64. package/src/core/workspace-dir-service.ts +1 -1
  65. package/src/core/workspace-dir-validation.ts +18 -9
  66. package/src/hooks/AGENTS.md +1 -1
  67. package/src/hooks/gate-block-helper.ts +46 -44
  68. package/src/hooks/gate.ts +207 -7
  69. package/src/hooks/lifecycle.ts +30 -32
  70. package/src/hooks/llm.ts +60 -32
  71. package/src/hooks/pain.ts +297 -103
  72. package/src/hooks/prompt.ts +469 -339
  73. package/src/hooks/subagent.ts +2 -29
  74. package/src/i18n/commands.ts +2 -10
  75. package/src/index.ts +95 -85
  76. package/src/openclaw-sdk.ts +311 -0
  77. package/src/service/central-database.ts +8 -4
  78. package/src/service/evolution-queue-migration.ts +2 -1
  79. package/src/service/evolution-worker.ts +163 -1786
  80. package/src/service/internalization-trigger-adapter.ts +302 -0
  81. package/src/service/keyword-optimization-service.ts +4 -4
  82. package/src/service/monitoring-query-service.ts +1 -215
  83. package/src/service/queue-io.ts +60 -331
  84. package/src/service/runtime-summary-service.ts +115 -18
  85. package/src/service/subagent-workflow/index.ts +0 -41
  86. package/src/service/subagent-workflow/types.ts +9 -120
  87. package/src/service/subagent-workflow/workflow-store.ts +2 -119
  88. package/src/service/workflow-watchdog.ts +0 -43
  89. package/src/types/event-payload.ts +16 -74
  90. package/src/types/event-types.ts +39 -547
  91. package/src/types/hygiene-types.ts +7 -30
  92. package/src/types/principle-tree-schema.ts +20 -222
  93. package/src/types/queue.ts +15 -70
  94. package/src/types/runtime-summary.ts +5 -49
  95. package/src/utils/io.ts +10 -0
  96. package/src/utils/retry.ts +1 -1
  97. package/src/utils/shadow-fingerprint.ts +2 -2
  98. package/src/utils/workspace-resolver.ts +50 -0
  99. package/templates/langs/en/core/AGENTS.md +2 -2
  100. package/templates/langs/en/core/BOOT.md +1 -1
  101. package/templates/langs/en/core/HEARTBEAT.md +2 -2
  102. package/templates/langs/en/skills/ai-sprint-orchestration/references/agent-registry.json +1 -72
  103. package/templates/langs/en/skills/ai-sprint-orchestration/references/specs/bugfix-complex-template.json +6 -6
  104. package/templates/langs/en/skills/ai-sprint-orchestration/references/specs/feature-complex-template.json +6 -6
  105. package/templates/langs/en/skills/ai-sprint-orchestration/references/specs/workflow-validation-minimal-verify.json +2 -12
  106. package/templates/langs/en/skills/ai-sprint-orchestration/references/specs/workflow-validation-minimal.json +2 -12
  107. package/templates/langs/en/skills/ai-sprint-orchestration/runtime/.gitignore +2 -2
  108. package/templates/langs/en/skills/ai-sprint-orchestration/scripts/run.mjs +51 -15
  109. package/templates/langs/en/skills/evolve-task/SKILL.md +1 -1
  110. package/templates/langs/en/skills/pd-cli-operator/SKILL.md +67 -0
  111. package/templates/langs/en/skills/pd-diagnostician/SKILL.md +1 -1
  112. package/templates/langs/en/skills/pd-mentor/SKILL.md +1 -1
  113. package/templates/langs/en/skills/pd-pain-signal/SKILL.md +17 -39
  114. package/templates/langs/en/skills/pd-runtime-v2/SKILL.md +61 -0
  115. package/templates/langs/zh/core/AGENTS.md +2 -2
  116. package/templates/langs/zh/core/BOOT.md +1 -1
  117. package/templates/langs/zh/core/HEARTBEAT.md +2 -2
  118. package/templates/langs/zh/skills/ai-sprint-orchestration/references/agent-registry.json +1 -72
  119. package/templates/langs/zh/skills/ai-sprint-orchestration/references/specs/bugfix-complex-template.json +6 -6
  120. package/templates/langs/zh/skills/ai-sprint-orchestration/references/specs/feature-complex-template.json +6 -6
  121. package/templates/langs/zh/skills/ai-sprint-orchestration/references/specs/nocturnal-trinity-quality-enhancement.json +8 -8
  122. package/templates/langs/zh/skills/ai-sprint-orchestration/references/specs/workflow-validation-minimal-verify.json +2 -12
  123. package/templates/langs/zh/skills/ai-sprint-orchestration/references/specs/workflow-validation-minimal.json +2 -12
  124. package/templates/langs/zh/skills/ai-sprint-orchestration/runtime/.gitignore +2 -2
  125. package/templates/langs/zh/skills/ai-sprint-orchestration/scripts/run.mjs +51 -15
  126. package/templates/langs/zh/skills/ai-sprint-orchestration/test/run.test.mjs +21 -5
  127. package/templates/langs/zh/skills/evolve-task/SKILL.md +2 -2
  128. package/templates/langs/zh/skills/pd-cli-operator/SKILL.md +67 -0
  129. package/templates/langs/zh/skills/pd-diagnostician/SKILL.md +1 -1
  130. package/templates/langs/zh/skills/pd-mentor/SKILL.md +1 -1
  131. package/templates/langs/zh/skills/pd-pain-signal/SKILL.md +17 -38
  132. package/templates/langs/zh/skills/pd-runtime-v2/SKILL.md +61 -0
  133. package/tests/build-artifacts.test.ts +1 -3
  134. package/tests/commands/evolution-status.test.ts +0 -118
  135. package/tests/core/bootstrap-rules.test.ts +1 -1
  136. package/tests/core/config.test.ts +1 -1
  137. package/tests/core/event-log.test.ts +35 -0
  138. package/tests/core/evolution-engine.test.ts +610 -0
  139. package/tests/core/file-store.test.ts +102 -0
  140. package/tests/core/focus-history.test.ts +203 -11
  141. package/tests/core/merge-gate-audit.test.ts +2 -169
  142. package/tests/core/model-deployment-registry.test.ts +7 -1
  143. package/tests/core/model-training-registry.test.ts +19 -0
  144. package/tests/core/observability.test.ts +0 -1
  145. package/tests/core/pain-diagnostic-gate.test.ts +498 -0
  146. package/tests/core/pain.test.ts +0 -1
  147. package/tests/core/principle-internalization/deprecated-readiness.test.ts +2 -2
  148. package/tests/core/principle-internalization/lifecycle-metrics.test.ts +2 -2
  149. package/tests/core/principle-internalization/{internalization-routing-policy.test.ts → lifecycle-routing-policy.test.ts} +6 -6
  150. package/tests/core/principle-internalization/lineage-source-retired.test.ts +56 -0
  151. package/tests/core/principle-internalization/principle-lifecycle-service.test.ts +1 -23
  152. package/tests/core/principle-tree-ledger-adapter.test.ts +253 -0
  153. package/tests/core/reflection-context.test.ts +0 -14
  154. package/tests/core/replay-engine.test.ts +127 -215
  155. package/tests/core/rule-host-helpers.test.ts +2 -2
  156. package/tests/core/rule-implementation-runtime.test.ts +0 -27
  157. package/tests/core/workflow-funnel-loader.test.ts +162 -0
  158. package/tests/core/workspace-dir-validation.test.ts +8 -1
  159. package/tests/core-anti-growth.test.ts +192 -0
  160. package/tests/hook-workspace-nextaction-contract.test.ts +42 -0
  161. package/tests/hooks/confirm-first-gate.test.ts +333 -0
  162. package/tests/hooks/gate-auto-correct-shadow.test.ts +310 -0
  163. package/tests/hooks/gate-auto-correct.test.ts +665 -0
  164. package/tests/hooks/gate-rule-host-pipeline.test.ts +2 -1
  165. package/tests/hooks/pain.test.ts +269 -12
  166. package/tests/hooks/prompt-characterization.test.ts +500 -0
  167. package/tests/hooks/prompt-size-guard.test.ts +329 -0
  168. package/tests/hooks/runtime-v2-prompt-activation.test.ts +869 -0
  169. package/tests/index.test.ts +94 -1
  170. package/tests/integration/auto-entry-gate.test.ts +248 -0
  171. package/tests/integration/internalization-trigger-guard.test.ts +69 -0
  172. package/tests/integration/m8-legacy-paths.test.ts +63 -0
  173. package/tests/integration/runtime-v2-pain-guard.test.ts +125 -0
  174. package/tests/plugin-config-resolution-cutover.test.ts +359 -0
  175. package/tests/runtime-v2-discovery-guard.test.ts +154 -0
  176. package/tests/service/central-database.test.ts +457 -0
  177. package/tests/service/evolution-worker.correction-observer.test.ts +173 -0
  178. package/tests/service/evolution-worker.timeout.test.ts +11 -129
  179. package/tests/service/internalization-trigger-adapter.test.ts +251 -0
  180. package/tests/service/monitoring-query-service.test.ts +1 -47
  181. package/tests/service/queue-io.test.ts +1 -62
  182. package/tests/service/runtime-summary-service.test.ts +184 -3
  183. package/tests/service/workflow-watchdog.test.ts +0 -91
  184. package/tests/utils/file-lock.test.ts +5 -3
  185. package/tests/utils/session-key.test.ts +52 -0
  186. package/tests/utils/subagent-probe.test.ts +48 -1
  187. package/vitest.config.ts +4 -11
  188. package/.planning/codebase/ARCHITECTURE.md +0 -157
  189. package/.planning/codebase/CONCERNS.md +0 -145
  190. package/.planning/codebase/CONVENTIONS.md +0 -148
  191. package/.planning/codebase/INTEGRATIONS.md +0 -81
  192. package/.planning/codebase/STACK.md +0 -87
  193. package/.planning/codebase/STRUCTURE.md +0 -193
  194. package/.planning/codebase/TESTING.md +0 -243
  195. package/.planning/phases/01-basic-visualization/01-GAP-CLOSURE-VERIFICATION.md +0 -113
  196. package/docs/COMMAND_REFERENCE.md +0 -76
  197. package/docs/COMMAND_REFERENCE_EN.md +0 -79
  198. package/scripts/build-web.mjs +0 -46
  199. package/scripts/diagnose-nocturnal.mjs +0 -537
  200. package/scripts/seed-nocturnal-scenarios.mjs +0 -384
  201. package/src/commands/nocturnal-review.ts +0 -322
  202. package/src/commands/nocturnal-rollout.ts +0 -790
  203. package/src/commands/nocturnal-train.ts +0 -986
  204. package/src/commands/pd-reflect.ts +0 -88
  205. package/src/core/adaptive-thresholds.ts +0 -478
  206. package/src/core/diagnostician-task-store.ts +0 -192
  207. package/src/core/nocturnal-arbiter.ts +0 -715
  208. package/src/core/nocturnal-artifact-lineage.ts +0 -116
  209. package/src/core/nocturnal-artificer.ts +0 -257
  210. package/src/core/nocturnal-candidate-scoring.ts +0 -530
  211. package/src/core/nocturnal-compliance.ts +0 -1146
  212. package/src/core/nocturnal-dataset.ts +0 -763
  213. package/src/core/nocturnal-executability.ts +0 -428
  214. package/src/core/nocturnal-export.ts +0 -499
  215. package/src/core/nocturnal-paths.ts +0 -240
  216. package/src/core/nocturnal-reasoning-deriver.ts +0 -343
  217. package/src/core/nocturnal-rule-implementation-validator.ts +0 -246
  218. package/src/core/nocturnal-snapshot-contract.ts +0 -99
  219. package/src/core/nocturnal-trajectory-extractor.ts +0 -512
  220. package/src/core/nocturnal-trinity-types.ts +0 -218
  221. package/src/core/nocturnal-trinity.ts +0 -2680
  222. package/src/core/principle-internalization/deprecated-readiness.ts +0 -93
  223. package/src/core/principle-internalization/internalization-routing-policy.ts +0 -208
  224. package/src/core/principle-internalization/lifecycle-metrics.ts +0 -152
  225. package/src/http/principles-console-route.ts +0 -709
  226. package/src/service/central-health-service.ts +0 -49
  227. package/src/service/central-overview-service.ts +0 -138
  228. package/src/service/control-ui-query-service.ts +0 -900
  229. package/src/service/cooldown-strategy.ts +0 -97
  230. package/src/service/evolution-pain-context.ts +0 -79
  231. package/src/service/evolution-query-service.ts +0 -407
  232. package/src/service/health-query-service.ts +0 -1038
  233. package/src/service/nocturnal-config.ts +0 -214
  234. package/src/service/nocturnal-runtime.ts +0 -734
  235. package/src/service/nocturnal-service.ts +0 -1605
  236. package/src/service/nocturnal-target-selector.ts +0 -545
  237. package/src/service/sleep-cycle.ts +0 -157
  238. package/src/service/startup-reconciler.ts +0 -112
  239. package/src/service/subagent-workflow/correction-observer-types.ts +0 -82
  240. package/src/service/subagent-workflow/correction-observer-workflow-manager.ts +0 -250
  241. package/src/service/subagent-workflow/deep-reflect-workflow-manager.ts +0 -1
  242. package/src/service/subagent-workflow/dynamic-timeout.ts +0 -30
  243. package/src/service/subagent-workflow/empathy-observer-workflow-manager.ts +0 -268
  244. package/src/service/subagent-workflow/nocturnal-workflow-manager.ts +0 -795
  245. package/src/service/subagent-workflow/runtime-direct-driver.ts +0 -268
  246. package/src/service/subagent-workflow/workflow-manager-base.ts +0 -580
  247. package/src/tools/write-pain-flag.ts +0 -215
  248. package/tests/commands/nocturnal-review.test.ts +0 -448
  249. package/tests/commands/nocturnal-train.test.ts +0 -97
  250. package/tests/commands/pd-reflect.test.ts +0 -49
  251. package/tests/core/adaptive-thresholds.test.ts +0 -261
  252. package/tests/core/nocturnal-arbiter.test.ts +0 -559
  253. package/tests/core/nocturnal-artifact-lineage.test.ts +0 -53
  254. package/tests/core/nocturnal-artificer.test.ts +0 -241
  255. package/tests/core/nocturnal-candidate-scoring.test.ts +0 -532
  256. package/tests/core/nocturnal-compliance-p-principles.test.ts +0 -133
  257. package/tests/core/nocturnal-compliance.test.ts +0 -646
  258. package/tests/core/nocturnal-dataset.test.ts +0 -892
  259. package/tests/core/nocturnal-e2e.test.ts +0 -234
  260. package/tests/core/nocturnal-executability.test.ts +0 -357
  261. package/tests/core/nocturnal-export.test.ts +0 -517
  262. package/tests/core/nocturnal-reasoning-deriver.test.ts +0 -372
  263. package/tests/core/nocturnal-reviewed-subset-comparison.test.ts +0 -428
  264. package/tests/core/nocturnal-rule-implementation-validator.test.ts +0 -127
  265. package/tests/core/nocturnal-snapshot-contract.test.ts +0 -121
  266. package/tests/core/nocturnal-trajectory-extractor.test.ts +0 -634
  267. package/tests/core/nocturnal-trinity.test.ts +0 -2053
  268. package/tests/core/pain-auto-repair.test.ts +0 -96
  269. package/tests/core/pain-integration.test.ts +0 -510
  270. package/tests/fixtures/nocturnal-reviewed-subset.json +0 -183
  271. package/tests/http/principles-console-route.test.ts +0 -162
  272. package/tests/integration/chaos-resilience.test.ts +0 -348
  273. package/tests/integration/empathy-workflow-integration.test.ts +0 -626
  274. package/tests/integration/pain-diagnostician-loop.e2e.test.ts +0 -380
  275. package/tests/service/control-ui-query-service.test.ts +0 -121
  276. package/tests/service/cooldown-strategy.test.ts +0 -164
  277. package/tests/service/data-endpoints-regression.test.ts +0 -834
  278. package/tests/service/empathy-observer-workflow-manager.test.ts +0 -175
  279. package/tests/service/evolution-worker.nocturnal.test.ts +0 -601
  280. package/tests/service/nocturnal-runtime-hardening.test.ts +0 -118
  281. package/tests/service/nocturnal-runtime.test.ts +0 -473
  282. package/tests/service/nocturnal-service-code-candidate.test.ts +0 -330
  283. package/tests/service/nocturnal-target-selector.test.ts +0 -615
  284. package/tests/service/startup-reconciler.test.ts +0 -148
  285. package/tests/tools/write-pain-flag.test.ts +0 -358
  286. package/ui/src/App.tsx +0 -45
  287. package/ui/src/api.ts +0 -220
  288. package/ui/src/charts.tsx +0 -955
  289. package/ui/src/components/ErrorState.tsx +0 -6
  290. package/ui/src/components/Loading.tsx +0 -13
  291. package/ui/src/components/ProtectedRoute.tsx +0 -12
  292. package/ui/src/components/Shell.tsx +0 -91
  293. package/ui/src/components/WorkspaceConfig.tsx +0 -178
  294. package/ui/src/components/index.ts +0 -5
  295. package/ui/src/context/auth.tsx +0 -80
  296. package/ui/src/context/theme.tsx +0 -66
  297. package/ui/src/hooks/useAutoRefresh.ts +0 -39
  298. package/ui/src/i18n/ui.ts +0 -473
  299. package/ui/src/main.tsx +0 -16
  300. package/ui/src/pages/EvolutionPage.tsx +0 -333
  301. package/ui/src/pages/FeedbackPage.tsx +0 -138
  302. package/ui/src/pages/GateMonitorPage.tsx +0 -136
  303. package/ui/src/pages/LoginPage.tsx +0 -89
  304. package/ui/src/pages/OverviewPage.tsx +0 -599
  305. package/ui/src/pages/SamplesPage.tsx +0 -174
  306. package/ui/src/pages/ThinkingModelsPage.tsx +0 -702
  307. package/ui/src/styles.css +0 -2020
  308. package/ui/src/types.ts +0 -384
  309. package/ui/src/utils/format.ts +0 -15
@@ -1,532 +0,0 @@
1
- import { describe, it, expect } from 'vitest';
2
- import {
3
- scoreCandidate,
4
- checkThresholds,
5
- rankCandidates,
6
- runTournament,
7
- DEFAULT_SCORING_WEIGHTS,
8
- validateCandidateDiversity,
9
- } from '../../src/core/nocturnal-candidate-scoring.js';
10
- import type { DreamerCandidate, PhilosopherJudgment } from '../../src/core/nocturnal-trinity.js';
11
- import type { ThresholdValues } from '../../src/core/adaptive-thresholds.js';
12
-
13
- // ---------------------------------------------------------------------------
14
- // Test Fixtures
15
- // ---------------------------------------------------------------------------
16
-
17
- function makeCandidate(overrides: Partial<DreamerCandidate> = {}): DreamerCandidate {
18
- return {
19
- candidateIndex: 0,
20
- badDecision: 'Did something wrong without verifying preconditions',
21
- betterDecision: 'Read the relevant file to understand its structure before making changes',
22
- rationale: 'Verifying preconditions prevents errors and ensures actions are appropriate',
23
- confidence: 0.85,
24
- ...overrides,
25
- };
26
- }
27
-
28
- function makeJudgment(candidateIndex: number, overrides: Partial<PhilosopherJudgment> = {}): PhilosopherJudgment {
29
- return {
30
- candidateIndex,
31
- critique: 'Strong alignment with the principle',
32
- principleAligned: true,
33
- score: 0.85,
34
- rank: 1,
35
- ...overrides,
36
- };
37
- }
38
-
39
- const DEFAULT_THRESHOLDS: ThresholdValues = {
40
- schemaCompletenessMin: 0.6,
41
- principleAlignmentMin: 0.7,
42
- executabilityMin: 0.65,
43
- boundednessMin: 0.5,
44
- confidenceMin: 0.6,
45
- aggregateMin: 0.65,
46
- };
47
-
48
- // ---------------------------------------------------------------------------
49
- // Tests: scoreCandidate
50
- // ---------------------------------------------------------------------------
51
-
52
- describe('scoreCandidate', () => {
53
- it('scores a valid candidate correctly', () => {
54
- const candidate = makeCandidate();
55
- const judgment = makeJudgment(0);
56
- const scores = scoreCandidate(candidate, judgment);
57
-
58
- expect(scores.schemaCompleteness).toBeGreaterThan(0);
59
- expect(scores.principleAlignment).toBe(1.0); // principleAligned: true
60
- expect(scores.executability).toBeGreaterThan(0);
61
- expect(scores.boundedness).toBeGreaterThan(0);
62
- expect(scores.confidence).toBeGreaterThan(0);
63
- expect(scores.aggregate).toBeGreaterThan(0);
64
- });
65
-
66
- it('penalizes non-principle-aligned candidates', () => {
67
- const candidate = makeCandidate();
68
- const judgment = makeJudgment(0, { principleAligned: false, score: 0.4 });
69
- const scores = scoreCandidate(candidate, judgment);
70
-
71
- expect(scores.principleAlignment).toBeLessThan(0.5);
72
- });
73
-
74
- it('penalizes missing fields in schema completeness', () => {
75
- const candidate = makeCandidate({ betterDecision: '' });
76
- const judgment = makeJudgment(0);
77
- const scores = scoreCandidate(candidate, judgment);
78
-
79
- expect(scores.schemaCompleteness).toBeLessThan(1.0);
80
- });
81
-
82
- it('penalizes generic betterDecision without actionable verbs', () => {
83
- const candidate = makeCandidate({ betterDecision: 'Do something better' });
84
- const judgment = makeJudgment(0);
85
- const scores = scoreCandidate(candidate, judgment);
86
-
87
- expect(scores.executability).toBeLessThan(1.0);
88
- });
89
-
90
- it('rewards specific betterDecision with file paths', () => {
91
- const candidate = makeCandidate({
92
- betterDecision: 'Read src/main.ts to understand the structure',
93
- });
94
- const judgment = makeJudgment(0);
95
- const scores = scoreCandidate(candidate, judgment);
96
-
97
- expect(scores.boundedness).toBeGreaterThan(0.5);
98
- });
99
-
100
- it('does not penalize words that merely contain "it" as a substring', () => {
101
- const candidate = makeCandidate({
102
- betterDecision: 'Verify preconditions in config.json before retrying',
103
- confidence: 0.92,
104
- });
105
- const judgment = makeJudgment(0, { score: 0.92, principleAligned: true });
106
- const scores = scoreCandidate(candidate, judgment);
107
-
108
- // Boundedness should remain 0.7 (0.5 base + 0.2 specific target) because
109
- // "preconditions" must not trigger the generic word "it" penalty.
110
- expect(scores.boundedness).toBe(0.7);
111
- });
112
-
113
- it('uses custom weights when provided', () => {
114
- const candidate = makeCandidate();
115
- const judgment = makeJudgment(0);
116
- const customWeights = { ...DEFAULT_SCORING_WEIGHTS, principleAlignment: 0.5 };
117
- const scores = scoreCandidate(candidate, judgment, customWeights);
118
-
119
- // With higher weight on principleAlignment, aggregate should be higher for aligned candidates
120
- expect(scores.aggregate).toBeGreaterThan(0);
121
- });
122
-
123
- it('does not crash when badDecision is undefined — lowers score instead', () => {
124
- const candidate = makeCandidate({ badDecision: undefined as unknown as string });
125
- const judgment = makeJudgment(0);
126
- const scores = scoreCandidate(candidate, judgment);
127
-
128
- expect(scores.schemaCompleteness).toBeLessThan(1.0);
129
- expect(scores.aggregate).toBeGreaterThanOrEqual(0);
130
- });
131
-
132
- it('does not crash when betterDecision is undefined — lowers score instead', () => {
133
- const candidate = makeCandidate({ betterDecision: undefined as unknown as string });
134
- const judgment = makeJudgment(0);
135
- const scores = scoreCandidate(candidate, judgment);
136
-
137
- expect(scores.schemaCompleteness).toBeLessThan(1.0);
138
- expect(scores.aggregate).toBeGreaterThanOrEqual(0);
139
- });
140
-
141
- it('does not crash when both badDecision and betterDecision are undefined', () => {
142
- const candidate = makeCandidate({
143
- badDecision: undefined as unknown as string,
144
- betterDecision: undefined as unknown as string,
145
- });
146
- const judgment = makeJudgment(0);
147
- const scores = scoreCandidate(candidate, judgment);
148
-
149
- expect(scores.schemaCompleteness).toBeLessThan(1.0);
150
- expect(scores.aggregate).toBeGreaterThanOrEqual(0);
151
- });
152
- });
153
-
154
- // ---------------------------------------------------------------------------
155
- // Tests: checkThresholds
156
- // ---------------------------------------------------------------------------
157
-
158
- describe('checkThresholds', () => {
159
- it('passes all thresholds with good scores', () => {
160
- const scores = {
161
- schemaCompleteness: 0.9,
162
- principleAlignment: 0.9,
163
- executability: 0.9,
164
- boundedness: 0.9,
165
- confidence: 0.9,
166
- aggregate: 0.9,
167
- };
168
- const [passed, failed] = checkThresholds(scores, DEFAULT_THRESHOLDS);
169
-
170
- expect(passed).toBe(true);
171
- expect(failed).toHaveLength(0);
172
- });
173
-
174
- it('fails when schema completeness is below threshold', () => {
175
- const scores = {
176
- schemaCompleteness: 0.3,
177
- principleAlignment: 0.9,
178
- executability: 0.9,
179
- boundedness: 0.9,
180
- confidence: 0.9,
181
- aggregate: 0.9,
182
- };
183
- const [passed, failed] = checkThresholds(scores, DEFAULT_THRESHOLDS);
184
-
185
- expect(passed).toBe(false);
186
- // checkThresholds returns formatted strings like "schemaCompleteness (0.3 < 0.6)"
187
- expect(failed.some(f => f.includes('schemaCompleteness'))).toBe(true);
188
- });
189
-
190
- it('fails when multiple thresholds are broken', () => {
191
- const scores = {
192
- schemaCompleteness: 0.3,
193
- principleAlignment: 0.3,
194
- executability: 0.3,
195
- boundedness: 0.3,
196
- confidence: 0.3,
197
- aggregate: 0.3,
198
- };
199
- const [passed, failed] = checkThresholds(scores, DEFAULT_THRESHOLDS);
200
-
201
- expect(passed).toBe(false);
202
- expect(failed.length).toBeGreaterThan(1);
203
- });
204
-
205
- it('reports all failed thresholds', () => {
206
- const scores = {
207
- schemaCompleteness: 0.5, // < 0.6 → FAIL
208
- principleAlignment: 0.7, // >= 0.7 → PASS (at threshold)
209
- executability: 0.5, // < 0.65 → FAIL
210
- boundedness: 0.7, // >= 0.65 → PASS (above new threshold)
211
- confidence: 0.5, // < 0.6 → FAIL
212
- aggregate: 0.5, // < 0.65 → FAIL
213
- };
214
- const [passed, failed] = checkThresholds(scores, DEFAULT_THRESHOLDS);
215
-
216
- expect(passed).toBe(false);
217
- // Exactly 4 failures: schemaCompleteness, executability, confidence, aggregate
218
- expect(failed.length).toBe(4);
219
- expect(failed.some(f => f.includes('schemaCompleteness'))).toBe(true);
220
- expect(failed.some(f => f.includes('executability'))).toBe(true);
221
- expect(failed.some(f => f.includes('confidence'))).toBe(true);
222
- expect(failed.some(f => f.includes('aggregate'))).toBe(true);
223
- });
224
- });
225
-
226
- // ---------------------------------------------------------------------------
227
- // Tests: rankCandidates
228
- // ---------------------------------------------------------------------------
229
-
230
- describe('rankCandidates', () => {
231
- it('ranks candidates by aggregate score', () => {
232
- // Use very different confidence levels to ensure clear ranking
233
- // Candidate 0: low confidence (0.5) - lower aggregate
234
- // Candidate 1: high confidence (0.9) - higher aggregate
235
- const candidates = [
236
- makeCandidate({ candidateIndex: 0, confidence: 0.5, betterDecision: 'Read config.json to understand setup' }),
237
- makeCandidate({ candidateIndex: 1, confidence: 0.9, betterDecision: 'Read main.ts to understand setup' }),
238
- ];
239
- const judgments = [
240
- makeJudgment(0, { score: 0.5, rank: 1, principleAligned: true }),
241
- makeJudgment(1, { score: 0.9, rank: 1, principleAligned: true }),
242
- ];
243
-
244
- const ranked = rankCandidates(candidates, judgments, DEFAULT_THRESHOLDS);
245
-
246
- // Candidate 1 has higher score and should be ranked first
247
- expect(ranked[0].candidateIndex).toBe(1);
248
- expect(ranked[0].rank).toBe(1);
249
- expect(ranked[1].rank).toBe(2);
250
- });
251
-
252
- it('excludes candidates that fail thresholds', () => {
253
- // Candidate 0 has low confidence and fails principle alignment - should fail
254
- // Candidate 1 has high confidence and passes - should pass
255
- const candidates = [
256
- makeCandidate({ candidateIndex: 0, confidence: 0.3, betterDecision: 'Check errors in src/main.ts' }),
257
- makeCandidate({ candidateIndex: 1, confidence: 0.9, betterDecision: 'Read error logs in error.json' }),
258
- ];
259
- const judgments = [
260
- makeJudgment(0, { score: 0.5, principleAligned: false }),
261
- makeJudgment(1, { score: 0.9, principleAligned: true }),
262
- ];
263
-
264
- const ranked = rankCandidates(candidates, judgments, DEFAULT_THRESHOLDS);
265
-
266
- // Candidate 1 passes thresholds (high confidence, principle aligned, has file path)
267
- expect(ranked[0].thresholdPassed).toBe(true);
268
- // Candidate 0 fails thresholds (low confidence, not principle aligned)
269
- expect(ranked[1].thresholdPassed).toBe(false);
270
- });
271
-
272
- it('uses candidateIndex as stable tie-break', () => {
273
- // Two candidates with same scoring profile but different indices
274
- const candidates = [
275
- makeCandidate({ candidateIndex: 5, betterDecision: 'Read src/index.ts to understand', confidence: 0.8 }),
276
- makeCandidate({ candidateIndex: 1, betterDecision: 'Read src/index.ts to understand', confidence: 0.8 }),
277
- ];
278
- // Both have identical judgments (same score, both aligned)
279
- const judgments = [
280
- makeJudgment(1, { score: 0.8, principleAligned: true }),
281
- makeJudgment(5, { score: 0.8, principleAligned: true }),
282
- ];
283
-
284
- const ranked = rankCandidates(candidates, judgments, DEFAULT_THRESHOLDS);
285
-
286
- // Lower candidateIndex wins tie
287
- expect(ranked[0].candidateIndex).toBe(1);
288
- });
289
-
290
- it('handles empty input gracefully', () => {
291
- const ranked = rankCandidates([], [], DEFAULT_THRESHOLDS);
292
- expect(ranked).toHaveLength(0);
293
- });
294
-
295
- it('skips candidates without matching judgments', () => {
296
- const candidates = [makeCandidate({ candidateIndex: 0 })];
297
- const judgments = [makeJudgment(99)]; // No matching judgment
298
-
299
- const ranked = rankCandidates(candidates, judgments, DEFAULT_THRESHOLDS);
300
- expect(ranked).toHaveLength(0);
301
- });
302
- });
303
-
304
- // ---------------------------------------------------------------------------
305
- // Tests: runTournament
306
- // ---------------------------------------------------------------------------
307
-
308
- describe('runTournament', () => {
309
- it('selects the highest-scoring threshold-passing candidate', () => {
310
- // Use actionable verbs and proper file paths to pass boundedness threshold
311
- const candidates = [
312
- makeCandidate({ candidateIndex: 0, confidence: 0.7, betterDecision: 'Read config.json to verify settings' }),
313
- makeCandidate({ candidateIndex: 1, confidence: 0.9, betterDecision: 'Review error.json logs for errors' }),
314
- makeCandidate({ candidateIndex: 2, confidence: 0.5, betterDecision: 'Check main.ts before proceeding' }),
315
- ];
316
- const judgments = [
317
- makeJudgment(0, { score: 0.7, principleAligned: true }),
318
- makeJudgment(1, { score: 0.9, principleAligned: true }),
319
- makeJudgment(2, { score: 0.5, principleAligned: true }),
320
- ];
321
-
322
- const result = runTournament(candidates, judgments, DEFAULT_THRESHOLDS);
323
-
324
- expect(result.success).toBe(true);
325
- expect(result.winner).not.toBeNull();
326
- expect(result.winner!.candidateIndex).toBe(1);
327
- expect(result.rankedCandidates).toHaveLength(3);
328
- });
329
-
330
- it('fails when all candidates fail thresholds', () => {
331
- // Candidates with poor confidence and not principle-aligned should fail
332
- const candidates = [
333
- makeCandidate({ candidateIndex: 0, confidence: 0.2, betterDecision: 'Do something in src.ts' }),
334
- makeCandidate({ candidateIndex: 1, confidence: 0.1, betterDecision: 'Try again with config.json' }),
335
- ];
336
- const judgments = [
337
- makeJudgment(0, { score: 0.3, principleAligned: false }),
338
- makeJudgment(1, { score: 0.2, principleAligned: false }),
339
- ];
340
-
341
- const result = runTournament(candidates, judgments, DEFAULT_THRESHOLDS);
342
-
343
- expect(result.success).toBe(false);
344
- expect(result.winner).toBeNull();
345
- expect(result.failureReason).toContain('threshold');
346
- });
347
-
348
- it('provides explainable trace', () => {
349
- const candidates = [makeCandidate({ candidateIndex: 0, betterDecision: 'Read error.json to check logs' })];
350
- const judgments = [makeJudgment(0, { score: 0.9, principleAligned: true })];
351
-
352
- const result = runTournament(candidates, judgments, DEFAULT_THRESHOLDS);
353
-
354
- expect(result.trace).toBeDefined();
355
- expect(result.trace.length).toBeGreaterThan(0);
356
- expect(result.trace[0].step).toBeDefined();
357
- expect(result.trace[0].details).toBeDefined();
358
- });
359
-
360
- it('is deterministic — same inputs yield same winner', () => {
361
- const candidates = [
362
- makeCandidate({ candidateIndex: 0, confidence: 0.8, betterDecision: 'Read config.json to understand' }),
363
- makeCandidate({ candidateIndex: 1, confidence: 0.9, betterDecision: 'Review error.json for issues' }),
364
- ];
365
- const judgments = [
366
- makeJudgment(0, { score: 0.8, principleAligned: true }),
367
- makeJudgment(1, { score: 0.9, principleAligned: true }),
368
- ];
369
-
370
- const result1 = runTournament(candidates, judgments, DEFAULT_THRESHOLDS);
371
- const result2 = runTournament(candidates, judgments, DEFAULT_THRESHOLDS);
372
-
373
- expect(result1.winner!.candidateIndex).toBe(result2.winner!.candidateIndex);
374
- });
375
- });
376
-
377
- // ---------------------------------------------------------------------------
378
- // Tests: DEFAULT_SCORING_WEIGHTS
379
- // ---------------------------------------------------------------------------
380
-
381
- describe('DEFAULT_SCORING_WEIGHTS', () => {
382
- it('has weights that sum to 1.0', () => {
383
- const sum = Object.values(DEFAULT_SCORING_WEIGHTS).reduce((a, b) => a + b, 0);
384
- expect(sum).toBeCloseTo(1.0, 2);
385
- });
386
-
387
- it('has all required properties', () => {
388
- expect(DEFAULT_SCORING_WEIGHTS.schemaCompleteness).toBeDefined();
389
- expect(DEFAULT_SCORING_WEIGHTS.principleAlignment).toBeDefined();
390
- expect(DEFAULT_SCORING_WEIGHTS.executability).toBeDefined();
391
- expect(DEFAULT_SCORING_WEIGHTS.boundedness).toBeDefined();
392
- expect(DEFAULT_SCORING_WEIGHTS.confidence).toBeDefined();
393
- });
394
-
395
- it('has values in valid range (0-1)', () => {
396
- for (const weight of Object.values(DEFAULT_SCORING_WEIGHTS)) {
397
- expect(weight).toBeGreaterThanOrEqual(0);
398
- expect(weight).toBeLessThanOrEqual(1);
399
- }
400
- });
401
- });
402
-
403
- // ---------------------------------------------------------------------------
404
- // Tests: validateCandidateDiversity
405
- // ---------------------------------------------------------------------------
406
-
407
- describe('validateCandidateDiversity', () => {
408
- it('passes when candidates have 2+ distinct risk levels and low keyword overlap', () => {
409
- const candidates: DreamerCandidate[] = [
410
- makeCandidate({ candidateIndex: 0, riskLevel: 'low', betterDecision: 'Read config.json to verify settings' }),
411
- makeCandidate({ candidateIndex: 1, riskLevel: 'high', betterDecision: 'Refactor the entire authentication module from scratch' }),
412
- ];
413
- const result = validateCandidateDiversity(candidates);
414
- expect(result.diversityCheckPassed).toBe(true);
415
- expect(result.riskLevelDiversity).toBe(true);
416
- expect(result.keywordOverlapPassed).toBe(true);
417
- });
418
-
419
- it('fails when all candidates have the same risk level', () => {
420
- const candidates: DreamerCandidate[] = [
421
- makeCandidate({ candidateIndex: 0, riskLevel: 'low', betterDecision: 'Read file A to check settings' }),
422
- makeCandidate({ candidateIndex: 1, riskLevel: 'low', betterDecision: 'Review file completely different approach' }),
423
- makeCandidate({ candidateIndex: 2, riskLevel: 'low', betterDecision: 'Inspect another unique diagnostic method' }),
424
- ];
425
- const result = validateCandidateDiversity(candidates);
426
- expect(result.diversityCheckPassed).toBe(false);
427
- expect(result.riskLevelDiversity).toBe(false);
428
- });
429
-
430
- it('fails when candidate pair has keyword overlap > 0.8', () => {
431
- const candidates: DreamerCandidate[] = [
432
- makeCandidate({ candidateIndex: 0, riskLevel: 'low', betterDecision: 'Review the authentication configuration file before making any changes to the system' }),
433
- makeCandidate({ candidateIndex: 1, riskLevel: 'high', betterDecision: 'Review the authentication configuration file before making any changes to the system' }),
434
- ];
435
- const result = validateCandidateDiversity(candidates);
436
- expect(result.diversityCheckPassed).toBe(false);
437
- expect(result.keywordOverlapPassed).toBe(false);
438
- expect(result.maxOverlapScore).toBeGreaterThan(0.8);
439
- });
440
-
441
- it('passes for single candidate', () => {
442
- const candidates: DreamerCandidate[] = [
443
- makeCandidate({ candidateIndex: 0, riskLevel: 'low' }),
444
- ];
445
- const result = validateCandidateDiversity(candidates);
446
- expect(result.diversityCheckPassed).toBe(true);
447
- expect(result.details).toContain('Single candidate');
448
- });
449
-
450
- it('passes for empty array', () => {
451
- const result = validateCandidateDiversity([]);
452
- expect(result.diversityCheckPassed).toBe(true);
453
- expect(result.details).toContain('No candidates');
454
- });
455
-
456
- it('passes when candidates lack riskLevel (graceful degradation)', () => {
457
- const candidates: DreamerCandidate[] = [
458
- makeCandidate({ candidateIndex: 0, betterDecision: 'Read config.json to verify settings' }),
459
- makeCandidate({ candidateIndex: 1, betterDecision: 'Refactor the entire authentication module from scratch' }),
460
- ];
461
- // No riskLevel on any candidate - should pass (no risk levels to check)
462
- const result = validateCandidateDiversity(candidates);
463
- expect(result.diversityCheckPassed).toBe(true);
464
- expect(result.riskLevelDiversity).toBe(true);
465
- });
466
-
467
- it('fails when some candidates have riskLevel but fewer than 2 distinct values', () => {
468
- const candidates: DreamerCandidate[] = [
469
- makeCandidate({ candidateIndex: 0, riskLevel: 'medium', betterDecision: 'Read config.json to verify settings' }),
470
- makeCandidate({ candidateIndex: 1, betterDecision: 'Refactor the entire authentication module from scratch' }),
471
- ];
472
- // Only 1 candidate has riskLevel, so only 1 distinct value → fail
473
- const result = validateCandidateDiversity(candidates);
474
- expect(result.diversityCheckPassed).toBe(false);
475
- expect(result.riskLevelDiversity).toBe(false);
476
- });
477
-
478
- it('uses max(|A|, |B|) as denominator for keyword overlap', () => {
479
- // Short text A, long text B - overlap should use max as denominator
480
- const candidates: DreamerCandidate[] = [
481
- makeCandidate({ candidateIndex: 0, riskLevel: 'low', betterDecision: 'review authentication configuration' }),
482
- makeCandidate({ candidateIndex: 1, riskLevel: 'high', betterDecision: 'review authentication configuration before proceeding with changes to the deployment pipeline infrastructure' }),
483
- ];
484
- const result = validateCandidateDiversity(candidates);
485
- // "review", "authentication", "configuration" overlap in both
486
- // Set A = {review, authentication, configuration} = 3
487
- // Set B = {review, authentication, configuration, before, proceeding, with, changes, deployment, pipeline, infrastructure} = 10
488
- // intersection = 3, max(3, 10) = 10, overlap = 3/10 = 0.3
489
- expect(result.maxOverlapScore).toBeLessThanOrEqual(0.4);
490
- });
491
-
492
- it('ignores words <= 3 characters in keyword overlap', () => {
493
- const candidates: DreamerCandidate[] = [
494
- makeCandidate({ candidateIndex: 0, riskLevel: 'low', betterDecision: 'the and but for' }),
495
- makeCandidate({ candidateIndex: 1, riskLevel: 'high', betterDecision: 'the and but for' }),
496
- ];
497
- // All words are <= 3 chars, so no keywords extracted → overlap = 0
498
- const result = validateCandidateDiversity(candidates);
499
- expect(result.keywordOverlapPassed).toBe(true);
500
- expect(result.maxOverlapScore).toBe(0);
501
- });
502
-
503
- it('never throws on malformed input', () => {
504
- // Undefined candidates
505
- expect(() => validateCandidateDiversity(undefined as unknown as DreamerCandidate[])).not.toThrow();
506
- // Null candidates
507
- expect(() => validateCandidateDiversity(null as unknown as DreamerCandidate[])).not.toThrow();
508
- // Candidates with undefined fields
509
- expect(() => validateCandidateDiversity([
510
- { candidateIndex: 0 } as DreamerCandidate,
511
- ])).not.toThrow();
512
- // Mixed valid and malformed
513
- expect(() => validateCandidateDiversity([
514
- makeCandidate({ candidateIndex: 0, riskLevel: 'low' }),
515
- { candidateIndex: 1 } as DreamerCandidate,
516
- ])).not.toThrow();
517
- });
518
-
519
- it('returns correct maxOverlapScore rounded to 2 decimal places', () => {
520
- const candidates: DreamerCandidate[] = [
521
- makeCandidate({ candidateIndex: 0, riskLevel: 'low', betterDecision: 'Review configuration settings before deployment' }),
522
- makeCandidate({ candidateIndex: 1, riskLevel: 'high', betterDecision: 'Review configuration settings before deployment testing' }),
523
- ];
524
- const result = validateCandidateDiversity(candidates);
525
- // Verify the maxOverlapScore is a number with at most 2 decimal places
526
- const decimalPart = result.maxOverlapScore.toString().split('.')[1];
527
- if (decimalPart) {
528
- expect(decimalPart.length).toBeLessThanOrEqual(2);
529
- }
530
- expect(typeof result.maxOverlapScore).toBe('number');
531
- });
532
- });
@@ -1,133 +0,0 @@
1
- /**
2
- * Tests for #216: P_* principle violation and opportunity detection
3
- *
4
- * Before this fix, detectOpportunity and detectViolation only handled T-01~T-09,
5
- * causing all P_* principles to return false for both applicable and violated.
6
- */
7
-
8
- import { describe, expect, it } from 'vitest';
9
- import { detectOpportunity, detectViolation } from '../../src/core/nocturnal-compliance.js';
10
- import type { SessionEvents } from '../../src/core/nocturnal-compliance.js';
11
-
12
- function makeSession(overrides: Partial<SessionEvents> = {}): SessionEvents {
13
- return {
14
- sessionId: 'test-session',
15
- toolCalls: overrides.toolCalls ?? [],
16
- painSignals: overrides.painSignals ?? [],
17
- gateBlocks: overrides.gateBlocks ?? [],
18
- userCorrections: overrides.userCorrections ?? [],
19
- planApprovals: overrides.planApprovals ?? [],
20
- };
21
- }
22
-
23
- describe('#216: P_* principle detection', () => {
24
- describe('detectOpportunity for P_* principles', () => {
25
- it('returns applicable=true when session has pain signals', () => {
26
- const session = makeSession({
27
- painSignals: [{ source: 'tool_failure', score: 80, reason: 'write failed' }],
28
- });
29
- const result = detectOpportunity('P_001', session);
30
- expect(result.applicable).toBe(true);
31
- expect(result.reason).toContain('pain signal');
32
- });
33
-
34
- it('returns applicable=true when session has tool failures', () => {
35
- const session = makeSession({
36
- toolCalls: [{ toolName: 'write', filePath: 'test.txt', outcome: 'failure', errorMessage: 'disk full' }],
37
- });
38
- const result = detectOpportunity('P_042', session);
39
- expect(result.applicable).toBe(true);
40
- expect(result.reason).toContain('tool failure');
41
- });
42
-
43
- it('returns applicable=true when session has gate blocks', () => {
44
- const session = makeSession({
45
- gateBlocks: [{ toolName: 'bash', reason: 'high risk operation' }],
46
- });
47
- const result = detectOpportunity('P_065', session);
48
- expect(result.applicable).toBe(true);
49
- expect(result.reason).toContain('gate block');
50
- });
51
-
52
- it('returns applicable=false when session has no negative signals', () => {
53
- const session = makeSession({
54
- toolCalls: [{ toolName: 'read', filePath: 'test.txt', outcome: 'success' }],
55
- });
56
- const result = detectOpportunity('P_001', session);
57
- expect(result.applicable).toBe(false);
58
- expect(result.reason).toContain('no pain/tool-failure/gate-block');
59
- });
60
- });
61
-
62
- describe('detectViolation for P_* principles', () => {
63
- it('returns violated=true when session has high pain signals (score >= 50)', () => {
64
- const session = makeSession({
65
- painSignals: [{ source: 'tool_failure', score: 80, reason: 'write failed' }],
66
- });
67
- const result = detectViolation('P_001', session);
68
- expect(result.violated).toBe(true);
69
- expect(result.reason).toContain('pain signal');
70
- });
71
-
72
- it('returns violated=false when pain signals are low (score < 50)', () => {
73
- const session = makeSession({
74
- painSignals: [{ source: 'minor_issue', score: 30, reason: 'cosmetic' }],
75
- toolCalls: [{ toolName: 'read', filePath: 'test.txt', outcome: 'success' }],
76
- });
77
- const result = detectViolation('P_001', session);
78
- expect(result.violated).toBe(false);
79
- expect(result.reason).toContain('no violation signals');
80
- });
81
-
82
- it('returns violated=true when session has tool failures', () => {
83
- const session = makeSession({
84
- toolCalls: [
85
- { toolName: 'write', filePath: 'test.txt', outcome: 'failure', errorMessage: 'disk full' },
86
- ],
87
- });
88
- const result = detectViolation('P_042', session);
89
- expect(result.violated).toBe(true);
90
- expect(result.reason).toContain('tool failure');
91
- });
92
-
93
- it('returns violated=true when session has gate blocks', () => {
94
- const session = makeSession({
95
- gateBlocks: [{ toolName: 'bash', reason: 'high risk operation' }],
96
- });
97
- const result = detectViolation('P_065', session);
98
- expect(result.violated).toBe(true);
99
- expect(result.reason).toContain('gate block');
100
- });
101
-
102
- it('returns violated=false for clean session with no negative signals', () => {
103
- const session = makeSession({
104
- toolCalls: [{ toolName: 'read', filePath: 'test.txt', outcome: 'success' }],
105
- });
106
- const result = detectViolation('P_001', session);
107
- expect(result.violated).toBe(false);
108
- expect(result.reason).toContain('no violation signals');
109
- });
110
- });
111
-
112
- describe('T-* principles still work (regression check)', () => {
113
- it('T-01 opportunity detected for edit operations', () => {
114
- const session = makeSession({
115
- toolCalls: [{ toolName: 'edit_file', filePath: 'test.ts', outcome: 'success' }],
116
- });
117
- const result = detectOpportunity('T-01', session);
118
- expect(result.applicable).toBe(true);
119
- });
120
-
121
- it('T-01 violation detected when editing without reading first', () => {
122
- const session = makeSession({
123
- toolCalls: [
124
- { toolName: 'edit_file', filePath: 'test.ts', outcome: 'failure', errorMessage: 'merge conflict' },
125
- ],
126
- painSignals: [{ source: 'test.ts edit failed', score: 70, reason: 'Did not survey structure before editing' }],
127
- });
128
- const result = detectViolation('T-01', session);
129
- // T-01 violation: edit without prior read, with pain signal matching file or pattern
130
- expect(result.violated).toBe(true);
131
- });
132
- });
133
- });