principles-disciple 1.72.0 → 1.74.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (319) hide show
  1. package/INSTALL.md +1 -3
  2. package/openclaw.plugin.json +10 -5
  3. package/package.json +17 -19
  4. package/scripts/acceptance-test.mjs +16 -73
  5. package/scripts/sync-plugin.mjs +382 -77
  6. package/src/commands/archive-impl.ts +2 -1
  7. package/src/commands/capabilities.ts +2 -2
  8. package/src/commands/context.ts +2 -2
  9. package/src/commands/disable-impl.ts +2 -1
  10. package/src/commands/evolution-status.ts +16 -16
  11. package/src/commands/export.ts +12 -67
  12. package/src/commands/pain.ts +91 -1
  13. package/src/commands/principle-rollback.ts +2 -1
  14. package/src/commands/promote-impl.ts +7 -43
  15. package/src/commands/rollback-impl.ts +2 -1
  16. package/src/commands/rollback.ts +2 -1
  17. package/src/commands/samples.ts +2 -1
  18. package/src/commands/thinking-os.ts +2 -1
  19. package/src/config/errors.ts +18 -2
  20. package/src/constants/diagnostician.ts +2 -2
  21. package/src/constants/tools.ts +2 -1
  22. package/src/core/__tests__/focus-history.test.ts +210 -0
  23. package/src/core/config.ts +1 -1
  24. package/src/core/correction-cue-learner.ts +2 -136
  25. package/src/core/correction-types.ts +16 -88
  26. package/src/core/dictionary.ts +19 -20
  27. package/src/core/empathy-keyword-matcher.ts +17 -289
  28. package/src/core/empathy-types.ts +18 -229
  29. package/src/core/event-log.ts +29 -132
  30. package/src/core/evolution-reducer.ts +21 -2
  31. package/src/core/evolution-types.ts +76 -464
  32. package/src/core/file-store.ts +80 -0
  33. package/src/core/focus-history.ts +228 -955
  34. package/src/core/local-worker-routing.ts +34 -314
  35. package/src/core/merge-gate-audit.ts +0 -195
  36. package/src/core/migration.ts +0 -1
  37. package/src/core/pain-diagnostic-gate.ts +154 -0
  38. package/src/core/pain-signal.ts +21 -138
  39. package/src/core/pain.ts +15 -88
  40. package/src/core/path-resolver.ts +0 -1
  41. package/src/core/paths.ts +0 -1
  42. package/src/core/pd-task-reconciler.ts +26 -115
  43. package/src/core/pd-task-service.ts +9 -9
  44. package/src/core/pd-task-types.ts +23 -127
  45. package/src/core/principle-compiler/__tests__/compiler-replay-gate.test.ts +174 -0
  46. package/src/core/principle-compiler/code-validator.ts +15 -42
  47. package/src/core/principle-compiler/compiler.ts +100 -15
  48. package/src/core/principle-compiler/index.ts +5 -2
  49. package/src/core/principle-compiler/template-generator.ts +4 -104
  50. package/src/core/principle-injection.ts +10 -202
  51. package/src/core/principle-internalization/filesystem-lifecycle-datasource.ts +42 -0
  52. package/src/core/principle-internalization/lifecycle-read-model.ts +39 -242
  53. package/src/core/principle-internalization/principle-lifecycle-service.ts +12 -10
  54. package/src/core/principle-tree-ledger-adapter.ts +145 -0
  55. package/src/core/principle-tree-ledger.ts +8 -6
  56. package/src/core/reflection/reflection-context.ts +14 -109
  57. package/src/core/replay-engine.ts +8 -500
  58. package/src/core/rule-host-helpers.ts +5 -35
  59. package/src/core/rule-host-types.ts +10 -82
  60. package/src/core/rule-host.ts +6 -63
  61. package/src/core/runtime-v2-prompt-activation-reader.ts +231 -0
  62. package/src/core/session-tracker.ts +87 -101
  63. package/src/core/shadow-observation-registry.ts +19 -48
  64. package/src/core/trajectory.ts +3 -1
  65. package/src/core/workflow-funnel-loader.ts +62 -68
  66. package/src/core/workspace-context.ts +46 -0
  67. package/src/core/workspace-dir-service.ts +1 -1
  68. package/src/core/workspace-dir-validation.ts +18 -9
  69. package/src/hooks/AGENTS.md +1 -1
  70. package/src/hooks/gate-block-helper.ts +71 -64
  71. package/src/hooks/gate.ts +183 -31
  72. package/src/hooks/lifecycle.ts +30 -32
  73. package/src/hooks/llm.ts +60 -32
  74. package/src/hooks/pain.ts +297 -103
  75. package/src/hooks/prompt.ts +400 -440
  76. package/src/hooks/subagent.ts +2 -29
  77. package/src/i18n/commands.ts +2 -10
  78. package/src/index.ts +95 -85
  79. package/src/openclaw-sdk.ts +311 -0
  80. package/src/service/central-database.ts +8 -4
  81. package/src/service/evolution-queue-migration.ts +2 -1
  82. package/src/service/evolution-worker.ts +163 -1786
  83. package/src/service/internalization-trigger-adapter.ts +302 -0
  84. package/src/service/keyword-optimization-service.ts +4 -4
  85. package/src/service/monitoring-query-service.ts +1 -215
  86. package/src/service/queue-io.ts +60 -331
  87. package/src/service/runtime-summary-service.ts +59 -16
  88. package/src/service/subagent-workflow/index.ts +0 -41
  89. package/src/service/subagent-workflow/types.ts +9 -120
  90. package/src/service/subagent-workflow/workflow-store.ts +2 -119
  91. package/src/service/workflow-watchdog.ts +0 -43
  92. package/src/types/event-payload.ts +16 -74
  93. package/src/types/event-types.ts +38 -547
  94. package/src/types/hygiene-types.ts +7 -30
  95. package/src/types/principle-tree-schema.ts +20 -222
  96. package/src/types/queue.ts +15 -70
  97. package/src/types/runtime-summary.ts +5 -49
  98. package/src/utils/io.ts +8 -20
  99. package/src/utils/retry.ts +1 -1
  100. package/src/utils/shadow-fingerprint.ts +2 -2
  101. package/src/utils/workspace-resolver.ts +50 -0
  102. package/templates/langs/en/core/AGENTS.md +7 -7
  103. package/templates/langs/en/core/BOOT.md +1 -1
  104. package/templates/langs/en/core/HEARTBEAT.md +2 -2
  105. package/templates/langs/en/principles/THINKING_OS.md +3 -2
  106. package/templates/langs/en/skills/ai-sprint-orchestration/references/agent-registry.json +1 -72
  107. package/templates/langs/en/skills/ai-sprint-orchestration/references/specs/bugfix-complex-template.json +6 -6
  108. package/templates/langs/en/skills/ai-sprint-orchestration/references/specs/feature-complex-template.json +6 -6
  109. package/templates/langs/en/skills/ai-sprint-orchestration/references/specs/workflow-validation-minimal-verify.json +2 -12
  110. package/templates/langs/en/skills/ai-sprint-orchestration/references/specs/workflow-validation-minimal.json +2 -12
  111. package/templates/langs/en/skills/ai-sprint-orchestration/scripts/run.mjs +51 -15
  112. package/templates/langs/en/skills/evolve-task/SKILL.md +3 -3
  113. package/templates/langs/en/skills/pd-cli-operator/SKILL.md +67 -0
  114. package/templates/langs/en/skills/pd-diagnostician/SKILL.md +1 -1
  115. package/templates/langs/en/skills/pd-mentor/SKILL.md +2 -3
  116. package/templates/langs/en/skills/pd-pain-signal/SKILL.md +17 -39
  117. package/templates/langs/en/skills/pd-runtime-v2/SKILL.md +61 -0
  118. package/templates/langs/zh/core/AGENTS.md +7 -7
  119. package/templates/langs/zh/core/BOOT.md +1 -1
  120. package/templates/langs/zh/core/HEARTBEAT.md +2 -2
  121. package/templates/langs/zh/principles/THINKING_OS.md +3 -2
  122. package/templates/langs/zh/skills/ai-sprint-orchestration/references/agent-registry.json +1 -72
  123. package/templates/langs/zh/skills/ai-sprint-orchestration/references/specs/bugfix-complex-template.json +6 -6
  124. package/templates/langs/zh/skills/ai-sprint-orchestration/references/specs/feature-complex-template.json +6 -6
  125. package/templates/langs/zh/skills/ai-sprint-orchestration/references/specs/nocturnal-trinity-quality-enhancement.json +8 -8
  126. package/templates/langs/zh/skills/ai-sprint-orchestration/references/specs/workflow-validation-minimal-verify.json +2 -12
  127. package/templates/langs/zh/skills/ai-sprint-orchestration/references/specs/workflow-validation-minimal.json +2 -12
  128. package/templates/langs/zh/skills/ai-sprint-orchestration/scripts/run.mjs +51 -15
  129. package/templates/langs/zh/skills/ai-sprint-orchestration/test/run.test.mjs +21 -5
  130. package/templates/langs/zh/skills/evolve-task/SKILL.md +4 -4
  131. package/templates/langs/zh/skills/pd-cli-operator/SKILL.md +67 -0
  132. package/templates/langs/zh/skills/pd-diagnostician/SKILL.md +1 -1
  133. package/templates/langs/zh/skills/pd-mentor/SKILL.md +2 -3
  134. package/templates/langs/zh/skills/pd-pain-signal/SKILL.md +17 -38
  135. package/templates/langs/zh/skills/pd-runtime-v2/SKILL.md +61 -0
  136. package/tests/build-artifacts.test.ts +1 -3
  137. package/tests/commands/evolution-status.test.ts +0 -118
  138. package/tests/core/bootstrap-rules.test.ts +1 -1
  139. package/tests/core/config.test.ts +1 -1
  140. package/tests/core/event-log.test.ts +35 -0
  141. package/tests/core/evolution-engine.test.ts +610 -0
  142. package/tests/core/file-store.test.ts +102 -0
  143. package/tests/core/focus-history.test.ts +203 -11
  144. package/tests/core/merge-gate-audit.test.ts +2 -169
  145. package/tests/core/migration.test.ts +7 -7
  146. package/tests/core/model-deployment-registry.test.ts +7 -1
  147. package/tests/core/model-training-registry.test.ts +19 -0
  148. package/tests/core/observability.test.ts +0 -1
  149. package/tests/core/pain-diagnostic-gate.test.ts +498 -0
  150. package/tests/core/pain.test.ts +0 -1
  151. package/tests/core/path-resolver.test.ts +1 -1
  152. package/tests/core/paths-refactor.test.ts +0 -22
  153. package/tests/core/principle-internalization/deprecated-readiness.test.ts +2 -2
  154. package/tests/core/principle-internalization/lifecycle-metrics.test.ts +2 -2
  155. package/tests/core/principle-internalization/{internalization-routing-policy.test.ts → lifecycle-routing-policy.test.ts} +6 -6
  156. package/tests/core/principle-internalization/lineage-source-retired.test.ts +56 -0
  157. package/tests/core/principle-internalization/principle-lifecycle-service.test.ts +1 -23
  158. package/tests/core/principle-tree-ledger-adapter.test.ts +253 -0
  159. package/tests/core/reflection-context.test.ts +0 -14
  160. package/tests/core/replay-engine.test.ts +127 -215
  161. package/tests/core/rule-host-helpers.test.ts +2 -2
  162. package/tests/core/rule-implementation-runtime.test.ts +0 -27
  163. package/tests/core/workflow-funnel-loader.test.ts +162 -0
  164. package/tests/core/workspace-context.test.ts +2 -2
  165. package/tests/core/workspace-dir-validation.test.ts +8 -1
  166. package/tests/core-anti-growth.test.ts +191 -0
  167. package/tests/hook-workspace-nextaction-contract.test.ts +42 -0
  168. package/tests/hooks/confirm-first-removal.test.ts +188 -0
  169. package/tests/hooks/gate-auto-correct-shadow.test.ts +310 -0
  170. package/tests/hooks/gate-auto-correct.test.ts +665 -0
  171. package/tests/hooks/gate-no-path-write-tool.test.ts +172 -0
  172. package/tests/hooks/gate-rule-host-pipeline.test.ts +2 -1
  173. package/tests/hooks/pain.test.ts +269 -12
  174. package/tests/hooks/prompt-characterization.test.ts +500 -0
  175. package/tests/hooks/prompt-size-guard.test.ts +32 -17
  176. package/tests/hooks/runtime-v2-prompt-activation.test.ts +869 -0
  177. package/tests/index.test.ts +94 -1
  178. package/tests/integration/auto-entry-gate.test.ts +248 -0
  179. package/tests/integration/internalization-trigger-guard.test.ts +69 -0
  180. package/tests/integration/m8-legacy-paths.test.ts +63 -0
  181. package/tests/integration/runtime-v2-pain-guard.test.ts +125 -0
  182. package/tests/plugin-config-resolution-cutover.test.ts +359 -0
  183. package/tests/runtime-v2-discovery-guard.test.ts +154 -0
  184. package/tests/service/central-database.test.ts +457 -0
  185. package/tests/service/evolution-worker.correction-observer.test.ts +173 -0
  186. package/tests/service/evolution-worker.timeout.test.ts +11 -129
  187. package/tests/service/internalization-trigger-adapter.test.ts +251 -0
  188. package/tests/service/monitoring-query-service.test.ts +1 -47
  189. package/tests/service/queue-io.test.ts +1 -62
  190. package/tests/service/runtime-summary-service.test.ts +3 -1
  191. package/tests/service/workflow-watchdog.test.ts +0 -91
  192. package/tests/utils/file-lock.test.ts +5 -3
  193. package/tests/utils/session-key.test.ts +52 -0
  194. package/tests/utils/subagent-probe.test.ts +48 -1
  195. package/vitest.config.ts +4 -11
  196. package/.planning/codebase/ARCHITECTURE.md +0 -157
  197. package/.planning/codebase/CONCERNS.md +0 -145
  198. package/.planning/codebase/CONVENTIONS.md +0 -148
  199. package/.planning/codebase/INTEGRATIONS.md +0 -81
  200. package/.planning/codebase/STACK.md +0 -87
  201. package/.planning/codebase/STRUCTURE.md +0 -193
  202. package/.planning/codebase/TESTING.md +0 -243
  203. package/.planning/phases/01-basic-visualization/01-GAP-CLOSURE-VERIFICATION.md +0 -113
  204. package/docs/COMMAND_REFERENCE.md +0 -76
  205. package/docs/COMMAND_REFERENCE_EN.md +0 -79
  206. package/scripts/build-web.mjs +0 -46
  207. package/scripts/diagnose-nocturnal.mjs +0 -537
  208. package/scripts/seed-nocturnal-scenarios.mjs +0 -384
  209. package/src/commands/nocturnal-review.ts +0 -322
  210. package/src/commands/nocturnal-rollout.ts +0 -790
  211. package/src/commands/nocturnal-train.ts +0 -986
  212. package/src/commands/pd-reflect.ts +0 -88
  213. package/src/core/adaptive-thresholds.ts +0 -478
  214. package/src/core/diagnostician-task-store.ts +0 -192
  215. package/src/core/nocturnal-arbiter.ts +0 -715
  216. package/src/core/nocturnal-artifact-lineage.ts +0 -116
  217. package/src/core/nocturnal-artificer.ts +0 -257
  218. package/src/core/nocturnal-candidate-scoring.ts +0 -530
  219. package/src/core/nocturnal-compliance.ts +0 -1146
  220. package/src/core/nocturnal-dataset.ts +0 -763
  221. package/src/core/nocturnal-executability.ts +0 -428
  222. package/src/core/nocturnal-export.ts +0 -499
  223. package/src/core/nocturnal-paths.ts +0 -240
  224. package/src/core/nocturnal-reasoning-deriver.ts +0 -343
  225. package/src/core/nocturnal-rule-implementation-validator.ts +0 -246
  226. package/src/core/nocturnal-snapshot-contract.ts +0 -99
  227. package/src/core/nocturnal-trajectory-extractor.ts +0 -512
  228. package/src/core/nocturnal-trinity-types.ts +0 -218
  229. package/src/core/nocturnal-trinity.ts +0 -2680
  230. package/src/core/principle-internalization/deprecated-readiness.ts +0 -93
  231. package/src/core/principle-internalization/internalization-routing-policy.ts +0 -208
  232. package/src/core/principle-internalization/lifecycle-metrics.ts +0 -152
  233. package/src/http/principles-console-route.ts +0 -709
  234. package/src/service/central-health-service.ts +0 -49
  235. package/src/service/central-overview-service.ts +0 -138
  236. package/src/service/control-ui-query-service.ts +0 -900
  237. package/src/service/cooldown-strategy.ts +0 -97
  238. package/src/service/evolution-pain-context.ts +0 -79
  239. package/src/service/evolution-query-service.ts +0 -407
  240. package/src/service/health-query-service.ts +0 -1038
  241. package/src/service/nocturnal-config.ts +0 -214
  242. package/src/service/nocturnal-runtime.ts +0 -734
  243. package/src/service/nocturnal-service.ts +0 -1605
  244. package/src/service/nocturnal-target-selector.ts +0 -545
  245. package/src/service/sleep-cycle.ts +0 -157
  246. package/src/service/startup-reconciler.ts +0 -112
  247. package/src/service/subagent-workflow/correction-observer-types.ts +0 -82
  248. package/src/service/subagent-workflow/correction-observer-workflow-manager.ts +0 -250
  249. package/src/service/subagent-workflow/deep-reflect-workflow-manager.ts +0 -1
  250. package/src/service/subagent-workflow/dynamic-timeout.ts +0 -30
  251. package/src/service/subagent-workflow/empathy-observer-workflow-manager.ts +0 -268
  252. package/src/service/subagent-workflow/nocturnal-workflow-manager.ts +0 -795
  253. package/src/service/subagent-workflow/runtime-direct-driver.ts +0 -268
  254. package/src/service/subagent-workflow/workflow-manager-base.ts +0 -580
  255. package/src/tools/write-pain-flag.ts +0 -215
  256. package/templates/langs/en/skills/plan-script/SKILL.md +0 -32
  257. package/templates/langs/zh/skills/plan-script/SKILL.md +0 -32
  258. package/tests/commands/nocturnal-review.test.ts +0 -448
  259. package/tests/commands/nocturnal-train.test.ts +0 -97
  260. package/tests/commands/pd-reflect.test.ts +0 -49
  261. package/tests/core/adaptive-thresholds.test.ts +0 -261
  262. package/tests/core/nocturnal-arbiter.test.ts +0 -559
  263. package/tests/core/nocturnal-artifact-lineage.test.ts +0 -53
  264. package/tests/core/nocturnal-artificer.test.ts +0 -241
  265. package/tests/core/nocturnal-candidate-scoring.test.ts +0 -532
  266. package/tests/core/nocturnal-compliance-p-principles.test.ts +0 -133
  267. package/tests/core/nocturnal-compliance.test.ts +0 -646
  268. package/tests/core/nocturnal-dataset.test.ts +0 -892
  269. package/tests/core/nocturnal-e2e.test.ts +0 -234
  270. package/tests/core/nocturnal-executability.test.ts +0 -357
  271. package/tests/core/nocturnal-export.test.ts +0 -517
  272. package/tests/core/nocturnal-reasoning-deriver.test.ts +0 -372
  273. package/tests/core/nocturnal-reviewed-subset-comparison.test.ts +0 -428
  274. package/tests/core/nocturnal-rule-implementation-validator.test.ts +0 -127
  275. package/tests/core/nocturnal-snapshot-contract.test.ts +0 -121
  276. package/tests/core/nocturnal-trajectory-extractor.test.ts +0 -634
  277. package/tests/core/nocturnal-trinity.test.ts +0 -2053
  278. package/tests/core/pain-auto-repair.test.ts +0 -96
  279. package/tests/core/pain-integration.test.ts +0 -510
  280. package/tests/fixtures/nocturnal-reviewed-subset.json +0 -183
  281. package/tests/http/principles-console-route.test.ts +0 -162
  282. package/tests/integration/chaos-resilience.test.ts +0 -348
  283. package/tests/integration/empathy-workflow-integration.test.ts +0 -626
  284. package/tests/integration/pain-diagnostician-loop.e2e.test.ts +0 -380
  285. package/tests/service/control-ui-query-service.test.ts +0 -121
  286. package/tests/service/cooldown-strategy.test.ts +0 -164
  287. package/tests/service/data-endpoints-regression.test.ts +0 -834
  288. package/tests/service/empathy-observer-workflow-manager.test.ts +0 -175
  289. package/tests/service/evolution-worker.nocturnal.test.ts +0 -601
  290. package/tests/service/nocturnal-runtime-hardening.test.ts +0 -118
  291. package/tests/service/nocturnal-runtime.test.ts +0 -473
  292. package/tests/service/nocturnal-service-code-candidate.test.ts +0 -330
  293. package/tests/service/nocturnal-target-selector.test.ts +0 -615
  294. package/tests/service/startup-reconciler.test.ts +0 -148
  295. package/tests/tools/write-pain-flag.test.ts +0 -358
  296. package/ui/src/App.tsx +0 -45
  297. package/ui/src/api.ts +0 -220
  298. package/ui/src/charts.tsx +0 -955
  299. package/ui/src/components/ErrorState.tsx +0 -6
  300. package/ui/src/components/Loading.tsx +0 -13
  301. package/ui/src/components/ProtectedRoute.tsx +0 -12
  302. package/ui/src/components/Shell.tsx +0 -91
  303. package/ui/src/components/WorkspaceConfig.tsx +0 -178
  304. package/ui/src/components/index.ts +0 -5
  305. package/ui/src/context/auth.tsx +0 -80
  306. package/ui/src/context/theme.tsx +0 -66
  307. package/ui/src/hooks/useAutoRefresh.ts +0 -39
  308. package/ui/src/i18n/ui.ts +0 -473
  309. package/ui/src/main.tsx +0 -16
  310. package/ui/src/pages/EvolutionPage.tsx +0 -333
  311. package/ui/src/pages/FeedbackPage.tsx +0 -138
  312. package/ui/src/pages/GateMonitorPage.tsx +0 -136
  313. package/ui/src/pages/LoginPage.tsx +0 -89
  314. package/ui/src/pages/OverviewPage.tsx +0 -599
  315. package/ui/src/pages/SamplesPage.tsx +0 -174
  316. package/ui/src/pages/ThinkingModelsPage.tsx +0 -702
  317. package/ui/src/styles.css +0 -2020
  318. package/ui/src/types.ts +0 -384
  319. package/ui/src/utils/format.ts +0 -15
@@ -1,1146 +0,0 @@
1
- /**
2
- * Nocturnal Compliance Engine — Opportunity-Based Principle Evaluation
3
- * =====================================================================
4
- *
5
- * Replaces session-average compliance with opportunity-based compliance.
6
- *
7
- * CORE CONCEPTS:
8
- *
9
- * Opportunity — a session context where a principle COULD have been applied.
10
- * An opportunity exists when the agent's action (or planned action)
11
- * falls within the principle's applicability scope.
12
- *
13
- * Compliance — the principle was followed in an opportunity.
14
- * Determined by absence of violation signals, not presence of
15
- * positive confirmation (avoids LLM scoring).
16
- *
17
- * Violation — strong evidence the principle was NOT followed.
18
- * Detected through deterministic event signals (pain, tool failures,
19
- * gate blocks) — no LLM involved.
20
- *
21
- * Dilution prevention — compliance is computed ONLY over sessions where the
22
- * principle had an opportunity. Unrelated sessions
23
- * (where T-05's risky operations never occurred) do NOT
24
- * dilute the compliance rate.
25
- *
26
- * DESIGN CONSTRAINTS (Phase 1):
27
- * - T-xx principles only (deterministic / weak-heuristic evaluability)
28
- * - No P_xxx automation (requires detector metadata — Task 1.3 scope)
29
- * - No LLM-based scoring
30
- * - No training logic
31
- *
32
- * FILE: No file persistence — stateless computation over event stream.
33
- * Caller is responsible for writing results to principle-training-state.ts.
34
- */
35
-
36
- // ---------------------------------------------------------------------------
37
- // Types
38
- // ---------------------------------------------------------------------------
39
-
40
- /**
41
- * Session events extracted from the event log.
42
- * Compatible with EventLogEntry from event-types.ts.
43
- */
44
- export interface SessionEvents {
45
- sessionId: string;
46
- toolCalls: ToolCallRecord[];
47
- painSignals: PainSignalRecord[];
48
- gateBlocks: GateBlockRecord[];
49
- userCorrections: UserCorrectionRecord[];
50
- planApprovals: PlanApprovalRecord[];
51
- }
52
-
53
- export interface ToolCallRecord {
54
- toolName: string;
55
- filePath?: string;
56
- outcome: 'success' | 'failure' | 'blocked';
57
- errorType?: string;
58
- errorMessage?: string;
59
- }
60
-
61
- export interface PainSignalRecord {
62
- source: string;
63
- score: number;
64
- severity?: 'mild' | 'moderate' | 'severe';
65
- reason?: string;
66
- }
67
-
68
- export interface GateBlockRecord {
69
- toolName: string;
70
- filePath?: string;
71
- reason: string;
72
- }
73
-
74
- export interface UserCorrectionRecord {
75
- correctionCue?: string;
76
- }
77
-
78
- export interface PlanApprovalRecord {
79
- toolName: string;
80
- filePath?: string;
81
- }
82
-
83
- /**
84
- * The result of compliance computation for one principle.
85
- */
86
- export interface ComplianceResult {
87
- principleId: string;
88
- /** Number of sessions/events where this principle had an applicable opportunity */
89
- applicableOpportunityCount: number;
90
- /** Number of opportunities where violation signals were detected */
91
- observedViolationCount: number;
92
- /** complianceRate = (opportunities - violations) / opportunities; 0 if none */
93
- complianceRate: number;
94
- /**
95
- * Violation trend:
96
- * +1 = violations increasing (worsening)
97
- * 0 = stable
98
- * -1 = violations decreasing (improving)
99
- */
100
- violationTrend: number;
101
- /**
102
- * Explanation of why the result is what it is.
103
- * For debugging, observability, and reviewer verification.
104
- */
105
- explanation: string;
106
- }
107
-
108
- /**
109
- * Opportunity detection result for a single session.
110
- */
111
- interface OpportunityMatch {
112
- applicable: boolean;
113
- reason: string;
114
- }
115
-
116
- /**
117
- * Violation detection result for a session with applicable opportunity.
118
- */
119
- interface ViolationMatch {
120
- violated: boolean;
121
- reason: string;
122
- }
123
-
124
- // ---------------------------------------------------------------------------
125
- // Risky Operation Registry
126
- // ---------------------------------------------------------------------------
127
-
128
- /**
129
- * Tools and operations that constitute risky actions.
130
- * Gate blocks on these map to T-05 (Safety Rails) violations.
131
- */
132
- const RISKY_TOOLS: Set<string> = new Set([
133
- 'delete_file',
134
- 'move_file',
135
- 'rename_file',
136
- 'delete_directory',
137
- 'bash',
138
- 'MultiExec',
139
- ]);
140
-
141
- /**
142
- * Bash command patterns that constitute dangerous operations.
143
- * Matched against bash command text in tool_call events.
144
- */
145
- const DANGEROUS_BASH_PATTERNS: RegExp[] = [
146
- /rm\s+(-[a-z]*r[a-z]*f?|-rf)/i, // rm -rf / rm -r
147
- /del\s+\/[s/q]/i, // Windows del /s
148
- /rmdir\s+\/s/i, // rmdir /s
149
- /git\s+push\s+.*--force/i, // git push --force
150
- /git\s+reset\s+--hard/i, // git reset --hard
151
- /git\s+clean\s+-f[dx]/i, // git clean -fd
152
- /npm\s+publish/i, // npm publish
153
- /pip\s+upload/i, // pip upload
154
- /docker\s+push/i, // docker push
155
- /curl.+\|\s*(ba)?sh/i, // curl | bash
156
- /wget.+\|\s*(ba)?sh/i, // wget | bash
157
- /^make\s+[^-|]+$/i, // bare make (destructive)
158
- ];
159
-
160
- /**
161
- * Keywords in gate block reason that indicate a dangerous/risky operation.
162
- * Used as a fallback when the tool itself is risky but the reason is free text.
163
- */
164
- const RISKY_KEYWORDS_IN_REASON: RegExp[] = [
165
- /delete|remove|destroy|drop/i,
166
- /force|unsafe|dangerous/i,
167
- /format|truncate|overwrite/i,
168
- /exec|eval|shell|command/i,
169
- /credential|secret|password|token/i,
170
- ];
171
-
172
- /**
173
- * Edit/write tool names.
174
- */
175
- const EDIT_TOOLS: Set<string> = new Set([
176
- 'edit_file',
177
- 'edit_file_batch',
178
- 'write_to_file',
179
- 'create_file',
180
- 'apply_patch',
181
- ]);
182
-
183
- /**
184
- * Read tool names.
185
- */
186
- const READ_TOOLS: Set<string> = new Set([
187
- 'read_file',
188
- 'read_multiple_files',
189
- 'grep',
190
- 'search_files',
191
- 'list_directory',
192
- 'glob',
193
- ]);
194
-
195
- // ---------------------------------------------------------------------------
196
- // Path Normalization (cross-platform)
197
- // ---------------------------------------------------------------------------
198
-
199
- /**
200
- * Normalizes a file path to POSIX forward-slash format for consistent matching.
201
- * Handles Windows backslash paths on any platform.
202
- */
203
- function normalizePathPosix(filePath: string): string {
204
- return filePath.replace(/\\/g, '/');
205
- }
206
-
207
- // ---------------------------------------------------------------------------
208
- // Opportunity Detection
209
- // ---------------------------------------------------------------------------
210
-
211
- /**
212
- * Detects whether a given session presents an APPLICABLE OPPORTUNITY
213
- * for a specific principle.
214
- *
215
- * An opportunity exists when the session context falls within the
216
- * principle's applicability scope — regardless of whether the agent
217
- * followed the principle.
218
- *
219
- * IMPORTANT: This does NOT assess compliance. It only answers:
220
- * "Could the principle have applied here?"
221
- *
222
- * #216: For P_* principles (not T-xx), uses generic detection based on
223
- * pain events and tool calls — any session with a pain signal is considered
224
- * an opportunity for a pain-derived principle.
225
- */
226
- export function detectOpportunity(principleId: string, session: SessionEvents): OpportunityMatch {
227
- // #216: P_* principles (pain-derived) — generic opportunity detection
228
- if (principleId.startsWith('P_')) {
229
- // Any session with pain signals, tool failures, or gate blocks is an opportunity
230
- // for a pain-derived principle. This is conservative: better to over-count
231
- // opportunities than to miss real violations.
232
- const hasPainSignal = session.painSignals.length > 0;
233
- const hasToolFailure = session.toolCalls.some((tc) => tc.outcome === 'failure');
234
- const hasGateBlock = session.gateBlocks.length > 0;
235
- if (hasPainSignal || hasToolFailure || hasGateBlock) {
236
- return { applicable: true, reason: `P_* principle — session has ${hasPainSignal ? 'pain signal' : hasToolFailure ? 'tool failure' : 'gate block'}` };
237
- }
238
- return { applicable: false, reason: `P_* principle — no pain/tool-failure/gate-block in session` };
239
- }
240
-
241
- // T-xx principles — specific deterministic detection
242
-
243
- switch (principleId) {
244
- case 'T-01':
245
-
246
- return detectT01Opportunity(session);
247
- case 'T-02':
248
-
249
- return detectT02Opportunity(session);
250
- case 'T-03':
251
-
252
- return detectT03Opportunity(session);
253
- case 'T-04':
254
-
255
- return detectT04Opportunity(session);
256
- case 'T-05':
257
-
258
- return detectT05Opportunity(session);
259
- case 'T-06':
260
-
261
- return detectT06Opportunity(session);
262
- case 'T-07':
263
-
264
- return detectT07Opportunity(session);
265
- case 'T-08':
266
-
267
- return detectT08Opportunity(session);
268
- case 'T-09':
269
-
270
- return detectT09Opportunity(session);
271
- default:
272
- return { applicable: false, reason: `Unknown principle: ${principleId}` };
273
- }
274
- }
275
-
276
- /**
277
- * T-01 "Survey Before Acting" — Understand the structure first before making changes.
278
- *
279
- * APPLICABLE when: Agent performs edit/write operations.
280
- * Rationale: Any edit to code is an opportunity to survey first.
281
- * Excluded: Read-only sessions (no applicable opportunity).
282
- */
283
- function detectT01Opportunity(session: SessionEvents): OpportunityMatch {
284
- const hasEdit = session.toolCalls.some((call) => EDIT_TOOLS.has(call.toolName));
285
- if (hasEdit) {
286
- return { applicable: true, reason: 'Edit operations present — opportunity to survey before acting' };
287
- }
288
- return { applicable: false, reason: 'No edit operations in session — T-01 not applicable' };
289
- }
290
-
291
- /**
292
- * T-02 "Respect Constraints" — Explicitly reason about contracts, tests, schemas.
293
- *
294
- * APPLICABLE when: Agent interacts with type/test/schema/contract files.
295
- */
296
- function detectT02Opportunity(session: SessionEvents): OpportunityMatch {
297
- const hasConstraintInteraction = session.toolCalls.some((call) => {
298
- if (!call.filePath) return false;
299
- const normalized = normalizePathPosix(call.filePath);
300
- return (
301
- /\.(ts|tsx|js|jsx)$/.test(normalized) || // type-aware files
302
- /\b(test|spec|contract|schema|interface|type)\b/i.test(normalized)
303
- );
304
- });
305
- if (hasConstraintInteraction) {
306
- return { applicable: true, reason: 'Type/test/contract interaction — opportunity to respect constraints' };
307
- }
308
- return { applicable: false, reason: 'No type/test/contract interaction — T-02 not applicable' };
309
- }
310
-
311
/**
 * T-03 "Evidence Over Assumption" — opportunity detector.
 *
 * Applicable in either of two situations, checked in this order:
 *  1. A tool call failed and at least one EARLIER call in the same session
 *     used a tool listed in EDIT_TOOLS (the failing call itself does not
 *     count as its own preceding edit).
 *  2. Any pain signal has severity 'moderate' or 'severe'.
 *
 * @param session - Events recorded for a single session.
 * @returns Whether T-03 had an opportunity, plus a human-readable reason.
 */
function detectT03Opportunity(session: SessionEvents): OpportunityMatch {
  // Walk the calls once, remembering whether an edit has already happened.
  let editSeen = false;
  for (const call of session.toolCalls) {
    if (editSeen && call.outcome === 'failure') {
      return { applicable: true, reason: 'Write operation followed by failure — opportunity to gather evidence before retry' };
    }
    // Updated only after the failure check so a call never precedes itself.
    if (EDIT_TOOLS.has(call.toolName)) {
      editSeen = true;
    }
  }

  for (const pain of session.painSignals) {
    if (pain.severity === 'moderate' || pain.severity === 'severe') {
      return { applicable: true, reason: 'Significant pain signal — opportunity to use evidence over assumption' };
    }
  }

  return { applicable: false, reason: 'No pain or failure on write operations — T-03 not applicable' };
}
343
-
344
/**
 * T-04 "Reversible First" — opportunity detector.
 *
 * Applicable when any tool call uses a tool listed in RISKY_TOOLS or the
 * tool named 'bash'.
 *
 * @param session - Events recorded for a single session.
 * @returns Whether T-04 had an opportunity, plus a human-readable reason.
 */
function detectT04Opportunity(session: SessionEvents): OpportunityMatch {
  for (const { toolName } of session.toolCalls) {
    if (toolName === 'bash' || RISKY_TOOLS.has(toolName)) {
      return { applicable: true, reason: 'Risky/destructive operations — opportunity to prefer reversible changes' };
    }
  }
  return { applicable: false, reason: 'No risky operations — T-04 not applicable' };
}
358
-
359
/**
 * T-05 "Safety Rails" — opportunity detector.
 *
 * Applicable when, in this order:
 *  1. The session contains at least one gate block, or
 *  2. A tool call uses a tool listed in RISKY_TOOLS, or
 *  3. A 'bash' tool call carries a non-empty error message that matches one
 *     of DANGEROUS_BASH_PATTERNS. Only the error message is inspected here;
 *     a bash call without an error message never matches.
 *
 * Sessions with none of these signals are reported as not applicable so they
 * do not dilute the T-05 compliance rate.
 *
 * @param session - Events recorded for a single session.
 * @returns Whether T-05 had an opportunity, plus a human-readable reason.
 */
function detectT05Opportunity(session: SessionEvents): OpportunityMatch {
  if (session.gateBlocks.length > 0) {
    return {
      applicable: true,
      reason: 'Gate block present — opportunity to call out safety rails',
    };
  }

  for (const call of session.toolCalls) {
    let risky = RISKY_TOOLS.has(call.toolName);

    if (!risky && call.toolName === 'bash') {
      const message = call.errorMessage;
      if (message) {
        risky = DANGEROUS_BASH_PATTERNS.some((pattern) => pattern.test(message));
      }
    }

    if (risky) {
      return {
        applicable: true,
        reason: 'Risky operation attempted — opportunity to apply safety rails',
      };
    }
  }

  return {
    applicable: false,
    reason: 'No gate blocks or risky operations — T-05 not applicable in this session',
  };
}
405
-
406
/**
 * T-06 "Simplicity First" — opportunity detector.
 *
 * Applicable when any tool call is a 'create_file' or 'write_to_file', or is
 * a 'bash' call whose error message contains "refactor", "rewrite" or
 * "overhaul" as a whole word (case-insensitive). A missing error message is
 * treated as an empty string.
 *
 * @param session - Events recorded for a single session.
 * @returns Whether T-06 had an opportunity, plus a human-readable reason.
 */
function detectT06Opportunity(session: SessionEvents): OpportunityMatch {
  const rewriteWord = /\b(refactor|rewrite|overhaul)\b/i;

  for (const call of session.toolCalls) {
    const createsFile = call.toolName === 'create_file' || call.toolName === 'write_to_file';
    if (createsFile || (call.toolName === 'bash' && rewriteWord.test(call.errorMessage ?? ''))) {
      return {
        applicable: true,
        reason: 'Non-trivial code creation — opportunity to prefer simplicity',
      };
    }
  }

  return { applicable: false, reason: 'No non-trivial writes — T-06 not applicable' };
}
426
-
427
/**
 * T-07 "Minimal Change Surface" — opportunity detector.
 *
 * Applicable when the tool calls of the session reference at least three
 * distinct files. Paths are normalized before being compared, and calls
 * without a file path are ignored.
 *
 * @param session - Events recorded for a single session.
 * @returns Whether T-07 had an opportunity, plus a human-readable reason.
 */
function detectT07Opportunity(session: SessionEvents): OpportunityMatch {
  const touched = new Set<string>();
  for (const { filePath } of session.toolCalls) {
    if (filePath !== undefined) {
      touched.add(normalizePathPosix(filePath));
    }
  }

  if (touched.size < 3) {
    return { applicable: false, reason: 'Few files touched — T-07 not applicable' };
  }

  return {
    applicable: true,
    reason: `Multiple files touched (${touched.size}) — opportunity to minimize change surface`,
  };
}
446
-
447
/**
 * T-08 "Pain As Signal" — opportunity detector.
 *
 * Applicable only when the session contains BOTH at least one pain signal
 * and at least one tool call whose outcome is 'failure'. The relative order
 * of the pain signal and the failure is not examined.
 *
 * @param session - Events recorded for a single session.
 * @returns Whether T-08 had an opportunity, plus a human-readable reason.
 */
function detectT08Opportunity(session: SessionEvents): OpportunityMatch {
  const notApplicable = { applicable: false, reason: 'No pain-after-failure — T-08 not applicable' };

  if (session.painSignals.length === 0) {
    return notApplicable;
  }

  for (const call of session.toolCalls) {
    if (call.outcome === 'failure') {
      return {
        applicable: true,
        reason: 'Pain signals following failures — opportunity to treat pain as signal',
      };
    }
  }

  return notApplicable;
}
463
-
464
/**
 * T-09 "Divide And Conquer" — opportunity detector.
 *
 * Checked in this order:
 *  1. Complexity: the session has 5 or more tool calls, or its tool calls
 *     reference 3 or more distinct (normalized) file paths.
 *  2. Otherwise, any pain signal or any failed tool call also makes the
 *     principle applicable, on the theory that the trouble may stem from a
 *     task that was not decomposed.
 *
 * @param session - Events recorded for a single session.
 * @returns Whether T-09 had an opportunity, plus a human-readable reason.
 */
function detectT09Opportunity(session: SessionEvents): OpportunityMatch {
  const callCount = session.toolCalls.length;

  const distinctFiles = new Set<string>();
  for (const { filePath } of session.toolCalls) {
    if (filePath !== undefined) {
      distinctFiles.add(normalizePathPosix(filePath));
    }
  }

  if (callCount >= 5 || distinctFiles.size >= 3) {
    return {
      applicable: true,
      reason: `Complex task detected (${callCount} calls, ${distinctFiles.size} files) — opportunity to decompose`,
    };
  }

  const troubled =
    session.painSignals.length > 0 ||
    session.toolCalls.some((call) => call.outcome === 'failure');

  if (troubled) {
    // Pain/failure may indicate the task was too complex without decomposition.
    return {
      applicable: true,
      reason: 'Pain or failure present — opportunity to decompose before retry',
    };
  }

  return {
    applicable: false,
    reason: 'No complexity indicators — T-09 not applicable in this session',
  };
}
509
-
510
- // ---------------------------------------------------------------------------
511
- // Violation Detection
512
- // ---------------------------------------------------------------------------
513
-
514
/**
 * Decides whether a principle was VIOLATED in a session.
 *
 * Two families of principle IDs are handled:
 *
 *  - IDs starting with `P_` (pain-derived, see #216) have no dedicated
 *    detector. They are reported as violated when the session contains, in
 *    priority order: a pain signal with score >= 50, a failed tool call, or a
 *    gate block. The first matching signal determines the reason text.
 *  - IDs `T-01` … `T-09` are dispatched to their dedicated detector.
 *
 * Any other ID logs a warning and is reported as not violated.
 *
 * @param principleId - Principle identifier (e.g. `T-03` or `P_001`).
 * @param session - Events recorded for a single session.
 * @returns The violation verdict with a human-readable reason.
 */
export function detectViolation(principleId: string, session: SessionEvents): ViolationMatch {
  if (principleId.startsWith('P_')) {
    const strongPain = session.painSignals.filter((p) => p.score >= 50);
    if (strongPain.length > 0) {
      return { violated: true, reason: `P_* principle — ${strongPain.length} pain signal(s) detected (max score: ${Math.max(...strongPain.map(p => p.score))})` };
    }

    const failedCalls = session.toolCalls.filter((tc) => tc.outcome === 'failure');
    if (failedCalls.length > 0) {
      return { violated: true, reason: `P_* principle — ${failedCalls.length} tool failure(s) detected` };
    }

    const blockCount = session.gateBlocks.length;
    if (blockCount > 0) {
      return { violated: true, reason: `P_* principle — ${blockCount} gate block(s) detected` };
    }

    return { violated: false, reason: `P_* principle — no violation signals in session` };
  }

  // T-xx principles — one deterministic detector per ID.
  const detectors = new Map<string, (s: SessionEvents) => ViolationMatch>([
    ['T-01', detectT01Violation],
    ['T-02', detectT02Violation],
    ['T-03', detectT03Violation],
    ['T-04', detectT04Violation],
    ['T-05', detectT05Violation],
    ['T-06', detectT06Violation],
    ['T-07', detectT07Violation],
    ['T-08', detectT08Violation],
    ['T-09', detectT09Violation],
  ]);

  const detector = detectors.get(principleId);
  if (detector !== undefined) {
    return detector(session);
  }

  console.warn(`[PD:Compliance] Unknown principle ID: ${principleId} — treating as no violation. Check for typos (P-001 vs P_001).`);
  return { violated: false, reason: `Unknown principle: ${principleId}` };
}
584
-
585
/**
 * T-01 violation detector.
 *
 * A violation requires at least one "unread edit": a call using a tool from
 * EDIT_TOOLS on a file that had NOT been read (by a tool from READ_TOOLS)
 * earlier in the session. Reads are tracked in call order, so a read that
 * happens only after the edit does not excuse it. Paths are normalized
 * before being compared.
 *
 * Given at least one unread edit, the principle is reported as violated when,
 * in this order:
 *  1. a pain signal's source contains the path of an unread edit, or its
 *     reason mentions a structure/context theme;
 *  2. one of the unread edits itself failed;
 *  3. the combined pain reasons mention a broader set of survey/context
 *     themes.
 *
 * @param session - Events recorded for a single session.
 * @returns The violation verdict with a human-readable reason.
 */
function detectT01Violation(session: SessionEvents): ViolationMatch {
  const notViolated: ViolationMatch = {
    violated: false,
    reason: 'No violation signals detected for T-01',
  };

  // Single ordered pass: a file only counts as "read" for edits that come
  // at or after the read call.
  const filesReadSoFar = new Set<string>();
  const unreadEdits: typeof session.toolCalls = [];
  for (const call of session.toolCalls) {
    if (call.filePath === undefined) continue;

    const posixPath = normalizePathPosix(call.filePath);
    if (READ_TOOLS.has(call.toolName)) {
      filesReadSoFar.add(posixPath);
    }
    if (EDIT_TOOLS.has(call.toolName) && !filesReadSoFar.has(posixPath)) {
      unreadEdits.push(call);
    }
  }

  // Every violation path below needs at least one unread edit.
  if (unreadEdits.length === 0) {
    return notViolated;
  }

  const painOnUnreadEdit = session.painSignals.some(
    (p) =>
      unreadEdits.some((e) => e.filePath !== undefined && p.source.includes(e.filePath)) ||
      /structure|architecture|dependency|context|before.*edit|survey/i.test(p.reason ?? '')
  );
  if (painOnUnreadEdit) {
    return {
      violated: true,
      reason: `Edits to unread files (${unreadEdits.length}) followed by pain — T-01 violated: agent acted without surveying first`,
    };
  }

  if (unreadEdits.some((e) => e.outcome === 'failure')) {
    return {
      violated: true,
      reason: `Edits to unread files (${unreadEdits.length}) followed by failures — T-01 violated: agent acted without understanding`,
    };
  }

  // Broader theme scan over all pain reasons joined together.
  const hasPainTheme =
    /structure|architecture|context|before.*acting|didn't.*survey|didn't.*read.*first/i.test(
      session.painSignals.map((p) => p.reason ?? '').join(' ')
    );
  if (hasPainTheme) {
    return {
      violated: true,
      reason: 'Pain signals mentioning structure/context themes after edits to unread files — T-01 violated',
    };
  }

  return notViolated;
}
650
-
651
/**
 * T-02 violation detector.
 *
 * Counts failed tool calls that carry a file path and are constraint-related,
 * meaning either the path contains test/spec/contract/schema/interface/type
 * as a whole word, or the error message contains type/test/contract as a
 * whole word (both case-insensitive). One such failure is enough to report
 * a violation.
 *
 * @param session - Events recorded for a single session.
 * @returns The violation verdict with a human-readable reason.
 */
function detectT02Violation(session: SessionEvents): ViolationMatch {
  const constraintPath = /\b(test|spec|contract|schema|interface|type)\b/i;
  const constraintError = /\b(type|test|contract)\b/i;

  let failureCount = 0;
  for (const call of session.toolCalls) {
    if (call.outcome !== 'failure' || call.filePath === undefined) continue;

    if (constraintPath.test(call.filePath) || constraintError.test(call.errorMessage ?? '')) {
      failureCount += 1;
    }
  }

  if (failureCount === 0) {
    return { violated: false, reason: 'No violation signals for T-02' };
  }

  return {
    violated: true,
    reason: `Tool failures on type/test/contract interactions (${failureCount}) — T-02 violated: constraints not verified`,
  };
}
673
-
674
/**
 * T-03 violation detector.
 *
 * Reports a violation when a tool call failed and no earlier call in the
 * session was a read (a tool from READ_TOOLS with a file path). Because
 * "a read happened earlier" can only become true as the session progresses,
 * the first failure decides the outcome: if it was preceded by a read, so
 * was every later failure.
 *
 * @param session - Events recorded for a single session.
 * @returns The violation verdict; the reason names the index of the failing call.
 */
function detectT03Violation(session: SessionEvents): ViolationMatch {
  const calls = session.toolCalls;
  const firstFailure = calls.findIndex((call) => call.outcome === 'failure');

  if (firstFailure >= 0) {
    let readBeforeFailure = false;
    for (let i = 0; i < firstFailure && !readBeforeFailure; i++) {
      const earlier = calls[i];
      readBeforeFailure = READ_TOOLS.has(earlier.toolName) && earlier.filePath !== undefined;
    }

    if (!readBeforeFailure) {
      return {
        violated: true,
        reason: `Tool failure at index ${firstFailure} without prior read operations — T-03 violated: assumption made without evidence`,
      };
    }
  }

  return { violated: false, reason: 'No violation signals for T-03' };
}
698
-
699
/**
 * T-04 violation detector.
 *
 * If the session contains no risky operation (a tool from RISKY_TOOLS or the
 * 'bash' tool), the principle cannot be violated. Otherwise, the mere
 * presence of any pain signal in the same session is taken as evidence that
 * the risky operation had negative consequences.
 *
 * @param session - Events recorded for a single session.
 * @returns The violation verdict with a human-readable reason.
 */
function detectT04Violation(session: SessionEvents): ViolationMatch {
  const attemptedRisky = session.toolCalls.some(
    ({ toolName }) => RISKY_TOOLS.has(toolName) || toolName === 'bash'
  );

  if (!attemptedRisky) {
    return { violated: false, reason: 'No risky operations — T-04 not violated' };
  }

  if (session.painSignals.length === 0) {
    return { violated: false, reason: 'No violation signals for T-04' };
  }

  return {
    violated: true,
    reason: 'Pain signals present alongside risky operations — T-04 violated: irreversible consequences',
  };
}
722
-
723
/**
 * T-05 violation detector.
 *
 * Any gate block in the session counts as a violation; without gate blocks
 * the principle is not violated. The reason text distinguishes "dangerous"
 * blocks from ordinary ones. A block is dangerous when:
 *  - its tool is listed in RISKY_TOOLS, or
 *  - its tool is 'bash' and its reason matches one of DANGEROUS_BASH_PATTERNS, or
 *  - its reason matches one of RISKY_KEYWORDS_IN_REASON.
 *
 * @param session - Events recorded for a single session.
 * @returns The violation verdict with a human-readable reason.
 */
function detectT05Violation(session: SessionEvents): ViolationMatch {
  const blocks = session.gateBlocks;
  if (blocks.length === 0) {
    return { violated: false, reason: 'No gate blocks — T-05 not violated' };
  }

  let dangerousCount = 0;
  for (const block of blocks) {
    const dangerous =
      RISKY_TOOLS.has(block.toolName) ||
      (block.toolName === 'bash' && DANGEROUS_BASH_PATTERNS.some((p) => p.test(block.reason))) ||
      // Fallback: scan the reason for risky keywords.
      RISKY_KEYWORDS_IN_REASON.some((p) => p.test(block.reason));

    if (dangerous) {
      dangerousCount += 1;
    }
  }

  if (dangerousCount > 0) {
    return {
      violated: true,
      reason: `Gate blocks on dangerous operations (${dangerousCount}) — T-05 violated: safety rail not called out`,
    };
  }

  return {
    violated: true,
    reason: `Gate blocks present (${blocks.length}) — T-05 violated: safety rail not respected`,
  };
}
759
-
760
/**
 * T-06 violation detector.
 *
 * Reports a violation when some pain signal is of severity 'severe' AND its
 * reason text mentions over-engineering (matching "over…engineer",
 * "over…complicat", "too…complex" or "unnecessarily…complex",
 * case-insensitive). A missing reason is treated as an empty string.
 *
 * @param session - Events recorded for a single session.
 * @returns The violation verdict with a human-readable reason.
 */
function detectT06Violation(session: SessionEvents): ViolationMatch {
  const overEngineering = /over.*engineer|over.*complicat|too.*complex|unnecessarily.*complex/i;

  for (const pain of session.painSignals) {
    if (pain.severity !== 'severe') continue;

    if (overEngineering.test(pain.reason ?? '')) {
      return {
        violated: true,
        reason: 'Severe pain from over-engineering — T-06 violated: simplicity not preferred',
      };
    }
  }

  return { violated: false, reason: 'No over-engineering signals — T-06 not violated' };
}
780
-
781
/**
 * T-07 violation detector.
 *
 * Reports a violation when the session edited at least 5 distinct files
 * (calls using a tool from EDIT_TOOLS, paths normalized) AND contains at
 * least 2 failed tool calls of any kind.
 *
 * @param session - Events recorded for a single session.
 * @returns The violation verdict with a human-readable reason.
 */
function detectT07Violation(session: SessionEvents): ViolationMatch {
  const editedFiles = new Set<string>();
  let failureCount = 0;

  for (const call of session.toolCalls) {
    if (call.filePath !== undefined && EDIT_TOOLS.has(call.toolName)) {
      editedFiles.add(normalizePathPosix(call.filePath));
    }
    if (call.outcome === 'failure') {
      failureCount += 1;
    }
  }

  if (editedFiles.size < 5 || failureCount < 2) {
    return { violated: false, reason: 'No blast radius violations — T-07 not violated' };
  }

  return {
    violated: true,
    reason: `Wide blast radius (${editedFiles.size} files, ${failureCount} failures) — T-07 violated: change surface not minimized`,
  };
}
804
-
805
/**
 * T-08 violation detector.
 *
 * There are no explicit "reflection" events, so a proxy is used: when the
 * session has a pain signal and a failed tool call, look at the (up to)
 * three tool calls right after the FIRST failure. If there is at least one
 * such call and none of them uses a tool from READ_TOOLS, the agent is
 * assumed to have pressed on without pausing to investigate.
 *
 * @param session - Events recorded for a single session.
 * @returns The violation verdict with a human-readable reason.
 */
function detectT08Violation(session: SessionEvents): ViolationMatch {
  const clean: ViolationMatch = { violated: false, reason: 'No T-08 violation signals detected' };

  if (session.painSignals.length === 0) {
    return clean;
  }

  const firstFailure = session.toolCalls.findIndex((c) => c.outcome === 'failure');
  if (firstFailure < 0) {
    return clean;
  }

  const followUps = session.toolCalls.slice(firstFailure + 1, firstFailure + 4);
  if (followUps.length === 0) {
    return clean;
  }

  const investigated = followUps.some((c) => READ_TOOLS.has(c.toolName));
  if (investigated) {
    return clean;
  }

  return {
    violated: true,
    reason: 'Failure followed immediately by continued operations without pause/reflect — T-08 violated: pain not treated as signal',
  };
}
835
-
836
/**
 * T-09 violation detector.
 *
 * Only complex sessions are considered: 5+ tool calls, or 3+ distinct
 * (normalized) file paths. For such a session a violation is reported when
 * there was a failure or a pain signal, AND the session shows neither a plan
 * approval nor any call using a tool from READ_TOOLS — i.e. no sign of
 * planning or investigation.
 *
 * @param session - Events recorded for a single session.
 * @returns The violation verdict with a human-readable reason.
 */
function detectT09Violation(session: SessionEvents): ViolationMatch {
  const calls = session.toolCalls;

  const distinctFiles = new Set<string>();
  for (const { filePath } of calls) {
    if (filePath !== undefined) {
      distinctFiles.add(normalizePathPosix(filePath));
    }
  }

  const isComplex = calls.length >= 5 || distinctFiles.size >= 3;
  if (!isComplex) {
    return { violated: false, reason: 'Session not complex enough for T-09 applicability' };
  }

  const troubled =
    calls.some((call) => call.outcome === 'failure') || session.painSignals.length > 0;

  if (troubled) {
    // Plan approvals and read calls are the only planning signals available.
    const planned =
      session.planApprovals.length > 0 || calls.some((call) => READ_TOOLS.has(call.toolName));

    if (!planned) {
      return {
        violated: true,
        reason: `Complex task with failures/pain but no planning or decomposition signals — T-09 violated: task not divided`,
      };
    }
  }

  return { violated: false, reason: 'No T-09 violation signals' };
}
874
-
875
- // ---------------------------------------------------------------------------
876
- // Compliance Computation
877
- // ---------------------------------------------------------------------------
878
-
879
/**
 * Computes compliance metrics for one principle across a batch of sessions.
 *
 * Dilution prevention: a session only counts when `detectOpportunity`
 * reports the principle as applicable. Non-applicable sessions are skipped
 * entirely and influence neither the counts nor the rate.
 *
 * Result fields:
 *  - `applicableOpportunityCount`: number of applicable sessions.
 *  - `observedViolationCount`: applicable sessions flagged by `detectViolation`.
 *  - `complianceRate`: (opportunities - violations) / opportunities, or 0
 *    when there were no opportunities at all.
 *  - `violationTrend`: value produced by `computeViolationTrend` over the
 *    applicable sessions, in input order (callers are expected to pass
 *    sessions most-recent-first).
 *  - `explanation`: human-readable summary from `buildExplanation`.
 *
 * @param principleId - Principle to evaluate.
 * @param sessions - Sessions to inspect, most recent first.
 * @param options - `trendWindowSize` sets the trend window (default 3).
 * @returns The aggregated compliance result.
 */
export function computeCompliance(
  principleId: string,
  sessions: SessionEvents[],
  options: { trendWindowSize?: number } = {}
): ComplianceResult {
  const windowSize = options.trendWindowSize ?? 3;

  // Keep only sessions where the principle had an opportunity, paired with
  // their violation verdict.
  const applicableSessions: { session: SessionEvents; violated: boolean; reason: string }[] = [];
  sessions.forEach((session) => {
    if (!detectOpportunity(principleId, session).applicable) {
      return; // no opportunity — must not dilute the rate
    }
    const { violated, reason } = detectViolation(principleId, session);
    applicableSessions.push({ session, violated, reason });
  });

  const applicableOpportunityCount = applicableSessions.length;
  const observedViolationCount = applicableSessions.filter((entry) => entry.violated).length;

  let complianceRate = 0;
  if (applicableOpportunityCount > 0) {
    complianceRate =
      (applicableOpportunityCount - observedViolationCount) / applicableOpportunityCount;
  }

  const violationTrend = computeViolationTrend(applicableSessions, windowSize);

  return {
    principleId,
    applicableOpportunityCount,
    observedViolationCount,
    complianceRate,
    violationTrend,
    explanation: buildExplanation(
      principleId,
      applicableOpportunityCount,
      observedViolationCount,
      complianceRate,
      violationTrend,
      applicableSessions
    ),
  };
}
959
-
960
/**
 * Computes the direction of the violation rate over the applicable sessions.
 *
 * Input is expected most-recent-first. Two adjacent windows are compared:
 *  - current window  = the first `windowSize` sessions (most recent);
 *  - previous window = the next `windowSize` sessions (older).
 *
 * If there is no previous window, the current window is compared against the
 * overall violation rate instead. A difference of at most 0.1 in either
 * direction counts as stable.
 *
 * `windowSize` is floored to an integer. Values that are not finite or are
 * below 1 cannot form a meaningful window and yield 0, rather than being
 * passed to `slice`, where negative or fractional bounds would produce
 * mismatched windows.
 *
 * @param applicableSessions - Verdicts of applicable sessions, most recent first.
 * @param windowSize - Number of sessions per comparison window.
 * @returns `1` when violations are decreasing (improving), `-1` when they are
 *   increasing (worsening), `0` when stable or when data is insufficient.
 */
function computeViolationTrend(
  applicableSessions: { violated: boolean }[],
  windowSize: number
): number {
  const size = Math.floor(windowSize);
  if (!Number.isFinite(size) || size < 1) {
    // Invalid window — no meaningful trend can be computed.
    return 0;
  }

  if (applicableSessions.length < 2) {
    // Not enough data for a trend.
    return 0;
  }

  const violationRate = (window: { violated: boolean }[]): number =>
    window.filter((s) => s.violated).length / window.length;

  const currentWindow = applicableSessions.slice(0, size);
  const previousWindow = applicableSessions.slice(size, size * 2);

  const currentRate = violationRate(currentWindow);

  // With no older window, fall back to the overall rate as the baseline.
  const baselineRate =
    previousWindow.length === 0 ? violationRate(applicableSessions) : violationRate(previousWindow);

  const delta = baselineRate - currentRate;
  if (delta > 0.1) return 1; // violations decreasing → improving
  if (delta < -0.1) return -1; // violations increasing → worsening
  return 0; // stable
}
1007
-
1008
/**
 * Renders a compliance result as human-readable text.
 *
 * With zero applicable opportunities a single sentence is returned saying
 * compliance cannot be assessed. Otherwise the text has three lines:
 * the opportunity/violation counts, the compliance rate (as a percentage
 * with one decimal) together with the trend label, and either up to two
 * sample violation reasons or a note that no violations were detected.
 *
 * @param principleId - Principle the summary is about.
 * @param applicableOpportunityCount - Number of applicable sessions.
 * @param observedViolationCount - Number of those sessions with a violation.
 * @param complianceRate - Fraction in [0, 1]; shown as a percentage.
 * @param violationTrend - `1` improving, `-1` worsening, anything else stable.
 * @param applicableSessions - Per-session verdicts, used for sample reasons.
 * @returns The formatted explanation.
 */
function buildExplanation(
  principleId: string,
  applicableOpportunityCount: number,
  observedViolationCount: number,
  complianceRate: number,
  violationTrend: number,
  applicableSessions: { violated: boolean; reason: string }[]
): string {
  if (applicableOpportunityCount === 0) {
    return `${principleId}: No applicable opportunities in provided sessions — compliance cannot be assessed.`;
  }

  let trendLabel = '→ stable';
  if (violationTrend === 1) {
    trendLabel = '↑ improving';
  } else if (violationTrend === -1) {
    trendLabel = '↓ worsening';
  }

  // Collect at most two violation reasons, in input order.
  const samples: string[] = [];
  for (const entry of applicableSessions) {
    if (!entry.violated) continue;
    samples.push(` • ${entry.reason}`);
    if (samples.length === 2) break;
  }
  const sampleText = samples.join('\n');

  const countsLine = `${principleId}: ${applicableOpportunityCount} applicable opportunities, ${observedViolationCount} violations.`;
  const rateLine = `Compliance rate: ${(complianceRate * 100).toFixed(1)}%. Trend: ${trendLabel}.`;
  const samplesLine = sampleText
    ? `Sample violation signals:\n${sampleText}`
    : 'No violations detected in recent sessions.';

  return `${countsLine}\n${rateLine}\n${samplesLine}`;
}
1044
-
1045
- // ---------------------------------------------------------------------------
1046
- // Batch Update Helpers
1047
- // ---------------------------------------------------------------------------
1048
-
1049
/**
 * Runs `computeCompliance` for each of the nine built-in principles,
 * T-01 through T-09, over the same batch of sessions.
 *
 * @param sessions - Sessions to inspect; assumed to be ordered most-recent-first.
 * @param options - Forwarded unchanged to `computeCompliance`.
 * @returns One result per principle, in ascending ID order.
 */
export function computeAllCompliance(
  sessions: SessionEvents[],
  options: { trendWindowSize?: number } = {}
): ComplianceResult[] {
  const principleIds = ['T-01', 'T-02', 'T-03', 'T-04', 'T-05', 'T-06', 'T-07', 'T-08', 'T-09'];
  return principleIds.map((principleId) => computeCompliance(principleId, sessions, options));
}
1065
-
1066
/**
 * Groups raw log events by session and converts them to `SessionEvents`.
 *
 * Events are bucketed by `sessionId`; events without one go to the
 * `'unknown'` session. Within a session, events keep their input order.
 * Handled event types:
 *  - `tool_call`        → `toolCalls` (only when `data.toolName` is a
 *                          non-empty string; outcome is `'failure'` when
 *                          `data.error` is truthy, otherwise `'success'`)
 *  - `pain_signal`      → `painSignals`
 *  - `gate_block`       → `gateBlocks`
 *  - `empathy_rollback` → `userCorrections` (user corrections are flagged
 *                          via empathy rollback)
 *  - `plan_approval`    → `planApprovals`
 * Other event types still create their session bucket but add nothing to it.
 *
 * The `data` payload is untyped log content, so every field is checked at
 * runtime instead of being cast: values of the wrong type fall back to the
 * field's default (`undefined`, `'unknown'`, `''` or `0`) so that later
 * string and number operations on the result cannot throw. A missing `data`
 * object is treated as empty.
 *
 * @param events - Raw log entries in chronological log order.
 * @returns Map from session ID to the session's collected events.
 */
export function groupEventsIntoSessions(events: RawEventEntry[]): Map<string, SessionEvents> {
  const optionalString = (value: unknown): string | undefined =>
    typeof value === 'string' ? value : undefined;

  const severities = ['mild', 'moderate', 'severe'] as const;
  const optionalSeverity = (value: unknown): (typeof severities)[number] | undefined =>
    severities.find((severity) => severity === value);

  // Accepts a plain message or an error-like object carrying a string `message`.
  const errorText = (value: unknown): string | undefined => {
    if (typeof value === 'string') return value;
    if (typeof value === 'object' && value !== null) {
      const message = (value as { message?: unknown }).message;
      if (typeof message === 'string') return message;
    }
    return undefined;
  };

  const sessionMap = new Map<string, SessionEvents>();

  for (const event of events) {
    const sessionId = event.sessionId ?? 'unknown';

    let session = sessionMap.get(sessionId);
    if (session === undefined) {
      session = {
        sessionId,
        toolCalls: [],
        painSignals: [],
        gateBlocks: [],
        userCorrections: [],
        planApprovals: [],
      };
      sessionMap.set(sessionId, session);
    }

    // Malformed log lines may lack the payload entirely.
    const data: Record<string, unknown> = event.data ?? {};

    switch (event.type) {
      case 'tool_call': {
        const toolName = optionalString(data.toolName);
        if (toolName) {
          session.toolCalls.push({
            toolName,
            filePath: optionalString(data.filePath),
            outcome: data.error ? 'failure' : 'success',
            errorType: optionalString(data.errorType),
            errorMessage: errorText(data.error),
          });
        }
        break;
      }
      case 'pain_signal':
        session.painSignals.push({
          source: optionalString(data.source) ?? 'unknown',
          score: typeof data.score === 'number' && Number.isFinite(data.score) ? data.score : 0,
          severity: optionalSeverity(data.severity),
          reason: optionalString(data.reason),
        });
        break;
      case 'gate_block':
        session.gateBlocks.push({
          toolName: optionalString(data.toolName) ?? 'unknown',
          filePath: optionalString(data.filePath),
          reason: optionalString(data.reason) ?? '',
        });
        break;
      case 'empathy_rollback':
        session.userCorrections.push({
          correctionCue: optionalString(data.reason),
        });
        break;
      case 'plan_approval':
        session.planApprovals.push({
          toolName: optionalString(data.toolName) ?? 'unknown',
          filePath: optionalString(data.filePath),
        });
        break;
      default:
        // Event types that carry no compliance signal are ignored.
        break;
    }
  }

  return sessionMap;
}
1136
-
1137
/**
 * One raw entry of the events.jsonl log, as consumed by
 * `groupEventsIntoSessions`.
 *
 * Per the original note this shape is compatible with `EventLogEntry` from
 * event-types.ts — NOTE(review): that type is not visible here; confirm.
 */
export interface RawEventEntry {
  /** Timestamp of the event, as a string. */
  ts: string;
  /** Event kind, e.g. 'tool_call', 'pain_signal' or 'gate_block'. */
  type: string;
  /** Session the event belongs to; absent events are grouped under 'unknown'. */
  sessionId?: string;
  /** Untyped, event-specific payload. */
  data: Record<string, unknown>;
}