principles-disciple 1.7.6 → 1.7.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106) hide show
  1. package/dist/commands/context.js +5 -15
  2. package/dist/commands/evolution-status.js +2 -9
  3. package/dist/commands/export.js +61 -8
  4. package/dist/commands/nocturnal-review.d.ts +24 -0
  5. package/dist/commands/nocturnal-review.js +265 -0
  6. package/dist/commands/nocturnal-rollout.d.ts +27 -0
  7. package/dist/commands/nocturnal-rollout.js +671 -0
  8. package/dist/commands/nocturnal-train.d.ts +25 -0
  9. package/dist/commands/nocturnal-train.js +919 -0
  10. package/dist/commands/pain.js +8 -21
  11. package/dist/constants/tools.d.ts +2 -2
  12. package/dist/constants/tools.js +1 -1
  13. package/dist/core/adaptive-thresholds.d.ts +186 -0
  14. package/dist/core/adaptive-thresholds.js +300 -0
  15. package/dist/core/config.d.ts +2 -38
  16. package/dist/core/config.js +6 -61
  17. package/dist/core/event-log.d.ts +1 -2
  18. package/dist/core/event-log.js +0 -3
  19. package/dist/core/evolution-engine.js +1 -21
  20. package/dist/core/evolution-reducer.d.ts +7 -1
  21. package/dist/core/evolution-reducer.js +56 -4
  22. package/dist/core/evolution-types.d.ts +61 -9
  23. package/dist/core/evolution-types.js +31 -9
  24. package/dist/core/external-training-contract.d.ts +276 -0
  25. package/dist/core/external-training-contract.js +269 -0
  26. package/dist/core/local-worker-routing.d.ts +175 -0
  27. package/dist/core/local-worker-routing.js +525 -0
  28. package/dist/core/model-deployment-registry.d.ts +218 -0
  29. package/dist/core/model-deployment-registry.js +503 -0
  30. package/dist/core/model-training-registry.d.ts +295 -0
  31. package/dist/core/model-training-registry.js +475 -0
  32. package/dist/core/nocturnal-arbiter.d.ts +159 -0
  33. package/dist/core/nocturnal-arbiter.js +534 -0
  34. package/dist/core/nocturnal-candidate-scoring.d.ts +137 -0
  35. package/dist/core/nocturnal-candidate-scoring.js +266 -0
  36. package/dist/core/nocturnal-compliance.d.ts +175 -0
  37. package/dist/core/nocturnal-compliance.js +824 -0
  38. package/dist/core/nocturnal-dataset.d.ts +224 -0
  39. package/dist/core/nocturnal-dataset.js +443 -0
  40. package/dist/core/nocturnal-executability.d.ts +85 -0
  41. package/dist/core/nocturnal-executability.js +331 -0
  42. package/dist/core/nocturnal-export.d.ts +124 -0
  43. package/dist/core/nocturnal-export.js +275 -0
  44. package/dist/core/nocturnal-paths.d.ts +124 -0
  45. package/dist/core/nocturnal-paths.js +214 -0
  46. package/dist/core/nocturnal-trajectory-extractor.d.ts +242 -0
  47. package/dist/core/nocturnal-trajectory-extractor.js +307 -0
  48. package/dist/core/nocturnal-trinity.d.ts +311 -0
  49. package/dist/core/nocturnal-trinity.js +880 -0
  50. package/dist/core/paths.d.ts +6 -0
  51. package/dist/core/paths.js +6 -0
  52. package/dist/core/principle-training-state.d.ts +121 -0
  53. package/dist/core/principle-training-state.js +321 -0
  54. package/dist/core/promotion-gate.d.ts +238 -0
  55. package/dist/core/promotion-gate.js +529 -0
  56. package/dist/core/session-tracker.d.ts +10 -0
  57. package/dist/core/session-tracker.js +14 -0
  58. package/dist/core/shadow-observation-registry.d.ts +217 -0
  59. package/dist/core/shadow-observation-registry.js +308 -0
  60. package/dist/core/training-program.d.ts +233 -0
  61. package/dist/core/training-program.js +433 -0
  62. package/dist/core/trajectory.d.ts +95 -1
  63. package/dist/core/trajectory.js +220 -6
  64. package/dist/core/workspace-context.d.ts +0 -6
  65. package/dist/core/workspace-context.js +0 -12
  66. package/dist/hooks/bash-risk.d.ts +6 -6
  67. package/dist/hooks/bash-risk.js +8 -8
  68. package/dist/hooks/gate-block-helper.js +1 -1
  69. package/dist/hooks/gate.d.ts +1 -1
  70. package/dist/hooks/gate.js +2 -2
  71. package/dist/hooks/gfi-gate.d.ts +3 -3
  72. package/dist/hooks/gfi-gate.js +15 -14
  73. package/dist/hooks/pain.js +6 -9
  74. package/dist/hooks/progressive-trust-gate.d.ts +21 -49
  75. package/dist/hooks/progressive-trust-gate.js +51 -204
  76. package/dist/hooks/prompt.d.ts +11 -11
  77. package/dist/hooks/prompt.js +158 -72
  78. package/dist/hooks/subagent.js +43 -6
  79. package/dist/i18n/commands.js +8 -8
  80. package/dist/index.js +129 -28
  81. package/dist/service/evolution-worker.d.ts +42 -4
  82. package/dist/service/evolution-worker.js +321 -13
  83. package/dist/service/nocturnal-runtime.d.ts +183 -0
  84. package/dist/service/nocturnal-runtime.js +352 -0
  85. package/dist/service/nocturnal-service.d.ts +163 -0
  86. package/dist/service/nocturnal-service.js +787 -0
  87. package/dist/service/nocturnal-target-selector.d.ts +145 -0
  88. package/dist/service/nocturnal-target-selector.js +315 -0
  89. package/dist/service/phase3-input-filter.d.ts +2 -23
  90. package/dist/service/phase3-input-filter.js +3 -27
  91. package/dist/service/runtime-summary-service.d.ts +0 -10
  92. package/dist/service/runtime-summary-service.js +1 -54
  93. package/dist/tools/deep-reflect.js +2 -1
  94. package/dist/types/event-types.d.ts +2 -10
  95. package/dist/types/runtime-summary.d.ts +1 -8
  96. package/dist/types.d.ts +0 -3
  97. package/dist/types.js +0 -2
  98. package/openclaw.plugin.json +1 -1
  99. package/package.json +1 -1
  100. package/templates/langs/en/skills/pd-mentor/SKILL.md +5 -5
  101. package/templates/langs/zh/skills/pd-mentor/SKILL.md +5 -5
  102. package/templates/pain_settings.json +0 -6
  103. package/dist/commands/trust.d.ts +0 -4
  104. package/dist/commands/trust.js +0 -78
  105. package/dist/core/trust-engine.d.ts +0 -96
  106. package/dist/core/trust-engine.js +0 -286
@@ -0,0 +1,824 @@
1
+ /**
2
+ * Nocturnal Compliance Engine — Opportunity-Based Principle Evaluation
3
+ * =====================================================================
4
+ *
5
+ * Replaces session-average compliance with opportunity-based compliance.
6
+ *
7
+ * CORE CONCEPTS:
8
+ *
9
+ * Opportunity — a session context where a principle COULD have been applied.
10
+ * An opportunity exists when the agent's action (or planned action)
11
+ * falls within the principle's applicability scope.
12
+ *
13
+ * Compliance — the principle was followed in an opportunity.
14
+ * Determined by absence of violation signals, not presence of
15
+ * positive confirmation (avoids LLM scoring).
16
+ *
17
+ * Violation — strong evidence the principle was NOT followed.
18
+ * Detected through deterministic event signals (pain, tool failures,
19
+ * gate blocks) — no LLM involved.
20
+ *
21
+ * Dilution prevention — compliance is computed ONLY over sessions where the
22
+ * principle had an opportunity. Unrelated sessions
23
+ * (where T-05's risky operations never occurred) do NOT
24
+ * dilute the compliance rate.
25
+ *
26
+ * DESIGN CONSTRAINTS (Phase 1):
27
+ * - T-xx principles only (deterministic / weak-heuristic evaluability)
28
+ * - No P_xxx automation (requires detector metadata — Task 1.3 scope)
29
+ * - No LLM-based scoring
30
+ * - No training logic
31
+ *
32
+ * FILE: No file persistence — stateless computation over event stream.
33
+ * Caller is responsible for writing results to principle-training-state.ts.
34
+ */
35
+ // ---------------------------------------------------------------------------
36
+ // Risky Operation Registry
37
+ // ---------------------------------------------------------------------------
38
/**
 * Tool names treated as risky actions by the T-04 and T-05 detectors.
 * A gate block on any of these counts as a block on a dangerous operation.
 */
const RISKY_TOOLS = new Set([
    // File-system mutations
    'delete_file',
    'move_file',
    'rename_file',
    'delete_directory',
    // Command execution
    'bash',
    'MultiExec',
]);
50
/**
 * Regular expressions that flag a piece of text as a dangerous shell command.
 * All patterns are case-insensitive and none carries the `g` flag, so `.test`
 * is stateless.
 */
const DANGEROUS_BASH_PATTERNS = [
    // rm with a flag group containing "r" (-r, -rf, -fr, ...)
    /rm\s+(-[a-z]*r[a-z]*f?|-rf)/i,
    // Windows del followed by "/s", "/q" or "//"
    /del\s+\/[s/q]/i,
    // Windows rmdir /s
    /rmdir\s+\/s/i,
    // git push with --force anywhere after it (also matches --force-with-lease)
    /git\s+push\s+.*--force/i,
    // git reset --hard
    /git\s+reset\s+--hard/i,
    // git clean -fd / -fx
    /git\s+clean\s+-f[dx]/i,
    // Publishing to external registries
    /npm\s+publish/i,
    /pip\s+upload/i,
    /docker\s+push/i,
    // Piping a download straight into sh / bash
    /curl.+\|\s*(ba)?sh/i,
    /wget.+\|\s*(ba)?sh/i,
    // Whole text is "make" followed only by characters other than "-" and "|"
    // (i.e. make with targets but no flags or pipes)
    /^make\s+[^-|]+$/i,
];
68
/**
 * Case-insensitive keyword groups looked for in a gate block's free-text
 * reason. Used by the T-05 violation detector as a fallback classification
 * of a block as "dangerous".
 */
const RISKY_KEYWORDS_IN_REASON = [
    // Destruction of data
    /delete|remove|destroy|drop/i,
    // Explicitly unsafe operations
    /force|unsafe|dangerous/i,
    // Data-clobbering operations
    /format|truncate|overwrite/i,
    // Arbitrary code / command execution
    /exec|eval|shell|command/i,
    // Sensitive material
    /credential|secret|password|token/i,
];
79
/**
 * Names of the tools that modify or create file content.
 */
const EDIT_TOOLS = new Set([
    // In-place edits
    'edit_file',
    'edit_file_batch',
    'apply_patch',
    // Whole-file writes
    'write_to_file',
    'create_file',
]);
89
/**
 * Names of the tools that only inspect files or directories.
 */
const READ_TOOLS = new Set([
    // Direct file reads
    'read_file',
    'read_multiple_files',
    // Search / discovery
    'grep',
    'search_files',
    'list_directory',
    'glob',
]);
100
+ // ---------------------------------------------------------------------------
101
+ // Path Normalization (cross-platform)
102
+ // ---------------------------------------------------------------------------
103
/**
 * Rewrites every backslash in a path as a forward slash so that
 * Windows-style and POSIX-style spellings of the same path compare equal.
 *
 * @param {string} filePath - Path using either separator convention.
 * @returns {string} The path with `/` as its only separator.
 */
function normalizePath(filePath) {
    return filePath.split('\\').join('/');
}
110
/**
 * Tests a file path, after separator normalization, against a list of
 * regular expressions.
 *
 * @param {string | undefined} filePath - Path to test; a falsy value never matches.
 * @param {RegExp[]} patterns - Candidate patterns.
 * @returns {boolean} True when at least one pattern matches the normalized path.
 */
function pathMatches(filePath, patterns) {
    if (!filePath) {
        return false;
    }
    const posixPath = normalizePath(filePath);
    const matchesPosixPath = (pattern) => pattern.test(posixPath);
    return patterns.some(matchesPosixPath);
}
119
+ // ---------------------------------------------------------------------------
120
+ // Opportunity Detection
121
+ // ---------------------------------------------------------------------------
122
/**
 * Decides whether a session offered an applicable opportunity for one of the
 * T-xx principles by delegating to that principle's dedicated detector.
 *
 * This answers only "could the principle have applied here?" — it makes no
 * statement about whether the principle was followed.
 *
 * @param {string} principleId - Principle identifier, 'T-01' through 'T-09'.
 * @param {object} session - Session snapshot passed through to the detector.
 * @returns {{ applicable: boolean, reason: string }} Detector verdict; an
 *   unrecognized id yields `applicable: false` with an "Unknown principle" reason.
 */
export function detectOpportunity(principleId, session) {
    // Map lookup compares string keys exactly, like the `case` labels of a switch.
    const detectors = new Map([
        ['T-01', detectT01Opportunity],
        ['T-02', detectT02Opportunity],
        ['T-03', detectT03Opportunity],
        ['T-04', detectT04Opportunity],
        ['T-05', detectT05Opportunity],
        ['T-06', detectT06Opportunity],
        ['T-07', detectT07Opportunity],
        ['T-08', detectT08Opportunity],
        ['T-09', detectT09Opportunity],
    ]);
    const detector = detectors.get(principleId);
    if (detector === undefined) {
        return { applicable: false, reason: `Unknown principle: ${principleId}` };
    }
    return detector(session);
}
157
/**
 * T-01 "Survey Before Acting" opportunity detector.
 *
 * The principle is applicable as soon as the session contains one tool call
 * whose tool is in EDIT_TOOLS; sessions without such a call are excluded.
 *
 * @param {object} session - Session snapshot; reads `session.toolCalls[].toolName`.
 * @returns {{ applicable: boolean, reason: string }}
 */
function detectT01Opportunity(session) {
    for (const { toolName } of session.toolCalls) {
        if (EDIT_TOOLS.has(toolName)) {
            return { applicable: true, reason: 'Edit operations present — opportunity to survey before acting' };
        }
    }
    return { applicable: false, reason: 'No edit operations in session — T-01 not applicable' };
}
171
/**
 * T-02 "Respect Constraints" opportunity detector.
 *
 * Applicable when some tool call carries a file path that, once normalized,
 * either ends in .ts/.tsx/.js/.jsx or contains one of the whole words
 * test, spec, contract, schema, interface or type. Calls without a file path
 * are ignored.
 *
 * @param {object} session - Session snapshot; reads `session.toolCalls[].filePath`.
 * @returns {{ applicable: boolean, reason: string }}
 */
function detectT02Opportunity(session) {
    const sourceFileExtension = /\.(ts|tsx|js|jsx)$/;
    const constraintKeyword = /\b(test|spec|contract|schema|interface|type)\b/i;
    const touchesConstraint = ({ filePath }) => {
        if (!filePath) {
            return false;
        }
        const posixPath = normalizePath(filePath);
        return sourceFileExtension.test(posixPath) || constraintKeyword.test(posixPath);
    };
    return session.toolCalls.some(touchesConstraint)
        ? { applicable: true, reason: 'Type/test/contract interaction — opportunity to respect constraints' }
        : { applicable: false, reason: 'No type/test/contract interaction — T-02 not applicable' };
}
189
/**
 * T-03 "Evidence Over Assumption" opportunity detector.
 *
 * Applicable when either:
 *   1. a failed tool call is preceded, earlier in the session, by at least
 *      one call to a tool in EDIT_TOOLS; or
 *   2. the session has a pain signal with severity 'moderate' or 'severe'
 *      (no edit is required for this second route).
 *
 * @param {object} session - Session snapshot; reads `session.toolCalls`
 *   (`toolName`, `outcome`) and `session.painSignals[].severity`.
 * @returns {{ applicable: boolean, reason: string }}
 */
function detectT03Opportunity(session) {
    // Single forward pass: remember whether an edit has been seen so far
    // instead of re-scanning the prefix of the list for every failure.
    let editSeen = false;
    for (const call of session.toolCalls) {
        // Check the failure BEFORE recording the current call as an edit, so
        // only strictly earlier calls count as "prior".
        if (editSeen && call.outcome === 'failure') {
            return { applicable: true, reason: 'Write operation followed by failure — opportunity to gather evidence before retry' };
        }
        if (EDIT_TOOLS.has(call.toolName)) {
            editSeen = true;
        }
    }
    const hasSignificantPain = session.painSignals.some((p) => p.severity === 'moderate' || p.severity === 'severe');
    if (hasSignificantPain) {
        return { applicable: true, reason: 'Significant pain signal — opportunity to use evidence over assumption' };
    }
    return { applicable: false, reason: 'No pain or failure on write operations — T-03 not applicable' };
}
215
/**
 * T-04 "Reversible First" opportunity detector.
 *
 * Applicable when any tool call uses 'bash' or another tool listed in
 * RISKY_TOOLS.
 *
 * @param {object} session - Session snapshot; reads `session.toolCalls[].toolName`.
 * @returns {{ applicable: boolean, reason: string }}
 */
function detectT04Opportunity(session) {
    const isRisky = ({ toolName }) => toolName === 'bash' || RISKY_TOOLS.has(toolName);
    if (!session.toolCalls.some(isRisky)) {
        return { applicable: false, reason: 'No risky operations — T-04 not applicable' };
    }
    return { applicable: true, reason: 'Risky/destructive operations — opportunity to prefer reversible changes' };
}
227
/**
 * T-05 "Safety Rails" opportunity detector.
 *
 * Applicable when either:
 *   1. the session recorded at least one gate block; or
 *   2. some tool call used a tool listed in RISKY_TOOLS (the agent is expected
 *      to apply safety reasoning even when nothing was blocked).
 *
 * No separate dangerous-command pattern check is made for bash calls:
 * 'bash' is itself a member of RISKY_TOOLS, so every bash call already
 * qualifies under rule 2.
 *
 * @param {object} session - Session snapshot; reads `session.gateBlocks` and
 *   `session.toolCalls[].toolName`.
 * @returns {{ applicable: boolean, reason: string }}
 */
function detectT05Opportunity(session) {
    if (session.gateBlocks.length > 0) {
        return {
            applicable: true,
            reason: 'Gate block present — opportunity to call out safety rails',
        };
    }
    const hasRisky = session.toolCalls.some((call) => RISKY_TOOLS.has(call.toolName));
    if (hasRisky) {
        return {
            applicable: true,
            reason: 'Risky operation attempted — opportunity to apply safety rails',
        };
    }
    return {
        applicable: false,
        reason: 'No gate blocks or risky operations — T-05 not applicable in this session',
    };
}
270
/**
 * T-06 "Simplicity First" opportunity detector.
 *
 * Applicable when some tool call is 'create_file' or 'write_to_file', or is
 * a 'bash' call whose `errorMessage` contains one of the whole words
 * refactor, rewrite or overhaul (case-insensitive).
 *
 * @param {object} session - Session snapshot; reads `session.toolCalls`
 *   (`toolName`, `errorMessage`).
 * @returns {{ applicable: boolean, reason: string }}
 */
function detectT06Opportunity(session) {
    const refactorVerb = /\b(refactor|rewrite|overhaul)\b/i;
    const isNonTrivialWrite = ({ toolName, errorMessage }) => {
        switch (toolName) {
            case 'create_file':
            case 'write_to_file':
                return true;
            case 'bash':
                return refactorVerb.test(errorMessage ?? '');
            default:
                return false;
        }
    };
    if (!session.toolCalls.some(isNonTrivialWrite)) {
        return { applicable: false, reason: 'No non-trivial writes — T-06 not applicable' };
    }
    return {
        applicable: true,
        reason: 'Non-trivial code creation — opportunity to prefer simplicity',
    };
}
287
/**
 * T-07 "Minimal Change Surface" opportunity detector.
 *
 * Applicable when the tool calls of the session reference at least three
 * distinct file paths (compared after separator normalization). Calls whose
 * `filePath` is undefined are not counted.
 *
 * @param {object} session - Session snapshot; reads `session.toolCalls[].filePath`.
 * @returns {{ applicable: boolean, reason: string }}
 */
function detectT07Opportunity(session) {
    const touchedPaths = new Set();
    for (const { filePath } of session.toolCalls) {
        if (filePath !== undefined) {
            touchedPaths.add(normalizePath(filePath));
        }
    }
    if (touchedPaths.size < 3) {
        return { applicable: false, reason: 'Few files touched — T-07 not applicable' };
    }
    return {
        applicable: true,
        reason: `Multiple files touched (${touchedPaths.size}) — opportunity to minimize change surface`,
    };
}
305
/**
 * T-08 "Pain As Signal" opportunity detector.
 *
 * Applicable when the session contains both at least one pain signal and at
 * least one failed tool call. Only co-presence is checked; the relative
 * ordering of the two is not examined.
 *
 * @param {object} session - Session snapshot; reads `session.painSignals`
 *   and `session.toolCalls[].outcome`.
 * @returns {{ applicable: boolean, reason: string }}
 */
function detectT08Opportunity(session) {
    const painCount = session.painSignals.length;
    const anyFailure = session.toolCalls.some(({ outcome }) => outcome === 'failure');
    if (painCount === 0 || !anyFailure) {
        return { applicable: false, reason: 'No pain-after-failure — T-08 not applicable' };
    }
    return {
        applicable: true,
        reason: 'Pain signals following failures — opportunity to treat pain as signal',
    };
}
321
/**
 * T-09 "Divide And Conquer" opportunity detector.
 *
 * Applicable when the session looks complex — five or more tool calls, or
 * three or more distinct (normalized) file paths — and, failing that, when
 * the session contains any pain signal or any failed tool call.
 *
 * @param {object} session - Session snapshot; reads `session.toolCalls`
 *   (`filePath`, `outcome`) and `session.painSignals`.
 * @returns {{ applicable: boolean, reason: string }}
 */
function detectT09Opportunity(session) {
    const callCount = session.toolCalls.length;
    const touchedPaths = new Set();
    for (const { filePath } of session.toolCalls) {
        if (filePath !== undefined) {
            touchedPaths.add(normalizePath(filePath));
        }
    }
    const fileCount = touchedPaths.size;
    const painPresent = session.painSignals.length > 0;
    const failurePresent = session.toolCalls.some(({ outcome }) => outcome === 'failure');
    if (callCount >= 5 || fileCount >= 3) {
        return {
            applicable: true,
            reason: `Complex task detected (${callCount} calls, ${fileCount} files) — opportunity to decompose`,
        };
    }
    if (!painPresent && !failurePresent) {
        return {
            applicable: false,
            reason: 'No complexity indicators — T-09 not applicable in this session',
        };
    }
    // Trouble on a small session is still treated as a hint that the task
    // should have been decomposed.
    return {
        applicable: true,
        reason: 'Pain or failure present — opportunity to decompose before retry',
    };
}
359
+ // ---------------------------------------------------------------------------
360
+ // Violation Detection
361
+ // ---------------------------------------------------------------------------
362
/**
 * Decides whether a T-xx principle was violated in a session by delegating
 * to that principle's dedicated violation detector.
 *
 * @param {string} principleId - Principle identifier, 'T-01' through 'T-09'.
 * @param {object} session - Session snapshot passed through to the detector.
 * @returns {{ violated: boolean, reason: string }} Detector verdict; an
 *   unrecognized id yields `violated: false` with an "Unknown principle" reason.
 */
export function detectViolation(principleId, session) {
    // Map lookup compares string keys exactly, like the `case` labels of a switch.
    const detectors = new Map([
        ['T-01', detectT01Violation],
        ['T-02', detectT02Violation],
        ['T-03', detectT03Violation],
        ['T-04', detectT04Violation],
        ['T-05', detectT05Violation],
        ['T-06', detectT06Violation],
        ['T-07', detectT07Violation],
        ['T-08', detectT08Violation],
        ['T-09', detectT09Violation],
    ]);
    const detector = detectors.get(principleId);
    if (detector === undefined) {
        return { violated: false, reason: `Unknown principle: ${principleId}` };
    }
    return detector(session);
}
392
/**
 * T-01 "Survey Before Acting" violation detector.
 *
 * An "unread edit" is an EDIT_TOOLS call on a file that no READ_TOOLS call
 * had touched EARLIER in the session (paths compared after normalization).
 * A read that happens only after the edit does not count as surveying first.
 * Calls with a missing or empty `filePath` are ignored.
 *
 * The principle is reported as violated when at least one unread edit exists
 * and any of the following holds:
 *   - a pain signal's `source` contains the path of an unread edit, or its
 *     `reason` mentions a structure/context theme;
 *   - one of the unread edits itself failed;
 *   - the combined pain reasons mention a "before acting" / "didn't read
 *     first" theme.
 *
 * @param {object} session - Session snapshot; reads `session.toolCalls`
 *   (`toolName`, `filePath`, `outcome`) and `session.painSignals`
 *   (`source`, `reason`).
 * @returns {{ violated: boolean, reason: string }}
 */
function detectT01Violation(session) {
    // Walk the calls in order so that "read first" really means "read before
    // this edit", not "read at some point in the session".
    const filesReadSoFar = new Set();
    const unreadEdits = [];
    for (const call of session.toolCalls) {
        // An empty path would make every `includes('')` check below succeed,
        // so treat it the same as a missing path.
        if (!call.filePath) {
            continue;
        }
        const posixPath = normalizePath(call.filePath);
        if (EDIT_TOOLS.has(call.toolName) && !filesReadSoFar.has(posixPath)) {
            unreadEdits.push(call);
        }
        if (READ_TOOLS.has(call.toolName)) {
            filesReadSoFar.add(posixPath);
        }
    }
    if (unreadEdits.length === 0) {
        return {
            violated: false,
            reason: 'No violation signals detected for T-01',
        };
    }
    // Compare normalized paths on both sides so a Windows-style path in the
    // tool call still matches a POSIX-style path in the pain source (and
    // vice versa). A missing source simply never matches.
    const unreadPaths = unreadEdits.map((e) => normalizePath(e.filePath));
    const painOnUnreadEdit = session.painSignals.some((p) => {
        const source = normalizePath(String(p.source ?? ''));
        return (unreadPaths.some((path) => source.includes(path)) ||
            /structure|architecture|dependency|context|before.*edit|survey/i.test(p.reason ?? ''));
    });
    if (painOnUnreadEdit) {
        return {
            violated: true,
            reason: `Edits to unread files (${unreadEdits.length}) followed by pain — T-01 violated: agent acted without surveying first`,
        };
    }
    const failuresOnUnread = unreadEdits.some((e) => e.outcome === 'failure');
    if (failuresOnUnread) {
        return {
            violated: true,
            reason: `Edits to unread files (${unreadEdits.length}) followed by failures — T-01 violated: agent acted without understanding`,
        };
    }
    // Last resort: themes that only show up across the combined pain reasons.
    const hasPainTheme = /structure|architecture|context|before.*acting|didn't.*survey|didn't.*read.*first/i.test(session.painSignals.map((p) => p.reason ?? '').join(' '));
    if (hasPainTheme) {
        return {
            violated: true,
            reason: 'Pain signals mentioning structure/context themes after edits to unread files — T-01 violated',
        };
    }
    return {
        violated: false,
        reason: 'No violation signals detected for T-01',
    };
}
439
/**
 * T-02 "Respect Constraints" violation detector.
 *
 * Counts failed tool calls that have a defined `filePath` and for which
 * either the path contains one of the whole words test, spec, contract,
 * schema, interface or type, or the `errorMessage` contains one of the whole
 * words type, test or contract. Any such call is a violation.
 *
 * @param {object} session - Session snapshot; reads `session.toolCalls`
 *   (`outcome`, `filePath`, `errorMessage`).
 * @returns {{ violated: boolean, reason: string }}
 */
function detectT02Violation(session) {
    const constraintPath = /\b(test|spec|contract|schema|interface|type)\b/i;
    const constraintError = /\b(type|test|contract)\b/i;
    let constraintFailureCount = 0;
    for (const call of session.toolCalls) {
        if (call.outcome !== 'failure' || call.filePath === undefined) {
            continue;
        }
        if (constraintPath.test(call.filePath) || constraintError.test(call.errorMessage ?? '')) {
            constraintFailureCount += 1;
        }
    }
    if (constraintFailureCount === 0) {
        return { violated: false, reason: 'No violation signals for T-02' };
    }
    return {
        violated: true,
        reason: `Tool failures on type/test/contract interactions (${constraintFailureCount}) — T-02 violated: constraints not verified`,
    };
}
456
/**
 * T-03 "Evidence Over Assumption" violation detector.
 *
 * Violated when some failed tool call has no earlier READ_TOOLS call that
 * carried a file path. The reported index is that of the first such failure.
 *
 * @param {object} session - Session snapshot; reads `session.toolCalls`
 *   (`toolName`, `filePath`, `outcome`).
 * @returns {{ violated: boolean, reason: string }}
 */
function detectT03Violation(session) {
    // Once a qualifying read has been seen every later failure has prior
    // evidence, so a single forward pass is enough: only a failure that
    // occurs before the first read can violate.
    let readSeen = false;
    for (let idx = 0; idx < session.toolCalls.length; idx += 1) {
        const call = session.toolCalls[idx];
        // Evaluate the failure before recording the current call as a read:
        // only strictly earlier calls count as prior evidence.
        if (call.outcome === 'failure' && !readSeen) {
            return {
                violated: true,
                reason: `Tool failure at index ${idx} without prior read operations — T-03 violated: assumption made without evidence`,
            };
        }
        if (READ_TOOLS.has(call.toolName) && call.filePath !== undefined) {
            readSeen = true;
        }
    }
    return { violated: false, reason: 'No violation signals for T-03' };
}
476
/**
 * T-04 "Reversible First" violation detector.
 *
 * Violated when the session contains at least one risky tool call ('bash' or
 * any RISKY_TOOLS member) together with at least one pain signal. Only
 * co-presence within the session is checked, not the order of the two.
 *
 * @param {object} session - Session snapshot; reads
 *   `session.toolCalls[].toolName` and `session.painSignals`.
 * @returns {{ violated: boolean, reason: string }}
 */
function detectT04Violation(session) {
    const isRisky = ({ toolName }) => RISKY_TOOLS.has(toolName) || toolName === 'bash';
    if (!session.toolCalls.some(isRisky)) {
        return { violated: false, reason: 'No risky operations — T-04 not violated' };
    }
    if (session.painSignals.length === 0) {
        return { violated: false, reason: 'No violation signals for T-04' };
    }
    return {
        violated: true,
        reason: 'Pain signals present alongside risky operations — T-04 violated: irreversible consequences',
    };
}
497
/**
 * T-05 "Safety Rails" violation detector.
 *
 * Any gate block in the session is a violation. Blocks are additionally
 * classified as "dangerous" purely to choose the reason text: a block is
 * dangerous when its tool is in RISKY_TOOLS, when it is a 'bash' block whose
 * reason matches DANGEROUS_BASH_PATTERNS, or when its reason matches
 * RISKY_KEYWORDS_IN_REASON.
 *
 * @param {object} session - Session snapshot; reads `session.gateBlocks`
 *   (`toolName`, `reason`).
 * @returns {{ violated: boolean, reason: string }}
 */
function detectT05Violation(session) {
    const { gateBlocks } = session;
    if (gateBlocks.length === 0) {
        return { violated: false, reason: 'No gate blocks — T-05 not violated' };
    }
    const isDangerous = (block) => {
        const reasonMatches = (pattern) => pattern.test(block.reason);
        return (RISKY_TOOLS.has(block.toolName) ||
            // NOTE: 'bash' is currently a member of RISKY_TOOLS, so this bash
            // pattern check is shadowed by the test above.
            (block.toolName === 'bash' && DANGEROUS_BASH_PATTERNS.some(reasonMatches)) ||
            // Fallback: free-text reason mentions a risky keyword.
            RISKY_KEYWORDS_IN_REASON.some(reasonMatches));
    };
    const dangerousCount = gateBlocks.filter(isDangerous).length;
    const reason = dangerousCount > 0
        ? `Gate blocks on dangerous operations (${dangerousCount}) — T-05 violated: safety rail not called out`
        : `Gate blocks present (${gateBlocks.length}) — T-05 violated: safety rail not respected`;
    return { violated: true, reason };
}
533
/**
 * T-06 "Simplicity First" violation detector.
 *
 * Violated when a pain signal of severity 'severe' has a reason that mentions
 * over-engineering, over-complication, or being too / unnecessarily complex.
 *
 * @param {object} session - Session snapshot; reads `session.painSignals`
 *   (`reason`, `severity`).
 * @returns {{ violated: boolean, reason: string }}
 */
function detectT06Violation(session) {
    const overEngineeringTheme = /over.*engineer|over.*complicat|too.*complex|unnecessarily.*complex/i;
    for (const pain of session.painSignals) {
        if (pain.severity === 'severe' && overEngineeringTheme.test(pain.reason ?? '')) {
            return {
                violated: true,
                reason: 'Severe pain from over-engineering — T-06 violated: simplicity not preferred',
            };
        }
    }
    return { violated: false, reason: 'No over-engineering signals — T-06 not violated' };
}
548
/**
 * T-07 "Minimal Change Surface" violation detector.
 *
 * Violated when EDIT_TOOLS calls touched five or more distinct (normalized)
 * file paths and the session contains two or more failed tool calls of any
 * kind.
 *
 * @param {object} session - Session snapshot; reads `session.toolCalls`
 *   (`toolName`, `filePath`, `outcome`).
 * @returns {{ violated: boolean, reason: string }}
 */
function detectT07Violation(session) {
    const editedPaths = new Set();
    let failureCount = 0;
    for (const call of session.toolCalls) {
        if (EDIT_TOOLS.has(call.toolName) && call.filePath !== undefined) {
            editedPaths.add(normalizePath(call.filePath));
        }
        if (call.outcome === 'failure') {
            failureCount += 1;
        }
    }
    if (editedPaths.size < 5 || failureCount < 2) {
        return { violated: false, reason: 'No blast radius violations — T-07 not violated' };
    }
    return {
        violated: true,
        reason: `Wide blast radius (${editedPaths.size} files, ${failureCount} failures) — T-07 violated: change surface not minimized`,
    };
}
565
/**
 * T-08 "Pain As Signal" violation detector.
 *
 * Uses "kept going without looking" as a proxy for missing reflection: when
 * the session has a pain signal and a failed tool call, the (up to) three
 * calls right after the FIRST failure are inspected. If there is at least
 * one such call and none of them uses a READ_TOOLS tool, the principle is
 * reported as violated.
 *
 * @param {object} session - Session snapshot; reads `session.painSignals`
 *   and `session.toolCalls` (`toolName`, `outcome`).
 * @returns {{ violated: boolean, reason: string }}
 */
function detectT08Violation(session) {
    const noViolation = { violated: false, reason: 'No T-08 violation signals detected' };
    const painPresent = session.painSignals.length > 0;
    const firstFailureIdx = session.toolCalls.findIndex(({ outcome }) => outcome === 'failure');
    if (!painPresent || firstFailureIdx < 0) {
        return noViolation;
    }
    // Window of up to three calls immediately following the first failure.
    const followUps = session.toolCalls.slice(firstFailureIdx + 1, firstFailureIdx + 4);
    if (followUps.length === 0) {
        return noViolation;
    }
    const pausedToRead = followUps.some(({ toolName }) => READ_TOOLS.has(toolName));
    if (pausedToRead) {
        return noViolation;
    }
    return {
        violated: true,
        reason: 'Failure followed immediately by continued operations without pause/reflect — T-08 violated: pain not treated as signal',
    };
}
592
/**
 * T-09 "Divide And Conquer" violation detector.
 *
 * Only complex sessions are considered: five or more tool calls, or three or
 * more distinct (normalized) file paths. In such a session the principle is
 * violated when there was a failure or a pain signal AND the session shows
 * neither a plan approval nor any READ_TOOLS call (a read anywhere in the
 * session counts, regardless of position).
 *
 * @param {object} session - Session snapshot; reads `session.toolCalls`
 *   (`toolName`, `filePath`, `outcome`), `session.painSignals` and
 *   `session.planApprovals`.
 * @returns {{ violated: boolean, reason: string }}
 */
function detectT09Violation(session) {
    const callCount = session.toolCalls.length;
    const touchedPaths = new Set();
    for (const { filePath } of session.toolCalls) {
        if (filePath !== undefined) {
            touchedPaths.add(normalizePath(filePath));
        }
    }
    const isComplex = callCount >= 5 || touchedPaths.size >= 3;
    if (!isComplex) {
        return { violated: false, reason: 'Session not complex enough for T-09 applicability' };
    }
    const failed = session.toolCalls.some(({ outcome }) => outcome === 'failure');
    const pained = session.painSignals.length > 0;
    if (failed || pained) {
        // Either signal is accepted as evidence of planning/decomposition.
        const planned = session.planApprovals.length > 0;
        const readSomething = session.toolCalls.some(({ toolName }) => READ_TOOLS.has(toolName));
        if (!(planned || readSomething)) {
            return {
                violated: true,
                reason: `Complex task with failures/pain but no planning or decomposition signals — T-09 violated: task not divided`,
            };
        }
    }
    return { violated: false, reason: 'No T-09 violation signals' };
}
622
+ // ---------------------------------------------------------------------------
623
+ // Compliance Computation
624
+ // ---------------------------------------------------------------------------
625
/**
 * Computes compliance metrics for a single T-xx principle across a batch of sessions.
 *
 * DILUTION PREVENTION:
 * - Sessions where the principle had NO opportunity (detectOpportunity reports
 *   `applicable: false`) are EXCLUDED from applicableOpportunityCount and do
 *   not affect complianceRate.
 * - Example: T-05 sessions with no risky operations do not dilute
 *   the compliance rate computed from T-05 sessions with gate blocks.
 *
 * COMPLIANCE RATE:
 * - (applicable - violations) / applicable, or 0 when no session was applicable.
 *
 * TREND:
 * - Sessions are assumed to be ordered most-recent-first.
 * - The applicable sessions, in input order, are handed to computeViolationTrend
 *   together with the window size (`options.trendWindowSize`, default 3).
 * - The result is reported as violationTrend
 *   (+1 = improving, 0 = stable or insufficient data, -1 = worsening).
 *
 * @param {string} principleId - Principle identifier, e.g. 'T-05'.
 * @param {Iterable<object>} sessions - Session records, most recent first.
 * @param {{trendWindowSize?: number}|null} [options] - Optional tuning; null is
 *   treated like an empty object.
 * @returns {{principleId: string, applicableOpportunityCount: number,
 *   observedViolationCount: number, complianceRate: number,
 *   violationTrend: number, explanation: string}}
 */
export function computeCompliance(principleId, sessions, options = {}) {
    // Optional chaining keeps an explicit `null` options argument from throwing.
    const windowSize = options?.trendWindowSize ?? 3;
    const applicableSessions = [];
    let observedViolationCount = 0;
    for (const session of sessions) {
        if (!detectOpportunity(principleId, session).applicable) {
            // Principle had no opportunity in this session — skip entirely.
            // This is the key dilution-prevention mechanism.
            continue;
        }
        const { violated, reason } = detectViolation(principleId, session);
        if (violated) {
            observedViolationCount++;
        }
        applicableSessions.push({ session, violated, reason });
    }
    // Every applicable session is recorded exactly once, so the list length is the count.
    const applicableOpportunityCount = applicableSessions.length;
    const complianceRate = applicableOpportunityCount > 0
        ? (applicableOpportunityCount - observedViolationCount) / applicableOpportunityCount
        : 0;
    const violationTrend = computeViolationTrend(applicableSessions, windowSize);
    const explanation = buildExplanation(principleId, applicableOpportunityCount, observedViolationCount, complianceRate, violationTrend, applicableSessions);
    return {
        principleId,
        applicableOpportunityCount,
        observedViolationCount,
        complianceRate,
        violationTrend,
        explanation,
    };
}
682
/**
 * Computes violation trend across the applicable session list.
 *
 * Trend is positive (+1) when violations are DECREASING (improving).
 * Trend is negative (-1) when violations are INCREASING (worsening).
 * Trend is 0 when the change is within ±0.1 or there is not enough data.
 *
 * Sessions are ordered most-recent-first.
 *   currentWindow  = first windowSize sessions (most recent)
 *   previousWindow = next windowSize sessions (may be partially filled)
 *
 * @param {Array<{violated: boolean}>} applicableSessions - Applicable sessions,
 *   most recent first.
 * @param {number} windowSize - Sessions per window; fractional values are
 *   floored, and values below 1 (or NaN) yield 0.
 * @returns {number} 1, 0 or -1.
 */
function computeViolationTrend(applicableSessions, windowSize) {
    // Floor so both windows use the same length; `!(x >= 1)` also rejects NaN.
    const size = Math.floor(windowSize);
    if (!(size >= 1)) {
        return 0;
    }
    // With no session beyond the current window there is nothing to compare
    // against: the current window would be the whole list, so its rate always
    // equals the overall rate.
    if (applicableSessions.length <= size) {
        return 0;
    }
    const violationRate = (window) => window.filter((s) => s.violated).length / window.length;
    const currentWindow = applicableSessions.slice(0, size);
    const previousWindow = applicableSessions.slice(size, size * 2);
    const delta = violationRate(previousWindow) - violationRate(currentWindow);
    if (delta > 0.1) {
        return 1; // violations decreasing → improving
    }
    if (delta < -0.1) {
        return -1; // violations increasing → worsening
    }
    return 0; // stable
}
722
/**
 * Builds a human-readable explanation for the compliance result.
 *
 * @param {string} principleId - Principle identifier used as the line prefix.
 * @param {number} applicableOpportunityCount - Sessions in which the principle applied.
 * @param {number} observedViolationCount - Applicable sessions flagged as violated.
 * @param {number} complianceRate - Fraction in [0, 1]; rendered as a percentage with one decimal.
 * @param {number} violationTrend - Positive = improving, negative = worsening, anything else = stable.
 * @param {Array<{violated: boolean, reason: string}>} applicableSessions - Source of
 *   sample violation reasons (first two violated entries are shown).
 * @returns {string} A single line when nothing was applicable, otherwise three
 *   newline-joined lines (the last may itself contain bullet lines).
 */
function buildExplanation(principleId, applicableOpportunityCount, observedViolationCount, complianceRate, violationTrend, applicableSessions) {
    if (applicableOpportunityCount === 0) {
        return `${principleId}: No applicable opportunities in provided sessions — compliance cannot be assessed.`;
    }
    // Classify by sign so any non-zero trend value is labelled with its direction.
    let trendStr = '→ stable';
    if (violationTrend > 0) {
        trendStr = '↑ improving';
    }
    else if (violationTrend < 0) {
        trendStr = '↓ worsening';
    }
    const sampleReasons = [];
    for (const entry of applicableSessions) {
        if (!entry.violated) {
            continue;
        }
        sampleReasons.push(` • ${entry.reason}`);
        // Two examples are enough to illustrate the violations.
        if (sampleReasons.length === 2) {
            break;
        }
    }
    const violationExamples = sampleReasons.join('\n');
    const lines = [
        `${principleId}: ${applicableOpportunityCount} applicable opportunities, ${observedViolationCount} violations.`,
        `Compliance rate: ${(complianceRate * 100).toFixed(1)}%. Trend: ${trendStr}.`,
        violationExamples ? `Sample violation signals:\n${violationExamples}` : 'No violations detected in recent sessions.',
    ];
    return lines.join('\n');
}
745
+ // ---------------------------------------------------------------------------
746
+ // Batch Update Helpers
747
+ // ---------------------------------------------------------------------------
748
/**
 * Runs computeCompliance for each of the nine principles T-01 … T-09, in that
 * order, against the same session batch.
 *
 * Sessions are assumed to be ordered most-recent-first.
 *
 * @param {Iterable<object>} sessions - Session records shared by every principle.
 * @param {object} [options] - Passed through unchanged to computeCompliance.
 * @returns {Array<object>} One compliance result per principle, in T-01 … T-09 order.
 */
export function computeAllCompliance(sessions, options = {}) {
    const principleIds = ['T-01', 'T-02', 'T-03', 'T-04', 'T-05', 'T-06', 'T-07', 'T-08', 'T-09'];
    return principleIds.map((principleId) => computeCompliance(principleId, sessions, options));
}
761
/**
 * Converts raw event-log entries into per-session records.
 *
 * Groups events by sessionId and maps each recognised event type onto the
 * matching list of the session record. Events with a null/undefined sessionId
 * are grouped under sessionId = 'unknown'. Events of any other type still
 * create their session entry but contribute nothing to it.
 *
 * Recognised types: 'tool_call' (skipped when it has no toolName),
 * 'pain_signal', 'gate_block', 'empathy_rollback' (recorded as a user
 * correction) and 'plan_approval'.
 *
 * @param {Iterable<{sessionId?: string, type: string, data?: object}>} events - Raw events.
 * @returns {Map<string, {sessionId: string, toolCalls: object[], painSignals: object[],
 *   gateBlocks: object[], userCorrections: object[], planApprovals: object[]}>}
 *   Sessions keyed by sessionId, in first-seen order.
 */
export function groupEventsIntoSessions(events) {
    const sessionMap = new Map();
    for (const event of events) {
        const sessionId = event.sessionId ?? 'unknown';
        let session = sessionMap.get(sessionId);
        if (session === undefined) {
            session = {
                sessionId,
                toolCalls: [],
                painSignals: [],
                gateBlocks: [],
                userCorrections: [],
                planApprovals: [],
            };
            sessionMap.set(sessionId, session);
        }
        // An event without a payload is treated as having an empty one, so the
        // per-field fallbacks below apply instead of a TypeError.
        const data = event.data ?? {};
        switch (event.type) {
            case 'tool_call':
                if (data.toolName) {
                    session.toolCalls.push({
                        toolName: data.toolName,
                        filePath: data.filePath,
                        // Any truthy `error` marks the call as failed.
                        outcome: (data.error ? 'failure' : 'success'),
                        errorType: data.errorType,
                        errorMessage: data.error,
                    });
                }
                break;
            case 'pain_signal':
                session.painSignals.push({
                    source: data.source ?? 'unknown',
                    score: data.score ?? 0,
                    severity: data.severity,
                    reason: data.reason,
                });
                break;
            case 'gate_block':
                session.gateBlocks.push({
                    toolName: data.toolName ?? 'unknown',
                    filePath: data.filePath,
                    reason: data.reason ?? '',
                });
                break;
            case 'empathy_rollback':
                // User corrections are flagged via empathy rollback
                session.userCorrections.push({
                    correctionCue: data.reason,
                });
                break;
            case 'plan_approval':
                session.planApprovals.push({
                    toolName: data.toolName ?? 'unknown',
                    filePath: data.filePath,
                });
                break;
        }
    }
    return sessionMap;
}