principles-disciple 1.7.6 → 1.7.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/commands/context.js +5 -15
- package/dist/commands/evolution-status.js +2 -9
- package/dist/commands/export.js +61 -8
- package/dist/commands/nocturnal-review.d.ts +24 -0
- package/dist/commands/nocturnal-review.js +265 -0
- package/dist/commands/nocturnal-rollout.d.ts +27 -0
- package/dist/commands/nocturnal-rollout.js +671 -0
- package/dist/commands/nocturnal-train.d.ts +25 -0
- package/dist/commands/nocturnal-train.js +919 -0
- package/dist/commands/pain.js +8 -21
- package/dist/constants/tools.d.ts +2 -2
- package/dist/constants/tools.js +1 -1
- package/dist/core/adaptive-thresholds.d.ts +186 -0
- package/dist/core/adaptive-thresholds.js +300 -0
- package/dist/core/config.d.ts +2 -38
- package/dist/core/config.js +6 -61
- package/dist/core/event-log.d.ts +1 -2
- package/dist/core/event-log.js +0 -3
- package/dist/core/evolution-engine.js +1 -21
- package/dist/core/evolution-reducer.d.ts +7 -1
- package/dist/core/evolution-reducer.js +56 -4
- package/dist/core/evolution-types.d.ts +61 -9
- package/dist/core/evolution-types.js +31 -9
- package/dist/core/external-training-contract.d.ts +276 -0
- package/dist/core/external-training-contract.js +269 -0
- package/dist/core/local-worker-routing.d.ts +175 -0
- package/dist/core/local-worker-routing.js +525 -0
- package/dist/core/model-deployment-registry.d.ts +218 -0
- package/dist/core/model-deployment-registry.js +503 -0
- package/dist/core/model-training-registry.d.ts +295 -0
- package/dist/core/model-training-registry.js +475 -0
- package/dist/core/nocturnal-arbiter.d.ts +159 -0
- package/dist/core/nocturnal-arbiter.js +534 -0
- package/dist/core/nocturnal-candidate-scoring.d.ts +137 -0
- package/dist/core/nocturnal-candidate-scoring.js +266 -0
- package/dist/core/nocturnal-compliance.d.ts +175 -0
- package/dist/core/nocturnal-compliance.js +824 -0
- package/dist/core/nocturnal-dataset.d.ts +224 -0
- package/dist/core/nocturnal-dataset.js +443 -0
- package/dist/core/nocturnal-executability.d.ts +85 -0
- package/dist/core/nocturnal-executability.js +331 -0
- package/dist/core/nocturnal-export.d.ts +124 -0
- package/dist/core/nocturnal-export.js +275 -0
- package/dist/core/nocturnal-paths.d.ts +124 -0
- package/dist/core/nocturnal-paths.js +214 -0
- package/dist/core/nocturnal-trajectory-extractor.d.ts +242 -0
- package/dist/core/nocturnal-trajectory-extractor.js +307 -0
- package/dist/core/nocturnal-trinity.d.ts +311 -0
- package/dist/core/nocturnal-trinity.js +880 -0
- package/dist/core/paths.d.ts +6 -0
- package/dist/core/paths.js +6 -0
- package/dist/core/principle-training-state.d.ts +121 -0
- package/dist/core/principle-training-state.js +321 -0
- package/dist/core/promotion-gate.d.ts +238 -0
- package/dist/core/promotion-gate.js +529 -0
- package/dist/core/session-tracker.d.ts +10 -0
- package/dist/core/session-tracker.js +14 -0
- package/dist/core/shadow-observation-registry.d.ts +217 -0
- package/dist/core/shadow-observation-registry.js +308 -0
- package/dist/core/training-program.d.ts +233 -0
- package/dist/core/training-program.js +433 -0
- package/dist/core/trajectory.d.ts +95 -1
- package/dist/core/trajectory.js +220 -6
- package/dist/core/workspace-context.d.ts +0 -6
- package/dist/core/workspace-context.js +0 -12
- package/dist/hooks/bash-risk.d.ts +6 -6
- package/dist/hooks/bash-risk.js +8 -8
- package/dist/hooks/gate-block-helper.js +1 -1
- package/dist/hooks/gate.d.ts +1 -1
- package/dist/hooks/gate.js +2 -2
- package/dist/hooks/gfi-gate.d.ts +3 -3
- package/dist/hooks/gfi-gate.js +15 -14
- package/dist/hooks/pain.js +6 -9
- package/dist/hooks/progressive-trust-gate.d.ts +21 -49
- package/dist/hooks/progressive-trust-gate.js +51 -204
- package/dist/hooks/prompt.d.ts +11 -11
- package/dist/hooks/prompt.js +158 -72
- package/dist/hooks/subagent.js +43 -6
- package/dist/i18n/commands.js +8 -8
- package/dist/index.js +129 -28
- package/dist/service/evolution-worker.d.ts +42 -4
- package/dist/service/evolution-worker.js +321 -13
- package/dist/service/nocturnal-runtime.d.ts +183 -0
- package/dist/service/nocturnal-runtime.js +352 -0
- package/dist/service/nocturnal-service.d.ts +163 -0
- package/dist/service/nocturnal-service.js +787 -0
- package/dist/service/nocturnal-target-selector.d.ts +145 -0
- package/dist/service/nocturnal-target-selector.js +315 -0
- package/dist/service/phase3-input-filter.d.ts +2 -23
- package/dist/service/phase3-input-filter.js +3 -27
- package/dist/service/runtime-summary-service.d.ts +0 -10
- package/dist/service/runtime-summary-service.js +1 -54
- package/dist/tools/deep-reflect.js +2 -1
- package/dist/types/event-types.d.ts +2 -10
- package/dist/types/runtime-summary.d.ts +1 -8
- package/dist/types.d.ts +0 -3
- package/dist/types.js +0 -2
- package/openclaw.plugin.json +1 -1
- package/package.json +1 -1
- package/templates/langs/en/skills/pd-mentor/SKILL.md +5 -5
- package/templates/langs/zh/skills/pd-mentor/SKILL.md +5 -5
- package/templates/pain_settings.json +0 -6
- package/dist/commands/trust.d.ts +0 -4
- package/dist/commands/trust.js +0 -78
- package/dist/core/trust-engine.d.ts +0 -96
- package/dist/core/trust-engine.js +0 -286
|
@@ -0,0 +1,824 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Nocturnal Compliance Engine — Opportunity-Based Principle Evaluation
|
|
3
|
+
* =====================================================================
|
|
4
|
+
*
|
|
5
|
+
* Replaces session-average compliance with opportunity-based compliance.
|
|
6
|
+
*
|
|
7
|
+
* CORE CONCEPTS:
|
|
8
|
+
*
|
|
9
|
+
* Opportunity — a session context where a principle COULD have been applied.
|
|
10
|
+
* An opportunity exists when the agent's action (or planned action)
|
|
11
|
+
* falls within the principle's applicability scope.
|
|
12
|
+
*
|
|
13
|
+
* Compliance — the principle was followed in an opportunity.
|
|
14
|
+
* Determined by absence of violation signals, not presence of
|
|
15
|
+
* positive confirmation (avoids LLM scoring).
|
|
16
|
+
*
|
|
17
|
+
* Violation — strong evidence the principle was NOT followed.
|
|
18
|
+
* Detected through deterministic event signals (pain, tool failures,
|
|
19
|
+
* gate blocks) — no LLM involved.
|
|
20
|
+
*
|
|
21
|
+
* Dilution prevention — compliance is computed ONLY over sessions where the
|
|
22
|
+
* principle had an opportunity. Unrelated sessions
|
|
23
|
+
* (where T-05's risky operations never occurred) do NOT
|
|
24
|
+
* dilute the compliance rate.
|
|
25
|
+
*
|
|
26
|
+
* DESIGN CONSTRAINTS (Phase 1):
|
|
27
|
+
* - T-xx principles only (deterministic / weak-heuristic evaluability)
|
|
28
|
+
* - No P_xxx automation (requires detector metadata — Task 1.3 scope)
|
|
29
|
+
* - No LLM-based scoring
|
|
30
|
+
* - No training logic
|
|
31
|
+
*
|
|
32
|
+
* FILE: No file persistence — stateless computation over event stream.
|
|
33
|
+
* Caller is responsible for writing results to principle-training-state.ts.
|
|
34
|
+
*/
|
|
35
|
+
// ---------------------------------------------------------------------------
|
|
36
|
+
// Risky Operation Registry
|
|
37
|
+
// ---------------------------------------------------------------------------
|
|
38
|
+
/**
 * Tool names treated as risky actions.
 * Gate blocks on these map to T-05 (Safety Rails) violations.
 */
const RISKY_TOOLS = new Set([
    'delete_file',
    'move_file',
    'rename_file',
    'delete_directory',
    'bash',
    'MultiExec',
]);
/**
 * Command-text patterns that mark a dangerous shell operation.
 *
 * The `rm`, `del` and `rmdir` patterns are anchored with `\b` so that words
 * which merely end in those letters ("model /src", "xterm -r") do not match.
 */
const DANGEROUS_BASH_PATTERNS = [
    /\brm\s+(-[a-z]*r[a-z]*f?|-rf)/i, // rm -rf / rm -r
    /\bdel\s+\/[sq]/i, // Windows del /s, del /q
    /\brmdir\s+\/s/i, // rmdir /s
    /git\s+push\s+.*--force/i, // git push --force
    /git\s+reset\s+--hard/i, // git reset --hard
    /git\s+clean\s+-f[dx]/i, // git clean -fd
    /npm\s+publish/i, // npm publish
    /pip\s+upload/i, // pip upload
    /docker\s+push/i, // docker push
    /curl.+\|\s*(ba)?sh/i, // curl | bash
    /wget.+\|\s*(ba)?sh/i, // wget | bash
    /^make\s+[^-|]+$/i, // make <target> with no flags or pipes
];
/**
 * Keywords in a gate block reason that indicate a dangerous/risky operation.
 * Used as a fallback when the reason is free text.
 */
const RISKY_KEYWORDS_IN_REASON = [
    /delete|remove|destroy|drop/i,
    /force|unsafe|dangerous/i,
    /format|truncate|overwrite/i,
    /exec|eval|shell|command/i,
    /credential|secret|password|token/i,
];
/**
 * Edit/write tool names.
 */
const EDIT_TOOLS = new Set([
    'edit_file',
    'edit_file_batch',
    'write_to_file',
    'create_file',
    'apply_patch',
]);
/**
 * Read tool names.
 */
const READ_TOOLS = new Set([
    'read_file',
    'read_multiple_files',
    'grep',
    'search_files',
    'list_directory',
    'glob',
]);
|
|
100
|
+
// ---------------------------------------------------------------------------
|
|
101
|
+
// Path Normalization (cross-platform)
|
|
102
|
+
// ---------------------------------------------------------------------------
|
|
103
|
+
/**
 * Rewrites a path so that every backslash becomes a forward slash, giving
 * Windows-style and POSIX-style spellings of the same path one form.
 *
 * @param {string} filePath - Path to convert.
 * @returns {string} The path with `\` replaced by `/`.
 */
function normalizePath(filePath) {
    return filePath.split('\\').join('/');
}
|
|
110
|
+
/**
 * Tests a path, after slash normalization, against a list of patterns.
 *
 * @param {string} filePath - Path to test; a falsy value never matches.
 * @param {RegExp[]} patterns - Patterns to try in order.
 * @returns {boolean} True as soon as one pattern matches the normalized path.
 */
function pathMatches(filePath, patterns) {
    if (filePath) {
        const posixPath = normalizePath(filePath);
        for (const pattern of patterns) {
            if (pattern.test(posixPath)) {
                return true;
            }
        }
    }
    return false;
}
|
|
119
|
+
// ---------------------------------------------------------------------------
|
|
120
|
+
// Opportunity Detection
|
|
121
|
+
// ---------------------------------------------------------------------------
|
|
122
|
+
/**
 * Routes a T-xx principle id to its opportunity detector.
 *
 * This answers only "could the principle have applied in this session?";
 * it does not assess whether the principle was followed.
 *
 * @param {string} principleId - Principle id, 'T-01' through 'T-09'.
 * @param {object} session - Session snapshot handed to the detector.
 * @returns {{applicable: boolean, reason: string}} The detector's verdict, or
 *   a non-applicable result naming the id when it is not recognised.
 */
export function detectOpportunity(principleId, session) {
    // Detectors are wrapped in thunks so only the selected one is resolved.
    const detectors = new Map([
        ['T-01', () => detectT01Opportunity(session)],
        ['T-02', () => detectT02Opportunity(session)],
        ['T-03', () => detectT03Opportunity(session)],
        ['T-04', () => detectT04Opportunity(session)],
        ['T-05', () => detectT05Opportunity(session)],
        ['T-06', () => detectT06Opportunity(session)],
        ['T-07', () => detectT07Opportunity(session)],
        ['T-08', () => detectT08Opportunity(session)],
        ['T-09', () => detectT09Opportunity(session)],
    ]);
    const detect = detectors.get(principleId);
    if (detect === undefined) {
        return { applicable: false, reason: `Unknown principle: ${principleId}` };
    }
    return detect();
}
|
|
157
|
+
/**
 * T-01 "Survey Before Acting" — opportunity check.
 *
 * Applicable when the session contains at least one tool call whose name is
 * in EDIT_TOOLS; sessions without edits give no opportunity.
 *
 * @param {{toolCalls: Array<{toolName: string}>}} session
 * @returns {{applicable: boolean, reason: string}}
 */
function detectT01Opportunity(session) {
    for (const call of session.toolCalls) {
        if (EDIT_TOOLS.has(call.toolName)) {
            return { applicable: true, reason: 'Edit operations present — opportunity to survey before acting' };
        }
    }
    return { applicable: false, reason: 'No edit operations in session — T-01 not applicable' };
}
|
|
171
|
+
/**
 * T-02 "Respect Constraints" — opportunity check.
 *
 * Applicable when any tool call carries a file path that either ends in
 * .ts/.tsx/.js/.jsx or contains one of the whole words test, spec, contract,
 * schema, interface or type (case-insensitive).
 *
 * @param {{toolCalls: Array<{filePath?: string}>}} session
 * @returns {{applicable: boolean, reason: string}}
 */
function detectT02Opportunity(session) {
    const sourceExtension = /\.(ts|tsx|js|jsx)$/; // type-aware files
    const constraintWord = /\b(test|spec|contract|schema|interface|type)\b/i;
    const touchesConstraint = (call) => {
        if (!call.filePath) {
            return false;
        }
        const posixPath = normalizePath(call.filePath);
        return sourceExtension.test(posixPath) || constraintWord.test(posixPath);
    };
    return session.toolCalls.some(touchesConstraint)
        ? { applicable: true, reason: 'Type/test/contract interaction — opportunity to respect constraints' }
        : { applicable: false, reason: 'No type/test/contract interaction — T-02 not applicable' };
}
|
|
189
|
+
/**
 * T-03 "Evidence Over Assumption" — opportunity check.
 *
 * Applicable when either:
 *  - a tool call with outcome 'failure' occurs after an earlier edit/write
 *    call in the same session, or
 *  - any pain signal has severity 'moderate' or 'severe'.
 *
 * @param {{toolCalls: Array<{toolName: string, outcome?: string}>,
 *          painSignals: Array<{severity?: string}>}} session
 * @returns {{applicable: boolean, reason: string}}
 */
function detectT03Opportunity(session) {
    // Single pass: remember whether an edit has been seen so far instead of
    // re-scanning every earlier call for each failure.
    let editSeen = false;
    for (const call of session.toolCalls) {
        // Test the failure before recording this call as an edit, so a failing
        // edit does not count as its own "prior" edit.
        if (call.outcome === 'failure' && editSeen) {
            return { applicable: true, reason: 'Write operation followed by failure — opportunity to gather evidence before retry' };
        }
        if (EDIT_TOOLS.has(call.toolName)) {
            editSeen = true;
        }
    }
    // Also applicable: pain signal with severity moderate+ (something went wrong)
    const hasSignificantPain = session.painSignals.some((p) => p.severity === 'moderate' || p.severity === 'severe');
    if (hasSignificantPain) {
        return { applicable: true, reason: 'Significant pain signal — opportunity to use evidence over assumption' };
    }
    return { applicable: false, reason: 'No pain or failure on write operations — T-03 not applicable' };
}
|
|
215
|
+
/**
 * T-04 "Reversible First" — opportunity check.
 *
 * Applicable when any tool call uses a tool listed in RISKY_TOOLS or the
 * 'bash' tool.
 *
 * @param {{toolCalls: Array<{toolName: string}>}} session
 * @returns {{applicable: boolean, reason: string}}
 */
function detectT04Opportunity(session) {
    for (const { toolName } of session.toolCalls) {
        if (RISKY_TOOLS.has(toolName) || toolName === 'bash') {
            return { applicable: true, reason: 'Risky/destructive operations — opportunity to prefer reversible changes' };
        }
    }
    return { applicable: false, reason: 'No risky operations — T-04 not applicable' };
}
|
|
227
|
+
/**
 * T-05 "Safety Rails" — opportunity check.
 *
 * Applicable when:
 *  - the session recorded at least one gate block, or
 *  - a tool call used a tool in RISKY_TOOLS, or
 *  - a 'bash' call has an errorMessage matching DANGEROUS_BASH_PATTERNS.
 *
 * @param {{gateBlocks: Array<object>,
 *          toolCalls: Array<{toolName: string, errorMessage?: string}>}} session
 * @returns {{applicable: boolean, reason: string}}
 */
function detectT05Opportunity(session) {
    if (session.gateBlocks.length > 0) {
        return {
            applicable: true,
            reason: 'Gate block present — opportunity to call out safety rails',
        };
    }
    // Without a block, a risky attempt still counts: the agent should self-censor.
    const isRiskyCall = (call) => {
        if (RISKY_TOOLS.has(call.toolName)) {
            return true;
        }
        // NOTE(review): when RISKY_TOOLS contains 'bash' this branch is shadowed by
        // the check above, and it inspects errorMessage rather than command text —
        // confirm which field carries the command.
        const bashWithError = call.toolName === 'bash' && call.errorMessage;
        return bashWithError
            ? DANGEROUS_BASH_PATTERNS.some((pattern) => pattern.test(call.errorMessage))
            : false;
    };
    if (session.toolCalls.some(isRiskyCall)) {
        return {
            applicable: true,
            reason: 'Risky operation attempted — opportunity to apply safety rails',
        };
    }
    return {
        applicable: false,
        reason: 'No gate blocks or risky operations — T-05 not applicable in this session',
    };
}
|
|
270
|
+
/**
 * T-06 "Simplicity First" — opportunity check.
 *
 * Applicable when a tool call is 'create_file' or 'write_to_file', or is a
 * 'bash' call whose errorMessage contains the whole word refactor, rewrite
 * or overhaul (case-insensitive).
 *
 * @param {{toolCalls: Array<{toolName: string, errorMessage?: string}>}} session
 * @returns {{applicable: boolean, reason: string}}
 */
function detectT06Opportunity(session) {
    const rewriteWord = /\b(refactor|rewrite|overhaul)\b/i;
    const isNonTrivialWrite = (call) => {
        if (call.toolName === 'create_file' || call.toolName === 'write_to_file') {
            return true;
        }
        return call.toolName === 'bash' && rewriteWord.test(call.errorMessage ?? '');
    };
    if (!session.toolCalls.some(isNonTrivialWrite)) {
        return { applicable: false, reason: 'No non-trivial writes — T-06 not applicable' };
    }
    return {
        applicable: true,
        reason: 'Non-trivial code creation — opportunity to prefer simplicity',
    };
}
|
|
287
|
+
/**
 * T-07 "Minimal Change Surface" — opportunity check.
 *
 * Applicable when tool calls in the session reference three or more distinct
 * file paths (compared after slash normalization).
 *
 * @param {{toolCalls: Array<{filePath?: string | null}>}} session
 * @returns {{applicable: boolean, reason: string}}
 */
function detectT07Opportunity(session) {
    const uniqueFiles = new Set();
    for (const call of session.toolCalls) {
        // Only string paths are counted: a null/absent filePath (e.g. from a
        // deserialized record) must not reach normalizePath, which would throw.
        if (typeof call.filePath === 'string') {
            uniqueFiles.add(normalizePath(call.filePath));
        }
    }
    if (uniqueFiles.size >= 3) {
        return {
            applicable: true,
            reason: `Multiple files touched (${uniqueFiles.size}) — opportunity to minimize change surface`,
        };
    }
    return { applicable: false, reason: 'Few files touched — T-07 not applicable' };
}
|
|
305
|
+
/**
 * T-08 "Pain As Signal" — opportunity check.
 *
 * Applicable when the session has at least one pain signal and at least one
 * tool call with outcome 'failure'. Their relative order is not examined.
 *
 * @param {{painSignals: Array<object>,
 *          toolCalls: Array<{outcome?: string}>}} session
 * @returns {{applicable: boolean, reason: string}}
 */
function detectT08Opportunity(session) {
    const painCount = session.painSignals.length;
    const failedCalls = session.toolCalls.filter((call) => call.outcome === 'failure');
    if (painCount === 0 || failedCalls.length === 0) {
        return { applicable: false, reason: 'No pain-after-failure — T-08 not applicable' };
    }
    return {
        applicable: true,
        reason: 'Pain signals following failures — opportunity to treat pain as signal',
    };
}
|
|
321
|
+
/**
 * T-09 "Divide And Conquer" — opportunity check.
 *
 * Applicable when either:
 *  - the session looks complex: 5+ tool calls, or 3+ distinct file paths
 *    (compared after slash normalization); or
 *  - the session has any pain signal or any tool call with outcome 'failure'.
 *
 * @param {{toolCalls: Array<{filePath?: string | null, outcome?: string}>,
 *          painSignals: Array<object>}} session
 * @returns {{applicable: boolean, reason: string}}
 */
function detectT09Opportunity(session) {
    const toolCallCount = session.toolCalls.length;
    const uniqueFiles = new Set();
    for (const call of session.toolCalls) {
        // Only string paths are counted: a null/absent filePath must not reach
        // normalizePath, which would throw.
        if (typeof call.filePath === 'string') {
            uniqueFiles.add(normalizePath(call.filePath));
        }
    }
    const hasComplexity = toolCallCount >= 5 || uniqueFiles.size >= 3;
    const hasPain = session.painSignals.length > 0;
    const hasFailure = session.toolCalls.some((call) => call.outcome === 'failure');
    if (hasComplexity) {
        return {
            applicable: true,
            reason: `Complex task detected (${toolCallCount} calls, ${uniqueFiles.size} files) — opportunity to decompose`,
        };
    }
    if (hasPain || hasFailure) {
        // Pain/failure may indicate the task was too complex without decomposition
        return {
            applicable: true,
            reason: 'Pain or failure present — opportunity to decompose before retry',
        };
    }
    return {
        applicable: false,
        reason: 'No complexity indicators — T-09 not applicable in this session',
    };
}
|
|
359
|
+
// ---------------------------------------------------------------------------
|
|
360
|
+
// Violation Detection
|
|
361
|
+
// ---------------------------------------------------------------------------
|
|
362
|
+
/**
 * Routes a T-xx principle id to its violation detector.
 *
 * @param {string} principleId - Principle id, 'T-01' through 'T-09'.
 * @param {object} session - Session snapshot handed to the detector.
 * @returns {{violated: boolean, reason: string}} The detector's verdict, or a
 *   non-violated result naming the id when it is not recognised.
 */
export function detectViolation(principleId, session) {
    // Detectors are wrapped in thunks so only the selected one is resolved.
    const detectors = new Map([
        ['T-01', () => detectT01Violation(session)],
        ['T-02', () => detectT02Violation(session)],
        ['T-03', () => detectT03Violation(session)],
        ['T-04', () => detectT04Violation(session)],
        ['T-05', () => detectT05Violation(session)],
        ['T-06', () => detectT06Violation(session)],
        ['T-07', () => detectT07Violation(session)],
        ['T-08', () => detectT08Violation(session)],
        ['T-09', () => detectT09Violation(session)],
    ]);
    const detect = detectors.get(principleId);
    if (detect === undefined) {
        return { violated: false, reason: `Unknown principle: ${principleId}` };
    }
    return detect();
}
|
|
392
|
+
/**
 * T-01 "Survey Before Acting" — violation check.
 *
 * An edit counts as "unread" when no read-tool call on the same normalized
 * path appears EARLIER in the call sequence. With at least one unread edit,
 * the principle is reported violated when any of these holds:
 *  - a pain signal's source mentions an unread-edit path, or its reason
 *    mentions structure/architecture/dependency/context/survey themes;
 *  - an unread edit itself has outcome 'failure';
 *  - the combined pain reasons mention acting/reading-first themes.
 *
 * @param {{toolCalls: Array<{toolName: string, filePath?: string | null, outcome?: string}>,
 *          painSignals: Array<{source?: string, reason?: string}>}} session
 * @returns {{violated: boolean, reason: string}}
 */
function detectT01Violation(session) {
    const noViolation = {
        violated: false,
        reason: 'No violation signals detected for T-01',
    };
    // Walk the calls in order so that only reads that precede an edit count.
    const filesReadSoFar = new Set();
    const unreadEdits = [];
    for (const call of session.toolCalls) {
        // Calls without a string path can be neither a read nor an edit of a file.
        if (typeof call.filePath !== 'string') {
            continue;
        }
        const normalized = normalizePath(call.filePath);
        if (READ_TOOLS.has(call.toolName)) {
            filesReadSoFar.add(normalized);
        }
        if (EDIT_TOOLS.has(call.toolName) && !filesReadSoFar.has(normalized)) {
            unreadEdits.push({ call, normalized });
        }
    }
    // Every violation path below requires at least one unread edit.
    if (unreadEdits.length === 0) {
        return noViolation;
    }
    const structuralTheme = /structure|architecture|dependency|context|before.*edit|survey/i;
    const painOnUnreadEdit = session.painSignals.some((p) => {
        // source may be absent; normalize it so Windows and POSIX spellings match.
        const source = typeof p.source === 'string' ? normalizePath(p.source) : '';
        return unreadEdits.some((e) => source.includes(e.normalized)) ||
            structuralTheme.test(p.reason ?? '');
    });
    if (painOnUnreadEdit) {
        return {
            violated: true,
            reason: `Edits to unread files (${unreadEdits.length}) followed by pain — T-01 violated: agent acted without surveying first`,
        };
    }
    if (unreadEdits.some((e) => e.call.outcome === 'failure')) {
        return {
            violated: true,
            reason: `Edits to unread files (${unreadEdits.length}) followed by failures — T-01 violated: agent acted without understanding`,
        };
    }
    // Themes are matched against all pain reasons joined together.
    const hasPainTheme = /structure|architecture|context|before.*acting|didn't.*survey|didn't.*read.*first/i.test(session.painSignals.map((p) => p.reason ?? '').join(' '));
    if (hasPainTheme) {
        return {
            violated: true,
            reason: 'Pain signals mentioning structure/context themes after edits to unread files — T-01 violated',
        };
    }
    return noViolation;
}
|
|
439
|
+
/**
 * T-02 "Respect Constraints" — violation check.
 *
 * Counts failed tool calls that carry a file path and whose path contains one
 * of the whole words test/spec/contract/schema/interface/type, or whose error
 * message contains one of the whole words type/test/contract. Any such call
 * marks the principle as violated.
 *
 * @param {{toolCalls: Array<{outcome?: string, filePath?: string, errorMessage?: string}>}} session
 * @returns {{violated: boolean, reason: string}}
 */
function detectT02Violation(session) {
    const pathHint = /\b(test|spec|contract|schema|interface|type)\b/i;
    const errorHint = /\b(type|test|contract)\b/i;
    let constraintFailureCount = 0;
    for (const call of session.toolCalls) {
        if (call.outcome !== 'failure' || call.filePath === undefined) {
            continue;
        }
        if (pathHint.test(call.filePath) || errorHint.test(call.errorMessage ?? '')) {
            constraintFailureCount += 1;
        }
    }
    if (constraintFailureCount === 0) {
        return { violated: false, reason: 'No violation signals for T-02' };
    }
    return {
        violated: true,
        reason: `Tool failures on type/test/contract interactions (${constraintFailureCount}) — T-02 violated: constraints not verified`,
    };
}
|
|
456
|
+
/**
 * T-03 "Evidence Over Assumption" — violation check.
 *
 * Violated when a tool call fails before any read-tool call with a file path
 * has happened in the session. Only the first failure can decide this: once a
 * qualifying read precedes it, it also precedes every later failure.
 *
 * @param {{toolCalls: Array<{toolName: string, outcome?: string, filePath?: string}>}} session
 * @returns {{violated: boolean, reason: string}}
 */
function detectT03Violation(session) {
    // Single pass instead of re-scanning the prefix of the call list per failure.
    for (let failIdx = 0; failIdx < session.toolCalls.length; failIdx += 1) {
        const call = session.toolCalls[failIdx];
        // Check for failure first: a failing read is not its own prior evidence.
        if (call.outcome === 'failure') {
            return {
                violated: true,
                reason: `Tool failure at index ${failIdx} without prior read operations — T-03 violated: assumption made without evidence`,
            };
        }
        if (READ_TOOLS.has(call.toolName) && call.filePath !== undefined) {
            // Evidence was gathered before any failure; later failures are covered too.
            break;
        }
    }
    return { violated: false, reason: 'No violation signals for T-03' };
}
|
|
476
|
+
/**
 * T-04 "Reversible First" — violation check.
 *
 * Violated when the session contains a risky tool call (a RISKY_TOOLS member
 * or 'bash') together with at least one pain signal. Their relative order is
 * not examined.
 *
 * @param {{toolCalls: Array<{toolName: string}>,
 *          painSignals: Array<object>}} session
 * @returns {{violated: boolean, reason: string}}
 */
function detectT04Violation(session) {
    const attemptedRisky = session.toolCalls.some((call) => call.toolName === 'bash' || RISKY_TOOLS.has(call.toolName));
    if (!attemptedRisky) {
        return { violated: false, reason: 'No risky operations — T-04 not violated' };
    }
    // Risky operations plus pain in the same session are taken as evidence that
    // the risky operation had negative consequences.
    return session.painSignals.length > 0
        ? {
            violated: true,
            reason: 'Pain signals present alongside risky operations — T-04 violated: irreversible consequences',
        }
        : { violated: false, reason: 'No violation signals for T-04' };
}
|
|
497
|
+
/**
 * T-05 "Safety Rails" — violation check.
 *
 * Any gate block in the session is reported as a violation. Blocks are
 * additionally classified as "dangerous" (RISKY_TOOLS member, dangerous bash
 * pattern in the reason, or a risky keyword in the reason); that
 * classification only changes the wording of the reason.
 *
 * @param {{gateBlocks: Array<{toolName: string, reason: string}>}} session
 * @returns {{violated: boolean, reason: string}}
 */
function detectT05Violation(session) {
    if (session.gateBlocks.length === 0) {
        return { violated: false, reason: 'No gate blocks — T-05 not violated' };
    }
    const isDangerous = (block) => {
        if (RISKY_TOOLS.has(block.toolName)) {
            return true;
        }
        // NOTE(review): when RISKY_TOOLS contains 'bash' this check is shadowed by
        // the one above.
        if (block.toolName === 'bash' && DANGEROUS_BASH_PATTERNS.some((pattern) => pattern.test(block.reason))) {
            return true;
        }
        // Fallback: scan the free-text reason for risky keywords.
        return RISKY_KEYWORDS_IN_REASON.some((pattern) => pattern.test(block.reason));
    };
    const dangerousBlocks = session.gateBlocks.filter(isDangerous);
    const reason = dangerousBlocks.length > 0
        ? `Gate blocks on dangerous operations (${dangerousBlocks.length}) — T-05 violated: safety rail not called out`
        : `Gate blocks present (${session.gateBlocks.length}) — T-05 violated: safety rail not respected`;
    return { violated: true, reason };
}
|
|
533
|
+
/**
 * T-06 "Simplicity First" — violation check.
 *
 * Violated when a pain signal of severity 'severe' has a reason that mentions
 * over-engineering, over-complication, or a too/unnecessarily complex solution.
 *
 * @param {{painSignals: Array<{severity?: string, reason?: string}>}} session
 * @returns {{violated: boolean, reason: string}}
 */
function detectT06Violation(session) {
    const overEngineering = /over.*engineer|over.*complicat|too.*complex|unnecessarily.*complex/i;
    for (const pain of session.painSignals) {
        if (pain.severity === 'severe' && overEngineering.test(pain.reason ?? '')) {
            return {
                violated: true,
                reason: 'Severe pain from over-engineering — T-06 violated: simplicity not preferred',
            };
        }
    }
    return { violated: false, reason: 'No over-engineering signals — T-06 not violated' };
}
|
|
548
|
+
/**
 * T-07 "Minimal Change Surface" — violation check.
 *
 * Violated when edit tools touched five or more distinct file paths (compared
 * after slash normalization) and the session has two or more failed tool calls.
 *
 * @param {{toolCalls: Array<{toolName: string, filePath?: string | null, outcome?: string}>}} session
 * @returns {{violated: boolean, reason: string}}
 */
function detectT07Violation(session) {
    const modifiedFiles = new Set();
    for (const call of session.toolCalls) {
        // Only string paths are counted: a null/absent filePath must not reach
        // normalizePath, which would throw.
        if (EDIT_TOOLS.has(call.toolName) && typeof call.filePath === 'string') {
            modifiedFiles.add(normalizePath(call.filePath));
        }
    }
    const failures = session.toolCalls.filter((call) => call.outcome === 'failure');
    if (modifiedFiles.size >= 5 && failures.length >= 2) {
        return {
            violated: true,
            reason: `Wide blast radius (${modifiedFiles.size} files, ${failures.length} failures) — T-07 violated: change surface not minimized`,
        };
    }
    return { violated: false, reason: 'No blast radius violations — T-07 not violated' };
}
|
|
565
|
+
/**
 * T-08 violation check (pain treated as a signal).
 *
 * There are no explicit reflection events, so this uses a proxy: the session
 * has at least one pain signal, at least one failed tool call, and none of
 * the (up to three) tool calls that directly follow the FIRST failure uses a
 * tool from READ_TOOLS. A failure with no subsequent calls is not flagged.
 *
 * @param {object} session - Session events; `painSignals` and `toolCalls` are read.
 * @returns {{violated: boolean, reason: string}} Verdict plus a human-readable reason.
 */
function detectT08Violation(session) {
    const FOLLOW_UP_WINDOW = 3;
    const painObserved = session.painSignals.length > 0;
    const firstFailureAt = session.toolCalls.findIndex((call) => call.outcome === 'failure');
    if (painObserved && firstFailureAt !== -1) {
        // Inspect only the calls made right after the first failure.
        const followUps = session.toolCalls.slice(firstFailureAt + 1, firstFailureAt + 1 + FOLLOW_UP_WINDOW);
        const pausedToRead = followUps.some((call) => READ_TOOLS.has(call.toolName));
        if (followUps.length > 0 && !pausedToRead) {
            return {
                violated: true,
                reason: 'Failure followed immediately by continued operations without pause/reflect — T-08 violated: pain not treated as signal',
            };
        }
    }
    return { violated: false, reason: 'No T-08 violation signals detected' };
}
|
|
592
|
+
/**
 * T-09 violation check (task decomposition).
 *
 * Applies only to "complex" sessions: at least 5 tool calls, or at least 3
 * distinct file paths (paths are passed through normalizePath). Within a
 * complex session, a violation is reported when there was any failed tool
 * call or any pain signal, and the session shows neither a plan approval nor
 * any call to a tool in READ_TOOLS.
 *
 * @param {object} session - Session events; `toolCalls`, `painSignals` and
 *   `planApprovals` are read.
 * @returns {{violated: boolean, reason: string}} Verdict plus a human-readable reason.
 */
function detectT09Violation(session) {
    const MIN_CALLS = 5;
    const MIN_FILES = 3;
    const callCount = session.toolCalls.length;
    const distinctPaths = session.toolCalls.reduce((paths, call) => {
        if (call.filePath !== undefined) {
            paths.add(normalizePath(call.filePath));
        }
        return paths;
    }, new Set());
    const isComplex = callCount >= MIN_CALLS || distinctPaths.size >= MIN_FILES;
    if (!isComplex) {
        return { violated: false, reason: 'Session not complex enough for T-09 applicability' };
    }
    const sawFailure = session.toolCalls.some((call) => call.outcome === 'failure');
    const sawPain = session.painSignals.length > 0;
    if (sawFailure || sawPain) {
        // Planning evidence: an approved plan, or any read-type tool call in the session.
        const planned = session.planApprovals.length > 0;
        const readSomething = session.toolCalls.some((call) => READ_TOOLS.has(call.toolName));
        if (!(planned || readSomething)) {
            return {
                violated: true,
                reason: `Complex task with failures/pain but no planning or decomposition signals — T-09 violated: task not divided`,
            };
        }
    }
    return { violated: false, reason: 'No T-09 violation signals' };
}
|
|
622
|
+
// ---------------------------------------------------------------------------
|
|
623
|
+
// Compliance Computation
|
|
624
|
+
// ---------------------------------------------------------------------------
|
|
625
|
+
/**
 * Computes compliance metrics for a single T-xx principle across a batch of sessions.
 *
 * Dilution prevention: a session for which detectOpportunity reports
 * `applicable: false` is skipped entirely. It is not counted in
 * applicableOpportunityCount and therefore cannot change complianceRate.
 *
 * complianceRate is (applicable - violated) / applicable, or 0 when no
 * session was applicable.
 *
 * violationTrend is delegated to computeViolationTrend, which expects the
 * applicable sessions in the same order as `sessions` (callers are expected
 * to pass sessions most-recent-first).
 *
 * @param {string} principleId - Principle identifier, e.g. 'T-05'.
 * @param {Iterable<object>} sessions - Session event bundles to evaluate.
 * @param {{trendWindowSize?: number}} [options] - `trendWindowSize` defaults to 3.
 * @returns {{principleId: string, applicableOpportunityCount: number,
 *   observedViolationCount: number, complianceRate: number,
 *   violationTrend: number, explanation: string}}
 */
export function computeCompliance(principleId, sessions, options = {}) {
    const windowSize = options.trendWindowSize ?? 3;
    const applicableSessions = [];
    for (const session of sessions) {
        // Sessions without an opportunity are dropped before any violation check.
        if (!detectOpportunity(principleId, session).applicable) {
            continue;
        }
        const { violated, reason } = detectViolation(principleId, session);
        applicableSessions.push({ session, violated, reason });
    }
    const applicableOpportunityCount = applicableSessions.length;
    const observedViolationCount = applicableSessions.filter((entry) => entry.violated).length;
    let complianceRate = 0;
    if (applicableOpportunityCount > 0) {
        complianceRate = (applicableOpportunityCount - observedViolationCount) / applicableOpportunityCount;
    }
    const violationTrend = computeViolationTrend(applicableSessions, windowSize);
    const explanation = buildExplanation(principleId, applicableOpportunityCount, observedViolationCount, complianceRate, violationTrend, applicableSessions);
    return {
        principleId,
        applicableOpportunityCount,
        observedViolationCount,
        complianceRate,
        violationTrend,
        explanation,
    };
}
|
|
682
|
+
/**
 * Computes the violation trend across the applicable session list.
 *
 * The input is ordered most-recent-first:
 *   currentWindow  = the first `windowSize` sessions (most recent)
 *   previousWindow = the next `windowSize` sessions (older; may be partial)
 *
 * The result is discretized from
 * delta = previousViolationRate - currentViolationRate:
 *   +1 when delta >  0.1 (violations decreasing, improving)
 *   -1 when delta < -0.1 (violations increasing, worsening)
 *    0 otherwise (stable), and also whenever there is not enough data:
 *      fewer than 2 sessions, no previous window, or a window size below 1.
 *
 * @param {Array<{violated: boolean}>} applicableSessions - Most-recent-first.
 * @param {number} windowSize - Number of sessions per comparison window.
 * @returns {number} 1, 0 or -1.
 */
function computeViolationTrend(applicableSessions, windowSize) {
    const TREND_THRESHOLD = 0.1;
    // A window must hold at least one session. The negated comparison also
    // rejects NaN/undefined. Negative sizes previously reached slice() as
    // from-the-end offsets and produced a trend from an arbitrary subset.
    if (!(windowSize >= 1) || applicableSessions.length < 2) {
        return 0;
    }
    const currentWindow = applicableSessions.slice(0, windowSize);
    const previousWindow = applicableSessions.slice(windowSize, windowSize * 2);
    if (previousWindow.length === 0) {
        // Without older sessions the current window IS the whole list, so a
        // comparison against the overall rate can never differ: report
        // "insufficient data" directly instead of running that comparison.
        return 0;
    }
    const violationRate = (window) => window.filter((s) => s.violated).length / window.length;
    const delta = violationRate(previousWindow) - violationRate(currentWindow);
    if (delta > TREND_THRESHOLD) {
        return 1; // violations decreasing → improving
    }
    if (delta < -TREND_THRESHOLD) {
        return -1; // violations increasing → worsening
    }
    return 0; // stable
}
|
|
722
|
+
/**
 * Renders a compliance result as human-readable, newline-separated text.
 *
 * With zero applicable opportunities a single "cannot be assessed" sentence
 * is returned. Otherwise the text has three parts: the counts, the
 * compliance rate (one decimal place) with a trend label, and up to two
 * sample violation reasons (or a "no violations" note).
 *
 * @param {string} principleId
 * @param {number} applicableOpportunityCount
 * @param {number} observedViolationCount
 * @param {number} complianceRate - Fraction in [0, 1]; shown as a percentage.
 * @param {number} violationTrend - 1 (improving), -1 (worsening); anything else is shown as stable.
 * @param {Array<{violated: boolean, reason: string}>} applicableSessions
 * @returns {string}
 */
function buildExplanation(principleId, applicableOpportunityCount, observedViolationCount, complianceRate, violationTrend, applicableSessions) {
    if (applicableOpportunityCount === 0) {
        return `${principleId}: No applicable opportunities in provided sessions — compliance cannot be assessed.`;
    }
    let trendLabel;
    switch (violationTrend) {
        case 1:
            trendLabel = '↑ improving';
            break;
        case -1:
            trendLabel = '↓ worsening';
            break;
        default:
            trendLabel = '→ stable';
    }
    // Only the first two violating sessions are quoted as examples.
    const sampleLines = applicableSessions
        .filter((entry) => entry.violated)
        .slice(0, 2)
        .map((entry) => ` • ${entry.reason}`);
    const header = `${principleId}: ${applicableOpportunityCount} applicable opportunities, ${observedViolationCount} violations.`;
    const rateLine = `Compliance rate: ${(complianceRate * 100).toFixed(1)}%. Trend: ${trendLabel}.`;
    const samples = sampleLines.length > 0
        ? `Sample violation signals:\n${sampleLines.join('\n')}`
        : 'No violations detected in recent sessions.';
    return `${header}\n${rateLine}\n${samples}`;
}
|
|
745
|
+
// ---------------------------------------------------------------------------
|
|
746
|
+
// Batch Update Helpers
|
|
747
|
+
// ---------------------------------------------------------------------------
|
|
748
|
+
/**
 * Runs computeCompliance for each principle T-01 through T-09, in that
 * order, over the same sessions and options.
 *
 * Sessions are assumed to be ordered most-recent-first.
 *
 * @param {Iterable<object>} sessions - Session event bundles to evaluate.
 * @param {object} [options] - Forwarded unchanged to computeCompliance.
 * @returns {Array<object>} One compliance result per principle, T-01 first.
 */
export function computeAllCompliance(sessions, options = {}) {
    const principleIds = ['T-01', 'T-02', 'T-03', 'T-04', 'T-05', 'T-06', 'T-07', 'T-08', 'T-09'];
    return principleIds.map((principleId) => computeCompliance(principleId, sessions, options));
}
|
|
761
|
+
/**
 * Groups raw event-log entries by session and maps them onto per-session
 * event bundles.
 *
 * Events with no sessionId are grouped under sessionId = 'unknown'.
 * Recognized event types: 'tool_call' (kept only when data.toolName is
 * truthy), 'pain_signal', 'gate_block', 'empathy_rollback' (recorded as a
 * user correction) and 'plan_approval'. Other types still create the
 * session entry but add nothing to it.
 *
 * An event without a `data` payload is treated as having an empty payload,
 * so a single malformed entry cannot abort the whole grouping.
 *
 * @param {Iterable<{type: string, sessionId?: string, data?: object}>} events
 * @returns {Map<string, {sessionId: string, toolCalls: object[],
 *   painSignals: object[], gateBlocks: object[], userCorrections: object[],
 *   planApprovals: object[]}>} Sessions keyed by sessionId, in first-seen order.
 */
export function groupEventsIntoSessions(events) {
    const sessionMap = new Map();
    for (const event of events) {
        const sessionId = event.sessionId ?? 'unknown';
        let session = sessionMap.get(sessionId);
        if (session === undefined) {
            session = {
                sessionId,
                toolCalls: [],
                painSignals: [],
                gateBlocks: [],
                userCorrections: [],
                planApprovals: [],
            };
            sessionMap.set(sessionId, session);
        }
        // Tolerate entries that carry no payload at all.
        const data = event.data ?? {};
        switch (event.type) {
            case 'tool_call':
                // A tool call without a tool name cannot be attributed; drop it.
                if (data.toolName) {
                    session.toolCalls.push({
                        toolName: data.toolName,
                        filePath: data.filePath,
                        outcome: (data.error ? 'failure' : 'success'),
                        errorType: data.errorType,
                        errorMessage: data.error,
                    });
                }
                break;
            case 'pain_signal':
                session.painSignals.push({
                    source: data.source ?? 'unknown',
                    score: data.score ?? 0,
                    severity: data.severity,
                    reason: data.reason,
                });
                break;
            case 'gate_block':
                session.gateBlocks.push({
                    toolName: data.toolName ?? 'unknown',
                    filePath: data.filePath,
                    reason: data.reason ?? '',
                });
                break;
            case 'empathy_rollback':
                // User corrections are flagged via empathy rollback.
                session.userCorrections.push({
                    correctionCue: data.reason,
                });
                break;
            case 'plan_approval':
                session.planApprovals.push({
                    toolName: data.toolName ?? 'unknown',
                    filePath: data.filePath,
                });
                break;
        }
    }
    return sessionMap;
}
|