principles-disciple 1.98.0 → 1.100.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/openclaw.plugin.json +1 -1
- package/package.json +1 -1
- package/src/hooks/trajectory-collector.ts +19 -126
- package/src/index.ts +0 -31
- package/tests/core/pain-diagnostic-gate.test.ts +117 -1
- package/tests/core/surface-guard.test.ts +7 -9
- package/tests/evolution-worker-slimming.test.ts +0 -4
- package/tests/hooks/trajectory-evidence.test.ts +194 -0
- package/tests/integration/mvp-surface-registry-guard.test.ts +6 -8
package/openclaw.plugin.json
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
"id": "principles-disciple",
|
|
3
3
|
"name": "Principles Disciple",
|
|
4
4
|
"description": "Evolutionary programming agent framework with strategic guardrails and reflection loops.",
|
|
5
|
-
"version": "1.98.0",
|
|
5
|
+
"version": "1.100.0",
|
|
6
6
|
"activation": {
|
|
7
7
|
"onCapabilities": [
|
|
8
8
|
"hook"
|
package/package.json
CHANGED
|
@@ -1,16 +1,14 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Trajectory Collector -
|
|
3
|
-
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
2
|
+
* Trajectory Collector - message write trajectory recording
|
|
3
|
+
*
|
|
4
|
+
* Records message data to memory/trajectories/ JSONL files.
|
|
5
|
+
* PRI-347 removed tool_call and llm_output JSONL writers (no consumers).
|
|
6
|
+
* PRI-346 will repurpose handleBeforeMessageWrite for SQLite collection.
|
|
6
7
|
*/
|
|
7
8
|
|
|
8
9
|
import * as fs from 'fs';
|
|
9
10
|
import * as path from 'path';
|
|
10
11
|
import type {
|
|
11
|
-
PluginHookAfterToolCallEvent,
|
|
12
|
-
PluginHookToolContext,
|
|
13
|
-
PluginHookLlmOutputEvent,
|
|
14
12
|
PluginHookAgentContext,
|
|
15
13
|
PluginHookBeforeMessageWriteEvent
|
|
16
14
|
} from '../openclaw-sdk.js';
|
|
@@ -21,19 +19,16 @@ const TRAJECTORY_DIR = 'memory/trajectories/';
|
|
|
21
19
|
// 敏感字段匹配正则
|
|
22
20
|
const SENSITIVE_KEY_PATTERN = /password|token|authorization|secret|api[_-]?key|credential|cookie|session/i;
|
|
23
21
|
|
|
24
|
-
// 最大结果长度(不同于 MAX_STRING_LENGTH)
|
|
25
|
-
const MAX_RESULT_LENGTH = 500;
|
|
26
|
-
|
|
27
22
|
/**
|
|
28
23
|
* 递归脱敏处理:遍历对象/数组,移除敏感字段值
|
|
29
24
|
*/
|
|
30
25
|
function scrubSensitive(obj: unknown, depth = 0): unknown {
|
|
31
26
|
// 防止无限递归
|
|
32
27
|
if (depth > 10) return '[MAX_DEPTH]';
|
|
33
|
-
|
|
28
|
+
|
|
34
29
|
// 处理 null/undefined
|
|
35
30
|
if (obj == null) return obj;
|
|
36
|
-
|
|
31
|
+
|
|
37
32
|
// 处理基本类型
|
|
38
33
|
if (typeof obj !== 'object') {
|
|
39
34
|
if (typeof obj === 'string' && obj.length > MAX_STRING_LENGTH) {
|
|
@@ -41,12 +36,12 @@ function scrubSensitive(obj: unknown, depth = 0): unknown {
|
|
|
41
36
|
}
|
|
42
37
|
return obj;
|
|
43
38
|
}
|
|
44
|
-
|
|
39
|
+
|
|
45
40
|
// 处理数组
|
|
46
41
|
if (Array.isArray(obj)) {
|
|
47
42
|
return obj.map(item => scrubSensitive(item, depth + 1));
|
|
48
43
|
}
|
|
49
|
-
|
|
44
|
+
|
|
50
45
|
// 处理对象
|
|
51
46
|
const result: Record<string, unknown> = {};
|
|
52
47
|
for (const [key, value] of Object.entries(obj as Record<string, unknown>)) {
|
|
@@ -65,20 +60,20 @@ function scrubSensitive(obj: unknown, depth = 0): unknown {
|
|
|
65
60
|
class AsyncWriteQueue {
|
|
66
61
|
private readonly queue: (() => Promise<void>)[] = [];
|
|
67
62
|
private processing = false;
|
|
68
|
-
|
|
63
|
+
|
|
69
64
|
async enqueue(task: () => Promise<void>): Promise<void> {
|
|
70
65
|
this.queue.push(task);
|
|
71
66
|
if (!this.processing) {
|
|
72
67
|
this.processNext();
|
|
73
68
|
}
|
|
74
69
|
}
|
|
75
|
-
|
|
70
|
+
|
|
76
71
|
private async processNext(): Promise<void> {
|
|
77
72
|
if (this.queue.length === 0) {
|
|
78
73
|
this.processing = false;
|
|
79
74
|
return;
|
|
80
75
|
}
|
|
81
|
-
|
|
76
|
+
|
|
82
77
|
this.processing = true;
|
|
83
78
|
const task = this.queue.shift();
|
|
84
79
|
|
|
@@ -92,7 +87,7 @@ class AsyncWriteQueue {
|
|
|
92
87
|
} catch {
|
|
93
88
|
// Silently fail - trajectory collection should not block main functionality
|
|
94
89
|
}
|
|
95
|
-
|
|
90
|
+
|
|
96
91
|
// 处理下一个任务
|
|
97
92
|
this.processNext();
|
|
98
93
|
}
|
|
@@ -109,11 +104,11 @@ const dirCache = new Map<string, boolean>();
|
|
|
109
104
|
*/
|
|
110
105
|
async function ensureTrajectoryDirAsync(workspaceDir: string): Promise<string> {
|
|
111
106
|
const dir = path.join(workspaceDir, TRAJECTORY_DIR);
|
|
112
|
-
|
|
107
|
+
|
|
113
108
|
if (dirCache.get(dir)) {
|
|
114
109
|
return dir;
|
|
115
110
|
}
|
|
116
|
-
|
|
111
|
+
|
|
117
112
|
try {
|
|
118
113
|
await fs.promises.mkdir(dir, { recursive: true });
|
|
119
114
|
dirCache.set(dir, true);
|
|
@@ -121,7 +116,7 @@ async function ensureTrajectoryDirAsync(workspaceDir: string): Promise<string> {
|
|
|
121
116
|
// 目录可能已存在,忽略错误
|
|
122
117
|
dirCache.set(dir, true);
|
|
123
118
|
}
|
|
124
|
-
|
|
119
|
+
|
|
125
120
|
return dir;
|
|
126
121
|
}
|
|
127
122
|
|
|
@@ -140,7 +135,7 @@ function getTodayFilename(): string {
|
|
|
140
135
|
*/
|
|
141
136
|
function writeTrajectoryRecord(workspaceDir: string, record: object): void {
|
|
142
137
|
const line = JSON.stringify(record) + '\n';
|
|
143
|
-
|
|
138
|
+
|
|
144
139
|
writeQueue.enqueue(async () => {
|
|
145
140
|
const dir = await ensureTrajectoryDirAsync(workspaceDir);
|
|
146
141
|
const filepath = path.join(dir, getTodayFilename());
|
|
@@ -148,71 +143,12 @@ function writeTrajectoryRecord(workspaceDir: string, record: object): void {
|
|
|
148
143
|
});
|
|
149
144
|
}
|
|
150
145
|
|
|
151
|
-
/**
|
|
152
|
-
* 工具调用完成后的处理
|
|
153
|
-
* 记录:工具名、参数、结果、错误、执行时间
|
|
154
|
-
*/
|
|
155
|
-
export function handleAfterToolCall(
|
|
156
|
-
event: PluginHookAfterToolCallEvent,
|
|
157
|
-
ctx: PluginHookToolContext & { workspaceDir?: string }
|
|
158
|
-
): void {
|
|
159
|
-
const {workspaceDir} = ctx;
|
|
160
|
-
if (!workspaceDir) return;
|
|
161
|
-
|
|
162
|
-
// 递归脱敏处理所有字段
|
|
163
|
-
const sanitizedParams = scrubSensitive(event.params);
|
|
164
|
-
const sanitizedResult = event.result == null
|
|
165
|
-
? null
|
|
166
|
-
: String(scrubSensitive(event.result)).slice(0, MAX_RESULT_LENGTH);
|
|
167
|
-
const sanitizedError = event.error == null
|
|
168
|
-
? null
|
|
169
|
-
: String(scrubSensitive(event.error));
|
|
170
|
-
|
|
171
|
-
writeTrajectoryRecord(workspaceDir, {
|
|
172
|
-
type: 'tool_call',
|
|
173
|
-
timestamp: new Date().toISOString(),
|
|
174
|
-
sessionId: ctx.sessionId || 'unknown',
|
|
175
|
-
toolName: event.toolName,
|
|
176
|
-
params: sanitizedParams,
|
|
177
|
-
result: sanitizedResult,
|
|
178
|
-
error: sanitizedError,
|
|
179
|
-
durationMs: event.durationMs,
|
|
180
|
-
success: !event.error,
|
|
181
|
-
runId: event.runId || null,
|
|
182
|
-
toolCallId: event.toolCallId || null
|
|
183
|
-
});
|
|
184
|
-
}
|
|
185
|
-
|
|
186
|
-
/**
|
|
187
|
-
* LLM 输出处理
|
|
188
|
-
* 记录:provider、model、输出长度、token 使用量
|
|
189
|
-
*/
|
|
190
|
-
export function handleLlmOutput(
|
|
191
|
-
event: PluginHookLlmOutputEvent,
|
|
192
|
-
ctx: PluginHookAgentContext & { workspaceDir?: string }
|
|
193
|
-
): void {
|
|
194
|
-
const {workspaceDir} = ctx;
|
|
195
|
-
if (!workspaceDir) return;
|
|
196
|
-
|
|
197
|
-
const totalTextLength = event.assistantTexts?.reduce((sum, text) => sum + (text?.length || 0), 0) || 0;
|
|
198
|
-
|
|
199
|
-
writeTrajectoryRecord(workspaceDir, {
|
|
200
|
-
type: 'llm_output',
|
|
201
|
-
timestamp: new Date().toISOString(),
|
|
202
|
-
sessionId: ctx.sessionId || 'unknown',
|
|
203
|
-
provider: event.provider,
|
|
204
|
-
model: event.model,
|
|
205
|
-
textLength: totalTextLength,
|
|
206
|
-
outputCount: event.assistantTexts?.length || 0,
|
|
207
|
-
usage: event.usage ? scrubSensitive(event.usage) : null
|
|
208
|
-
});
|
|
209
|
-
}
|
|
210
|
-
|
|
211
146
|
/**
|
|
212
147
|
* 消息写入前的处理
|
|
213
148
|
* 记录:用户/助手消息内容
|
|
149
|
+
*
|
|
150
|
+
* PRI-346 will repurpose this to write to SQLite instead of JSONL.
|
|
214
151
|
*/
|
|
215
|
-
|
|
216
152
|
export function handleBeforeMessageWrite(
|
|
217
153
|
event: PluginHookBeforeMessageWriteEvent,
|
|
218
154
|
ctx: PluginHookAgentContext & { workspaceDir?: string }
|
|
@@ -229,9 +165,6 @@ export function handleBeforeMessageWrite(
|
|
|
229
165
|
// 提取文本内容
|
|
230
166
|
let content = '';
|
|
231
167
|
if (typeof msg.content === 'string') {
|
|
232
|
-
|
|
233
|
-
// Reason: msg.content is string | ContentPart[]; destructuring would require renaming in the else branch
|
|
234
|
-
|
|
235
168
|
content = msg.content;
|
|
236
169
|
} else if (Array.isArray(msg.content)) {
|
|
237
170
|
content = msg.content
|
|
@@ -253,43 +186,3 @@ export function handleBeforeMessageWrite(
|
|
|
253
186
|
agentId: event.agentId || null
|
|
254
187
|
});
|
|
255
188
|
}
|
|
256
|
-
|
|
257
|
-
/**
|
|
258
|
-
* 脱敏处理:移除敏感参数(保留旧函数签名以兼容)
|
|
259
|
-
* @deprecated 使用 scrubSensitive 替代
|
|
260
|
-
*/
|
|
261
|
-
/**
|
|
262
|
-
* 轨迹汇总统计(供 cron 任务调用)
|
|
263
|
-
*/
|
|
264
|
-
export function computeTrajectoryStats(workspaceDir: string): object {
|
|
265
|
-
const dir = path.join(workspaceDir, TRAJECTORY_DIR);
|
|
266
|
-
const todayFile = path.join(dir, getTodayFilename());
|
|
267
|
-
|
|
268
|
-
if (!fs.existsSync(todayFile)) {
|
|
269
|
-
return { date: getTodayFilename(), totalRecords: 0, toolCalls: 0, llmOutputs: 0, messages: 0 };
|
|
270
|
-
}
|
|
271
|
-
|
|
272
|
-
const content = fs.readFileSync(todayFile, 'utf8');
|
|
273
|
-
const lines = content.split('\n').filter(line => line.trim());
|
|
274
|
-
|
|
275
|
-
const toolCalls = lines.filter(line => {
|
|
276
|
-
try { return JSON.parse(line).type === 'tool_call'; } catch { return false; }
|
|
277
|
-
}).length;
|
|
278
|
-
|
|
279
|
-
const llmOutputs = lines.filter(line => {
|
|
280
|
-
try { return JSON.parse(line).type === 'llm_output'; } catch { return false; }
|
|
281
|
-
}).length;
|
|
282
|
-
|
|
283
|
-
const messages = lines.filter(line => {
|
|
284
|
-
try { return JSON.parse(line).type === 'message'; } catch { return false; }
|
|
285
|
-
}).length;
|
|
286
|
-
|
|
287
|
-
return {
|
|
288
|
-
date: getTodayFilename(),
|
|
289
|
-
totalRecords: lines.length,
|
|
290
|
-
toolCalls,
|
|
291
|
-
llmOutputs,
|
|
292
|
-
messages,
|
|
293
|
-
generatedAt: new Date().toISOString()
|
|
294
|
-
};
|
|
295
|
-
}
|
package/src/index.ts
CHANGED
|
@@ -29,7 +29,6 @@ import { handleAfterToolCall } from './hooks/pain.js';
|
|
|
29
29
|
import { handleBeforeReset, handleBeforeCompaction, handleAfterCompaction } from './hooks/lifecycle.js';
|
|
30
30
|
import { handleLlmOutput } from './hooks/llm.js';
|
|
31
31
|
import { handleSubagentEnded } from './hooks/subagent.js';
|
|
32
|
-
import * as TrajectoryCollector from './hooks/trajectory-collector.js';
|
|
33
32
|
import { handleInitStrategy } from './commands/strategy.js';
|
|
34
33
|
import { handleBootstrapTools, handleResearchTools } from './commands/capabilities.js';
|
|
35
34
|
import { handleThinkingOs } from './commands/thinking-os.js';
|
|
@@ -436,36 +435,6 @@ const plugin = {
|
|
|
436
435
|
})
|
|
437
436
|
);
|
|
438
437
|
|
|
439
|
-
// ── Hook: Trajectory Collection (Behavior Evolution Phase 0) ──
|
|
440
|
-
// Note: after_tool_call and llm_output are safe to collect
|
|
441
|
-
api.on(
|
|
442
|
-
'after_tool_call',
|
|
443
|
-
guardHook('hook:after_tool_call.trajectory', api.logger, (event: PluginHookAfterToolCallEvent, ctx: PluginHookToolContext): void => {
|
|
444
|
-
try {
|
|
445
|
-
const workspaceDir = resolveToolHookWorkspaceDirSafe(ctx, api, 'trajectory.after_tool_call');
|
|
446
|
-
if (!workspaceDir) return;
|
|
447
|
-
TrajectoryCollector.handleAfterToolCall(event, { ...ctx, workspaceDir });
|
|
448
|
-
// eslint-disable-next-line @typescript-eslint/no-unused-vars -- Reason: catch binding intentionally unused
|
|
449
|
-
} catch (_err) {
|
|
450
|
-
// Non-critical: don't log, just skip
|
|
451
|
-
}
|
|
452
|
-
})
|
|
453
|
-
);
|
|
454
|
-
|
|
455
|
-
api.on(
|
|
456
|
-
'llm_output',
|
|
457
|
-
guardHook('hook:llm_output.trajectory', api.logger, (event: PluginHookLlmOutputEvent, ctx: PluginHookAgentContext): void => {
|
|
458
|
-
try {
|
|
459
|
-
const workspaceDir = resolveToolHookWorkspaceDirSafe(ctx, api, 'trajectory.llm_output');
|
|
460
|
-
if (!workspaceDir) return;
|
|
461
|
-
TrajectoryCollector.handleLlmOutput(event, { ...ctx, workspaceDir });
|
|
462
|
-
// eslint-disable-next-line @typescript-eslint/no-unused-vars -- Reason: catch binding intentionally unused
|
|
463
|
-
} catch (_err) {
|
|
464
|
-
// Non-critical: don't log, just skip
|
|
465
|
-
}
|
|
466
|
-
})
|
|
467
|
-
);
|
|
468
|
-
|
|
469
438
|
// ── Hook: Subagent Loop Closure ──
|
|
470
439
|
api.on(
|
|
471
440
|
'subagent_spawning',
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { beforeEach, describe, expect, it } from 'vitest';
|
|
2
|
-
import { evaluatePainDiagnosticGate, resetPainDiagnosticGateForTest } from '../../src/core/pain-diagnostic-gate.js';
|
|
2
|
+
import { evaluatePainDiagnosticGate, resetPainDiagnosticGateForTest, isCooldownActiveForEpisode } from '../../src/core/pain-diagnostic-gate.js';
|
|
3
3
|
|
|
4
4
|
describe('PainDiagnosticGate', () => {
|
|
5
5
|
beforeEach(() => {
|
|
@@ -496,3 +496,119 @@ describe('PainDiagnosticGate', () => {
|
|
|
496
496
|
}
|
|
497
497
|
});
|
|
498
498
|
});
|
|
499
|
+
|
|
500
|
+
// ── isCooldownActiveForEpisode ─────────────────────────────────────────────────
|
|
501
|
+
|
|
502
|
+
describe('isCooldownActiveForEpisode', () => {
|
|
503
|
+
beforeEach(() => {
|
|
504
|
+
resetPainDiagnosticGateForTest();
|
|
505
|
+
});
|
|
506
|
+
|
|
507
|
+
it('returns false when no diagnosis has been recorded', () => {
|
|
508
|
+
const result = isCooldownActiveForEpisode('tool_failure', 's1', 'hash-abc');
|
|
509
|
+
expect(result).toBe(false);
|
|
510
|
+
});
|
|
511
|
+
|
|
512
|
+
it('returns false when cooldownMs is 0 (disabled)', () => {
|
|
513
|
+
// Record diagnosis
|
|
514
|
+
evaluatePainDiagnosticGate({
|
|
515
|
+
source: 'tool_failure',
|
|
516
|
+
score: 50,
|
|
517
|
+
currentGfi: 72,
|
|
518
|
+
sessionId: 's1',
|
|
519
|
+
errorHash: 'hash-abc',
|
|
520
|
+
nowMs: 1_000,
|
|
521
|
+
});
|
|
522
|
+
|
|
523
|
+
// With cooldown disabled, should always return false
|
|
524
|
+
const noCooldown = isCooldownActiveForEpisode('tool_failure', 's1', 'hash-abc', 0);
|
|
525
|
+
expect(noCooldown).toBe(false);
|
|
526
|
+
});
|
|
527
|
+
|
|
528
|
+
it('different sessionId does not share cooldown', () => {
|
|
529
|
+
// Record diagnosis for session s1
|
|
530
|
+
evaluatePainDiagnosticGate({
|
|
531
|
+
source: 'tool_failure',
|
|
532
|
+
score: 50,
|
|
533
|
+
currentGfi: 72,
|
|
534
|
+
sessionId: 's1',
|
|
535
|
+
errorHash: 'hash-abc',
|
|
536
|
+
nowMs: 1_000,
|
|
537
|
+
});
|
|
538
|
+
|
|
539
|
+
// Check for different session s2 - should not be in cooldown
|
|
540
|
+
const differentSession = isCooldownActiveForEpisode('tool_failure', 's2', 'hash-abc');
|
|
541
|
+
expect(differentSession).toBe(false);
|
|
542
|
+
});
|
|
543
|
+
|
|
544
|
+
it('different errorHash does not share cooldown', () => {
|
|
545
|
+
// Record diagnosis for hash-abc
|
|
546
|
+
evaluatePainDiagnosticGate({
|
|
547
|
+
source: 'tool_failure',
|
|
548
|
+
score: 50,
|
|
549
|
+
currentGfi: 72,
|
|
550
|
+
sessionId: 's1',
|
|
551
|
+
errorHash: 'hash-abc',
|
|
552
|
+
nowMs: 1_000,
|
|
553
|
+
});
|
|
554
|
+
|
|
555
|
+
// Check for different hash - should not be in cooldown
|
|
556
|
+
const differentHash = isCooldownActiveForEpisode('tool_failure', 's1', 'hash-xyz');
|
|
557
|
+
expect(differentHash).toBe(false);
|
|
558
|
+
});
|
|
559
|
+
|
|
560
|
+
it('different source does not share cooldown', () => {
|
|
561
|
+
// Record diagnosis for tool_failure
|
|
562
|
+
evaluatePainDiagnosticGate({
|
|
563
|
+
source: 'tool_failure',
|
|
564
|
+
score: 50,
|
|
565
|
+
currentGfi: 72,
|
|
566
|
+
sessionId: 's1',
|
|
567
|
+
errorHash: 'hash-abc',
|
|
568
|
+
nowMs: 1_000,
|
|
569
|
+
});
|
|
570
|
+
|
|
571
|
+
// Check for different source dispatch_error - should not be in cooldown
|
|
572
|
+
const differentSource = isCooldownActiveForEpisode('dispatch_error', 's1', 'hash-abc');
|
|
573
|
+
expect(differentSource).toBe(false);
|
|
574
|
+
});
|
|
575
|
+
|
|
576
|
+
it('undefined sessionId uses unknown as session identifier', () => {
|
|
577
|
+
// Record diagnosis with undefined sessionId
|
|
578
|
+
evaluatePainDiagnosticGate({
|
|
579
|
+
source: 'tool_failure',
|
|
580
|
+
score: 50,
|
|
581
|
+
currentGfi: 72,
|
|
582
|
+
errorHash: 'hash-abc',
|
|
583
|
+
nowMs: 1_000,
|
|
584
|
+
});
|
|
585
|
+
|
|
586
|
+
// Check cooldown with undefined sessionId - should not be in cooldown
|
|
587
|
+
// because evaluate used Date.now() but isCooldownActiveForEpisode uses current Date.now()
|
|
588
|
+
// and 15 seconds haven't passed
|
|
589
|
+
const undefinedSession = isCooldownActiveForEpisode('tool_failure', undefined, 'hash-abc');
|
|
590
|
+
// The episodeKey built from undefined sessionId uses 'unknown'
|
|
591
|
+
// But we can't reliably test time-based behavior without mocking Date.now()
|
|
592
|
+
// So we just verify it doesn't throw
|
|
593
|
+
expect(typeof undefinedSession).toBe('boolean');
|
|
594
|
+
});
|
|
595
|
+
|
|
596
|
+
it('episodeKey alignment: same inputs produce same cooldown state', () => {
|
|
597
|
+
// Use exact same inputs that would create an episodeKey
|
|
598
|
+
const episodeInput = {
|
|
599
|
+
source: 'manual' as const,
|
|
600
|
+
score: 100,
|
|
601
|
+
currentGfi: 0,
|
|
602
|
+
sessionId: 's-ep-test',
|
|
603
|
+
errorHash: 'hash-ep',
|
|
604
|
+
nowMs: 5_000,
|
|
605
|
+
};
|
|
606
|
+
|
|
607
|
+
// First diagnosis
|
|
608
|
+
evaluatePainDiagnosticGate(episodeInput);
|
|
609
|
+
|
|
610
|
+
// isCooldownActiveForEpisode should not throw with same inputs
|
|
611
|
+
const inCooldown = isCooldownActiveForEpisode('manual', 's-ep-test', 'hash-ep');
|
|
612
|
+
expect(typeof inCooldown).toBe('boolean');
|
|
613
|
+
});
|
|
614
|
+
});
|
|
@@ -93,8 +93,8 @@ describe('surface-guard', () => {
|
|
|
93
93
|
});
|
|
94
94
|
|
|
95
95
|
it('allows override for quiet surface', () => {
|
|
96
|
-
const result = isSurfaceEnabled('hook:
|
|
97
|
-
'hook:
|
|
96
|
+
const result = isSurfaceEnabled('hook:subagent_spawning', {
|
|
97
|
+
'hook:subagent_spawning': true,
|
|
98
98
|
});
|
|
99
99
|
expect(result.enabled).toBe(true);
|
|
100
100
|
});
|
|
@@ -308,15 +308,13 @@ describe('surface-guard', () => {
|
|
|
308
308
|
}
|
|
309
309
|
});
|
|
310
310
|
|
|
311
|
-
it('
|
|
312
|
-
const
|
|
313
|
-
s => s.id === 'hook:
|
|
311
|
+
it('subagent hook disabledReason is opt-in and ADR-anchored (PRI-298)', () => {
|
|
312
|
+
const subagent = PLUGIN_SURFACE_REGISTRY.find(
|
|
313
|
+
s => s.id === 'hook:subagent_spawning',
|
|
314
314
|
);
|
|
315
|
-
expect(
|
|
316
|
-
const reason =
|
|
315
|
+
expect(subagent?.disabledReason).toBeDefined();
|
|
316
|
+
const reason = subagent!.disabledReason!.toLowerCase();
|
|
317
317
|
// Quiet hook copy is opt-in / opt-out anchored on a real ADR section
|
|
318
|
-
// (no MVP-phase residue, no promise of a feature-flag override that
|
|
319
|
-
// the production guard path does not actually consume — chatgpt P2).
|
|
320
318
|
expect(reason).toContain('opt-in');
|
|
321
319
|
expect(reason).toContain('default off');
|
|
322
320
|
expect(reason).toMatch(/adr-?0014/);
|
|
@@ -139,8 +139,6 @@ describe('PRI-294: Surface registry coverage audit', () => {
|
|
|
139
139
|
'hook:before_tool_call',
|
|
140
140
|
'hook:after_tool_call',
|
|
141
141
|
'hook:llm_output',
|
|
142
|
-
'hook:after_tool_call.trajectory',
|
|
143
|
-
'hook:llm_output.trajectory',
|
|
144
142
|
'hook:subagent_spawning',
|
|
145
143
|
'hook:subagent_ended',
|
|
146
144
|
'hook:before_reset',
|
|
@@ -257,8 +255,6 @@ describe('PRI-294: MVP core hooks enabled, non-core disabled', () => {
|
|
|
257
255
|
];
|
|
258
256
|
|
|
259
257
|
const QUIET_HOOKS = [
|
|
260
|
-
'hook:after_tool_call.trajectory',
|
|
261
|
-
'hook:llm_output.trajectory',
|
|
262
258
|
'hook:subagent_spawning',
|
|
263
259
|
'hook:subagent_ended',
|
|
264
260
|
'hook:before_reset',
|
|
@@ -0,0 +1,194 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Trajectory Evidence Builder Tests — PRI-326
|
|
3
|
+
*
|
|
4
|
+
* Tests the pure data extraction function buildTrajectoryEvidence
|
|
5
|
+
* which reads from trajectory DB, sanitizes, and returns evidence entries.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import { describe, it, expect, vi, beforeEach } from 'vitest';
|
|
9
|
+
import { buildTrajectoryEvidence } from '../../src/hooks/trajectory-evidence.js';
|
|
10
|
+
import type { WorkspaceContext } from '../../src/core/workspace-context.js';
|
|
11
|
+
import type { TrajectoryDatabase } from '../../src/core/trajectory.js';
|
|
12
|
+
|
|
13
|
+
// Mock sanitizeAssistantText to avoid testing message-sanitize here
|
|
14
|
+
vi.mock('../../src/hooks/message-sanitize.js', () => ({
|
|
15
|
+
sanitizeAssistantText: vi.fn((text: string) => text),
|
|
16
|
+
}));
|
|
17
|
+
|
|
18
|
+
describe('buildTrajectoryEvidence', () => {
|
|
19
|
+
let mockTrajectory: Partial<TrajectoryDatabase>;
|
|
20
|
+
let mockWctx: Partial<WorkspaceContext>;
|
|
21
|
+
|
|
22
|
+
beforeEach(() => {
|
|
23
|
+
mockTrajectory = {
|
|
24
|
+
listUserTurnsForSession: vi.fn(),
|
|
25
|
+
listAssistantTurns: vi.fn(),
|
|
26
|
+
};
|
|
27
|
+
mockWctx = {
|
|
28
|
+
trajectory: mockTrajectory as TrajectoryDatabase,
|
|
29
|
+
workspaceDir: '/test/workspace',
|
|
30
|
+
};
|
|
31
|
+
});
|
|
32
|
+
|
|
33
|
+
it('returns unavailable evidence when trajectory is not available', () => {
|
|
34
|
+
mockWctx.trajectory = undefined;
|
|
35
|
+
const result = buildTrajectoryEvidence(mockWctx as WorkspaceContext, 'unknown');
|
|
36
|
+
expect(result).toHaveLength(1);
|
|
37
|
+
expect(result[0].sourceRef).toBe('owner_message:unavailable');
|
|
38
|
+
expect(result[0].note).toContain('no_trajectory_db');
|
|
39
|
+
});
|
|
40
|
+
|
|
41
|
+
it('returns unavailable evidence when sessionId is unknown', () => {
|
|
42
|
+
const result = buildTrajectoryEvidence(mockWctx as WorkspaceContext, 'unknown');
|
|
43
|
+
expect(result).toHaveLength(1);
|
|
44
|
+
expect(result[0].sourceRef).toBe('owner_message:unavailable');
|
|
45
|
+
expect(result[0].note).toContain('unknown_session');
|
|
46
|
+
});
|
|
47
|
+
|
|
48
|
+
it('returns last correction owner message as evidence', () => {
|
|
49
|
+
const mockUserTurn = {
|
|
50
|
+
createdAt: '2024-01-15T10:00:00Z',
|
|
51
|
+
correctionDetected: true,
|
|
52
|
+
rawExcerpt: 'Please fix this bug',
|
|
53
|
+
};
|
|
54
|
+
vi.mocked(mockTrajectory.listUserTurnsForSession!).mockReturnValue([mockUserTurn as any]);
|
|
55
|
+
vi.mocked(mockTrajectory.listAssistantTurns!).mockReturnValue([]);
|
|
56
|
+
|
|
57
|
+
const result = buildTrajectoryEvidence(mockWctx as WorkspaceContext, 'session-123');
|
|
58
|
+
|
|
59
|
+
expect(result).toHaveLength(1);
|
|
60
|
+
expect(result[0].sourceRef).toContain('owner_message:');
|
|
61
|
+
expect(result[0].note).toBe('Please fix this bug');
|
|
62
|
+
});
|
|
63
|
+
|
|
64
|
+
it('handles trajectory listUserTurnsForSession throwing an error gracefully', () => {
|
|
65
|
+
vi.mocked(mockTrajectory.listUserTurnsForSession!).mockImplementation(() => {
|
|
66
|
+
throw new Error('Database error');
|
|
67
|
+
});
|
|
68
|
+
vi.mocked(mockTrajectory.listAssistantTurns!).mockReturnValue([]);
|
|
69
|
+
|
|
70
|
+
const result = buildTrajectoryEvidence(mockWctx as WorkspaceContext, 'session-123');
|
|
71
|
+
|
|
72
|
+
expect(result).toHaveLength(1);
|
|
73
|
+
expect(result[0].sourceRef).toBe('owner_message:unavailable');
|
|
74
|
+
expect(result[0].note).toContain('trajectory_user_turns_unavailable');
|
|
75
|
+
expect(result[0].note).toContain('Database error');
|
|
76
|
+
});
|
|
77
|
+
|
|
78
|
+
it('returns recent assistant turns as evidence', () => {
|
|
79
|
+
const mockUserTurn = {
|
|
80
|
+
createdAt: '2024-01-15T10:00:00Z',
|
|
81
|
+
correctionDetected: false,
|
|
82
|
+
rawExcerpt: '',
|
|
83
|
+
};
|
|
84
|
+
const mockAssistantTurns = [
|
|
85
|
+
{ createdAt: '2024-01-15T09:58:00Z', sanitizedText: 'Turn 1' },
|
|
86
|
+
{ createdAt: '2024-01-15T09:59:00Z', sanitizedText: 'Turn 2' },
|
|
87
|
+
{ createdAt: '2024-01-15T10:00:00Z', sanitizedText: 'Turn 3' },
|
|
88
|
+
];
|
|
89
|
+
vi.mocked(mockTrajectory.listUserTurnsForSession!).mockReturnValue([mockUserTurn as any]);
|
|
90
|
+
vi.mocked(mockTrajectory.listAssistantTurns!).mockReturnValue(mockAssistantTurns as any);
|
|
91
|
+
|
|
92
|
+
const result = buildTrajectoryEvidence(mockWctx as WorkspaceContext, 'session-123');
|
|
93
|
+
|
|
94
|
+
// Should have the last 3 assistant turns (MAX is 3 from core constants)
|
|
95
|
+
expect(result.length).toBeGreaterThan(0);
|
|
96
|
+
expect(result.some(e => e.note === 'Turn 3')).toBe(true);
|
|
97
|
+
});
|
|
98
|
+
|
|
99
|
+
it('handles trajectory listAssistantTurns throwing an error gracefully', () => {
|
|
100
|
+
const mockUserTurn = {
|
|
101
|
+
createdAt: '2024-01-15T10:00:00Z',
|
|
102
|
+
correctionDetected: true,
|
|
103
|
+
rawExcerpt: 'Last correction',
|
|
104
|
+
};
|
|
105
|
+
vi.mocked(mockTrajectory.listUserTurnsForSession!).mockReturnValue([mockUserTurn as any]);
|
|
106
|
+
vi.mocked(mockTrajectory.listAssistantTurns!).mockImplementation(() => {
|
|
107
|
+
throw new Error('Trajectory DB error');
|
|
108
|
+
});
|
|
109
|
+
|
|
110
|
+
const result = buildTrajectoryEvidence(mockWctx as WorkspaceContext, 'session-123');
|
|
111
|
+
|
|
112
|
+
// Should have owner message plus error entry
|
|
113
|
+
expect(result.length).toBeGreaterThanOrEqual(1);
|
|
114
|
+
expect(result[0].sourceRef).toContain('owner_message:');
|
|
115
|
+
});
|
|
116
|
+
|
|
117
|
+
it('returns empty trajectory notice when no user corrections or assistant turns', () => {
|
|
118
|
+
vi.mocked(mockTrajectory.listUserTurnsForSession!).mockReturnValue([]);
|
|
119
|
+
vi.mocked(mockTrajectory.listAssistantTurns!).mockReturnValue([]);
|
|
120
|
+
|
|
121
|
+
const result = buildTrajectoryEvidence(mockWctx as WorkspaceContext, 'session-123');
|
|
122
|
+
|
|
123
|
+
expect(result).toHaveLength(1);
|
|
124
|
+
expect(result[0].sourceRef).toBe('trajectory:empty');
|
|
125
|
+
expect(result[0].note).toContain('trajectory_available_but_empty');
|
|
126
|
+
});
|
|
127
|
+
|
|
128
|
+
it('respects MAX_EVIDENCE_ENTRIES limit', () => {
|
|
129
|
+
// Create many user turns with corrections
|
|
130
|
+
const manyUserTurns = Array.from({ length: 10 }, (_, i) => ({
|
|
131
|
+
createdAt: `2024-01-15T${String(i).padStart(2, '0')}:00:00Z`,
|
|
132
|
+
correctionDetected: true,
|
|
133
|
+
rawExcerpt: `Correction ${i}`,
|
|
134
|
+
}));
|
|
135
|
+
vi.mocked(mockTrajectory.listUserTurnsForSession!).mockReturnValue(manyUserTurns as any);
|
|
136
|
+
vi.mocked(mockTrajectory.listAssistantTurns!).mockReturnValue([]);
|
|
137
|
+
|
|
138
|
+
const result = buildTrajectoryEvidence(mockWctx as WorkspaceContext, 'session-123');
|
|
139
|
+
|
|
140
|
+
// MAX_EVIDENCE_ENTRIES from core is 5, so should be capped
|
|
141
|
+
expect(result.length).toBeLessThanOrEqual(5);
|
|
142
|
+
});
|
|
143
|
+
|
|
144
|
+
it('uses last correction turn (most recent) when multiple corrections exist', () => {
|
|
145
|
+
const olderCorrection = {
|
|
146
|
+
createdAt: '2024-01-15T09:00:00Z',
|
|
147
|
+
correctionDetected: true,
|
|
148
|
+
rawExcerpt: 'Older correction',
|
|
149
|
+
};
|
|
150
|
+
const newerCorrection = {
|
|
151
|
+
createdAt: '2024-01-15T10:00:00Z',
|
|
152
|
+
correctionDetected: true,
|
|
153
|
+
rawExcerpt: 'Newer correction',
|
|
154
|
+
};
|
|
155
|
+
vi.mocked(mockTrajectory.listUserTurnsForSession!).mockReturnValue([olderCorrection, newerCorrection] as any);
|
|
156
|
+
vi.mocked(mockTrajectory.listAssistantTurns!).mockReturnValue([]);
|
|
157
|
+
|
|
158
|
+
const result = buildTrajectoryEvidence(mockWctx as WorkspaceContext, 'session-123');
|
|
159
|
+
|
|
160
|
+
// Should use the newer correction (last in reverse order)
|
|
161
|
+
expect(result[0].note).toBe('Newer correction');
|
|
162
|
+
});
|
|
163
|
+
|
|
164
|
+
it('sanitizes owner message text', () => {
|
|
165
|
+
const mockUserTurn = {
|
|
166
|
+
createdAt: '2024-01-15T10:00:00Z',
|
|
167
|
+
correctionDetected: true,
|
|
168
|
+
rawExcerpt: 'Text with [EMOTIONAL_DAMAGE_DETECTED:mild] internal tags',
|
|
169
|
+
};
|
|
170
|
+
vi.mocked(mockTrajectory.listUserTurnsForSession!).mockReturnValue([mockUserTurn] as any);
|
|
171
|
+
vi.mocked(mockTrajectory.listAssistantTurns!).mockReturnValue([]);
|
|
172
|
+
|
|
173
|
+
const result = buildTrajectoryEvidence(mockWctx as WorkspaceContext, 'session-123');
|
|
174
|
+
|
|
175
|
+
// The mock sanitizeAssistantText returns text as-is
|
|
176
|
+
expect(result[0].note).toContain('Text with [EMOTIONAL_DAMAGE_DETECTED:mild] internal tags');
|
|
177
|
+
});
|
|
178
|
+
|
|
179
|
+
it('truncates long notes to MAX_EVIDENCE_NOTE_CHARS', () => {
|
|
180
|
+
const longText = 'A'.repeat(10000);
|
|
181
|
+
const mockUserTurn = {
|
|
182
|
+
createdAt: '2024-01-15T10:00:00Z',
|
|
183
|
+
correctionDetected: true,
|
|
184
|
+
rawExcerpt: longText,
|
|
185
|
+
};
|
|
186
|
+
vi.mocked(mockTrajectory.listUserTurnsForSession!).mockReturnValue([mockUserTurn] as any);
|
|
187
|
+
vi.mocked(mockTrajectory.listAssistantTurns!).mockReturnValue([]);
|
|
188
|
+
|
|
189
|
+
const result = buildTrajectoryEvidence(mockWctx as WorkspaceContext, 'session-123');
|
|
190
|
+
|
|
191
|
+
// Note should be truncated to MAX_EVIDENCE_NOTE_CHARS (1000 from core)
|
|
192
|
+
expect(result[0].note.length).toBeLessThanOrEqual(1000);
|
|
193
|
+
});
|
|
194
|
+
});
|
|
@@ -186,18 +186,16 @@ describe('MVP Surface Registry Guard (PRI-289)', () => {
|
|
|
186
186
|
}
|
|
187
187
|
});
|
|
188
188
|
|
|
189
|
-
it('after_tool_call has
|
|
189
|
+
it('after_tool_call has one registration: core only', () => {
|
|
190
190
|
const afterToolCallRegs = registrations.filter(r => r.event === 'after_tool_call');
|
|
191
|
-
expect(afterToolCallRegs.length).toBe(2);
|
|
191
|
+
expect(afterToolCallRegs.length).toBe(1);
|
|
192
192
|
expect(afterToolCallRegs[0].surfaceId).toBe('hook:after_tool_call');
|
|
193
|
-
expect(afterToolCallRegs[1].surfaceId).toBe('hook:after_tool_call.trajectory');
|
|
194
193
|
});
|
|
195
194
|
|
|
196
|
-
it('llm_output has
|
|
195
|
+
it('llm_output has one registration: core only', () => {
|
|
197
196
|
const llmOutputRegs = registrations.filter(r => r.event === 'llm_output');
|
|
198
|
-
expect(llmOutputRegs.length).toBe(2);
|
|
197
|
+
expect(llmOutputRegs.length).toBe(1);
|
|
199
198
|
expect(llmOutputRegs[0].surfaceId).toBe('hook:llm_output');
|
|
200
|
-
expect(llmOutputRegs[1].surfaceId).toBe('hook:llm_output.trajectory');
|
|
201
199
|
});
|
|
202
200
|
|
|
203
201
|
it('total api.on registrations with guardHook match registry hook count', () => {
|
|
@@ -375,14 +373,14 @@ describe('MVP Surface Registry Guard (PRI-289)', () => {
|
|
|
375
373
|
|
|
376
374
|
it('isSurfaceEnabled returns false for quiet surfaces by default', async () => {
|
|
377
375
|
const { isSurfaceEnabled } = await import('../../src/core/surface-guard.js');
|
|
378
|
-
const result = isSurfaceEnabled('hook:
|
|
376
|
+
const result = isSurfaceEnabled('hook:subagent_spawning');
|
|
379
377
|
expect(result.enabled).toBe(false);
|
|
380
378
|
expect(result.reason).toBeDefined();
|
|
381
379
|
});
|
|
382
380
|
|
|
383
381
|
it('isSurfaceEnabled allows quiet surfaces with explicit override', async () => {
|
|
384
382
|
const { isSurfaceEnabled } = await import('../../src/core/surface-guard.js');
|
|
385
|
-
const result = isSurfaceEnabled('hook:
|
|
383
|
+
const result = isSurfaceEnabled('hook:subagent_spawning', { 'hook:subagent_spawning': true });
|
|
386
384
|
expect(result.enabled).toBe(true);
|
|
387
385
|
});
|
|
388
386
|
|