principles-disciple 1.70.0 → 1.72.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/openclaw.plugin.json
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
"id": "principles-disciple",
|
|
3
3
|
"name": "Principles Disciple",
|
|
4
4
|
"description": "Evolutionary programming agent framework with strategic guardrails and reflection loops.",
|
|
5
|
-
"version": "1.70.0",
|
|
5
|
+
"version": "1.72.0",
|
|
6
6
|
"skills": [
|
|
7
7
|
"templates/langs/en/skills",
|
|
8
8
|
"templates/langs/zh/skills"
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "principles-disciple",
|
|
3
|
-
"version": "1.70.0",
|
|
3
|
+
"version": "1.72.0",
|
|
4
4
|
"description": "Native OpenClaw plugin for Principles Disciple",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./dist/bundle.js",
|
|
@@ -53,7 +53,7 @@
|
|
|
53
53
|
"@typescript-eslint/parser": "^8.58.0",
|
|
54
54
|
"@vitest/coverage-v8": "^4.1.0",
|
|
55
55
|
"esbuild": "^0.28.0",
|
|
56
|
-
"eslint": "^10.1
|
|
56
|
+
"eslint": "^10.2.1",
|
|
57
57
|
"jsdom": "^29.0.1",
|
|
58
58
|
"typescript": "^6.0.3",
|
|
59
59
|
"vitest": "^4.1.0",
|
package/src/hooks/prompt.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
|
|
1
|
+
|
|
2
2
|
|
|
3
3
|
import * as fs from 'fs';
|
|
4
4
|
import * as path from 'path';
|
|
@@ -449,6 +449,8 @@ export async function handleBeforePromptBuild(
|
|
|
449
449
|
let prependSystemContext: string;
|
|
450
450
|
let prependContext = '';
|
|
451
451
|
let appendSystemContext = '';
|
|
452
|
+
// Tracks pending diagnostician task count for diagnostician-priority mode in size guard
|
|
453
|
+
let pendingDiagTaskCount = 0;
|
|
452
454
|
|
|
453
455
|
// ──── 0. Manual Pain Clearance ────
|
|
454
456
|
if (trigger === 'user' && sessionId && session && session.currentGfi >= 100) {
|
|
@@ -720,49 +722,81 @@ The empathy observer subagent handles pain detection independently.
|
|
|
720
722
|
const heartbeatChecklist = fs.readFileSync(heartbeatPath, 'utf8');
|
|
721
723
|
prependContext += `<heartbeat_checklist>
|
|
722
724
|
${heartbeatChecklist}
|
|
723
|
-
|
|
724
|
-
ACTION: Run self-audit. If stable, reply ONLY with "HEARTBEAT_OK".
|
|
725
725
|
</heartbeat_checklist>\n`;
|
|
726
726
|
} catch (e) {
|
|
727
727
|
logger?.error(`[PD:Prompt] Failed to read HEARTBEAT: ${String(e)}`);
|
|
728
728
|
}
|
|
729
729
|
}
|
|
730
730
|
|
|
731
|
-
// ──── 4b. Inject pending diagnostician tasks ────
|
|
732
|
-
// FIX (#283): The evolution worker writes pain diagnosis tasks to
|
|
731
|
+
// ──── 4b. Inject pending diagnostician tasks (compact summary) ────
|
|
732
|
+
// FIX (#283/#380): The evolution worker writes pain diagnosis tasks to
|
|
733
733
|
// diagnostician_tasks.json. The heartbeat prompt hook must read and inject
|
|
734
734
|
// them so the LLM (acting as diagnostician) can process them.
|
|
735
|
+
//
|
|
736
|
+
// INJECTION FORMAT: Compact summary (not full prompt) to stay well within
|
|
737
|
+
// OpenClaw's ~10 000 char platform limit. Full task.prompt can be 2–4 KB;
|
|
738
|
+
// the compact block is < 400 chars. The agent is instructed to read the
|
|
739
|
+
// original from diagnostician_tasks.json if it needs the full context.
|
|
735
740
|
try {
|
|
736
741
|
const pendingTasks = getPendingDiagnosticianTasks(wctx.stateDir);
|
|
737
742
|
if (pendingTasks.length > 0) {
|
|
743
|
+
pendingDiagTaskCount = pendingTasks.length;
|
|
744
|
+
|
|
745
|
+
// Build compact summary blocks — one per task (only first is processed per heartbeat)
|
|
738
746
|
const taskBlocks = pendingTasks
|
|
739
|
-
.slice(0,
|
|
740
|
-
.map(({ id, task }) =>
|
|
747
|
+
.slice(0, 1)
|
|
748
|
+
.map(({ id, task }) => {
|
|
749
|
+
// Extract summary fields; reason is truncated to 200 chars to keep
|
|
750
|
+
// the injected block small and stable.
|
|
751
|
+
const reason = (task.prompt
|
|
752
|
+
.match(/reason["\s:]+([^\n]{0,240})/i)?.[1]
|
|
753
|
+
?? task.prompt.slice(0, 200)
|
|
754
|
+
).slice(0, 200);
|
|
755
|
+
|
|
756
|
+
const safeId = escapeXml(id);
|
|
757
|
+
const safeReason = escapeXml(reason);
|
|
758
|
+
const safeCreatedAt = escapeXml(task.createdAt);
|
|
759
|
+
const markerFile = `.evolution_complete_${safeId}`;
|
|
760
|
+
const reportFile = `.diagnostician_report_${safeId}.json`;
|
|
761
|
+
|
|
762
|
+
return `<diagnostician_task id="${safeId}">
|
|
763
|
+
task_id: ${safeId}
|
|
764
|
+
reason: ${safeReason}
|
|
765
|
+
marker: ${markerFile}
|
|
766
|
+
report: ${reportFile}
|
|
767
|
+
queued_at: ${safeCreatedAt}
|
|
768
|
+
action: Analyze pain signal → identify violated principles → write ${markerFile} + ${reportFile}
|
|
769
|
+
</diagnostician_task>`;
|
|
770
|
+
})
|
|
741
771
|
.join('\n\n');
|
|
742
772
|
|
|
743
|
-
const
|
|
744
|
-
|
|
745
|
-
|
|
773
|
+
const processingNote = pendingDiagTaskCount > 1
|
|
774
|
+
? `\n\nNOTE: ${pendingDiagTaskCount - 1} more task(s) are queued. ` +
|
|
775
|
+
`Process one at a time; remaining tasks are handled on subsequent heartbeats.`
|
|
746
776
|
: '';
|
|
747
777
|
|
|
748
|
-
prependContext += `<diagnostician_tasks pending="${
|
|
749
|
-
You are acting as a **Pain Diagnostician**.
|
|
750
|
-
1.
|
|
751
|
-
2.
|
|
752
|
-
3.
|
|
753
|
-
4.
|
|
778
|
+
prependContext += `<diagnostician_tasks pending="${pendingDiagTaskCount}">
|
|
779
|
+
You are acting as a **Pain Diagnostician**. For each task:
|
|
780
|
+
1. Read the full prompt from: ${escapeXml(wctx.stateDir)}/diagnostician_tasks.json [task_id=${escapeXml(pendingTasks[0]?.id ?? '')}]
|
|
781
|
+
2. Analyze the pain signal and its context
|
|
782
|
+
3. Identify the root cause and violated principles
|
|
783
|
+
4. Write a completion marker: .evolution_complete_<TASK_ID>
|
|
784
|
+
5. Write a diagnostic report: .diagnostician_report_<TASK_ID>.json
|
|
754
785
|
|
|
755
786
|
${taskBlocks}${processingNote}
|
|
756
787
|
</diagnostician_tasks>\n`;
|
|
757
788
|
|
|
758
|
-
logger?.info?.(
|
|
789
|
+
logger?.info?.(
|
|
790
|
+
`[PD:Prompt] Injected compact diagnostician task block ` +
|
|
791
|
+
`(task=${pendingTasks[0]?.id}, total_pending=${pendingDiagTaskCount})`
|
|
792
|
+
);
|
|
759
793
|
|
|
760
794
|
// C: Record heartbeat_diagnosis event for observability
|
|
761
795
|
try {
|
|
762
796
|
const eventLog = EventLogService.get(wctx.stateDir, logger);
|
|
763
797
|
eventLog.recordHeartbeatDiagnosis({
|
|
764
|
-
taskCount:
|
|
765
|
-
taskIds: pendingTasks.slice(0,
|
|
798
|
+
taskCount: pendingDiagTaskCount,
|
|
799
|
+
taskIds: pendingTasks.slice(0, 1).map(t => t.id),
|
|
766
800
|
trigger: 'heartbeat',
|
|
767
801
|
});
|
|
768
802
|
} catch (evErr) {
|
|
@@ -1124,36 +1158,112 @@ ${attitudeDirective}
|
|
|
1124
1158
|
}
|
|
1125
1159
|
|
|
1126
1160
|
// ──── 8. SIZE GUARD ────
|
|
1127
|
-
//
|
|
1161
|
+
// Hard cap for OpenClaw prompt injection. OpenClaw's actual platform limit is
|
|
1162
|
+
// approximately 10 000 characters. We use 9 000 here to leave ~1 000 chars of
|
|
1163
|
+
// headroom for the user's message, tool call delimiters, and encoding overhead.
|
|
1164
|
+
// IMPORTANT: PD must never treat the platform's upper bound as its own safe
|
|
1165
|
+
// working limit. Always keep a margin.
|
|
1128
1166
|
const totalSize = prependSystemContext.length + prependContext.length + appendSystemContext.length;
|
|
1129
|
-
const
|
|
1167
|
+
const MAX_INJECTION_SIZE = 9000;
|
|
1130
1168
|
|
|
1131
|
-
if (totalSize >
|
|
1169
|
+
if (totalSize > MAX_INJECTION_SIZE) {
|
|
1132
1170
|
const originalSize = totalSize;
|
|
1133
1171
|
const truncationLog: string[] = [];
|
|
1134
1172
|
|
|
1135
|
-
//
|
|
1173
|
+
// Deterministically remove low-priority context blocks in priority order.
|
|
1174
|
+
// In diagnostician-priority mode we aggressively strip everything except
|
|
1175
|
+
// the task block and minimum behavioral constraints.
|
|
1176
|
+
const inDiagMode = pendingDiagTaskCount > 0;
|
|
1177
|
+
|
|
1178
|
+
// Step 1 — strip project_context (largest, lowest priority) — always in diag mode,
|
|
1179
|
+
// only strip in normal mode if we are already over limit
|
|
1136
1180
|
if (projectContextContent && appendSystemContext.includes('<project_context>')) {
|
|
1137
|
-
|
|
1138
|
-
|
|
1139
|
-
|
|
1181
|
+
appendSystemContext = appendSystemContext.replace(
|
|
1182
|
+
`<project_context>\n${projectContextContent}\n</project_context>`,
|
|
1183
|
+
'<project_context>\n[stripped: project_context]\n</project_context>'
|
|
1184
|
+
);
|
|
1185
|
+
truncationLog.push('project_context');
|
|
1186
|
+
}
|
|
1187
|
+
|
|
1188
|
+
// Steps 2-4: only strip in diagnostician priority mode (inDiagMode)
|
|
1189
|
+
// In normal mode we stop after project_context to preserve context quality
|
|
1190
|
+
if (inDiagMode) {
|
|
1191
|
+
// Step 2 — strip thinking_os
|
|
1192
|
+
if (thinkingOsContent && appendSystemContext.includes('<thinking_os>')) {
|
|
1193
|
+
appendSystemContext = appendSystemContext.replace(
|
|
1194
|
+
`<thinking_os>\n${thinkingOsContent}\n</thinking_os>`,
|
|
1195
|
+
'<thinking_os>\n[stripped: thinking_os]\n</thinking_os>'
|
|
1196
|
+
);
|
|
1197
|
+
truncationLog.push('thinking_os');
|
|
1198
|
+
}
|
|
1199
|
+
|
|
1200
|
+
// Step 3 — strip evolution_principles (keep core_principles only)
|
|
1201
|
+
if (evolutionPrinciplesContent && appendSystemContext.includes('<evolution_principles>')) {
|
|
1140
1202
|
appendSystemContext = appendSystemContext.replace(
|
|
1141
|
-
`<
|
|
1142
|
-
|
|
1203
|
+
`<evolution_principles>\n${evolutionPrinciplesContent}\n</evolution_principles>`,
|
|
1204
|
+
'<evolution_principles>\n[stripped: evolution_principles]\n</evolution_principles>'
|
|
1143
1205
|
);
|
|
1144
|
-
truncationLog.push('
|
|
1206
|
+
truncationLog.push('evolution_principles');
|
|
1207
|
+
}
|
|
1208
|
+
|
|
1209
|
+
// Step 4 — strip reflection_log if present
|
|
1210
|
+
if (appendSystemContext.includes('<reflection_log>')) {
|
|
1211
|
+
appendSystemContext = appendSystemContext.replace(
|
|
1212
|
+
/<reflection_log>[\s\S]*?<\/reflection_log>/,
|
|
1213
|
+
'<reflection_log>\n[stripped: reflection_log]\n</reflection_log>'
|
|
1214
|
+
);
|
|
1215
|
+
truncationLog.push('reflection_log');
|
|
1145
1216
|
}
|
|
1146
1217
|
}
|
|
1147
1218
|
|
|
1148
|
-
//
|
|
1219
|
+
// Step 5 — re-evaluate: check if still over limit
|
|
1149
1220
|
let newSize = prependSystemContext.length + prependContext.length + appendSystemContext.length;
|
|
1150
|
-
if (newSize >
|
|
1151
|
-
//
|
|
1152
|
-
//
|
|
1153
|
-
|
|
1221
|
+
if (newSize > MAX_INJECTION_SIZE) {
|
|
1222
|
+
// Truncate the injected reason field by finding the "reason:" line prefix
|
|
1223
|
+
// and cutting to 120 chars. This is safe because the full prompt is
|
|
1224
|
+
// still available in diagnostician_tasks.json for the agent to read.
|
|
1225
|
+
prependContext = prependContext
|
|
1226
|
+
.split('\n')
|
|
1227
|
+
.map((line) => {
|
|
1228
|
+
if (line.startsWith('reason: ') && line.length > 129) {
|
|
1229
|
+
return line.slice(0, 129) + '...[truncated]';
|
|
1230
|
+
}
|
|
1231
|
+
return line;
|
|
1232
|
+
})
|
|
1233
|
+
.join('\n');
|
|
1234
|
+
newSize = prependSystemContext.length + prependContext.length + appendSystemContext.length;
|
|
1235
|
+
truncationLog.push('diagnostician_reason');
|
|
1154
1236
|
}
|
|
1155
1237
|
|
|
1156
|
-
|
|
1238
|
+
// FAIL-CLOSED: if we are still over the limit after all deterministic
|
|
1239
|
+
// removals, do NOT return a prompt that exceeds MAX_INJECTION_SIZE.
|
|
1240
|
+
// Drop the entire appendSystemContext (keep only prependContext +
|
|
1241
|
+
// prependSystemContext) and log a hard error.
|
|
1242
|
+
if (newSize > MAX_INJECTION_SIZE) {
|
|
1243
|
+
const fallbackContext = `
|
|
1244
|
+
## 【CONTEXT SECTIONS】
|
|
1245
|
+
|
|
1246
|
+
[WARNING: Context sections stripped due to prompt size constraints.
|
|
1247
|
+
This is a diagnostician-priority session — see diagnostician_tasks.json for full task context.]
|
|
1248
|
+
|
|
1249
|
+
${attitudeDirective}
|
|
1250
|
+
`.trim();
|
|
1251
|
+
|
|
1252
|
+
appendSystemContext = fallbackContext;
|
|
1253
|
+
newSize = prependSystemContext.length + prependContext.length + appendSystemContext.length;
|
|
1254
|
+
|
|
1255
|
+
logger?.error(
|
|
1256
|
+
`[PD:Prompt] PROMPT OVER LIMIT AFTER ALL REDUCTIONS — using fallback. ` +
|
|
1257
|
+
`Original: ${originalSize}, Current: ${newSize}, Limit: ${MAX_INJECTION_SIZE}. ` +
|
|
1258
|
+
`Stripped: ${truncationLog.join(', ')}. Diagnostician mode: ${inDiagMode}.`
|
|
1259
|
+
);
|
|
1260
|
+
} else {
|
|
1261
|
+
logger?.warn(
|
|
1262
|
+
`[PD:Prompt] Injection size exceeded: ${originalSize} chars (limit: ${MAX_INJECTION_SIZE}), ` +
|
|
1263
|
+
`truncated: ${truncationLog.join(', ') || 'none'}, new size: ${newSize} chars, ` +
|
|
1264
|
+
`diagnostician mode: ${inDiagMode}`
|
|
1265
|
+
);
|
|
1266
|
+
}
|
|
1157
1267
|
}
|
|
1158
1268
|
|
|
1159
1269
|
return {
|
|
@@ -222,7 +222,7 @@ export class RuntimeSummaryService {
|
|
|
222
222
|
const selectedSession = this.selectSession(sessions, options?.sessionId ?? null);
|
|
223
223
|
const selectedSessionId = selectedSession.session?.sessionId ?? null;
|
|
224
224
|
|
|
225
|
-
const persistedEvents = this.readEvents(path.join(wctx.stateDir, 'logs'
|
|
225
|
+
const persistedEvents = this.readEvents(path.join(wctx.stateDir, 'logs'), warnings);
|
|
226
226
|
const hasBufferedEventAccess =
|
|
227
227
|
typeof (wctx.eventLog as { getBufferedEvents?: () => EventLogEntry[] }).getBufferedEvents === 'function';
|
|
228
228
|
const bufferedEvents = hasBufferedEventAccess
|
|
@@ -358,6 +358,22 @@ export class RuntimeSummaryService {
|
|
|
358
358
|
heartbeatsInjectedToday: diagDailyStats?.heartbeatsInjected ?? 0,
|
|
359
359
|
};
|
|
360
360
|
|
|
361
|
+
// D: Stall detection — high-signal warning when the diagnostician loop appears broken.
|
|
362
|
+
// Conditions: tasks are being injected (heartbeats > 0) but no reports are being written.
|
|
363
|
+
if (
|
|
364
|
+
heartbeatDiagnosis.heartbeatsInjectedToday > 0 &&
|
|
365
|
+
heartbeatDiagnosis.reportsWrittenToday === 0 &&
|
|
366
|
+
heartbeatDiagnosis.pendingTasks > 0
|
|
367
|
+
) {
|
|
368
|
+
pushWarning(
|
|
369
|
+
warnings,
|
|
370
|
+
'Diagnostician appears stalled: heartbeats are injecting tasks ' +
|
|
371
|
+
`(${heartbeatDiagnosis.heartbeatsInjectedToday}) but no reports are being written. ` +
|
|
372
|
+
`${heartbeatDiagnosis.pendingTasks} task(s) remain pending. ` +
|
|
373
|
+
'Check prompt injection size limits and diagnostician task processing.'
|
|
374
|
+
);
|
|
375
|
+
}
|
|
376
|
+
|
|
361
377
|
// Read trajectory analytics data (historical data, NOT runtime truth)
|
|
362
378
|
const trajectoryStats = this.readTrajectoryStats(workspaceDir, warnings);
|
|
363
379
|
|
|
@@ -596,14 +612,49 @@ export class RuntimeSummaryService {
|
|
|
596
612
|
};
|
|
597
613
|
}
|
|
598
614
|
|
|
599
|
-
private static readEvents(
|
|
600
|
-
|
|
601
|
-
|
|
615
|
+
private static readEvents(logsDir: string, warnings: string[]): EventLogEntry[] {
|
|
616
|
+
// The event log is stored as daily files: events_YYYY-MM-DD.jsonl.
|
|
617
|
+
// Prefer today's file; fall back to the most recent daily file so that
|
|
618
|
+
// gate/pain stats are still populated when the day rolled over.
|
|
619
|
+
const dir = logsDir;
|
|
620
|
+
|
|
621
|
+
let bestFile: string | null = null;
|
|
622
|
+
|
|
623
|
+
if (fs.existsSync(dir)) {
|
|
624
|
+
const today = new Date().toISOString().slice(0, 10); // YYYY-MM-DD
|
|
625
|
+
// Prefer exact match on today's file
|
|
626
|
+
const todayFile = path.join(dir, `events_${today}.jsonl`);
|
|
627
|
+
if (fs.existsSync(todayFile)) {
|
|
628
|
+
bestFile = todayFile;
|
|
629
|
+
} else {
|
|
630
|
+
// Fallback: pick the most recent file by date embedded in the filename
|
|
631
|
+
// (lexical comparison works for ISO dates YYYY-MM-DD).
|
|
632
|
+
let newestDate = '';
|
|
633
|
+
try {
|
|
634
|
+
for (const file of fs.readdirSync(dir)) {
|
|
635
|
+
const m = file.match(/^events_(\d{4}-\d{2}-\d{2})\.jsonl$/);
|
|
636
|
+
if (!m) continue;
|
|
637
|
+
const fileDate = m[1];
|
|
638
|
+
if (fileDate > newestDate) {
|
|
639
|
+
newestDate = fileDate;
|
|
640
|
+
bestFile = path.join(dir, file);
|
|
641
|
+
}
|
|
642
|
+
}
|
|
643
|
+
} catch { /* ignore scan errors */ }
|
|
644
|
+
}
|
|
645
|
+
}
|
|
646
|
+
|
|
647
|
+
if (!bestFile) {
|
|
648
|
+
pushWarning(
|
|
649
|
+
warnings,
|
|
650
|
+
'No event log file found; recent pain and gate summaries are partial. ' +
|
|
651
|
+
'Expected format: events_YYYY-MM-DD.jsonl in the logs directory.'
|
|
652
|
+
);
|
|
602
653
|
return [];
|
|
603
654
|
}
|
|
604
655
|
|
|
605
656
|
try {
|
|
606
|
-
const raw = fs.readFileSync(
|
|
657
|
+
const raw = fs.readFileSync(bestFile, 'utf8').trim();
|
|
607
658
|
if (!raw) return [];
|
|
608
659
|
let parseFailures = 0;
|
|
609
660
|
const entries = raw
|
|
@@ -620,12 +671,15 @@ export class RuntimeSummaryService {
|
|
|
620
671
|
if (parseFailures > 0) {
|
|
621
672
|
pushWarning(
|
|
622
673
|
warnings,
|
|
623
|
-
`Skipped ${parseFailures} malformed event line${parseFailures === 1 ? '' : 's'} while reading
|
|
674
|
+
`Skipped ${parseFailures} malformed event line${parseFailures === 1 ? '' : 's'} while reading ${path.basename(bestFile!)}.`
|
|
624
675
|
);
|
|
625
676
|
}
|
|
626
677
|
return entries;
|
|
627
678
|
} catch {
|
|
628
|
-
pushWarning(
|
|
679
|
+
pushWarning(
|
|
680
|
+
warnings,
|
|
681
|
+
`Failed to read ${path.basename(bestFile!)}; recent pain and gate summaries are partial.`
|
|
682
|
+
);
|
|
629
683
|
return [];
|
|
630
684
|
}
|
|
631
685
|
}
|
|
@@ -0,0 +1,314 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tests for prompt.ts diagnostician fixes (Phase A: Immediate Hemorrhage Control)
|
|
3
|
+
*
|
|
4
|
+
* Covers:
|
|
5
|
+
* 1. Compact diagnostician task injection block format
|
|
6
|
+
* 2. Size guard: injection stays under MAX_INJECTION_SIZE (9000)
|
|
7
|
+
* 3. Diagnostician priority mode: low-priority blocks stripped when tasks pending
|
|
8
|
+
* 4. Fail-closed: never returns injection over limit
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
|
|
12
|
+
import * as fs from 'fs';
|
|
13
|
+
import * as os from 'os';
|
|
14
|
+
import * as path from 'path';
|
|
15
|
+
|
|
16
|
+
// ─── Mock dependencies ───────────────────────────────────────────────────────
|
|
17
|
+
|
|
18
|
+
const mockGetPendingDiagnosticianTasks = vi.fn<(stateDir: string) => unknown[]>();
|
|
19
|
+
|
|
20
|
+
beforeEach(() => {
|
|
21
|
+
vi.clearAllMocks();
|
|
22
|
+
mockGetPendingDiagnosticianTasks.mockReturnValue([]);
|
|
23
|
+
});
|
|
24
|
+
|
|
25
|
+
vi.mock('../../src/core/diagnostician-task-store.js', async () => ({
|
|
26
|
+
getPendingDiagnosticianTasks: (...args: unknown[]) =>
|
|
27
|
+
mockGetPendingDiagnosticianTasks(...args),
|
|
28
|
+
}));
|
|
29
|
+
|
|
30
|
+
vi.mock('../../src/core/event-log.js', () => ({
|
|
31
|
+
EventLogService: {
|
|
32
|
+
get: vi.fn().mockReturnValue({
|
|
33
|
+
recordHeartbeatDiagnosis: vi.fn(),
|
|
34
|
+
}),
|
|
35
|
+
},
|
|
36
|
+
}));
|
|
37
|
+
|
|
38
|
+
vi.mock('../../src/core/workspace-context.js', () => ({
|
|
39
|
+
WorkspaceContext: {
|
|
40
|
+
fromHookContext: vi.fn().mockReturnValue({
|
|
41
|
+
stateDir: '/fake/state',
|
|
42
|
+
resolve: (key: string) => `/fake/${key}`,
|
|
43
|
+
trajectory: { recordSession: vi.fn(), recordUserTurn: vi.fn() },
|
|
44
|
+
config: { get: vi.fn() },
|
|
45
|
+
evolutionReducer: {
|
|
46
|
+
getActivePrinciples: vi.fn().mockReturnValue([]),
|
|
47
|
+
getProbationPrinciples: vi.fn().mockReturnValue([]),
|
|
48
|
+
},
|
|
49
|
+
}),
|
|
50
|
+
},
|
|
51
|
+
}));
|
|
52
|
+
|
|
53
|
+
vi.mock('../../src/core/session-tracker.js', () => ({
|
|
54
|
+
getSession: vi.fn().mockReturnValue({ currentGfi: 20 }),
|
|
55
|
+
resetFriction: vi.fn(),
|
|
56
|
+
trackFriction: vi.fn(),
|
|
57
|
+
setInjectedProbationIds: vi.fn(),
|
|
58
|
+
clearInjectedProbationIds: vi.fn(),
|
|
59
|
+
decayGfi: vi.fn(),
|
|
60
|
+
getGfiDecayElapsed: vi.fn().mockReturnValue(0),
|
|
61
|
+
}));
|
|
62
|
+
|
|
63
|
+
vi.mock('../../src/core/path-resolver.js', () => ({
|
|
64
|
+
PathResolver: { getExtensionRoot: vi.fn().mockReturnValue('/fake/extension') },
|
|
65
|
+
}));
|
|
66
|
+
|
|
67
|
+
vi.mock('../../src/core/principle-injection.js', () => ({
|
|
68
|
+
selectPrinciplesForInjection: vi.fn().mockReturnValue({
|
|
69
|
+
selected: [],
|
|
70
|
+
wasTruncated: false,
|
|
71
|
+
breakdown: { p0: 0, p1: 0, p2: 0 },
|
|
72
|
+
totalChars: 0,
|
|
73
|
+
}),
|
|
74
|
+
DEFAULT_PRINCIPLE_BUDGET: 3000,
|
|
75
|
+
}));
|
|
76
|
+
|
|
77
|
+
vi.mock('../../src/core/empathy-keyword-matcher.js', () => ({
|
|
78
|
+
matchEmpathyKeywords: vi.fn().mockReturnValue({ score: 0, matched: null, severity: 'none', matchedTerms: [] }),
|
|
79
|
+
loadKeywordStore: vi.fn().mockReturnValue({ terms: {}, stats: { totalHits: 0 } }),
|
|
80
|
+
saveKeywordStore: vi.fn(),
|
|
81
|
+
shouldTriggerOptimization: vi.fn().mockReturnValue(false),
|
|
82
|
+
getKeywordStoreSummary: vi.fn().mockReturnValue({ totalTerms: 0, highFalsePositiveTerms: [] }),
|
|
83
|
+
}));
|
|
84
|
+
|
|
85
|
+
vi.mock('../../src/core/empathy-types.js', () => ({
|
|
86
|
+
severityToPenalty: vi.fn().mockReturnValue(5),
|
|
87
|
+
DEFAULT_EMPATHY_KEYWORD_CONFIG: {},
|
|
88
|
+
}));
|
|
89
|
+
|
|
90
|
+
vi.mock('../../src/core/correction-cue-learner.js', () => ({
|
|
91
|
+
CorrectionCueLearner: {
|
|
92
|
+
get: vi.fn().mockReturnValue({
|
|
93
|
+
match: vi.fn().mockReturnValue({ matched: null, matchedTerms: [], confidence: 0 }),
|
|
94
|
+
recordHits: vi.fn(),
|
|
95
|
+
recordTruePositive: vi.fn(),
|
|
96
|
+
flush: vi.fn(),
|
|
97
|
+
}),
|
|
98
|
+
},
|
|
99
|
+
}));
|
|
100
|
+
|
|
101
|
+
vi.mock('../../src/core/focus-history.js', () => ({
|
|
102
|
+
extractSummary: vi.fn().mockReturnValue(''),
|
|
103
|
+
getHistoryVersions: vi.fn().mockReturnValue([]),
|
|
104
|
+
parseWorkingMemorySection: vi.fn().mockReturnValue(null),
|
|
105
|
+
workingMemoryToInjection: vi.fn().mockReturnValue(''),
|
|
106
|
+
autoCompressFocus: vi.fn().mockReturnValue({ compressed: false, reason: 'not_needed' }),
|
|
107
|
+
safeReadCurrentFocus: vi.fn().mockReturnValue({ content: '', recovered: false, validationErrors: [] }),
|
|
108
|
+
}));
|
|
109
|
+
|
|
110
|
+
vi.mock('../../src/service/subagent-workflow/index.js', () => ({
|
|
111
|
+
EmpathyObserverWorkflowManager: vi.fn(),
|
|
112
|
+
empathyObserverWorkflowSpec: {},
|
|
113
|
+
isExpectedSubagentError: vi.fn().mockReturnValue(false),
|
|
114
|
+
}));
|
|
115
|
+
|
|
116
|
+
vi.mock('../../src/utils/subagent-probe.js', () => ({
|
|
117
|
+
isSubagentRuntimeAvailable: vi.fn().mockReturnValue(false),
|
|
118
|
+
}));
|
|
119
|
+
|
|
120
|
+
vi.mock('../../src/core/local-worker-routing.js', () => ({
|
|
121
|
+
classifyTask: vi.fn().mockReturnValue({
|
|
122
|
+
decision: 'stay_main',
|
|
123
|
+
classification: 'unknown',
|
|
124
|
+
reason: 'mocked',
|
|
125
|
+
blockers: [],
|
|
126
|
+
}),
|
|
127
|
+
}));
|
|
128
|
+
|
|
129
|
+
// ─── Helpers ─────────────────────────────────────────────────────────────────
|
|
130
|
+
|
|
131
|
+
function fakeTask(overrides: Partial<{
|
|
132
|
+
id: string; prompt: string; createdAt: string; status: string;
|
|
133
|
+
}> = {}): { id: string; task: { prompt: string; createdAt: string; status: 'pending' } } {
|
|
134
|
+
return {
|
|
135
|
+
id: overrides.id ?? 'test-task-1',
|
|
136
|
+
task: {
|
|
137
|
+
prompt: overrides.prompt ?? 'Diagnose pain signal: source=tool_failure score=75 reason=Command failed',
|
|
138
|
+
createdAt: overrides.createdAt ?? '2026-04-21T10:00:00.000Z',
|
|
139
|
+
status: 'pending',
|
|
140
|
+
},
|
|
141
|
+
};
|
|
142
|
+
}
|
|
143
|
+
|
|
144
|
+
function makeMinimalEvent(): Parameters<typeof import('../../src/hooks/prompt.js').handleBeforePromptBuild>[0] {
|
|
145
|
+
return {
|
|
146
|
+
prompt: 'hello world',
|
|
147
|
+
messages: [],
|
|
148
|
+
trigger: 'heartbeat',
|
|
149
|
+
sessionId: 'test-session-123',
|
|
150
|
+
} as unknown as Parameters<typeof import('../../src/hooks/prompt.js').handleBeforePromptBuild>[0];
|
|
151
|
+
}
|
|
152
|
+
|
|
153
|
+
// ─── Tests ───────────────────────────────────────────────────────────────────
|
|
154
|
+
|
|
155
|
+
describe('Diagnostician compact task injection', () => {
|
|
156
|
+
it('injects a compact block containing task_id, reason, marker and report paths', async () => {
|
|
157
|
+
const { handleBeforePromptBuild } = await import('../../src/hooks/prompt.js');
|
|
158
|
+
|
|
159
|
+
mockGetPendingDiagnosticianTasks.mockReturnValueOnce([fakeTask({
|
|
160
|
+
id: 'task-abc',
|
|
161
|
+
prompt:
|
|
162
|
+
'Pain signal: source=tool_failure\nscore=75\nreason=Command npm test failed with exit code 1\nsession_id=sess-123',
|
|
163
|
+
})]);
|
|
164
|
+
|
|
165
|
+
const ctx = {
|
|
166
|
+
workspaceDir: '/fake/workspace',
|
|
167
|
+
trigger: 'heartbeat',
|
|
168
|
+
sessionId: 'test-session-123',
|
|
169
|
+
api: {
|
|
170
|
+
logger: { info: vi.fn(), warn: vi.fn(), error: vi.fn() },
|
|
171
|
+
runtime: {},
|
|
172
|
+
config: {},
|
|
173
|
+
},
|
|
174
|
+
} as unknown as Parameters<typeof handleBeforePromptBuild>[1];
|
|
175
|
+
|
|
176
|
+
const result = await handleBeforePromptBuild(makeMinimalEvent(), ctx);
|
|
177
|
+
|
|
178
|
+
const combined = (result?.prependContext ?? '') + (result?.appendSystemContext ?? '');
|
|
179
|
+
|
|
180
|
+
// Must contain structural fields
|
|
181
|
+
expect(combined).toContain('task_id: task-abc');
|
|
182
|
+
expect(combined).toContain('.evolution_complete_task-abc');
|
|
183
|
+
expect(combined).toContain('.diagnostician_report_task-abc.json');
|
|
184
|
+
|
|
185
|
+
// Must NOT contain the full raw prompt (which could be 2-4 KB)
|
|
186
|
+
// The compact diagnostician block is small; the heartbeat checklist adds to total
|
|
187
|
+
expect(combined.length).toBeLessThan(2000);
|
|
188
|
+
});
|
|
189
|
+
|
|
190
|
+
it('injects exactly one task per heartbeat regardless of queue depth', async () => {
|
|
191
|
+
const { handleBeforePromptBuild } = await import('../../src/hooks/prompt.js');
|
|
192
|
+
|
|
193
|
+
const tasks = [
|
|
194
|
+
fakeTask({ id: 'task-1' }),
|
|
195
|
+
fakeTask({ id: 'task-2' }),
|
|
196
|
+
fakeTask({ id: 'task-3' }),
|
|
197
|
+
];
|
|
198
|
+
mockGetPendingDiagnosticianTasks.mockReturnValueOnce(tasks);
|
|
199
|
+
|
|
200
|
+
const ctx = {
|
|
201
|
+
workspaceDir: '/fake/workspace',
|
|
202
|
+
trigger: 'heartbeat',
|
|
203
|
+
sessionId: 'test-session-123',
|
|
204
|
+
api: {
|
|
205
|
+
logger: { info: vi.fn(), warn: vi.fn(), error: vi.fn() },
|
|
206
|
+
runtime: {},
|
|
207
|
+
config: {},
|
|
208
|
+
},
|
|
209
|
+
} as unknown as Parameters<typeof handleBeforePromptBuild>[1];
|
|
210
|
+
|
|
211
|
+
const result = await handleBeforePromptBuild(makeMinimalEvent(), ctx);
|
|
212
|
+
const combined = (result?.prependContext ?? '') + (result?.appendSystemContext ?? '');
|
|
213
|
+
|
|
214
|
+
// Only the first task ID appears in the block
|
|
215
|
+
expect(combined).toContain('task-1');
|
|
216
|
+
expect(combined).not.toContain('task-2');
|
|
217
|
+
expect(combined).not.toContain('task-3');
|
|
218
|
+
// Note mentions remaining count
|
|
219
|
+
expect(combined).toContain('2 more task(s) are queued');
|
|
220
|
+
});
|
|
221
|
+
});
|
|
222
|
+
|
|
223
|
+
describe('Size guard: fail-closed', () => {
|
|
224
|
+
it('never returns a combined injection that exceeds MAX_INJECTION_SIZE (9000)', async () => {
|
|
225
|
+
const { handleBeforePromptBuild } = await import('../../src/hooks/prompt.js');
|
|
226
|
+
|
|
227
|
+
// One large pending task to trigger diagnostician priority mode
|
|
228
|
+
const largePrompt = 'Pain signal: ' + 'x'.repeat(5000);
|
|
229
|
+
mockGetPendingDiagnosticianTasks.mockReturnValueOnce([fakeTask({ prompt: largePrompt })]);
|
|
230
|
+
|
|
231
|
+
const ctx = {
|
|
232
|
+
workspaceDir: '/fake/workspace',
|
|
233
|
+
trigger: 'heartbeat',
|
|
234
|
+
sessionId: 'test-session-123',
|
|
235
|
+
api: {
|
|
236
|
+
logger: { info: vi.fn(), warn: vi.fn(), error: vi.fn() },
|
|
237
|
+
runtime: {},
|
|
238
|
+
config: {},
|
|
239
|
+
},
|
|
240
|
+
} as unknown as Parameters<typeof handleBeforePromptBuild>[1];
|
|
241
|
+
|
|
242
|
+
const result = await handleBeforePromptBuild(makeMinimalEvent(), ctx);
|
|
243
|
+
|
|
244
|
+
const totalSize =
|
|
245
|
+
(result?.prependSystemContext?.length ?? 0) +
|
|
246
|
+
(result?.prependContext?.length ?? 0) +
|
|
247
|
+
(result?.appendSystemContext?.length ?? 0);
|
|
248
|
+
|
|
249
|
+
expect(totalSize).toBeLessThanOrEqual(9000);
|
|
250
|
+
});
|
|
251
|
+
|
|
252
|
+
it('strips project_context and strips thinking_os/evolution_principles when inDiagMode and over limit', async () => {
|
|
253
|
+
const { handleBeforePromptBuild } = await import('../../src/hooks/prompt.js');
|
|
254
|
+
|
|
255
|
+
// A long reason string in the task so prependContext itself is large
|
|
256
|
+
const longReason = 'x'.repeat(500);
|
|
257
|
+
mockGetPendingDiagnosticianTasks.mockReturnValueOnce([
|
|
258
|
+
fakeTask({ prompt: `Pain signal: reason=${longReason} source=tool_failure score=85` }),
|
|
259
|
+
]);
|
|
260
|
+
|
|
261
|
+
const ctx = {
|
|
262
|
+
workspaceDir: '/fake/workspace',
|
|
263
|
+
trigger: 'heartbeat',
|
|
264
|
+
sessionId: 'test-session-123',
|
|
265
|
+
api: {
|
|
266
|
+
logger: { info: vi.fn(), warn: vi.fn(), error: vi.fn() },
|
|
267
|
+
runtime: {},
|
|
268
|
+
config: {},
|
|
269
|
+
},
|
|
270
|
+
} as unknown as Parameters<typeof handleBeforePromptBuild>[1];
|
|
271
|
+
|
|
272
|
+
const result = await handleBeforePromptBuild(makeMinimalEvent(), ctx);
|
|
273
|
+
|
|
274
|
+
// The size guard must never throw — result must be defined
|
|
275
|
+
expect(result).toBeDefined();
|
|
276
|
+
const combined =
|
|
277
|
+
(result?.prependSystemContext?.length ?? 0) +
|
|
278
|
+
(result?.prependContext?.length ?? 0) +
|
|
279
|
+
(result?.appendSystemContext?.length ?? 0);
|
|
280
|
+
|
|
281
|
+
// Must stay within MAX_INJECTION_SIZE (9000)
|
|
282
|
+
expect(combined).toBeLessThanOrEqual(9000);
|
|
283
|
+
});
|
|
284
|
+
});
|
|
285
|
+
|
|
286
|
+
describe('Diagnostician priority mode', () => {
|
|
287
|
+
it('sets pendingDiagTaskCount > 0 so size guard knows to strip low-priority blocks', async () => {
|
|
288
|
+
const { handleBeforePromptBuild } = await import('../../src/hooks/prompt.js');
|
|
289
|
+
|
|
290
|
+
mockGetPendingDiagnosticianTasks.mockReturnValueOnce([fakeTask()]);
|
|
291
|
+
|
|
292
|
+
const infoLogger = vi.fn();
|
|
293
|
+
const ctx = {
|
|
294
|
+
workspaceDir: '/fake/workspace',
|
|
295
|
+
trigger: 'heartbeat',
|
|
296
|
+
sessionId: 'test-session-123',
|
|
297
|
+
api: {
|
|
298
|
+
logger: { info: infoLogger, warn: vi.fn(), error: vi.fn() },
|
|
299
|
+
runtime: {},
|
|
300
|
+
config: {},
|
|
301
|
+
},
|
|
302
|
+
} as unknown as Parameters<typeof handleBeforePromptBuild>[1];
|
|
303
|
+
|
|
304
|
+
const result = await handleBeforePromptBuild(makeMinimalEvent(), ctx);
|
|
305
|
+
|
|
306
|
+
// Should have logged task injection
|
|
307
|
+
expect(infoLogger).toHaveBeenCalledWith(
|
|
308
|
+
expect.stringContaining('Injected compact diagnostician task block')
|
|
309
|
+
);
|
|
310
|
+
|
|
311
|
+
// Result must be valid
|
|
312
|
+
expect(result?.prependContext).toBeDefined();
|
|
313
|
+
});
|
|
314
|
+
});
|
|
@@ -31,7 +31,9 @@ function writeSession(workspace: string, sessionId: string, payload: Record<stri
|
|
|
31
31
|
}
|
|
32
32
|
|
|
33
33
|
function writeEvents(workspace: string, entries: unknown[]): void {
|
|
34
|
-
|
|
34
|
+
// Write to today's daily event file (events_YYYY-MM-DD.jsonl)
|
|
35
|
+
const today = new Date().toISOString().slice(0, 10);
|
|
36
|
+
const filePath = path.join(workspace, '.state', 'logs', `events_${today}.jsonl`);
|
|
35
37
|
fs.mkdirSync(path.dirname(filePath), { recursive: true });
|
|
36
38
|
const content = entries.map((entry) => JSON.stringify(entry)).join('\n');
|
|
37
39
|
fs.writeFileSync(filePath, content ? `${content}\n` : '', 'utf8');
|
|
@@ -471,11 +473,12 @@ describe('RuntimeSummaryService', () => {
|
|
|
471
473
|
trust_score: 59,
|
|
472
474
|
last_updated: '2026-03-20T10:00:00Z',
|
|
473
475
|
});
|
|
476
|
+
const today = new Date().toISOString().slice(0, 10);
|
|
474
477
|
fs.writeFileSync(
|
|
475
|
-
path.join(workspace, '.state', 'logs',
|
|
478
|
+
path.join(workspace, '.state', 'logs', `events_${today}.jsonl`),
|
|
476
479
|
[
|
|
477
480
|
JSON.stringify({
|
|
478
|
-
ts:
|
|
481
|
+
ts: `${today}T10:00:01Z`,
|
|
479
482
|
type: 'pain_signal',
|
|
480
483
|
category: 'detected',
|
|
481
484
|
sessionId: 's1',
|
|
@@ -916,4 +919,180 @@ describe('RuntimeSummaryService', () => {
|
|
|
916
919
|
expect(summary.phase3.directiveIgnoredReason).toBe('queue is only truth source');
|
|
917
920
|
});
|
|
918
921
|
});
|
|
922
|
+
|
|
923
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
924
|
+
// Phase A: Event log daily-file compatibility + stalled diagnostician warning
|
|
925
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
926
|
+
|
|
927
|
+
describe('Event log daily-file format (events_YYYY-MM-DD.jsonl)', () => {
|
|
928
|
+
it('reads today events_YYYY-MM-DD.jsonl and does NOT warn about missing events.jsonl', () => {
|
|
929
|
+
const workspace = makeWorkspace();
|
|
930
|
+
writeJson(path.join(workspace, '.state', 'AGENT_SCORECARD.json'), {
|
|
931
|
+
trust_score: 59,
|
|
932
|
+
last_updated: '2026-03-20T10:00:00Z',
|
|
933
|
+
});
|
|
934
|
+
// Write daily file (today's date)
|
|
935
|
+
const today = new Date().toISOString().slice(0, 10); // YYYY-MM-DD (UTC date — toISOString() always reports UTC)
|
|
936
|
+
const dailyFilePath = path.join(workspace, '.state', 'logs', `events_${today}.jsonl`);
|
|
937
|
+
fs.mkdirSync(path.dirname(dailyFilePath), { recursive: true });
|
|
938
|
+
fs.writeFileSync(
|
|
939
|
+
dailyFilePath,
|
|
940
|
+
JSON.stringify({
|
|
941
|
+
ts: `${today}T10:00:01Z`,
|
|
942
|
+
type: 'pain_signal',
|
|
943
|
+
category: 'detected',
|
|
944
|
+
sessionId: 's1',
|
|
945
|
+
data: { source: 'tool_failure', score: 10, reason: 'write failed' },
|
|
946
|
+
}) + '\n',
|
|
947
|
+
'utf8'
|
|
948
|
+
);
|
|
949
|
+
|
|
950
|
+
const summary = RuntimeSummaryService.getSummary(workspace);
|
|
951
|
+
|
|
952
|
+
// Must have read the daily file successfully
|
|
953
|
+
expect(summary.pain.lastSignal?.source).toBe('tool_failure');
|
|
954
|
+
// Must NOT warn about "No events.jsonl file"
|
|
955
|
+
expect(summary.metadata.warnings.join('\n')).not.toContain('No events.jsonl file');
|
|
956
|
+
expect(summary.metadata.warnings.join('\n')).not.toContain('No event log file');
|
|
957
|
+
});
|
|
958
|
+
|
|
959
|
+
it('falls back to most recent daily file when today file does not exist', () => {
|
|
960
|
+
const workspace = makeWorkspace();
|
|
961
|
+
writeJson(path.join(workspace, '.state', 'AGENT_SCORECARD.json'), {
|
|
962
|
+
trust_score: 59,
|
|
963
|
+
last_updated: '2026-03-20T10:00:00Z',
|
|
964
|
+
});
|
|
965
|
+
// Write only an older daily file
|
|
966
|
+
const oldDate = '2026-03-18';
|
|
967
|
+
const oldFilePath = path.join(workspace, '.state', 'logs', `events_${oldDate}.jsonl`);
|
|
968
|
+
fs.mkdirSync(path.dirname(oldFilePath), { recursive: true });
|
|
969
|
+
fs.writeFileSync(
|
|
970
|
+
oldFilePath,
|
|
971
|
+
JSON.stringify({
|
|
972
|
+
ts: `${oldDate}T10:00:01Z`,
|
|
973
|
+
type: 'gate_block',
|
|
974
|
+
category: 'risk',
|
|
975
|
+
sessionId: 's2',
|
|
976
|
+
data: { reason: 'old gate event' },
|
|
977
|
+
}) + '\n',
|
|
978
|
+
'utf8'
|
|
979
|
+
);
|
|
980
|
+
|
|
981
|
+
const summary = RuntimeSummaryService.getSummary(workspace);
|
|
982
|
+
|
|
983
|
+
// Must have read the old file
|
|
984
|
+
expect(summary.gate.recentBlocks).toBeGreaterThan(0);
|
|
985
|
+
// Must NOT warn about "No event log file"
|
|
986
|
+
expect(summary.metadata.warnings.join('\n')).not.toContain('No event log file');
|
|
987
|
+
});
|
|
988
|
+
|
|
989
|
+
it('warns only when no daily event file exists at all', () => {
|
|
990
|
+
const workspace = makeWorkspace();
|
|
991
|
+
writeJson(path.join(workspace, '.state', 'AGENT_SCORECARD.json'), {
|
|
992
|
+
trust_score: 59,
|
|
993
|
+
last_updated: '2026-03-20T10:00:00Z',
|
|
994
|
+
});
|
|
995
|
+
// Deliberately leave no event log files
|
|
996
|
+
|
|
997
|
+
const summary = RuntimeSummaryService.getSummary(workspace);
|
|
998
|
+
|
|
999
|
+
expect(summary.metadata.warnings.join('\n')).toContain('No event log file');
|
|
1000
|
+
});
|
|
1001
|
+
});
|
|
1002
|
+
|
|
1003
|
+
describe('Stalled diagnostician warning', () => {
|
|
1004
|
+
it('raises a high-signal warning when tasks are injected but no reports are written', () => {
|
|
1005
|
+
const workspace = makeWorkspace();
|
|
1006
|
+
writeJson(path.join(workspace, '.state', 'AGENT_SCORECARD.json'), {
|
|
1007
|
+
trust_score: 59,
|
|
1008
|
+
last_updated: '2026-03-20T10:00:00Z',
|
|
1009
|
+
});
|
|
1010
|
+
writeJson(path.join(workspace, '.state', 'evolution_queue.json'), []);
|
|
1011
|
+
|
|
1012
|
+
// Simulate: pending tasks exist, heartbeats are injecting, but no reports written.
|
|
1013
|
+
// The pending task store is checked via getPendingDiagnosticianTasks which uses
|
|
1014
|
+
// the real filesystem path. We need to write diagnostician_tasks.json directly.
|
|
1015
|
+
const today = new Date().toISOString().slice(0, 10);
|
|
1016
|
+
fs.mkdirSync(path.join(workspace, '.state'), { recursive: true });
|
|
1017
|
+
fs.writeFileSync(
|
|
1018
|
+
path.join(workspace, '.state', 'diagnostician_tasks.json'),
|
|
1019
|
+
JSON.stringify({
|
|
1020
|
+
tasks: {
|
|
1021
|
+
'stalled-task-1': {
|
|
1022
|
+
prompt: 'Diagnose: tool_failure score=80',
|
|
1023
|
+
createdAt: `${today}T09:00:00Z`,
|
|
1024
|
+
status: 'pending',
|
|
1025
|
+
},
|
|
1026
|
+
},
|
|
1027
|
+
}),
|
|
1028
|
+
'utf8'
|
|
1029
|
+
);
|
|
1030
|
+
|
|
1031
|
+
// Write daily-stats.json with heartbeats > 0 but reportsWritten = 0
|
|
1032
|
+
writeJson(path.join(workspace, '.state', 'logs', 'daily-stats.json'), {
|
|
1033
|
+
[today]: {
|
|
1034
|
+
evolution: {
|
|
1035
|
+
diagnosisTasksWritten: 3,
|
|
1036
|
+
diagnosticianReportsWritten: 0,
|
|
1037
|
+
reportsMissingJson: 0,
|
|
1038
|
+
reportsIncompleteFields: 0,
|
|
1039
|
+
principleCandidatesCreated: 0,
|
|
1040
|
+
heartbeatsInjected: 5,
|
|
1041
|
+
},
|
|
1042
|
+
},
|
|
1043
|
+
});
|
|
1044
|
+
|
|
1045
|
+
// Also create a valid daily event file so readEvents() does not emit
|
|
1046
|
+
// "No event log file" warning that would pollute warnings[] and mask
|
|
1047
|
+
// the stall detection logic we are actually testing.
|
|
1048
|
+
fs.mkdirSync(path.join(workspace, '.state', 'logs'), { recursive: true });
|
|
1049
|
+
fs.writeFileSync(
|
|
1050
|
+
path.join(workspace, '.state', 'logs', `events_${today}.jsonl`),
|
|
1051
|
+
JSON.stringify({ ts: `${today}T09:00:00Z`, type: 'heartbeat_diagnosis', category: 'task_injected', sessionId: 'test', data: {} }) + '\n',
|
|
1052
|
+
'utf8'
|
|
1053
|
+
);
|
|
1054
|
+
|
|
1055
|
+
const summary = RuntimeSummaryService.getSummary(workspace);
|
|
1056
|
+
|
|
1057
|
+
const warningText = summary.metadata.warnings.join('\n');
|
|
1058
|
+
expect(warningText).toContain('Diagnostician appears stalled');
|
|
1059
|
+
expect(warningText).toContain('5'); // heartbeatsInjected value; loose match — any "5" anywhere in the joined warnings satisfies it
|
|
1060
|
+
expect(warningText).toContain('reports are being written'); // checks the warning wording only; this substring does not itself verify a "0" count
|
|
1061
|
+
expect(warningText).toContain('1 task(s) remain pending');
|
|
1062
|
+
});
|
|
1063
|
+
|
|
1064
|
+
it('does NOT raise the stalled warning when reports are being written', () => {
|
|
1065
|
+
const workspace = makeWorkspace();
|
|
1066
|
+
writeJson(path.join(workspace, '.state', 'AGENT_SCORECARD.json'), {
|
|
1067
|
+
trust_score: 59,
|
|
1068
|
+
last_updated: '2026-03-20T10:00:00Z',
|
|
1069
|
+
});
|
|
1070
|
+
writeJson(path.join(workspace, '.state', 'evolution_queue.json'), []);
|
|
1071
|
+
|
|
1072
|
+
const today = new Date().toISOString().slice(0, 10);
|
|
1073
|
+
fs.mkdirSync(path.join(workspace, '.state'), { recursive: true });
|
|
1074
|
+
fs.writeFileSync(
|
|
1075
|
+
path.join(workspace, '.state', 'diagnostician_tasks.json'),
|
|
1076
|
+
JSON.stringify({ tasks: {} }), // No pending tasks
|
|
1077
|
+
'utf8'
|
|
1078
|
+
);
|
|
1079
|
+
|
|
1080
|
+
writeJson(path.join(workspace, '.state', 'logs', 'daily-stats.json'), {
|
|
1081
|
+
[today]: {
|
|
1082
|
+
evolution: {
|
|
1083
|
+
diagnosisTasksWritten: 3,
|
|
1084
|
+
diagnosticianReportsWritten: 3, // Reports ARE being written
|
|
1085
|
+
reportsMissingJson: 0,
|
|
1086
|
+
reportsIncompleteFields: 0,
|
|
1087
|
+
principleCandidatesCreated: 1,
|
|
1088
|
+
heartbeatsInjected: 5,
|
|
1089
|
+
},
|
|
1090
|
+
},
|
|
1091
|
+
});
|
|
1092
|
+
|
|
1093
|
+
const summary = RuntimeSummaryService.getSummary(workspace);
|
|
1094
|
+
|
|
1095
|
+
expect(summary.metadata.warnings.join('\n')).not.toContain('Diagnostician appears stalled');
|
|
1096
|
+
});
|
|
1097
|
+
});
|
|
919
1098
|
});
|