principles-disciple 1.71.0 → 1.72.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,7 +2,7 @@
2
2
  "id": "principles-disciple",
3
3
  "name": "Principles Disciple",
4
4
  "description": "Evolutionary programming agent framework with strategic guardrails and reflection loops.",
5
- "version": "1.71.0",
5
+ "version": "1.72.0",
6
6
  "skills": [
7
7
  "templates/langs/en/skills",
8
8
  "templates/langs/zh/skills"
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "principles-disciple",
3
- "version": "1.71.0",
3
+ "version": "1.72.0",
4
4
  "description": "Native OpenClaw plugin for Principles Disciple",
5
5
  "type": "module",
6
6
  "main": "./dist/bundle.js",
@@ -449,6 +449,8 @@ export async function handleBeforePromptBuild(
449
449
  let prependSystemContext: string;
450
450
  let prependContext = '';
451
451
  let appendSystemContext = '';
452
+ // Tracks pending diagnostician task count for diagnostician-priority mode in size guard
453
+ let pendingDiagTaskCount = 0;
452
454
 
453
455
  // ──── 0. Manual Pain Clearance ────
454
456
  if (trigger === 'user' && sessionId && session && session.currentGfi >= 100) {
@@ -720,49 +722,81 @@ The empathy observer subagent handles pain detection independently.
720
722
  const heartbeatChecklist = fs.readFileSync(heartbeatPath, 'utf8');
721
723
  prependContext += `<heartbeat_checklist>
722
724
  ${heartbeatChecklist}
723
-
724
- // HEARTBEAT_OK removed - tasks must always be processed
725
725
  </heartbeat_checklist>\n`;
726
726
  } catch (e) {
727
727
  logger?.error(`[PD:Prompt] Failed to read HEARTBEAT: ${String(e)}`);
728
728
  }
729
729
  }
730
730
 
731
- // ──── 4b. Inject pending diagnostician tasks ────
732
- // FIX (#283): The evolution worker writes pain diagnosis tasks to
731
+ // ──── 4b. Inject pending diagnostician tasks (compact summary) ────
732
+ // FIX (#283/#380): The evolution worker writes pain diagnosis tasks to
733
733
  // diagnostician_tasks.json. The heartbeat prompt hook must read and inject
734
734
  // them so the LLM (acting as diagnostician) can process them.
735
+ //
736
+ // INJECTION FORMAT: Compact summary (not full prompt) to stay well within
737
+ // OpenClaw's ~10 000 char platform limit. Full task.prompt can be 2–4 KB;
738
+ // the compact block is < 400 chars. The agent is instructed to read the
739
+ // original from diagnostician_tasks.json if it needs the full context.
735
740
  try {
736
741
  const pendingTasks = getPendingDiagnosticianTasks(wctx.stateDir);
737
742
  if (pendingTasks.length > 0) {
743
+ pendingDiagTaskCount = pendingTasks.length;
744
+
745
+ // Build a compact summary block for the first pending task only — slice(0, 1) below means one task per heartbeat
738
746
  const taskBlocks = pendingTasks
739
- .slice(0, 3)
740
- .map(({ id, task }) => `<diagnostician_task id="${id}">\n${task.prompt}\n</diagnostician_task>`)
747
+ .slice(0, 1)
748
+ .map(({ id, task }) => {
749
+ // Extract summary fields; reason is truncated to 200 chars to keep
750
+ // the injected block small and stable.
751
+ const reason = (task.prompt
752
+ .match(/reason["\s:]+([^\n]{0,240})/i)?.[1]
753
+ ?? task.prompt.slice(0, 200)
754
+ ).slice(0, 200);
755
+
756
+ const safeId = escapeXml(id);
757
+ const safeReason = escapeXml(reason);
758
+ const safeCreatedAt = escapeXml(task.createdAt);
759
+ const markerFile = `.evolution_complete_${safeId}`;
760
+ const reportFile = `.diagnostician_report_${safeId}.json`;
761
+
762
+ return `<diagnostician_task id="${safeId}">
763
+ task_id: ${safeId}
764
+ reason: ${safeReason}
765
+ marker: ${markerFile}
766
+ report: ${reportFile}
767
+ queued_at: ${safeCreatedAt}
768
+ action: Analyze pain signal → identify violated principles → write ${markerFile} + ${reportFile}
769
+ </diagnostician_task>`;
770
+ })
741
771
  .join('\n\n');
742
772
 
743
- const pendingCount = pendingTasks.length;
744
- const processingNote = pendingCount > 3
745
- ? `\n\nNOTE: ${pendingCount - 3} more tasks are queued. Process these 3 first; remaining tasks will be handled on subsequent heartbeats.`
773
+ const processingNote = pendingDiagTaskCount > 1
774
+ ? `\n\nNOTE: ${pendingDiagTaskCount - 1} more task(s) are queued. ` +
775
+ `Process one at a time; remaining tasks are handled on subsequent heartbeats.`
746
776
  : '';
747
777
 
748
- prependContext += `<diagnostician_tasks pending="${pendingCount}">
749
- You are acting as a **Pain Diagnostician**. Process the following task(s) by:
750
- 1. Analyzing the pain signal and its context
751
- 2. Identifying the root cause and violated principles
752
- 3. Writing a completion marker file: .evolution_complete_<TASK_ID>
753
- 4. Writing a diagnostic report: .diagnostician_report_<TASK_ID>.json
778
+ prependContext += `<diagnostician_tasks pending="${pendingDiagTaskCount}">
779
+ You are acting as a **Pain Diagnostician**. For each task:
780
+ 1. Read the full prompt from: ${escapeXml(wctx.stateDir)}/diagnostician_tasks.json [task_id=${escapeXml(pendingTasks[0]?.id ?? '')}]
781
+ 2. Analyze the pain signal and its context
782
+ 3. Identify the root cause and violated principles
783
+ 4. Write a completion marker: .evolution_complete_<TASK_ID>
784
+ 5. Write a diagnostic report: .diagnostician_report_<TASK_ID>.json
754
785
 
755
786
  ${taskBlocks}${processingNote}
756
787
  </diagnostician_tasks>\n`;
757
788
 
758
- logger?.info?.(`[PD:Prompt] Injected ${Math.min(pendingCount, 3)}/${pendingCount} pending diagnostician task(s) into heartbeat prompt`);
789
+ logger?.info?.(
790
+ `[PD:Prompt] Injected compact diagnostician task block ` +
791
+ `(task=${pendingTasks[0]?.id}, total_pending=${pendingDiagTaskCount})`
792
+ );
759
793
 
760
794
  // C: Record heartbeat_diagnosis event for observability
761
795
  try {
762
796
  const eventLog = EventLogService.get(wctx.stateDir, logger);
763
797
  eventLog.recordHeartbeatDiagnosis({
764
- taskCount: pendingCount,
765
- taskIds: pendingTasks.slice(0, 3).map(t => t.id),
798
+ taskCount: pendingDiagTaskCount,
799
+ taskIds: pendingTasks.slice(0, 1).map(t => t.id),
766
800
  trigger: 'heartbeat',
767
801
  });
768
802
  } catch (evErr) {
@@ -1124,36 +1158,112 @@ ${attitudeDirective}
1124
1158
  }
1125
1159
 
1126
1160
  // ──── 8. SIZE GUARD ────
1127
- // Truncation happens within appendSystemContext (not prependContext)
1161
+ // Hard cap for OpenClaw prompt injection. OpenClaw's actual platform limit is
1162
+ // approximately 10 000 characters. We use 9 000 here to leave ~1 000 chars of
1163
+ // headroom for the user's message, tool call delimiters, and encoding overhead.
1164
+ // IMPORTANT: PD must never treat the platform's upper bound as its own safe
1165
+ // working limit. Always keep a margin.
1128
1166
  const totalSize = prependSystemContext.length + prependContext.length + appendSystemContext.length;
1129
- const MAX_SIZE = 10000;
1167
+ const MAX_INJECTION_SIZE = 9000;
1130
1168
 
1131
- if (totalSize > MAX_SIZE) {
1169
+ if (totalSize > MAX_INJECTION_SIZE) {
1132
1170
  const originalSize = totalSize;
1133
1171
  const truncationLog: string[] = [];
1134
1172
 
1135
- // 1. Truncate project_context in appendSystemContext
1173
+ // Deterministically remove low-priority context blocks in priority order.
1174
+ // In diagnostician-priority mode we aggressively strip everything except
1175
+ // the task block and minimum behavioral constraints.
1176
+ const inDiagMode = pendingDiagTaskCount > 0;
1177
+
1178
+ // Step 1 — strip project_context (largest, lowest priority). This runs in both
1178
+ // diag mode and normal mode: we only get here when already over the limit.
1136
1180
  if (projectContextContent && appendSystemContext.includes('<project_context>')) {
1137
- const lines = projectContextContent.split('\n');
1138
- if (lines.length > 20) {
1139
- const truncated = lines.slice(0, 20).join('\n') + '\n...[truncated]';
1181
+ appendSystemContext = appendSystemContext.replace(
1182
+ `<project_context>\n${projectContextContent}\n</project_context>`,
1183
+ '<project_context>\n[stripped: project_context]\n</project_context>'
1184
+ );
1185
+ truncationLog.push('project_context');
1186
+ }
1187
+
1188
+ // Steps 2-4: only strip in diagnostician priority mode (inDiagMode)
1189
+ // In normal mode we stop after project_context to preserve context quality
1190
+ if (inDiagMode) {
1191
+ // Step 2 — strip thinking_os
1192
+ if (thinkingOsContent && appendSystemContext.includes('<thinking_os>')) {
1140
1193
  appendSystemContext = appendSystemContext.replace(
1141
- `<project_context>\n${projectContextContent}\n</project_context>`,
1142
- `<project_context>\n${truncated}\n</project_context>`
1194
+ `<thinking_os>\n${thinkingOsContent}\n</thinking_os>`,
1195
+ '<thinking_os>\n[stripped: thinking_os]\n</thinking_os>'
1143
1196
  );
1144
- truncationLog.push('project_context');
1197
+ truncationLog.push('thinking_os');
1198
+ }
1199
+
1200
+ // Step 3 — strip evolution_principles (keep core_principles only)
1201
+ if (evolutionPrinciplesContent && appendSystemContext.includes('<evolution_principles>')) {
1202
+ appendSystemContext = appendSystemContext.replace(
1203
+ `<evolution_principles>\n${evolutionPrinciplesContent}\n</evolution_principles>`,
1204
+ '<evolution_principles>\n[stripped: evolution_principles]\n</evolution_principles>'
1205
+ );
1206
+ truncationLog.push('evolution_principles');
1207
+ }
1208
+
1209
+ // Step 4 — strip reflection_log if present
1210
+ if (appendSystemContext.includes('<reflection_log>')) {
1211
+ appendSystemContext = appendSystemContext.replace(
1212
+ /<reflection_log>[\s\S]*?<\/reflection_log>/,
1213
+ '<reflection_log>\n[stripped: reflection_log]\n</reflection_log>'
1214
+ );
1215
+ truncationLog.push('reflection_log');
1145
1216
  }
1146
1217
  }
1147
1218
 
1148
- // 2. Final check
1219
+ // Step 5 — re-evaluate: check if still over limit
1149
1220
  let newSize = prependSystemContext.length + prependContext.length + appendSystemContext.length;
1150
- if (newSize > MAX_SIZE) {
1151
- // NOTE: We still return the content even if over limit, as truncating more
1152
- // could lose critical context like principles or evolution directives.
1153
- logger?.error(`[PD:Prompt] Cannot reduce injection size below limit. Current: ${newSize}, Limit: ${MAX_SIZE}`);
1221
+ if (newSize > MAX_INJECTION_SIZE) {
1222
+ // Truncate the injected reason field by finding lines that start with "reason: "
1223
+ // and cutting each to 129 chars (8-char prefix + 121 chars of text). This is safe because the full prompt is
1224
+ // still available in diagnostician_tasks.json for the agent to read.
1225
+ prependContext = prependContext
1226
+ .split('\n')
1227
+ .map((line) => {
1228
+ if (line.startsWith('reason: ') && line.length > 129) {
1229
+ return line.slice(0, 129) + '...[truncated]';
1230
+ }
1231
+ return line;
1232
+ })
1233
+ .join('\n');
1234
+ newSize = prependSystemContext.length + prependContext.length + appendSystemContext.length;
1235
+ truncationLog.push('diagnostician_reason');
1154
1236
  }
1155
1237
 
1156
- logger?.warn(`[PD:Prompt] Injection size exceeded: ${originalSize} chars (limit: ${MAX_SIZE}), truncated: ${truncationLog.join(', ') || 'none'}, new size: ${newSize} chars`);
1238
+ // FAIL-CLOSED (best effort): if we are still over the limit after all
1239
+ // deterministic removals, replace appendSystemContext with a short fallback
1240
+ // notice plus attitudeDirective (prependContext and prependSystemContext are kept)
1241
+ // and log a hard error. NOTE(review): the size is not re-checked after this swap.
1242
+ if (newSize > MAX_INJECTION_SIZE) {
1243
+ const fallbackContext = `
1244
+ ## 【CONTEXT SECTIONS】
1245
+
1246
+ [WARNING: Context sections stripped due to prompt size constraints.
1247
+ This is a diagnostician-priority session — see diagnostician_tasks.json for full task context.]
1248
+
1249
+ ${attitudeDirective}
1250
+ `.trim();
1251
+
1252
+ appendSystemContext = fallbackContext;
1253
+ newSize = prependSystemContext.length + prependContext.length + appendSystemContext.length;
1254
+
1255
+ logger?.error(
1256
+ `[PD:Prompt] PROMPT OVER LIMIT AFTER ALL REDUCTIONS — using fallback. ` +
1257
+ `Original: ${originalSize}, Current: ${newSize}, Limit: ${MAX_INJECTION_SIZE}. ` +
1258
+ `Stripped: ${truncationLog.join(', ')}. Diagnostician mode: ${inDiagMode}.`
1259
+ );
1260
+ } else {
1261
+ logger?.warn(
1262
+ `[PD:Prompt] Injection size exceeded: ${originalSize} chars (limit: ${MAX_INJECTION_SIZE}), ` +
1263
+ `truncated: ${truncationLog.join(', ') || 'none'}, new size: ${newSize} chars, ` +
1264
+ `diagnostician mode: ${inDiagMode}`
1265
+ );
1266
+ }
1157
1267
  }
1158
1268
 
1159
1269
  return {
@@ -222,7 +222,7 @@ export class RuntimeSummaryService {
222
222
  const selectedSession = this.selectSession(sessions, options?.sessionId ?? null);
223
223
  const selectedSessionId = selectedSession.session?.sessionId ?? null;
224
224
 
225
- const persistedEvents = this.readEvents(path.join(wctx.stateDir, 'logs', 'events.jsonl'), warnings);
225
+ const persistedEvents = this.readEvents(path.join(wctx.stateDir, 'logs'), warnings);
226
226
  const hasBufferedEventAccess =
227
227
  typeof (wctx.eventLog as { getBufferedEvents?: () => EventLogEntry[] }).getBufferedEvents === 'function';
228
228
  const bufferedEvents = hasBufferedEventAccess
@@ -358,6 +358,22 @@ export class RuntimeSummaryService {
358
358
  heartbeatsInjectedToday: diagDailyStats?.heartbeatsInjected ?? 0,
359
359
  };
360
360
 
361
+ // D: Stall detection — high-signal warning when the diagnostician loop appears broken.
362
+ // Conditions: tasks are being injected (heartbeats > 0) but no reports are being written.
363
+ if (
364
+ heartbeatDiagnosis.heartbeatsInjectedToday > 0 &&
365
+ heartbeatDiagnosis.reportsWrittenToday === 0 &&
366
+ heartbeatDiagnosis.pendingTasks > 0
367
+ ) {
368
+ pushWarning(
369
+ warnings,
370
+ 'Diagnostician appears stalled: heartbeats are injecting tasks ' +
371
+ `(${heartbeatDiagnosis.heartbeatsInjectedToday}) but no reports are being written. ` +
372
+ `${heartbeatDiagnosis.pendingTasks} task(s) remain pending. ` +
373
+ 'Check prompt injection size limits and diagnostician task processing.'
374
+ );
375
+ }
376
+
361
377
  // Read trajectory analytics data (historical data, NOT runtime truth)
362
378
  const trajectoryStats = this.readTrajectoryStats(workspaceDir, warnings);
363
379
 
@@ -596,14 +612,49 @@ export class RuntimeSummaryService {
596
612
  };
597
613
  }
598
614
 
599
- private static readEvents(eventsPath: string, warnings: string[]): EventLogEntry[] {
600
- if (!fs.existsSync(eventsPath)) {
601
- warnings.push('No events.jsonl file exists yet; recent pain and gate summaries are partial.');
615
+ private static readEvents(logsDir: string, warnings: string[]): EventLogEntry[] {
616
+ // The event log is stored as daily files: events_YYYY-MM-DD.jsonl.
617
+ // Prefer today's file, where "today" is the current UTC date (toISOString); fall back
618
+ // to the most recent daily file so that gate/pain stats are still populated after the day rolls over.
619
+ const dir = logsDir;
620
+
621
+ let bestFile: string | null = null;
622
+
623
+ if (fs.existsSync(dir)) {
624
+ const today = new Date().toISOString().slice(0, 10); // YYYY-MM-DD
625
+ // Prefer exact match on today's file
626
+ const todayFile = path.join(dir, `events_${today}.jsonl`);
627
+ if (fs.existsSync(todayFile)) {
628
+ bestFile = todayFile;
629
+ } else {
630
+ // Fallback: pick the most recent file by date embedded in the filename
631
+ // (lexical comparison works for ISO dates YYYY-MM-DD).
632
+ let newestDate = '';
633
+ try {
634
+ for (const file of fs.readdirSync(dir)) {
635
+ const m = file.match(/^events_(\d{4}-\d{2}-\d{2})\.jsonl$/);
636
+ if (!m) continue;
637
+ const fileDate = m[1];
638
+ if (fileDate > newestDate) {
639
+ newestDate = fileDate;
640
+ bestFile = path.join(dir, file);
641
+ }
642
+ }
643
+ } catch { /* ignore scan errors */ }
644
+ }
645
+ }
646
+
647
+ if (!bestFile) {
648
+ pushWarning(
649
+ warnings,
650
+ 'No event log file found; recent pain and gate summaries are partial. ' +
651
+ 'Expected format: events_YYYY-MM-DD.jsonl in the logs directory.'
652
+ );
602
653
  return [];
603
654
  }
604
655
 
605
656
  try {
606
- const raw = fs.readFileSync(eventsPath, 'utf8').trim();
657
+ const raw = fs.readFileSync(bestFile, 'utf8').trim();
607
658
  if (!raw) return [];
608
659
  let parseFailures = 0;
609
660
  const entries = raw
@@ -620,12 +671,15 @@ export class RuntimeSummaryService {
620
671
  if (parseFailures > 0) {
621
672
  pushWarning(
622
673
  warnings,
623
- `Skipped ${parseFailures} malformed event line${parseFailures === 1 ? '' : 's'} while reading events.jsonl.`
674
+ `Skipped ${parseFailures} malformed event line${parseFailures === 1 ? '' : 's'} while reading ${path.basename(bestFile!)}.`
624
675
  );
625
676
  }
626
677
  return entries;
627
678
  } catch {
628
- pushWarning(warnings, 'Failed to read events.jsonl; recent pain and gate summaries are partial.');
679
+ pushWarning(
680
+ warnings,
681
+ `Failed to read ${path.basename(bestFile!)}; recent pain and gate summaries are partial.`
682
+ );
629
683
  return [];
630
684
  }
631
685
  }
@@ -0,0 +1,314 @@
1
+ /**
2
+ * Tests for prompt.ts diagnostician fixes (Phase A: Immediate Hemorrhage Control)
3
+ *
4
+ * Covers:
5
+ * 1. Compact diagnostician task injection block format
6
+ * 2. Size guard: injection stays under MAX_INJECTION_SIZE (9000)
7
+ * 3. Diagnostician priority mode: low-priority blocks stripped when tasks pending
8
+ * 4. Fail-closed: never returns injection over limit
9
+ */
10
+
11
+ import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
12
+ import * as fs from 'fs';
13
+ import * as os from 'os';
14
+ import * as path from 'path';
15
+
16
+ // ─── Mock dependencies ───────────────────────────────────────────────────────
17
+
18
+ const mockGetPendingDiagnosticianTasks = vi.fn<(stateDir: string) => unknown[]>();
19
+
20
+ beforeEach(() => {
21
+ vi.clearAllMocks();
22
+ mockGetPendingDiagnosticianTasks.mockReturnValue([]);
23
+ });
24
+
25
+ vi.mock('../../src/core/diagnostician-task-store.js', async () => ({
26
+ getPendingDiagnosticianTasks: (...args: unknown[]) =>
27
+ mockGetPendingDiagnosticianTasks(...args),
28
+ }));
29
+
30
+ vi.mock('../../src/core/event-log.js', () => ({
31
+ EventLogService: {
32
+ get: vi.fn().mockReturnValue({
33
+ recordHeartbeatDiagnosis: vi.fn(),
34
+ }),
35
+ },
36
+ }));
37
+
38
+ vi.mock('../../src/core/workspace-context.js', () => ({
39
+ WorkspaceContext: {
40
+ fromHookContext: vi.fn().mockReturnValue({
41
+ stateDir: '/fake/state',
42
+ resolve: (key: string) => `/fake/${key}`,
43
+ trajectory: { recordSession: vi.fn(), recordUserTurn: vi.fn() },
44
+ config: { get: vi.fn() },
45
+ evolutionReducer: {
46
+ getActivePrinciples: vi.fn().mockReturnValue([]),
47
+ getProbationPrinciples: vi.fn().mockReturnValue([]),
48
+ },
49
+ }),
50
+ },
51
+ }));
52
+
53
+ vi.mock('../../src/core/session-tracker.js', () => ({
54
+ getSession: vi.fn().mockReturnValue({ currentGfi: 20 }),
55
+ resetFriction: vi.fn(),
56
+ trackFriction: vi.fn(),
57
+ setInjectedProbationIds: vi.fn(),
58
+ clearInjectedProbationIds: vi.fn(),
59
+ decayGfi: vi.fn(),
60
+ getGfiDecayElapsed: vi.fn().mockReturnValue(0),
61
+ }));
62
+
63
+ vi.mock('../../src/core/path-resolver.js', () => ({
64
+ PathResolver: { getExtensionRoot: vi.fn().mockReturnValue('/fake/extension') },
65
+ }));
66
+
67
+ vi.mock('../../src/core/principle-injection.js', () => ({
68
+ selectPrinciplesForInjection: vi.fn().mockReturnValue({
69
+ selected: [],
70
+ wasTruncated: false,
71
+ breakdown: { p0: 0, p1: 0, p2: 0 },
72
+ totalChars: 0,
73
+ }),
74
+ DEFAULT_PRINCIPLE_BUDGET: 3000,
75
+ }));
76
+
77
+ vi.mock('../../src/core/empathy-keyword-matcher.js', () => ({
78
+ matchEmpathyKeywords: vi.fn().mockReturnValue({ score: 0, matched: null, severity: 'none', matchedTerms: [] }),
79
+ loadKeywordStore: vi.fn().mockReturnValue({ terms: {}, stats: { totalHits: 0 } }),
80
+ saveKeywordStore: vi.fn(),
81
+ shouldTriggerOptimization: vi.fn().mockReturnValue(false),
82
+ getKeywordStoreSummary: vi.fn().mockReturnValue({ totalTerms: 0, highFalsePositiveTerms: [] }),
83
+ }));
84
+
85
+ vi.mock('../../src/core/empathy-types.js', () => ({
86
+ severityToPenalty: vi.fn().mockReturnValue(5),
87
+ DEFAULT_EMPATHY_KEYWORD_CONFIG: {},
88
+ }));
89
+
90
+ vi.mock('../../src/core/correction-cue-learner.js', () => ({
91
+ CorrectionCueLearner: {
92
+ get: vi.fn().mockReturnValue({
93
+ match: vi.fn().mockReturnValue({ matched: null, matchedTerms: [], confidence: 0 }),
94
+ recordHits: vi.fn(),
95
+ recordTruePositive: vi.fn(),
96
+ flush: vi.fn(),
97
+ }),
98
+ },
99
+ }));
100
+
101
+ vi.mock('../../src/core/focus-history.js', () => ({
102
+ extractSummary: vi.fn().mockReturnValue(''),
103
+ getHistoryVersions: vi.fn().mockReturnValue([]),
104
+ parseWorkingMemorySection: vi.fn().mockReturnValue(null),
105
+ workingMemoryToInjection: vi.fn().mockReturnValue(''),
106
+ autoCompressFocus: vi.fn().mockReturnValue({ compressed: false, reason: 'not_needed' }),
107
+ safeReadCurrentFocus: vi.fn().mockReturnValue({ content: '', recovered: false, validationErrors: [] }),
108
+ }));
109
+
110
+ vi.mock('../../src/service/subagent-workflow/index.js', () => ({
111
+ EmpathyObserverWorkflowManager: vi.fn(),
112
+ empathyObserverWorkflowSpec: {},
113
+ isExpectedSubagentError: vi.fn().mockReturnValue(false),
114
+ }));
115
+
116
+ vi.mock('../../src/utils/subagent-probe.js', () => ({
117
+ isSubagentRuntimeAvailable: vi.fn().mockReturnValue(false),
118
+ }));
119
+
120
+ vi.mock('../../src/core/local-worker-routing.js', () => ({
121
+ classifyTask: vi.fn().mockReturnValue({
122
+ decision: 'stay_main',
123
+ classification: 'unknown',
124
+ reason: 'mocked',
125
+ blockers: [],
126
+ }),
127
+ }));
128
+
129
+ // ─── Helpers ─────────────────────────────────────────────────────────────────
130
+
131
+ function fakeTask(overrides: Partial<{
132
+ id: string; prompt: string; createdAt: string; status: string;
133
+ }> = {}): { id: string; task: { prompt: string; createdAt: string; status: 'pending' } } {
134
+ return {
135
+ id: overrides.id ?? 'test-task-1',
136
+ task: {
137
+ prompt: overrides.prompt ?? 'Diagnose pain signal: source=tool_failure score=75 reason=Command failed',
138
+ createdAt: overrides.createdAt ?? '2026-04-21T10:00:00.000Z',
139
+ status: 'pending',
140
+ },
141
+ };
142
+ }
143
+
144
+ function makeMinimalEvent(): Parameters<typeof import('../../src/hooks/prompt.js').handleBeforePromptBuild>[0] {
145
+ return {
146
+ prompt: 'hello world',
147
+ messages: [],
148
+ trigger: 'heartbeat',
149
+ sessionId: 'test-session-123',
150
+ } as unknown as Parameters<typeof import('../../src/hooks/prompt.js').handleBeforePromptBuild>[0];
151
+ }
152
+
153
+ // ─── Tests ───────────────────────────────────────────────────────────────────
154
+
155
+ describe('Diagnostician compact task injection', () => {
156
+ it('injects a compact block containing task_id, reason, marker and report paths', async () => {
157
+ const { handleBeforePromptBuild } = await import('../../src/hooks/prompt.js');
158
+
159
+ mockGetPendingDiagnosticianTasks.mockReturnValueOnce([fakeTask({
160
+ id: 'task-abc',
161
+ prompt:
162
+ 'Pain signal: source=tool_failure\nscore=75\nreason=Command npm test failed with exit code 1\nsession_id=sess-123',
163
+ })]);
164
+
165
+ const ctx = {
166
+ workspaceDir: '/fake/workspace',
167
+ trigger: 'heartbeat',
168
+ sessionId: 'test-session-123',
169
+ api: {
170
+ logger: { info: vi.fn(), warn: vi.fn(), error: vi.fn() },
171
+ runtime: {},
172
+ config: {},
173
+ },
174
+ } as unknown as Parameters<typeof handleBeforePromptBuild>[1];
175
+
176
+ const result = await handleBeforePromptBuild(makeMinimalEvent(), ctx);
177
+
178
+ const combined = (result?.prependContext ?? '') + (result?.appendSystemContext ?? '');
179
+
180
+ // Must contain structural fields
181
+ expect(combined).toContain('task_id: task-abc');
182
+ expect(combined).toContain('.evolution_complete_task-abc');
183
+ expect(combined).toContain('.diagnostician_report_task-abc.json');
184
+
185
+ // Size proxy only: a small total means a 2-4 KB raw prompt was not inlined
186
+ // (this assertion does not compare against the prompt text itself). The heartbeat checklist adds to the total.
187
+ expect(combined.length).toBeLessThan(2000);
188
+ });
189
+
190
+ it('injects exactly one task per heartbeat regardless of queue depth', async () => {
191
+ const { handleBeforePromptBuild } = await import('../../src/hooks/prompt.js');
192
+
193
+ const tasks = [
194
+ fakeTask({ id: 'task-1' }),
195
+ fakeTask({ id: 'task-2' }),
196
+ fakeTask({ id: 'task-3' }),
197
+ ];
198
+ mockGetPendingDiagnosticianTasks.mockReturnValueOnce(tasks);
199
+
200
+ const ctx = {
201
+ workspaceDir: '/fake/workspace',
202
+ trigger: 'heartbeat',
203
+ sessionId: 'test-session-123',
204
+ api: {
205
+ logger: { info: vi.fn(), warn: vi.fn(), error: vi.fn() },
206
+ runtime: {},
207
+ config: {},
208
+ },
209
+ } as unknown as Parameters<typeof handleBeforePromptBuild>[1];
210
+
211
+ const result = await handleBeforePromptBuild(makeMinimalEvent(), ctx);
212
+ const combined = (result?.prependContext ?? '') + (result?.appendSystemContext ?? '');
213
+
214
+ // Only the first task ID appears in the block
215
+ expect(combined).toContain('task-1');
216
+ expect(combined).not.toContain('task-2');
217
+ expect(combined).not.toContain('task-3');
218
+ // Note mentions remaining count
219
+ expect(combined).toContain('2 more task(s) are queued');
220
+ });
221
+ });
222
+
223
+ describe('Size guard: fail-closed', () => {
224
+ it('never returns a combined injection that exceeds MAX_INJECTION_SIZE (9000)', async () => {
225
+ const { handleBeforePromptBuild } = await import('../../src/hooks/prompt.js');
226
+
227
+ // One large pending task to trigger diagnostician priority mode
228
+ const largePrompt = 'Pain signal: ' + 'x'.repeat(5000);
229
+ mockGetPendingDiagnosticianTasks.mockReturnValueOnce([fakeTask({ prompt: largePrompt })]);
230
+
231
+ const ctx = {
232
+ workspaceDir: '/fake/workspace',
233
+ trigger: 'heartbeat',
234
+ sessionId: 'test-session-123',
235
+ api: {
236
+ logger: { info: vi.fn(), warn: vi.fn(), error: vi.fn() },
237
+ runtime: {},
238
+ config: {},
239
+ },
240
+ } as unknown as Parameters<typeof handleBeforePromptBuild>[1];
241
+
242
+ const result = await handleBeforePromptBuild(makeMinimalEvent(), ctx);
243
+
244
+ const totalSize =
245
+ (result?.prependSystemContext?.length ?? 0) +
246
+ (result?.prependContext?.length ?? 0) +
247
+ (result?.appendSystemContext?.length ?? 0);
248
+
249
+ expect(totalSize).toBeLessThanOrEqual(9000);
250
+ });
251
+
252
+ it('strips project_context and strips thinking_os/evolution_principles when inDiagMode and over limit', async () => {
253
+ const { handleBeforePromptBuild } = await import('../../src/hooks/prompt.js');
254
+
255
+ // A long reason string in the task so prependContext itself is large
256
+ const longReason = 'x'.repeat(500);
257
+ mockGetPendingDiagnosticianTasks.mockReturnValueOnce([
258
+ fakeTask({ prompt: `Pain signal: reason=${longReason} source=tool_failure score=85` }),
259
+ ]);
260
+
261
+ const ctx = {
262
+ workspaceDir: '/fake/workspace',
263
+ trigger: 'heartbeat',
264
+ sessionId: 'test-session-123',
265
+ api: {
266
+ logger: { info: vi.fn(), warn: vi.fn(), error: vi.fn() },
267
+ runtime: {},
268
+ config: {},
269
+ },
270
+ } as unknown as Parameters<typeof handleBeforePromptBuild>[1];
271
+
272
+ const result = await handleBeforePromptBuild(makeMinimalEvent(), ctx);
273
+
274
+ // The size guard must never throw — result must be defined
275
+ expect(result).toBeDefined();
276
+ const combined =
277
+ (result?.prependSystemContext?.length ?? 0) +
278
+ (result?.prependContext?.length ?? 0) +
279
+ (result?.appendSystemContext?.length ?? 0);
280
+
281
+ // Must stay within MAX_INJECTION_SIZE (9000)
282
+ expect(combined).toBeLessThanOrEqual(9000);
283
+ });
284
+ });
285
+
286
+ describe('Diagnostician priority mode', () => {
287
+ it('sets pendingDiagTaskCount > 0 so size guard knows to strip low-priority blocks', async () => {
288
+ const { handleBeforePromptBuild } = await import('../../src/hooks/prompt.js');
289
+
290
+ mockGetPendingDiagnosticianTasks.mockReturnValueOnce([fakeTask()]);
291
+
292
+ const infoLogger = vi.fn();
293
+ const ctx = {
294
+ workspaceDir: '/fake/workspace',
295
+ trigger: 'heartbeat',
296
+ sessionId: 'test-session-123',
297
+ api: {
298
+ logger: { info: infoLogger, warn: vi.fn(), error: vi.fn() },
299
+ runtime: {},
300
+ config: {},
301
+ },
302
+ } as unknown as Parameters<typeof handleBeforePromptBuild>[1];
303
+
304
+ const result = await handleBeforePromptBuild(makeMinimalEvent(), ctx);
305
+
306
+ // Should have logged task injection
307
+ expect(infoLogger).toHaveBeenCalledWith(
308
+ expect.stringContaining('Injected compact diagnostician task block')
309
+ );
310
+
311
+ // Result must be valid
312
+ expect(result?.prependContext).toBeDefined();
313
+ });
314
+ });
@@ -31,7 +31,9 @@ function writeSession(workspace: string, sessionId: string, payload: Record<stri
31
31
  }
32
32
 
33
33
  function writeEvents(workspace: string, entries: unknown[]): void {
34
- const filePath = path.join(workspace, '.state', 'logs', 'events.jsonl');
34
+ // Write to today's daily event file (events_YYYY-MM-DD.jsonl)
35
+ const today = new Date().toISOString().slice(0, 10);
36
+ const filePath = path.join(workspace, '.state', 'logs', `events_${today}.jsonl`);
35
37
  fs.mkdirSync(path.dirname(filePath), { recursive: true });
36
38
  const content = entries.map((entry) => JSON.stringify(entry)).join('\n');
37
39
  fs.writeFileSync(filePath, content ? `${content}\n` : '', 'utf8');
@@ -471,11 +473,12 @@ describe('RuntimeSummaryService', () => {
471
473
  trust_score: 59,
472
474
  last_updated: '2026-03-20T10:00:00Z',
473
475
  });
476
+ const today = new Date().toISOString().slice(0, 10);
474
477
  fs.writeFileSync(
475
- path.join(workspace, '.state', 'logs', 'events.jsonl'),
478
+ path.join(workspace, '.state', 'logs', `events_${today}.jsonl`),
476
479
  [
477
480
  JSON.stringify({
478
- ts: '2026-03-20T10:00:01Z',
481
+ ts: `${today}T10:00:01Z`,
479
482
  type: 'pain_signal',
480
483
  category: 'detected',
481
484
  sessionId: 's1',
@@ -916,4 +919,180 @@ describe('RuntimeSummaryService', () => {
916
919
  expect(summary.phase3.directiveIgnoredReason).toBe('queue is only truth source');
917
920
  });
918
921
  });
922
+
923
+ // ─────────────────────────────────────────────────────────────────────────────
924
+ // Phase A: Event log daily-file compatibility + stalled diagnostician warning
925
+ // ─────────────────────────────────────────────────────────────────────────────
926
+
927
+ describe('Event log daily-file format (events_YYYY-MM-DD.jsonl)', () => {
928
+ it('reads today events_YYYY-MM-DD.jsonl and does NOT warn about missing events.jsonl', () => {
929
+ const workspace = makeWorkspace();
930
+ writeJson(path.join(workspace, '.state', 'AGENT_SCORECARD.json'), {
931
+ trust_score: 59,
932
+ last_updated: '2026-03-20T10:00:00Z',
933
+ });
934
+ // Write daily file (today's date)
935
+ const today = new Date().toISOString().slice(0, 10); // YYYY-MM-DD
936
+ const dailyFilePath = path.join(workspace, '.state', 'logs', `events_${today}.jsonl`);
937
+ fs.mkdirSync(path.dirname(dailyFilePath), { recursive: true });
938
+ fs.writeFileSync(
939
+ dailyFilePath,
940
+ JSON.stringify({
941
+ ts: `${today}T10:00:01Z`,
942
+ type: 'pain_signal',
943
+ category: 'detected',
944
+ sessionId: 's1',
945
+ data: { source: 'tool_failure', score: 10, reason: 'write failed' },
946
+ }) + '\n',
947
+ 'utf8'
948
+ );
949
+
950
+ const summary = RuntimeSummaryService.getSummary(workspace);
951
+
952
+ // Must have read the daily file successfully
953
+ expect(summary.pain.lastSignal?.source).toBe('tool_failure');
954
+ // Must NOT warn about "No events.jsonl file"
955
+ expect(summary.metadata.warnings.join('\n')).not.toContain('No events.jsonl file');
956
+ expect(summary.metadata.warnings.join('\n')).not.toContain('No event log file');
957
+ });
958
+
959
+ it('falls back to most recent daily file when today file does not exist', () => {
960
+ const workspace = makeWorkspace();
961
+ writeJson(path.join(workspace, '.state', 'AGENT_SCORECARD.json'), {
962
+ trust_score: 59,
963
+ last_updated: '2026-03-20T10:00:00Z',
964
+ });
965
+ // Write only an older daily file
966
+ const oldDate = '2026-03-18';
967
+ const oldFilePath = path.join(workspace, '.state', 'logs', `events_${oldDate}.jsonl`);
968
+ fs.mkdirSync(path.dirname(oldFilePath), { recursive: true });
969
+ fs.writeFileSync(
970
+ oldFilePath,
971
+ JSON.stringify({
972
+ ts: `${oldDate}T10:00:01Z`,
973
+ type: 'gate_block',
974
+ category: 'risk',
975
+ sessionId: 's2',
976
+ data: { reason: 'old gate event' },
977
+ }) + '\n',
978
+ 'utf8'
979
+ );
980
+
981
+ const summary = RuntimeSummaryService.getSummary(workspace);
982
+
983
+ // Must have read the old file
984
+ expect(summary.gate.recentBlocks).toBeGreaterThan(0);
985
+ // Must NOT warn about "No event log file"
986
+ expect(summary.metadata.warnings.join('\n')).not.toContain('No event log file');
987
+ });
988
+
989
+ it('warns only when no daily event file exists at all', () => {
990
+ const workspace = makeWorkspace();
991
+ writeJson(path.join(workspace, '.state', 'AGENT_SCORECARD.json'), {
992
+ trust_score: 59,
993
+ last_updated: '2026-03-20T10:00:00Z',
994
+ });
995
+ // Deliberately leave no event log files
996
+
997
+ const summary = RuntimeSummaryService.getSummary(workspace);
998
+
999
+ expect(summary.metadata.warnings.join('\n')).toContain('No event log file');
1000
+ });
1001
+ });
1002
+
1003
+ describe('Stalled diagnostician warning', () => {
1004
+ it('raises a high-signal warning when tasks are injected but no reports are written', () => {
1005
+ const workspace = makeWorkspace();
1006
+ writeJson(path.join(workspace, '.state', 'AGENT_SCORECARD.json'), {
1007
+ trust_score: 59,
1008
+ last_updated: '2026-03-20T10:00:00Z',
1009
+ });
1010
+ writeJson(path.join(workspace, '.state', 'evolution_queue.json'), []);
1011
+
1012
+ // Simulate: pending tasks exist, heartbeats are injecting, but no reports written.
1013
+ // The pending task store is checked via getPendingDiagnosticianTasks which uses
1014
+ // the real filesystem path. We need to write diagnostician_tasks.json directly.
1015
+ const today = new Date().toISOString().slice(0, 10);
1016
+ fs.mkdirSync(path.join(workspace, '.state'), { recursive: true });
1017
+ fs.writeFileSync(
1018
+ path.join(workspace, '.state', 'diagnostician_tasks.json'),
1019
+ JSON.stringify({
1020
+ tasks: {
1021
+ 'stalled-task-1': {
1022
+ prompt: 'Diagnose: tool_failure score=80',
1023
+ createdAt: `${today}T09:00:00Z`,
1024
+ status: 'pending',
1025
+ },
1026
+ },
1027
+ }),
1028
+ 'utf8'
1029
+ );
1030
+
1031
+ // Write daily-stats.json with heartbeats > 0 but reportsWritten = 0
1032
+ writeJson(path.join(workspace, '.state', 'logs', 'daily-stats.json'), {
1033
+ [today]: {
1034
+ evolution: {
1035
+ diagnosisTasksWritten: 3,
1036
+ diagnosticianReportsWritten: 0,
1037
+ reportsMissingJson: 0,
1038
+ reportsIncompleteFields: 0,
1039
+ principleCandidatesCreated: 0,
1040
+ heartbeatsInjected: 5,
1041
+ },
1042
+ },
1043
+ });
1044
+
1045
+ // Also create a valid daily event file so readEvents() does not emit
1046
+ // "No event log file" warning that would pollute warnings[] and mask
1047
+ // the stall detection logic we are actually testing.
1048
+ fs.mkdirSync(path.join(workspace, '.state', 'logs'), { recursive: true });
1049
+ fs.writeFileSync(
1050
+ path.join(workspace, '.state', 'logs', `events_${today}.jsonl`),
1051
+ JSON.stringify({ ts: `${today}T09:00:00Z`, type: 'heartbeat_diagnosis', category: 'task_injected', sessionId: 'test', data: {} }) + '\n',
1052
+ 'utf8'
1053
+ );
1054
+
1055
+ const summary = RuntimeSummaryService.getSummary(workspace);
1056
+
1057
+ const warningText = summary.metadata.warnings.join('\n');
1058
+ expect(warningText).toContain('Diagnostician appears stalled');
1059
+ expect(warningText).toContain('5'); // heartbeatsInjected
1060
+ expect(warningText).toContain('reports are being written'); // confirms it says "0"
1061
+ expect(warningText).toContain('1 task(s) remain pending');
1062
+ });
1063
+
1064
+ it('does NOT raise the stalled warning when reports are being written', () => {
1065
+ const workspace = makeWorkspace();
1066
+ writeJson(path.join(workspace, '.state', 'AGENT_SCORECARD.json'), {
1067
+ trust_score: 59,
1068
+ last_updated: '2026-03-20T10:00:00Z',
1069
+ });
1070
+ writeJson(path.join(workspace, '.state', 'evolution_queue.json'), []);
1071
+
1072
+ const today = new Date().toISOString().slice(0, 10);
1073
+ fs.mkdirSync(path.join(workspace, '.state'), { recursive: true });
1074
+ fs.writeFileSync(
1075
+ path.join(workspace, '.state', 'diagnostician_tasks.json'),
1076
+ JSON.stringify({ tasks: {} }), // No pending tasks
1077
+ 'utf8'
1078
+ );
1079
+
1080
+ writeJson(path.join(workspace, '.state', 'logs', 'daily-stats.json'), {
1081
+ [today]: {
1082
+ evolution: {
1083
+ diagnosisTasksWritten: 3,
1084
+ diagnosticianReportsWritten: 3, // Reports ARE being written
1085
+ reportsMissingJson: 0,
1086
+ reportsIncompleteFields: 0,
1087
+ principleCandidatesCreated: 1,
1088
+ heartbeatsInjected: 5,
1089
+ },
1090
+ },
1091
+ });
1092
+
1093
+ const summary = RuntimeSummaryService.getSummary(workspace);
1094
+
1095
+ expect(summary.metadata.warnings.join('\n')).not.toContain('Diagnostician appears stalled');
1096
+ });
1097
+ });
919
1098
  });