npm - principles-disciple - Versions diffs - 1.70.0 → 1.72.0 - Mend

principles-disciple 1.70.0 → 1.72.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

package/openclaw.plugin.json +1 -1
package/package.json +2 -2
package/src/hooks/prompt.ts +145 -35
package/src/service/runtime-summary-service.ts +61 -7
package/tests/hooks/prompt-size-guard.test.ts +314 -0
package/tests/service/runtime-summary-service.test.ts +182 -3

package/openclaw.plugin.json CHANGED Viewed

@@ -2,7 +2,7 @@
   "id": "principles-disciple",
   "name": "Principles Disciple",
   "description": "Evolutionary programming agent framework with strategic guardrails and reflection loops.",
-  "version": "1.70.0",
+  "version": "1.72.0",
   "skills": [
     "templates/langs/en/skills",
     "templates/langs/zh/skills"

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "principles-disciple",
-  "version": "1.70.0",
+  "version": "1.72.0",
   "description": "Native OpenClaw plugin for Principles Disciple",
   "type": "module",
   "main": "./dist/bundle.js",
@@ -53,7 +53,7 @@
     "@typescript-eslint/parser": "^8.58.0",
     "@vitest/coverage-v8": "^4.1.0",
     "esbuild": "^0.28.0",
-    "eslint": "^10.1.0",
+    "eslint": "^10.2.1",
     "jsdom": "^29.0.1",
     "typescript": "^6.0.3",
     "vitest": "^4.1.0",

package/src/hooks/prompt.ts CHANGED Viewed

@@ -1,4 +1,4 @@
 import * as fs from 'fs';
 import * as path from 'path';
@@ -449,6 +449,8 @@ export async function handleBeforePromptBuild(
   let prependSystemContext: string;
   let prependContext = '';
   let appendSystemContext = '';
+  // Tracks pending diagnostician task count for diagnostician-priority mode in size guard
+  let pendingDiagTaskCount = 0;
   // ──── 0. Manual Pain Clearance ────
   if (trigger === 'user' && sessionId && session && session.currentGfi >= 100) {
@@ -720,49 +722,81 @@ The empathy observer subagent handles pain detection independently.
         const heartbeatChecklist = fs.readFileSync(heartbeatPath, 'utf8');
         prependContext += `<heartbeat_checklist>
 ${heartbeatChecklist}
-ACTION: Run self-audit. If stable, reply ONLY with "HEARTBEAT_OK".
 </heartbeat_checklist>\n`;
       } catch (e) {
         logger?.error(`[PD:Prompt] Failed to read HEARTBEAT: ${String(e)}`);
       }
     }
-    // ──── 4b. Inject pending diagnostician tasks ────
-    // FIX (#283): The evolution worker writes pain diagnosis tasks to
+    // ──── 4b. Inject pending diagnostician tasks (compact summary) ────
+    // FIX (#283/#380): The evolution worker writes pain diagnosis tasks to
     // diagnostician_tasks.json. The heartbeat prompt hook must read and inject
     // them so the LLM (acting as diagnostician) can process them.
+    //
+    // INJECTION FORMAT: Compact summary (not full prompt) to stay well within
+    // OpenClaw's ~10 000 char platform limit.  Full task.prompt can be 2–4 KB;
+    // the compact block is a few hundred chars (reason is capped at 200 chars
+    // before escaping).  The agent is instructed to read the full prompt from diagnostician_tasks.json.
     try {
       const pendingTasks = getPendingDiagnosticianTasks(wctx.stateDir);
       if (pendingTasks.length > 0) {
+        pendingDiagTaskCount = pendingTasks.length;
+        // Build the compact summary block — only the first pending task is injected per heartbeat
         const taskBlocks = pendingTasks
-          .slice(0, 3)
-          .map(({ id, task }) => `<diagnostician_task id="${id}">\n${task.prompt}\n</diagnostician_task>`)
+          .slice(0, 1)
+          .map(({ id, task }) => {
+            // Extract summary fields; reason is truncated to 200 chars to keep
+            // the injected block small and stable.
+            const reason = (task.prompt
+              .match(/reason["\s:]+([^\n]{0,240})/i)?.[1]
+              ?? task.prompt.slice(0, 200)
+            ).slice(0, 200);
+            const safeId = escapeXml(id);
+            const safeReason = escapeXml(reason);
+            const safeCreatedAt = escapeXml(task.createdAt);
+            const markerFile = `.evolution_complete_${safeId}`;
+            const reportFile = `.diagnostician_report_${safeId}.json`;
+            return `<diagnostician_task id="${safeId}">
+task_id: ${safeId}
+reason: ${safeReason}
+marker: ${markerFile}
+report: ${reportFile}
+queued_at: ${safeCreatedAt}
+action: Analyze pain signal → identify violated principles → write ${markerFile} + ${reportFile}
+</diagnostician_task>`;
+          })
           .join('\n\n');
-        const pendingCount = pendingTasks.length;
-        const processingNote = pendingCount > 3
-          ? `\n\nNOTE: ${pendingCount - 3} more tasks are queued. Process these 3 first; remaining tasks will be handled on subsequent heartbeats.`
+        const processingNote = pendingDiagTaskCount > 1
+          ? `\n\nNOTE: ${pendingDiagTaskCount - 1} more task(s) are queued. ` +
+            `Process one at a time; remaining tasks are handled on subsequent heartbeats.`
           : '';
-        prependContext += `<diagnostician_tasks pending="${pendingCount}">
-You are acting as a **Pain Diagnostician**. Process the following task(s) by:
-1. Analyzing the pain signal and its context
-2. Identifying the root cause and violated principles
-3. Writing a completion marker file: .evolution_complete_<TASK_ID>
-4. Writing a diagnostic report: .diagnostician_report_<TASK_ID>.json
+        prependContext += `<diagnostician_tasks pending="${pendingDiagTaskCount}">
+You are acting as a **Pain Diagnostician**. For each task:
+1. Read the full prompt from: ${escapeXml(wctx.stateDir)}/diagnostician_tasks.json [task_id=${escapeXml(pendingTasks[0]?.id ?? '')}]
+2. Analyze the pain signal and its context
+3. Identify the root cause and violated principles
+4. Write a completion marker: .evolution_complete_<TASK_ID>
+5. Write a diagnostic report: .diagnostician_report_<TASK_ID>.json
 ${taskBlocks}${processingNote}
 </diagnostician_tasks>\n`;
-        logger?.info?.(`[PD:Prompt] Injected ${Math.min(pendingCount, 3)}/${pendingCount} pending diagnostician task(s) into heartbeat prompt`);
+        logger?.info?.(
+          `[PD:Prompt] Injected compact diagnostician task block ` +
+          `(task=${pendingTasks[0]?.id}, total_pending=${pendingDiagTaskCount})`
+        );
         // C: Record heartbeat_diagnosis event for observability
         try {
           const eventLog = EventLogService.get(wctx.stateDir, logger);
           eventLog.recordHeartbeatDiagnosis({
-            taskCount: pendingCount,
-            taskIds: pendingTasks.slice(0, 3).map(t => t.id),
+            taskCount: pendingDiagTaskCount,
+            taskIds: pendingTasks.slice(0, 1).map(t => t.id),
             trigger: 'heartbeat',
           });
         } catch (evErr) {
@@ -1124,36 +1158,112 @@ ${attitudeDirective}
   }
   // ──── 8. SIZE GUARD ────
-  // Truncation happens within appendSystemContext (not prependContext)
+  // Hard cap for OpenClaw prompt injection. OpenClaw's actual platform limit is
+  // approximately 10 000 characters. We use 9 000 here to leave ~1 000 chars of
+  // headroom for the user's message, tool call delimiters, and encoding overhead.
+  // IMPORTANT: PD must never treat the platform's upper bound as its own safe
+  // working limit. Always keep a margin.
   const totalSize = prependSystemContext.length + prependContext.length + appendSystemContext.length;
-  const MAX_SIZE = 10000;
+  const MAX_INJECTION_SIZE = 9000;
-  if (totalSize > MAX_SIZE) {
+  if (totalSize > MAX_INJECTION_SIZE) {
     const originalSize = totalSize;
     const truncationLog: string[] = [];
-    // 1. Truncate project_context in appendSystemContext
+    // Deterministically remove low-priority context blocks in priority order.
+    // In diagnostician-priority mode we aggressively strip everything except
+    // the task block and minimum behavioral constraints.
+    const inDiagMode = pendingDiagTaskCount > 0;
+    // Step 1 — strip project_context (largest, lowest priority). This runs in both
+    // normal and diagnostician mode: we only reach this point when already over the limit.
     if (projectContextContent && appendSystemContext.includes('<project_context>')) {
-      const lines = projectContextContent.split('\n');
-      if (lines.length > 20) {
-        const truncated = lines.slice(0, 20).join('\n') + '\n...[truncated]';
+      appendSystemContext = appendSystemContext.replace(
+        `<project_context>\n${projectContextContent}\n</project_context>`,
+        '<project_context>\n[stripped: project_context]\n</project_context>'
+      );
+      truncationLog.push('project_context');
+    }
+    // Steps 2-4: only strip in diagnostician priority mode (inDiagMode)
+    // In normal mode we stop after project_context to preserve context quality
+    if (inDiagMode) {
+      // Step 2 — strip thinking_os
+      if (thinkingOsContent && appendSystemContext.includes('<thinking_os>')) {
+        appendSystemContext = appendSystemContext.replace(
+          `<thinking_os>\n${thinkingOsContent}\n</thinking_os>`,
+          '<thinking_os>\n[stripped: thinking_os]\n</thinking_os>'
+        );
+        truncationLog.push('thinking_os');
+      }
+      // Step 3 — strip evolution_principles (keep core_principles only)
+      if (evolutionPrinciplesContent && appendSystemContext.includes('<evolution_principles>')) {
         appendSystemContext = appendSystemContext.replace(
-          `<project_context>\n${projectContextContent}\n</project_context>`,
-          `<project_context>\n${truncated}\n</project_context>`
+          `<evolution_principles>\n${evolutionPrinciplesContent}\n</evolution_principles>`,
+          '<evolution_principles>\n[stripped: evolution_principles]\n</evolution_principles>'
         );
-        truncationLog.push('project_context');
+        truncationLog.push('evolution_principles');
+      }
+      // Step 4 — strip reflection_log if present
+      if (appendSystemContext.includes('<reflection_log>')) {
+        appendSystemContext = appendSystemContext.replace(
+          /<reflection_log>[\s\S]*?<\/reflection_log>/,
+          '<reflection_log>\n[stripped: reflection_log]\n</reflection_log>'
+        );
+        truncationLog.push('reflection_log');
       }
     }
-    // 2. Final check
+    // Step 5 — re-evaluate: check if still over limit
     let newSize = prependSystemContext.length + prependContext.length + appendSystemContext.length;
-    if (newSize > MAX_SIZE) {
-      // NOTE: We still return the content even if over limit, as truncating more
-      // could lose critical context like principles or evolution directives.
-      logger?.error(`[PD:Prompt] Cannot reduce injection size below limit. Current: ${newSize}, Limit: ${MAX_SIZE}`);
+    if (newSize > MAX_INJECTION_SIZE) {
+      // Shorten the injected reason: a line starting with "reason: " that is longer than
+      // 129 chars is cut to 129 chars (8-char prefix + 121 chars of reason) and suffixed with
+      // "...[truncated]".  This is safe because the full prompt is still in diagnostician_tasks.json.
+      prependContext = prependContext
+        .split('\n')
+        .map((line) => {
+          if (line.startsWith('reason: ') && line.length > 129) {
+            return line.slice(0, 129) + '...[truncated]';
+          }
+          return line;
+        })
+        .join('\n');
+      newSize = prependSystemContext.length + prependContext.length + appendSystemContext.length;
+      truncationLog.push('diagnostician_reason');
     }
-    logger?.warn(`[PD:Prompt] Injection size exceeded: ${originalSize} chars (limit: ${MAX_SIZE}), truncated: ${truncationLog.join(', ') || 'none'}, new size: ${newSize} chars`);
+    // FAIL-CLOSED: if we are still over the limit after all deterministic
+    // removals, replace the entire appendSystemContext with a short fallback notice
+    // (prependContext and prependSystemContext are kept unchanged) and log a hard error.
+    // NOTE: the size is not re-checked afterwards, so the two prepend sections alone can still exceed the limit.
+    if (newSize > MAX_INJECTION_SIZE) {
+      const fallbackContext = `
+## 【CONTEXT SECTIONS】
+[WARNING: Context sections stripped due to prompt size constraints.
+This is a diagnostician-priority session — see diagnostician_tasks.json for full task context.]
+${attitudeDirective}
+`.trim();
+      appendSystemContext = fallbackContext;
+      newSize = prependSystemContext.length + prependContext.length + appendSystemContext.length;
+      logger?.error(
+        `[PD:Prompt] PROMPT OVER LIMIT AFTER ALL REDUCTIONS — using fallback. ` +
+        `Original: ${originalSize}, Current: ${newSize}, Limit: ${MAX_INJECTION_SIZE}. ` +
+        `Stripped: ${truncationLog.join(', ')}. Diagnostician mode: ${inDiagMode}.`
+      );
+    } else {
+      logger?.warn(
+        `[PD:Prompt] Injection size exceeded: ${originalSize} chars (limit: ${MAX_INJECTION_SIZE}), ` +
+        `truncated: ${truncationLog.join(', ') || 'none'}, new size: ${newSize} chars, ` +
+        `diagnostician mode: ${inDiagMode}`
+      );
+    }
   }
   return {

package/src/service/runtime-summary-service.ts CHANGED Viewed

@@ -222,7 +222,7 @@ export class RuntimeSummaryService {
     const selectedSession = this.selectSession(sessions, options?.sessionId ?? null);
     const selectedSessionId = selectedSession.session?.sessionId ?? null;
-    const persistedEvents = this.readEvents(path.join(wctx.stateDir, 'logs', 'events.jsonl'), warnings);
+    const persistedEvents = this.readEvents(path.join(wctx.stateDir, 'logs'), warnings);
     const hasBufferedEventAccess =
       typeof (wctx.eventLog as { getBufferedEvents?: () => EventLogEntry[] }).getBufferedEvents === 'function';
     const bufferedEvents = hasBufferedEventAccess
@@ -358,6 +358,22 @@ export class RuntimeSummaryService {
       heartbeatsInjectedToday: diagDailyStats?.heartbeatsInjected ?? 0,
     };
+    // D: Stall detection — high-signal warning when the diagnostician loop appears broken.
+    // Conditions: heartbeats injected tasks today, no reports were written today, and tasks are still pending.
+    if (
+      heartbeatDiagnosis.heartbeatsInjectedToday > 0 &&
+      heartbeatDiagnosis.reportsWrittenToday === 0 &&
+      heartbeatDiagnosis.pendingTasks > 0
+    ) {
+      pushWarning(
+        warnings,
+        'Diagnostician appears stalled: heartbeats are injecting tasks ' +
+        `(${heartbeatDiagnosis.heartbeatsInjectedToday}) but no reports are being written. ` +
+        `${heartbeatDiagnosis.pendingTasks} task(s) remain pending. ` +
+        'Check prompt injection size limits and diagnostician task processing.'
+      );
+    }
     // Read trajectory analytics data (historical data, NOT runtime truth)
     const trajectoryStats = this.readTrajectoryStats(workspaceDir, warnings);
@@ -596,14 +612,49 @@ export class RuntimeSummaryService {
     };
   }
-  private static readEvents(eventsPath: string, warnings: string[]): EventLogEntry[] {
-    if (!fs.existsSync(eventsPath)) {
-      warnings.push('No events.jsonl file exists yet; recent pain and gate summaries are partial.');
+  private static readEvents(logsDir: string, warnings: string[]): EventLogEntry[] {
+    // The event log is stored as daily files: events_YYYY-MM-DD.jsonl.
+    // Prefer today's file; fall back to the most recent daily file so that
+    // gate/pain stats are still populated when the day rolled over.
+    const dir = logsDir;
+    let bestFile: string | null = null;
+    if (fs.existsSync(dir)) {
+      const today = new Date().toISOString().slice(0, 10); // YYYY-MM-DD
+      // Prefer exact match on today's file
+      const todayFile = path.join(dir, `events_${today}.jsonl`);
+      if (fs.existsSync(todayFile)) {
+        bestFile = todayFile;
+      } else {
+        // Fallback: pick the most recent file by date embedded in the filename
+        // (lexical comparison works for ISO dates YYYY-MM-DD).
+        let newestDate = '';
+        try {
+          for (const file of fs.readdirSync(dir)) {
+            const m = file.match(/^events_(\d{4}-\d{2}-\d{2})\.jsonl$/);
+            if (!m) continue;
+            const fileDate = m[1];
+            if (fileDate > newestDate) {
+              newestDate = fileDate;
+              bestFile = path.join(dir, file);
+            }
+          }
+        } catch { /* ignore scan errors */ }
+      }
+    }
+    if (!bestFile) {
+      pushWarning(
+        warnings,
+        'No event log file found; recent pain and gate summaries are partial. ' +
+        'Expected format: events_YYYY-MM-DD.jsonl in the logs directory.'
+      );
       return [];
     }
     try {
-      const raw = fs.readFileSync(eventsPath, 'utf8').trim();
+      const raw = fs.readFileSync(bestFile, 'utf8').trim();
       if (!raw) return [];
       let parseFailures = 0;
       const entries = raw
@@ -620,12 +671,15 @@ export class RuntimeSummaryService {
       if (parseFailures > 0) {
         pushWarning(
           warnings,
-          `Skipped ${parseFailures} malformed event line${parseFailures === 1 ? '' : 's'} while reading events.jsonl.`
+          `Skipped ${parseFailures} malformed event line${parseFailures === 1 ? '' : 's'} while reading ${path.basename(bestFile!)}.`
         );
       }
       return entries;
     } catch {
-      pushWarning(warnings, 'Failed to read events.jsonl; recent pain and gate summaries are partial.');
+      pushWarning(
+        warnings,
+        `Failed to read ${path.basename(bestFile!)}; recent pain and gate summaries are partial.`
+      );
       return [];
     }
   }

package/tests/hooks/prompt-size-guard.test.ts ADDED Viewed

@@ -0,0 +1,314 @@
+/**
+ * Tests for prompt.ts diagnostician fixes (Phase A: Immediate Hemorrhage Control)
+ *
+ * Covers:
+ * 1. Compact diagnostician task injection block format
+ * 2. Size guard: injection stays under MAX_INJECTION_SIZE (9000)
+ * 3. Diagnostician priority mode: low-priority blocks stripped when tasks pending
+ * 4. Fail-closed: never returns injection over limit
+ */
+import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
+import * as fs from 'fs';
+import * as os from 'os';
+import * as path from 'path';
+// ─── Mock dependencies ───────────────────────────────────────────────────────
+const mockGetPendingDiagnosticianTasks = vi.fn<(stateDir: string) => unknown[]>();
+beforeEach(() => {
+  vi.clearAllMocks();
+  mockGetPendingDiagnosticianTasks.mockReturnValue([]);
+});
+vi.mock('../../src/core/diagnostician-task-store.js', async () => ({
+  getPendingDiagnosticianTasks: (...args: unknown[]) =>
+    mockGetPendingDiagnosticianTasks(...args),
+}));
+vi.mock('../../src/core/event-log.js', () => ({
+  EventLogService: {
+    get: vi.fn().mockReturnValue({
+      recordHeartbeatDiagnosis: vi.fn(),
+    }),
+  },
+}));
+vi.mock('../../src/core/workspace-context.js', () => ({
+  WorkspaceContext: {
+    fromHookContext: vi.fn().mockReturnValue({
+      stateDir: '/fake/state',
+      resolve: (key: string) => `/fake/${key}`,
+      trajectory: { recordSession: vi.fn(), recordUserTurn: vi.fn() },
+      config: { get: vi.fn() },
+      evolutionReducer: {
+        getActivePrinciples: vi.fn().mockReturnValue([]),
+        getProbationPrinciples: vi.fn().mockReturnValue([]),
+      },
+    }),
+  },
+}));
+vi.mock('../../src/core/session-tracker.js', () => ({
+  getSession: vi.fn().mockReturnValue({ currentGfi: 20 }),
+  resetFriction: vi.fn(),
+  trackFriction: vi.fn(),
+  setInjectedProbationIds: vi.fn(),
+  clearInjectedProbationIds: vi.fn(),
+  decayGfi: vi.fn(),
+  getGfiDecayElapsed: vi.fn().mockReturnValue(0),
+}));
+vi.mock('../../src/core/path-resolver.js', () => ({
+  PathResolver: { getExtensionRoot: vi.fn().mockReturnValue('/fake/extension') },
+}));
+vi.mock('../../src/core/principle-injection.js', () => ({
+  selectPrinciplesForInjection: vi.fn().mockReturnValue({
+    selected: [],
+    wasTruncated: false,
+    breakdown: { p0: 0, p1: 0, p2: 0 },
+    totalChars: 0,
+  }),
+  DEFAULT_PRINCIPLE_BUDGET: 3000,
+}));
+vi.mock('../../src/core/empathy-keyword-matcher.js', () => ({
+  matchEmpathyKeywords: vi.fn().mockReturnValue({ score: 0, matched: null, severity: 'none', matchedTerms: [] }),
+  loadKeywordStore: vi.fn().mockReturnValue({ terms: {}, stats: { totalHits: 0 } }),
+  saveKeywordStore: vi.fn(),
+  shouldTriggerOptimization: vi.fn().mockReturnValue(false),
+  getKeywordStoreSummary: vi.fn().mockReturnValue({ totalTerms: 0, highFalsePositiveTerms: [] }),
+}));
+vi.mock('../../src/core/empathy-types.js', () => ({
+  severityToPenalty: vi.fn().mockReturnValue(5),
+  DEFAULT_EMPATHY_KEYWORD_CONFIG: {},
+}));
+vi.mock('../../src/core/correction-cue-learner.js', () => ({
+  CorrectionCueLearner: {
+    get: vi.fn().mockReturnValue({
+      match: vi.fn().mockReturnValue({ matched: null, matchedTerms: [], confidence: 0 }),
+      recordHits: vi.fn(),
+      recordTruePositive: vi.fn(),
+      flush: vi.fn(),
+    }),
+  },
+}));
+vi.mock('../../src/core/focus-history.js', () => ({
+  extractSummary: vi.fn().mockReturnValue(''),
+  getHistoryVersions: vi.fn().mockReturnValue([]),
+  parseWorkingMemorySection: vi.fn().mockReturnValue(null),
+  workingMemoryToInjection: vi.fn().mockReturnValue(''),
+  autoCompressFocus: vi.fn().mockReturnValue({ compressed: false, reason: 'not_needed' }),
+  safeReadCurrentFocus: vi.fn().mockReturnValue({ content: '', recovered: false, validationErrors: [] }),
+}));
+vi.mock('../../src/service/subagent-workflow/index.js', () => ({
+  EmpathyObserverWorkflowManager: vi.fn(),
+  empathyObserverWorkflowSpec: {},
+  isExpectedSubagentError: vi.fn().mockReturnValue(false),
+}));
+vi.mock('../../src/utils/subagent-probe.js', () => ({
+  isSubagentRuntimeAvailable: vi.fn().mockReturnValue(false),
+}));
+vi.mock('../../src/core/local-worker-routing.js', () => ({
+  classifyTask: vi.fn().mockReturnValue({
+    decision: 'stay_main',
+    classification: 'unknown',
+    reason: 'mocked',
+    blockers: [],
+  }),
+}));
+// ─── Helpers ─────────────────────────────────────────────────────────────────
+function fakeTask(overrides: Partial<{
+  id: string; prompt: string; createdAt: string; status: string;
+}> = {}): { id: string; task: { prompt: string; createdAt: string; status: 'pending' } } {
+  return {
+    id: overrides.id ?? 'test-task-1',
+    task: {
+      prompt: overrides.prompt ?? 'Diagnose pain signal: source=tool_failure score=75 reason=Command failed',
+      createdAt: overrides.createdAt ?? '2026-04-21T10:00:00.000Z',
+      status: 'pending',
+    },
+  };
+}
+function makeMinimalEvent(): Parameters<typeof import('../../src/hooks/prompt.js').handleBeforePromptBuild>[0] {
+  return {
+    prompt: 'hello world',
+    messages: [],
+    trigger: 'heartbeat',
+    sessionId: 'test-session-123',
+  } as unknown as Parameters<typeof import('../../src/hooks/prompt.js').handleBeforePromptBuild>[0];
+}
+// ─── Tests ───────────────────────────────────────────────────────────────────
+describe('Diagnostician compact task injection', () => {
+  it('injects a compact block containing task_id, reason, marker and report paths', async () => {
+    const { handleBeforePromptBuild } = await import('../../src/hooks/prompt.js');
+    mockGetPendingDiagnosticianTasks.mockReturnValueOnce([fakeTask({
+      id: 'task-abc',
+      prompt:
+        'Pain signal: source=tool_failure\nscore=75\nreason=Command npm test failed with exit code 1\nsession_id=sess-123',
+    })]);
+    const ctx = {
+      workspaceDir: '/fake/workspace',
+      trigger: 'heartbeat',
+      sessionId: 'test-session-123',
+      api: {
+        logger: { info: vi.fn(), warn: vi.fn(), error: vi.fn() },
+        runtime: {},
+        config: {},
+      },
+    } as unknown as Parameters<typeof handleBeforePromptBuild>[1];
+    const result = await handleBeforePromptBuild(makeMinimalEvent(), ctx);
+    const combined = (result?.prependContext ?? '') + (result?.appendSystemContext ?? '');
+    // Must contain structural fields
+    expect(combined).toContain('task_id: task-abc');
+    expect(combined).toContain('.evolution_complete_task-abc');
+    expect(combined).toContain('.diagnostician_report_task-abc.json');
+    // Must NOT contain the full raw prompt (which could be 2-4 KB)
+    // The compact diagnostician block is small; the heartbeat checklist adds to total
+    expect(combined.length).toBeLessThan(2000);
+  });
+  it('injects exactly one task per heartbeat regardless of queue depth', async () => {
+    const { handleBeforePromptBuild } = await import('../../src/hooks/prompt.js');
+    const tasks = [
+      fakeTask({ id: 'task-1' }),
+      fakeTask({ id: 'task-2' }),
+      fakeTask({ id: 'task-3' }),
+    ];
+    mockGetPendingDiagnosticianTasks.mockReturnValueOnce(tasks);
+    const ctx = {
+      workspaceDir: '/fake/workspace',
+      trigger: 'heartbeat',
+      sessionId: 'test-session-123',
+      api: {
+        logger: { info: vi.fn(), warn: vi.fn(), error: vi.fn() },
+        runtime: {},
+        config: {},
+      },
+    } as unknown as Parameters<typeof handleBeforePromptBuild>[1];
+    const result = await handleBeforePromptBuild(makeMinimalEvent(), ctx);
+    const combined = (result?.prependContext ?? '') + (result?.appendSystemContext ?? '');
+    // Only the first task ID appears in the block
+    expect(combined).toContain('task-1');
+    expect(combined).not.toContain('task-2');
+    expect(combined).not.toContain('task-3');
+    // Note mentions remaining count
+    expect(combined).toContain('2 more task(s) are queued');
+  });
+});
+describe('Size guard: fail-closed', () => {
+  it('never returns a combined injection that exceeds MAX_INJECTION_SIZE (9000)', async () => {
+    const { handleBeforePromptBuild } = await import('../../src/hooks/prompt.js');
+    // One large pending task to trigger diagnostician priority mode
+    const largePrompt = 'Pain signal: ' + 'x'.repeat(5000);
+    mockGetPendingDiagnosticianTasks.mockReturnValueOnce([fakeTask({ prompt: largePrompt })]);
+    const ctx = {
+      workspaceDir: '/fake/workspace',
+      trigger: 'heartbeat',
+      sessionId: 'test-session-123',
+      api: {
+        logger: { info: vi.fn(), warn: vi.fn(), error: vi.fn() },
+        runtime: {},
+        config: {},
+      },
+    } as unknown as Parameters<typeof handleBeforePromptBuild>[1];
+    const result = await handleBeforePromptBuild(makeMinimalEvent(), ctx);
+    const totalSize =
+      (result?.prependSystemContext?.length ?? 0) +
+      (result?.prependContext?.length ?? 0) +
+      (result?.appendSystemContext?.length ?? 0);
+    expect(totalSize).toBeLessThanOrEqual(9000);
+  });
+  it('strips project_context and strips thinking_os/evolution_principles when inDiagMode and over limit', async () => {
+    const { handleBeforePromptBuild } = await import('../../src/hooks/prompt.js');
+    // A long reason string in the task; the injected summary caps the reason at 200 chars
+    const longReason = 'x'.repeat(500);
+    mockGetPendingDiagnosticianTasks.mockReturnValueOnce([
+      fakeTask({ prompt: `Pain signal: reason=${longReason} source=tool_failure score=85` }),
+    ]);
+    const ctx = {
+      workspaceDir: '/fake/workspace',
+      trigger: 'heartbeat',
+      sessionId: 'test-session-123',
+      api: {
+        logger: { info: vi.fn(), warn: vi.fn(), error: vi.fn() },
+        runtime: {},
+        config: {},
+      },
+    } as unknown as Parameters<typeof handleBeforePromptBuild>[1];
+    const result = await handleBeforePromptBuild(makeMinimalEvent(), ctx);
+    // The size guard must never throw — result must be defined
+    expect(result).toBeDefined();
+    const combined =
+      (result?.prependSystemContext?.length ?? 0) +
+      (result?.prependContext?.length ?? 0) +
+      (result?.appendSystemContext?.length ?? 0);
+    // Must stay within MAX_INJECTION_SIZE (9000)
+    expect(combined).toBeLessThanOrEqual(9000);
+  });
+});
+describe('Diagnostician priority mode', () => {
+  it('sets pendingDiagTaskCount > 0 so size guard knows to strip low-priority blocks', async () => {
+    const { handleBeforePromptBuild } = await import('../../src/hooks/prompt.js');
+    mockGetPendingDiagnosticianTasks.mockReturnValueOnce([fakeTask()]);
+    const infoLogger = vi.fn();
+    const ctx = {
+      workspaceDir: '/fake/workspace',
+      trigger: 'heartbeat',
+      sessionId: 'test-session-123',
+      api: {
+        logger: { info: infoLogger, warn: vi.fn(), error: vi.fn() },
+        runtime: {},
+        config: {},
+      },
+    } as unknown as Parameters<typeof handleBeforePromptBuild>[1];
+    const result = await handleBeforePromptBuild(makeMinimalEvent(), ctx);
+    // Should have logged task injection
+    expect(infoLogger).toHaveBeenCalledWith(
+      expect.stringContaining('Injected compact diagnostician task block')
+    );
+    // Result must be valid
+    expect(result?.prependContext).toBeDefined();
+  });
+});

package/tests/service/runtime-summary-service.test.ts CHANGED Viewed

@@ -31,7 +31,9 @@ function writeSession(workspace: string, sessionId: string, payload: Record<stri
 }
 function writeEvents(workspace: string, entries: unknown[]): void {
-  const filePath = path.join(workspace, '.state', 'logs', 'events.jsonl');
+  // Write to today's daily event file (events_YYYY-MM-DD.jsonl)
+  const today = new Date().toISOString().slice(0, 10);
+  const filePath = path.join(workspace, '.state', 'logs', `events_${today}.jsonl`);
   fs.mkdirSync(path.dirname(filePath), { recursive: true });
   const content = entries.map((entry) => JSON.stringify(entry)).join('\n');
   fs.writeFileSync(filePath, content ? `${content}\n` : '', 'utf8');
@@ -471,11 +473,12 @@ describe('RuntimeSummaryService', () => {
       trust_score: 59,
       last_updated: '2026-03-20T10:00:00Z',
     });
+    const today = new Date().toISOString().slice(0, 10);
     fs.writeFileSync(
-      path.join(workspace, '.state', 'logs', 'events.jsonl'),
+      path.join(workspace, '.state', 'logs', `events_${today}.jsonl`),
       [
         JSON.stringify({
-          ts: '2026-03-20T10:00:01Z',
+          ts: `${today}T10:00:01Z`,
           type: 'pain_signal',
           category: 'detected',
           sessionId: 's1',
@@ -916,4 +919,180 @@ describe('RuntimeSummaryService', () => {
       expect(summary.phase3.directiveIgnoredReason).toBe('queue is only truth source');
     });
   });
+  // ─────────────────────────────────────────────────────────────────────────────
+  // Phase A: Event log daily-file compatibility + stalled diagnostician warning
+  // ─────────────────────────────────────────────────────────────────────────────
+  describe('Event log daily-file format (events_YYYY-MM-DD.jsonl)', () => {
+    it('reads today events_YYYY-MM-DD.jsonl and does NOT warn about missing events.jsonl', () => {
+      const workspace = makeWorkspace();
+      writeJson(path.join(workspace, '.state', 'AGENT_SCORECARD.json'), {
+        trust_score: 59,
+        last_updated: '2026-03-20T10:00:00Z',
+      });
+      // Write a single pain_signal event into the daily file named after today's date
+      const today = new Date().toISOString().slice(0, 10); // YYYY-MM-DD, in UTC (toISOString() always reports UTC)
+      const dailyFilePath = path.join(workspace, '.state', 'logs', `events_${today}.jsonl`);
+      fs.mkdirSync(path.dirname(dailyFilePath), { recursive: true });
+      fs.writeFileSync(
+        dailyFilePath,
+        JSON.stringify({
+          ts: `${today}T10:00:01Z`,
+          type: 'pain_signal',
+          category: 'detected',
+          sessionId: 's1',
+          data: { source: 'tool_failure', score: 10, reason: 'write failed' },
+        }) + '\n',
+        'utf8'
+      );
+      const summary = RuntimeSummaryService.getSummary(workspace);
+      // The pain source written above must surface in the summary, i.e. the daily file was read
+      expect(summary.pain.lastSignal?.source).toBe('tool_failure');
+      // Neither missing-log wording ("No events.jsonl file" / "No event log file") may appear in warnings
+      expect(summary.metadata.warnings.join('\n')).not.toContain('No events.jsonl file');
+      expect(summary.metadata.warnings.join('\n')).not.toContain('No event log file');
+    });
+    it('falls back to most recent daily file when today file does not exist', () => {
+      const workspace = makeWorkspace();
+      writeJson(path.join(workspace, '.state', 'AGENT_SCORECARD.json'), {
+        trust_score: 59,
+        last_updated: '2026-03-20T10:00:00Z',
+      });
+      // Write only one daily file, named with the fixed date 2026-03-18 rather than today's date
+      const oldDate = '2026-03-18';
+      const oldFilePath = path.join(workspace, '.state', 'logs', `events_${oldDate}.jsonl`);
+      fs.mkdirSync(path.dirname(oldFilePath), { recursive: true });
+      fs.writeFileSync(
+        oldFilePath,
+        JSON.stringify({
+          ts: `${oldDate}T10:00:01Z`,
+          type: 'gate_block',
+          category: 'risk',
+          sessionId: 's2',
+          data: { reason: 'old gate event' },
+        }) + '\n',
+        'utf8'
+      );
+      const summary = RuntimeSummaryService.getSummary(workspace);
+      // A non-zero recentBlocks count shows the gate_block event in that file was read
+      expect(summary.gate.recentBlocks).toBeGreaterThan(0);
+      // The fallback file counts as an event log, so "No event log file" must not be reported
+      expect(summary.metadata.warnings.join('\n')).not.toContain('No event log file');
+    });
+    it('warns only when no daily event file exists at all', () => {
+      const workspace = makeWorkspace();
+      writeJson(path.join(workspace, '.state', 'AGENT_SCORECARD.json'), {
+        trust_score: 59,
+        last_updated: '2026-03-20T10:00:00Z',
+      });
+      // Deliberately write no event log files in this test (only the scorecard is written here)
+      const summary = RuntimeSummaryService.getSummary(workspace);
+      expect(summary.metadata.warnings.join('\n')).toContain('No event log file');
+    });
+  });
+  describe('Stalled diagnostician warning', () => {
+    it('raises a high-signal warning when tasks are injected but no reports are written', () => {
+      const workspace = makeWorkspace();
+      writeJson(path.join(workspace, '.state', 'AGENT_SCORECARD.json'), {
+        trust_score: 59,
+        last_updated: '2026-03-20T10:00:00Z',
+      });
+      writeJson(path.join(workspace, '.state', 'evolution_queue.json'), []);
+      // Simulate a stall: one pending task exists and heartbeats are injected, but no reports are written.
+      // Per the original author's note, the pending task store is read through getPendingDiagnosticianTasks,
+      // which uses the real filesystem path, so diagnostician_tasks.json is written directly here.
+      const today = new Date().toISOString().slice(0, 10);
+      fs.mkdirSync(path.join(workspace, '.state'), { recursive: true });
+      fs.writeFileSync(
+        path.join(workspace, '.state', 'diagnostician_tasks.json'),
+        JSON.stringify({
+          tasks: {
+            'stalled-task-1': {
+              prompt: 'Diagnose: tool_failure score=80',
+              createdAt: `${today}T09:00:00Z`,
+              status: 'pending',
+            },
+          },
+        }),
+        'utf8'
+      );
+      // Write daily-stats.json for today with heartbeatsInjected = 5 but diagnosticianReportsWritten = 0
+      writeJson(path.join(workspace, '.state', 'logs', 'daily-stats.json'), {
+        [today]: {
+          evolution: {
+            diagnosisTasksWritten: 3,
+            diagnosticianReportsWritten: 0,
+            reportsMissingJson: 0,
+            reportsIncompleteFields: 0,
+            principleCandidatesCreated: 0,
+            heartbeatsInjected: 5,
+          },
+        },
+      });
+      // Also create a valid daily event file so that readEvents() does not emit the
+      // "No event log file" warning, which would pollute warnings[] and could mask
+      // the stall-detection behavior this test is actually exercising.
+      fs.mkdirSync(path.join(workspace, '.state', 'logs'), { recursive: true });
+      fs.writeFileSync(
+        path.join(workspace, '.state', 'logs', `events_${today}.jsonl`),
+        JSON.stringify({ ts: `${today}T09:00:00Z`, type: 'heartbeat_diagnosis', category: 'task_injected', sessionId: 'test', data: {} }) + '\n',
+        'utf8'
+      );
+      const summary = RuntimeSummaryService.getSummary(workspace);
+      const warningText = summary.metadata.warnings.join('\n');
+      expect(warningText).toContain('Diagnostician appears stalled');
+      expect(warningText).toContain('5'); // heartbeatsInjected value (loose match: any "5" in the warnings satisfies this)
+      expect(warningText).toContain('reports are being written'); // wording check only; the zero report count is not asserted by this line
+      expect(warningText).toContain('1 task(s) remain pending');
+    });
+    it('does NOT raise the stalled warning when reports are being written', () => {
+      const workspace = makeWorkspace();
+      writeJson(path.join(workspace, '.state', 'AGENT_SCORECARD.json'), {
+        trust_score: 59,
+        last_updated: '2026-03-20T10:00:00Z',
+      });
+      writeJson(path.join(workspace, '.state', 'evolution_queue.json'), []);
+      const today = new Date().toISOString().slice(0, 10);
+      fs.mkdirSync(path.join(workspace, '.state'), { recursive: true });
+      fs.writeFileSync(
+        path.join(workspace, '.state', 'diagnostician_tasks.json'),
+        JSON.stringify({ tasks: {} }), // Empty task map: no pending tasks in this scenario
+        'utf8'
+      );
+      writeJson(path.join(workspace, '.state', 'logs', 'daily-stats.json'), {
+        [today]: {
+          evolution: {
+            diagnosisTasksWritten: 3,
+            diagnosticianReportsWritten: 3, // Non-zero: reports ARE being written
+            reportsMissingJson: 0,
+            reportsIncompleteFields: 0,
+            principleCandidatesCreated: 1,
+            heartbeatsInjected: 5,
+          },
+        },
+      });
+      const summary = RuntimeSummaryService.getSummary(workspace);
+      expect(summary.metadata.warnings.join('\n')).not.toContain('Diagnostician appears stalled');
+    });
+  });
 });