npm - nightytidy - Versions diffs - 0.2.4 → 0.2.7 - Mend

nightytidy 0.2.4 → 0.2.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

package/package.json +1 -1
package/src/agent/cli-bridge.js +29 -7
package/src/agent/index.js +25 -3
package/src/agent/keep-awake.js +60 -0
package/src/executor.js +1 -1
package/src/prompts/steps/01-documentation.md +219 -41

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "nightytidy",
-  "version": "0.2.4",
+  "version": "0.2.7",
   "description": "Automated overnight codebase improvement through Claude Code",
   "license": "MIT",
   "author": "Dorian Spitz",

package/src/agent/cli-bridge.js CHANGED Viewed

@@ -1,6 +1,9 @@
 import { spawn } from 'node:child_process';
 import path from 'node:path';
-import { debug, error as logError } from '../logger.js';
+import { debug, warn, error as logError } from '../logger.js';
+const INIT_TIMEOUT_MS = 5 * 60_000;   // 5 minutes — init should never take this long
+const FINISH_TIMEOUT_MS = 10 * 60_000; // 10 minutes — finish includes report generation
 export class CliBridge {
   constructor(projectDir) {
@@ -9,19 +12,20 @@ export class CliBridge {
   }
   async listSteps() {
-    return this._run(CliBridge.buildArgs({ list: true }));
+    return this._run(CliBridge.buildArgs({ list: true }), null, { timeout: 30_000 });
   }
   async initRun(steps, timeout) {
-    return this._run(CliBridge.buildArgs({ initRun: true, steps, timeout }));
+    return this._run(CliBridge.buildArgs({ initRun: true, steps, timeout }), null, { timeout: INIT_TIMEOUT_MS });
   }
   async runStep(stepNum, onOutput) {
+    // No timeout on steps — they have their own per-step timeout via the CLI
     return this._run(CliBridge.buildArgs({ runStep: stepNum }), onOutput);
   }
   async finishRun() {
-    return this._run(CliBridge.buildArgs({ finishRun: true }));
+    return this._run(CliBridge.buildArgs({ finishRun: true }), null, { timeout: FINISH_TIMEOUT_MS });
   }
   kill() {
@@ -71,7 +75,7 @@ export class CliBridge {
     return null;
   }
-  _run(args, onOutput) {
+  _run(args, onOutput, opts = {}) {
     return new Promise((resolve, reject) => {
       const binPath = path.resolve(import.meta.dirname, '../../bin/nightytidy.js');
       const proc = spawn('node', [binPath, ...args], {
@@ -82,6 +86,18 @@ export class CliBridge {
       let stdout = '';
       let stderr = '';
+      let killed = false;
+      // Timeout — kill the process if it takes too long
+      let timer = null;
+      if (opts.timeout) {
+        timer = setTimeout(() => {
+          killed = true;
+          const timeoutSec = Math.round(opts.timeout / 1000);
+          warn(`CLI process timed out after ${timeoutSec}s: ${args.join(' ')}`);
+          this.kill();
+        }, opts.timeout);
+      }
       proc.stdout.on('data', (data) => {
         const text = data.toString();
@@ -111,18 +127,23 @@ export class CliBridge {
       });
       proc.on('close', (code) => {
+        if (timer) clearTimeout(timer);
         this.activeProcess = null;
         const parsed = CliBridge.parseOutput(stdout);
         resolve({
-          success: code === 0,
+          success: code === 0 && !killed,
           exitCode: code,
           stdout,
-          stderr,
+          stderr: killed
+            ? `Process timed out after ${Math.round(opts.timeout / 1000)}s — Claude Code may be unavailable`
+            : stderr,
           parsed,
+          timedOut: killed,
         });
       });
       proc.on('error', (err) => {
+        if (timer) clearTimeout(timer);
         this.activeProcess = null;
         logError(`CLI process error: ${err.message}`);
         resolve({
@@ -131,6 +152,7 @@ export class CliBridge {
           stdout,
           stderr: err.message,
           parsed: null,
+          timedOut: false,
         });
       });
     });

package/src/agent/index.js CHANGED Viewed

@@ -12,6 +12,7 @@ import { WebhookDispatcher } from './webhook-dispatcher.js';
 import { CliBridge } from './cli-bridge.js';
 import { AgentGit } from './git-integration.js';
 import { FirebaseAuth } from './firebase-auth.js';
+import { acquireKeepAwake, releaseKeepAwake } from './keep-awake.js';
 const FIREBASE_WEBHOOK_URL = 'https://webhookingest-24h6taciuq-uc.a.run.app';
@@ -479,7 +480,10 @@ export async function startAgent() {
   async function processQueue() {
     const run = runQueue.dequeue();
-    if (!run) return;
+    if (!run) {
+      releaseKeepAwake();
+      return;
+    }
     const project = projectManager.getProject(run.projectId);
     if (!project) {
@@ -497,15 +501,32 @@ export async function startAgent() {
     runOutputBuffer = '';
     runProgress = { stepList: [], completedCount: 0, failedCount: 0, totalCost: 0, currentStepNum: null };
+    acquireKeepAwake();
     info(`\n━━━ Run started: ${project.name} ━━━`);
     info(`  Steps: [${run.steps.join(', ')}] (${run.steps.length} total)`);
     info(`  Project: ${project.path}`);
+    // Clean stale files from previous failed/abandoned runs
+    // so --init-run doesn't refuse to start
+    for (const staleFile of ['nightytidy-run-state.json', 'nightytidy.lock']) {
+      try {
+        const filePath = path.join(project.path, staleFile);
+        if (fs.existsSync(filePath)) {
+          fs.unlinkSync(filePath);
+          debug(`Removed stale ${staleFile}`);
+        }
+      } catch { /* ignore — init-run will report if it's still a problem */ }
+    }
     wsServer.broadcast({ type: 'run-started', runId: run.id, projectId: run.projectId, projectName: project.name, branch: '' });
+    wsServer.broadcast({ type: 'run-status', runId: run.id, status: 'initializing', message: 'Running pre-checks and setting up git branch...' });
     const initResult = await bridge.initRun(run.steps, run.timeout);
     if (!initResult.success) {
-      info(`  ✗ Init failed: ${initResult.stderr}`);
-      wsServer.broadcast({ type: 'run-failed', runId: run.id, error: initResult.stderr });
+      const errorMsg = initResult.timedOut
+        ? 'Initialization timed out — Claude Code may be unavailable. Restart the agent to retry.'
+        : (initResult.parsed?.error || initResult.stderr || 'Unknown init error');
+      info(`  ✗ Init failed: ${errorMsg}`);
+      wsServer.broadcast({ type: 'run-failed', runId: run.id, error: errorMsg });
       dispatchWithQueue('run_failed', {
         project: project.name,
         projectId: project.id,
@@ -1020,6 +1041,7 @@ export async function startAgent() {
   // Graceful shutdown
   const shutdown = async () => {
     info('Agent shutting down...');
+    releaseKeepAwake();
     saveInterruptedState();
     scheduler.stopAll();
     await wsServer.stop();

package/src/agent/keep-awake.js ADDED Viewed

@@ -0,0 +1,60 @@
+/**
+ * Prevents the OS from sleeping while NightyTidy runs are active.
+ *
+ * Windows: Spawns a helper PowerShell process that calls
+ * SetThreadExecutionState with ES_CONTINUOUS | ES_SYSTEM_REQUIRED
+ * (0x80000001) and then sleeps in a loop. The request lasts only as long
+ * as that helper is alive: releaseKeepAwake() kills it, which clears the flag.
+ *
+ * macOS/Linux: Uses caffeinate / systemd-inhibit respectively.
+ *
+ * No admin privileges required on any platform.
+ */
+import { execSync, spawn } from 'node:child_process';
+import { debug, warn } from '../logger.js';
+/** Helper child process currently holding the sleep inhibitor, or null when none is held. */
+let keepAwakeProcess = null;
+/**
+ * Starts a platform-specific helper process that keeps the OS from sleeping.
+ * Does nothing when a helper is already held.
+ *
+ * Failure is non-fatal: a warning is logged and the caller carries on without
+ * sleep prevention. A missing binary (e.g. no systemd-inhibit) is reported
+ * through the child's asynchronous 'error' event, not as an exception thrown
+ * by spawn(), so the warning is issued from that handler as well as from the
+ * catch block.
+ */
+export function acquireKeepAwake() {
+  if (keepAwakeProcess) return; // already held
+  let command;
+  let args;
+  let label;
+  if (process.platform === 'win32') {
+    // PowerShell script that sets ES_CONTINUOUS | ES_SYSTEM_REQUIRED
+    // and then sleeps forever. When we kill this process, the flag clears.
+    command = 'powershell';
+    args = [
+      '-NoProfile', '-WindowStyle', 'Hidden', '-Command',
+      `Add-Type -TypeDefinition 'using System; using System.Runtime.InteropServices; public class SleepPreventer { [DllImport("kernel32.dll")] public static extern uint SetThreadExecutionState(uint esFlags); }'; [SleepPreventer]::SetThreadExecutionState(0x80000001); while($true) { Start-Sleep -Seconds 3600 }`,
+    ];
+    label = 'Windows SetThreadExecutionState';
+  } else if (process.platform === 'darwin') {
+    // macOS: caffeinate prevents sleep, -i = idle sleep, -s = system sleep
+    command = 'caffeinate';
+    args = ['-is'];
+    label = 'macOS caffeinate';
+  } else {
+    // Linux: systemd-inhibit (may not exist on all distros)
+    command = 'systemd-inhibit';
+    args = [
+      '--what=idle:sleep', '--who=NightyTidy', '--why=Running codebase improvement',
+      'sleep', 'infinity',
+    ];
+    label = 'Linux systemd-inhibit';
+  }
+  let child;
+  try {
+    child = spawn(command, args, { stdio: 'ignore' });
+  } catch {
+    // Synchronous failures only (e.g. invalid arguments); ENOENT arrives via 'error'.
+    warn('Could not acquire sleep prevention — system may sleep during runs');
+    return;
+  }
+  child.on('error', (err) => {
+    // Ignore late events from a helper that has already been released or replaced,
+    // so they cannot discard a newer handle.
+    if (keepAwakeProcess !== child) return;
+    keepAwakeProcess = null;
+    warn(`Could not acquire sleep prevention — system may sleep during runs (${err.message})`);
+  });
+  // If the helper dies on its own, drop the handle so the next acquire re-spawns.
+  child.on('exit', () => {
+    if (keepAwakeProcess === child) keepAwakeProcess = null;
+  });
+  // Log success only once the OS has actually started the helper.
+  child.once('spawn', () => {
+    debug(`Sleep prevention acquired (${label})`);
+  });
+  // Do not let the helper keep the agent's event loop alive.
+  child.unref();
+  keepAwakeProcess = child;
+}
+/**
+ * Kills the helper process started by acquireKeepAwake(), if one is held,
+ * and forgets its handle. Calling it when nothing is held is a no-op.
+ */
+export function releaseKeepAwake() {
+  const held = keepAwakeProcess;
+  if (!held) return;
+  keepAwakeProcess = null;
+  try {
+    held.kill();
+  } catch {
+    // already dead
+  }
+  debug('Sleep prevention released');
+}

package/src/executor.js CHANGED Viewed

@@ -66,7 +66,7 @@ import { info, warn, error as logError } from './logger.js';
 // SHA-256 of all STEPS[].prompt content — update when prompts change.
 // Detects unexpected modification of prompt data before passing to
 // Claude Code with --dangerously-skip-permissions.
-const STEPS_HASH = 'c341ed4301dc1600d848da5457d319e7f1c5a51c215e1142d3889aa3684fd7cf';
+const STEPS_HASH = 'ba4e25bc096db265682a8576d543c0a3697543e238ab99f852f99038581037be';
 // Hard cap on total step duration (all retries + doc-update combined).
 // Without this, retries × phases can exceed the user's expected timeout.

package/src/prompts/steps/01-documentation.md CHANGED Viewed

@@ -8,14 +8,37 @@ AI agents pay a token cost for every line loaded into context — whether releva
 - **Tier 2 (On-Demand):** Per-topic implementation details. Loaded only when relevant. ~1-2% per task.
 - **Tier 3 (Deep Reference):** Human-facing docs, ADRs, API reference. Never auto-loaded. Zero token cost.
-| Tier | Lines | Tokens | % of 200K |
-|------|-------|--------|-----------|
-| Always (Tier 1) | 300-400 | 10-13K | 5-7% |
-| Per-task (Tier 2, 1-2 files) | 60-120 | 2-4K | 1-2% |
-| **Typical total** | **360-520** | **12-17K** | **6-9%** |
+| Tier                         | Lines       | Tokens     | % of 200K |
+| ---------------------------- | ----------- | ---------- | --------- |
+| Always (Tier 1)              | 300-400     | 10-13K     | 5-7%      |
+| Per-task (Tier 2, 1-2 files) | 60-120      | 2-4K       | 1-2%      |
+| **Typical total**            | **360-520** | **12-17K** | **6-9%**  |
 Primary deliverable: Tier 1 + Tier 2. Tier 3 is secondary.
+## Documentation Philosophy: Progressive Disclosure
+The goal of this documentation system is simple: **an AI agent wakes up knowing nothing about this codebase and can navigate to exactly the information it needs — quickly and token-efficiently.**
+Every conversation starts cold. The agent has no memory of previous sessions, no familiarity with your architecture, and a finite context window. Every line loaded into that window is a tradeoff — useful context that helps vs. irrelevant context that displaces working memory for the actual task. A flat documentation dump forces the agent to load everything to find anything. Progressive disclosure fixes this.
+**How it works**: The agent gets a compact map first (Tier 1), then navigates to exactly the detail it needs (Tier 2 topic file), and only if the topic is deep enough, one more level down (Tier 2 sub-file). At most two navigational hops from cold start to specific answer.
+**The navigation chain**:
+1. **Always loaded** — CLAUDE.md + MEMORY.md are in context in every conversation. These orient the agent and tell it where to look next. Combined: ~10-13K tokens (the Tier 1 budget in the table above)
+2. **First hop** — MEMORY.md contains a topic index with "when to load" triggers. The agent reads a trigger like "Writing or fixing tests, mock patterns, E2E" and knows to load `testing.md`. Cost: one file read
+3. **Second hop (only when needed)** — If a topic file is large enough to have been split into a hub, it contains a sub-topics table with its own triggers. The agent loads the specific sub-file. Cost: one more file read
+4. **Maximum depth: two levels below MEMORY.md.** A third level of indirection costs more in navigational overhead than it saves in tokens
+**Design principles driving every structural decision**:
+- **Trigger-based loading**: Every file in the index has a "when to load" description written from the agent's task perspective — "Writing or fixing tests", not "Testing documentation"
+- **Hub files over bloated files**: When a topic file outgrows its target, promote it to a hub. Keep the 20% of content that covers 80% of use cases inline; split specialized detail into sub-files
+- **No orphan files**: Every file must be reachable from MEMORY.md within two hops. If a file isn't linked, the agent will never find it
+- **Scale with the codebase**: A 5-file CLI tool needs 3-5 memory files. A 30-service project with thousands of tests might need 20-30. File count follows complexity, not a fixed number
+- **Information completeness over compression** (**primary directive**): The entire codebase must be documented with sufficient depth for an agent to work with each module correctly. A one-line mention of a system is not documentation — it's an inventory entry. If adding proper depth pushes a file past its line target, create more files. Never sacrifice coverage to hit a line count. Line targets exist to trigger splits, not to cap documentation
 ---
 ## Phases
@@ -34,15 +57,18 @@ Read and map everything. No files produced — only understanding.
 **Pitfalls:** Non-obvious side effects, library workarounds, magic values, complex regex, unexplained constants, non-obvious business logic.
-**Cluster** learnings into topic areas → these become Tier 2 files.
+**Cluster** learnings into topic areas → these become Tier 2 files. For large codebases, identify which topics are broad enough to need sub-files and plan the hub structure now.
+**Coverage map (critical step):** Build an explicit mapping of every significant codebase module → the documentation file responsible for it. Every service, store, hook, feature, engine, and reusable system must appear in at least one memory file. If a module has no documentation home, either add it to an existing file or plan a new one. This map is your completeness checklist for Phase 3 — you will verify each entry is documented with sufficient depth, not just mentioned in a bullet point.
 ### Phase 2: CLAUDE.md (Tier 1)
 Create `CLAUDE.md` at project root. **Target: 250-350 lines. Hard constraint.**
-**Inclusion test:** *"If I removed this, would the AI write incorrect code on an unrelated task?"* No → Tier 2.
+**Inclusion test:** _"If I removed this, would the AI write incorrect code on an unrelated task?"_ No → Tier 2.
 **Required sections:**
 - **Project Identity** — One paragraph: what, who, why
 - **Workflow Rules** — Non-negotiable process (deploy, test, etc.)
 - **Tech Stack** — Table: technology | version | purpose
@@ -54,80 +80,227 @@ Create `CLAUDE.md` at project root. **Target: 250-350 lines. Hard constraint.**
 - **Build/Deploy Commands** — Copy-paste ready
 - **Coding Conventions** — Only those consistently followed in code
 - **Design System Rules** (if applicable) — Only if affecting every UI task; otherwise Tier 2
-- **Documentation Hierarchy** — Table telling AI where knowledge lives:
+- **Documentation Hierarchy** — Table telling AI where knowledge lives and how to navigate:
 ```markdown
 ## Documentation Hierarchy
-| Layer | Loaded | What goes here |
-|-------|--------|---------------|
-| **CLAUDE.md** | Every conversation | Rules preventing mistakes on ANY task |
-| **MEMORY.md** | Every conversation | Cross-cutting patterns/pitfalls |
-| **Sub-memory** (.claude/memory/) | On demand | Feature-specific deep dives |
-| **Inline comments** | When code is read | Non-obvious "why" explanations |
+| Layer                                 | Loaded             | What goes here                            |
+| ------------------------------------- | ------------------ | ----------------------------------------- |
+| **CLAUDE.md**                         | Every conversation | Rules preventing mistakes on ANY task     |
+| **MEMORY.md**                         | Every conversation | Navigation index + cross-cutting patterns |
+| **Topic files** (.claude/memory/)     | On demand          | Per-topic implementation details          |
+| **Sub-topic files** (.claude/memory/) | On demand          | Specialized detail within a topic         |
+| **Inline comments**                   | When code is read  | Non-obvious "why" explanations            |
+**Navigation**: MEMORY.md index → topic file → sub-topic file (if needed). Max 2 hops from cold start to answer. Every file reachable from MEMORY.md within 2 levels.
-Rule: Prevents mistakes on unrelated tasks → CLAUDE.md. Spans features → MEMORY.md. One feature only → sub-memory. Single line → inline comment.
+Rule: Prevents mistakes on unrelated tasks → CLAUDE.md. Spans features → MEMORY.md cross-cutting patterns. One feature → topic file. Narrow subtopic within a feature → sub-topic file. Single line → inline comment.
 ```
+**Note on hub files:** The hierarchy table above includes both topic files and sub-topic files. You don't need to know the full hub structure yet — Phase 3 covers it in detail. Just ensure CLAUDE.md's hierarchy table reflects both levels so agents know the navigation depth.
 **Does NOT belong in CLAUDE.md:** Feature implementation details, API response shapes, field-level schemas, testing patterns, debugging notes, security findings, historical context. All → Tier 2/3.
 **Format:** Terse, imperative. Tables and bullets, not paragraphs.
 ### Phase 3: Tier 2 Memory Files
-Create files at `.claude/memory/`.
+Create files at `.claude/memory/`. These are the documentation an agent loads on-demand to understand specific topics in depth.
+#### Two-Level Structure
+Memory files exist at two levels:
+- **Topic files**: Linked directly from MEMORY.md. One topic per file. This is what the agent loads first
+- **Sub-topic files**: Linked from a topic file that has become a hub. One narrow subtopic per file
+**Maximum depth: 2 levels below MEMORY.md.** The path is always: `MEMORY.md → topic file → sub-topic file`. Never deeper. If a sub-topic file itself outgrows its target, promote it to a topic file (move it up), don't nest deeper.
+#### Sizing and the Hub Pattern
+**Target: 40-80 lines per file.** This is a soft target, not a hard cap — the goal is token efficiency, not arbitrary limits. Files of 80-100 lines are fine if the content is cohesive. Past ~100 lines, split. When splitting:
+1. Identify which sections serve most tasks (the "always useful" core) vs. specialized tasks (the "sometimes useful" detail)
+2. Keep the core content inline in the file — aim for 40-60 lines in the hub
+3. Split specialized sections into sub-topic files
+4. Add a **Sub-Topics** table at the bottom of the hub with "when to load" triggers
+A topic file that has been split becomes a **hub file**. It still contains the most critical content inline — it is NOT reduced to a bare index. An agent loading only the hub should get what it needs for 80% of tasks involving that topic.
+**Hub file example:**
+```markdown
+# Testing — Tier 2 Reference
+## Infrastructure
+[Always-needed: framework, config, helpers — 15-20 lines]
+## Critical Anti-Patterns
+[Always-needed: mistakes that break tests — 10-15 lines]
+## Mock Patterns
+[Most common patterns — 10-15 lines]
+## Sub-Topics
+| File               | When to load                                 |
+| ------------------ | -------------------------------------------- |
+| testing-mocks.md   | Complex mock patterns for IPC, DB, or CJS    |
+| testing-e2e.md     | Running or writing E2E / Playwright tests    |
+| testing-quality.md | Mutation testing, coverage, assertion audits |
+```
+#### Coverage Verification (Do This Before Moving On)
+After drafting all topic files (and before Phase 4), verify coverage using the map from Phase 1:
+1. **For each module in the coverage map**: Find where it's documented. Read the actual documentation. Ask: "Does this give an agent enough detail to work with this module correctly — or just enough to know it exists?" A one-line mention is NOT sufficient documentation for a module with its own state, IPC channels, decision logic, or configuration
+2. **Depth test**: For each documented module, would an agent reading only this documentation be able to: modify behavior correctly, debug issues, add features, and avoid the known pitfalls? If not, the documentation is incomplete
+3. **Sub-file decision**: For any module where adding sufficient depth would push a topic file past ~80 lines, plan a sub-file. But also create sub-files when a topic file covers 3+ distinct systems and an agent working on one system would waste >40% of the file's tokens on irrelevant content — even if the file is within line targets
+4. **Gap action**: For any module with insufficient documentation depth, either expand the relevant topic file or create a new sub-file. Do not move to Phase 4 with known coverage gaps
+**The goal is not "every file is 40-80 lines." The goal is "every significant codebase module is documented with enough depth for an agent to work with it correctly." File count and line counts are consequences of completeness, not targets to satisfy.**
-**Rules:** One topic per file, 40-80 lines. Terse reference format. Don't repeat CLAUDE.md. Name by topic (`testing.md`) not area (`backend-stuff.md`). Assume reader has CLAUDE.md loaded.
+#### Content Rules
-**Each file covers:** Patterns/conventions, config details, correct-pattern snippets, common mistakes, external API quirks.
+- Terse reference format. Tables, bullets, code snippets — not prose
+- Don't repeat CLAUDE.md. Assume reader has it loaded
+- Name by topic (`testing.md`) not area (`backend-stuff.md`). Sub-files use parent prefix (`testing-mocks.md`, `testing-e2e.md`)
+- Each file covers: patterns/conventions, config details, correct-pattern snippets, common mistakes, external API quirks
 **Good** — tells you what to do:
 ```markdown
 ## Firestore Mock Routing
 Callables using `loadPromptForPhase()` + `recordUsage()` need collection routing:
 - `"prompts"` → return `{ doc: vi.fn(() => ({ get: async () => ({ exists: false }) })) }`
 - `"_rateLimits"` → return safe no-op mock
 ```
 **Bad** — teaches background knowledge (that's Tier 3):
 ```markdown
 ## About Firestore Mock Routing
 When writing tests for callable functions, you need to be aware that some callables
 access multiple Firestore collections...
 ```
-**Suggested files** (create only what's relevant):
+#### File Count Scaling
+File count scales with codebase complexity. Use this as rough guidance:
+| Codebase Size           | Topic Files | Sub-Topic Files | Total |
+| ----------------------- | ----------- | --------------- | ----- |
+| Small (< 20 files)      | 3-5         | 0-2             | 3-7   |
+| Medium (20-100 files)   | 5-10        | 2-5             | 7-15  |
+| Large (100-500 files)   | 8-15        | 5-15            | 13-30 |
+| Very large (500+ files) | 12-20       | 10-25           | 22-45 |
+**Indicators you should split a file:**
+- Exceeds ~100 lines
+- Covers 3+ distinct workflows or systems
+- Agents loading the file waste >50% of its content on most tasks
+- A module within the file has enough documentable detail (state shapes, decision logic, IPC channels, gotchas) to fill 30+ lines on its own — even if the parent file is within line targets. This is the coverage-driven split: the agent benefits from being able to load *just* that module's documentation without the surrounding context
+**Indicators you've over-split**: Multiple files under 20 lines. Agents need 3+ files for a single task. Hub files have more links than inline content. Two sub-files could be combined without exceeding 80 lines.
+#### Suggested Topic Files (create only what's relevant)
-| File | Covers |
-|------|--------|
-| testing.md | Framework config, mocks, pitfalls |
-| data-model.md | Field schemas, indexes, storage paths, migrations |
-| api-providers.md | External endpoints, auth, rate limits, quirks |
-| pitfalls-frontend.md | Framework gotchas, state traps, build issues |
-| pitfalls-backend.md | Server gotchas, auth helpers, error patterns |
-| feature-inventory.md | Features, shared components, reusable systems |
-| security.md | Auth details, vulnerabilities, audit findings |
-| deployment.md | Deploy process, env configs, infrastructure |
+| File                    | Covers                                            |
+| ----------------------- | ------------------------------------------------- |
+| testing.md              | Framework config, mocks, pitfalls                 |
+| data-model.md           | Field schemas, indexes, storage paths, migrations |
+| api-providers.md        | External endpoints, auth, rate limits, quirks     |
+| frontend-patterns.md    | Component patterns, stores, animations, theme     |
+| process-management.md   | Backend process lifecycle, spawn flow, guards     |
+| feature-inventory.md    | Features, shared components, reusable systems     |
+| security.md             | Auth details, vulnerabilities, audit findings     |
+| build-infrastructure.md | Build pipeline, CI/CD, packaging                  |
+| ipc-contracts.md        | IPC channels, schemas, handler conventions        |
+| account-management.md   | Auth flows, credential management, usage APIs     |
-Split/merge by project shape. **Target 8-15 files.** <5 = too broad. >20 = too granular.
+Split/merge by project shape. Not every project needs every file. Create what the codebase demands — the scaling table above is your guide, not a hard rule.
-### Phase 4: MEMORY.md (Tier 1 — Index)
+### Phase 4: MEMORY.md (Tier 1 — Navigation Index)
+Create `.claude/memory/MEMORY.md`. **Target: 40-80 lines.** This is the agent's primary navigation map — loaded on every conversation alongside CLAUDE.md.
+**Three roles:**
+1. **Orient** — Current project state (metrics, known debt, recent changes)
+2. **Navigate** — Topic index with trigger-based descriptions telling the agent which file to load
+3. **Remind** — Cross-cutting patterns too specific for CLAUDE.md but spanning multiple features
+#### Required Sections
-Create `.claude/memory/MEMORY.md`. **Target: 30-60 lines.** Index and state tracker only.
 ```markdown
 # Project Memory — Index
 [One-line description]. See CLAUDE.md for rules.
 ## Current State
-- [Key metrics: test count, endpoints, deploy URL, etc.]
-- [Recent major changes from git]
+- [Key metrics: test count, schema version, channel count, deploy URL, etc.]
+- [Known debt summary: 1-3 bullet points]
 ## Topic Files
-| File | When to load |
-|------|-------------|
-| testing.md | Writing or fixing tests |
-| data-model.md | Database schema or queries |
+| File                   | When to load                                        |
+| ---------------------- | --------------------------------------------------- |
+| `testing.md`           | Writing/fixing tests, mock patterns, E2E            |
+| `data-model.md`        | Database schema, queries, migrations, new tables    |
+| `frontend-patterns.md` | React components, stores, animations, design system |
+| `security.md`          | Auth flows, input validation, spawn security        |
+## Cross-Cutting Patterns
+- [Pattern]: [terse description of when/how to apply]
+- [Pattern]: [terse description of when/how to apply]
 ```
+#### Writing Good "When to Load" Triggers
+The topic index is the most important part of MEMORY.md. It is the agent's decision point — load this file or skip it. Write triggers from the **agent's task perspective**, not the file's content perspective.
+**Good triggers** — task-oriented, specific:
+| File            | When to load                                     |
+| --------------- | ------------------------------------------------ |
+| `testing.md`    | Writing or fixing tests, mock patterns, E2E      |
+| `security.md`   | Auth flows, input validation, spawn security     |
+| `data-model.md` | Database schema, queries, migrations, new tables |
+**Bad triggers** — vague, content-oriented:
+| File            | When to load          |
+| --------------- | --------------------- |
+| `testing.md`    | Testing documentation |
+| `security.md`   | Security details      |
+| `data-model.md` | Database information  |
+The agent should be able to read a trigger and immediately know: "yes, that's my current task" or "no, skip it."
+#### Cross-Cutting Patterns Section
+Include patterns that meet ALL three criteria:
+1. Too specific for CLAUDE.md (not every task needs them)
+2. Span multiple features (not one-file-only knowledge)
+3. High mistake frequency (agents get this wrong without the reminder)
+Examples: IPC envelope shapes, error handling helpers, state management gotchas. Keep to 10-15 bullets max. If this section grows past 15 items, move low-frequency ones into the most relevant topic file.
+#### Scaling MEMORY.md
+As the codebase grows and topic files multiply, MEMORY.md's index table grows too — but only the table. Cross-cutting patterns stay compact. If MEMORY.md exceeds ~100 lines, audit it: move low-frequency cross-cutting patterns into topic files. The index table can be as long as needed — each row costs 1 line and saves the agent from loading the wrong file.
 ### Phase 5: Version Control
 `.gitignore`:
@@ -137,15 +310,18 @@ Create `.claude/memory/MEMORY.md`. **Target: 30-60 lines.** Index and state trac
 In addition to writing the full report file, you MUST print a summary directly in the conversation when you finish. Do not make the user open the report to get the highlights. The chat summary should include:
 ### 1. Status Line
 One sentence: what you did, how long it took, and whether all tests still pass.
 ### 2. Key Findings
 The most important things discovered — bugs, risks, wins, or surprises. Each bullet should be specific and actionable, not vague. Lead with severity or impact.
 **Good:** "CRITICAL: No backup configuration found for the primary Postgres database — total data loss risk."
 **Bad:** "Found some issues with backups."
 ### 3. Changes Made (if applicable)
 Bullet list of what was actually modified, added, or removed. Skip this section for read-only analysis runs.
 ### 4. Recommendations
@@ -154,13 +330,14 @@ If there are legitimately beneficial recommendations worth pursuing right now, p
 When recommendations exist, use this table format:
-| # | Recommendation | Impact | Risk if Ignored | Worth Doing? | Details |
-|---|---|---|---|---|---|
-| *Sequential number* | *Short description (≤10 words)* | *What improves if addressed* | *Low / Medium / High / Critical* | *Yes / Probably / Only if time allows* | *1–3 sentences explaining the reasoning, context, or implementation guidance* |
+| #                   | Recommendation                  | Impact                       | Risk if Ignored                  | Worth Doing?                           | Details                                                                       |
+| ------------------- | ------------------------------- | ---------------------------- | -------------------------------- | -------------------------------------- | ----------------------------------------------------------------------------- |
+| _Sequential number_ | _Short description (≤10 words)_ | _What improves if addressed_ | _Low / Medium / High / Critical_ | _Yes / Probably / Only if time allows_ | _1–3 sentences explaining the reasoning, context, or implementation guidance_ |
 Order rows by risk descending (Critical → High → Medium → Low). Be honest in the "Worth Doing?" column — not everything flagged is worth the engineering time. If a recommendation is marginal, say so.
 ### 5. Report Location
 State the full path to the detailed report file for deeper review.
 Create `audit-reports/` in project root if needed. Save as `audit-reports/01_DOCUMENTATION_COVERAGE_REPORT_[run-number]_[date]_[time in user's local time].md`, incrementing run number based on existing reports.
@@ -168,6 +345,7 @@ Create `audit-reports/` in project root if needed. Save as `audit-reports/01_DOC
 ---
 **Formatting rules for chat output:**
 - Use markdown headers, bold for severity labels, and bullet points for scannability.
 - Do not duplicate the full report contents — just the highlights and recommendations.
 - If you made zero findings in a phase, say so in one line rather than omitting it silently.