nightytidy 0.2.3 → 0.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "nightytidy",
3
- "version": "0.2.3",
3
+ "version": "0.2.6",
4
4
  "description": "Automated overnight codebase improvement through Claude Code",
5
5
  "license": "MIT",
6
6
  "author": "Dorian Spitz",
@@ -1,6 +1,9 @@
1
1
  import { spawn } from 'node:child_process';
2
2
  import path from 'node:path';
3
- import { debug, error as logError } from '../logger.js';
3
+ import { debug, warn, error as logError } from '../logger.js';
4
+
5
+ const INIT_TIMEOUT_MS = 5 * 60_000; // 5 minutes — init should never take this long
6
+ const FINISH_TIMEOUT_MS = 10 * 60_000; // 10 minutes — finish includes report generation
4
7
 
5
8
  export class CliBridge {
6
9
  constructor(projectDir) {
@@ -9,19 +12,20 @@ export class CliBridge {
9
12
  }
10
13
 
11
14
  async listSteps() {
12
- return this._run(CliBridge.buildArgs({ list: true }));
15
+ return this._run(CliBridge.buildArgs({ list: true }), null, { timeout: 30_000 });
13
16
  }
14
17
 
15
18
  async initRun(steps, timeout) {
16
- return this._run(CliBridge.buildArgs({ initRun: true, steps, timeout }));
19
+ return this._run(CliBridge.buildArgs({ initRun: true, steps, timeout }), null, { timeout: INIT_TIMEOUT_MS });
17
20
  }
18
21
 
19
22
  async runStep(stepNum, onOutput) {
23
+ // No timeout on steps — they have their own per-step timeout via the CLI
20
24
  return this._run(CliBridge.buildArgs({ runStep: stepNum }), onOutput);
21
25
  }
22
26
 
23
27
  async finishRun() {
24
- return this._run(CliBridge.buildArgs({ finishRun: true }));
28
+ return this._run(CliBridge.buildArgs({ finishRun: true }), null, { timeout: FINISH_TIMEOUT_MS });
25
29
  }
26
30
 
27
31
  kill() {
@@ -71,7 +75,7 @@ export class CliBridge {
71
75
  return null;
72
76
  }
73
77
 
74
- _run(args, onOutput) {
78
+ _run(args, onOutput, opts = {}) {
75
79
  return new Promise((resolve, reject) => {
76
80
  const binPath = path.resolve(import.meta.dirname, '../../bin/nightytidy.js');
77
81
  const proc = spawn('node', [binPath, ...args], {
@@ -82,6 +86,18 @@ export class CliBridge {
82
86
 
83
87
  let stdout = '';
84
88
  let stderr = '';
89
+ let killed = false;
90
+
91
+ // Timeout — kill the process if it takes too long
92
+ let timer = null;
93
+ if (opts.timeout) {
94
+ timer = setTimeout(() => {
95
+ killed = true;
96
+ const timeoutSec = Math.round(opts.timeout / 1000);
97
+ warn(`CLI process timed out after ${timeoutSec}s: ${args.join(' ')}`);
98
+ this.kill();
99
+ }, opts.timeout);
100
+ }
85
101
 
86
102
  proc.stdout.on('data', (data) => {
87
103
  const text = data.toString();
@@ -111,18 +127,23 @@ export class CliBridge {
111
127
  });
112
128
 
113
129
  proc.on('close', (code) => {
130
+ if (timer) clearTimeout(timer);
114
131
  this.activeProcess = null;
115
132
  const parsed = CliBridge.parseOutput(stdout);
116
133
  resolve({
117
- success: code === 0,
134
+ success: code === 0 && !killed,
118
135
  exitCode: code,
119
136
  stdout,
120
- stderr,
137
+ stderr: killed
138
+ ? `Process timed out after ${Math.round(opts.timeout / 1000)}s — Claude Code may be unavailable`
139
+ : stderr,
121
140
  parsed,
141
+ timedOut: killed,
122
142
  });
123
143
  });
124
144
 
125
145
  proc.on('error', (err) => {
146
+ if (timer) clearTimeout(timer);
126
147
  this.activeProcess = null;
127
148
  logError(`CLI process error: ${err.message}`);
128
149
  resolve({
@@ -131,6 +152,7 @@ export class CliBridge {
131
152
  stdout,
132
153
  stderr: err.message,
133
154
  parsed: null,
155
+ timedOut: false,
134
156
  });
135
157
  });
136
158
  });
@@ -483,6 +483,10 @@ export async function startAgent() {
483
483
 
484
484
  const project = projectManager.getProject(run.projectId);
485
485
  if (!project) {
486
+ dispatchWithQueue('run_failed', {
487
+ projectId: run.projectId,
488
+ run: { id: run.id },
489
+ }, []);
486
490
  runQueue.completeCurrent({ success: false });
487
491
  processQueue();
488
492
  return;
@@ -497,11 +501,32 @@ export async function startAgent() {
497
501
  info(` Steps: [${run.steps.join(', ')}] (${run.steps.length} total)`);
498
502
  info(` Project: ${project.path}`);
499
503
 
504
+ // Clean stale files from previous failed/abandoned runs
505
+ // so --init-run doesn't refuse to start
506
+ for (const staleFile of ['nightytidy-run-state.json', 'nightytidy.lock']) {
507
+ try {
508
+ const filePath = path.join(project.path, staleFile);
509
+ if (fs.existsSync(filePath)) {
510
+ fs.unlinkSync(filePath);
511
+ debug(`Removed stale ${staleFile}`);
512
+ }
513
+ } catch { /* ignore — init-run will report if it's still a problem */ }
514
+ }
515
+
500
516
  wsServer.broadcast({ type: 'run-started', runId: run.id, projectId: run.projectId, projectName: project.name, branch: '' });
517
+ wsServer.broadcast({ type: 'run-status', runId: run.id, status: 'initializing', message: 'Running pre-checks and setting up git branch...' });
501
518
  const initResult = await bridge.initRun(run.steps, run.timeout);
502
519
  if (!initResult.success) {
503
- info(` ✗ Init failed: ${initResult.stderr}`);
504
- wsServer.broadcast({ type: 'run-failed', runId: run.id, error: initResult.stderr });
520
+ const errorMsg = initResult.timedOut
521
+ ? 'Initialization timed out — Claude Code may be unavailable. Restart the agent to retry.'
522
+ : (initResult.parsed?.error || initResult.stderr || 'Unknown init error');
523
+ info(` ✗ Init failed: ${errorMsg}`);
524
+ wsServer.broadcast({ type: 'run-failed', runId: run.id, error: errorMsg });
525
+ dispatchWithQueue('run_failed', {
526
+ project: project.name,
527
+ projectId: project.id,
528
+ run: { id: run.id },
529
+ }, project.webhooks);
505
530
  runQueue.completeCurrent({ success: false });
506
531
  processQueue();
507
532
  return;
@@ -719,8 +744,14 @@ export async function startAgent() {
719
744
  activeBridge.kill();
720
745
  activeBridge = null;
721
746
  }
747
+ const project = projectManager.getProject(current.projectId);
722
748
  runQueue.completeCurrent({ success: false });
723
749
  wsServer.broadcast({ type: 'run-failed', runId: msg.runId, error: 'Stopped by user' });
750
+ dispatchWithQueue('run_failed', {
751
+ project: project?.name,
752
+ projectId: current.projectId,
753
+ run: { id: msg.runId },
754
+ }, project?.webhooks || []);
724
755
  reply({ type: 'run-failed', runId: msg.runId, error: 'Stopped by user' });
725
756
  processQueue();
726
757
  } else {
package/src/executor.js CHANGED
@@ -66,7 +66,7 @@ import { info, warn, error as logError } from './logger.js';
66
66
  // SHA-256 of all STEPS[].prompt content — update when prompts change.
67
67
  // Detects unexpected modification of prompt data before passing to
68
68
  // Claude Code with --dangerously-skip-permissions.
69
- const STEPS_HASH = 'c341ed4301dc1600d848da5457d319e7f1c5a51c215e1142d3889aa3684fd7cf';
69
+ const STEPS_HASH = 'ba4e25bc096db265682a8576d543c0a3697543e238ab99f852f99038581037be';
70
70
 
71
71
  // Hard cap on total step duration (all retries + doc-update combined).
72
72
  // Without this, retries × phases can exceed the user's expected timeout.
@@ -8,14 +8,37 @@ AI agents pay a token cost for every line loaded into context — whether releva
8
8
  - **Tier 2 (On-Demand):** Per-topic implementation details. Loaded only when relevant. ~1-2% per task.
9
9
  - **Tier 3 (Deep Reference):** Human-facing docs, ADRs, API reference. Never auto-loaded. Zero token cost.
10
10
 
11
- | Tier | Lines | Tokens | % of 200K |
12
- |------|-------|--------|-----------|
13
- | Always (Tier 1) | 300-400 | 10-13K | 5-7% |
14
- | Per-task (Tier 2, 1-2 files) | 60-120 | 2-4K | 1-2% |
15
- | **Typical total** | **360-520** | **12-17K** | **6-9%** |
11
+ | Tier | Lines | Tokens | % of 200K |
12
+ | ---------------------------- | ----------- | ---------- | --------- |
13
+ | Always (Tier 1) | 300-400 | 10-13K | 5-7% |
14
+ | Per-task (Tier 2, 1-2 files) | 60-120 | 2-4K | 1-2% |
15
+ | **Typical total** | **360-520** | **12-17K** | **6-9%** |
16
16
 
17
17
  Primary deliverable: Tier 1 + Tier 2. Tier 3 is secondary.
18
18
 
19
+ ## Documentation Philosophy: Progressive Disclosure
20
+
21
+ The goal of this documentation system is simple: **an AI agent wakes up knowing nothing about this codebase and can navigate to exactly the information it needs — quickly and token-efficiently.**
22
+
23
+ Every conversation starts cold. The agent has no memory of previous sessions, no familiarity with your architecture, and a finite context window. Every line loaded into that window is a tradeoff — useful context that helps vs. irrelevant context that displaces working memory for the actual task. A flat documentation dump forces the agent to load everything to find anything. Progressive disclosure fixes this.
24
+
25
+ **How it works**: The agent gets a compact map first (Tier 1), then navigates to exactly the detail it needs (Tier 2 topic file), and only if the topic is deep enough, one more level down (Tier 2 sub-file). At most two navigational hops from cold start to specific answer.
26
+
27
+ **The navigation chain**:
28
+
29
+ 1. **Always loaded** — CLAUDE.md + MEMORY.md are in context on every conversation. These orient the agent and tell it where to look next. Combined: ~12-17K tokens
30
+ 2. **First hop** — MEMORY.md contains a topic index with "when to load" triggers. The agent reads a trigger like "Writing or fixing tests, mock patterns, E2E" and knows to load `testing.md`. Cost: one file read
31
+ 3. **Second hop (only when needed)** — If a topic file is large enough to have been split into a hub, it contains a sub-topics table with its own triggers. The agent loads the specific sub-file. Cost: one more file read
32
+ 4. **Maximum depth: two levels below MEMORY.md.** A third level of indirection costs more in navigational overhead than it saves in tokens
33
+
34
+ **Design principles driving every structural decision**:
35
+
36
+ - **Trigger-based loading**: Every file in the index has a "when to load" description written from the agent's task perspective — "Writing or fixing tests", not "Testing documentation"
37
+ - **Hub files over bloated files**: When a topic file outgrows its target, promote it to a hub. Keep the 20% of content that covers 80% of use cases inline; split specialized detail into sub-files
38
+ - **No orphan files**: Every file must be reachable from MEMORY.md within two hops. If a file isn't linked, the agent will never find it
39
+ - **Scale with the codebase**: A 5-file CLI tool needs 3-5 memory files. A 30-service project with thousands of tests might need 20-30. File count follows complexity, not a fixed number
40
+ - **Information completeness over compression** (**primary directive**): The entire codebase must be documented with sufficient depth for an agent to work with each module correctly. A one-line mention of a system is not documentation — it's an inventory entry. If adding proper depth pushes a file past its line target, create more files. Never sacrifice coverage to hit a line count. Line targets exist to trigger splits, not to cap documentation
41
+
19
42
  ---
20
43
 
21
44
  ## Phases
@@ -34,15 +57,18 @@ Read and map everything. No files produced — only understanding.
34
57
 
35
58
  **Pitfalls:** Non-obvious side effects, library workarounds, magic values, complex regex, unexplained constants, non-obvious business logic.
36
59
 
37
- **Cluster** learnings into topic areas → these become Tier 2 files.
60
+ **Cluster** learnings into topic areas → these become Tier 2 files. For large codebases, identify which topics are broad enough to need sub-files and plan the hub structure now.
61
+
62
+ **Coverage map (critical step):** Build an explicit mapping of every significant codebase module → the documentation file responsible for it. Every service, store, hook, feature, engine, and reusable system must appear in at least one memory file. If a module has no documentation home, either add it to an existing file or plan a new one. This map is your completeness checklist for Phase 3 — you will verify each entry is documented with sufficient depth, not just mentioned in a bullet point.
38
63
 
39
64
  ### Phase 2: CLAUDE.md (Tier 1)
40
65
 
41
66
  Create `CLAUDE.md` at project root. **Target: 250-350 lines. Hard constraint.**
42
67
 
43
- **Inclusion test:** *"If I removed this, would the AI write incorrect code on an unrelated task?"* No → Tier 2.
68
+ **Inclusion test:** _"If I removed this, would the AI write incorrect code on an unrelated task?"_ No → Tier 2.
44
69
 
45
70
  **Required sections:**
71
+
46
72
  - **Project Identity** — One paragraph: what, who, why
47
73
  - **Workflow Rules** — Non-negotiable process (deploy, test, etc.)
48
74
  - **Tech Stack** — Table: technology | version | purpose
@@ -54,80 +80,227 @@ Create `CLAUDE.md` at project root. **Target: 250-350 lines. Hard constraint.**
54
80
  - **Build/Deploy Commands** — Copy-paste ready
55
81
  - **Coding Conventions** — Only those consistently followed in code
56
82
  - **Design System Rules** (if applicable) — Only if affecting every UI task; otherwise Tier 2
57
- - **Documentation Hierarchy** — Table telling AI where knowledge lives:
83
+ - **Documentation Hierarchy** — Table telling AI where knowledge lives and how to navigate:
84
+
58
85
  ```markdown
59
86
  ## Documentation Hierarchy
60
87
 
61
- | Layer | Loaded | What goes here |
62
- |-------|--------|---------------|
63
- | **CLAUDE.md** | Every conversation | Rules preventing mistakes on ANY task |
64
- | **MEMORY.md** | Every conversation | Cross-cutting patterns/pitfalls |
65
- | **Sub-memory** (.claude/memory/) | On demand | Feature-specific deep dives |
66
- | **Inline comments** | When code is read | Non-obvious "why" explanations |
88
+ | Layer | Loaded | What goes here |
89
+ | ------------------------------------- | ------------------ | ----------------------------------------- |
90
+ | **CLAUDE.md** | Every conversation | Rules preventing mistakes on ANY task |
91
+ | **MEMORY.md** | Every conversation | Navigation index + cross-cutting patterns |
92
+ | **Topic files** (.claude/memory/) | On demand | Per-topic implementation details |
93
+ | **Sub-topic files** (.claude/memory/) | On demand | Specialized detail within a topic |
94
+ | **Inline comments** | When code is read | Non-obvious "why" explanations |
95
+
96
+ **Navigation**: MEMORY.md index → topic file → sub-topic file (if needed). Max 2 hops from cold start to answer. Every file reachable from MEMORY.md within 2 levels.
67
97
 
68
- Rule: Prevents mistakes on unrelated tasks → CLAUDE.md. Spans features → MEMORY.md. One feature only → sub-memory. Single line → inline comment.
98
+ Rule: Prevents mistakes on unrelated tasks → CLAUDE.md. Spans features → MEMORY.md cross-cutting patterns. One feature → topic file. Narrow subtopic within a feature → sub-topic file. Single line → inline comment.
69
99
  ```
70
100
 
101
+ **Note on hub files:** The hierarchy table above includes both topic files and sub-topic files. You don't need to know the full hub structure yet — Phase 3 covers it in detail. Just ensure CLAUDE.md's hierarchy table reflects both levels so agents know the navigation depth.
102
+
71
103
  **Does NOT belong in CLAUDE.md:** Feature implementation details, API response shapes, field-level schemas, testing patterns, debugging notes, security findings, historical context. All → Tier 2/3.
72
104
 
73
105
  **Format:** Terse, imperative. Tables and bullets, not paragraphs.
74
106
 
75
107
  ### Phase 3: Tier 2 Memory Files
76
108
 
77
- Create files at `.claude/memory/`.
109
+ Create files at `.claude/memory/`. These are the documentation an agent loads on-demand to understand specific topics in depth.
110
+
111
+ #### Two-Level Structure
112
+
113
+ Memory files exist at two levels:
114
+
115
+ - **Topic files**: Linked directly from MEMORY.md. One topic per file. This is what the agent loads first
116
+ - **Sub-topic files**: Linked from a topic file that has become a hub. One narrow subtopic per file
117
+
118
+ **Maximum depth: 2 levels below MEMORY.md.** The path is always: `MEMORY.md → topic file → sub-topic file`. Never deeper. If a sub-topic file itself outgrows its target, promote it to a topic file (move it up), don't nest deeper.
119
+
120
+ #### Sizing and the Hub Pattern
121
+
122
+ **Target: 40-80 lines per file.** This is a soft target, not a hard cap — the goal is token efficiency, not arbitrary limits. Files between 80 and 100 lines are fine if the content is cohesive. Past ~100 lines, split. When splitting:
123
+
124
+ 1. Identify which sections serve most tasks (the "always useful" core) vs. specialized tasks (the "sometimes useful" detail)
125
+ 2. Keep the core content inline in the file — aim for 40-60 lines in the hub
126
+ 3. Split specialized sections into sub-topic files
127
+ 4. Add a **Sub-Topics** table at the bottom of the hub with "when to load" triggers
128
+
129
+ A topic file that has been split becomes a **hub file**. It still contains the most critical content inline — it is NOT reduced to a bare index. An agent loading only the hub should get what it needs for 80% of tasks involving that topic.
130
+
131
+ **Hub file example:**
132
+
133
+ ```markdown
134
+ # Testing — Tier 2 Reference
135
+
136
+ ## Infrastructure
137
+
138
+ [Always-needed: framework, config, helpers — 15-20 lines]
139
+
140
+ ## Critical Anti-Patterns
141
+
142
+ [Always-needed: mistakes that break tests — 10-15 lines]
143
+
144
+ ## Mock Patterns
145
+
146
+ [Most common patterns — 10-15 lines]
147
+
148
+ ## Sub-Topics
149
+
150
+ | File | When to load |
151
+ | ------------------ | -------------------------------------------- |
152
+ | testing-mocks.md | Complex mock patterns for IPC, DB, or CJS |
153
+ | testing-e2e.md | Running or writing E2E / Playwright tests |
154
+ | testing-quality.md | Mutation testing, coverage, assertion audits |
155
+ ```
156
+
157
+ #### Coverage Verification (Do This Before Moving On)
158
+
159
+ After drafting all topic files (and before Phase 4), verify coverage using the map from Phase 1:
160
+
161
+ 1. **For each module in the coverage map**: Find where it's documented. Read the actual documentation. Ask: "Does this give an agent enough detail to work with this module correctly — or just enough to know it exists?" A one-line mention is NOT sufficient documentation for a module with its own state, IPC channels, decision logic, or configuration
162
+ 2. **Depth test**: For each documented module, would an agent reading only this documentation be able to: modify behavior correctly, debug issues, add features, and avoid the known pitfalls? If not, the documentation is incomplete
163
+ 3. **Sub-file decision**: For any module where adding sufficient depth would push a topic file past ~80 lines, plan a sub-file. But also create sub-files when a topic file covers 3+ distinct systems and an agent working on one system would waste >40% of the file's tokens on irrelevant content — even if the file is within line targets
164
+ 4. **Gap action**: For any module with insufficient documentation depth, either expand the relevant topic file or create a new sub-file. Do not move to Phase 4 with known coverage gaps
165
+
166
+ **The goal is not "every file is 40-80 lines." The goal is "every significant codebase module is documented with enough depth for an agent to work with it correctly." File count and line counts are consequences of completeness, not targets to satisfy.**
78
167
 
79
- **Rules:** One topic per file, 40-80 lines. Terse reference format. Don't repeat CLAUDE.md. Name by topic (`testing.md`) not area (`backend-stuff.md`). Assume reader has CLAUDE.md loaded.
168
+ #### Content Rules
80
169
 
81
- **Each file covers:** Patterns/conventions, config details, correct-pattern snippets, common mistakes, external API quirks.
170
+ - Terse reference format. Tables, bullets, and code snippets — not prose
171
+ - Don't repeat CLAUDE.md. Assume reader has it loaded
172
+ - Name by topic (`testing.md`) not area (`backend-stuff.md`). Sub-files use parent prefix (`testing-mocks.md`, `testing-e2e.md`)
173
+ - Each file covers: patterns/conventions, config details, correct-pattern snippets, common mistakes, external API quirks
82
174
 
83
175
  **Good** — tells you what to do:
176
+
84
177
  ```markdown
85
178
  ## Firestore Mock Routing
179
+
86
180
  Callables using `loadPromptForPhase()` + `recordUsage()` need collection routing:
181
+
87
182
  - `"prompts"` → return `{ doc: vi.fn(() => ({ get: async () => ({ exists: false }) })) }`
88
183
  - `"_rateLimits"` → return safe no-op mock
89
184
  ```
90
185
 
91
186
  **Bad** — teaches background knowledge (that's Tier 3):
187
+
92
188
  ```markdown
93
189
  ## About Firestore Mock Routing
190
+
94
191
  When writing tests for callable functions, you need to be aware that some callables
95
192
  access multiple Firestore collections...
96
193
  ```
97
194
 
98
- **Suggested files** (create only what's relevant):
195
+ #### File Count Scaling
196
+
197
+ File count scales with codebase complexity. Use this as rough guidance:
198
+
199
+ | Codebase Size | Topic Files | Sub-Topic Files | Total |
200
+ | ----------------------- | ----------- | --------------- | ----- |
201
+ | Small (< 20 files) | 3-5 | 0-2 | 3-7 |
202
+ | Medium (20-100 files) | 5-10 | 2-5 | 7-15 |
203
+ | Large (100-500 files) | 8-15 | 5-15 | 13-30 |
204
+ | Very large (500+ files) | 12-20 | 10-25 | 22-45 |
205
+
206
+ **Indicators you should split a file:**
207
+ - Exceeds ~100 lines
208
+ - Covers 3+ distinct workflows or systems
209
+ - Agents loading the file waste >50% of its content on most tasks
210
+ - A module within the file has enough documentable detail (state shapes, decision logic, IPC channels, gotchas) to fill 30+ lines on its own — even if the parent file is within line targets. This is the coverage-driven split: the agent benefits from being able to load *just* that module's documentation without the surrounding context
211
+
212
+ **Indicators you've over-split**: Multiple files under 20 lines. Agents need 3+ files for a single task. Hub files have more links than inline content. Two sub-files could be combined without exceeding 80 lines.
213
+
214
+ #### Suggested Topic Files (create only what's relevant)
99
215
 
100
- | File | Covers |
101
- |------|--------|
102
- | testing.md | Framework config, mocks, pitfalls |
103
- | data-model.md | Field schemas, indexes, storage paths, migrations |
104
- | api-providers.md | External endpoints, auth, rate limits, quirks |
105
- | pitfalls-frontend.md | Framework gotchas, state traps, build issues |
106
- | pitfalls-backend.md | Server gotchas, auth helpers, error patterns |
107
- | feature-inventory.md | Features, shared components, reusable systems |
108
- | security.md | Auth details, vulnerabilities, audit findings |
109
- | deployment.md | Deploy process, env configs, infrastructure |
216
+ | File | Covers |
217
+ | ----------------------- | ------------------------------------------------- |
218
+ | testing.md | Framework config, mocks, pitfalls |
219
+ | data-model.md | Field schemas, indexes, storage paths, migrations |
220
+ | api-providers.md | External endpoints, auth, rate limits, quirks |
221
+ | frontend-patterns.md | Component patterns, stores, animations, theme |
222
+ | process-management.md | Backend process lifecycle, spawn flow, guards |
223
+ | feature-inventory.md | Features, shared components, reusable systems |
224
+ | security.md | Auth details, vulnerabilities, audit findings |
225
+ | build-infrastructure.md | Build pipeline, CI/CD, packaging |
226
+ | ipc-contracts.md | IPC channels, schemas, handler conventions |
227
+ | account-management.md | Auth flows, credential management, usage APIs |
110
228
 
111
- Split/merge by project shape. **Target 8-15 files.** <5 = too broad. >20 = too granular.
229
+ Split/merge by project shape. Not every project needs every file. Create what the codebase demands — the scaling table above is your guide, not a hard rule.
112
230
 
113
- ### Phase 4: MEMORY.md (Tier 1 — Index)
231
+ ### Phase 4: MEMORY.md (Tier 1 — Navigation Index)
232
+
233
+ Create `.claude/memory/MEMORY.md`. **Target: 40-80 lines.** This is the agent's primary navigation map — loaded on every conversation alongside CLAUDE.md.
234
+
235
+ **Three roles:**
236
+
237
+ 1. **Orient** — Current project state (metrics, known debt, recent changes)
238
+ 2. **Navigate** — Topic index with trigger-based descriptions telling the agent which file to load
239
+ 3. **Remind** — Cross-cutting patterns too specific for CLAUDE.md but spanning multiple features
240
+
241
+ #### Required Sections
114
242
 
115
- Create `.claude/memory/MEMORY.md`. **Target: 30-60 lines.** Index and state tracker only.
116
243
  ```markdown
117
244
  # Project Memory — Index
245
+
118
246
  [One-line description]. See CLAUDE.md for rules.
119
247
 
120
248
  ## Current State
121
- - [Key metrics: test count, endpoints, deploy URL, etc.]
122
- - [Recent major changes from git]
249
+
250
+ - [Key metrics: test count, schema version, channel count, deploy URL, etc.]
251
+ - [Known debt summary: 1-3 bullet points]
123
252
 
124
253
  ## Topic Files
125
- | File | When to load |
126
- |------|-------------|
127
- | testing.md | Writing or fixing tests |
128
- | data-model.md | Database schema or queries |
254
+
255
+ | File | When to load |
256
+ | ---------------------- | --------------------------------------------------- |
257
+ | `testing.md` | Writing/fixing tests, mock patterns, E2E |
258
+ | `data-model.md` | Database schema, queries, migrations, new tables |
259
+ | `frontend-patterns.md` | React components, stores, animations, design system |
260
+ | `security.md` | Auth flows, input validation, spawn security |
261
+
262
+ ## Cross-Cutting Patterns
263
+
264
+ - [Pattern]: [terse description of when/how to apply]
265
+ - [Pattern]: [terse description of when/how to apply]
129
266
  ```
130
267
 
268
+ #### Writing Good "When to Load" Triggers
269
+
270
+ The topic index is the most important part of MEMORY.md. It is the agent's decision point — load this file or skip it. Write triggers from the **agent's task perspective**, not the file's content perspective.
271
+
272
+ **Good triggers** — task-oriented, specific:
273
+
274
+ | File | When to load |
275
+ | --------------- | ------------------------------------------------ |
276
+ | `testing.md` | Writing or fixing tests, mock patterns, E2E |
277
+ | `security.md` | Auth flows, input validation, spawn security |
278
+ | `data-model.md` | Database schema, queries, migrations, new tables |
279
+
280
+ **Bad triggers** — vague, content-oriented:
281
+
282
+ | File | When to load |
283
+ | --------------- | --------------------- |
284
+ | `testing.md` | Testing documentation |
285
+ | `security.md` | Security details |
286
+ | `data-model.md` | Database information |
287
+
288
+ The agent should be able to read a trigger and immediately know: "yes, that's my current task" or "no, skip it."
289
+
290
+ #### Cross-Cutting Patterns Section
291
+
292
+ Include patterns that meet ALL three criteria:
293
+
294
+ 1. Too specific for CLAUDE.md (not every task needs them)
295
+ 2. Span multiple features (not one-file-only knowledge)
296
+ 3. High mistake frequency (agents get this wrong without the reminder)
297
+
298
+ Examples: IPC envelope shapes, error handling helpers, state management gotchas. Keep to 10-15 bullets max. If this section grows past 15 items, move low-frequency ones into the most relevant topic file.
299
+
300
+ #### Scaling MEMORY.md
301
+
302
+ As the codebase grows and topic files multiply, MEMORY.md's index table grows too — but only the table. Cross-cutting patterns stay compact. If MEMORY.md exceeds ~100 lines, audit it: move low-frequency cross-cutting patterns into topic files. The index table can be as long as needed — each row costs 1 line and saves the agent from loading the wrong file.
303
+
131
304
  ### Phase 5: Version Control
132
305
 
133
306
  `.gitignore`:
@@ -137,15 +310,18 @@ Create `.claude/memory/MEMORY.md`. **Target: 30-60 lines.** Index and state trac
137
310
  In addition to writing the full report file, you MUST print a summary directly in the conversation when you finish. Do not make the user open the report to get the highlights. The chat summary should include:
138
311
 
139
312
  ### 1. Status Line
313
+
140
314
  One sentence: what you did, how long it took, and whether all tests still pass.
141
315
 
142
316
  ### 2. Key Findings
317
+
143
318
  The most important things discovered — bugs, risks, wins, or surprises. Each bullet should be specific and actionable, not vague. Lead with severity or impact.
144
319
 
145
320
  **Good:** "CRITICAL: No backup configuration found for the primary Postgres database — total data loss risk."
146
321
  **Bad:** "Found some issues with backups."
147
322
 
148
323
  ### 3. Changes Made (if applicable)
324
+
149
325
  Bullet list of what was actually modified, added, or removed. Skip this section for read-only analysis runs.
150
326
 
151
327
  ### 4. Recommendations
@@ -154,13 +330,14 @@ If there are legitimately beneficial recommendations worth pursuing right now, p
154
330
 
155
331
  When recommendations exist, use this table format:
156
332
 
157
- | # | Recommendation | Impact | Risk if Ignored | Worth Doing? | Details |
158
- |---|---|---|---|---|---|
159
- | *Sequential number* | *Short description (≤10 words)* | *What improves if addressed* | *Low / Medium / High / Critical* | *Yes / Probably / Only if time allows* | *1–3 sentences explaining the reasoning, context, or implementation guidance* |
333
+ | # | Recommendation | Impact | Risk if Ignored | Worth Doing? | Details |
334
+ | ------------------- | ------------------------------- | ---------------------------- | -------------------------------- | -------------------------------------- | ----------------------------------------------------------------------------- |
335
+ | _Sequential number_ | _Short description (≤10 words)_ | _What improves if addressed_ | _Low / Medium / High / Critical_ | _Yes / Probably / Only if time allows_ | _1–3 sentences explaining the reasoning, context, or implementation guidance_ |
160
336
 
161
337
  Order rows by risk descending (Critical → High → Medium → Low). Be honest in the "Worth Doing?" column — not everything flagged is worth the engineering time. If a recommendation is marginal, say so.
162
338
 
163
339
  ### 5. Report Location
340
+
164
341
  State the full path to the detailed report file for deeper review.
165
342
 
166
343
  Create `audit-reports/` in project root if needed. Save as `audit-reports/01_DOCUMENTATION_COVERAGE_REPORT_[run-number]_[date]_[time in user's local time].md`, incrementing run number based on existing reports.
@@ -168,6 +345,7 @@ Create `audit-reports/` in project root if needed. Save as `audit-reports/01_DOC
168
345
  ---
169
346
 
170
347
  **Formatting rules for chat output:**
348
+
171
349
  - Use markdown headers, bold for severity labels, and bullet points for scannability.
172
350
  - Do not duplicate the full report contents — just the highlights and recommendations.
173
351
  - If you made zero findings in a phase, say so in one line rather than omitting it silently.