nightytidy 0.2.4 → 0.2.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/agent/cli-bridge.js +29 -7
- package/src/agent/index.js +25 -3
- package/src/agent/keep-awake.js +60 -0
- package/src/executor.js +1 -1
- package/src/prompts/steps/01-documentation.md +219 -41
package/package.json
CHANGED
package/src/agent/cli-bridge.js
CHANGED
|
@@ -1,6 +1,9 @@
|
|
|
1
1
|
import { spawn } from 'node:child_process';
|
|
2
2
|
import path from 'node:path';
|
|
3
|
-
import { debug, error as logError } from '../logger.js';
|
|
3
|
+
import { debug, warn, error as logError } from '../logger.js';
|
|
4
|
+
|
|
5
|
+
const INIT_TIMEOUT_MS = 5 * 60_000; // 5 minutes — init should never take this long
|
|
6
|
+
const FINISH_TIMEOUT_MS = 10 * 60_000; // 10 minutes — finish includes report generation
|
|
4
7
|
|
|
5
8
|
export class CliBridge {
|
|
6
9
|
constructor(projectDir) {
|
|
@@ -9,19 +12,20 @@ export class CliBridge {
|
|
|
9
12
|
}
|
|
10
13
|
|
|
11
14
|
async listSteps() {
|
|
12
|
-
return this._run(CliBridge.buildArgs({ list: true }));
|
|
15
|
+
return this._run(CliBridge.buildArgs({ list: true }), null, { timeout: 30_000 });
|
|
13
16
|
}
|
|
14
17
|
|
|
15
18
|
async initRun(steps, timeout) {
|
|
16
|
-
return this._run(CliBridge.buildArgs({ initRun: true, steps, timeout }));
|
|
19
|
+
return this._run(CliBridge.buildArgs({ initRun: true, steps, timeout }), null, { timeout: INIT_TIMEOUT_MS });
|
|
17
20
|
}
|
|
18
21
|
|
|
19
22
|
async runStep(stepNum, onOutput) {
|
|
23
|
+
// No timeout on steps — they have their own per-step timeout via the CLI
|
|
20
24
|
return this._run(CliBridge.buildArgs({ runStep: stepNum }), onOutput);
|
|
21
25
|
}
|
|
22
26
|
|
|
23
27
|
async finishRun() {
|
|
24
|
-
return this._run(CliBridge.buildArgs({ finishRun: true }));
|
|
28
|
+
return this._run(CliBridge.buildArgs({ finishRun: true }), null, { timeout: FINISH_TIMEOUT_MS });
|
|
25
29
|
}
|
|
26
30
|
|
|
27
31
|
kill() {
|
|
@@ -71,7 +75,7 @@ export class CliBridge {
|
|
|
71
75
|
return null;
|
|
72
76
|
}
|
|
73
77
|
|
|
74
|
-
_run(args, onOutput) {
|
|
78
|
+
_run(args, onOutput, opts = {}) {
|
|
75
79
|
return new Promise((resolve, reject) => {
|
|
76
80
|
const binPath = path.resolve(import.meta.dirname, '../../bin/nightytidy.js');
|
|
77
81
|
const proc = spawn('node', [binPath, ...args], {
|
|
@@ -82,6 +86,18 @@ export class CliBridge {
|
|
|
82
86
|
|
|
83
87
|
let stdout = '';
|
|
84
88
|
let stderr = '';
|
|
89
|
+
let killed = false;
|
|
90
|
+
|
|
91
|
+
// Timeout — kill the process if it takes too long
|
|
92
|
+
let timer = null;
|
|
93
|
+
if (opts.timeout) {
|
|
94
|
+
timer = setTimeout(() => {
|
|
95
|
+
killed = true;
|
|
96
|
+
const timeoutSec = Math.round(opts.timeout / 1000);
|
|
97
|
+
warn(`CLI process timed out after ${timeoutSec}s: ${args.join(' ')}`);
|
|
98
|
+
this.kill();
|
|
99
|
+
}, opts.timeout);
|
|
100
|
+
}
|
|
85
101
|
|
|
86
102
|
proc.stdout.on('data', (data) => {
|
|
87
103
|
const text = data.toString();
|
|
@@ -111,18 +127,23 @@ export class CliBridge {
|
|
|
111
127
|
});
|
|
112
128
|
|
|
113
129
|
proc.on('close', (code) => {
|
|
130
|
+
if (timer) clearTimeout(timer);
|
|
114
131
|
this.activeProcess = null;
|
|
115
132
|
const parsed = CliBridge.parseOutput(stdout);
|
|
116
133
|
resolve({
|
|
117
|
-
success: code === 0,
|
|
134
|
+
success: code === 0 && !killed,
|
|
118
135
|
exitCode: code,
|
|
119
136
|
stdout,
|
|
120
|
-
stderr
|
|
137
|
+
stderr: killed
|
|
138
|
+
? `Process timed out after ${Math.round(opts.timeout / 1000)}s — Claude Code may be unavailable`
|
|
139
|
+
: stderr,
|
|
121
140
|
parsed,
|
|
141
|
+
timedOut: killed,
|
|
122
142
|
});
|
|
123
143
|
});
|
|
124
144
|
|
|
125
145
|
proc.on('error', (err) => {
|
|
146
|
+
if (timer) clearTimeout(timer);
|
|
126
147
|
this.activeProcess = null;
|
|
127
148
|
logError(`CLI process error: ${err.message}`);
|
|
128
149
|
resolve({
|
|
@@ -131,6 +152,7 @@ export class CliBridge {
|
|
|
131
152
|
stdout,
|
|
132
153
|
stderr: err.message,
|
|
133
154
|
parsed: null,
|
|
155
|
+
timedOut: false,
|
|
134
156
|
});
|
|
135
157
|
});
|
|
136
158
|
});
|
package/src/agent/index.js
CHANGED
|
@@ -12,6 +12,7 @@ import { WebhookDispatcher } from './webhook-dispatcher.js';
|
|
|
12
12
|
import { CliBridge } from './cli-bridge.js';
|
|
13
13
|
import { AgentGit } from './git-integration.js';
|
|
14
14
|
import { FirebaseAuth } from './firebase-auth.js';
|
|
15
|
+
import { acquireKeepAwake, releaseKeepAwake } from './keep-awake.js';
|
|
15
16
|
|
|
16
17
|
const FIREBASE_WEBHOOK_URL = 'https://webhookingest-24h6taciuq-uc.a.run.app';
|
|
17
18
|
|
|
@@ -479,7 +480,10 @@ export async function startAgent() {
|
|
|
479
480
|
|
|
480
481
|
async function processQueue() {
|
|
481
482
|
const run = runQueue.dequeue();
|
|
482
|
-
if (!run)
|
|
483
|
+
if (!run) {
|
|
484
|
+
releaseKeepAwake();
|
|
485
|
+
return;
|
|
486
|
+
}
|
|
483
487
|
|
|
484
488
|
const project = projectManager.getProject(run.projectId);
|
|
485
489
|
if (!project) {
|
|
@@ -497,15 +501,32 @@ export async function startAgent() {
|
|
|
497
501
|
runOutputBuffer = '';
|
|
498
502
|
runProgress = { stepList: [], completedCount: 0, failedCount: 0, totalCost: 0, currentStepNum: null };
|
|
499
503
|
|
|
504
|
+
acquireKeepAwake();
|
|
500
505
|
info(`\n━━━ Run started: ${project.name} ━━━`);
|
|
501
506
|
info(` Steps: [${run.steps.join(', ')}] (${run.steps.length} total)`);
|
|
502
507
|
info(` Project: ${project.path}`);
|
|
503
508
|
|
|
509
|
+
// Clean stale files from previous failed/abandoned runs
|
|
510
|
+
// so --init-run doesn't refuse to start
|
|
511
|
+
for (const staleFile of ['nightytidy-run-state.json', 'nightytidy.lock']) {
|
|
512
|
+
try {
|
|
513
|
+
const filePath = path.join(project.path, staleFile);
|
|
514
|
+
if (fs.existsSync(filePath)) {
|
|
515
|
+
fs.unlinkSync(filePath);
|
|
516
|
+
debug(`Removed stale ${staleFile}`);
|
|
517
|
+
}
|
|
518
|
+
} catch { /* ignore — init-run will report if it's still a problem */ }
|
|
519
|
+
}
|
|
520
|
+
|
|
504
521
|
wsServer.broadcast({ type: 'run-started', runId: run.id, projectId: run.projectId, projectName: project.name, branch: '' });
|
|
522
|
+
wsServer.broadcast({ type: 'run-status', runId: run.id, status: 'initializing', message: 'Running pre-checks and setting up git branch...' });
|
|
505
523
|
const initResult = await bridge.initRun(run.steps, run.timeout);
|
|
506
524
|
if (!initResult.success) {
|
|
507
|
-
|
|
508
|
-
|
|
525
|
+
const errorMsg = initResult.timedOut
|
|
526
|
+
? 'Initialization timed out — Claude Code may be unavailable. Restart the agent to retry.'
|
|
527
|
+
: (initResult.parsed?.error || initResult.stderr || 'Unknown init error');
|
|
528
|
+
info(` ✗ Init failed: ${errorMsg}`);
|
|
529
|
+
wsServer.broadcast({ type: 'run-failed', runId: run.id, error: errorMsg });
|
|
509
530
|
dispatchWithQueue('run_failed', {
|
|
510
531
|
project: project.name,
|
|
511
532
|
projectId: project.id,
|
|
@@ -1020,6 +1041,7 @@ export async function startAgent() {
|
|
|
1020
1041
|
// Graceful shutdown
|
|
1021
1042
|
const shutdown = async () => {
|
|
1022
1043
|
info('Agent shutting down...');
|
|
1044
|
+
releaseKeepAwake();
|
|
1023
1045
|
saveInterruptedState();
|
|
1024
1046
|
scheduler.stopAll();
|
|
1025
1047
|
await wsServer.stop();
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Prevents the OS from sleeping while NightyTidy runs are active.
|
|
3
|
+
*
|
|
4
|
+
* Windows: Uses PowerShell to call SetThreadExecutionState with
|
|
5
|
+
* ES_CONTINUOUS | ES_SYSTEM_REQUIRED (0x80000001). This tells Windows
|
|
6
|
+
* "don't sleep, this process needs the system." The flag is automatically
|
|
7
|
+
* cleared when the process exits or when releaseKeepAwake() is called.
|
|
8
|
+
*
|
|
9
|
+
* macOS/Linux: Uses caffeinate / systemd-inhibit respectively.
|
|
10
|
+
*
|
|
11
|
+
* No admin privileges required on any platform.
|
|
12
|
+
*/
|
|
13
|
+
import { execSync, spawn } from 'node:child_process';
|
|
14
|
+
import { debug, warn } from '../logger.js';
|
|
15
|
+
|
|
16
|
+
let keepAwakeProcess = null;
|
|
17
|
+
|
|
18
|
+
/**
 * Starts a background helper process that asks the OS not to sleep while a
 * run is active. Calling this while a helper is already held is a no-op.
 *
 * The helper differs per platform:
 * - win32:  PowerShell calling SetThreadExecutionState(0x80000001), then
 *           sleeping in a loop so the requesting process stays alive.
 * - darwin: `caffeinate -is`.
 * - other:  `systemd-inhibit ... sleep infinity` (may not exist on all distros).
 *
 * Failure is never fatal: if the helper cannot be started, or dies later, a
 * warning is logged and the shared handle is cleared so that a later call can
 * try again.
 *
 * @returns {void}
 */
export function acquireKeepAwake() {
  if (keepAwakeProcess) return; // already held

  // PowerShell script that sets ES_CONTINUOUS | ES_SYSTEM_REQUIRED and then
  // sleeps forever. Killing the PowerShell process ends the request.
  const windowsScript = `Add-Type -TypeDefinition 'using System; using System.Runtime.InteropServices; public class SleepPreventer { [DllImport("kernel32.dll")] public static extern uint SetThreadExecutionState(uint esFlags); }'; [SleepPreventer]::SetThreadExecutionState(0x80000001); while($true) { Start-Sleep -Seconds 3600 }`;

  let command;
  let args;
  let label;
  let options = { stdio: 'ignore' };

  if (process.platform === 'win32') {
    command = 'powershell';
    args = ['-NoProfile', '-WindowStyle', 'Hidden', '-Command', windowsScript];
    label = 'Windows SetThreadExecutionState';
    // windowsHide stops Node from creating a visible console window for the
    // helper; -WindowStyle Hidden alone only hides it after it has appeared.
    options = { stdio: 'ignore', detached: false, windowsHide: true };
  } else if (process.platform === 'darwin') {
    // macOS: caffeinate prevents sleep, -i = idle sleep, -s = system sleep
    command = 'caffeinate';
    args = ['-is'];
    label = 'macOS caffeinate';
  } else {
    // Linux: systemd-inhibit (may not exist on all distros)
    command = 'systemd-inhibit';
    args = [
      '--what=idle:sleep', '--who=NightyTidy', '--why=Running codebase improvement',
      'sleep', 'infinity',
    ];
    label = 'Linux systemd-inhibit';
  }

  try {
    const child = spawn(command, args, options);

    // A missing binary (ENOENT) is reported through the 'error' event rather
    // than a synchronous throw, so this handler is the only place such a
    // failure becomes visible.
    child.on('error', (err) => {
      // Only clear the shared handle if it still points at this child; a late
      // event from an old helper must not drop the reference to a newer one.
      if (keepAwakeProcess === child) keepAwakeProcess = null;
      warn(`Could not acquire sleep prevention (${label}): ${err.message} — system may sleep during runs`);
    });

    // Without this, a helper that dies early would leave the handle set and
    // every later acquireKeepAwake() call would return early as "already held".
    child.on('exit', () => {
      // releaseKeepAwake() clears the handle before the exit event arrives, so
      // a mismatch here means the exit was requested (or already reported).
      if (keepAwakeProcess !== child) return;
      keepAwakeProcess = null;
      warn(`Sleep prevention helper exited unexpectedly (${label}) — system may sleep during runs`);
    });

    // Do not let the helper keep the agent's event loop alive.
    child.unref();
    keepAwakeProcess = child;
    debug(`Sleep prevention acquired (${label})`);
  } catch {
    // Reached only when spawn() itself throws synchronously.
    warn('Could not acquire sleep prevention — system may sleep during runs');
  }
}
|
|
52
|
+
|
|
53
|
+
/**
 * Stops the sleep-prevention helper, if one is currently held.
 *
 * Does nothing when no helper is held. Terminating the helper is best-effort:
 * a failure to kill it is ignored and the handle is cleared regardless.
 *
 * @returns {void}
 */
export function releaseKeepAwake() {
  const helper = keepAwakeProcess;
  if (!helper) return;

  // Drop the shared handle first; the kill below is synchronous, so nothing
  // can observe the intermediate state.
  keepAwakeProcess = null;

  try {
    helper.kill();
  } catch {
    /* already dead */
  }

  debug('Sleep prevention released');
}
|
package/src/executor.js
CHANGED
|
@@ -66,7 +66,7 @@ import { info, warn, error as logError } from './logger.js';
|
|
|
66
66
|
// SHA-256 of all STEPS[].prompt content — update when prompts change.
|
|
67
67
|
// Detects unexpected modification of prompt data before passing to
|
|
68
68
|
// Claude Code with --dangerously-skip-permissions.
|
|
69
|
-
const STEPS_HASH = '
|
|
69
|
+
const STEPS_HASH = 'ba4e25bc096db265682a8576d543c0a3697543e238ab99f852f99038581037be';
|
|
70
70
|
|
|
71
71
|
// Hard cap on total step duration (all retries + doc-update combined).
|
|
72
72
|
// Without this, retries × phases can exceed the user's expected timeout.
|
|
@@ -8,14 +8,37 @@ AI agents pay a token cost for every line loaded into context — whether releva
|
|
|
8
8
|
- **Tier 2 (On-Demand):** Per-topic implementation details. Loaded only when relevant. ~1-2% per task.
|
|
9
9
|
- **Tier 3 (Deep Reference):** Human-facing docs, ADRs, API reference. Never auto-loaded. Zero token cost.
|
|
10
10
|
|
|
11
|
-
| Tier
|
|
12
|
-
|
|
13
|
-
| Always (Tier 1)
|
|
14
|
-
| Per-task (Tier 2, 1-2 files) | 60-120
|
|
15
|
-
| **Typical total**
|
|
11
|
+
| Tier | Lines | Tokens | % of 200K |
|
|
12
|
+
| ---------------------------- | ----------- | ---------- | --------- |
|
|
13
|
+
| Always (Tier 1) | 300-400 | 10-13K | 5-7% |
|
|
14
|
+
| Per-task (Tier 2, 1-2 files) | 60-120 | 2-4K | 1-2% |
|
|
15
|
+
| **Typical total** | **360-520** | **12-17K** | **6-9%** |
|
|
16
16
|
|
|
17
17
|
Primary deliverable: Tier 1 + Tier 2. Tier 3 is secondary.
|
|
18
18
|
|
|
19
|
+
## Documentation Philosophy: Progressive Disclosure
|
|
20
|
+
|
|
21
|
+
The goal of this documentation system is simple: **an AI agent wakes up knowing nothing about this codebase and can navigate to exactly the information it needs — quickly and token-efficiently.**
|
|
22
|
+
|
|
23
|
+
Every conversation starts cold. The agent has no memory of previous sessions, no familiarity with your architecture, and a finite context window. Every line loaded into that window is a tradeoff — useful context that helps vs. irrelevant context that displaces working memory for the actual task. A flat documentation dump forces the agent to load everything to find anything. Progressive disclosure fixes this.
|
|
24
|
+
|
|
25
|
+
**How it works**: The agent gets a compact map first (Tier 1), then navigates to exactly the detail it needs (Tier 2 topic file), and only if the topic is deep enough, one more level down (Tier 2 sub-file). At most two navigational hops from cold start to specific answer.
|
|
26
|
+
|
|
27
|
+
**The navigation chain**:
|
|
28
|
+
|
|
29
|
+
1. **Always loaded** — CLAUDE.md + MEMORY.md are in context on every conversation. These orient the agent and tell it where to look next. Combined: ~12-17K tokens
|
|
30
|
+
2. **First hop** — MEMORY.md contains a topic index with "when to load" triggers. The agent reads a trigger like "Writing or fixing tests, mock patterns, E2E" and knows to load `testing.md`. Cost: one file read
|
|
31
|
+
3. **Second hop (only when needed)** — If a topic file is large enough to have been split into a hub, it contains a sub-topics table with its own triggers. The agent loads the specific sub-file. Cost: one more file read
|
|
32
|
+
4. **Maximum depth: two levels below MEMORY.md.** A third level of indirection costs more in navigational overhead than it saves in tokens
|
|
33
|
+
|
|
34
|
+
**Design principles driving every structural decision**:
|
|
35
|
+
|
|
36
|
+
- **Trigger-based loading**: Every file in the index has a "when to load" description written from the agent's task perspective — "Writing or fixing tests", not "Testing documentation"
|
|
37
|
+
- **Hub files over bloated files**: When a topic file outgrows its target, promote it to a hub. Keep the 20% of content that covers 80% of use cases inline; split specialized detail into sub-files
|
|
38
|
+
- **No orphan files**: Every file must be reachable from MEMORY.md within two hops. If a file isn't linked, the agent will never find it
|
|
39
|
+
- **Scale with the codebase**: A 5-file CLI tool needs 3-5 memory files. A 30-service project with thousands of tests might need 20-30. File count follows complexity, not a fixed number
|
|
40
|
+
- **Information completeness over compression** (**primary directive**): The entire codebase must be documented with sufficient depth for an agent to work with each module correctly. A one-line mention of a system is not documentation — it's an inventory entry. If adding proper depth pushes a file past its line target, create more files. Never sacrifice coverage to hit a line count. Line targets exist to trigger splits, not to cap documentation
|
|
41
|
+
|
|
19
42
|
---
|
|
20
43
|
|
|
21
44
|
## Phases
|
|
@@ -34,15 +57,18 @@ Read and map everything. No files produced — only understanding.
|
|
|
34
57
|
|
|
35
58
|
**Pitfalls:** Non-obvious side effects, library workarounds, magic values, complex regex, unexplained constants, non-obvious business logic.
|
|
36
59
|
|
|
37
|
-
**Cluster** learnings into topic areas → these become Tier 2 files.
|
|
60
|
+
**Cluster** learnings into topic areas → these become Tier 2 files. For large codebases, identify which topics are broad enough to need sub-files and plan the hub structure now.
|
|
61
|
+
|
|
62
|
+
**Coverage map (critical step):** Build an explicit mapping of every significant codebase module → the documentation file responsible for it. Every service, store, hook, feature, engine, and reusable system must appear in at least one memory file. If a module has no documentation home, either add it to an existing file or plan a new one. This map is your completeness checklist for Phase 3 — you will verify each entry is documented with sufficient depth, not just mentioned in a bullet point.
|
|
38
63
|
|
|
39
64
|
### Phase 2: CLAUDE.md (Tier 1)
|
|
40
65
|
|
|
41
66
|
Create `CLAUDE.md` at project root. **Target: 250-350 lines. Hard constraint.**
|
|
42
67
|
|
|
43
|
-
**Inclusion test:**
|
|
68
|
+
**Inclusion test:** _"If I removed this, would the AI write incorrect code on an unrelated task?"_ No → Tier 2.
|
|
44
69
|
|
|
45
70
|
**Required sections:**
|
|
71
|
+
|
|
46
72
|
- **Project Identity** — One paragraph: what, who, why
|
|
47
73
|
- **Workflow Rules** — Non-negotiable process (deploy, test, etc.)
|
|
48
74
|
- **Tech Stack** — Table: technology | version | purpose
|
|
@@ -54,80 +80,227 @@ Create `CLAUDE.md` at project root. **Target: 250-350 lines. Hard constraint.**
|
|
|
54
80
|
- **Build/Deploy Commands** — Copy-paste ready
|
|
55
81
|
- **Coding Conventions** — Only those consistently followed in code
|
|
56
82
|
- **Design System Rules** (if applicable) — Only if affecting every UI task; otherwise Tier 2
|
|
57
|
-
- **Documentation Hierarchy** — Table telling AI where knowledge lives:
|
|
83
|
+
- **Documentation Hierarchy** — Table telling AI where knowledge lives and how to navigate:
|
|
84
|
+
|
|
58
85
|
```markdown
|
|
59
86
|
## Documentation Hierarchy
|
|
60
87
|
|
|
61
|
-
| Layer
|
|
62
|
-
|
|
63
|
-
| **CLAUDE.md**
|
|
64
|
-
| **MEMORY.md**
|
|
65
|
-
| **
|
|
66
|
-
| **
|
|
88
|
+
| Layer | Loaded | What goes here |
|
|
89
|
+
| ------------------------------------- | ------------------ | ----------------------------------------- |
|
|
90
|
+
| **CLAUDE.md** | Every conversation | Rules preventing mistakes on ANY task |
|
|
91
|
+
| **MEMORY.md** | Every conversation | Navigation index + cross-cutting patterns |
|
|
92
|
+
| **Topic files** (.claude/memory/) | On demand | Per-topic implementation details |
|
|
93
|
+
| **Sub-topic files** (.claude/memory/) | On demand | Specialized detail within a topic |
|
|
94
|
+
| **Inline comments** | When code is read | Non-obvious "why" explanations |
|
|
95
|
+
|
|
96
|
+
**Navigation**: MEMORY.md index → topic file → sub-topic file (if needed). Max 2 hops from cold start to answer. Every file reachable from MEMORY.md within 2 levels.
|
|
67
97
|
|
|
68
|
-
Rule: Prevents mistakes on unrelated tasks → CLAUDE.md. Spans features → MEMORY.md. One feature
|
|
98
|
+
Rule: Prevents mistakes on unrelated tasks → CLAUDE.md. Spans features → MEMORY.md cross-cutting patterns. One feature → topic file. Narrow subtopic within a feature → sub-topic file. Single line → inline comment.
|
|
69
99
|
```
|
|
70
100
|
|
|
101
|
+
**Note on hub files:** The hierarchy table above includes both topic files and sub-topic files. You don't need to know the full hub structure yet — Phase 3 covers it in detail. Just ensure CLAUDE.md's hierarchy table reflects both levels so agents know the navigation depth.
|
|
102
|
+
|
|
71
103
|
**Does NOT belong in CLAUDE.md:** Feature implementation details, API response shapes, field-level schemas, testing patterns, debugging notes, security findings, historical context. All → Tier 2/3.
|
|
72
104
|
|
|
73
105
|
**Format:** Terse, imperative. Tables and bullets, not paragraphs.
|
|
74
106
|
|
|
75
107
|
### Phase 3: Tier 2 Memory Files
|
|
76
108
|
|
|
77
|
-
Create files at `.claude/memory/`.
|
|
109
|
+
Create files at `.claude/memory/`. These are the documentation an agent loads on-demand to understand specific topics in depth.
|
|
110
|
+
|
|
111
|
+
#### Two-Level Structure
|
|
112
|
+
|
|
113
|
+
Memory files exist at two levels:
|
|
114
|
+
|
|
115
|
+
- **Topic files**: Linked directly from MEMORY.md. One topic per file. This is what the agent loads first
|
|
116
|
+
- **Sub-topic files**: Linked from a topic file that has become a hub. One narrow subtopic per file
|
|
117
|
+
|
|
118
|
+
**Maximum depth: 2 levels below MEMORY.md.** The path is always: `MEMORY.md → topic file → sub-topic file`. Never deeper. If a sub-topic file itself outgrows its target, promote it to a topic file (move it up), don't nest deeper.
|
|
119
|
+
|
|
120
|
+
#### Sizing and the Hub Pattern
|
|
121
|
+
|
|
122
|
+
**Target: 40-80 lines per file.** This is a soft target, not a hard cap — the goal is token efficiency, not arbitrary limits. Files between 80-100 lines are fine if the content is cohesive. Past ~100 lines, split. When splitting:
|
|
123
|
+
|
|
124
|
+
1. Identify which sections serve most tasks (the "always useful" core) vs. specialized tasks (the "sometimes useful" detail)
|
|
125
|
+
2. Keep the core content inline in the file — aim for 40-60 lines in the hub
|
|
126
|
+
3. Split specialized sections into sub-topic files
|
|
127
|
+
4. Add a **Sub-Topics** table at the bottom of the hub with "when to load" triggers
|
|
128
|
+
|
|
129
|
+
A topic file that has been split becomes a **hub file**. It still contains the most critical content inline — it is NOT reduced to a bare index. An agent loading only the hub should get what it needs for 80% of tasks involving that topic.
|
|
130
|
+
|
|
131
|
+
**Hub file example:**
|
|
132
|
+
|
|
133
|
+
```markdown
|
|
134
|
+
# Testing — Tier 2 Reference
|
|
135
|
+
|
|
136
|
+
## Infrastructure
|
|
137
|
+
|
|
138
|
+
[Always-needed: framework, config, helpers — 15-20 lines]
|
|
139
|
+
|
|
140
|
+
## Critical Anti-Patterns
|
|
141
|
+
|
|
142
|
+
[Always-needed: mistakes that break tests — 10-15 lines]
|
|
143
|
+
|
|
144
|
+
## Mock Patterns
|
|
145
|
+
|
|
146
|
+
[Most common patterns — 10-15 lines]
|
|
147
|
+
|
|
148
|
+
## Sub-Topics
|
|
149
|
+
|
|
150
|
+
| File | When to load |
|
|
151
|
+
| ------------------ | -------------------------------------------- |
|
|
152
|
+
| testing-mocks.md | Complex mock patterns for IPC, DB, or CJS |
|
|
153
|
+
| testing-e2e.md | Running or writing E2E / Playwright tests |
|
|
154
|
+
| testing-quality.md | Mutation testing, coverage, assertion audits |
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
#### Coverage Verification (Do This Before Moving On)
|
|
158
|
+
|
|
159
|
+
After drafting all topic files (and before Phase 4), verify coverage using the map from Phase 1:
|
|
160
|
+
|
|
161
|
+
1. **For each module in the coverage map**: Find where it's documented. Read the actual documentation. Ask: "Does this give an agent enough detail to work with this module correctly — or just enough to know it exists?" A one-line mention is NOT sufficient documentation for a module with its own state, IPC channels, decision logic, or configuration
|
|
162
|
+
2. **Depth test**: For each documented module, would an agent reading only this documentation be able to: modify behavior correctly, debug issues, add features, and avoid the known pitfalls? If not, the documentation is incomplete
|
|
163
|
+
3. **Sub-file decision**: For any module where adding sufficient depth would push a topic file past ~80 lines, plan a sub-file. But also create sub-files when a topic file covers 3+ distinct systems and an agent working on one system would waste >40% of the file's tokens on irrelevant content — even if the file is within line targets
|
|
164
|
+
4. **Gap action**: For any module with insufficient documentation depth, either expand the relevant topic file or create a new sub-file. Do not move to Phase 4 with known coverage gaps
|
|
165
|
+
|
|
166
|
+
**The goal is not "every file is 40-80 lines." The goal is "every significant codebase module is documented with enough depth for an agent to work with it correctly." File count and line counts are consequences of completeness, not targets to satisfy.**
|
|
78
167
|
|
|
79
|
-
|
|
168
|
+
#### Content Rules
|
|
80
169
|
|
|
81
|
-
|
|
170
|
+
- Terse reference format. Tables, bullets, code snippets — not prose
|
|
171
|
+
- Don't repeat CLAUDE.md. Assume reader has it loaded
|
|
172
|
+
- Name by topic (`testing.md`) not area (`backend-stuff.md`). Sub-files use parent prefix (`testing-mocks.md`, `testing-e2e.md`)
|
|
173
|
+
- Each file covers: patterns/conventions, config details, correct-pattern snippets, common mistakes, external API quirks
|
|
82
174
|
|
|
83
175
|
**Good** — tells you what to do:
|
|
176
|
+
|
|
84
177
|
```markdown
|
|
85
178
|
## Firestore Mock Routing
|
|
179
|
+
|
|
86
180
|
Callables using `loadPromptForPhase()` + `recordUsage()` need collection routing:
|
|
181
|
+
|
|
87
182
|
- `"prompts"` → return `{ doc: vi.fn(() => ({ get: async () => ({ exists: false }) })) }`
|
|
88
183
|
- `"_rateLimits"` → return safe no-op mock
|
|
89
184
|
```
|
|
90
185
|
|
|
91
186
|
**Bad** — teaches background knowledge (that's Tier 3):
|
|
187
|
+
|
|
92
188
|
```markdown
|
|
93
189
|
## About Firestore Mock Routing
|
|
190
|
+
|
|
94
191
|
When writing tests for callable functions, you need to be aware that some callables
|
|
95
192
|
access multiple Firestore collections...
|
|
96
193
|
```
|
|
97
194
|
|
|
98
|
-
|
|
195
|
+
#### File Count Scaling
|
|
196
|
+
|
|
197
|
+
File count scales with codebase complexity. Use this as rough guidance:
|
|
198
|
+
|
|
199
|
+
| Codebase Size | Topic Files | Sub-Topic Files | Total |
|
|
200
|
+
| ----------------------- | ----------- | --------------- | ----- |
|
|
201
|
+
| Small (< 20 files) | 3-5 | 0-2 | 3-7 |
|
|
202
|
+
| Medium (20-100 files) | 5-10 | 2-5 | 7-15 |
|
|
203
|
+
| Large (100-500 files) | 8-15 | 5-15 | 13-30 |
|
|
204
|
+
| Very large (500+ files) | 12-20 | 10-25 | 22-45 |
|
|
205
|
+
|
|
206
|
+
**Indicators you should split a file:**
|
|
207
|
+
- Exceeds ~100 lines
|
|
208
|
+
- Covers 3+ distinct workflows or systems
|
|
209
|
+
- Agents loading the file waste >50% of its content on most tasks
|
|
210
|
+
- A module within the file has enough documentable detail (state shapes, decision logic, IPC channels, gotchas) to fill 30+ lines on its own — even if the parent file is within line targets. This is the coverage-driven split: the agent benefits from being able to load *just* that module's documentation without the surrounding context
|
|
211
|
+
|
|
212
|
+
**Indicators you've over-split**: Multiple files under 20 lines. Agents need 3+ files for a single task. Hub files have more links than inline content. Two sub-files could be combined without exceeding 80 lines.
|
|
213
|
+
|
|
214
|
+
#### Suggested Topic Files (create only what's relevant)
|
|
99
215
|
|
|
100
|
-
| File
|
|
101
|
-
|
|
102
|
-
| testing.md
|
|
103
|
-
| data-model.md
|
|
104
|
-
| api-providers.md
|
|
105
|
-
|
|
|
106
|
-
|
|
|
107
|
-
| feature-inventory.md
|
|
108
|
-
| security.md
|
|
109
|
-
|
|
|
216
|
+
| File | Covers |
|
|
217
|
+
| ----------------------- | ------------------------------------------------- |
|
|
218
|
+
| testing.md | Framework config, mocks, pitfalls |
|
|
219
|
+
| data-model.md | Field schemas, indexes, storage paths, migrations |
|
|
220
|
+
| api-providers.md | External endpoints, auth, rate limits, quirks |
|
|
221
|
+
| frontend-patterns.md | Component patterns, stores, animations, theme |
|
|
222
|
+
| process-management.md | Backend process lifecycle, spawn flow, guards |
|
|
223
|
+
| feature-inventory.md | Features, shared components, reusable systems |
|
|
224
|
+
| security.md | Auth details, vulnerabilities, audit findings |
|
|
225
|
+
| build-infrastructure.md | Build pipeline, CI/CD, packaging |
|
|
226
|
+
| ipc-contracts.md | IPC channels, schemas, handler conventions |
|
|
227
|
+
| account-management.md | Auth flows, credential management, usage APIs |
|
|
110
228
|
|
|
111
|
-
Split/merge by project shape.
|
|
229
|
+
Split/merge by project shape. Not every project needs every file. Create what the codebase demands — the scaling table above is your guide, not a hard rule.
|
|
112
230
|
|
|
113
|
-
### Phase 4: MEMORY.md (Tier 1 — Index)
|
|
231
|
+
### Phase 4: MEMORY.md (Tier 1 — Navigation Index)
|
|
232
|
+
|
|
233
|
+
Create `.claude/memory/MEMORY.md`. **Target: 40-80 lines.** This is the agent's primary navigation map — loaded on every conversation alongside CLAUDE.md.
|
|
234
|
+
|
|
235
|
+
**Three roles:**
|
|
236
|
+
|
|
237
|
+
1. **Orient** — Current project state (metrics, known debt, recent changes)
|
|
238
|
+
2. **Navigate** — Topic index with trigger-based descriptions telling the agent which file to load
|
|
239
|
+
3. **Remind** — Cross-cutting patterns too specific for CLAUDE.md but spanning multiple features
|
|
240
|
+
|
|
241
|
+
#### Required Sections
|
|
114
242
|
|
|
115
|
-
Create `.claude/memory/MEMORY.md`. **Target: 30-60 lines.** Index and state tracker only.
|
|
116
243
|
```markdown
|
|
117
244
|
# Project Memory — Index
|
|
245
|
+
|
|
118
246
|
[One-line description]. See CLAUDE.md for rules.
|
|
119
247
|
|
|
120
248
|
## Current State
|
|
121
|
-
|
|
122
|
-
- [
|
|
249
|
+
|
|
250
|
+
- [Key metrics: test count, schema version, channel count, deploy URL, etc.]
|
|
251
|
+
- [Known debt summary: 1-3 bullet points]
|
|
123
252
|
|
|
124
253
|
## Topic Files
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
|
128
|
-
|
|
|
254
|
+
|
|
255
|
+
| File | When to load |
|
|
256
|
+
| ---------------------- | --------------------------------------------------- |
|
|
257
|
+
| `testing.md` | Writing/fixing tests, mock patterns, E2E |
|
|
258
|
+
| `data-model.md` | Database schema, queries, migrations, new tables |
|
|
259
|
+
| `frontend-patterns.md` | React components, stores, animations, design system |
|
|
260
|
+
| `security.md` | Auth flows, input validation, spawn security |
|
|
261
|
+
|
|
262
|
+
## Cross-Cutting Patterns
|
|
263
|
+
|
|
264
|
+
- [Pattern]: [terse description of when/how to apply]
|
|
265
|
+
- [Pattern]: [terse description of when/how to apply]
|
|
129
266
|
```
|
|
130
267
|
|
|
268
|
+
#### Writing Good "When to Load" Triggers
|
|
269
|
+
|
|
270
|
+
The topic index is the most important part of MEMORY.md. It is the agent's decision point — load this file or skip it. Write triggers from the **agent's task perspective**, not the file's content perspective.
|
|
271
|
+
|
|
272
|
+
**Good triggers** — task-oriented, specific:
|
|
273
|
+
|
|
274
|
+
| File | When to load |
|
|
275
|
+
| --------------- | ------------------------------------------------ |
|
|
276
|
+
| `testing.md` | Writing or fixing tests, mock patterns, E2E |
|
|
277
|
+
| `security.md` | Auth flows, input validation, spawn security |
|
|
278
|
+
| `data-model.md` | Database schema, queries, migrations, new tables |
|
|
279
|
+
|
|
280
|
+
**Bad triggers** — vague, content-oriented:
|
|
281
|
+
|
|
282
|
+
| File | When to load |
|
|
283
|
+
| --------------- | --------------------- |
|
|
284
|
+
| `testing.md` | Testing documentation |
|
|
285
|
+
| `security.md` | Security details |
|
|
286
|
+
| `data-model.md` | Database information |
|
|
287
|
+
|
|
288
|
+
The agent should be able to read a trigger and immediately know: "yes, that's my current task" or "no, skip it."
|
|
289
|
+
|
|
290
|
+
#### Cross-Cutting Patterns Section
|
|
291
|
+
|
|
292
|
+
Include patterns that meet ALL three criteria:
|
|
293
|
+
|
|
294
|
+
1. Too specific for CLAUDE.md (not every task needs them)
|
|
295
|
+
2. Span multiple features (not one-file-only knowledge)
|
|
296
|
+
3. High mistake frequency (agents get this wrong without the reminder)
|
|
297
|
+
|
|
298
|
+
Examples: IPC envelope shapes, error handling helpers, state management gotchas. Keep to 10-15 bullets max. If this section grows past 15 items, move low-frequency ones into the most relevant topic file.
|
|
299
|
+
|
|
300
|
+
#### Scaling MEMORY.md
|
|
301
|
+
|
|
302
|
+
As the codebase grows and topic files multiply, MEMORY.md's index table grows too — but only the table. Cross-cutting patterns stay compact. If MEMORY.md exceeds ~100 lines, audit it: move low-frequency cross-cutting patterns into topic files. The index table can be as long as needed — each row costs 1 line and saves the agent from loading the wrong file.
|
|
303
|
+
|
|
131
304
|
### Phase 5: Version Control
|
|
132
305
|
|
|
133
306
|
`.gitignore`:
|
|
@@ -137,15 +310,18 @@ Create `.claude/memory/MEMORY.md`. **Target: 30-60 lines.** Index and state trac
|
|
|
137
310
|
In addition to writing the full report file, you MUST print a summary directly in the conversation when you finish. Do not make the user open the report to get the highlights. The chat summary should include:
|
|
138
311
|
|
|
139
312
|
### 1. Status Line
|
|
313
|
+
|
|
140
314
|
One sentence: what you did, how long it took, and whether all tests still pass.
|
|
141
315
|
|
|
142
316
|
### 2. Key Findings
|
|
317
|
+
|
|
143
318
|
The most important things discovered — bugs, risks, wins, or surprises. Each bullet should be specific and actionable, not vague. Lead with severity or impact.
|
|
144
319
|
|
|
145
320
|
**Good:** "CRITICAL: No backup configuration found for the primary Postgres database — total data loss risk."
|
|
146
321
|
**Bad:** "Found some issues with backups."
|
|
147
322
|
|
|
148
323
|
### 3. Changes Made (if applicable)
|
|
324
|
+
|
|
149
325
|
Bullet list of what was actually modified, added, or removed. Skip this section for read-only analysis runs.
|
|
150
326
|
|
|
151
327
|
### 4. Recommendations
|
|
@@ -154,13 +330,14 @@ If there are legitimately beneficial recommendations worth pursuing right now, p
|
|
|
154
330
|
|
|
155
331
|
When recommendations exist, use this table format:
|
|
156
332
|
|
|
157
|
-
| #
|
|
158
|
-
|
|
159
|
-
|
|
|
333
|
+
| # | Recommendation | Impact | Risk if Ignored | Worth Doing? | Details |
|
|
334
|
+
| ------------------- | ------------------------------- | ---------------------------- | -------------------------------- | -------------------------------------- | ----------------------------------------------------------------------------- |
|
|
335
|
+
| _Sequential number_ | _Short description (≤10 words)_ | _What improves if addressed_ | _Low / Medium / High / Critical_ | _Yes / Probably / Only if time allows_ | _1–3 sentences explaining the reasoning, context, or implementation guidance_ |
|
|
160
336
|
|
|
161
337
|
Order rows by risk descending (Critical → High → Medium → Low). Be honest in the "Worth Doing?" column — not everything flagged is worth the engineering time. If a recommendation is marginal, say so.
|
|
162
338
|
|
|
163
339
|
### 5. Report Location
|
|
340
|
+
|
|
164
341
|
State the full path to the detailed report file for deeper review.
|
|
165
342
|
|
|
166
343
|
Create `audit-reports/` in project root if needed. Save as `audit-reports/01_DOCUMENTATION_COVERAGE_REPORT_[run-number]_[date]_[time in user's local time].md`, incrementing run number based on existing reports.
|
|
@@ -168,6 +345,7 @@ Create `audit-reports/` in project root if needed. Save as `audit-reports/01_DOC
|
|
|
168
345
|
---
|
|
169
346
|
|
|
170
347
|
**Formatting rules for chat output:**
|
|
348
|
+
|
|
171
349
|
- Use markdown headers, bold for severity labels, and bullet points for scannability.
|
|
172
350
|
- Do not duplicate the full report contents — just the highlights and recommendations.
|
|
173
351
|
- If you made zero findings in a phase, say so in one line rather than omitting it silently.
|