@vuau/agent-memory 0.5.2 → 0.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/bin/cli.js CHANGED
@@ -72,8 +72,17 @@ function scaffold(projectDir, options = {}) {
72
72
  writeFileSync(targetPath, content);
73
73
  result.created.push(target);
74
74
  }
75
+ const managedSpecs = [
76
+ { target: `${SPEC_DIR}/coding-principles.md`, template: "spec/coding-principles.md" }
77
+ ];
78
+ for (const { target, template } of managedSpecs) {
79
+ const targetPath = join(projectDir, target);
80
+ const content = applyVars(readTemplate(template), vars);
81
+ writeFileSync(targetPath, content);
82
+ result.created.push(target);
83
+ }
75
84
  const specKeep = join(projectDir, SPEC_DIR, ".gitkeep");
76
- if (!existsSync(specKeep)) {
85
+ if (!existsSync(specKeep) && !managedSpecs.length) {
77
86
  writeFileSync(specKeep, "");
78
87
  result.created.push(`${SPEC_DIR}/.gitkeep`);
79
88
  }
@@ -112,6 +121,15 @@ function updateRouter(projectDir) {
112
121
  const vars = { PROJECT_NAME: projectName };
113
122
  const content = applyVars(readTemplate("AGENTS.md"), vars);
114
123
  writeFileSync(targetPath, content);
124
+ const managedSpecs = [
125
+ { target: `${SPEC_DIR}/coding-principles.md`, template: "spec/coding-principles.md" }
126
+ ];
127
+ for (const { target, template } of managedSpecs) {
128
+ const specPath = join(projectDir, target);
129
+ const specDir = dirname(specPath);
130
+ if (!existsSync(specDir)) mkdirSync(specDir, { recursive: true });
131
+ writeFileSync(specPath, applyVars(readTemplate(template), vars));
132
+ }
115
133
  return true;
116
134
  }
117
135
 
@@ -124,7 +142,8 @@ function doctor(projectDir) {
124
142
  { file: AGENTS_MD, desc: "Root router file" },
125
143
  { file: CUSTOM_FILE, desc: "Project specific rules" },
126
144
  { file: MEMORY_FILE, desc: "Long-term memory" },
127
- { file: TASKS_FILE, desc: "Working memory" }
145
+ { file: TASKS_FILE, desc: "Working memory" },
146
+ { file: `${SPEC_DIR}/coding-principles.md`, desc: "Coding principles (run `agent-memory update` to create)" }
128
147
  ];
129
148
  for (const { file, desc } of required) {
130
149
  const filePath = join2(projectDir, file);
package/dist/index.js CHANGED
@@ -66,8 +66,17 @@ function scaffold(projectDir, options = {}) {
66
66
  writeFileSync(targetPath, content);
67
67
  result.created.push(target);
68
68
  }
69
+ const managedSpecs = [
70
+ { target: `${SPEC_DIR}/coding-principles.md`, template: "spec/coding-principles.md" }
71
+ ];
72
+ for (const { target, template } of managedSpecs) {
73
+ const targetPath = join(projectDir, target);
74
+ const content = applyVars(readTemplate(template), vars);
75
+ writeFileSync(targetPath, content);
76
+ result.created.push(target);
77
+ }
69
78
  const specKeep = join(projectDir, SPEC_DIR, ".gitkeep");
70
- if (!existsSync(specKeep)) {
79
+ if (!existsSync(specKeep) && !managedSpecs.length) {
71
80
  writeFileSync(specKeep, "");
72
81
  result.created.push(`${SPEC_DIR}/.gitkeep`);
73
82
  }
@@ -106,6 +115,15 @@ function updateRouter(projectDir) {
106
115
  const vars = { PROJECT_NAME: projectName };
107
116
  const content = applyVars(readTemplate("AGENTS.md"), vars);
108
117
  writeFileSync(targetPath, content);
118
+ const managedSpecs = [
119
+ { target: `${SPEC_DIR}/coding-principles.md`, template: "spec/coding-principles.md" }
120
+ ];
121
+ for (const { target, template } of managedSpecs) {
122
+ const specPath = join(projectDir, target);
123
+ const specDir = dirname(specPath);
124
+ if (!existsSync(specDir)) mkdirSync(specDir, { recursive: true });
125
+ writeFileSync(specPath, applyVars(readTemplate(template), vars));
126
+ }
109
127
  return true;
110
128
  }
111
129
 
@@ -118,7 +136,8 @@ function doctor(projectDir) {
118
136
  { file: AGENTS_MD, desc: "Root router file" },
119
137
  { file: CUSTOM_FILE, desc: "Project specific rules" },
120
138
  { file: MEMORY_FILE, desc: "Long-term memory" },
121
- { file: TASKS_FILE, desc: "Working memory" }
139
+ { file: TASKS_FILE, desc: "Working memory" },
140
+ { file: `${SPEC_DIR}/coding-principles.md`, desc: "Coding principles (run `agent-memory update` to create)" }
122
141
  ];
123
142
  for (const { file, desc } of required) {
124
143
  const filePath = join2(projectDir, file);
package/docs/RESEARCH.md CHANGED
@@ -1,209 +1,174 @@
1
- # Research: AI Memory Solutions Tools Tried & Why File-Based Won
1
+ # Research: Why the Team Chose File-Based Agent Memory
2
2
 
3
- ## The Problem
3
+ > Decision memo based on the team's operating constraints.
4
+ > External research source: [spikelab/memory-systems-ai-agents-research.md](https://gist.github.com/spikelab/7551c6368e23caa06a4056350f6b2db3) — researched 2025-12-02, updated 2026-02-06, 60+ sources.
4
5
 
5
- AI assistants (OpenCode, Copilot, Cursor, Windsurf) lose context between sessions. They need:
6
- - ✅ Local-first (no API keys)
7
- - ✅ Cross-IDE (OpenCode, VS Code, Windsurf, Antigravity)
8
- - ✅ Cross-platform (Host + VM, Windows 11 + Linux)
9
- - ✅ Persistent memory for decisions
10
- - ✅ Low token overhead
11
- - ✅ Reliable retrieval
6
+ ---
7
+
8
+ ## Decision
12
9
 
13
- ## Tools Evaluated
10
+ The team chose **file-based memory coordinated by `AGENTS.md`**.
14
11
 
15
- | Tool | Auto-capture | VM compatible | Blocker | Status |
16
- |------|--------------|---------------|---------|--------|
17
- | **qmd** | ❌ (search only) | ❌ | better-sqlite3 needs Visual Studio Build Tools; HuggingFace blocked | ❌ Failed |
18
- | **memsearch** | ✅ (daemon hooks) | ❌ | milvus-lite has no Windows wheels; HuggingFace blocked | ❌ Failed |
19
- | **mem0** | ✅ (hooks) | ❌ | Requires OpenAI API key or HuggingFace models | ❌ Failed |
20
- | **memories.sh** | ✅ (MCP) | ✅ | Auto-generates 10+ IDE config files (bloats repo) | ⚠️ Rejected |
21
- | **codemem** | ❌ | ❌ | Flaky (unreliable save/recall) | ⚠️ Rejected |
22
- | **File-based + rules** | Manual (via rules) | ✅ | None | ✅ **CHOSEN** |
12
+ Reason: best fit for the team's constraints, not because it is universally best.
23
13
 
24
14
  ---
25
15
 
26
- ## Why Each Failed
16
+ ## Constraints
27
17
 
28
- ### qmd
29
- **Blocker**: `better-sqlite3` native module requires Visual Studio Build Tools to compile on Windows. HuggingFace is also blocked in many environments.
30
- **Impact**: Cannot run on VM Windows 11 → breaks cross-platform requirement.
18
+ The team needs a memory system that is:
31
19
 
32
- ### memsearch
33
- **Blockers**:
34
- 1. `milvus-lite` has no pre-built wheels for Windows must compile from source
35
- 2. HuggingFace models blocked in isolated environments
36
- 3. **Context Blindness**: Auto-capture can't link user command ("remember this info") to 10-line analysis from previous turn → writes "No context provided" error
37
- 4. **Context Bloat**: Falls back to `memory_transcript`, pulling 19 old tool calls into context = **47,389 tokens (24% of context budget)**
20
+ - Local-first
21
+ - Cross-platform: Host + VM, Windows 11 + Linux
22
+ - Usable across editors via one control file: `AGENTS.md`
23
+ - Cheap in token overhead
24
+ - Reliable in retrieval
25
+ - Transparent and auditable
38
26
 
39
- **Impact**: Unreliable on Windows + VM. Token cost makes it unusable for real work.
27
+ ---
40
28
 
41
- ### mem0
42
- **Blocker**: Requires OpenAI API key or HuggingFace (both violate "local-first, no API key" requirement).
43
- **Impact**: Not viable for local-first requirement.
29
+ ## Alternatives Considered
44
30
 
45
- ### memories.sh
46
- **Reevaluation**:
47
- - CLI tool (good)
48
- - Local-first (good)
49
- - MCP support (cross-IDE capable)
50
- - Clear memory fragmentation (Session, Semantic, Episodic, Procedural)
51
- - **Auto-generates 10+ config files per IDE** (`.memories.sh` configs for Zsh, Bash, Fish, Zed, Helix, Neovim, etc.)
52
- - Conflicts with "lightweight, centralized control" goal
31
+ | Tool | Good at | Why not chosen |
32
+ |------|---------|----------------|
33
+ | **qmd** | Local search | `better-sqlite3` and HuggingFace requirements break Windows/VM portability |
34
+ | **memsearch** | Auto-capture | Windows packaging issues plus context blindness and transcript bloat |
35
+ | **mem0** | Managed long-term memory | Requires OpenAI API or HuggingFace, violates local-first constraint |
36
+ | **memories.sh** | MCP-based memory | Generates many tool/editor config files, conflicts with minimal centralized control |
37
+ | **codemem** | Lightweight idea | Retrieval/save behavior too flaky for decision memory |
38
+ | **File-based + rules** | Simplicity, portability, auditability | Chosen |
53
39
 
54
- **Decision**: Violates architecture principle of keeping configuration minimal and in one place. Repository becomes cluttered.
40
+ ---
55
41
 
56
- ### codemem
57
- **Issue**: Flaky (sometimes saves, sometimes doesn't). No consistent retrieval.
58
- **Impact**: Cannot be relied upon for critical decisions.
42
+ ## Why This Approach Fits the Team
59
43
 
60
- ---
44
+ ### 1. Portability First
61
45
 
62
- ## Why File-Based Won
63
-
64
- ### 1. **No Environmental Blockers**
65
- - Plain text files work everywhere (Host, VM, Windows 11, Linux)
66
- - No native modules, no HuggingFace, no build tools required
67
- - ✅ Cross-platform by default
68
-
69
- ### 2. **Handles Context Blindness**
70
- - **Problem with auto-capture**: System can't reliably link user intent ("remember this") to prior technical analysis
71
- - **Solution**: Agents write memory **when they understand context**
72
- - Agent just finished exploring codebase → writes 1-line decision
73
- - User approved decision → agent appends to MEMORY.md
74
- - Agent reads MEMORY.md before implementing → follows pointer to spec file
75
- - Result: Context is always linked because agent is in session when writing
76
-
77
- ### 3. **Solves Context Bloat**
78
- - Auto-capture tools fail gracefully → pull raw transcripts (~47k tokens)
79
- - File-based stores curated 1-liners (~200 tokens)
80
- - **66x cheaper per session**
81
-
82
- ### 4. **IDE Portability**
83
- | IDE | Integration | Works Now |
84
- |-----|-------------|-----------|
85
- | OpenCode | Reads `AGENTS.md` | ✅ Yes |
86
- | GitHub Copilot | Reads `.github/copilot-instructions.md` | ✅ Yes |
87
- | Cursor | Reads `.cursorrules` | ✅ Yes |
88
- | Windsurf | Reads `.windsurfrules` | ✅ Yes |
89
-
90
- No custom plugin needed per IDE. Markdown is portable.
91
-
92
- ### 5. **Sharing & Sync**
93
- - Lives in git repo → automatically shared via Git/Rsync/Dropbox
94
- - Developers see decision history in commits
95
- - Can be backed up, versioned, audited
96
- - No external database to sync across machines
97
-
98
- ### 6. **Transparent & Auditable**
99
- - Human-readable: can review MEMORY.md directly
100
- - No "locked in SQLite/Vector DB" problem
101
- - No export/import needed
102
- - Git history shows who decided what and when
46
+ Plain text works everywhere. No native modules, no vector DB, no model downloads, no build tool chain.
103
47
 
104
- ---
48
+ ### 2. Better Context Linking
105
49
 
106
- ## Architecture: 4-Layer Design
50
+ Automatic capture often stores events without enough surrounding intent. File-based memory moves the write to the moment when the agent already understands the decision.
107
51
 
108
- ### Why 4 Layers?
52
+ That matters more than raw capture volume.
109
53
 
110
- **Problem**: Automatic memory systems fail when:
111
- 1. They can't link user intent to prior context (blindness)
112
- 2. They generate massive output when fallback fails (bloat)
113
- 3. They aren't portable across environments
54
+ ### 3. Low Overhead
114
55
 
115
- **Solution**: Explicit layers that separate concerns:
56
+ Team experience: curated notes plus spec pointers are dramatically cheaper than replaying raw transcripts.
116
57
 
117
- ```
118
- Layer 1: Router (AGENTS.md, ~100 lines)
119
- ├─ Critical rules + pointers only
120
- └─ Every IDE reads this first
121
-
122
- Layer 2: Memory (MEMORY.md, ~150 lines)
123
- ├─ Curated 1-line decisions
124
- ├─ Category headers with spec pointers
125
- └─ Agent reads before implementing
126
-
127
- Layer 3: Tasks (TASKS.md)
128
- ├─ Current work, in-progress, next steps
129
- └─ Enables session continuity
130
-
131
- Layer 4: Specs (spec/*.md, on-demand)
132
- ├─ Detailed patterns, examples
133
- ├─ Referenced by Layer 2
134
- └─ Agent loads only when needed
135
- ```
58
+ External research points in the same direction:
136
59
 
137
- **Progressive Disclosure**: Agents read ~200 tokens initially, follow pointers on-demand. Same token cost regardless of project size.
60
+ - Letta reports plain filesystem memory reached **74% on LoCoMo**, outperforming specialized memory tool libraries in that benchmark.
61
+ - Mem0 claims large token savings versus replaying full conversation history.
62
+
63
+ Takeaway: curated memory can be good enough long before sophisticated infrastructure pays off.
64
+
65
+ ### 4. Single Control Surface
66
+
67
+ The team now standardizes on `AGENTS.md`.
68
+
69
+ That keeps instruction routing centralized instead of scattering behavior across per-tool memory systems and generated config files.
70
+
71
+ ### 5. Auditability
72
+
73
+ Markdown in git is easy to inspect, diff, review, sync, and repair. That is operationally simpler than SQLite, graph stores, or opaque hosted systems.
138
74
 
139
75
  ---
140
76
 
141
- ## Token Cost Comparison
77
+ ## Evidence
142
78
 
143
- ### Session 1: Find Storybook Rules (memsearch)
144
- ```
145
- memory_search query: 100 tokens
146
- memory_get fails (file lock)
147
- fallback to memory_transcript
148
- memory_transcript (19 calls): 47,389 tokens
149
- TOTAL: 47,489 tokens
150
- ```
79
+ ### Personal Observations
80
+
81
+ - Windows/VM compatibility is a real blocker for native-module and model-heavy tools.
82
+ - Auto-capture systems can fail to connect a later command like "remember this" with the earlier analysis that gave it meaning.
83
+ - When fallback retrieval pulls transcripts instead of distilled memory, token cost becomes unreasonable.
84
+ - Repo clutter matters. A memory system that spreads configuration across many files raises maintenance cost.
85
+
86
+ ### External Research
87
+
88
+ - **Filesystem is stronger than expected**: Letta benchmarked file-backed memory at **74% on LoCoMo**.
89
+ - **Reflect pattern is emerging**: Claude Diary, fsck.com's episodic memory, and claude-mem all use some form of observation plus reflection loop.
90
+ - **Sophisticated systems do help**: Mem0 and Zep show better retrieval and richer memory operations when infrastructure is acceptable.
91
+ - **Field still fragmented**: surveys from 2025-2026 show no single architecture has clearly won.
92
+
93
+ ---
94
+
95
+ ## Current Architecture
151
96
 
152
- ### Session 2: Find Storybook Rules (file-based)
153
97
  ```
154
- Read MEMORY.md: 200 tokens
155
- Follow pointer → spec file: 500 tokens
156
- TOTAL: 700 tokens
98
+ Layer 1: AGENTS.md
99
+ - Critical rules
100
+ - Routing pointers
101
+ - Single entrypoint
102
+
103
+ Layer 2: .agents/MEMORY.md
104
+ - Curated one-line decisions
105
+ - Fast scan
106
+
107
+ Layer 3: .agents/TASKS.md
108
+ - In-progress work
109
+ - Session continuity
110
+
111
+ Layer 4: .agents/spec/*.md
112
+ - Detailed patterns
113
+ - Loaded only on demand
157
114
  ```
158
115
 
159
- **Ratio**: 66x cheaper with file-based approach.
116
+ Design principle: **progressive disclosure**. Read a small amount first, then follow pointers only when needed.
160
117
 
161
118
  ---
162
119
 
163
- ## Why "Manual" (Agent Rules) > "Automatic"
120
+ ## Trade-offs
121
+
122
+ This choice is pragmatic, not free.
123
+
124
+ 1. **Manual discipline required**
125
+ Agents must write useful memory entries. If they do not, memory quality degrades.
164
126
 
165
- ### Automatic Capture (memsearch, qmd, mem0)
166
- - Context Blindness: Can't link decision to prior context
167
- - ❌ Context Bloat: Fallback pulls massive raw data
168
- - ❌ Platform Bloat: Needs dependencies (sqlite, milvus, HF)
169
- - ✅ Zero manual effort
127
+ 2. **No semantic retrieval layer**
128
+ Keyword scan and file pointers are simpler, but weaker than graph or vector retrieval once memory grows.
170
129
 
171
- ### Agent Rules (File-based)
172
- - Context aware: Agent writes when they understand
173
- - ✅ Curated: Only important decisions survive
174
- - ✅ Portable: Works everywhere (no dependencies)
175
- - ✅ No plugin maintenance burden
130
+ 3. **No temporal weighting or decay**
131
+ We do not rank memories by freshness, importance, or confidence.
176
132
 
177
- **Verdict**: Quality + Portability > Automation for teams of 1-10.
133
+ 4. **Lower ceiling**
134
+ Specialized systems can outperform file-based memory on harder multi-session retrieval problems.
178
135
 
179
136
  ---
180
137
 
181
- ## Cross-IDE Reality Check
138
+ ## When To Reconsider
182
139
 
183
- ### What Works Now
184
- - OpenCode: Reads `AGENTS.md` natively
185
- - Copilot: Reads `.github/copilot-instructions.md` natively
186
- - Cursor: Reads `.cursorrules`
187
- - Windsurf: Reads `.windsurfrules`
140
+ Revisit this choice if any of these become true:
188
141
 
189
- All IDEs follow rules in their config file → agent writes to `.agents/MEMORY.md` when appropriate.
142
+ - Team grows beyond roughly 10 people
143
+ - Memory store grows beyond a few hundred important entries
144
+ - We need semantic retrieval across many related projects
145
+ - We need automatic capture with less reliance on agent discipline
146
+ - We need temporal ranking, decay, or confidence scoring
147
+
148
+ At that point, a hybrid design may make more sense: file-based decision memory plus indexed search over archived sessions.
149
+
150
+ ---
151
+
152
+ ## Non-Goals
153
+
154
+ - Not trying to build a universal memory layer for every agent platform
155
+ - Not trying to maximize benchmark accuracy at any infrastructure cost
156
+ - Not replacing knowledge graphs or vector search for large-scale organizational memory
157
+ - Not solving long-term security hardening for persistent agent memory yet
190
158
 
191
159
  ---
192
160
 
193
161
  ## Conclusion
194
162
 
195
- **For teams 1-10 working on focused projects:**
163
+ For the team's needs, file-based memory coordinated by `AGENTS.md` is the best current fit.
164
+
165
+ It wins on portability, simplicity, auditability, and cost. It loses on automation and retrieval sophistication. That is an acceptable trade for a small team working in constrained environments.
196
166
 
197
- File-based memory + agent rules beats every alternative because it:
198
- 1. Works on VM Windows 11 (no build tools, no native modules)
199
- 2. Doesn't bloat token budget (700 vs 47k tokens)
200
- 3. Works with all IDEs without custom drivers
201
- 4. Shares naturally via git (Host ↔ VM ↔ Team)
202
- 5. Is transparent and auditable
167
+ ---
203
168
 
204
- **Automated memory capture fails** because:
205
- - Context Blindness: Can't reliably link user intent to prior analysis
206
- - Context Bloat: Fallback to raw transcripts costs 47k+ tokens
207
- - Platform bloat: Requires dependencies that don't compile on Windows/VM
169
+ ## Sources
208
170
 
209
- **The right trade-off**: Sacrifice full automation for reliability, portability, and cost.
171
+ - [spikelab/memory-systems-ai-agents-research.md](https://gist.github.com/spikelab/7551c6368e23caa06a4056350f6b2db3)
172
+ - Letta benchmark discussion and filesystem results, as cited in the source above
173
+ - Mem0 architecture and performance claims, as cited in the source above
174
+ - Claude Diary, fsck.com episodic memory, and claude-mem examples, as cited in the source above
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@vuau/agent-memory",
3
- "version": "0.5.2",
3
+ "version": "0.5.4",
4
4
  "description": "Structured AI memory for codebases — scaffolding CLI for OpenCode, Copilot, Cursor, Windsurf",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",
@@ -35,6 +35,14 @@ Before ending a session with unfinished work, move items to `## In Progress` or
35
35
  - If MEMORY.md > 150 lines, archive old entries.
36
36
  - Do not create additional memory files outside `.agents/`.
37
37
 
38
+ ## Coding Principles
39
+ - State assumptions before implementing. If ambiguous, ask — don't pick silently.
40
+ - Surgical changes: touch only what's needed, match existing style.
41
+ - Remove only dead code YOUR changes created. Mention pre-existing issues, don't fix.
42
+ - Every changed line must trace to user's request.
43
+ - Complex tasks: define success criteria, verify before done.
44
+ - Full details: `.agents/spec/coding-principles.md`
45
+
38
46
  ## Response Style
39
47
  - Concrete, implementation-focused, caveman style (minimum words, zero fluff).
40
48
  - Propose the simplest solution first (KISS & YAGNI) before writing code.
@@ -0,0 +1,66 @@
1
+ # Coding Principles
2
+
3
+ > **Note**: This file is automatically managed by `@vuau/agent-memory`.
4
+ > Do not edit — changes will be overwritten on `agent-memory update`.
5
+
6
+ Behavioral guidelines to reduce common LLM coding mistakes. For trivial tasks, use judgment.
7
+
8
+ ## 1. Think Before Coding
9
+
10
+ **Don't assume. Don't hide confusion. Surface tradeoffs.**
11
+
12
+ Before implementing:
13
+ - State your assumptions explicitly. If uncertain, ask.
14
+ - If multiple interpretations exist, present them — don't pick silently.
15
+ - If a simpler approach exists, say so. Push back when warranted.
16
+ - If something is unclear, stop. Name what's confusing. Ask.
17
+
18
+ ## 2. Simplicity First
19
+
20
+ **Minimum code that solves the problem. Nothing speculative.**
21
+
22
+ - No features beyond what was asked.
23
+ - No abstractions for single-use code.
24
+ - No "flexibility" or "configurability" that wasn't requested.
25
+ - No error handling for impossible scenarios.
26
+ - If you write 200 lines and it could be 50, rewrite it.
27
+
28
+ Litmus test: "Would a senior engineer say this is overcomplicated?" If yes, simplify.
29
+
30
+ ## 3. Surgical Changes
31
+
32
+ **Touch only what you must. Clean up only your own mess.**
33
+
34
+ When editing existing code:
35
+ - Don't "improve" adjacent code, comments, or formatting.
36
+ - Don't refactor things that aren't broken.
37
+ - Match existing style, even if you'd do it differently.
38
+ - If you notice unrelated dead code, mention it — don't delete it.
39
+
40
+ When your changes create orphans:
41
+ - Remove imports/variables/functions that YOUR changes made unused.
42
+ - Don't remove pre-existing dead code unless asked.
43
+
44
+ Litmus test: Every changed line should trace directly to the user's request.
45
+
46
+ ## 4. Goal-Driven Execution
47
+
48
+ **Define success criteria. Loop until verified.**
49
+
50
+ Transform tasks into verifiable goals:
51
+ - "Add validation" → "Write tests for invalid inputs, then make them pass"
52
+ - "Fix the bug" → "Write a test that reproduces it, then make it pass"
53
+ - "Refactor X" → "Ensure tests pass before and after"
54
+
55
+ For multi-step tasks, state a brief plan:
56
+ ```
57
+ 1. [Step] → verify: [check]
58
+ 2. [Step] → verify: [check]
59
+ 3. [Step] → verify: [check]
60
+ ```
61
+
62
+ Strong success criteria let you loop independently. Weak criteria ("make it work") require constant clarification.
63
+
64
+ ---
65
+
66
+ **These guidelines are working if:** fewer unnecessary changes in diffs, fewer rewrites due to overcomplication, and clarifying questions come before implementation rather than after mistakes.
File without changes