npm - @vuau/agent-memory - Versions diffs - 0.5.2 → 0.5.4 - Mend

@vuau/agent-memory 0.5.2 → 0.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

package/dist/bin/cli.js +21 -2
package/dist/index.js +21 -2
package/docs/RESEARCH.md +124 -159
package/package.json +1 -1
package/templates/AGENTS.md +8 -0
package/templates/spec/coding-principles.md +66 -0
package/templates/spec/.gitkeep +0 -0

package/dist/bin/cli.js CHANGED Viewed

@@ -72,8 +72,17 @@ function scaffold(projectDir, options = {}) {
     writeFileSync(targetPath, content);
     result.created.push(target);
   }
+  const managedSpecs = [
+    { target: `${SPEC_DIR}/coding-principles.md`, template: "spec/coding-principles.md" }
+  ];
+  for (const { target, template } of managedSpecs) {
+    const targetPath = join(projectDir, target);
+    const content = applyVars(readTemplate(template), vars);
+    writeFileSync(targetPath, content);
+    result.created.push(target);
+  }
   const specKeep = join(projectDir, SPEC_DIR, ".gitkeep");
-  if (!existsSync(specKeep)) {
+  if (!existsSync(specKeep) && !managedSpecs.length) {
     writeFileSync(specKeep, "");
     result.created.push(`${SPEC_DIR}/.gitkeep`);
   }
@@ -112,6 +121,15 @@ function updateRouter(projectDir) {
   const vars = { PROJECT_NAME: projectName };
   const content = applyVars(readTemplate("AGENTS.md"), vars);
   writeFileSync(targetPath, content);
+  const managedSpecs = [
+    { target: `${SPEC_DIR}/coding-principles.md`, template: "spec/coding-principles.md" }
+  ];
+  for (const { target, template } of managedSpecs) {
+    const specPath = join(projectDir, target);
+    const specDir = dirname(specPath);
+    if (!existsSync(specDir)) mkdirSync(specDir, { recursive: true });
+    writeFileSync(specPath, applyVars(readTemplate(template), vars));
+  }
   return true;
 }
@@ -124,7 +142,8 @@ function doctor(projectDir) {
     { file: AGENTS_MD, desc: "Root router file" },
     { file: CUSTOM_FILE, desc: "Project specific rules" },
     { file: MEMORY_FILE, desc: "Long-term memory" },
-    { file: TASKS_FILE, desc: "Working memory" }
+    { file: TASKS_FILE, desc: "Working memory" },
+    { file: `${SPEC_DIR}/coding-principles.md`, desc: "Coding principles (run `agent-memory update` to create)" }
   ];
   for (const { file, desc } of required) {
     const filePath = join2(projectDir, file);

package/dist/index.js CHANGED Viewed

@@ -66,8 +66,17 @@ function scaffold(projectDir, options = {}) {
     writeFileSync(targetPath, content);
     result.created.push(target);
   }
+  const managedSpecs = [
+    { target: `${SPEC_DIR}/coding-principles.md`, template: "spec/coding-principles.md" }
+  ];
+  for (const { target, template } of managedSpecs) {
+    const targetPath = join(projectDir, target);
+    const content = applyVars(readTemplate(template), vars);
+    writeFileSync(targetPath, content);
+    result.created.push(target);
+  }
   const specKeep = join(projectDir, SPEC_DIR, ".gitkeep");
-  if (!existsSync(specKeep)) {
+  if (!existsSync(specKeep) && !managedSpecs.length) {
     writeFileSync(specKeep, "");
     result.created.push(`${SPEC_DIR}/.gitkeep`);
   }
@@ -106,6 +115,15 @@ function updateRouter(projectDir) {
   const vars = { PROJECT_NAME: projectName };
   const content = applyVars(readTemplate("AGENTS.md"), vars);
   writeFileSync(targetPath, content);
+  const managedSpecs = [
+    { target: `${SPEC_DIR}/coding-principles.md`, template: "spec/coding-principles.md" }
+  ];
+  for (const { target, template } of managedSpecs) {
+    const specPath = join(projectDir, target);
+    const specDir = dirname(specPath);
+    if (!existsSync(specDir)) mkdirSync(specDir, { recursive: true });
+    writeFileSync(specPath, applyVars(readTemplate(template), vars));
+  }
   return true;
 }
@@ -118,7 +136,8 @@ function doctor(projectDir) {
     { file: AGENTS_MD, desc: "Root router file" },
     { file: CUSTOM_FILE, desc: "Project specific rules" },
     { file: MEMORY_FILE, desc: "Long-term memory" },
-    { file: TASKS_FILE, desc: "Working memory" }
+    { file: TASKS_FILE, desc: "Working memory" },
+    { file: `${SPEC_DIR}/coding-principles.md`, desc: "Coding principles (run `agent-memory update` to create)" }
   ];
   for (const { file, desc } of required) {
     const filePath = join2(projectDir, file);

package/docs/RESEARCH.md CHANGED Viewed

@@ -1,209 +1,174 @@
-# Research: AI Memory Solutions — Tools Tried & Why File-Based Won
+# Research: Why the Team Chose File-Based Agent Memory
-## The Problem
+> Decision memo based on the team's operating constraints.
+> External research source: [spikelab/memory-systems-ai-agents-research.md](https://gist.github.com/spikelab/7551c6368e23caa06a4056350f6b2db3) — researched 2025-12-02, updated 2026-02-06, 60+ sources.
-AI assistants (OpenCode, Copilot, Cursor, Windsurf) lose context between sessions. They need:
-- ✅ Local-first (no API keys)
-- ✅ Cross-IDE (OpenCode, VS Code, Windsurf, Antigravity)
-- ✅ Cross-platform (Host + VM, Windows 11 + Linux)
-- ✅ Persistent memory for decisions
-- ✅ Low token overhead
-- ✅ Reliable retrieval
+---
+## Decision
-## Tools Evaluated
+The team chose **file-based memory coordinated by `AGENTS.md`**.
-| Tool | Auto-capture | VM compatible | Blocker | Status |
-|------|--------------|---------------|---------|--------|
-| **qmd** | ❌ (search only) | ❌ | better-sqlite3 needs Visual Studio Build Tools; HuggingFace blocked | ❌ Failed |
-| **memsearch** | ✅ (daemon hooks) | ❌ | milvus-lite has no Windows wheels; HuggingFace blocked | ❌ Failed |
-| **mem0** | ✅ (hooks) | ❌ | Requires OpenAI API key or HuggingFace models | ❌ Failed |
-| **memories.sh** | ✅ (MCP) | ✅ | Auto-generates 10+ IDE config files (bloats repo) | ⚠️ Rejected |
-| **codemem** | ❌ | ❌ | Flaky (unreliable save/recall) | ⚠️ Rejected |
-| **File-based + rules** | Manual (via rules) | ✅ | None | ✅ **CHOSEN** |
+Reason: it is the best fit for the team's constraints, not a universally best solution.
 ---
-## Why Each Failed
+## Constraints
-### qmd
-**Blocker**: `better-sqlite3` native module requires Visual Studio Build Tools to compile on Windows. HuggingFace is also blocked in many environments.
-**Impact**: Cannot run on VM Windows 11 → breaks cross-platform requirement.
+The team needs a memory system that is:
-### memsearch
-**Blockers**:
-1. `milvus-lite` has no pre-built wheels for Windows → must compile from source
-2. HuggingFace models blocked in isolated environments
-3. **Context Blindness**: Auto-capture can't link user command ("remember this info") to 10-line analysis from previous turn → writes "No context provided" error
-4. **Context Bloat**: Falls back to `memory_transcript`, pulling 19 old tool calls into context = **47,389 tokens (24% of context budget)**
+- Local-first
+- Cross-platform: Host + VM, Windows 11 + Linux
+- Usable across editors via one control file: `AGENTS.md`
+- Cheap in token overhead
+- Reliable in retrieval
+- Transparent and auditable
-**Impact**: Unreliable on Windows + VM. Token cost makes it unusable for real work.
+---
-### mem0
-**Blocker**: Requires OpenAI API key or HuggingFace (both violate "local-first, no API key" requirement).
-**Impact**: Not viable for local-first requirement.
+## Alternatives Considered
-### memories.sh
-**Reevaluation**:
-- ✅ CLI tool (good)
-- ✅ Local-first (good)
-- ✅ MCP support (cross-IDE capable)
-- ✅ Clear memory fragmentation (Session, Semantic, Episodic, Procedural)
-- ❌ **Auto-generates 10+ config files per IDE** (`.memories.sh` configs for Zsh, Bash, Fish, Zed, Helix, Neovim, etc.)
-- ❌ Conflicts with "lightweight, centralized control" goal
+| Tool | Good at | Why not chosen |
+|------|---------|----------------|
+| **qmd** | Local search | `better-sqlite3` and HuggingFace requirements break Windows/VM portability |
+| **memsearch** | Auto-capture | Windows packaging issues plus context blindness and transcript bloat |
+| **mem0** | Managed long-term memory | Requires OpenAI API or HuggingFace, violates local-first constraint |
+| **memories.sh** | MCP-based memory | Generates many tool/editor config files, conflicts with minimal centralized control |
+| **codemem** | Lightweight idea | Retrieval/save behavior too flaky for decision memory |
+| **File-based + rules** | Simplicity, portability, auditability | Chosen |
-**Decision**: Violates architecture principle of keeping configuration minimal and in one place. Repository becomes cluttered.
+---
-### codemem
-**Issue**: Flaky (sometimes saves, sometimes doesn't). No consistent retrieval.
-**Impact**: Cannot be relied upon for critical decisions.
+## Why This Approach Fits the Team
----
+### 1. Portability First
-## Why File-Based Won
-### 1. **No Environmental Blockers**
-- Plain text files work everywhere (Host, VM, Windows 11, Linux)
-- No native modules, no HuggingFace, no build tools required
-- ✅ Cross-platform by default
-### 2. **Handles Context Blindness**
-- **Problem with auto-capture**: System can't reliably link user intent ("remember this") to prior technical analysis
-- **Solution**: Agents write memory **when they understand context**
-  - Agent just finished exploring codebase → writes 1-line decision
-  - User approved decision → agent appends to MEMORY.md
-  - Agent reads MEMORY.md before implementing → follows pointer to spec file
-- Result: Context is always linked because agent is in session when writing
-### 3. **Solves Context Bloat**
-- Auto-capture tools fail gracefully → pull raw transcripts (~47k tokens)
-- File-based stores curated 1-liners (~200 tokens)
-- **66x cheaper per session**
-### 4. **IDE Portability**
-| IDE | Integration | Works Now |
-|-----|-------------|-----------|
-| OpenCode | Reads `AGENTS.md` | ✅ Yes |
-| GitHub Copilot | Reads `.github/copilot-instructions.md` | ✅ Yes |
-| Cursor | Reads `.cursorrules` | ✅ Yes |
-| Windsurf | Reads `.windsurfrules` | ✅ Yes |
-No custom plugin needed per IDE. Markdown is portable.
-### 5. **Sharing & Sync**
-- Lives in git repo → automatically shared via Git/Rsync/Dropbox
-- Developers see decision history in commits
-- Can be backed up, versioned, audited
-- No external database to sync across machines
-### 6. **Transparent & Auditable**
-- Human-readable: can review MEMORY.md directly
-- No "locked in SQLite/Vector DB" problem
-- No export/import needed
-- Git history shows who decided what and when
+Plain text works everywhere. No native modules, no vector DB, no model downloads, no build tool chain.
----
+### 2. Better Context Linking
-## Architecture: 4-Layer Design
+Automatic capture often stores events without enough surrounding intent. File-based memory shifts write time to the moment when the agent already understands the decision.
-### Why 4 Layers?
+That matters more than raw capture volume.
-**Problem**: Automatic memory systems fail when:
-1. They can't link user intent to prior context (blindness)
-2. They generate massive output when fallback fails (bloat)
-3. They aren't portable across environments
+### 3. Low Overhead
-**Solution**: Explicit layers that separate concerns:
+Team experience: curated notes plus spec pointers are dramatically cheaper than replaying raw transcripts.
-```
-Layer 1: Router (AGENTS.md, ~100 lines)
-├─ Critical rules + pointers only
-└─ Every IDE reads this first
-Layer 2: Memory (MEMORY.md, ~150 lines)
-├─ Curated 1-line decisions
-├─ Category headers with spec pointers
-└─ Agent reads before implementing
-Layer 3: Tasks (TASKS.md)
-├─ Current work, in-progress, next steps
-└─ Enables session continuity
-Layer 4: Specs (spec/*.md, on-demand)
-├─ Detailed patterns, examples
-├─ Referenced by Layer 2
-└─ Agent loads only when needed
-```
+External research points in the same direction:
-**Progressive Disclosure**: Agents read ~200 tokens initially, follow pointers on-demand. Same token cost regardless of project size.
+- Letta reports plain filesystem memory reached **74% on LoCoMo**, outperforming specialized memory tool libraries in that benchmark.
+- Mem0 claims large token savings versus replaying full conversation history.
+Takeaway: curated memory can be good enough long before sophisticated infrastructure pays off.
+### 4. Single Control Surface
+The team now standardizes on `AGENTS.md`.
+That keeps instruction routing centralized instead of scattering behavior across per-tool memory systems and generated config files.
+### 5. Auditability
+Markdown in git is easy to inspect, diff, review, sync, and repair. That is operationally simpler than SQLite, graph stores, or opaque hosted systems.
 ---
-## Token Cost Comparison
+## Evidence
-### Session 1: Find Storybook Rules (memsearch)
-```
-memory_search query:           100 tokens
-memory_get fails              (file lock)
-↓ fallback to memory_transcript
-memory_transcript (19 calls): 47,389 tokens
-TOTAL:                        47,489 tokens
-```
+### Personal Observations
+- Windows/VM compatibility is a real blocker for native-module and model-heavy tools.
+- Auto-capture systems can fail to connect a later command like "remember this" with the earlier analysis that gave it meaning.
+- When fallback retrieval pulls transcripts instead of distilled memory, token cost becomes unreasonable.
+- Repo clutter matters. A memory system that spreads configuration across many files raises maintenance cost.
+### External Research
+- **Filesystem is stronger than expected**: Letta benchmarked file-backed memory at **74% on LoCoMo**.
+- **Reflect pattern is emerging**: Claude Diary, fsck.com's episodic memory, and claude-mem all use some form of an observation-plus-reflection loop.
+- **Sophisticated systems do help**: Mem0 and Zep show better retrieval and richer memory operations when infrastructure is acceptable.
+- **Field is still fragmented**: surveys from 2025-2026 show no single architecture has clearly won.
+---
+## Current Architecture
-### Session 2: Find Storybook Rules (file-based)
 ```
-Read MEMORY.md:                 200 tokens
-Follow pointer → spec file:     500 tokens
-TOTAL:                          700 tokens
+Layer 1: AGENTS.md
+- Critical rules
+- Routing pointers
+- Single entrypoint
+Layer 2: .agents/MEMORY.md
+- Curated one-line decisions
+- Fast scan
+Layer 3: .agents/TASKS.md
+- In-progress work
+- Session continuity
+Layer 4: .agents/spec/*.md
+- Detailed patterns
+- Loaded only on demand
 ```
-**Ratio**: 66x cheaper with file-based approach.
+Design principle: **progressive disclosure**. Read a small amount first, then follow pointers only when needed.
 ---
-## Why "Manual" (Agent Rules) > "Automatic"
+## Trade-offs
+This choice is pragmatic, not free.
+1. **Manual discipline required**
+Agents must write useful memory entries. If they do not, memory quality degrades.
-### Automatic Capture (memsearch, qmd, mem0)
-- ❌ Context Blindness: Can't link decision to prior context
-- ❌ Context Bloat: Fallback pulls massive raw data
-- ❌ Platform Bloat: Needs dependencies (sqlite, milvus, HF)
-- ✅ Zero manual effort
+2. **No semantic retrieval layer**
+Keyword scan and file pointers are simpler, but weaker than graph or vector retrieval once memory grows.
-### Agent Rules (File-based)
-- ✅ Context aware: Agent writes when they understand
-- ✅ Curated: Only important decisions survive
-- ✅ Portable: Works everywhere (no dependencies)
-- ✅ No plugin maintenance burden
+3. **No temporal weighting or decay**
+We do not rank memories by freshness, importance, or confidence.
-**Verdict**: Quality + Portability > Automation for teams of 1-10.
+4. **Lower ceiling**
+Specialized systems can outperform file-based memory on harder multi-session retrieval problems.
 ---
-## Cross-IDE Reality Check
+## When To Reconsider
-### ✅ What Works Now
-- OpenCode: Reads `AGENTS.md` natively
-- Copilot: Reads `.github/copilot-instructions.md` natively
-- Cursor: Reads `.cursorrules`
-- Windsurf: Reads `.windsurfrules`
+Revisit this choice if any of these become true:
-All IDEs follow rules in their config file → agent writes to `.agents/MEMORY.md` when appropriate.
+- Team grows beyond roughly 10 people
+- Memory store grows beyond a few hundred important entries
+- We need semantic retrieval across many related projects
+- We need automatic capture with less reliance on agent discipline
+- We need temporal ranking, decay, or confidence scoring
+At that point, a hybrid design may make more sense: file-based decision memory plus indexed search over archived sessions.
+---
+## Non-Goals
+- Not trying to build a universal memory layer for every agent platform
+- Not trying to maximize benchmark accuracy at any infrastructure cost
+- Not replacing knowledge graphs or vector search for large-scale organizational memory
+- Not solving long-term security hardening for persistent agent memory yet
 ---
 ## Conclusion
-**For teams 1-10 working on focused projects:**
+For the team's needs, file-based memory coordinated by `AGENTS.md` is the best current fit.
+It wins on portability, simplicity, auditability, and cost. It loses on automation and retrieval sophistication. That is an acceptable trade for a small team working in constrained environments.
-File-based memory + agent rules beats every alternative because it:
-1. Works on VM Windows 11 (no build tools, no native modules)
-2. Doesn't bloat token budget (700 vs 47k tokens)
-3. Works with all IDEs without custom drivers
-4. Shares naturally via git (Host ↔ VM ↔ Team)
-5. Is transparent and auditable
+---
-**Automated memory capture fails** because:
-- Context Blindness: Can't reliably link user intent to prior analysis
-- Context Bloat: Fallback to raw transcripts costs 47k+ tokens
-- Platform bloat: Requires dependencies that don't compile on Windows/VM
+## Sources
-**The right trade-off**: Sacrifice full automation for reliability, portability, and cost.
+- [spikelab/memory-systems-ai-agents-research.md](https://gist.github.com/spikelab/7551c6368e23caa06a4056350f6b2db3)
+- Letta benchmark discussion and filesystem results, as cited in the source above
+- Mem0 architecture and performance claims, as cited in the source above
+- Claude Diary, fsck.com episodic memory, and claude-mem examples, as cited in the source above

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@vuau/agent-memory",
-  "version": "0.5.2",
+  "version": "0.5.4",
   "description": "Structured AI memory for codebases — scaffolding CLI for OpenCode, Copilot, Cursor, Windsurf",
   "type": "module",
   "main": "./dist/index.js",

package/templates/AGENTS.md CHANGED Viewed

@@ -35,6 +35,14 @@ Before ending a session with unfinished work, move items to `## In Progress` or
 - If MEMORY.md > 150 lines, archive old entries.
 - Do not create additional memory files outside `.agents/`.
+## Coding Principles
+- State assumptions before implementing. If ambiguous, ask — don't pick silently.
+- Surgical changes: touch only what's needed, match existing style.
+- Remove only dead code YOUR changes created. Mention pre-existing issues, don't fix.
+- Every changed line must trace to user's request.
+- Complex tasks: define success criteria, verify before done.
+- Full details: `.agents/spec/coding-principles.md`
 ## Response Style
 - Concrete, implementation-focused, caveman style (minimum words, zero fluff).
 - Propose the simplest solution first (KISS & YAGNI) before writing code.

package/templates/spec/coding-principles.md ADDED Viewed

@@ -0,0 +1,66 @@
+# Coding Principles
+> **Note**: This file is automatically managed by `@vuau/agent-memory`.
+> Do not edit — changes will be overwritten on `agent-memory update`.
+Behavioral guidelines to reduce common LLM coding mistakes. For trivial tasks, use judgment.
+## 1. Think Before Coding
+**Don't assume. Don't hide confusion. Surface tradeoffs.**
+Before implementing:
+- State your assumptions explicitly. If uncertain, ask.
+- If multiple interpretations exist, present them — don't pick silently.
+- If a simpler approach exists, say so. Push back when warranted.
+- If something is unclear, stop. Name what's confusing. Ask.
+## 2. Simplicity First
+**Minimum code that solves the problem. Nothing speculative.**
+- No features beyond what was asked.
+- No abstractions for single-use code.
+- No "flexibility" or "configurability" that wasn't requested.
+- No error handling for impossible scenarios.
+- If you write 200 lines and it could be 50, rewrite it.
+Litmus test: "Would a senior engineer say this is overcomplicated?" If yes, simplify.
+## 3. Surgical Changes
+**Touch only what you must. Clean up only your own mess.**
+When editing existing code:
+- Don't "improve" adjacent code, comments, or formatting.
+- Don't refactor things that aren't broken.
+- Match existing style, even if you'd do it differently.
+- If you notice unrelated dead code, mention it — don't delete it.
+When your changes create orphans:
+- Remove imports/variables/functions that YOUR changes made unused.
+- Don't remove pre-existing dead code unless asked.
+Litmus test: Every changed line should trace directly to the user's request.
+## 4. Goal-Driven Execution
+**Define success criteria. Loop until verified.**
+Transform tasks into verifiable goals:
+- "Add validation" → "Write tests for invalid inputs, then make them pass"
+- "Fix the bug" → "Write a test that reproduces it, then make it pass"
+- "Refactor X" → "Ensure tests pass before and after"
+For multi-step tasks, state a brief plan:
+```
+1. [Step] → verify: [check]
+2. [Step] → verify: [check]
+3. [Step] → verify: [check]
+```
+Strong success criteria let you loop independently. Weak criteria ("make it work") require constant clarification.
+---
+**These guidelines are working if:** fewer unnecessary changes in diffs, fewer rewrites due to overcomplication, and clarifying questions come before implementation rather than after mistakes.

package/templates/spec/.gitkeep DELETED Viewed

File without changes