@thierrynakoa/fire-flow 10.0.0 → 12.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +9 -9
- package/ARCHITECTURE-DIAGRAM.md +7 -4
- package/COMMAND-REFERENCE.md +33 -13
- package/DOMINION-FLOW-OVERVIEW.md +581 -421
- package/QUICK-START.md +3 -3
- package/README.md +102 -45
- package/TROUBLESHOOTING.md +264 -264
- package/agents/fire-executor.md +200 -116
- package/agents/fire-fact-checker.md +276 -276
- package/agents/fire-phoenix-analyst.md +394 -0
- package/agents/fire-planner.md +145 -53
- package/agents/fire-project-researcher.md +155 -155
- package/agents/fire-research-synthesizer.md +166 -166
- package/agents/fire-researcher.md +144 -59
- package/agents/fire-roadmapper.md +215 -203
- package/agents/fire-verifier.md +247 -65
- package/agents/fire-vision-architect.md +381 -0
- package/commands/fire-0-orient.md +476 -476
- package/commands/fire-1a-new.md +216 -0
- package/commands/fire-1b-research.md +210 -0
- package/commands/fire-1c-setup.md +254 -0
- package/commands/{fire-1a-discuss.md → fire-1d-discuss.md} +35 -7
- package/commands/fire-3-execute.md +55 -2
- package/commands/fire-4-verify.md +61 -0
- package/commands/fire-5-handoff.md +2 -2
- package/commands/fire-6-resume.md +37 -2
- package/commands/fire-add-new-skill.md +2 -2
- package/commands/fire-autonomous.md +20 -3
- package/commands/fire-brainstorm.md +1 -1
- package/commands/fire-complete-milestone.md +2 -2
- package/commands/fire-cost.md +183 -0
- package/commands/fire-dashboard.md +2 -2
- package/commands/fire-debug.md +663 -663
- package/commands/fire-loop-resume.md +2 -2
- package/commands/fire-loop-stop.md +1 -1
- package/commands/fire-loop.md +1168 -1168
- package/commands/fire-map-codebase.md +3 -3
- package/commands/fire-new-milestone.md +356 -356
- package/commands/fire-phoenix.md +603 -0
- package/commands/fire-reflect.md +235 -235
- package/commands/fire-research.md +246 -246
- package/commands/fire-search.md +1 -1
- package/commands/fire-skills-diff.md +3 -3
- package/commands/fire-skills-history.md +3 -3
- package/commands/fire-skills-rollback.md +7 -7
- package/commands/fire-skills-sync.md +5 -5
- package/commands/fire-test.md +9 -9
- package/commands/fire-todos.md +1 -1
- package/commands/fire-update.md +5 -5
- package/hooks/hooks.json +16 -16
- package/hooks/run-hook.sh +8 -8
- package/hooks/run-session-end.sh +7 -7
- package/hooks/session-end.sh +90 -90
- package/hooks/session-start.sh +1 -1
- package/package.json +2 -2
- package/plugin.json +7 -7
- package/references/metrics-and-trends.md +1 -1
- package/skills-library/SKILLS-INDEX.md +588 -588
- package/skills-library/_general/methodology/AUTONOMOUS_ORCHESTRATION.md +182 -0
- package/skills-library/_general/methodology/BACKWARD_PLANNING_INTERVIEW.md +307 -0
- package/skills-library/_general/methodology/CIRCUIT_BREAKER_INTELLIGENCE.md +163 -0
- package/skills-library/_general/methodology/CONTEXT_ROTATION.md +151 -0
- package/skills-library/_general/methodology/DEAD_ENDS_SHELF.md +188 -0
- package/skills-library/_general/methodology/DESIGN_PHILOSOPHY_ENFORCEMENT.md +152 -0
- package/skills-library/_general/methodology/INTERNAL_CONSISTENCY_AUDIT.md +212 -0
- package/skills-library/_general/methodology/LIVE_BREADCRUMB_PROTOCOL.md +242 -0
- package/skills-library/_general/methodology/PHOENIX_REBUILD_METHODOLOGY.md +251 -0
- package/skills-library/_general/methodology/QUALITY_GATES_AND_VERIFICATION.md +157 -0
- package/skills-library/_general/methodology/RELIABILITY_PREDICTION.md +104 -0
- package/skills-library/_general/methodology/REQUIREMENTS_DECOMPOSITION.md +155 -0
- package/skills-library/_general/methodology/SELF_TESTING_FEEDBACK_LOOP.md +143 -0
- package/skills-library/_general/methodology/STACK_COMPATIBILITY_MATRIX.md +178 -0
- package/skills-library/_general/methodology/TIERED_CONTEXT_ARCHITECTURE.md +118 -0
- package/skills-library/_general/methodology/ZERO_FRICTION_CLI_SETUP.md +312 -0
- package/skills-library/_general/methodology/autonomous-multi-phase-build.md +133 -0
- package/skills-library/_general/methodology/claude-md-archival.md +280 -0
- package/skills-library/_general/methodology/debug-swarm-researcher-escape-hatch.md +240 -240
- package/skills-library/_general/methodology/git-worktrees-parallel.md +232 -0
- package/skills-library/_general/methodology/llm-judge-memory-crud.md +241 -0
- package/skills-library/_general/methodology/multi-project-autonomous-build.md +360 -0
- package/skills-library/_general/methodology/shell-autonomous-loop-fixplan.md +238 -238
- package/skills-library/_general/patterns-standards/GOF_DESIGN_PATTERNS_FOR_AI_AGENTS.md +358 -0
- package/skills-library/methodology/BREATH_BASED_PARALLEL_EXECUTION.md +1 -1
- package/skills-library/methodology/RESEARCH_BACKED_WORKFLOW_UPGRADE.md +1 -1
- package/skills-library/methodology/SABBATH_REST_PATTERN.md +1 -1
- package/templates/ASSUMPTIONS.md +1 -1
- package/templates/BLOCKERS.md +1 -1
- package/templates/DECISION_LOG.md +1 -1
- package/templates/phase-prompt.md +1 -1
- package/templates/phoenix-comparison.md +80 -0
- package/version.json +2 -2
- package/workflows/handoff-session.md +1 -1
- package/workflows/new-project.md +2 -2
- package/commands/fire-1-new.md +0 -281
|
@@ -0,0 +1,232 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: git-worktrees-parallel
|
|
3
|
+
category: methodology
|
|
4
|
+
version: 1.0.0
|
|
5
|
+
contributed: 2026-03-04
|
|
6
|
+
contributor: global
|
|
7
|
+
last_updated: 2026-03-04
|
|
8
|
+
tags: [git, worktrees, parallel, agents, isolation, productivity, boris-cherny]
|
|
9
|
+
difficulty: easy
|
|
10
|
+
scope: general
|
|
11
|
+
---
|
|
12
|
+
|
|
13
|
+
# Git Worktrees — Parallel Claude Sessions
|
|
14
|
+
|
|
15
|
+
## Why This Matters
|
|
16
|
+
|
|
17
|
+
> "Do more in parallel. Spin up 3–5 git worktrees at once, each running its own Claude session in parallel. It's the **single biggest productivity unlock**, and the top tip from the team."
|
|
18
|
+
> — **Boris Cherny, creator of Claude Code**
|
|
19
|
+
|
|
20
|
+
Anthropic built native `--worktree` support directly into the Claude Code CLI because of this. It's not an optional best practice — it's how the Claude Code team itself works.
|
|
21
|
+
|
|
22
|
+
**The problem without worktrees:** One Claude session works on one branch. Any parallel work means stashing, switching, or risking conflicts between tasks.
|
|
23
|
+
|
|
24
|
+
**The solution:** Each task gets its own isolated working directory + branch. Sessions never interfere. Work in parallel by default.
|
|
25
|
+
|
|
26
|
+
## When to Use Worktrees
|
|
27
|
+
|
|
28
|
+
Use a worktree for any task that:
|
|
29
|
+
- Takes more than a few minutes
|
|
30
|
+
- Involves more than one or two file changes
|
|
31
|
+
- Could conflict with other in-progress work
|
|
32
|
+
- Is independent enough to run alongside another task
|
|
33
|
+
|
|
34
|
+
**Default to worktrees for every non-trivial feature or bugfix.**
|
|
35
|
+
|
|
36
|
+
## Core Commands
|
|
37
|
+
|
|
38
|
+
### Start Claude in a Worktree (CLI)
|
|
39
|
+
|
|
40
|
+
```bash
|
|
41
|
+
# Named worktree — name becomes the directory name; the branch is created as worktree-<name>
|
|
42
|
+
claude --worktree feature-auth
|
|
43
|
+
claude -w bugfix-payment-webhook
|
|
44
|
+
|
|
45
|
+
# Auto-named (Claude generates a random name)
|
|
46
|
+
claude --worktree
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
Worktrees are created at: `<repo>/.claude/worktrees/<name>/`
|
|
50
|
+
Branch created: `worktree-<name>`
|
|
51
|
+
|
|
52
|
+
### In-session Worktree Creation
|
|
53
|
+
|
|
54
|
+
Tell Claude during an active session:
|
|
55
|
+
```
|
|
56
|
+
> work in a worktree
|
|
57
|
+
> start a worktree named auth-refactor
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
Claude creates and switches to it automatically.
|
|
61
|
+
|
|
62
|
+
### Name Your Sessions
|
|
63
|
+
|
|
64
|
+
Name sessions early so you can resume them later:
|
|
65
|
+
```
|
|
66
|
+
> /rename auth-refactor
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
Resume later:
|
|
70
|
+
```bash
|
|
71
|
+
claude --resume auth-refactor
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
### Add to .gitignore (one-time setup)
|
|
75
|
+
|
|
76
|
+
```bash
|
|
77
|
+
echo ".claude/worktrees/" >> .gitignore
|
|
78
|
+
git add .gitignore && git commit -m "chore: ignore Claude worktrees"
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
This prevents worktree contents from appearing as untracked files in the main repo.
|
|
82
|
+
|
|
83
|
+
## Running 3–5 Sessions in Parallel
|
|
84
|
+
|
|
85
|
+
```bash
|
|
86
|
+
# Terminal 1 — new auth feature
|
|
87
|
+
claude --worktree feature-oauth
|
|
88
|
+
|
|
89
|
+
# Terminal 2 — bug fix in parallel
|
|
90
|
+
claude --worktree bugfix-stripe-webhook
|
|
91
|
+
|
|
92
|
+
# Terminal 3 — UI polish
|
|
93
|
+
claude --worktree ui-dashboard-v2
|
|
94
|
+
|
|
95
|
+
# Terminal 4 — analysis/investigation (read-only)
|
|
96
|
+
claude --worktree analysis
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
Each Claude has its own branch, its own files. No conflicts. While one finishes, you're reviewing another.
|
|
100
|
+
|
|
101
|
+
Some teams create shell aliases (`za`, `zb`, `zc`) to hop between sessions in one keystroke. A dedicated `analysis` worktree for log reading and investigation is a common pattern.
|
|
102
|
+
|
|
103
|
+
## Subagent Worktree Isolation
|
|
104
|
+
|
|
105
|
+
When Claude spawns subagents, each can run in its own worktree:
|
|
106
|
+
|
|
107
|
+
### In the Agent Tool (for orchestrators)
|
|
108
|
+
|
|
109
|
+
```json
|
|
110
|
+
{
|
|
111
|
+
"isolation": "worktree"
|
|
112
|
+
}
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
Each subagent gets its own worktree that is automatically cleaned up when the subagent finishes without changes.
|
|
116
|
+
|
|
117
|
+
### In Custom Subagent Frontmatter (.claude/agents/)
|
|
118
|
+
|
|
119
|
+
```yaml
|
|
120
|
+
---
|
|
121
|
+
name: feature-builder
|
|
122
|
+
description: Builds new features in isolation
|
|
123
|
+
isolation: worktree
|
|
124
|
+
---
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
### Tell Claude to Use Worktrees for Its Agents
|
|
128
|
+
|
|
129
|
+
```
|
|
130
|
+
> use worktrees for your agents
|
|
131
|
+
> spawn agents with worktree isolation
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
## Cleanup Behavior
|
|
135
|
+
|
|
136
|
+
Claude handles cleanup automatically on session exit:
|
|
137
|
+
|
|
138
|
+
| Situation | What Happens |
|
|
139
|
+
|-----------|-------------|
|
|
140
|
+
| No changes made | Worktree and branch removed automatically |
|
|
141
|
+
| Changes or commits exist | Claude prompts: keep or remove? |
|
|
142
|
+
| Keep | Directory and branch preserved — resume later |
|
|
143
|
+
| Remove | Worktree deleted, branch deleted, uncommitted changes lost |
|
|
144
|
+
|
|
145
|
+
### Manual Cleanup
|
|
146
|
+
|
|
147
|
+
```bash
|
|
148
|
+
git worktree list # see all active worktrees
|
|
149
|
+
git worktree remove .claude/worktrees/auth # remove specific one
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
## Plan Before Parallelizing
|
|
153
|
+
|
|
154
|
+
Before spinning up multiple agents:
|
|
155
|
+
1. **Identify independent tasks** — if Task B depends on Task A's output, they can't truly run in parallel
|
|
156
|
+
2. **Separate concerns** — frontend/backend, feature/tests, main feature/docs
|
|
157
|
+
3. **Plan then execute** — one Claude in plan mode drafts; another reviews; then execute in worktrees
|
|
158
|
+
|
|
159
|
+
Pattern:
|
|
160
|
+
```
|
|
161
|
+
> /plan — analyze the feature and identify 3 independent subtasks
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
Then spin up 3 worktrees, one per subtask.
|
|
165
|
+
|
|
166
|
+
## Integration with Existing Flows
|
|
167
|
+
|
|
168
|
+
### dominion-flow / power-flow / fire-flow
|
|
169
|
+
|
|
170
|
+
| Flow Command | Worktree Behavior |
|
|
171
|
+
|-------------|-----------------|
|
|
172
|
+
| `/fire-3-execute`, `/power-3-execute` | Start a worktree per wave/phase task |
|
|
173
|
+
| `/fire-1a-new`, `/power-1-new` | Set up `.gitignore` for `.claude/worktrees/` on project init |
|
|
174
|
+
| Subagent spawning (`dispatching-parallel-agents`) | Always use `isolation: "worktree"` for parallel agents |
|
|
175
|
+
| `/fire-5-handoff`, `/power-5-handoff` | Note active worktrees in handoff so next session can resume |
|
|
176
|
+
| `/fire-6-resume`, `/power-6-resume` | Check for open worktrees, resume named sessions |
|
|
177
|
+
|
|
178
|
+
### On Project Init
|
|
179
|
+
|
|
180
|
+
Any new project should immediately:
|
|
181
|
+
1. Add `.claude/worktrees/` to `.gitignore`
|
|
182
|
+
2. Set up shell aliases for worktree sessions (optional but fast)
|
|
183
|
+
3. Note worktree convention in CLAUDE.md
|
|
184
|
+
|
|
185
|
+
Add to CLAUDE.md:
|
|
186
|
+
```markdown
|
|
187
|
+
## Worktrees
|
|
188
|
+
|
|
189
|
+
All feature work runs in worktrees: `.claude/worktrees/` (gitignored)
|
|
190
|
+
Start: `claude --worktree <feature-name>`
|
|
191
|
+
```
|
|
192
|
+
|
|
193
|
+
## Boris Cherny's Full Tips (Relevant Excerpts)
|
|
194
|
+
|
|
195
|
+
1. **Do more in parallel** — 3-5 worktrees, each its own Claude session. Single biggest unlock.
|
|
196
|
+
2. **Plan then execute** — Plan mode (read-only) first, then execute in worktrees. One Claude drafts the plan, another reviews it as "staff engineer."
|
|
197
|
+
3. **CLAUDE.md as living rules** — After correcting Claude, ask it to update CLAUDE.md so the mistake doesn't recur.
|
|
198
|
+
4. **Subagents for clean context** — Offload subtasks to subagents; keep main context clean.
|
|
199
|
+
5. **Dedicated analysis worktree** — One permanent worktree just for reading logs and investigating.
|
|
200
|
+
|
|
201
|
+
## Quick Reference
|
|
202
|
+
|
|
203
|
+
```bash
|
|
204
|
+
# Start Claude in new isolated worktree
|
|
205
|
+
claude --worktree my-feature
|
|
206
|
+
|
|
207
|
+
# Auto-named worktree
|
|
208
|
+
claude -w
|
|
209
|
+
|
|
210
|
+
# Resume a named session later
|
|
211
|
+
claude --resume my-feature
|
|
212
|
+
|
|
213
|
+
# See all active worktrees
|
|
214
|
+
git worktree list
|
|
215
|
+
|
|
216
|
+
# Clean up manually
|
|
217
|
+
git worktree remove .claude/worktrees/my-feature
|
|
218
|
+
```
|
|
219
|
+
|
|
220
|
+
## Common Mistakes
|
|
221
|
+
|
|
222
|
+
- **Working in main without a worktree** — every non-trivial task should have its own worktree
|
|
223
|
+
- **Forgetting `.claude/worktrees/` in .gitignore** — adds noise to `git status`
|
|
224
|
+
- **Not naming sessions** — "what was that auth session called?" — always `/rename` early
|
|
225
|
+
- **Running dependent tasks in parallel** — if B needs A's output, they can't be parallel; sequence them
|
|
226
|
+
- **Skipping cleanup** — old worktrees accumulate; run `git worktree list` periodically
|
|
227
|
+
|
|
228
|
+
## Sources
|
|
229
|
+
|
|
230
|
+
- Boris Cherny, creator of Claude Code: [Threads tip #1](https://www.threads.com/@boris_cherny/post/DUMZsVuksVv)
|
|
231
|
+
- Boris Cherny on built-in worktree support: [X announcement](https://x.com/bcherny/status/2025007393290272904)
|
|
232
|
+
- Anthropic official docs: [Common Workflows — Git Worktrees](https://code.claude.com/docs/en/common-workflows)
|
|
@@ -0,0 +1,241 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: llm-judge-memory-crud
|
|
3
|
+
category: methodology
|
|
4
|
+
version: 1.0.0
|
|
5
|
+
contributed: 2026-02-25
|
|
6
|
+
contributor: your-memory-repo
|
|
7
|
+
last_updated: 2026-02-25
|
|
8
|
+
contributors:
|
|
9
|
+
- your-memory-repo
|
|
10
|
+
tags: [qdrant, vector-db, memory, crud, mem0, llm, lifecycle, dedup]
|
|
11
|
+
difficulty: medium
|
|
12
|
+
usage_count: 0
|
|
13
|
+
success_rate: 100
|
|
14
|
+
---
|
|
15
|
+
|
|
16
|
+
# LLM-as-Judge Memory CRUD Pipeline
|
|
17
|
+
|
|
18
|
+
## Problem
|
|
19
|
+
|
|
20
|
+
Vector memory systems (Qdrant, Pinecone, Weaviate, ChromaDB) are append-only by default. Every new fact creates a new point. This causes:
|
|
21
|
+
|
|
22
|
+
- **Duplicate dilution:** Near-identical memories degrade search quality
|
|
23
|
+
- **Stale facts:** Outdated information persists indefinitely (e.g., "project uses port 3000" coexists with "project uses port 5001")
|
|
24
|
+
- **No correction:** Wrong facts can only be removed by deleting the entire source file and re-indexing
|
|
25
|
+
- **Unbounded growth:** Index grows forever without garbage collection
|
|
26
|
+
- **Frozen metadata:** Utility scores, version numbers, and confidence fields are initialized but never updated
|
|
27
|
+
|
|
28
|
+
### Why It Was Hard
|
|
29
|
+
|
|
30
|
+
Rule-based deduplication (cosine similarity thresholds, hash matching) fails because:
|
|
31
|
+
- Two semantically equivalent statements can have low cosine similarity ("The server runs on port 5001" vs "API endpoint: localhost:5001")
|
|
32
|
+
- Partial overlap is ambiguous (should memory A be updated with info from B, or are they separate facts?)
|
|
33
|
+
- Contradiction detection requires understanding, not matching
|
|
34
|
+
|
|
35
|
+
### Impact
|
|
36
|
+
|
|
37
|
+
Without CRUD, memory quality degrades over time. Research shows a 10% performance drop from bad memories, which propagate errors to all downstream tasks that retrieve them (Experience-Following, arXiv 2505.16067).
|
|
38
|
+
|
|
39
|
+
---
|
|
40
|
+
|
|
41
|
+
## Solution Pattern
|
|
42
|
+
|
|
43
|
+
Use an LLM as a judge to decide how each new memory candidate should interact with existing memories. Instead of blindly appending, every new fact goes through a decision pipeline:
|
|
44
|
+
|
|
45
|
+
```
|
|
46
|
+
New Fact → Embed → Find Similar (top-5) → LLM Decides → Execute CRUD → Log
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
The LLM receives the new candidate plus its most similar existing memories and returns one of four operations:
|
|
50
|
+
|
|
51
|
+
| Operation | When | Action |
|
|
52
|
+
|-----------|------|--------|
|
|
53
|
+
| **ADD** | New unique information | Insert new point |
|
|
54
|
+
| **UPDATE** | Refines/corrects existing memory | Merge into existing point |
|
|
55
|
+
| **DELETE** | Contradicts with higher confidence | Remove old point |
|
|
56
|
+
| **NOOP** | Already known, no new info | Skip silently |
|
|
57
|
+
|
|
58
|
+
> **Research basis:** Mem0 (arXiv 2504.19413, 2025) — 26% higher accuracy than OpenAI memory, 91% lower p95 latency, 90% token savings. Confirmed by Memory-R1 (arXiv 2508.19828, 2025) and AgeMem (arXiv 2601.01885, 2026).
|
|
59
|
+
|
|
60
|
+
---
|
|
61
|
+
|
|
62
|
+
## Code Example
|
|
63
|
+
|
|
64
|
+
```typescript
|
|
65
|
+
// Before (append-only — problematic)
|
|
66
|
+
/**
 * Append-only baseline: embeds the fact and writes it as a brand-new point.
 *
 * No lookup of existing memories happens here, so every call creates another
 * point. This is the behavior the LLM-as-judge pipeline is meant to replace.
 *
 * @param text - Fact to store; also copied into the payload as `text`.
 * @param metadata - Extra payload fields, spread after `text` and before `created_at`.
 */
async function indexFact(text: string, metadata: Record<string, unknown>) {
  const vector = await embed(text);
  const id = generateId();
  // `created_at` is listed last so it always wins over a same-named metadata key.
  const payload = { text, ...metadata, created_at: new Date().toISOString() };
  await qdrant.upsert(collection, { id, vector, payload });
}
|
|
74
|
+
|
|
75
|
+
// After (LLM-as-Judge CRUD)
|
|
76
|
+
/** Decision returned by the LLM judge for a single candidate fact. */
interface MemoryOperation {
  /** CRUD action to apply to the memory store. */
  op: "ADD" | "UPDATE" | "DELETE" | "NOOP";
  /** ID of the existing memory to change; must be present for UPDATE and DELETE. */
  targetId?: string;
  /** Merged text to store on UPDATE; the raw candidate text is used when absent. */
  content?: string;
  /** The judge's justification, written to the changelog. */
  reason: string;
}

/**
 * Index a fact through the LLM-as-judge pipeline:
 * embed → find similar → LLM decides → execute CRUD → log.
 *
 * @param text - Candidate fact to index.
 * @param metadata - Extra payload fields forwarded to `addMemory` when the fact is inserted.
 * @returns The result of `addMemory` when no similar memory exists; otherwise nothing.
 * @throws Error when the judge asks to UPDATE or DELETE an ID that was not among the
 *   retrieved candidates, or returns an operation outside ADD/UPDATE/DELETE/NOOP.
 */
async function smartIndex(text: string, metadata: Record<string, unknown>) {
  // 1. Embed the candidate
  const embedding = await embed(text);

  // 2. Find similar existing memories
  const similar = await qdrant.search(collection, {
    vector: embedding,
    limit: 5,
    score_threshold: 0.7
  });

  // 3. If no similar memories, just ADD (and still record it in the audit trail)
  if (similar.length === 0) {
    const added = await addMemory(text, embedding, metadata);
    await changelog.append({
      timestamp: new Date().toISOString(),
      op: "ADD",
      targetId: undefined,
      oldContent: undefined,
      newContent: text,
      reason: "No similar memories found"
    });
    return added;
  }

  // 4. Ask LLM to decide
  const existingList = similar
    .map((s) => `[ID: ${s.id}] (score: ${s.score.toFixed(2)}) ${s.payload.text}`)
    .join('\n');
  const decision = await llm.structured<MemoryOperation>({
    system: [
      "You manage a memory store. Given a NEW fact and EXISTING memories,",
      "decide: ADD (new unique info), UPDATE (refines existing), DELETE (contradicts",
      "existing with higher confidence), or NOOP (already known).",
      "Return JSON: {op, targetId?, content?, reason}"
    ].join("\n"),
    user: `NEW FACT: ${text}\n\nEXISTING MEMORIES:\n${existingList}\n\nWhat should happen?`
  });

  // Resolve the judge's target against the memories it was actually shown.
  // The ID comes back as LLM output, so it may be missing or invented; only a
  // point that was retrieved above is allowed to be modified or removed.
  const target =
    decision.targetId == null
      ? undefined
      : similar.find((s) => String(s.id) === String(decision.targetId));

  // What ends up in the store for this fact; differs from `text` after a merge.
  let newContent = text;

  // 5. Execute the decision
  switch (decision.op) {
    case "ADD":
      await addMemory(text, embedding, metadata);
      break;
    case "UPDATE": {
      if (!target) {
        throw new Error(
          `LLM judge requested UPDATE for unknown memory ID: ${String(decision.targetId)}`
        );
      }
      const previous = target.payload;
      // `||` (not `??`) on purpose: an empty merged string falls back to the candidate.
      const merged = decision.content || text;
      // Re-embed so the stored vector matches the rewritten text.
      const newEmbedding = await embed(merged);
      await qdrant.setPayload(collection, {
        points: [target.id],
        payload: {
          text: merged,
          updated_at: new Date().toISOString(),
          version: (previous.version || 1) + 1,
          _prev_content: previous.text
        }
      });
      await qdrant.updateVectors(collection, {
        points: [{ id: target.id, vector: newEmbedding }]
      });
      newContent = merged;
      break;
    }
    case "DELETE": {
      if (!target) {
        throw new Error(
          `LLM judge requested DELETE for unknown memory ID: ${String(decision.targetId)}`
        );
      }
      await qdrant.delete(collection, { points: [target.id] });
      break;
    }
    case "NOOP":
      break; // Skip — already known
    default:
      throw new Error(`Unknown memory operation: ${String(decision.op)}`);
  }

  // 6. Log to changelog
  await changelog.append({
    timestamp: new Date().toISOString(),
    op: decision.op,
    targetId: decision.targetId,
    oldContent: target?.payload.text,
    newContent,
    reason: decision.reason
  });
}
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
---
|
|
154
|
+
|
|
155
|
+
## Implementation Steps
|
|
156
|
+
|
|
157
|
+
1. **Add similarity search on ingest** — Before any upsert, query for top-5 similar points (threshold 0.7)
|
|
158
|
+
2. **Create LLM decision prompt** — Structured output: `{op, targetId?, content?, reason}`
|
|
159
|
+
3. **Implement 4 operation handlers** — ADD (new point), UPDATE (merge + re-embed), DELETE (remove), NOOP (skip)
|
|
160
|
+
4. **Add changelog** — JSON-lines file logging every operation with before/after content
|
|
161
|
+
5. **Wire into existing indexing pipeline** — Replace direct upserts with `smartIndex()` calls
|
|
162
|
+
|
|
163
|
+
### Supporting Infrastructure (recommended)
|
|
164
|
+
|
|
165
|
+
6. **Archive before delete** — Set `is_archived: true` instead of permanent delete; prune archived after 90 days
|
|
166
|
+
7. **Rehearsal decay** — On every search retrieval, update `lastAccessedAt` and `timesRetrieved`; use `max(createdAt, lastAccessedAt)` for decay base
|
|
167
|
+
8. **Version history** — Store `_prev_content` and `_prev_version` in payload for rollback capability
|
|
168
|
+
9. **Health audit** — Periodic job samples random points, asks LLM "Is this still accurate?", prunes low-quality entries
|
|
169
|
+
|
|
170
|
+
---
|
|
171
|
+
|
|
172
|
+
## When to Use
|
|
173
|
+
|
|
174
|
+
- Any vector DB that stores facts/knowledge that changes over time
|
|
175
|
+
- Systems where the same information can be expressed differently across sources
|
|
176
|
+
- Memory stores that grow unbounded without maintenance
|
|
177
|
+
- Projects where outdated information causes real harm (wrong API endpoints, deprecated patterns)
|
|
178
|
+
- After observing search quality degrade from duplicates or contradictions
|
|
179
|
+
|
|
180
|
+
## When NOT to Use
|
|
181
|
+
|
|
182
|
+
- **Immutable archives** — If you need to preserve every version of every fact (use append + temporal queries instead)
|
|
183
|
+
- **High-throughput ingestion** — LLM call per fact adds ~500ms latency; use batch mode for bulk imports
|
|
184
|
+
- **Cost-sensitive environments** — Each CRUD decision costs an LLM API call; batch 10-20 candidates per call to amortize
|
|
185
|
+
- **Simple key-value stores** — If memories have unique keys, use direct upsert instead of similarity-based CRUD
|
|
186
|
+
|
|
187
|
+
---
|
|
188
|
+
|
|
189
|
+
## Common Mistakes
|
|
190
|
+
|
|
191
|
+
- **Threshold too high (>0.9):** Misses semantically equivalent memories with different wording
|
|
192
|
+
- **Threshold too low (<0.5):** LLM gets overwhelmed with irrelevant "similar" memories
|
|
193
|
+
- **No changelog:** Without audit trail, bad CRUD decisions are invisible and irreversible
|
|
194
|
+
- **UPDATE without re-embedding:** If you change the text but not the vector, search results become inconsistent
|
|
195
|
+
- **DELETE without archive:** Permanent deletion loses information that may be needed for rollback
|
|
196
|
+
- **Single-item processing at scale:** Process CRUD decisions in batches of 10-20 to reduce LLM call count (Anatomy of Agentic Memory, arXiv 2602.19320)
|
|
197
|
+
|
|
198
|
+
---
|
|
199
|
+
|
|
200
|
+
## Batch Mode (for bulk indexing)
|
|
201
|
+
|
|
202
|
+
```typescript
|
|
203
|
+
// Instead of N individual LLM calls, batch candidates
|
|
204
|
+
/**
 * Bulk variant of the judge pipeline: groups candidates so that one LLM call
 * decides the operations for a whole batch instead of one call per fact.
 *
 * @param candidates - Candidate facts to index, processed in order.
 * @param batchSize - Maximum number of candidates per LLM call (default 20).
 * @throws RangeError when `batchSize` is not at least 1. A zero or negative
 *   step would never advance past the first batch, and NaN would send a single
 *   empty batch to the LLM.
 */
async function batchSmartIndex(candidates: string[], batchSize = 20) {
  // Written as a negated comparison so NaN is rejected too.
  if (!(batchSize >= 1)) {
    throw new RangeError(`batchSize must be >= 1, received ${batchSize}`);
  }

  for (let start = 0; start < candidates.length; start += batchSize) {
    const batch = candidates.slice(start, start + batchSize);

    // Similarity lookups within a batch are independent, so run them concurrently.
    const allSimilar = await Promise.all(
      batch.map((candidate) => findSimilar(candidate, 5))
    );

    // One LLM call for entire batch
    const decisions = await llm.structured<MemoryOperation[]>({
      system: "For each candidate, decide ADD/UPDATE/DELETE/NOOP...",
      user: formatBatch(batch, allSimilar)
    });

    // Apply decisions one at a time, in the order the judge returned them.
    for (const decision of decisions) {
      await executeDecision(decision);
    }
  }
}
|
|
222
|
+
```
|
|
223
|
+
|
|
224
|
+
---
|
|
225
|
+
|
|
226
|
+
## Related Skills
|
|
227
|
+
|
|
228
|
+
- [production-memory-patterns](../methodology/PRODUCTION_MEMORY_PATTERNS.md) — Catalog of 10 production memory patterns (this skill implements pattern #1)
|
|
229
|
+
- [RESEARCH_BACKED_WORKFLOW_UPGRADE](../methodology/RESEARCH_BACKED_WORKFLOW_UPGRADE.md) — The research methodology used to discover this pattern
|
|
230
|
+
|
|
231
|
+
## References
|
|
232
|
+
|
|
233
|
+
- Mem0: Building Production-Ready AI Agents with Scalable Long-Term Memory (arXiv 2504.19413, 2025) — 26% higher accuracy than OpenAI memory, 91% lower p95 latency
|
|
234
|
+
- Memory-R1: Enhancing LLM Agents to Manage Memories via RL (arXiv 2508.19828, 2025) — RL-trained CRUD
|
|
235
|
+
- AgeMem: Learning Unified LTM/STM Management (arXiv 2601.01885, 2026) — Memory as tools
|
|
236
|
+
- Experience-Following Behavior (arXiv 2505.16067, 2025) — 10% boost from selective CRUD
|
|
237
|
+
- SimpleMem: Efficient Lifelong Memory (arXiv 2601.02553, 2026) — 26.4% F1 via merge
|
|
238
|
+
- MemoryBank: Ebbinghaus Forgetting Curve (AAAI 2024) — Rehearsal decay
|
|
239
|
+
- Zep/Graphiti: Temporal KG for Agent Memory (arXiv 2501.13956, 2025) — Bi-temporal versioning
|
|
240
|
+
- Anatomy of Agentic Memory (arXiv 2602.19320, 2026) — Batch processing for latency
|
|
241
|
+
- Contributed from: your-memory-repo v12.0 research session (2026-02-25)
|