agent-working-memory 0.4.1 → 0.4.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +190 -21
- package/README.md +175 -191
- package/dist/api/index.d.ts.map +1 -1
- package/dist/api/index.js +2 -0
- package/dist/api/index.js.map +1 -1
- package/dist/api/routes.d.ts.map +1 -1
- package/dist/api/routes.js +7 -0
- package/dist/api/routes.js.map +1 -1
- package/dist/cli.d.ts +0 -9
- package/dist/cli.d.ts.map +1 -1
- package/dist/cli.js +85 -56
- package/dist/cli.js.map +1 -1
- package/dist/core/decay.d.ts.map +1 -1
- package/dist/core/decay.js +2 -0
- package/dist/core/decay.js.map +1 -1
- package/dist/core/embeddings.d.ts.map +1 -1
- package/dist/core/embeddings.js +2 -0
- package/dist/core/embeddings.js.map +1 -1
- package/dist/core/hebbian.d.ts.map +1 -1
- package/dist/core/hebbian.js +2 -0
- package/dist/core/hebbian.js.map +1 -1
- package/dist/core/index.d.ts.map +1 -1
- package/dist/core/index.js +2 -0
- package/dist/core/index.js.map +1 -1
- package/dist/core/logger.d.ts.map +1 -1
- package/dist/core/logger.js +2 -0
- package/dist/core/logger.js.map +1 -1
- package/dist/core/query-expander.d.ts.map +1 -1
- package/dist/core/query-expander.js +2 -0
- package/dist/core/query-expander.js.map +1 -1
- package/dist/core/reranker.d.ts.map +1 -1
- package/dist/core/reranker.js +2 -0
- package/dist/core/reranker.js.map +1 -1
- package/dist/core/salience.d.ts.map +1 -1
- package/dist/core/salience.js +12 -8
- package/dist/core/salience.js.map +1 -1
- package/dist/engine/activation.d.ts.map +1 -1
- package/dist/engine/activation.js +2 -0
- package/dist/engine/activation.js.map +1 -1
- package/dist/engine/connections.d.ts.map +1 -1
- package/dist/engine/connections.js +2 -0
- package/dist/engine/connections.js.map +1 -1
- package/dist/engine/consolidation-scheduler.d.ts.map +1 -1
- package/dist/engine/consolidation-scheduler.js +2 -0
- package/dist/engine/consolidation-scheduler.js.map +1 -1
- package/dist/engine/consolidation.d.ts.map +1 -1
- package/dist/engine/consolidation.js +12 -3
- package/dist/engine/consolidation.js.map +1 -1
- package/dist/engine/eval.d.ts.map +1 -1
- package/dist/engine/eval.js +2 -0
- package/dist/engine/eval.js.map +1 -1
- package/dist/engine/eviction.d.ts.map +1 -1
- package/dist/engine/eviction.js +2 -0
- package/dist/engine/eviction.js.map +1 -1
- package/dist/engine/index.d.ts.map +1 -1
- package/dist/engine/index.js +2 -0
- package/dist/engine/index.js.map +1 -1
- package/dist/engine/retraction.d.ts.map +1 -1
- package/dist/engine/retraction.js +2 -0
- package/dist/engine/retraction.js.map +1 -1
- package/dist/engine/staging.d.ts.map +1 -1
- package/dist/engine/staging.js +2 -0
- package/dist/engine/staging.js.map +1 -1
- package/dist/hooks/sidecar.d.ts.map +1 -1
- package/dist/hooks/sidecar.js +29 -0
- package/dist/hooks/sidecar.js.map +1 -1
- package/dist/index.js +2 -0
- package/dist/index.js.map +1 -1
- package/dist/mcp.d.ts.map +1 -1
- package/dist/mcp.js +2 -0
- package/dist/mcp.js.map +1 -1
- package/dist/storage/index.d.ts.map +1 -1
- package/dist/storage/index.js +2 -0
- package/dist/storage/index.js.map +1 -1
- package/dist/storage/sqlite.d.ts.map +1 -1
- package/dist/storage/sqlite.js +12 -2
- package/dist/storage/sqlite.js.map +1 -1
- package/dist/types/agent.d.ts.map +1 -1
- package/dist/types/agent.js +2 -0
- package/dist/types/agent.js.map +1 -1
- package/dist/types/checkpoint.d.ts.map +1 -1
- package/dist/types/checkpoint.js +2 -0
- package/dist/types/checkpoint.js.map +1 -1
- package/dist/types/engram.d.ts.map +1 -1
- package/dist/types/engram.js +2 -0
- package/dist/types/engram.js.map +1 -1
- package/dist/types/eval.d.ts.map +1 -1
- package/dist/types/eval.js +2 -0
- package/dist/types/eval.js.map +1 -1
- package/dist/types/index.d.ts.map +1 -1
- package/dist/types/index.js +2 -0
- package/dist/types/index.js.map +1 -1
- package/package.json +2 -2
- package/src/api/index.ts +2 -0
- package/src/api/routes.ts +8 -0
- package/src/cli.ts +385 -355
- package/src/core/decay.ts +2 -0
- package/src/core/embeddings.ts +2 -0
- package/src/core/hebbian.ts +2 -0
- package/src/core/index.ts +2 -0
- package/src/core/logger.ts +2 -0
- package/src/core/query-expander.ts +2 -0
- package/src/core/reranker.ts +2 -0
- package/src/core/salience.ts +14 -10
- package/src/engine/activation.ts +2 -0
- package/src/engine/connections.ts +2 -0
- package/src/engine/consolidation-scheduler.ts +125 -123
- package/src/engine/consolidation.ts +11 -3
- package/src/engine/eval.ts +2 -0
- package/src/engine/eviction.ts +2 -0
- package/src/engine/index.ts +2 -0
- package/src/engine/retraction.ts +2 -0
- package/src/engine/staging.ts +2 -0
- package/src/hooks/sidecar.ts +31 -0
- package/src/index.ts +2 -0
- package/src/mcp.ts +2 -0
- package/src/storage/index.ts +2 -0
- package/src/storage/sqlite.ts +12 -2
- package/src/types/agent.ts +2 -0
- package/src/types/checkpoint.ts +46 -44
- package/src/types/engram.ts +2 -0
- package/src/types/eval.ts +2 -0
- package/src/types/index.ts +2 -0
package/README.md
CHANGED
|
@@ -1,177 +1,205 @@
|
|
|
1
1
|
# AgentWorkingMemory (AWM)
|
|
2
2
|
|
|
3
|
-
**
|
|
3
|
+
**Persistent working memory for AI agents.**
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
AWM helps agents retain important project knowledge across conversations and sessions. Instead of storing everything and retrieving by similarity alone, it filters for salience, builds associative links between related memories, and periodically consolidates useful knowledge while letting noise fade.
|
|
6
6
|
|
|
7
|
-
|
|
7
|
+
Use it through Claude Code via MCP or as a local HTTP service for custom agents. Everything runs locally: SQLite + ONNX models + Node.js. No cloud, no API keys.
|
|
8
8
|
|
|
9
|
-
|
|
9
|
+
### Without AWM
|
|
10
|
+
- Agent forgets earlier architecture decision
|
|
11
|
+
- Suggests Redux after project standardized on Zustand
|
|
12
|
+
- Repeats discussion already settled three days ago
|
|
13
|
+
- Every new conversation starts from scratch
|
|
10
14
|
|
|
11
|
-
|
|
15
|
+
### With AWM
|
|
16
|
+
- Recalls prior state-management decision and rationale
|
|
17
|
+
- Surfaces related implementation patterns from past sessions
|
|
18
|
+
- Continues work without re-asking for context
|
|
19
|
+
- Gets more consistent the longer you use it
|
|
12
20
|
|
|
13
|
-
|
|
14
|
-
|---------|------------------------|-----|
|
|
15
|
-
| **What gets stored** | Everything | Only salient events (novelty + salience scoring filters 77% of noise at write time) |
|
|
16
|
-
| **Retrieval** | Single-signal cosine similarity | 10-phase pipeline: BM25 + vectors + cross-encoder reranking + graph walk + temporal decay |
|
|
17
|
-
| **Connections** | None | Hebbian associative edges that strengthen when memories are co-retrieved |
|
|
18
|
-
| **Over time** | Grows forever, gets noisier | Sleep cycle consolidation: strengthens clusters, prunes noise, builds cross-topic bridges |
|
|
19
|
-
| **Forgetting** | Manual cleanup or TTL | Cognitive forgetting: unretrieved memories fade, confirmed knowledge persists for months |
|
|
20
|
-
| **Feedback** | None | Explicit useful/not-useful signals tune confidence, affecting retrieval rank and forgetting resistance |
|
|
21
|
-
| **Self-correction** | Delete and re-insert | Retraction system: wrong memories get invalidated, corrections link back, confidence penalties propagate |
|
|
21
|
+
---
|
|
22
22
|
|
|
23
|
-
|
|
23
|
+
## Quick Start
|
|
24
24
|
|
|
25
|
-
|
|
25
|
+
**Node.js 20+** required — check with `node --version`.
|
|
26
26
|
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
| A/B Test | **AWM 100% vs Baseline 83%** | 100 project events, 24 recall questions |
|
|
32
|
-
| Self-Test | **97.4% EXCELLENT** | 31 pipeline component checks |
|
|
33
|
-
| Workday | **86.7% GOOD** | 43 memories across 4 simulated work sessions |
|
|
34
|
-
| Real-World | **93.1% EXCELLENT** | 300 code chunks from a 71K-line production monorepo |
|
|
35
|
-
| Token Savings | **64.5% savings** | Memory-guided context vs full conversation history |
|
|
27
|
+
```bash
|
|
28
|
+
npm install -g agent-working-memory
|
|
29
|
+
awm setup --global
|
|
30
|
+
```
|
|
36
31
|
|
|
37
|
-
|
|
32
|
+
Restart Claude Code. That's it — 13 memory tools appear automatically.
|
|
38
33
|
|
|
39
|
-
|
|
34
|
+
The first conversation will be ~30 seconds slower while the ML models download (~124 MB, cached locally). After that, everything runs on your machine.
|
|
40
35
|
|
|
41
|
-
|
|
36
|
+
> For isolated memory per folder, see [Separate Memory Pools](#separate-memory-pools). For team onboarding, see [docs/quickstart.md](docs/quickstart.md).
|
|
42
37
|
|
|
43
|
-
|
|
38
|
+
---
|
|
44
39
|
|
|
45
|
-
|
|
40
|
+
## Who this is for
|
|
46
41
|
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
42
|
+
- **Long-running coding agents** that need cross-session project knowledge
|
|
43
|
+
- **Multi-agent workflows** where specialized agents share a common memory
|
|
44
|
+
- **Local-first setups** where cloud memory is not acceptable
|
|
45
|
+
- **Teams using Claude Code** who want persistent context without manual notes
|
|
50
46
|
|
|
51
|
-
|
|
47
|
+
## What this is not
|
|
52
48
|
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
49
|
+
- Not a chatbot UI
|
|
50
|
+
- Not a hosted SaaS
|
|
51
|
+
- Not a generic vector database
|
|
52
|
+
- Not a replacement for your source of truth (code, docs, tickets)
|
|
56
53
|
|
|
57
|
-
|
|
54
|
+
---
|
|
58
55
|
|
|
59
|
-
|
|
56
|
+
## Why it's different
|
|
60
57
|
|
|
61
|
-
|
|
58
|
+
Most "memory for AI" projects are vector databases with a retrieval wrapper. AWM goes further:
|
|
62
59
|
|
|
63
|
-
|
|
60
|
+
| | Typical RAG / Vector Store | AWM |
|
|
61
|
+
|---|---|---|
|
|
62
|
+
| **Storage** | Everything | Only novel, salient events (77% filtered at write time) |
|
|
63
|
+
| **Retrieval** | Cosine similarity | 10-phase pipeline: BM25 + vectors + reranking + graph walk + decay |
|
|
64
|
+
| **Connections** | None | Hebbian edges that strengthen when memories co-activate |
|
|
65
|
+
| **Over time** | Grows forever, gets noisier | Consolidation: strengthens clusters, prunes noise, builds bridges |
|
|
66
|
+
| **Forgetting** | Manual cleanup | Cognitive forgetting: unused memories fade, confirmed knowledge persists |
|
|
67
|
+
| **Feedback** | None | Useful/not-useful signals tune confidence and retrieval rank |
|
|
68
|
+
| **Correction** | Delete and re-insert | Retraction: wrong memories invalidated, corrections linked, penalties propagate |
|
|
64
69
|
|
|
65
|
-
|
|
66
|
-
cd your-project
|
|
67
|
-
awm setup
|
|
68
|
-
```
|
|
70
|
+
The design is based on cognitive science — ACT-R activation decay, Hebbian learning, complementary learning systems, and synaptic homeostasis — rather than ad-hoc heuristics. See [How It Works](#how-it-works) and [docs/cognitive-model.md](docs/cognitive-model.md) for details.
|
|
69
71
|
|
|
70
|
-
|
|
72
|
+
---
|
|
73
|
+
|
|
74
|
+
## Benchmarks
|
|
75
|
+
|
|
76
|
+
| Eval | Score | What it tests |
|
|
77
|
+
|------|-------|---------------|
|
|
78
|
+
| Edge Cases | **100% (34/34)** | 9 failure modes, including hub toxicity, flashbulb distortion, narcissistic interference, identity collision, and noise forgetting benefit |
|
|
79
|
+
| Stress Test | **92.3% (48/52)** | 500 memories, 100 sleep cycles, catastrophic forgetting, adversarial spam |
|
|
80
|
+
| A/B Test | **AWM 100% vs Baseline 83%** | 100 project events, 24 recall questions |
|
|
81
|
+
| Self-Test | **97.4%** | 31 pipeline component checks |
|
|
82
|
+
| Workday | **86.7%** | 43 memories across 4 simulated work sessions |
|
|
83
|
+
| Real-World | **93.1%** | 300 code chunks from a 71K-line production monorepo |
|
|
84
|
+
| Token Savings | **64.5% savings** | Memory-guided context vs full conversation history |
|
|
71
85
|
|
|
72
|
-
|
|
86
|
+
All evals are reproducible: `npm run test:self`, `npm run test:edge`, `npm run test:stress`, etc. See [Testing & Evaluation](#testing--evaluation) and [docs/benchmarks.md](docs/benchmarks.md) for full details.
|
|
87
|
+
|
|
88
|
+
---
|
|
73
89
|
|
|
74
|
-
|
|
90
|
+
## Features
|
|
75
91
|
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
|
92
|
+
### Memory Tools (13)
|
|
93
|
+
|
|
94
|
+
| Tool | Purpose |
|
|
95
|
+
|------|---------|
|
|
96
|
+
| `memory_write` | Store a memory (salience filter decides disposition) |
|
|
79
97
|
| `memory_recall` | Retrieve relevant memories by context |
|
|
80
|
-
| `memory_feedback` |
|
|
81
|
-
| `memory_retract` |
|
|
82
|
-
| `memory_stats` | View memory health metrics |
|
|
98
|
+
| `memory_feedback` | Report whether a recalled memory was useful |
|
|
99
|
+
| `memory_retract` | Invalidate a wrong memory with optional correction |
|
|
100
|
+
| `memory_stats` | View memory health metrics and activity |
|
|
83
101
|
| `memory_checkpoint` | Save execution state (survives context compaction) |
|
|
84
|
-
| `memory_restore` | Recover state + relevant context at
|
|
102
|
+
| `memory_restore` | Recover state + relevant context at session start |
|
|
85
103
|
| `memory_task_add` | Create a prioritized task |
|
|
86
104
|
| `memory_task_update` | Change task status/priority |
|
|
87
105
|
| `memory_task_list` | List tasks by status |
|
|
88
106
|
| `memory_task_next` | Get the highest-priority actionable task |
|
|
89
107
|
| `memory_task_begin` | Start a task — auto-checkpoints and recalls context |
|
|
90
108
|
| `memory_task_end` | End a task — writes summary and checkpoints |
|
|
91
|
-
You don't need to tell Claude to "use memory." Once connected, Claude will automatically write important decisions, recall relevant context, and learn from feedback. Over time, it builds up knowledge that persists across every conversation.
|
|
92
109
|
|
|
93
110
|
### Separate Memory Pools
|
|
94
111
|
|
|
95
|
-
By default,
|
|
112
|
+
By default, all projects share one memory pool. For isolated pools per folder, place a `.mcp.json` in each parent folder with a different `AWM_AGENT_ID`:
|
|
96
113
|
|
|
97
114
|
```
|
|
98
115
|
C:\Users\you\work\.mcp.json → AWM_AGENT_ID: "work"
|
|
99
116
|
C:\Users\you\personal\.mcp.json → AWM_AGENT_ID: "personal"
|
|
100
117
|
```
|
|
101
118
|
|
|
102
|
-
Claude Code uses the closest `.mcp.json` ancestor. Same database
|
|
119
|
+
Claude Code uses the closest ancestor `.mcp.json`. All pools share the same database; isolation is by agent ID.
|
|
103
120
|
|
|
104
121
|
### Incognito Mode
|
|
105
122
|
|
|
106
|
-
Run Claude Code without AWM recording anything:
|
|
107
|
-
|
|
108
123
|
```bash
|
|
109
124
|
AWM_INCOGNITO=1 claude
|
|
110
125
|
```
|
|
111
126
|
|
|
112
|
-
|
|
127
|
+
In incognito mode, AWM registers zero tools — Claude doesn't see memory at all. All other tools and MCP servers work normally.
|
|
113
128
|
|
|
114
129
|
### Auto-Checkpoint Hooks
|
|
115
130
|
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
- **Stop** — reminds Claude to save learnings after each response (async, no delay)
|
|
119
|
-
- **PreCompact** — auto-checkpoints before context window compression
|
|
120
|
-
- **SessionEnd** — auto-checkpoints and runs full consolidation on graceful exit
|
|
131
|
+
Installed by `awm setup --global`:
|
|
121
132
|
|
|
122
|
-
|
|
133
|
+
- **Stop** — reminds Claude to write/recall after each response
|
|
134
|
+
- **PreCompact** — auto-checkpoints before context compression
|
|
135
|
+
- **SessionEnd** — auto-checkpoints and consolidates on close
|
|
136
|
+
- **15-min timer** — silent auto-checkpoint while session is active
|
|
123
137
|
|
|
124
138
|
### Activity Log
|
|
125
139
|
|
|
126
|
-
AWM writes a real-time activity log so you can see exactly what's happening:
|
|
127
|
-
|
|
128
140
|
```bash
|
|
129
141
|
tail -f "$(npm root -g)/agent-working-memory/data/awm.log"
|
|
130
142
|
```
|
|
131
143
|
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
### Optional: Add workflow instructions to CLAUDE.md
|
|
144
|
+
The log records writes, recalls, checkpoints, consolidation, and hook events in real time.
|
|
135
145
|
|
|
136
|
-
|
|
146
|
+
### Activity Stats
|
|
137
147
|
|
|
138
|
-
```
|
|
139
|
-
|
|
140
|
-
You have persistent memory via the agent-working-memory MCP server.
|
|
141
|
-
- At conversation start: call memory_restore to recover previous context
|
|
142
|
-
- When you learn something important: call memory_write
|
|
143
|
-
- When you need past context: call memory_recall
|
|
144
|
-
- Before long operations: call memory_checkpoint to save your state
|
|
145
|
-
- After using a recalled memory: call memory_feedback (useful/not-useful)
|
|
148
|
+
```bash
|
|
149
|
+
curl http://127.0.0.1:8401/stats
|
|
146
150
|
```
|
|
147
151
|
|
|
152
|
+
Returns daily counts: `{"writes": 8, "recalls": 9, "hooks": 3, "total": 25}`
|
|
153
|
+
|
|
148
154
|
---
|
|
149
155
|
|
|
150
|
-
##
|
|
156
|
+
## Memory Invocation Strategy
|
|
151
157
|
|
|
152
|
-
|
|
158
|
+
AWM combines deterministic hooks for guaranteed memory operations at lifecycle transitions with agent-directed usage during active work.
|
|
153
159
|
|
|
154
|
-
###
|
|
160
|
+
### Deterministic triggers (always happen)
|
|
155
161
|
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
162
|
+
| Event | Action |
|
|
163
|
+
|-------|--------|
|
|
164
|
+
| Session start | `memory_restore` — recover state + recall context |
|
|
165
|
+
| Pre-compaction | Auto-checkpoint via hook sidecar |
|
|
166
|
+
| Session end | Auto-checkpoint + full consolidation |
|
|
167
|
+
| Every 15 min | Silent auto-checkpoint (if active) |
|
|
168
|
+
| Task start | `memory_task_begin` — checkpoint + recall |
|
|
169
|
+
| Task end | `memory_task_end` — summary + checkpoint |
|
|
160
170
|
|
|
161
|
-
###
|
|
171
|
+
### Agent-directed triggers (when these situations occur)
|
|
162
172
|
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
173
|
+
**Write memory when:**
|
|
174
|
+
- A project decision is made or changed
|
|
175
|
+
- A root cause is discovered
|
|
176
|
+
- A reusable implementation pattern is established
|
|
177
|
+
- A preference, constraint, or requirement is clarified
|
|
178
|
+
- A prior assumption is found to be wrong
|
|
179
|
+
|
|
180
|
+
**Recall memory when:**
|
|
181
|
+
- Starting work on a new task or subsystem
|
|
182
|
+
- Re-entering code you haven't touched recently
|
|
183
|
+
- After context compaction
|
|
184
|
+
- After a failed attempt (check if there's prior knowledge)
|
|
185
|
+
- Before refactoring or making architectural changes
|
|
186
|
+
|
|
187
|
+
**Retract when:**
|
|
188
|
+
- A stored memory turns out to be wrong or outdated
|
|
169
189
|
|
|
170
|
-
**
|
|
190
|
+
**Feedback when:**
|
|
191
|
+
- A recalled memory was used (useful) or turned out to be irrelevant (not useful)
|
|
171
192
|
|
|
172
|
-
|
|
193
|
+
---
|
|
194
|
+
|
|
195
|
+
## HTTP API
|
|
196
|
+
|
|
197
|
+
For custom agents, scripts, or non-Claude-Code workflows:
|
|
173
198
|
|
|
174
|
-
|
|
199
|
+
```bash
|
|
200
|
+
awm serve # From npm install
|
|
201
|
+
npx tsx src/index.ts # From source
|
|
202
|
+
```
|
|
175
203
|
|
|
176
204
|
Write a memory:
|
|
177
205
|
|
|
@@ -188,7 +216,7 @@ curl -X POST http://localhost:8400/memory/write \
|
|
|
188
216
|
}'
|
|
189
217
|
```
|
|
190
218
|
|
|
191
|
-
Recall
|
|
219
|
+
Recall:
|
|
192
220
|
|
|
193
221
|
```bash
|
|
194
222
|
curl -X POST http://localhost:8400/memory/activate \
|
|
@@ -199,60 +227,31 @@ curl -X POST http://localhost:8400/memory/activate \
|
|
|
199
227
|
}'
|
|
200
228
|
```
|
|
201
229
|
|
|
202
|
-
### Configuration
|
|
203
|
-
|
|
204
|
-
- **Change the port:** `awm serve --port 3000` or `AWM_PORT=3000`
|
|
205
|
-
- **Custom database:** `AWM_DB_PATH=/path/to/memory.db`
|
|
206
|
-
- **API key auth:** Set `AWM_API_KEY=your-secret` in `.env` — requests need `Authorization: Bearer your-secret` or `x-api-key: your-secret`
|
|
207
|
-
- **Run tests:** `npx vitest run` (68 tests)
|
|
208
|
-
- **Run eval suite:** `npm run test:self`
|
|
209
|
-
- **Data is a single file:** `data/memory.db` (SQLite). Back it up, move it, delete it to start fresh.
|
|
210
|
-
- **Models cached locally:** First run downloads to `models/`. No network after that.
|
|
211
|
-
|
|
212
230
|
---
|
|
213
231
|
|
|
214
|
-
## Docker (Quick Alternative)
|
|
215
|
-
|
|
216
|
-
If you'd rather not install Node.js locally:
|
|
217
|
-
|
|
218
|
-
```bash
|
|
219
|
-
docker build -t awm .
|
|
220
|
-
docker run -p 8400:8400 -v awm-data:/data -v awm-models:/models awm
|
|
221
|
-
```
|
|
222
|
-
|
|
223
|
-
This gives you the HTTP server on port 8400. The `-v` flags persist your database and models across container restarts. For MCP integration with Docker, point your `.mcp.json` to the HTTP API instead of the MCP script.
|
|
224
|
-
|
|
225
232
|
## How It Works
|
|
226
233
|
|
|
227
234
|
### The Memory Lifecycle
|
|
228
235
|
|
|
229
|
-
1. **Write** —
|
|
236
|
+
1. **Write** — Salience scoring evaluates novelty, surprise, causal depth, and effort. High-salience memories go active; borderline ones enter staging; noise is discarded.
|
|
230
237
|
|
|
231
|
-
2. **Connect** —
|
|
238
|
+
2. **Connect** — Vector embedding (MiniLM-L6-v2, 384d). Temporal edges link to recent memories. Hebbian edges form between co-retrieved memories.
|
|
232
239
|
|
|
233
|
-
3. **Retrieve** —
|
|
240
|
+
3. **Retrieve** — 10-phase pipeline: BM25 + semantic search + cross-encoder reranking + temporal decay (ACT-R) + graph walks + confidence gating.
|
|
234
241
|
|
|
235
|
-
4. **Consolidate** —
|
|
236
|
-
- **Replay** — Find clusters of semantically similar memories
|
|
237
|
-
- **Strengthen** — Reinforce edges within clusters (access-weighted)
|
|
238
|
-
- **Bridge** — Create cross-cluster shortcuts between related topics
|
|
239
|
-
- **Decay** — Weaken unused edges (confidence-modulated half-life)
|
|
240
|
-
- **Homeostasis** — Normalize outgoing edge weights to prevent hub explosion
|
|
241
|
-
- **Forget** — Archive unretrieved, weakly-connected memories (age-gated, access-scaled)
|
|
242
|
-
- **Prune redundancy** — Archive semantically duplicate low-quality memories
|
|
243
|
-
- **Sweep staging** — Promote staging memories that resonate with active ones
|
|
242
|
+
4. **Consolidate** — 7-phase sleep cycle: replay clusters, strengthen edges, bridge cross-topic, decay unused, normalize hubs, forget noise, sweep staging.
|
|
244
243
|
|
|
245
|
-
5. **Feedback** —
|
|
244
|
+
5. **Feedback** — Useful/not-useful signals adjust confidence, affecting retrieval rank and forgetting resistance.
|
|
246
245
|
|
|
247
|
-
### Cognitive
|
|
246
|
+
### Cognitive Foundations
|
|
248
247
|
|
|
249
|
-
|
|
248
|
+
- **ACT-R activation decay** (Anderson 1993) — memories decay with time, strengthen with use
|
|
249
|
+
- **Hebbian learning** — co-retrieved memories form stronger associative edges
|
|
250
|
+
- **Complementary Learning Systems** — fast capture (salience + staging) + slow consolidation (sleep cycle)
|
|
251
|
+
- **Synaptic homeostasis** — edge weight normalization prevents hub domination
|
|
252
|
+
- **Forgetting as feature** — noise removal improves signal-to-noise for connected memories
|
|
250
253
|
|
|
251
|
-
|
|
252
|
-
- **Hebbian learning** — "neurons that fire together wire together." Co-retrieved memories form stronger associative edges, enabling graph-based spreading activation.
|
|
253
|
-
- **Complementary Learning Systems** — fast capture (salience filter + staging) combined with slow consolidation (sleep cycle). Mirrors hippocampal-neocortical memory transfer.
|
|
254
|
-
- **Synaptic homeostasis** — total connection weight per memory is normalized to prevent any single "hub" from dominating retrieval. Similar to how the brain downscales synaptic strength during sleep.
|
|
255
|
-
- **Forgetting as feature** — noise removal improves signal-to-noise ratio for connected memories. Unretrieved noise gets pruned; confirmed knowledge gets stronger. In benchmarks, aggressive forgetting improves quality recall from 3/5 to 5/5.
|
|
254
|
+
---
|
|
256
255
|
|
|
257
256
|
## Architecture
|
|
258
257
|
|
|
@@ -274,70 +273,41 @@ src/
|
|
|
274
273
|
retraction.ts - Negative memory / corrections
|
|
275
274
|
eviction.ts - Capacity enforcement
|
|
276
275
|
hooks/
|
|
277
|
-
sidecar.ts - Hook HTTP server (auto-checkpoint
|
|
276
|
+
sidecar.ts - Hook HTTP server (auto-checkpoint, stats, timer)
|
|
278
277
|
storage/
|
|
279
278
|
sqlite.ts - SQLite + FTS5 persistence layer
|
|
280
279
|
api/
|
|
281
280
|
routes.ts - HTTP endpoints (memory + task + system)
|
|
282
|
-
mcp.ts - MCP server (13 tools
|
|
281
|
+
mcp.ts - MCP server (13 tools, incognito support)
|
|
283
282
|
cli.ts - CLI (setup, serve, hook config)
|
|
284
283
|
index.ts - HTTP server entry point
|
|
285
284
|
```
|
|
286
285
|
|
|
287
|
-
|
|
286
|
+
For detailed architecture including pipeline phases, database schema, and system diagrams, see [docs/architecture.md](docs/architecture.md).
|
|
288
287
|
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
```bash
|
|
292
|
-
# Create a task
|
|
293
|
-
curl -X POST http://localhost:8400/task/create \
|
|
294
|
-
-H "Content-Type: application/json" \
|
|
295
|
-
-d '{
|
|
296
|
-
"agentId": "my-agent",
|
|
297
|
-
"concept": "Fix login redirect bug",
|
|
298
|
-
"content": "Users get 404 after OAuth callback",
|
|
299
|
-
"priority": "urgent"
|
|
300
|
-
}'
|
|
301
|
-
|
|
302
|
-
# Get next actionable task
|
|
303
|
-
curl http://localhost:8400/task/next/my-agent
|
|
304
|
-
```
|
|
305
|
-
|
|
306
|
-
Priority levels: `urgent` > `high` > `medium` > `low`. Tasks can be blocked by other tasks and automatically unblock when dependencies complete.
|
|
288
|
+
---
|
|
307
289
|
|
|
308
290
|
## Testing & Evaluation
|
|
309
291
|
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
### Unit Tests (no server needed)
|
|
292
|
+
### Unit Tests
|
|
313
293
|
|
|
314
294
|
```bash
|
|
315
|
-
npx vitest run # 68 tests
|
|
295
|
+
npx vitest run # 68 tests
|
|
316
296
|
```
|
|
317
297
|
|
|
318
|
-
### Eval Suites
|
|
298
|
+
### Eval Suites
|
|
319
299
|
|
|
320
300
|
| Command | What it tests | Score |
|
|
321
301
|
|---------|--------------|-------|
|
|
322
|
-
| `npm run test:self` | 31 pipeline
|
|
323
|
-
| `npm run test:edge` | 9 adversarial failure modes
|
|
324
|
-
| `npm run test:stress` | 500 memories, 100 sleep cycles, catastrophic forgetting
|
|
325
|
-
| `npm run test:workday` |
|
|
326
|
-
| `npm run test:ab` |
|
|
327
|
-
| `npm run test:tokens` |
|
|
328
|
-
| `npm run test:realworld` |
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
### What the Edge Cases Prove
|
|
333
|
-
|
|
334
|
-
The edge case suite is particularly important because it tests the failure modes that kill other memory systems:
|
|
335
|
-
|
|
336
|
-
- **Context Collapse** — Can AWM find 5 rare but important memories buried under 100 routine ones? (Yes — salience scoring and feedback bonuses keep confirmed knowledge afloat.)
|
|
337
|
-
- **Mega-Hub Toxicity** — If one memory has 50 connections, does it hijack every query? (No — synaptic homeostasis normalizes edge weights.)
|
|
338
|
-
- **Flashbulb Distortion** — Does a high-emotion memory overwrite the actual facts? (No — retraction system and confidence scoring keep specifics intact.)
|
|
339
|
-
- **Narcissistic Interference** — Can 30 self-referential "I am amazing" claims drown out 4 real facts? (No — redundancy pruning archives the clones, feedback bonuses elevate confirmed knowledge.)
|
|
340
|
-
- **Noise Forgetting Benefit** — Does forgetting 150 noise memories hurt the 5 quality ones? (Opposite — recall stays 5/5 because forgetting *improves* signal-to-noise ratio for connected memories.)
|
|
302
|
+
| `npm run test:self` | 31 pipeline checks: embeddings, BM25, reranker, decay, confidence, Hebbian, graph walks, staging | **97.4%** |
|
|
303
|
+
| `npm run test:edge` | 9 adversarial failure modes, including context collapse, hub toxicity, flashbulb distortion, narcissistic interference, identity collision, contradiction, bridge overshoot, and noise benefit | **100%** |
|
|
304
|
+
| `npm run test:stress` | 500 memories, 100 sleep cycles, catastrophic forgetting, adversarial spam, recovery | **92.3%** |
|
|
305
|
+
| `npm run test:workday` | 43 memories across 4 projects, 14 recall challenges | **86.7%** |
|
|
306
|
+
| `npm run test:ab` | AWM vs keyword baseline, 100 events, 24 questions | **AWM 100% vs 83%** |
|
|
307
|
+
| `npm run test:tokens` | Token savings vs full conversation history | **64.5%** |
|
|
308
|
+
| `npm run test:realworld` | 300 chunks from 71K-line monorepo, 16 challenges | **93.1%** |
|
|
309
|
+
|
|
310
|
+
---
|
|
341
311
|
|
|
342
312
|
## Environment Variables
|
|
343
313
|
|
|
@@ -345,13 +315,13 @@ The edge case suite is particularly important because it tests the failure modes
|
|
|
345
315
|
|----------|---------|---------|
|
|
346
316
|
| `AWM_PORT` | `8400` | HTTP server port |
|
|
347
317
|
| `AWM_DB_PATH` | `memory.db` | SQLite database path |
|
|
348
|
-
| `AWM_AGENT_ID` | `claude-code` |
|
|
318
|
+
| `AWM_AGENT_ID` | `claude-code` | Agent ID (memory namespace) |
|
|
349
319
|
| `AWM_EMBED_MODEL` | `Xenova/all-MiniLM-L6-v2` | Embedding model |
|
|
350
320
|
| `AWM_EMBED_DIMS` | `384` | Embedding dimensions |
|
|
351
321
|
| `AWM_RERANKER_MODEL` | `Xenova/ms-marco-MiniLM-L-6-v2` | Reranker model |
|
|
352
|
-
| `AWM_HOOK_PORT` | `8401` | Hook sidecar
|
|
353
|
-
| `AWM_HOOK_SECRET` | *(none)* | Bearer token for hook
|
|
354
|
-
| `AWM_INCOGNITO` | *(unset)* | Set to `1` to disable all
|
|
322
|
+
| `AWM_HOOK_PORT` | `8401` | Hook sidecar port |
|
|
323
|
+
| `AWM_HOOK_SECRET` | *(none)* | Bearer token for hook auth |
|
|
324
|
+
| `AWM_INCOGNITO` | *(unset)* | Set to `1` to disable all tools |
|
|
355
325
|
|
|
356
326
|
## Tech Stack
|
|
357
327
|
|
|
@@ -365,8 +335,22 @@ The edge case suite is particularly important because it tests the failure modes
|
|
|
365
335
|
| Tests | Vitest 4 |
|
|
366
336
|
| Validation | Zod 4 |
|
|
367
337
|
|
|
368
|
-
All three ML models
|
|
338
|
+
All three ML models run locally via ONNX. No external API calls for retrieval. The entire system is a single SQLite file + a Node.js process.
|
|
339
|
+
|
|
340
|
+
## Project Status
|
|
341
|
+
|
|
342
|
+
AWM is in active development (v0.4.x). The core memory pipeline, consolidation system, and MCP integration are stable and used daily in production coding workflows.
|
|
343
|
+
|
|
344
|
+
- Core retrieval and consolidation: **stable**
|
|
345
|
+
- MCP tools and Claude Code integration: **stable**
|
|
346
|
+
- Task management: **stable**
|
|
347
|
+
- Hook sidecar and auto-checkpoint: **stable**
|
|
348
|
+
- HTTP API: **stable** (for custom agents)
|
|
349
|
+
|
|
350
|
+
See [CHANGELOG.md](CHANGELOG.md) for version history.
|
|
351
|
+
|
|
352
|
+
---
|
|
369
353
|
|
|
370
354
|
## License
|
|
371
355
|
|
|
372
|
-
|
|
356
|
+
Apache 2.0 — see [LICENSE](LICENSE) and [NOTICE](NOTICE).
|
package/dist/api/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/api/index.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/api/index.ts"],"names":[],"mappings":"AAEA,cAAc,aAAa,CAAC"}
|
package/dist/api/index.js
CHANGED
package/dist/api/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/api/index.ts"],"names":[],"mappings":"AAAA,cAAc,aAAa,CAAC"}
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/api/index.ts"],"names":[],"mappings":"AAAA,gDAAgD;AAChD,sCAAsC;AACtC,cAAc,aAAa,CAAC"}
|
package/dist/api/routes.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"routes.d.ts","sourceRoot":"","sources":["../../src/api/routes.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"routes.d.ts","sourceRoot":"","sources":["../../src/api/routes.ts"],"names":[],"mappings":"AAEA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA+BG;AAEH,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,SAAS,CAAC;AAC/C,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,sBAAsB,CAAC;AACxD,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,yBAAyB,CAAC;AAChE,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,0BAA0B,CAAC;AACjE,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,uBAAuB,CAAC;AAC5D,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,yBAAyB,CAAC;AAChE,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AACpD,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,4BAA4B,CAAC;AACtE,OAAO,KAAK,EAAE,sBAAsB,EAAE,MAAM,sCAAsC,CAAC;AAQnF,MAAM,WAAW,UAAU;IACzB,KAAK,EAAE,WAAW,CAAC;IACnB,gBAAgB,EAAE,gBAAgB,CAAC;IACnC,gBAAgB,EAAE,gBAAgB,CAAC;IACnC,cAAc,EAAE,cAAc,CAAC;IAC/B,gBAAgB,EAAE,gBAAgB,CAAC;IACnC,UAAU,EAAE,UAAU,CAAC;IACvB,mBAAmB,EAAE,mBAAmB,CAAC;IACzC,sBAAsB,EAAE,sBAAsB,CAAC;CAChD;AAED,wBAAgB,cAAc,CAAC,GAAG,EAAE,eAAe,EAAE,IAAI,EAAE,UAAU,GAAG,IAAI,CA4d3E"}
|
package/dist/api/routes.js
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
// Copyright 2026 Robert Winter / Complete Ideas
|
|
2
|
+
// SPDX-License-Identifier: Apache-2.0
|
|
1
3
|
/**
|
|
2
4
|
* API Routes — the black box interface agents interact with.
|
|
3
5
|
*
|
|
@@ -40,6 +42,11 @@ export function registerRoutes(app, deps) {
|
|
|
40
42
|
// ============================================================
|
|
41
43
|
app.post('/memory/write', async (req, reply) => {
|
|
42
44
|
const body = req.body;
|
|
45
|
+
if (!body.agentId || typeof body.agentId !== 'string' ||
|
|
46
|
+
!body.concept || typeof body.concept !== 'string' ||
|
|
47
|
+
!body.content || typeof body.content !== 'string') {
|
|
48
|
+
return reply.status(400).send({ error: 'agentId, concept, and content are required strings' });
|
|
49
|
+
}
|
|
43
50
|
const novelty = computeNovelty(store, body.agentId, body.concept, body.content);
|
|
44
51
|
const salience = evaluateSalience({
|
|
45
52
|
content: body.content,
|