@bd7pil/opencode-deep-memory 0.7.0 → 0.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -7,11 +7,11 @@
7
7
  OpenCode sessions are stateless. Every restart is a cold start. Native compaction
8
8
  destroys conversation content. **deep-memory** adds three layers:
9
9
 
10
- | Layer | Hook | Purpose |
11
- |-------|------|---------|
12
- | **Remember** | `memory_search`, `memory_store`, `memory_forget`, `memory_expand` | Decisions, constraints, gotchas survive across sessions via BM25 + CJK search. Storage at `.deep-memory/` in your project root — visible, version-controllable. |
13
- | **Recover** | `session.created`, `experimental.session.compacting` | Checkpoint captures conversation before compaction destroys it. Resume injection recalls everything on a new session (3000 token first-turn budget). |
14
- | **Compress** | `experimental.chat.messages.transform` | Old reasoning, metadata, system injections, and thinking tags stripped deterministically — no LLM calls. Cache-stable sentinel replacements preserve prompt cache. |
10
+ | Layer | What survives | How |
11
+ |-------|--------------|-----|
12
+ | **Remember** | Decisions, constraints, gotchas | `memory_search` / `memory_store` BM25 + CJK search across sessions |
13
+ | **Recover** | Full conversation context | Checkpoint captures before compaction; resume injection on new session |
14
+ | **Compress** | Token budget | Deterministic stripping + pressure-triggered deep compression — no LLM calls |
15
15
 
16
16
  ## Quick start
17
17
 
@@ -30,125 +30,116 @@ OpenCode auto-installs on startup. Memory appears at `.deep-memory/` in your pro
30
30
  ## How it works
31
31
 
32
32
  ```
33
- ┌─────────────────────────────┐
34
- │ system.transform │
35
- │ m[0] stable (cache hit) │
36
- │ m[1] volatile (per-turn) │
37
- │ repo map (code symbols) │
38
- └─────────────────────────────┘
39
-
40
- ┌──────────────┐ ┌──────────────┐ │ ┌───────────────────────────┐
41
- │ chat.message │ │ chat.params │ │ │ messages.transform │
42
- keyword→notes│ │ agent→budget │ │ │ ① Layer 1: strip reason.
43
- "记住"/"rem" │ │ main 800t │ │ │ ② Layer 2: deep compress
44
- │ │ oracle 400t │ │ │ dedup / error purge /
45
- └──────────────┘ └──────────────┘ │ │ tool compress / JSON / │
46
- │ │ message prune / CCR │
47
- ┌──────────────┘ └───────────────────────────┘
48
-
49
- ┌────────────────────┴────────────────────────┐
50
- │ event │
50
- │ session.created → resume + dream schedule │
52
- │ session.idle → enrichment + notify │
53
- │ session.compacted → checkpoint │
54
- └─────────────────────────────────────────────┘
33
+ ┌─────────────────────────────────────────────────────────────────┐
34
+ │ messages.transform (every turn) │
35
+ │ ├─ Strip reasoning/thinking parts (physical removal) │
36
+ │ ├─ Remove system-injected messages (physical removal) │
37
+ │ ├─ Truncate old tool errors │
38
+ │ └─ Deep compress: dedup / tool output / JSON / assistant text │
39
+ └─────────────────────────────────────────────────────────────────┘
40
+
41
+ ┌─────────────────────────────────────────────────────────────────┐
42
+ │ system.transform (every turn) │
43
+ │ ├─ Inject stable: MEMORY.md constraints + tool hint (cache hit) │
44
+ │ └─ Inject volatile: BM25 search results + repo map symbols │
45
+ └─────────────────────────────────────────────────────────────────┘
46
+
47
+ ┌─────────────────────────────────────────────────────────────────┐
48
+ │ compacting (before OpenCode destroys messages) │
48
+ │ ├─ Capture raw messages → checkpoint.raw.json │
49
+ │ ├─ Extract knowledge → checkpoint.md │
50
+ │ └─ Inject structured handoff prompt for LLM │
52
+ └─────────────────────────────────────────────────────────────────┘
53
+
54
+ ┌─────────────────────────────────────────────────────────────────┐
55
+ │ events │
56
+ │ ├─ session.created → resume + dream schedule │
57
+ │ ├─ session.idle → enrichment │
58
+ │ └─ session.compacted → pressure calibration │
59
+ └─────────────────────────────────────────────────────────────────┘
55
60
  ```
56
61
 
57
62
  ## Context compression
58
63
 
59
- Two compression layers run automatically, no LLM calls required.
64
+ Two layers, fully automatic, no LLM calls.
60
65
 
61
- ### Layer 1: Deterministic stripping
66
+ ### Layer 1: Deterministic stripping (always active)
62
67
 
63
- Always active, strips disposable content from old messages:
68
+ | Target | Action |
69
+ |--------|--------|
70
+ | Old reasoning/thinking parts | Physical removal |
71
+ | System injections (`<system-reminder>`, etc.) | Physical removal |
72
+ | Tool errors >100 chars (older than 4 turns) | Truncate |
73
+ | Inline `<thinking>` tags | Regex strip |
64
74
 
65
- | What gets stripped | How | Why safe |
66
- |--------------------|-----|----------|
67
- | `reasoning_details` metadata | Delete the JSON blob | Billing metadata, never reaches model |
68
- | Old reasoning text | Replace with `[cleared]` | Conclusions are in assistant text |
69
- | System injections | Replace with `[stripped]` | `<system-reminder>` stale after one turn |
70
- | Tool errors >100 chars | Truncate | An old error only needs "it failed" |
71
- | Inline `<thinking>` tags | Regex strip | Process, not product |
75
+ No marker pollution — old content is physically removed, not replaced with `[cleared]` or `[stripped]`. This prevents [context confusion](https://www.philschmid.de/context-engineering-part-2).
72
76
 
73
77
  ### Layer 2: Deep compression (pressure-triggered)
74
78
 
75
- Activates when context pressure exceeds thresholds. Inspired by
76
- [DCP](https://github.com/Opencode-DCP/opencode-dynamic-context-pruning),
77
- [Headroom](https://github.com/chopratejas/headroom), and
78
- [Edgee](https://github.com/edgee-ai/edgee).
79
-
80
79
  | Pressure | Threshold | Actions |
81
80
  |----------|-----------|---------|
82
- | **always** | every turn | tool dedup + error purge + tool output compress + JSON crush (all reversible via CCR) |
83
- | **medium** | ≥ 30% context | + old message text truncation (lossy, extracts key info) |
84
- | **high** | ≥ 50% context | + nudge (alerts model to save important findings) |
81
+ | **always** | every turn | tool dedup + error purge + tool output compress + JSON crush + assistant text compress |
82
+ | **medium** | ≥ 50K tokens | + memory nudge (prompts LLM to use `memory_store`) |
83
+ | **high** | ≥ 150K tokens | + pressure nudge (prompts LLM to summarize old tasks) |
85
84
 
86
- What gets compressed at medium+:
85
+ Thresholds are absolute, not percentage-based — they work consistently across 200K and 1M+ context windows. Based on [Focus Agent](https://arxiv.org/html/2601.07190v1) research.
87
86
 
88
87
  | Target | Strategy | Source |
89
88
  |--------|----------|--------|
90
- | Duplicate tool calls | Signature matching (`toolName::sortedParams`) | DCP |
91
- | Old error inputs | Purge inputs after 4 turns | DCP |
92
- | File reads | Keep first 50 + key lines + last 20 | Edgee |
93
- | Command outputs | Keep errors + last 30 lines | Edgee |
94
- | Search results | Keep top-20, group by file | Edgee |
95
- | JSON arrays | Keep first 30% + last 15% + dedup middle | Headroom SmartCrusher |
96
- | Old assistant text | Extract key info (headings, code, errors) | DCP |
89
+ | Duplicate tool calls | Signature matching | [DCP][] |
90
+ | Old error inputs | Purge after 4 turns | [DCP][] |
91
+ | File reads | Keep head + key lines + tail | [Edgee][] |
92
+ | Command outputs | Keep errors + tail | [Edgee][] |
93
+ | Search results | Keep top-20, group by file | [Edgee][] |
94
+ | JSON arrays | Head + dedup middle + tail | [Headroom][] |
95
+ | Old assistant text | Preserve structure, compress prose | [LLMLingua][] |
97
96
 
98
- All compressed content is **reversible** via CCR (Compress-Cache-Retrieve):
99
- originals are cached with SHA-256 hash and 5-minute TTL.
100
- Models can retrieve them via the `deep_expand` tool.
97
+ All compressed content is **reversible** via CCR (Compress-Cache-Retrieve) — originals cached with SHA-256 hash, retrievable via `deep_expand` tool.
101
98
 
102
- **Never touched**: user messages, recent 8 messages, protected tools
103
- (question, edit, write, todowrite, memory_store/search/forget).
99
+ **Never touched**: user messages, recent 4K tokens, protected tools (question, edit, write, todowrite, memory_*).
104
100
 
105
- ## Toast notifications
101
+ ## Memory nudge
106
102
 
107
- After each LLM turn, deep-memory shows a toast notification (bottom-right corner) summarizing
108
- what was compressed and injected. The notification level is chosen automatically:
103
+ Detects decisions, constraints, and fixes in conversation — nudges the LLM to persist them.
109
104
 
110
- | Scenario | Level | Content |
111
- |----------|-------|---------|
112
- | Injection only (no compression) | minimal | One-line summary: `-8.5K stripped` |
113
- | Compression (short session) | detailed | Progress bar + per-category breakdown |
114
- | Compression + rich context (repo-map, memory, checkpoint) | extended | Full panel with budget usage |
105
+ | Pattern | Example | Nudge |
106
+ |---------|---------|-------|
107
+ | Decision | "我决定用 PostgreSQL" / "I'll use PostgreSQL" | `memory_store(type="decision")` |
108
+ | Constraint | "不能用 eval()" / "must not use eval()" | `memory_store(type="constraint")` |
109
+ | Error fix | "修复了权限问题" / "fixed the permission error" | `memory_store(type="gotcha")` |
115
110
 
116
- Example toast (detailed level):
111
+ English + Chinese. Pressure nudge and memory nudge have independent cooldowns.
117
112
 
118
- ```
119
- deep-memory | compressed
120
- Compression ─────────────────────────────
121
- │████░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░│
122
- reasoning -6.2K | metadata -2.1K | tool_err -0.8K
123
- Injection ───────────────────────────────
124
- m[0] stable 1055B ✓ m[1] volatile 574B
125
- tier=main | mode=normal
126
- repo-map: 12 symbols | memory: 8 entries
127
- ```
113
+ ## Tools
114
+
115
+ | Tool | Purpose |
116
+ |------|---------|
117
+ | `memory_search` | Search persistent memory (BM25 + CJK bigram) |
118
+ | `memory_store` | Store decisions, constraints, gotchas, facts, notes |
119
+ | `memory_forget` | Remove stale memory entries |
120
+ | `memory_expand` | Retrieve original content of a compressed message |
121
+ | `deep_expand` | Retrieve original content via CCR hash |
128
122
 
129
- ## Cache-stable injection
123
+ ## Compaction
130
124
 
131
- Each turn pushes two system prompt fragments:
125
+ When OpenCode compacts a session:
132
126
 
133
- - **Stable** (`<deep-memory-stable>`): constraints, rules, and the tool hint.
134
- These change only when MEMORY.md is updated — typically across sessions, not turns.
135
- Because they're byte-identical turn after turn, the provider's prompt cache hits on this prefix.
127
+ 1. **Capture** raw messages to `checkpoint.raw.json`
128
+ 2. **Extract** knowledge via 5 heuristic extractors
129
+ 3. **Write** structured `checkpoint.md`
130
+ 4. **Inject** Hermes-8 structured prompt + Codex-style handoff prefix
136
131
 
137
- - **Volatile** (`<deep-memory-volatile>`): context-aware search results from the user's
138
- current query, tier-allocated by importance, plus repo map symbols for recently-read files.
139
- This is the only part that changes per turn.
132
+ The LLM produces: Task Overview → Progress → Key Decisions → Constraints → Files Modified → Errors → Next Steps → Critical Context
140
133
 
141
- The injection budget adapts to the agent: main orchestrator gets 800 tokens per turn
142
- (3000 on session resume), deep-reasoning agents get 400, and tool subagents get 80.
134
+ ## Memory consolidation
143
135
 
144
- ## Memory search (BM25 + CJK bigram)
136
+ | Cycle | Trigger | Action |
137
+ |-------|---------|--------|
138
+ | **Auto-dream** | 7 days or notes.md >20 lines | Consolidate notes + checkpoints → MEMORY.md |
139
+ | **Auto-distill** | 30 days | Package recurring workflows → skill candidates |
140
+ | **Enrichment** | Session idle after compaction | LLM enriches checkpoint with cross-references |
145
141
 
146
- Instead of SQLite FTS5, we use a pure-JS BM25 engine with a CJK-aware tokenizer.
147
- Chinese runs are split into sliding 2-character bigrams (`"权限死锁"` →
148
- `["权","权限","限死","死锁","锁"]`), making multi-character CJK phrases searchable
149
- without an embedding model. Latin text uses standard whitespace/punctuation splitting.
150
- The index is rebuilt from Markdown files on startup (<250ms for 2000 entries) and
151
- updated incrementally on writes.
142
+ New projects: MEMORY.md auto-bootstraps from notes.md. Both agents have `memory_forget` enabled.
152
143
 
153
144
  ## Configuration
154
145
 
@@ -161,81 +152,72 @@ updated incrementally on writes.
161
152
  ## Storage
162
153
 
163
154
  ```
164
- <project>/.deep-memory/ ← version-controllable
155
+ <project>/.deep-memory/
165
156
  ├── MEMORY.md persistent decisions/constraints/gotchas
166
157
  ├── notes.md keyword captures
167
158
  ├── checkpoint.md last compaction extraction
159
+ ├── checkpoint.raw.json raw messages dump
168
160
  ├── .schedule.json dream/distill state
169
- └── sessions/<sid>/ per-session archive
161
+ ├── .compaction-log.jsonl compaction audit trail
162
+ └── sessions/<sid>/ per-session archive
170
163
  ```
171
164
 
172
- ## Tools
173
-
174
- | Tool | Purpose |
175
- |------|---------|
176
- | `memory_search` | Search persistent memory across sessions (BM25 + CJK) |
177
- | `memory_store` | Store decisions, constraints, gotchas, facts, notes |
178
- | `memory_forget` | Remove memory entries matching a query |
179
- | `memory_expand` | Decompress a sentinel reference to its original content |
180
- | `deep_expand` | Retrieve original content compressed by CCR (use `[ccr:HASH]` marker) |
181
- | `deep_expand` | Retrieve original content compressed by CCR (use `[ccr:HASH]` marker) |
182
-
183
165
  ## Commands
184
166
 
185
- Copy `.opencode/command/*.md` to your project:
186
-
187
167
  - `/checkpoint` — manually capture session state
188
168
  - `/dream` — consolidate notes into persistent memory
189
169
  - `/distill` — package recurring workflows into skills
190
170
 
191
- ## Design
171
+ ## Development
192
172
 
193
- **Memory entries** carry a type (`decision`, `constraint`, `gotcha`, `fact`, `note`) and
194
- an importance score. Importance is heuristically derived from entry type, recency,
195
- frequency across sessions, and keyword-match relevance to the current query —
196
- no LLM calls required.
173
+ ```bash
174
+ npm install
175
+ npm run verify # typecheck + test (363) + build + smoke (49)
176
+ ```
197
177
 
198
- Entries are stored as Markdown sections (e.g. `## Decisions`, `## Constraints`) in
199
- `MEMORY.md`, with `[date]` timestamps for time-based decay. The BM25 index is rebuilt
200
- from these files on startup and updated incrementally on write.
178
+ ## Acknowledgments
201
179
 
202
- Background consolidation runs on a 7-day cycle (auto-dream) plus an accumulation trigger
203
- (when `notes.md` exceeds 20 lines). A separate 30-day cycle (auto-distill) packages
204
- recurring workflows into skill candidates. Both use background sessions to avoid
205
- consuming the main session's context budget.
180
+ **[DCP][]** — Dynamic Context Pruning for OpenCode. Tool dedup, error purge, and nudge system.
206
181
 
207
- ## Acknowledgments
182
+ **[Headroom][]** — JSON array crush and CCR (Compress-Cache-Retrieve).
183
+
184
+ **[Edgee][]** — Per-tool compression strategies (read, bash, grep, glob).
208
185
 
209
- **[MiMo-Code][]** — a terminal-native AI coding assistant with persistent memory that keeps a
210
- deep understanding of your project across sessions while continuously improving itself.
186
+ **[Contextomizer][]** — Content type detection pipeline.
211
187
 
212
- **[Magic Context][]** — unbounded context. Memory that manages itself. One session, for life.
213
- The hippocampus for coding agents, part of CortexKit.
188
+ **[Focus Agent][]** — Absolute token thresholds and assistant text compression research.
214
189
 
215
- **[Aider][]** — AI pair programming in your terminal. Lets you pair program with LLMs to start
216
- a new project or build on your existing codebase.
190
+ **[LLMLingua][]** — Selective compression: preserve structure, compress prose.
217
191
 
218
- **[Roo Code][]** — a whole dev team of AI agents in your code editor.
192
+ **[Codex CLI][]** — Handoff prefix pattern for compaction continuity.
219
193
 
220
- **[Continue][]** — pioneering open-source coding agent, available as a CLI, VS Code extension,
221
- and JetBrains plugin.
194
+ **[Google ADK][]** — Append-only event compaction architecture.
222
195
 
223
- **[OpenHands][]** — Code Less, Make More. A community focused on AI-driven development.
196
+ **[Hermes][]** — 8-section structured compaction prompt design.
224
197
 
225
- **[Plandex][]** — an AI coding agent designed for large tasks and real world projects.
198
+ **[MiMo-Code][]** — Terminal-native AI coding assistant with persistent memory.
226
199
 
227
- **[DCP][]** — Dynamic Context Pruning for OpenCode. Our tool deduplication, error purging,
228
- and nudge system are inspired by DCP's architecture.
200
+ **[Magic Context][]** — Unbounded context for coding agents.
229
201
 
230
- **[Headroom][]** — compress tool outputs, logs, files, RAG chunks for AI agents.
231
- Our JSON array crush and CCR (Compress-Cache-Retrieve) are derived from Headroom's SmartCrusher.
202
+ **[Aider][]** — AI pair programming in your terminal.
232
203
 
233
- **[Edgee][]** — agent gateway that compresses tokens before LLM providers.
234
- Our per-tool compression strategies (read, bash, grep, glob) are inspired by Edgee's approach.
204
+ **[Roo Code][]** — A whole dev team of AI agents in your code editor.
235
205
 
236
- **[Contextomizer][]** — ultra-fast deterministic library for transforming bloated tool outputs.
237
- Our content type detection pipeline is inspired by Contextomizer's approach.
206
+ **[Continue][]** — Pioneering open-source coding agent.
238
207
 
208
+ **[OpenHands][]** — Code Less, Make More.
209
+
210
+ **[Plandex][]** — AI coding agent for large tasks and real world projects.
211
+
212
+ [DCP]: https://github.com/Opencode-DCP/opencode-dynamic-context-pruning
213
+ [Headroom]: https://github.com/chopratejas/headroom
214
+ [Edgee]: https://github.com/edgee-ai/edgee
215
+ [Contextomizer]: https://github.com/GandalFran/contextomizer
216
+ [Focus Agent]: https://arxiv.org/html/2601.07190v1
217
+ [LLMLingua]: https://github.com/microsoft/LLMLingua
218
+ [Codex CLI]: https://github.com/openai/codex
219
+ [Google ADK]: https://github.com/google/adk-python
220
+ [Hermes]: https://github.com/NousResearch/hermes-agent
239
221
  [MiMo-Code]: https://github.com/XiaomiMiMo/MiMo-Code
240
222
  [Magic Context]: https://github.com/cortexkit/magic-context
241
223
  [Aider]: https://github.com/Aider-AI/aider
@@ -243,35 +225,6 @@ Our content type detection pipeline is inspired by Contextomizer's approach.
243
225
  [Continue]: https://github.com/continuedev/continue
244
226
  [OpenHands]: https://github.com/All-Hands-AI/OpenHands
245
227
  [Plandex]: https://github.com/plandex-ai/plandex
246
- [DCP]: https://github.com/Opencode-DCP/opencode-dynamic-context-pruning
247
- [Headroom]: https://github.com/chopratejas/headroom
248
- [Edgee]: https://github.com/edgee-ai/edgee
249
- [Contextomizer]: https://github.com/GandalFran/contextomizer
250
-
251
- ## Development
252
-
253
- ```bash
254
- npm install
255
- npm run verify # typecheck + test (363) + build + smoke (49)
256
- ```
257
-
258
- Stats: 54 source files, 27 test files (363 tests), 10 compress modules, 49 smoke checks.
259
-
260
- ## CI/CD (npm Trusted Publishing)
261
-
262
- Releases use npm OIDC Trusted Publishing — no token needed. To set up for a fork:
263
-
264
- 1. **npmjs.com** → Package Settings → Trusted Publishers → Add:
265
- - Owner: your GitHub username
266
- - Repository: your fork name
267
- - Workflow filename: `publish.yml`
268
- 2. **package.json** → update `repository.url` to match your fork
269
- 3. **Push a tag** → GitHub Actions auto-publishes:
270
- ```bash
271
- git tag v1.0.0 && git push origin v1.0.0
272
- ```
273
-
274
- Requirements: npm CLI ≥ 11.5.1, Node.js ≥ 22, `id-token: write` permission, public repository.
275
228
 
276
229
  ## License
277
230
 
package/dist/index.js CHANGED
@@ -261,6 +261,7 @@ var PluginState = class {
261
261
  _ccrCache = /* @__PURE__ */ new Map();
262
262
  _lastInputTokens = 0;
263
263
  _lastNudgeMessageCount = /* @__PURE__ */ new Map();
264
+ _lastMemoryNudgeMessageCount = /* @__PURE__ */ new Map();
264
265
  _lastCCRCleanup = 0;
265
266
  _modelContextWindow = 0;
266
267
  agentOf(sessionID) {
@@ -274,6 +275,7 @@ var PluginState = class {
274
275
  this._models.delete(sessionID);
275
276
  this._lastUserText.delete(sessionID);
276
277
  this._lastNudgeMessageCount.delete(sessionID);
278
+ this._lastMemoryNudgeMessageCount.delete(sessionID);
277
279
  }
278
280
  recordModel(sessionID, model) {
279
281
  this._models.set(sessionID, model);
@@ -423,6 +425,13 @@ var PluginState = class {
423
425
  const last = this._lastNudgeMessageCount.get(sessionID);
424
426
  return last != null ? currentMessageCount - last : Number.POSITIVE_INFINITY;
425
427
  }
428
+ recordMemoryNudge(sessionID, messageCount) {
429
+ this._lastMemoryNudgeMessageCount.set(sessionID, messageCount);
430
+ }
431
+ messagesSinceLastMemoryNudge(sessionID, currentMessageCount) {
432
+ const last = this._lastMemoryNudgeMessageCount.get(sessionID);
433
+ return last != null ? currentMessageCount - last : Number.POSITIVE_INFINITY;
434
+ }
426
435
  setModelContextWindow(tokens) {
427
436
  if (tokens > 0) this._modelContextWindow = tokens;
428
437
  }
@@ -1127,7 +1136,7 @@ async function runDream(opts) {
1127
1136
  tools: {
1128
1137
  memory_search: true,
1129
1138
  memory_store: true,
1130
- memory_forget: false,
1139
+ memory_forget: true,
1131
1140
  read: true,
1132
1141
  list: true
1133
1142
  }
@@ -1228,23 +1237,38 @@ async function handleSessionCreatedForDream(args) {
1228
1237
  }
1229
1238
  const notesPath = memoryFilePath("project", "notes", projectPath);
1230
1239
  let notesLines = 0;
1240
+ let notesContent = "";
1231
1241
  try {
1232
- const content = fs5.readFileSync(notesPath, "utf8");
1233
- if (content.trim().length === 0) {
1242
+ notesContent = fs5.readFileSync(notesPath, "utf8");
1243
+ if (notesContent.trim().length === 0) {
1234
1244
  logger?.debug("auto-dream: notes.md is empty, skipping spawn");
1235
1245
  return;
1236
1246
  }
1237
- notesLines = content.split("\n").filter((l) => l.trim()).length;
1247
+ notesLines = notesContent.split("\n").filter((l) => l.trim()).length;
1238
1248
  } catch {
1239
1249
  logger?.debug("auto-dream: notes.md not found, skipping spawn");
1240
1250
  return;
1241
1251
  }
1242
1252
  const memoryPath = memoryFilePath("project", "memory", projectPath);
1243
1253
  if (!fs5.existsSync(memoryPath) || fs5.statSync(memoryPath).size < 50) {
1244
- logger?.debug("auto-dream: MEMORY.md missing or too small, skipping", {
1245
- sessionID: info.id
1246
- });
1247
- return;
1254
+ if (notesLines >= 5) {
1255
+ try {
1256
+ fs5.writeFileSync(memoryPath, notesContent, "utf8");
1257
+ logger?.info("auto-dream: bootstrapped MEMORY.md from notes.md", {
1258
+ notesLines
1259
+ });
1260
+ } catch (err) {
1261
+ logger?.warn("auto-dream: failed to bootstrap MEMORY.md", {
1262
+ error: err instanceof Error ? err.message : String(err)
1263
+ });
1264
+ return;
1265
+ }
1266
+ } else {
1267
+ logger?.debug("auto-dream: MEMORY.md missing and notes too small, skipping", {
1268
+ sessionID: info.id
1269
+ });
1270
+ return;
1271
+ }
1248
1272
  }
1249
1273
  const isSevenDayDue = schedule.lastDream === null || Date.now() - Date.parse(schedule.lastDream) > DREAM_INTERVAL_MS;
1250
1274
  let isAccumulationDue = false;
@@ -1386,7 +1410,7 @@ async function runDistill(opts) {
1386
1410
  tools: {
1387
1411
  memory_search: true,
1388
1412
  memory_store: true,
1389
- memory_forget: false,
1413
+ memory_forget: true,
1390
1414
  read: true,
1391
1415
  list: true
1392
1416
  }
@@ -15301,6 +15325,43 @@ function maxContextFrom(modelContextWindow) {
15301
15325
  if (calibratedMaxContext > 0) return calibratedMaxContext;
15302
15326
  return FALLBACK_MAX_CONTEXT;
15303
15327
  }
15328
+ function estimateTokens2(text) {
15329
+ let cjk = 0;
15330
+ let other = 0;
15331
+ for (const ch of text) {
15332
+ if (/[\u4e00-\u9fff\u3400-\u4dbf\u3000-\u303f\uff00-\uffef\u3040-\u309f\u30a0-\u30ff]/.test(ch)) {
15333
+ cjk++;
15334
+ } else {
15335
+ other++;
15336
+ }
15337
+ }
15338
+ return Math.ceil(cjk * 0.7 + other / 3.8);
15339
+ }
15340
+ function extractTokensFromMessages(messages) {
15341
+ let total = 0;
15342
+ for (const msg of messages) {
15343
+ for (const part of msg.parts) {
15344
+ if (typeof part !== "object" || part === null) continue;
15345
+ const p = part;
15346
+ if (p["type"] === "text" && typeof p["text"] === "string") {
15347
+ total += estimateTokens2(p["text"]);
15348
+ } else if (p["type"] === "tool") {
15349
+ const state = p["state"];
15350
+ if (state?.["output"] && typeof state["output"] === "string") {
15351
+ total += estimateTokens2(state["output"]);
15352
+ }
15353
+ if (state?.["error"] && typeof state["error"] === "string") {
15354
+ total += estimateTokens2(state["error"]);
15355
+ }
15356
+ } else if (p["type"] === "reasoning" || p["type"] === "thinking") {
15357
+ if (typeof p["text"] === "string") {
15358
+ total += estimateTokens2(p["text"]);
15359
+ }
15360
+ }
15361
+ }
15362
+ }
15363
+ return total;
15364
+ }
15304
15365
  function extractInputTokensFromMessages(messages) {
15305
15366
  let best = 0;
15306
15367
  for (let i = messages.length - 1; i >= 0; i--) {
@@ -15323,7 +15384,7 @@ function extractInputTokensFromMessages(messages) {
15323
15384
  function detectPressure(messages, modelContextWindow) {
15324
15385
  const ctx = maxContextFrom(modelContextWindow || 0);
15325
15386
  const inputTokens = extractInputTokensFromMessages(messages);
15326
- const estimated = inputTokens > 0 ? inputTokens : 0;
15387
+ const estimated = inputTokens > 0 ? inputTokens : extractTokensFromMessages(messages);
15327
15388
  const ratio = Math.min(estimated / ctx, 1);
15328
15389
  let level;
15329
15390
  if (estimated >= PRESSURE_HIGH_TOKENS) level = "high";
@@ -15350,17 +15411,17 @@ function buildNudgeText(level) {
15350
15411
  var MEMORY_NUDGE_COOLDOWN = 3;
15351
15412
  var DECISION_PATTERNS = [
15352
15413
  /\b(?:decided|decision|chose|chosen|picked|selected)\b/i,
15353
- /\b(?:采用|选择|决定|确定|选用)\b/,
15414
+ /(?:采用|选择|决定|确定|选用)/,
15354
15415
  /\b(?:use|using|go with|went with)\b.*\b(?:because|since|due to)\b/i
15355
15416
  ];
15356
15417
  var CONSTRAINT_PATTERNS = [
15357
15418
  /\b(?:must not|cannot|should not|do not|never|always)\b/i,
15358
15419
  /\b(?:constraint|restriction|limitation|requirement)\b/i,
15359
- /\b(?:不能|必须|禁止|约束|限制|要求|务必)\b/
15420
+ /(?:不能|必须|禁止|约束|限制|要求|务必)/
15360
15421
  ];
15361
15422
  var ERROR_FIX_PATTERNS = [
15362
15423
  /\b(?:fix|fixed|resolve|resolved|patch|corrected)\b/i,
15363
- /\b(?:修复|修复了|解决|解决了)\b/,
15424
+ /(?:修复|修复了|解决|解决了)/,
15364
15425
  /\b(?:the (?:bug|error|issue) (?:was|is)|root cause)\b/i
15365
15426
  ];
15366
15427
  function detectMemoryNudge(messages, messagesSinceLastNudge) {
@@ -15374,13 +15435,14 @@ function detectMemoryNudge(messages, messagesSinceLastNudge) {
15374
15435
  const hasRecentToolError = recentMessages.some(
15375
15436
  (m) => m.parts.some((p) => p.type === "tool" && p.state?.status === "error")
15376
15437
  );
15438
+ const recentAll = recentUserText + "\n" + recentAssistantText;
15377
15439
  if (hasRecentToolError && ERROR_FIX_PATTERNS.some((p) => p.test(recentAssistantText))) {
15378
15440
  return { injected: true, type: "gotcha" };
15379
15441
  }
15380
- if (CONSTRAINT_PATTERNS.some((p) => p.test(recentUserText))) {
15442
+ if (CONSTRAINT_PATTERNS.some((p) => p.test(recentAll))) {
15381
15443
  return { injected: true, type: "constraint" };
15382
15444
  }
15383
- if (DECISION_PATTERNS.some((p) => p.test(recentAssistantText))) {
15445
+ if (DECISION_PATTERNS.some((p) => p.test(recentAll))) {
15384
15446
  return { injected: true, type: "decision" };
15385
15447
  }
15386
15448
  return { injected: false, type: null };
@@ -15774,17 +15836,19 @@ function runCompressionPipeline(ctx) {
15774
15836
  estimatedTokens: pressure.estimatedTokens
15775
15837
  };
15776
15838
  const sid = sessionID || "default";
15777
- const messagesSinceNudge = state.messagesSinceLastNudge(sid, messages.length);
15778
- if (shouldInjectNudge(pressure.level, messagesSinceNudge)) {
15839
+ const currentMsgCount = messages.length;
15840
+ const pressureSince = state.messagesSinceLastNudge(sid, currentMsgCount);
15841
+ if (shouldInjectNudge(pressure.level, pressureSince)) {
15779
15842
  if (injectIntoLastAssistant(messages, buildNudgeText(pressure.level))) {
15780
15843
  stats.nudgeInjected = true;
15781
- state.recordNudge(sid, messages.length);
15844
+ state.recordNudge(sid, currentMsgCount);
15782
15845
  }
15783
15846
  }
15784
- const memoryNudge = detectMemoryNudge(messages, state.messagesSinceLastNudge(sid, messages.length));
15847
+ const memorySince = state.messagesSinceLastMemoryNudge(sid, currentMsgCount);
15848
+ const memoryNudge = detectMemoryNudge(messages, memorySince);
15785
15849
  if (memoryNudge.injected) {
15786
15850
  if (injectIntoLastAssistant(messages, buildMemoryNudge(memoryNudge.type))) {
15787
- state.recordNudge(sid, messages.length);
15851
+ state.recordMemoryNudge(sid, currentMsgCount);
15788
15852
  logger?.debug("compress: memory nudge", { type: memoryNudge.type });
15789
15853
  }
15790
15854
  }