@bd7pil/opencode-deep-memory 0.7.0 → 0.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +123 -170
- package/dist/index.js +84 -20
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -7,11 +7,11 @@
|
|
|
7
7
|
OpenCode sessions are stateless. Every restart is a cold start. Native compaction
|
|
8
8
|
destroys conversation content. **deep-memory** adds three layers:
|
|
9
9
|
|
|
10
|
-
| Layer |
|
|
11
|
-
|
|
12
|
-
| **Remember** |
|
|
13
|
-
| **Recover** |
|
|
14
|
-
| **Compress** |
|
|
10
|
+
| Layer | What survives | How |
|
|
11
|
+
|-------|--------------|-----|
|
|
12
|
+
| **Remember** | Decisions, constraints, gotchas | `memory_search` / `memory_store` — BM25 + CJK search across sessions |
|
|
13
|
+
| **Recover** | Full conversation context | Checkpoint captures before compaction; resume injection on new session |
|
|
14
|
+
| **Compress** | Token budget | Deterministic stripping + pressure-triggered deep compression — no LLM calls |
|
|
15
15
|
|
|
16
16
|
## Quick start
|
|
17
17
|
|
|
@@ -30,125 +30,116 @@ OpenCode auto-installs on startup. Memory appears at `.deep-memory/` in your pro
|
|
|
30
30
|
## How it works
|
|
31
31
|
|
|
32
32
|
```
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
│
|
|
43
|
-
│
|
|
44
|
-
│
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
│
|
|
51
|
-
│
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
33
|
+
┌─────────────────────────────────────────────────────────────────┐
|
|
34
|
+
│ messages.transform (every turn) │
|
|
35
|
+
│ ├─ Strip reasoning/thinking parts (physical removal) │
|
|
36
|
+
│ ├─ Remove system-injected messages (physical removal) │
|
|
37
|
+
│ ├─ Truncate old tool errors │
|
|
38
|
+
│ └─ Deep compress: dedup / tool output / JSON / assistant text │
|
|
39
|
+
└─────────────────────────────────────────────────────────────────┘
|
|
40
|
+
|
|
41
|
+
┌─────────────────────────────────────────────────────────────────┐
|
|
42
|
+
│ system.transform (every turn) │
|
|
43
|
+
│ ├─ Inject stable: MEMORY.md constraints + tool hint (cache hit)│
|
|
44
|
+
│ └─ Inject volatile: BM25 search results + repo map symbols │
|
|
45
|
+
└─────────────────────────────────────────────────────────────────┘
|
|
46
|
+
|
|
47
|
+
┌─────────────────────────────────────────────────────────────────┐
|
|
48
|
+
│ compacting (before OpenCode destroys messages) │
|
|
49
|
+
│ ├─ Capture raw messages → checkpoint.raw.json │
|
|
50
|
+
│ ├─ Extract knowledge → checkpoint.md │
|
|
51
|
+
│ └─ Inject structured handoff prompt for LLM │
|
|
52
|
+
└─────────────────────────────────────────────────────────────────┘
|
|
53
|
+
|
|
54
|
+
┌─────────────────────────────────────────────────────────────────┐
|
|
55
|
+
│ events │
|
|
56
|
+
│ ├─ session.created → resume + dream schedule │
|
|
57
|
+
│ ├─ session.idle → enrichment │
|
|
58
|
+
│ └─ session.compacted → pressure calibration │
|
|
59
|
+
└─────────────────────────────────────────────────────────────────┘
|
|
55
60
|
```
|
|
56
61
|
|
|
57
62
|
## Context compression
|
|
58
63
|
|
|
59
|
-
Two
|
|
64
|
+
Two layers, fully automatic, no LLM calls.
|
|
60
65
|
|
|
61
|
-
### Layer 1: Deterministic stripping
|
|
66
|
+
### Layer 1: Deterministic stripping (always active)
|
|
62
67
|
|
|
63
|
-
|
|
68
|
+
| Target | Action |
|
|
69
|
+
|--------|--------|
|
|
70
|
+
| Old reasoning/thinking parts | Physical removal |
|
|
71
|
+
| System injections (`<system-reminder>`, etc.) | Physical removal |
|
|
72
|
+
| Tool errors >100 chars (older than 4 turns) | Truncate |
|
|
73
|
+
| Inline `<thinking>` tags | Regex strip |
|
|
64
74
|
|
|
65
|
-
|
|
66
|
-
|--------------------|-----|----------|
|
|
67
|
-
| `reasoning_details` metadata | Delete the JSON blob | Billing metadata, never reaches model |
|
|
68
|
-
| Old reasoning text | Replace with `[cleared]` | Conclusions are in assistant text |
|
|
69
|
-
| System injections | Replace with `[stripped]` | `<system-reminder>` stale after one turn |
|
|
70
|
-
| Tool errors >100 chars | Truncate | An old error only needs "it failed" |
|
|
71
|
-
| Inline `<thinking>` tags | Regex strip | Process, not product |
|
|
75
|
+
No marker pollution — old content is physically removed, not replaced with `[cleared]` or `[stripped]`. This prevents [context confusion](https://www.philschmid.de/context-engineering-part-2).
|
|
72
76
|
|
|
73
77
|
### Layer 2: Deep compression (pressure-triggered)
|
|
74
78
|
|
|
75
|
-
Activates when context pressure exceeds thresholds. Inspired by
|
|
76
|
-
[DCP](https://github.com/Opencode-DCP/opencode-dynamic-context-pruning),
|
|
77
|
-
[Headroom](https://github.com/chopratejas/headroom), and
|
|
78
|
-
[Edgee](https://github.com/edgee-ai/edgee).
|
|
79
|
-
|
|
80
79
|
| Pressure | Threshold | Actions |
|
|
81
80
|
|----------|-----------|---------|
|
|
82
|
-
| **always** | every turn | tool dedup + error purge + tool output compress + JSON crush
|
|
83
|
-
| **medium** | ≥
|
|
84
|
-
| **high** | ≥
|
|
81
|
+
| **always** | every turn | tool dedup + error purge + tool output compress + JSON crush + assistant text compress |
|
|
82
|
+
| **medium** | ≥ 50K tokens | + memory nudge (prompts LLM to use `memory_store`) |
|
|
83
|
+
| **high** | ≥ 150K tokens | + pressure nudge (prompts LLM to summarize old tasks) |
|
|
85
84
|
|
|
86
|
-
|
|
85
|
+
Thresholds are absolute, not percentage-based — they work consistently across 200K and 1M+ context windows. Based on [Focus Agent](https://arxiv.org/html/2601.07190v1) research.
|
|
87
86
|
|
|
88
87
|
| Target | Strategy | Source |
|
|
89
88
|
|--------|----------|--------|
|
|
90
|
-
| Duplicate tool calls | Signature matching
|
|
91
|
-
| Old error inputs | Purge
|
|
92
|
-
| File reads | Keep
|
|
93
|
-
| Command outputs | Keep errors +
|
|
94
|
-
| Search results | Keep top-20, group by file | Edgee |
|
|
95
|
-
| JSON arrays |
|
|
96
|
-
| Old assistant text |
|
|
89
|
+
| Duplicate tool calls | Signature matching | [DCP][] |
|
|
90
|
+
| Old error inputs | Purge after 4 turns | [DCP][] |
|
|
91
|
+
| File reads | Keep head + key lines + tail | [Edgee][] |
|
|
92
|
+
| Command outputs | Keep errors + tail | [Edgee][] |
|
|
93
|
+
| Search results | Keep top-20, group by file | [Edgee][] |
|
|
94
|
+
| JSON arrays | Head + dedup middle + tail | [Headroom][] |
|
|
95
|
+
| Old assistant text | Preserve structure, compress prose | [LLMLingua][] |
|
|
97
96
|
|
|
98
|
-
All compressed content is **reversible** via CCR (Compress-Cache-Retrieve)
|
|
99
|
-
originals are cached with SHA-256 hash and 5-minute TTL.
|
|
100
|
-
Models can retrieve them via the `deep_expand` tool.
|
|
97
|
+
All compressed content is **reversible** via CCR (Compress-Cache-Retrieve) — originals cached with SHA-256 hash, retrievable via `deep_expand` tool.
|
|
101
98
|
|
|
102
|
-
**Never touched**: user messages, recent
|
|
103
|
-
(question, edit, write, todowrite, memory_store/search/forget).
|
|
99
|
+
**Never touched**: user messages, recent 4K tokens, protected tools (question, edit, write, todowrite, memory_*).
|
|
104
100
|
|
|
105
|
-
##
|
|
101
|
+
## Memory nudge
|
|
106
102
|
|
|
107
|
-
|
|
108
|
-
what was compressed and injected. The notification level is chosen automatically:
|
|
103
|
+
Detects decisions, constraints, and fixes in conversation — nudges the LLM to persist them.
|
|
109
104
|
|
|
110
|
-
|
|
|
111
|
-
|
|
112
|
-
|
|
|
113
|
-
|
|
|
114
|
-
|
|
|
105
|
+
| Pattern | Example | Nudge |
|
|
106
|
+
|---------|---------|-------|
|
|
107
|
+
| Decision | "我决定用 PostgreSQL" / "I'll use PostgreSQL" | `memory_store(type="decision")` |
|
|
108
|
+
| Constraint | "不能用 eval()" / "must not use eval()" | `memory_store(type="constraint")` |
|
|
109
|
+
| Error fix | "修复了权限问题" / "fixed the permission error" | `memory_store(type="gotcha")` |
|
|
115
110
|
|
|
116
|
-
|
|
111
|
+
Patterns are matched in both English and Chinese. The pressure nudge and the memory nudge have independent cooldowns.
|
|
117
112
|
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
```
|
|
113
|
+
## Tools
|
|
114
|
+
|
|
115
|
+
| Tool | Purpose |
|
|
116
|
+
|------|---------|
|
|
117
|
+
| `memory_search` | Search persistent memory (BM25 + CJK bigram) |
|
|
118
|
+
| `memory_store` | Store decisions, constraints, gotchas, facts, notes |
|
|
119
|
+
| `memory_forget` | Remove stale memory entries |
|
|
120
|
+
| `memory_expand` | Retrieve original content of a compressed message |
|
|
121
|
+
| `deep_expand` | Retrieve original content via CCR hash |
|
|
128
122
|
|
|
129
|
-
##
|
|
123
|
+
## Compaction
|
|
130
124
|
|
|
131
|
-
|
|
125
|
+
When OpenCode compacts a session:
|
|
132
126
|
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
127
|
+
1. **Capture** raw messages to `checkpoint.raw.json`
|
|
128
|
+
2. **Extract** knowledge via 5 heuristic extractors
|
|
129
|
+
3. **Write** structured `checkpoint.md`
|
|
130
|
+
4. **Inject** Hermes-8 structured prompt + Codex-style handoff prefix
|
|
136
131
|
|
|
137
|
-
|
|
138
|
-
current query, tier-allocated by importance, plus repo map symbols for recently-read files.
|
|
139
|
-
This is the only part that changes per turn.
|
|
132
|
+
The LLM produces: Task Overview → Progress → Key Decisions → Constraints → Files Modified → Errors → Next Steps → Critical Context
|
|
140
133
|
|
|
141
|
-
|
|
142
|
-
(3000 on session resume), deep-reasoning agents get 400, and tool subagents get 80.
|
|
134
|
+
## Memory consolidation
|
|
143
135
|
|
|
144
|
-
|
|
136
|
+
| Cycle | Trigger | Action |
|
|
137
|
+
|-------|---------|--------|
|
|
138
|
+
| **Auto-dream** | 7 days or notes.md >20 lines | Consolidate notes + checkpoints → MEMORY.md |
|
|
139
|
+
| **Auto-distill** | 30 days | Package recurring workflows → skill candidates |
|
|
140
|
+
| **Enrichment** | Session idle after compaction | LLM enriches checkpoint with cross-references |
|
|
145
141
|
|
|
146
|
-
|
|
147
|
-
Chinese runs are split into sliding 2-character bigrams (`"权限死锁"` →
|
|
148
|
-
`["权","权限","限死","死锁","锁"]`), making multi-character CJK phrases searchable
|
|
149
|
-
without an embedding model. Latin text uses standard whitespace/punctuation splitting.
|
|
150
|
-
The index is rebuilt from Markdown files on startup (<250ms for 2000 entries) and
|
|
151
|
-
updated incrementally on writes.
|
|
142
|
+
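New projects: when MEMORY.md is missing or nearly empty, it is bootstrapped from notes.md once notes.md has at least 5 non-empty lines. Both background agents (dream and distill) have `memory_forget` enabled.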
|
|
152
143
|
|
|
153
144
|
## Configuration
|
|
154
145
|
|
|
@@ -161,81 +152,72 @@ updated incrementally on writes.
|
|
|
161
152
|
## Storage
|
|
162
153
|
|
|
163
154
|
```
|
|
164
|
-
<project>/.deep-memory/
|
|
155
|
+
<project>/.deep-memory/
|
|
165
156
|
├── MEMORY.md persistent decisions/constraints/gotchas
|
|
166
157
|
├── notes.md keyword captures
|
|
167
158
|
├── checkpoint.md last compaction extraction
|
|
159
|
+
├── checkpoint.raw.json raw messages dump
|
|
168
160
|
├── .schedule.json dream/distill state
|
|
169
|
-
|
|
161
|
+
├── .compaction-log.jsonl compaction audit trail
|
|
162
|
+
└── sessions/<sid>/ per-session archive
|
|
170
163
|
```
|
|
171
164
|
|
|
172
|
-
## Tools
|
|
173
|
-
|
|
174
|
-
| Tool | Purpose |
|
|
175
|
-
|------|---------|
|
|
176
|
-
| `memory_search` | Search persistent memory across sessions (BM25 + CJK) |
|
|
177
|
-
| `memory_store` | Store decisions, constraints, gotchas, facts, notes |
|
|
178
|
-
| `memory_forget` | Remove memory entries matching a query |
|
|
179
|
-
| `memory_expand` | Decompress a sentinel reference to its original content |
|
|
180
|
-
| `deep_expand` | Retrieve original content compressed by CCR (use `[ccr:HASH]` marker) |
|
|
181
|
-
| `deep_expand` | Retrieve original content compressed by CCR (use `[ccr:HASH]` marker) |
|
|
182
|
-
|
|
183
165
|
## Commands
|
|
184
166
|
|
|
185
|
-
Copy `.opencode/command/*.md` to your project:
|
|
186
|
-
|
|
187
167
|
- `/checkpoint` — manually capture session state
|
|
188
168
|
- `/dream` — consolidate notes into persistent memory
|
|
189
169
|
- `/distill` — package recurring workflows into skills
|
|
190
170
|
|
|
191
|
-
##
|
|
171
|
+
## Development
|
|
192
172
|
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
173
|
+
```bash
|
|
174
|
+
npm install
|
|
175
|
+
npm run verify # typecheck + test (363) + build + smoke (49)
|
|
176
|
+
```
|
|
197
177
|
|
|
198
|
-
|
|
199
|
-
`MEMORY.md`, with `[date]` timestamps for time-based decay. The BM25 index is rebuilt
|
|
200
|
-
from these files on startup and updated incrementally on write.
|
|
178
|
+
## Acknowledgments
|
|
201
179
|
|
|
202
|
-
|
|
203
|
-
(when `notes.md` exceeds 20 lines). A separate 30-day cycle (auto-distill) packages
|
|
204
|
-
recurring workflows into skill candidates. Both use background sessions to avoid
|
|
205
|
-
consuming the main session's context budget.
|
|
180
|
+
**[DCP][]** — Dynamic Context Pruning for OpenCode. Tool dedup, error purge, and nudge system.
|
|
206
181
|
|
|
207
|
-
|
|
182
|
+
**[Headroom][]** — JSON array crush and CCR (Compress-Cache-Retrieve).
|
|
183
|
+
|
|
184
|
+
**[Edgee][]** — Per-tool compression strategies (read, bash, grep, glob).
|
|
208
185
|
|
|
209
|
-
**[
|
|
210
|
-
deep understanding of your project across sessions while continuously improving itself.
|
|
186
|
+
**[Contextomizer][]** — Content type detection pipeline.
|
|
211
187
|
|
|
212
|
-
**[
|
|
213
|
-
The hippocampus for coding agents, part of CortexKit.
|
|
188
|
+
**[Focus Agent][]** — Absolute token thresholds and assistant text compression research.
|
|
214
189
|
|
|
215
|
-
**[
|
|
216
|
-
a new project or build on your existing codebase.
|
|
190
|
+
**[LLMLingua][]** — Selective compression: preserve structure, compress prose.
|
|
217
191
|
|
|
218
|
-
**[
|
|
192
|
+
**[Codex CLI][]** — Handoff prefix pattern for compaction continuity.
|
|
219
193
|
|
|
220
|
-
**[
|
|
221
|
-
and JetBrains plugin.
|
|
194
|
+
**[Google ADK][]** — Append-only event compaction architecture.
|
|
222
195
|
|
|
223
|
-
**[
|
|
196
|
+
**[Hermes][]** — 8-section structured compaction prompt design.
|
|
224
197
|
|
|
225
|
-
**[
|
|
198
|
+
**[MiMo-Code][]** — Terminal-native AI coding assistant with persistent memory.
|
|
226
199
|
|
|
227
|
-
**[
|
|
228
|
-
and nudge system are inspired by DCP's architecture.
|
|
200
|
+
**[Magic Context][]** — Unbounded context for coding agents.
|
|
229
201
|
|
|
230
|
-
**[
|
|
231
|
-
Our JSON array crush and CCR (Compress-Cache-Retrieve) are derived from Headroom's SmartCrusher.
|
|
202
|
+
**[Aider][]** — AI pair programming in your terminal.
|
|
232
203
|
|
|
233
|
-
**[
|
|
234
|
-
Our per-tool compression strategies (read, bash, grep, glob) are inspired by Edgee's approach.
|
|
204
|
+
**[Roo Code][]** — A whole dev team of AI agents in your code editor.
|
|
235
205
|
|
|
236
|
-
**[
|
|
237
|
-
Our content type detection pipeline is inspired by Contextomizer's approach.
|
|
206
|
+
**[Continue][]** — Pioneering open-source coding agent.
|
|
238
207
|
|
|
208
|
+
**[OpenHands][]** — Code Less, Make More.
|
|
209
|
+
|
|
210
|
+
**[Plandex][]** — AI coding agent for large tasks and real world projects.
|
|
211
|
+
|
|
212
|
+
[DCP]: https://github.com/Opencode-DCP/opencode-dynamic-context-pruning
|
|
213
|
+
[Headroom]: https://github.com/chopratejas/headroom
|
|
214
|
+
[Edgee]: https://github.com/edgee-ai/edgee
|
|
215
|
+
[Contextomizer]: https://github.com/GandalFran/contextomizer
|
|
216
|
+
[Focus Agent]: https://arxiv.org/html/2601.07190v1
|
|
217
|
+
[LLMLingua]: https://github.com/microsoft/LLMLingua
|
|
218
|
+
[Codex CLI]: https://github.com/openai/codex
|
|
219
|
+
[Google ADK]: https://github.com/google/adk-python
|
|
220
|
+
[Hermes]: https://github.com/NousResearch/hermes-agent
|
|
239
221
|
[MiMo-Code]: https://github.com/XiaomiMiMo/MiMo-Code
|
|
240
222
|
[Magic Context]: https://github.com/cortexkit/magic-context
|
|
241
223
|
[Aider]: https://github.com/Aider-AI/aider
|
|
@@ -243,35 +225,6 @@ Our content type detection pipeline is inspired by Contextomizer's approach.
|
|
|
243
225
|
[Continue]: https://github.com/continuedev/continue
|
|
244
226
|
[OpenHands]: https://github.com/All-Hands-AI/OpenHands
|
|
245
227
|
[Plandex]: https://github.com/plandex-ai/plandex
|
|
246
|
-
[DCP]: https://github.com/Opencode-DCP/opencode-dynamic-context-pruning
|
|
247
|
-
[Headroom]: https://github.com/chopratejas/headroom
|
|
248
|
-
[Edgee]: https://github.com/edgee-ai/edgee
|
|
249
|
-
[Contextomizer]: https://github.com/GandalFran/contextomizer
|
|
250
|
-
|
|
251
|
-
## Development
|
|
252
|
-
|
|
253
|
-
```bash
|
|
254
|
-
npm install
|
|
255
|
-
npm run verify # typecheck + test (363) + build + smoke (49)
|
|
256
|
-
```
|
|
257
|
-
|
|
258
|
-
Stats: 54 source files, 27 test files (363 tests), 10 compress modules, 49 smoke checks.
|
|
259
|
-
|
|
260
|
-
## CI/CD (npm Trusted Publishing)
|
|
261
|
-
|
|
262
|
-
Releases use npm OIDC Trusted Publishing — no token needed. To set up for a fork:
|
|
263
|
-
|
|
264
|
-
1. **npmjs.com** → Package Settings → Trusted Publishers → Add:
|
|
265
|
-
- Owner: your GitHub username
|
|
266
|
-
- Repository: your fork name
|
|
267
|
-
- Workflow filename: `publish.yml`
|
|
268
|
-
2. **package.json** → update `repository.url` to match your fork
|
|
269
|
-
3. **Push a tag** → GitHub Actions auto-publishes:
|
|
270
|
-
```bash
|
|
271
|
-
git tag v1.0.0 && git push origin v1.0.0
|
|
272
|
-
```
|
|
273
|
-
|
|
274
|
-
Requirements: npm CLI ≥ 11.5.1, Node.js ≥ 22, `id-token: write` permission, public repository.
|
|
275
228
|
|
|
276
229
|
## License
|
|
277
230
|
|
package/dist/index.js
CHANGED
|
@@ -261,6 +261,7 @@ var PluginState = class {
|
|
|
261
261
|
_ccrCache = /* @__PURE__ */ new Map();
|
|
262
262
|
_lastInputTokens = 0;
|
|
263
263
|
_lastNudgeMessageCount = /* @__PURE__ */ new Map();
|
|
264
|
+
_lastMemoryNudgeMessageCount = /* @__PURE__ */ new Map();
|
|
264
265
|
_lastCCRCleanup = 0;
|
|
265
266
|
_modelContextWindow = 0;
|
|
266
267
|
agentOf(sessionID) {
|
|
@@ -274,6 +275,7 @@ var PluginState = class {
|
|
|
274
275
|
this._models.delete(sessionID);
|
|
275
276
|
this._lastUserText.delete(sessionID);
|
|
276
277
|
this._lastNudgeMessageCount.delete(sessionID);
|
|
278
|
+
this._lastMemoryNudgeMessageCount.delete(sessionID);
|
|
277
279
|
}
|
|
278
280
|
recordModel(sessionID, model) {
|
|
279
281
|
this._models.set(sessionID, model);
|
|
@@ -423,6 +425,13 @@ var PluginState = class {
|
|
|
423
425
|
const last = this._lastNudgeMessageCount.get(sessionID);
|
|
424
426
|
return last != null ? currentMessageCount - last : Number.POSITIVE_INFINITY;
|
|
425
427
|
}
|
|
428
|
+
recordMemoryNudge(sessionID, messageCount) {
|
|
429
|
+
this._lastMemoryNudgeMessageCount.set(sessionID, messageCount);
|
|
430
|
+
}
|
|
431
|
+
messagesSinceLastMemoryNudge(sessionID, currentMessageCount) {
|
|
432
|
+
const last = this._lastMemoryNudgeMessageCount.get(sessionID);
|
|
433
|
+
return last != null ? currentMessageCount - last : Number.POSITIVE_INFINITY;
|
|
434
|
+
}
|
|
426
435
|
setModelContextWindow(tokens) {
|
|
427
436
|
if (tokens > 0) this._modelContextWindow = tokens;
|
|
428
437
|
}
|
|
@@ -1127,7 +1136,7 @@ async function runDream(opts) {
|
|
|
1127
1136
|
tools: {
|
|
1128
1137
|
memory_search: true,
|
|
1129
1138
|
memory_store: true,
|
|
1130
|
-
memory_forget:
|
|
1139
|
+
memory_forget: true,
|
|
1131
1140
|
read: true,
|
|
1132
1141
|
list: true
|
|
1133
1142
|
}
|
|
@@ -1228,23 +1237,38 @@ async function handleSessionCreatedForDream(args) {
|
|
|
1228
1237
|
}
|
|
1229
1238
|
const notesPath = memoryFilePath("project", "notes", projectPath);
|
|
1230
1239
|
let notesLines = 0;
|
|
1240
|
+
let notesContent = "";
|
|
1231
1241
|
try {
|
|
1232
|
-
|
|
1233
|
-
if (
|
|
1242
|
+
notesContent = fs5.readFileSync(notesPath, "utf8");
|
|
1243
|
+
if (notesContent.trim().length === 0) {
|
|
1234
1244
|
logger?.debug("auto-dream: notes.md is empty, skipping spawn");
|
|
1235
1245
|
return;
|
|
1236
1246
|
}
|
|
1237
|
-
notesLines =
|
|
1247
|
+
notesLines = notesContent.split("\n").filter((l) => l.trim()).length;
|
|
1238
1248
|
} catch {
|
|
1239
1249
|
logger?.debug("auto-dream: notes.md not found, skipping spawn");
|
|
1240
1250
|
return;
|
|
1241
1251
|
}
|
|
1242
1252
|
const memoryPath = memoryFilePath("project", "memory", projectPath);
|
|
1243
1253
|
if (!fs5.existsSync(memoryPath) || fs5.statSync(memoryPath).size < 50) {
|
|
1244
|
-
|
|
1245
|
-
|
|
1246
|
-
|
|
1247
|
-
|
|
1254
|
+
if (notesLines >= 5) {
|
|
1255
|
+
try {
|
|
1256
|
+
fs5.writeFileSync(memoryPath, notesContent, "utf8");
|
|
1257
|
+
logger?.info("auto-dream: bootstrapped MEMORY.md from notes.md", {
|
|
1258
|
+
notesLines
|
|
1259
|
+
});
|
|
1260
|
+
} catch (err) {
|
|
1261
|
+
logger?.warn("auto-dream: failed to bootstrap MEMORY.md", {
|
|
1262
|
+
error: err instanceof Error ? err.message : String(err)
|
|
1263
|
+
});
|
|
1264
|
+
return;
|
|
1265
|
+
}
|
|
1266
|
+
} else {
|
|
1267
|
+
logger?.debug("auto-dream: MEMORY.md missing and notes too small, skipping", {
|
|
1268
|
+
sessionID: info.id
|
|
1269
|
+
});
|
|
1270
|
+
return;
|
|
1271
|
+
}
|
|
1248
1272
|
}
|
|
1249
1273
|
const isSevenDayDue = schedule.lastDream === null || Date.now() - Date.parse(schedule.lastDream) > DREAM_INTERVAL_MS;
|
|
1250
1274
|
let isAccumulationDue = false;
|
|
@@ -1386,7 +1410,7 @@ async function runDistill(opts) {
|
|
|
1386
1410
|
tools: {
|
|
1387
1411
|
memory_search: true,
|
|
1388
1412
|
memory_store: true,
|
|
1389
|
-
memory_forget:
|
|
1413
|
+
memory_forget: true,
|
|
1390
1414
|
read: true,
|
|
1391
1415
|
list: true
|
|
1392
1416
|
}
|
|
@@ -15301,6 +15325,43 @@ function maxContextFrom(modelContextWindow) {
|
|
|
15301
15325
|
if (calibratedMaxContext > 0) return calibratedMaxContext;
|
|
15302
15326
|
return FALLBACK_MAX_CONTEXT;
|
|
15303
15327
|
}
|
|
15328
|
+
/**
 * Roughly estimate how many tokens a piece of text occupies.
 *
 * Characters are split into two buckets and weighted differently:
 *   - CJK characters count 0.7 tokens each;
 *   - every other character counts 1/3.8 of a token.
 * The weighted sum is rounded up to a whole number.
 *
 * @param {string} text - Text to measure.
 * @returns {number} Estimated token count; 0 for empty or non-string input.
 */
function estimateTokens2(text) {
  // A non-string would throw in the loop below ("not iterable"); an estimator
  // should report "nothing to count" instead.
  if (typeof text !== "string" || text.length === 0) return 0;

  // Built once per call rather than once per character. Covers CJK Unified
  // Ideographs (4E00-9FFF), Extension A (3400-4DBF), CJK Symbols and
  // Punctuation (3000-303F), Halfwidth and Fullwidth Forms (FF00-FFEF),
  // Hiragana (3040-309F) and Katakana (30A0-30FF).
  const cjkPattern = /[\u4e00-\u9fff\u3400-\u4dbf\u3000-\u303f\uff00-\uffef\u3040-\u309f\u30a0-\u30ff]/;

  let cjk = 0;
  let other = 0;
  // for...of walks code points, so an astral character (e.g. an emoji) is
  // counted once rather than once per UTF-16 code unit.
  for (const ch of text) {
    if (cjkPattern.test(ch)) {
      cjk++;
    } else {
      other++;
    }
  }
  return Math.ceil(cjk * 0.7 + other / 3.8);
}
|
|
15340
|
+
/**
 * Estimate the total token count of a conversation by summing
 * `estimateTokens2` over the textual content of every message part.
 *
 * Counted content:
 *   - `text` parts: their `text` string;
 *   - `tool` parts: the `state.output` and `state.error` strings, when present;
 *   - `reasoning` / `thinking` parts: their `text` string.
 * Any other part type, and any non-object part, contributes nothing.
 *
 * `detectPressure` uses this as the fallback when
 * `extractInputTokensFromMessages` reports no input token count.
 *
 * @param {Array<{parts?: Array<unknown>}>} messages - Conversation messages.
 * @returns {number} Sum of the per-part token estimates.
 */
function extractTokensFromMessages(messages) {
  if (messages == null) return 0;

  let total = 0;
  for (const msg of messages) {
    // A message that is null or has no `parts` array would otherwise throw
    // and abort pressure detection; skip it instead.
    const parts = msg?.parts;
    if (!Array.isArray(parts)) continue;

    for (const part of parts) {
      if (typeof part !== "object" || part === null) continue;

      const type = part["type"];
      if (type === "text") {
        if (typeof part["text"] === "string") {
          total += estimateTokens2(part["text"]);
        }
      } else if (type === "tool") {
        const state = part["state"];
        // Output and error are counted independently: a tool part may carry
        // either, both, or neither.
        if (typeof state?.["output"] === "string") {
          total += estimateTokens2(state["output"]);
        }
        if (typeof state?.["error"] === "string") {
          total += estimateTokens2(state["error"]);
        }
      } else if (type === "reasoning" || type === "thinking") {
        if (typeof part["text"] === "string") {
          total += estimateTokens2(part["text"]);
        }
      }
    }
  }
  return total;
}
|
|
15304
15365
|
function extractInputTokensFromMessages(messages) {
|
|
15305
15366
|
let best = 0;
|
|
15306
15367
|
for (let i = messages.length - 1; i >= 0; i--) {
|
|
@@ -15323,7 +15384,7 @@ function extractInputTokensFromMessages(messages) {
|
|
|
15323
15384
|
function detectPressure(messages, modelContextWindow) {
|
|
15324
15385
|
const ctx = maxContextFrom(modelContextWindow || 0);
|
|
15325
15386
|
const inputTokens = extractInputTokensFromMessages(messages);
|
|
15326
|
-
const estimated = inputTokens > 0 ? inputTokens :
|
|
15387
|
+
const estimated = inputTokens > 0 ? inputTokens : extractTokensFromMessages(messages);
|
|
15327
15388
|
const ratio = Math.min(estimated / ctx, 1);
|
|
15328
15389
|
let level;
|
|
15329
15390
|
if (estimated >= PRESSURE_HIGH_TOKENS) level = "high";
|
|
@@ -15350,17 +15411,17 @@ function buildNudgeText(level) {
|
|
|
15350
15411
|
var MEMORY_NUDGE_COOLDOWN = 3;
|
|
15351
15412
|
var DECISION_PATTERNS = [
|
|
15352
15413
|
/\b(?:decided|decision|chose|chosen|picked|selected)\b/i,
|
|
15353
|
-
|
|
15414
|
+
/(?:采用|选择|决定|确定|选用)/,
|
|
15354
15415
|
/\b(?:use|using|go with|went with)\b.*\b(?:because|since|due to)\b/i
|
|
15355
15416
|
];
|
|
15356
15417
|
var CONSTRAINT_PATTERNS = [
|
|
15357
15418
|
/\b(?:must not|cannot|should not|do not|never|always)\b/i,
|
|
15358
15419
|
/\b(?:constraint|restriction|limitation|requirement)\b/i,
|
|
15359
|
-
|
|
15420
|
+
/(?:不能|必须|禁止|约束|限制|要求|务必)/
|
|
15360
15421
|
];
|
|
15361
15422
|
var ERROR_FIX_PATTERNS = [
|
|
15362
15423
|
/\b(?:fix|fixed|resolve|resolved|patch|corrected)\b/i,
|
|
15363
|
-
|
|
15424
|
+
/(?:修复|修复了|解决|解决了)/,
|
|
15364
15425
|
/\b(?:the (?:bug|error|issue) (?:was|is)|root cause)\b/i
|
|
15365
15426
|
];
|
|
15366
15427
|
function detectMemoryNudge(messages, messagesSinceLastNudge) {
|
|
@@ -15374,13 +15435,14 @@ function detectMemoryNudge(messages, messagesSinceLastNudge) {
|
|
|
15374
15435
|
const hasRecentToolError = recentMessages.some(
|
|
15375
15436
|
(m) => m.parts.some((p) => p.type === "tool" && p.state?.status === "error")
|
|
15376
15437
|
);
|
|
15438
|
+
const recentAll = recentUserText + "\n" + recentAssistantText;
|
|
15377
15439
|
if (hasRecentToolError && ERROR_FIX_PATTERNS.some((p) => p.test(recentAssistantText))) {
|
|
15378
15440
|
return { injected: true, type: "gotcha" };
|
|
15379
15441
|
}
|
|
15380
|
-
if (CONSTRAINT_PATTERNS.some((p) => p.test(
|
|
15442
|
+
if (CONSTRAINT_PATTERNS.some((p) => p.test(recentAll))) {
|
|
15381
15443
|
return { injected: true, type: "constraint" };
|
|
15382
15444
|
}
|
|
15383
|
-
if (DECISION_PATTERNS.some((p) => p.test(
|
|
15445
|
+
if (DECISION_PATTERNS.some((p) => p.test(recentAll))) {
|
|
15384
15446
|
return { injected: true, type: "decision" };
|
|
15385
15447
|
}
|
|
15386
15448
|
return { injected: false, type: null };
|
|
@@ -15774,17 +15836,19 @@ function runCompressionPipeline(ctx) {
|
|
|
15774
15836
|
estimatedTokens: pressure.estimatedTokens
|
|
15775
15837
|
};
|
|
15776
15838
|
const sid = sessionID || "default";
|
|
15777
|
-
const
|
|
15778
|
-
|
|
15839
|
+
const currentMsgCount = messages.length;
|
|
15840
|
+
const pressureSince = state.messagesSinceLastNudge(sid, currentMsgCount);
|
|
15841
|
+
if (shouldInjectNudge(pressure.level, pressureSince)) {
|
|
15779
15842
|
if (injectIntoLastAssistant(messages, buildNudgeText(pressure.level))) {
|
|
15780
15843
|
stats.nudgeInjected = true;
|
|
15781
|
-
state.recordNudge(sid,
|
|
15844
|
+
state.recordNudge(sid, currentMsgCount);
|
|
15782
15845
|
}
|
|
15783
15846
|
}
|
|
15784
|
-
const
|
|
15847
|
+
const memorySince = state.messagesSinceLastMemoryNudge(sid, currentMsgCount);
|
|
15848
|
+
const memoryNudge = detectMemoryNudge(messages, memorySince);
|
|
15785
15849
|
if (memoryNudge.injected) {
|
|
15786
15850
|
if (injectIntoLastAssistant(messages, buildMemoryNudge(memoryNudge.type))) {
|
|
15787
|
-
state.
|
|
15851
|
+
state.recordMemoryNudge(sid, currentMsgCount);
|
|
15788
15852
|
logger?.debug("compress: memory nudge", { type: memoryNudge.type });
|
|
15789
15853
|
}
|
|
15790
15854
|
}
|