opencode-acp 1.4.0 → 1.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +81 -102
- package/README.zh-CN.md +52 -71
- package/dist/index.js +0 -81
- package/dist/index.js.map +1 -1
- package/dist/lib/config-validation.d.ts +0 -1
- package/dist/lib/config-validation.d.ts.map +1 -1
- package/dist/lib/message-ids.d.ts +0 -1
- package/dist/lib/message-ids.d.ts.map +1 -1
- package/dist/lib/messages/prune.d.ts.map +1 -1
- package/dist/lib/messages/utils.d.ts +0 -4
- package/dist/lib/messages/utils.d.ts.map +1 -1
- package/dist/lib/state/state.d.ts.map +1 -1
- package/dist/lib/ui/notification.d.ts +0 -2
- package/dist/lib/ui/notification.d.ts.map +1 -1
- package/dist/lib/ui/utils.d.ts +0 -3
- package/dist/lib/ui/utils.d.ts.map +1 -1
- package/dist/lib/update.d.ts +0 -16
- package/dist/lib/update.d.ts.map +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -22,78 +22,41 @@ The model decides <em>when</em> and <em>what</em> to compress — not a hard lim
|
|
|
22
22
|
|
|
23
23
|
## Why ACP
|
|
24
24
|
|
|
25
|
-
ACP
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
segments while freeing context space. The model controls what to keep, which is
|
|
29
|
-
strictly better than blind truncation.
|
|
30
|
-
|
|
31
|
-
### What makes ACP different
|
|
32
|
-
|
|
33
|
-
- **Full block lifecycle, model-driven.** The model can `compress` a range into a
|
|
34
|
-
summary, `decompress` to restore any block on demand, and `mark_block` /
|
|
35
|
-
`unmark_block` to flag blocks for deferred deletion. The model owns its own
|
|
36
|
-
context lifecycle — not just "create a block and hope GC handles it".
|
|
37
|
-
- **Cache-aware by design.** Summaries merge into existing user turns and batch
|
|
38
|
-
cleanup does a *single* cache break, so prefix-cache hit ratios stay near **90%**
|
|
39
|
-
even when sessions run at 70%+ context utilization (see [Proven at scale](#proven-at-scale)).
|
|
40
|
-
- **Pressure-aware GC.** Instead of blind age-based truncation that silently
|
|
41
|
-
drops important info (task IDs, file paths, decisions), ACP consolidates marked
|
|
42
|
-
blocks first and demotes blind truncation to a last-resort fallback at 100%.
|
|
43
|
-
- **Two compression modes.** *Range* mode (contiguous spans → block summaries)
|
|
44
|
-
and *message* mode (surgical per-message summaries for scattered content).
|
|
45
|
-
- **Protected content.** Tool outputs, file patterns, and user messages you mark
|
|
46
|
-
protected are injected into summaries, so nothing critical is ever lost.
|
|
47
|
-
- **Automatic strategies.** Deduplication (same tool + args → keep last) and
|
|
48
|
-
purge-errors (drop errored inputs after N turns), recalculated on compress —
|
|
49
|
-
not on every turn.
|
|
50
|
-
- **Production-grade configuration.** 3-layer merge (global → config-dir →
|
|
51
|
-
project), per-model context-limit overrides, and user-editable prompts.
|
|
52
|
-
|
|
53
|
-
### A hardened fork of DCP
|
|
54
|
-
|
|
55
|
-
ACP started as a fork of [DCP](https://github.com/Tarquinen/opencode-dynamic-context-pruning)
|
|
56
|
-
and now diverges so far that the original is a small subset. Beyond the features
|
|
57
|
-
above, it ships **37 bug fixes** that make the core production-stable — state
|
|
58
|
-
persistence across restarts, real token reporting (was returning 0), GC
|
|
59
|
-
deactivation, reversed-boundary auto-recovery, 268× logger/tokenizer speedup,
|
|
60
|
-
dialog-role confusion fixes, and skipping OpenCode's internal title/summary
|
|
61
|
-
agents so session titles keep generating. Core deltas vs the original:
|
|
62
|
-
|
|
63
|
-
| | DCP (original) | ACP |
|
|
64
|
-
|---|---|---|
|
|
65
|
-
| **Max stable session** | ~200 messages | 10,000+ |
|
|
66
|
-
| **Per-turn overhead** | 20 – 50 s | ~90 ms |
|
|
67
|
-
| **Model-driven decompress + block cleanup** | No | Yes |
|
|
68
|
-
| **State survives restart** | No | Yes |
|
|
25
|
+
ACP hands all context-management authority to the model itself, rather than relying on
|
|
26
|
+
external models or any complex external mechanism to do context management. It
|
|
27
|
+
is, to date, the best context-management implementation on the market.
|
|
69
28
|
|
|
70
|
-
|
|
29
|
+
This brings two concrete effects:
|
|
71
30
|
|
|
72
|
-
|
|
31
|
+
- **It saves about two-thirds of tokens.** A model with a 1,000,000-token context
|
|
32
|
+
window effectively runs in the **200,000–300,000 token range**.
|
|
33
|
+
- **It supports ultra-long sessions without losing key content** — **500M-token-level
|
|
34
|
+
cumulative context, 100,000 messages per session**.
|
|
73
35
|
|
|
74
|
-
|
|
75
|
-
from a single developer workstation (1,445 sessions, 69,097 model turns):
|
|
36
|
+
---
|
|
76
37
|
|
|
77
|
-
|
|
78
|
-
|--------|-------|
|
|
79
|
-
| Total tokens processed (incl. prompt-cache reads) | **6.17 billion** |
|
|
80
|
-
| Billable tokens (input + output + reasoning) | 828 million |
|
|
81
|
-
| Prompt-cache hit ratio (average) | ~87% |
|
|
82
|
-
| Compression blocks created (all-time) | 4,894 |
|
|
38
|
+
## Proven at scale
|
|
83
39
|
|
|
84
|
-
|
|
85
|
-
tokens pushed through the model**, not peak context:
|
|
40
|
+
Real engineering context, in practice.
|
|
86
41
|
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
42
|
+
**Supports 500M-token-level cumulative context, with p95 context around 30% and
|
|
43
|
+
an average prompt-cache hit ratio above 85%.** (This is an average, not a per-session figure; it
|
|
44
|
+
is explained in [Impact on Prompt Caching](#impact-on-prompt-caching), where it
|
|
45
|
+
turns out to save far more tokens than traditional compression.)
|
|
91
46
|
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
47
|
+
| | Session 1 | Session 2 |
|
|
48
|
+
|---|---|---|
|
|
49
|
+
| **Messages** | 3,024 | 2,028 |
|
|
50
|
+
| **Total tokens processed** | 582 M | 463 M |
|
|
51
|
+
| **Prompt-cache hit ratio** | 86.2% | 89.0% |
|
|
52
|
+
| **Context p50 (median)** | 1.2 K (<1%) | 1.8 K (<1%) |
|
|
53
|
+
| **Context p75** | 2.8 K | 3.5 K |
|
|
54
|
+
| **Context p90** | 108 K (11%) | 58 K (6%) |
|
|
55
|
+
| **Context p95** | 251 K (25%) | 335 K (34%) |
|
|
56
|
+
| **Context p99** | 425 K (43%) | 442 K (44%) |
|
|
57
|
+
| **Peak** | 488 K (49%) | 769 K (77%) |
|
|
58
|
+
|
|
59
|
+
(Context percentages are of the 1M window.)
|
|
97
60
|
|
|
98
61
|
---
|
|
99
62
|
|
|
@@ -117,39 +80,70 @@ Or add to your opencode config:
|
|
|
117
80
|
|
|
118
81
|
## How It Works
|
|
119
82
|
|
|
120
|
-
ACP
|
|
83
|
+
ACP hands the context-compression tool directly to the model. The model is
|
|
84
|
+
**100% responsible** for context compression. The model's available tools are
|
|
85
|
+
mainly: **compress**, **decompress**, and **delete** (`mark_block` / `unmark_block`).
|
|
121
86
|
|
|
122
|
-
###
|
|
87
|
+
### Lifecycle
|
|
123
88
|
|
|
124
|
-
|
|
89
|
+
Three operations: **compress**, **decompress**, and **delete**. Content loops
|
|
90
|
+
between raw and compressed, and eventually terminates in deletion:
|
|
125
91
|
|
|
126
|
-
|
|
92
|
+
```mermaid
|
|
93
|
+
stateDiagram-v2
|
|
94
|
+
Raw --> Compressed : compress
|
|
95
|
+
Compressed --> Raw : decompress
|
|
96
|
+
Compressed --> Deleted : delete
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
### Compression strategy
|
|
100
|
+
|
|
101
|
+
The system injects a prompt telling the model the current context ratio, the
|
|
102
|
+
compression ratio, whether context is idle, and compression suggestions. When the
|
|
103
|
+
trigger ratio is hit, content is compressed in **priority order**:
|
|
127
104
|
|
|
128
|
-
|
|
129
|
-
|
|
105
|
+
1. Agent/subagent review & consultation results (largest block of uncompressed content)
|
|
106
|
+
2. Verbose command output (build/test runs, git diff/log/status, directory listings)
|
|
107
|
+
3. Exploration that led nowhere (failed approaches, dead-end searches)
|
|
108
|
+
4. Redundant tool results (reading the same file repeatedly, repeated status checks)
|
|
109
|
+
5. Intermediate steps of completed multi-step tasks
|
|
110
|
+
6. Resolved discussion threads (once a decision is recorded)
|
|
111
|
+
7. Large file contents already used
|
|
130
112
|
|
|
131
|
-
|
|
113
|
+
After compression, the original content is replaced by a short **block** that
|
|
114
|
+
references the original (recoverable via `decompress`).
|
|
132
115
|
|
|
133
|
-
###
|
|
116
|
+
### Decompression strategy
|
|
134
117
|
|
|
135
|
-
|
|
118
|
+
The model decides when to decompress. When the context is large enough to
|
|
119
|
+
interfere with the model's self-attention, short blocks lead the model to compress
|
|
120
|
+
some content first, handle the urgent matter, then decompress what it needs in
|
|
121
|
+
later work.
|
|
136
122
|
|
|
137
|
-
###
|
|
123
|
+
### Deletion strategy
|
|
138
124
|
|
|
139
|
-
|
|
125
|
+
To handle the accumulation of many small historical blocks, the new version adds
|
|
126
|
+
a deletion strategy. The model decides whether to delete. **Once deleted, content
|
|
127
|
+
is irrecoverable.** This replaces the original forced GC, so that forced garbage
|
|
128
|
+
collection no longer deletes things the model considers important.
|
|
140
129
|
|
|
141
|
-
|
|
130
|
+
---
|
|
131
|
+
|
|
132
|
+
## Impact on Prompt Caching
|
|
142
133
|
|
|
143
|
-
|
|
134
|
+
Historically, ACP has fixed many of the low-cache-hit-rate problems caused by
|
|
135
|
+
DCP. The overall cache hit rate is now **~87%**.
|
|
144
136
|
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
- **High (default 75%)**: all marked blocks are auto merge-compressed into one.
|
|
149
|
-
- **Force (default 90%)**: all old-gen blocks are merged regardless of marks — a last resort before age-based GC truncation.
|
|
150
|
-
- **`unmark_block`** removes the flag if the model changes its mind.
|
|
137
|
+
Compared to traditional compression — which only compresses at 80–90% and, once it
|
|
138
|
+
compresses, forces 100% of the context to be re-cached — ACP's hit rate is effectively
|
|
139
|
+
higher.
|
|
151
140
|
|
|
152
|
-
|
|
141
|
+
Additionally, ACP keeps total context around **~30% most of the time**, versus the
|
|
142
|
+
traditional **50–80%**. So total token savings are far higher than traditional
|
|
143
|
+
compression.
|
|
144
|
+
|
|
145
|
+
**Conclusion:** ACP simultaneously raises the overall cache hit rate **and**
|
|
146
|
+
ensures key context information is not lost.
|
|
153
147
|
|
|
154
148
|
---
|
|
155
149
|
|
|
@@ -368,22 +362,6 @@ For the `compress` tool, `compress.protectedTools` ensures specific tool outputs
|
|
|
368
362
|
|
|
369
363
|
---
|
|
370
364
|
|
|
371
|
-
## Impact on Prompt Caching
|
|
372
|
-
|
|
373
|
-
LLM providers cache prompts based on exact prefix matching. When ACP prunes content, it changes messages, which invalidates cached prefixes from that point forward.
|
|
374
|
-
|
|
375
|
-
**Trade-off:** You lose some cache reads but gain token savings from reduced context size and fewer hallucinations from stale context. In most cases, especially in long sessions, the savings outweigh the cache miss cost.
|
|
376
|
-
|
|
377
|
-
> [!NOTE]
|
|
378
|
-
> In testing, cache hit rates were approximately 85% with ACP vs 90% without.
|
|
379
|
-
|
|
380
|
-
**No impact for:**
|
|
381
|
-
|
|
382
|
-
- **Request-based billing** -- Providers like GitHub Copilot that charge per request, not tokens.
|
|
383
|
-
- **Uniform token pricing** -- Providers like Cerebras that bill cached and uncached tokens at the same rate.
|
|
384
|
-
|
|
385
|
-
---
|
|
386
|
-
|
|
387
365
|
## Migrating from DCP
|
|
388
366
|
|
|
389
367
|
ACP is a drop-in replacement for DCP. To migrate:
|
|
@@ -411,7 +389,7 @@ ACP auto-migrates config from `dcp.jsonc` to `acp.jsonc` and prompts from `dcp-p
|
|
|
411
389
|
---
|
|
412
390
|
|
|
413
391
|
<details>
|
|
414
|
-
<summary><strong>Bug Fixes (
|
|
392
|
+
<summary><strong>Bug Fixes (38 total)</strong> -- applied on top of DCP v3.1.11</summary>
|
|
415
393
|
|
|
416
394
|
| # | Severity | Summary |
|
|
417
395
|
|---|----------|---------|
|
|
@@ -441,6 +419,7 @@ ACP auto-migrates config from `dcp.jsonc` to `acp.jsonc` and prompts from `dcp-p
|
|
|
441
419
|
| 35 | HIGH | Aging warnings shown at low context usage (<50%) -- triggers unnecessary compress, wastes tokens |
|
|
442
420
|
| 36 | HIGH | Compression summary emitted as a standalone user message before the user's real turn -- model reads its own prior assistant output as user input, causing dialog role confusion / self-Q&A loops |
|
|
443
421
|
| 37 | HIGH | Message-transform pipeline runs on OpenCode's hidden title/summary/compaction agent requests -- corrupts the request and shared session state, breaking session title generation |
|
|
422
|
+
| 38 | CRITICAL | pruneToolOutputs/pruneToolInputs/pruneToolErrors mutate existing messages in-place -- invalidates LLM prefix cache, causing 89% of fresh input tokens to be wasted on cache-invalidating re-sends |
|
|
444
423
|
|
|
445
424
|
For the complete list with root cause analysis, see the [bug tracker](https://github.com/ranxianglei/opencode-acp/issues).
|
|
446
425
|
|
package/README.zh-CN.md
CHANGED
|
@@ -22,50 +22,34 @@
|
|
|
22
22
|
|
|
23
23
|
## 为什么选择 ACP
|
|
24
24
|
|
|
25
|
-
ACP
|
|
25
|
+
ACP 将上下文管理的所有权限全部交给模型自己,而不依靠外部模型或各种复杂的机制去做上下文管理。它是迄今为止,市面上对上下文管理最好的实现。
|
|
26
26
|
|
|
27
|
-
|
|
27
|
+
这带来两个影响:
|
|
28
28
|
|
|
29
|
-
-
|
|
30
|
-
-
|
|
31
|
-
- **压力感知 GC。** 不是盲目按年龄截断(会静默丢失 task ID、文件路径、决策等重要信息),而是优先整合被标记的块,把盲目截断降级为 100% 时的最后兜底。
|
|
32
|
-
- **两种压缩模式。** *Range* 模式(连续片段 → 块摘要)和 *message* 模式(针对分散内容的精准单消息摘要)。
|
|
33
|
-
- **受保护内容。** 你标记保护的工具输出、文件模式、用户消息会被注入摘要,关键信息永不丢失。
|
|
34
|
-
- **自动策略。** 去重(相同工具+参数 → 只留最后一次)和清除错误(N 轮后丢弃出错的输入),在 compress 时重算 —— 不是每轮。
|
|
35
|
-
- **生产级配置。** 三层合并(全局 → 配置目录 → 项目)+ 每模型上下文上限覆盖 + 用户可编辑 prompt。
|
|
36
|
-
|
|
37
|
-
### DCP 的强化分支
|
|
38
|
-
|
|
39
|
-
ACP 起初是 [DCP](https://github.com/Tarquinen/opencode-dynamic-context-pruning) 的分支,如今分歧之大已让原版只相当于一个小子集。除上述特性外,还包含 **37 项 bug 修复**让核心达到生产稳定 —— 跨重启状态持久化、真实 token 上报(此前返回 0)、GC 停用、反转边界自动恢复、268 倍日志/tokenizer 加速、对话角色混乱修复,以及跳过 OpenCode 内置 title/summary agent 恢复标题生成。相对原版的核心差异:
|
|
40
|
-
|
|
41
|
-
| | DCP(原版) | ACP |
|
|
42
|
-
|---|---|---|
|
|
43
|
-
| **最大稳定会话** | ~200 条消息 | 10,000+ |
|
|
44
|
-
| **每轮开销** | 20 – 50 秒 | ~90 ms |
|
|
45
|
-
| **模型自主解压 + 块清理** | 否 | 是 |
|
|
46
|
-
| **状态跨重启保留** | 否 | 是 |
|
|
29
|
+
- **省 token(约三分之二)。** 一个 100 万 token 上下文窗口的模型,实际只在 **20 万–30 万 token** 区间运行。
|
|
30
|
+
- **超长上下文不丢关键内容** —— 支持 **5 亿级别上下文、单会话 10 万条消息**。
|
|
47
31
|
|
|
48
32
|
---
|
|
49
33
|
|
|
50
34
|
## 实战验证
|
|
51
35
|
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
| 指标 | 数值 |
|
|
55
|
-
|------|------|
|
|
56
|
-
| 总处理 token(含 prompt-cache 读取) | **61.7 亿** |
|
|
57
|
-
| 计费 token(input + output + reasoning) | 8.28 亿 |
|
|
58
|
-
| prompt-cache 命中率(平均) | ~87% |
|
|
59
|
-
| 累计创建压缩 block | 4,894 个 |
|
|
60
|
-
|
|
61
|
-
两个有代表性的重负载会话(已匿名化)—— 头条数字是**流经模型的累计 token**,而不是峰值上下文:
|
|
36
|
+
真实工程中的上下文情况。
|
|
62
37
|
|
|
63
|
-
|
|
64
|
-
|------|------|--------|----------------|----------|------------|------------|------|
|
|
65
|
-
| 会话一 | 6 天 | 2,694 | **5.82 亿** | 86.2% | 1.2 K(<1%) | 25.1 万(25%) | 48.8 万(49%) |
|
|
66
|
-
| 会话二 | 2 天 | 1,536 | **4.63 亿** | 89.0% | 1.8 K(<1%) | 33.5 万(34%) | 76.9 万(77%) |
|
|
38
|
+
**支持 5 亿级别 token,p95 上下文比例在 30% 左右,平均缓存命中率 85% 以上。**(注意这是平均缓存命中率,不是单会话命中率——后面[对 Prompt 缓存的影响](#对-prompt-缓存的影响)会解释,这实际上比传统压缩算法大幅度节省了 token。)
|
|
67
39
|
|
|
68
|
-
|
|
40
|
+
| | 会话一 | 会话二 |
|
|
41
|
+
|---|---|---|
|
|
42
|
+
| **消息总条数** | 3,024 | 2,028 |
|
|
43
|
+
| **累计处理 token** | 5.82 亿 | 4.63 亿 |
|
|
44
|
+
| **prompt-cache 命中率** | 86.2% | 89.0% |
|
|
45
|
+
| **上下文 p50(中位)** | 1.2 K(<1%) | 1.8 K(<1%) |
|
|
46
|
+
| **上下文 p75** | 2.8 K | 3.5 K |
|
|
47
|
+
| **上下文 p90** | 10.8 万(11%) | 5.8 万(6%) |
|
|
48
|
+
| **上下文 p95** | 25.1 万(25%) | 33.5 万(34%) |
|
|
49
|
+
| **上下文 p99** | 42.5 万(43%) | 44.2 万(44%) |
|
|
50
|
+
| **峰值** | 48.8 万(49%) | 76.9 万(77%) |
|
|
51
|
+
|
|
52
|
+
(上下文百分比均以 1M 窗口计。)
|
|
69
53
|
|
|
70
54
|
---
|
|
71
55
|
|
|
@@ -89,39 +73,52 @@ opencode plugin opencode-acp@latest --global
|
|
|
89
73
|
|
|
90
74
|
## 工作原理
|
|
91
75
|
|
|
92
|
-
ACP
|
|
76
|
+
ACP 把上下文压缩工具直接交给模型。模型对上下文压缩**负全责**。模型可用的工具主要是:**compress**、**decompress** 和 **delete**(`mark_block` / `unmark_block`)。
|
|
77
|
+
|
|
78
|
+
### 生命周期
|
|
79
|
+
|
|
80
|
+
三个操作:**压缩**、**解压缩**、**删除**。内容在原始与压缩之间循环,最终以删除终结:
|
|
81
|
+
|
|
82
|
+
```mermaid
|
|
83
|
+
stateDiagram-v2
|
|
84
|
+
Raw --> Compressed : compress
|
|
85
|
+
Compressed --> Raw : decompress
|
|
86
|
+
Compressed --> Deleted : delete
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
### 压缩策略
|
|
93
90
|
|
|
94
|
-
|
|
91
|
+
系统会注入一段 prompt,告诉模型当前的上下文比例、压缩比例、上下文是否空闲,以及压缩建议。当触发比例被命中时,内容按**优先级顺序**被压缩:
|
|
95
92
|
|
|
96
|
-
|
|
93
|
+
1. Agent/子代理的评审与咨询结果(最大一块未压缩内容)
|
|
94
|
+
2. 冗长的命令输出(构建/测试运行、git diff/log/status、目录列表)
|
|
95
|
+
3. 无结果的探索(失败的方法、死胡同式的搜索)
|
|
96
|
+
4. 冗余的工具结果(反复读同一个文件、重复的状态检查)
|
|
97
|
+
5. 已完成多步任务的中间步骤
|
|
98
|
+
6. 已尘埃落定的讨论(一旦决策被记录)
|
|
99
|
+
7. 已经用过的大段文件内容
|
|
97
100
|
|
|
98
|
-
|
|
101
|
+
压缩完成后,原始内容被一个简短的 **block** 替换,该 block 引用原始内容(可通过 `decompress` 恢复)。
|
|
99
102
|
|
|
100
|
-
|
|
101
|
-
- **`message` 模式**(实验性)独立压缩单条原始消息,使模型能够更精细地管理上下文。
|
|
103
|
+
### 解压策略
|
|
102
104
|
|
|
103
|
-
|
|
105
|
+
由模型决定何时解压。当上下文大到足以干扰模型的 self-attention 时,简短的 block 会让模型先压缩一部分内容,处理完紧急事务,再在后续工作中按需解压。
|
|
104
106
|
|
|
105
|
-
###
|
|
107
|
+
### 删除策略
|
|
106
108
|
|
|
107
|
-
|
|
109
|
+
为了应对大量小块历史内容的堆积,新版本增加了删除策略。由模型决定是否删除。**一旦删除,内容不可恢复。** 这取代了原先的强制 GC,使得强制垃圾回收不再删除模型认为重要的内容。
|
|
108
110
|
|
|
109
|
-
|
|
111
|
+
---
|
|
110
112
|
|
|
111
|
-
|
|
113
|
+
## 对 Prompt 缓存的影响
|
|
112
114
|
|
|
113
|
-
|
|
115
|
+
历史上 ACP 修复了大量由 DCP 导致的低缓存命中率问题。目前整体缓存命中率约为 **87%**。
|
|
114
116
|
|
|
115
|
-
|
|
117
|
+
相比传统压缩——只在 80–90% 时才压缩,一旦压缩就强制 100% 的上下文重新命中——ACP 的命中率实际上更高。
|
|
116
118
|
|
|
117
|
-
|
|
118
|
-
- 当上下文使用率越过可配置阈值时,ACP 会在**单次缓存打断**中将所有已标记块整合为一个摘要(而非逐个丢失):
|
|
119
|
-
- **低阈值(默认 60%)**:提醒模型已标记的块可以合并。
|
|
120
|
-
- **高阈值(默认 75%)**:所有已标记块被自动合并压缩为一个。
|
|
121
|
-
- **强制阈值(默认 90%)**:无论是否标记,所有老年代块都被合并 —— 这是基于年龄的 GC 截断前的最后手段。
|
|
122
|
-
- **`unmark_block`** 在模型改变主意时移除标记。
|
|
119
|
+
此外:ACP 大部分时间将总上下文维持在 **~30%** 左右,而传统方案是 50–80%。因此总 token 节省远高于传统压缩。
|
|
123
120
|
|
|
124
|
-
|
|
121
|
+
**结论:** ACP 在提高整体缓存命中率的同时,确保关键上下文信息不丢失。
|
|
125
122
|
|
|
126
123
|
---
|
|
127
124
|
|
|
@@ -340,22 +337,6 @@ ACP 暴露六个可编辑的 prompt:
|
|
|
340
337
|
|
|
341
338
|
---
|
|
342
339
|
|
|
343
|
-
## 对 Prompt 缓存的影响
|
|
344
|
-
|
|
345
|
-
LLM 提供商基于精确前缀匹配来缓存 prompt。当 ACP 剪枝内容时,它会修改消息,从而从该点开始使缓存的前缀失效。
|
|
346
|
-
|
|
347
|
-
**权衡:** 你会损失一些缓存读取,但从缩减的上下文大小中获得 token 节省,并减少因过时上下文产生的幻觉。在大多数情况下,尤其是长会话中,节省的开销超过缓存未命中的代价。
|
|
348
|
-
|
|
349
|
-
> [!NOTE]
|
|
350
|
-
> 在测试中,使用 ACP 的缓存命中率约为 85%,不使用时约为 90%。
|
|
351
|
-
|
|
352
|
-
**以下场景无影响:**
|
|
353
|
-
|
|
354
|
-
- **按请求计费** — 如 GitHub Copilot 等按请求而非按 token 计费的提供商。
|
|
355
|
-
- **统一 token 定价** — 如 Cerebras 等对缓存和未缓存 token 统一价格的提供商。
|
|
356
|
-
|
|
357
|
-
---
|
|
358
|
-
|
|
359
340
|
## 从 DCP 迁移
|
|
360
341
|
|
|
361
342
|
ACP 是 DCP 的直接替代品。迁移步骤:
|
package/dist/index.js
CHANGED
|
@@ -4958,89 +4958,8 @@ var stripHallucinations = (messages) => {
|
|
|
4958
4958
|
};
|
|
4959
4959
|
|
|
4960
4960
|
// lib/messages/prune.ts
|
|
4961
|
-
var PRUNED_TOOL_OUTPUT_REPLACEMENT = "[Output removed to save context - information superseded or no longer needed]";
|
|
4962
|
-
var PRUNED_TOOL_ERROR_INPUT_REPLACEMENT = "[input removed due to failed tool call]";
|
|
4963
|
-
var PRUNED_QUESTION_INPUT_REPLACEMENT = "[questions removed - see output for user's answers]";
|
|
4964
4961
|
var prune = (state, logger, config, messages) => {
|
|
4965
4962
|
filterCompressedRanges(state, logger, config, messages);
|
|
4966
|
-
pruneToolOutputs(state, logger, messages);
|
|
4967
|
-
pruneToolInputs(state, logger, messages);
|
|
4968
|
-
pruneToolErrors(state, logger, messages);
|
|
4969
|
-
};
|
|
4970
|
-
var pruneToolOutputs = (state, logger, messages) => {
|
|
4971
|
-
for (const msg of messages) {
|
|
4972
|
-
if (isMessageCompacted(state, msg)) {
|
|
4973
|
-
continue;
|
|
4974
|
-
}
|
|
4975
|
-
const parts = Array.isArray(msg.parts) ? msg.parts : [];
|
|
4976
|
-
for (const part of parts) {
|
|
4977
|
-
if (part.type !== "tool") {
|
|
4978
|
-
continue;
|
|
4979
|
-
}
|
|
4980
|
-
if (!state.prune.tools.has(part.callID)) {
|
|
4981
|
-
continue;
|
|
4982
|
-
}
|
|
4983
|
-
if (part.state.status !== "completed") {
|
|
4984
|
-
continue;
|
|
4985
|
-
}
|
|
4986
|
-
if (part.tool === "question" || part.tool === "edit" || part.tool === "write") {
|
|
4987
|
-
continue;
|
|
4988
|
-
}
|
|
4989
|
-
part.state.output = PRUNED_TOOL_OUTPUT_REPLACEMENT;
|
|
4990
|
-
}
|
|
4991
|
-
}
|
|
4992
|
-
};
|
|
4993
|
-
var pruneToolInputs = (state, logger, messages) => {
|
|
4994
|
-
for (const msg of messages) {
|
|
4995
|
-
if (isMessageCompacted(state, msg)) {
|
|
4996
|
-
continue;
|
|
4997
|
-
}
|
|
4998
|
-
const parts = Array.isArray(msg.parts) ? msg.parts : [];
|
|
4999
|
-
for (const part of parts) {
|
|
5000
|
-
if (part.type !== "tool") {
|
|
5001
|
-
continue;
|
|
5002
|
-
}
|
|
5003
|
-
if (!state.prune.tools.has(part.callID)) {
|
|
5004
|
-
continue;
|
|
5005
|
-
}
|
|
5006
|
-
if (part.state.status !== "completed") {
|
|
5007
|
-
continue;
|
|
5008
|
-
}
|
|
5009
|
-
if (part.tool !== "question") {
|
|
5010
|
-
continue;
|
|
5011
|
-
}
|
|
5012
|
-
if (part.state.input?.questions !== void 0) {
|
|
5013
|
-
part.state.input.questions = PRUNED_QUESTION_INPUT_REPLACEMENT;
|
|
5014
|
-
}
|
|
5015
|
-
}
|
|
5016
|
-
}
|
|
5017
|
-
};
|
|
5018
|
-
var pruneToolErrors = (state, logger, messages) => {
|
|
5019
|
-
for (const msg of messages) {
|
|
5020
|
-
if (isMessageCompacted(state, msg)) {
|
|
5021
|
-
continue;
|
|
5022
|
-
}
|
|
5023
|
-
const parts = Array.isArray(msg.parts) ? msg.parts : [];
|
|
5024
|
-
for (const part of parts) {
|
|
5025
|
-
if (part.type !== "tool") {
|
|
5026
|
-
continue;
|
|
5027
|
-
}
|
|
5028
|
-
if (!state.prune.tools.has(part.callID)) {
|
|
5029
|
-
continue;
|
|
5030
|
-
}
|
|
5031
|
-
if (part.state.status !== "error") {
|
|
5032
|
-
continue;
|
|
5033
|
-
}
|
|
5034
|
-
const input = part.state.input;
|
|
5035
|
-
if (input && typeof input === "object") {
|
|
5036
|
-
for (const key of Object.keys(input)) {
|
|
5037
|
-
if (typeof input[key] === "string") {
|
|
5038
|
-
input[key] = PRUNED_TOOL_ERROR_INPUT_REPLACEMENT;
|
|
5039
|
-
}
|
|
5040
|
-
}
|
|
5041
|
-
}
|
|
5042
|
-
}
|
|
5043
|
-
}
|
|
5044
4963
|
};
|
|
5045
4964
|
var filterCompressedRanges = (state, logger, config, messages) => {
|
|
5046
4965
|
if (state.prune.messages.byMessageId.size === 0 && state.prune.messages.activeByAnchorMessageId.size === 0) {
|