gencode-ai 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/RELEASE_NOTES_v0.4.0.md +140 -0
- package/dist/agent/agent.d.ts +17 -2
- package/dist/agent/agent.d.ts.map +1 -1
- package/dist/agent/agent.js +279 -49
- package/dist/agent/agent.js.map +1 -1
- package/dist/agent/types.d.ts +15 -1
- package/dist/agent/types.d.ts.map +1 -1
- package/dist/checkpointing/checkpoint-manager.d.ts +24 -0
- package/dist/checkpointing/checkpoint-manager.d.ts.map +1 -1
- package/dist/checkpointing/checkpoint-manager.js +28 -0
- package/dist/checkpointing/checkpoint-manager.js.map +1 -1
- package/dist/cli/components/App.d.ts +8 -0
- package/dist/cli/components/App.d.ts.map +1 -1
- package/dist/cli/components/App.js +478 -36
- package/dist/cli/components/App.js.map +1 -1
- package/dist/cli/components/CommandSuggestions.d.ts.map +1 -1
- package/dist/cli/components/CommandSuggestions.js +2 -0
- package/dist/cli/components/CommandSuggestions.js.map +1 -1
- package/dist/cli/components/Header.d.ts +6 -1
- package/dist/cli/components/Header.d.ts.map +1 -1
- package/dist/cli/components/Header.js +3 -3
- package/dist/cli/components/Header.js.map +1 -1
- package/dist/cli/components/Messages.d.ts.map +1 -1
- package/dist/cli/components/Messages.js +7 -9
- package/dist/cli/components/Messages.js.map +1 -1
- package/dist/cli/index.js +3 -2
- package/dist/cli/index.js.map +1 -1
- package/dist/config/types.d.ts +20 -1
- package/dist/config/types.d.ts.map +1 -1
- package/dist/config/types.js.map +1 -1
- package/dist/index.d.ts +2 -2
- package/dist/index.js +2 -2
- package/dist/input/history-manager.d.ts +78 -0
- package/dist/input/history-manager.d.ts.map +1 -0
- package/dist/input/history-manager.js +224 -0
- package/dist/input/history-manager.js.map +1 -0
- package/dist/input/index.d.ts +6 -0
- package/dist/input/index.d.ts.map +1 -0
- package/dist/input/index.js +5 -0
- package/dist/input/index.js.map +1 -0
- package/dist/prompts/index.js +3 -3
- package/dist/prompts/index.js.map +1 -1
- package/dist/providers/gemini.d.ts.map +1 -1
- package/dist/providers/gemini.js +33 -2
- package/dist/providers/gemini.js.map +1 -1
- package/dist/providers/google.d.ts +22 -0
- package/dist/providers/google.d.ts.map +1 -0
- package/dist/providers/google.js +297 -0
- package/dist/providers/google.js.map +1 -0
- package/dist/providers/index.d.ts +4 -4
- package/dist/providers/index.js +11 -11
- package/dist/providers/index.js.map +1 -1
- package/dist/providers/openai.d.ts.map +1 -1
- package/dist/providers/openai.js +6 -0
- package/dist/providers/openai.js.map +1 -1
- package/dist/providers/registry.js +3 -3
- package/dist/providers/registry.js.map +1 -1
- package/dist/providers/types.d.ts +30 -4
- package/dist/providers/types.d.ts.map +1 -1
- package/dist/session/compression/engine.d.ts +109 -0
- package/dist/session/compression/engine.d.ts.map +1 -0
- package/dist/session/compression/engine.js +311 -0
- package/dist/session/compression/engine.js.map +1 -0
- package/dist/session/compression/index.d.ts +12 -0
- package/dist/session/compression/index.d.ts.map +1 -0
- package/dist/session/compression/index.js +11 -0
- package/dist/session/compression/index.js.map +1 -0
- package/dist/session/compression/types.d.ts +90 -0
- package/dist/session/compression/types.d.ts.map +1 -0
- package/dist/session/compression/types.js +17 -0
- package/dist/session/compression/types.js.map +1 -0
- package/dist/session/manager.d.ts +64 -3
- package/dist/session/manager.d.ts.map +1 -1
- package/dist/session/manager.js +254 -2
- package/dist/session/manager.js.map +1 -1
- package/dist/session/types.d.ts +16 -0
- package/dist/session/types.d.ts.map +1 -1
- package/dist/session/types.js.map +1 -1
- package/docs/README.md +1 -0
- package/docs/diagrams/compression-decision.mmd +30 -0
- package/docs/diagrams/compression-workflow.mmd +54 -0
- package/docs/diagrams/layer1-pruning.mmd +45 -0
- package/docs/diagrams/layer2-compaction.mmd +42 -0
- package/docs/proposals/0007-context-management.md +252 -2
- package/docs/proposals/README.md +4 -3
- package/docs/providers.md +3 -3
- package/docs/session-compression.md +695 -0
- package/examples/agent-demo.ts +23 -1
- package/examples/basic.ts +3 -3
- package/package.json +3 -4
- package/src/agent/agent.ts +314 -52
- package/src/agent/types.ts +19 -1
- package/src/checkpointing/checkpoint-manager.ts +48 -0
- package/src/cli/components/App.tsx +553 -34
- package/src/cli/components/CommandSuggestions.tsx +2 -0
- package/src/cli/components/Header.tsx +16 -1
- package/src/cli/components/Messages.tsx +20 -14
- package/src/cli/index.tsx +3 -2
- package/src/config/types.ts +26 -1
- package/src/index.ts +3 -3
- package/src/input/history-manager.ts +289 -0
- package/src/input/index.ts +6 -0
- package/src/prompts/index.test.ts +2 -1
- package/src/prompts/index.ts +3 -3
- package/src/providers/{gemini.ts → google.ts} +69 -18
- package/src/providers/index.ts +14 -14
- package/src/providers/openai.ts +7 -0
- package/src/providers/registry.ts +3 -3
- package/src/providers/types.ts +33 -3
- package/src/session/compression/engine.ts +406 -0
- package/src/session/compression/index.ts +18 -0
- package/src/session/compression/types.ts +102 -0
- package/src/session/manager.ts +326 -3
- package/src/session/types.ts +21 -0
- package/tests/input-history-manager.test.ts +335 -0
- package/tests/session-checkpoint-persistence.test.ts +198 -0
|
@@ -0,0 +1,695 @@
|
|
|
1
|
+
# Session Compression Implementation Guide
|
|
2
|
+
|
|
3
|
+
Complete documentation for the three-layer session compression system in GenCode.
|
|
4
|
+
|
|
5
|
+
## Table of Contents
|
|
6
|
+
|
|
7
|
+
1. [Overview](#overview)
|
|
8
|
+
2. [Architecture](#architecture)
|
|
9
|
+
3. [Flowcharts](#flowcharts)
|
|
10
|
+
4. [Strategy Details](#strategy-details)
|
|
11
|
+
5. [Testing](#testing)
|
|
12
|
+
6. [Configuration](#configuration)
7. [Implementation Highlights](#implementation-highlights)
8. [Appendix](#appendix)
|
|
13
|
+
|
|
14
|
+
---
|
|
15
|
+
|
|
16
|
+
## Overview
|
|
17
|
+
|
|
18
|
+
### Goal
|
|
19
|
+
|
|
20
|
+
Session compression enables GenCode to handle long conversations efficiently by:
|
|
21
|
+
- Reducing context size when approaching model limits
|
|
22
|
+
- Preserving critical information (decisions, file changes)
|
|
23
|
+
- Minimizing API costs
|
|
24
|
+
|
|
25
|
+
### Three-Layer Strategy
|
|
26
|
+
|
|
27
|
+
| Layer | Name | Trigger | Cost | Effect |
|
|
28
|
+
|-------|------|---------|------|--------|
|
|
29
|
+
| **Layer 1** | Tool Output Pruning | Tokens > 20k | 🟢 Free | Remove old tool outputs, keep the most recent 40k tokens |
|
|
30
|
+
| **Layer 2** | Compaction | Tokens > usable context | 🟡 Medium | LLM generates continuation prompt |
|
|
31
|
+
| **Layer 3** | Message Filtering | Session recovery | 🟢 Free | Return compressed message array |
|
|
32
|
+
|
|
33
|
+
### File Structure
|
|
34
|
+
|
|
35
|
+
```
|
|
36
|
+
src/session/compression/
|
|
37
|
+
├── types.ts # Type definitions and defaults (102 lines)
|
|
38
|
+
├── engine.ts # Three-layer compression engine (406 lines)
|
|
39
|
+
└── index.ts # Public API exports
|
|
40
|
+
|
|
41
|
+
Integration points:
|
|
42
|
+
├── src/session/manager.ts # SessionManager integration (lines 310-343, 407-554)
|
|
43
|
+
├── src/agent/agent.ts # Agent integration (lines 66, 448, 504, 543, 597-604)
|
|
44
|
+
└── src/config/types.ts # Config system integration
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
---
|
|
48
|
+
|
|
49
|
+
## Architecture
|
|
50
|
+
|
|
51
|
+
### Design Principles
|
|
52
|
+
|
|
53
|
+
1. **OpenCode Alignment**: Three-layer strategy matches OpenCode's proven approach
|
|
54
|
+
2. **Provider Agnostic**: Works with any LLM provider (Anthropic, OpenAI, Gemini)
|
|
55
|
+
3. **Quality First**: Structured metadata ensures no information loss
|
|
56
|
+
4. **Cost Conscious**: Free pruning before expensive summarization
|
|
57
|
+
|
|
58
|
+
### Key Components
|
|
59
|
+
|
|
60
|
+
**CompressionEngine** (`src/session/compression/engine.ts`)
|
|
61
|
+
- Token estimation (4:1 char-to-token ratio)
|
|
62
|
+
- Compression strategy selection
|
|
63
|
+
- Tool output pruning
|
|
64
|
+
- LLM-based compaction
|
|
65
|
+
- Information extraction (files, tools, decisions)
|
|
66
|
+
|
|
67
|
+
**SessionManager** (`src/session/manager.ts`)
|
|
68
|
+
- Triggers compression on `addMessage()`
|
|
69
|
+
- Stores summaries in session JSON
|
|
70
|
+
- Replaces message array with compressed version
|
|
71
|
+
- Provides `getMessagesForLLM()` for context
|
|
72
|
+
|
|
73
|
+
**Agent** (`src/agent/agent.ts`)
|
|
74
|
+
- Passes `modelInfo` for compression decisions
|
|
75
|
+
- Uses compressed context transparently
|
|
76
|
+
- Exposes `getCompressionStats()`
|
|
77
|
+
|
|
78
|
+
---
|
|
79
|
+
|
|
80
|
+
## Flowcharts
|
|
81
|
+
|
|
82
|
+
### Complete Workflow
|
|
83
|
+
|
|
84
|
+
```mermaid
|
|
85
|
+
flowchart TD
|
|
86
|
+
Start([User Input]) --> AddToAgent[Agent adds message]
|
|
87
|
+
AddToAgent --> CallSM[SessionManager.addMessage<br/>with modelInfo]
|
|
88
|
+
|
|
89
|
+
CallSM --> PushMsg[Push to messages array]
|
|
90
|
+
PushMsg --> UpdateCount[Update fullMessageCount]
|
|
91
|
+
UpdateCount --> CheckCompress{Needs compression?<br/>needsCompression}
|
|
92
|
+
|
|
93
|
+
CheckCompress -->|No| SaveSession[Save session]
|
|
94
|
+
CheckCompress -->|Yes| DetermineStrategy{Determine strategy}
|
|
95
|
+
|
|
96
|
+
DetermineStrategy -->|prune| Layer1[Layer 1: Pruning]
|
|
97
|
+
DetermineStrategy -->|compact| Layer2[Layer 2: Compaction]
|
|
98
|
+
|
|
99
|
+
%% Layer 1 detailed flow
|
|
100
|
+
Layer1 --> EstTokens1[Estimate total tokens]
|
|
101
|
+
EstTokens1 --> CollectRecent[Collect recent tool outputs<br/>within last 40k tokens<br/>backward iteration]
|
|
102
|
+
CollectRecent --> MarkProtected[Mark as protected indices]
|
|
103
|
+
MarkProtected --> ClearOld[Clear unprotected<br/>old tool results]
|
|
104
|
+
ClearOld --> MarkPruned[Mark: Old tool result cleared<br/>pruned: true<br/>prunedAt: timestamp]
|
|
105
|
+
MarkPruned --> SaveSession
|
|
106
|
+
|
|
107
|
+
%% Layer 2 detailed flow
|
|
108
|
+
Layer2 --> DetermineRange[Determine summary range<br/>start: after last summary<br/>end: length - 10]
|
|
109
|
+
DetermineRange --> ExtractInfo[Extract structured info in parallel]
|
|
110
|
+
|
|
111
|
+
ExtractInfo --> Files[Files modified<br/>Write/Edit tools]
|
|
112
|
+
ExtractInfo --> Tools[Tool usage stats<br/>count + top 3 uses]
|
|
113
|
+
ExtractInfo --> Decisions[Key decisions<br/>decided/chose/will use]
|
|
114
|
+
|
|
115
|
+
Files --> GenPrompt[Generate continuation prompt]
|
|
116
|
+
Tools --> GenPrompt
|
|
117
|
+
Decisions --> GenPrompt
|
|
118
|
+
|
|
119
|
+
GenPrompt --> LLMCall[Call LLM<br/>max_tokens: 1500<br/>generate continuation prompt]
|
|
120
|
+
LLMCall --> CreateSummary[Create ConversationSummary<br/>id, range, content, metadata]
|
|
121
|
+
CreateSummary --> StoreSummary[Store to session.summaries array]
|
|
122
|
+
StoreSummary --> ReplaceMessages[Replace messages array]
|
|
123
|
+
|
|
124
|
+
ReplaceMessages --> BuildNew[Build new messages array:<br/>System prompt<br/>Summary system message<br/>Last 10 messages]
|
|
125
|
+
BuildNew --> SaveSession
|
|
126
|
+
|
|
127
|
+
SaveSession --> GetContext[Get LLM context<br/>getMessagesForLLM]
|
|
128
|
+
GetContext --> AgentContinue[Agent continues<br/>call provider.complete]
|
|
129
|
+
AgentContinue --> LLMResponse[LLM returns response]
|
|
130
|
+
LLMResponse --> End([Done])
|
|
131
|
+
|
|
132
|
+
style Layer1 fill:#ffd93d,stroke:#333,stroke-width:2px
|
|
133
|
+
style Layer2 fill:#74c0fc,stroke:#333,stroke-width:2px
|
|
134
|
+
style LLMCall fill:#ff6b6b,stroke:#333,stroke-width:2px
|
|
135
|
+
style GetContext fill:#51cf66,stroke:#333,stroke-width:2px
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
### Compression Decision Logic
|
|
139
|
+
|
|
140
|
+
```mermaid
|
|
141
|
+
flowchart TD
|
|
142
|
+
Start([Check if compression needed]) --> CheckEnabled{compression.enabled?}
|
|
143
|
+
CheckEnabled -->|No| NoCompression[Return: strategy = none]
|
|
144
|
+
CheckEnabled -->|Yes| CalcTokens[Calculate total tokens]
|
|
145
|
+
|
|
146
|
+
CalcTokens --> UseActual{Has actual TokenUsage?}
|
|
147
|
+
UseActual -->|Yes| SumActual[input + cache.read +<br/>output + reasoning]
|
|
148
|
+
UseActual -->|No| EstimateAll[Estimate all messages<br/>chars / 4]
|
|
149
|
+
|
|
150
|
+
SumActual --> GetUsable[Calculate usable context]
|
|
151
|
+
EstimateAll --> GetUsable
|
|
152
|
+
|
|
153
|
+
GetUsable --> CalcUsable[usable = contextWindow -<br/>min of outputLimit and 32k]
|
|
154
|
+
CalcUsable --> CheckOverflow{total > usable?}
|
|
155
|
+
|
|
156
|
+
CheckOverflow -->|No| NoCompression
|
|
157
|
+
CheckOverflow -->|Yes| CheckPruneMin{total > 20k AND<br/>enablePruning?}
|
|
158
|
+
|
|
159
|
+
CheckPruneMin -->|Yes| UsePrune[Return: strategy = prune]
|
|
160
|
+
CheckPruneMin -->|No| UseCompact[Return: strategy = compact]
|
|
161
|
+
|
|
162
|
+
style CheckEnabled fill:#e3f2fd
|
|
163
|
+
style CheckOverflow fill:#fff3e0
|
|
164
|
+
style CheckPruneMin fill:#fff3e0
|
|
165
|
+
style UsePrune fill:#ffd93d
|
|
166
|
+
style UseCompact fill:#74c0fc
|
|
167
|
+
style NoCompression fill:#f1f3f4
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
---
|
|
171
|
+
|
|
172
|
+
## Strategy Details
|
|
173
|
+
|
|
174
|
+
### Layer 1: Tool Output Pruning
|
|
175
|
+
|
|
176
|
+
**Implementation**: `src/session/compression/engine.ts:117-159`
|
|
177
|
+
|
|
178
|
+
#### Purpose
|
|
179
|
+
Quick, cost-free reduction of context size by removing old tool outputs.
|
|
180
|
+
|
|
181
|
+
#### Algorithm
|
|
182
|
+
|
|
183
|
+
1. **Check threshold**: Exit if total tokens < 20k
|
|
184
|
+
2. **Backward iteration**: Collect recent tool results
|
|
185
|
+
- Start from end of messages array
|
|
186
|
+
- Accumulate up to 40k tokens
|
|
187
|
+
- Store protected indices in Set
|
|
188
|
+
3. **Forward iteration**: Clear unprotected results
|
|
189
|
+
- Skip messages in protected set
|
|
190
|
+
- Replace `content` with `"[Old tool result cleared]"`
|
|
191
|
+
- Mark with `pruned: true` and `prunedAt: timestamp`
|
|
192
|
+
4. **Return stats**: Count and saved tokens
|
|
193
|
+
|
|
194
|
+
#### Design Decisions
|
|
195
|
+
|
|
196
|
+
| Decision | Rationale |
|
|
197
|
+
|----------|-----------|
|
|
198
|
+
| 20k threshold | Balance efficiency vs necessity |
|
|
199
|
+
| 40k protection | ~10-20 conversation turns of context |
|
|
200
|
+
| Backward iteration | Prioritize recent outputs |
|
|
201
|
+
| Mark not delete | Preserve message structure for debugging |
|
|
202
|
+
| Timestamp tracking | Auditability and transparency |
|
|
203
|
+
|
|
204
|
+
#### Example
|
|
205
|
+
|
|
206
|
+
```typescript
|
|
207
|
+
// Before pruning
|
|
208
|
+
{
|
|
209
|
+
role: 'user',
|
|
210
|
+
content: [
|
|
211
|
+
{ type: 'tool_result', toolUseId: 'old-1', content: '... 5000 chars ...' }
|
|
212
|
+
]
|
|
213
|
+
}
|
|
214
|
+
|
|
215
|
+
// After pruning
|
|
216
|
+
{
|
|
217
|
+
role: 'user',
|
|
218
|
+
content: [
|
|
219
|
+
{
|
|
220
|
+
type: 'tool_result',
|
|
221
|
+
toolUseId: 'old-1',
|
|
222
|
+
content: '[Old tool result cleared]',
|
|
223
|
+
pruned: true,
|
|
224
|
+
prunedAt: '2026-01-18T10:30:00.000Z'
|
|
225
|
+
}
|
|
226
|
+
]
|
|
227
|
+
}
|
|
228
|
+
```
|
|
229
|
+
|
|
230
|
+
### Layer 2: Compaction
|
|
231
|
+
|
|
232
|
+
**Implementation**: `src/session/compression/engine.ts:165-223`
|
|
233
|
+
|
|
234
|
+
#### Purpose
|
|
235
|
+
Generate a "continuation prompt" that preserves context for future work.
|
|
236
|
+
|
|
237
|
+
#### Algorithm
|
|
238
|
+
|
|
239
|
+
1. **Determine range**:
|
|
240
|
+
- Start: After last summary (or beginning)
|
|
241
|
+
- End: Current length - 10 (preserve recent)
|
|
242
|
+
2. **Extract structured info** (parallel):
|
|
243
|
+
- Files modified (from Write/Edit tools)
|
|
244
|
+
- Tool usage statistics
|
|
245
|
+
- Key decisions (contains "decided", "chose", "will use")
|
|
246
|
+
3. **Generate continuation prompt**:
|
|
247
|
+
- Call LLM with specialized prompt
|
|
248
|
+
- max_tokens: 1500
|
|
249
|
+
- Focus: What's needed to continue, not just what was done
|
|
250
|
+
4. **Create summary object**:
|
|
251
|
+
- Unique ID: `sum-{timestamp}-{random}`
|
|
252
|
+
- Type: "compaction"
|
|
253
|
+
- Range: `[start, end]`
|
|
254
|
+
- Content: Continuation prompt
|
|
255
|
+
- Metadata: decisions, files, tools
|
|
256
|
+
5. **Replace messages**:
|
|
257
|
+
- Keep: System prompt
|
|
258
|
+
- Add: Summary as system message
|
|
259
|
+
- Keep: Last 10 messages
|
|
260
|
+
|
|
261
|
+
#### Continuation Prompt Template
|
|
262
|
+
|
|
263
|
+
```
|
|
264
|
+
Provide a detailed prompt for continuing our conversation above.
|
|
265
|
+
|
|
266
|
+
Focus on information that would be helpful for continuing the conversation:
|
|
267
|
+
1. What we accomplished so far
|
|
268
|
+
2. What we're currently working on
|
|
269
|
+
3. Which files we modified and key changes made
|
|
270
|
+
4. What we plan to do next
|
|
271
|
+
5. Any important context or decisions that would be needed
|
|
272
|
+
|
|
273
|
+
Remember: The new session will NOT have access to our full conversation history,
|
|
274
|
+
so include all essential context needed to continue working effectively.
|
|
275
|
+
|
|
276
|
+
Be technical and specific. Use structured bullet points.
|
|
277
|
+
```
|
|
278
|
+
|
|
279
|
+
#### Design Decisions
|
|
280
|
+
|
|
281
|
+
| Decision | Rationale |
|
|
282
|
+
|----------|-----------|
|
|
283
|
+
| "Continuation" not "summary" | Focus on future needs, not past actions |
|
|
284
|
+
| Structured extraction | Ensure no critical info lost even if LLM summary incomplete |
|
|
285
|
+
| max_tokens: 1500 | Detailed but not excessive |
|
|
286
|
+
| Keep last 10 messages | Maintain conversation continuity |
|
|
287
|
+
| Configurable model | Allow cheaper model (e.g., Haiku) for summarization |
|
|
288
|
+
|
|
289
|
+
#### Summary Format
|
|
290
|
+
|
|
291
|
+
```typescript
|
|
292
|
+
{
|
|
293
|
+
id: "sum-1737202200000-abc123",
|
|
294
|
+
type: "compaction",
|
|
295
|
+
coveringMessages: [1, 100],
|
|
296
|
+
content: "We accomplished implementing JWT authentication...",
|
|
297
|
+
keyDecisions: ["Decided to use JWT with refresh tokens"],
|
|
298
|
+
filesModified: ["src/auth.ts", "src/login.tsx"],
|
|
299
|
+
toolsUsed: [
|
|
300
|
+
{ tool: "Write", count: 5, notableUses: ["Modified src/auth.ts"] }
|
|
301
|
+
],
|
|
302
|
+
generatedAt: "2026-01-18T10:30:00.000Z",
|
|
303
|
+
estimatedTokens: 350
|
|
304
|
+
}
|
|
305
|
+
```
|
|
306
|
+
|
|
307
|
+
### Layer 3: Message Filtering
|
|
308
|
+
|
|
309
|
+
**Implementation**: `src/session/manager.ts:520-530`
|
|
310
|
+
|
|
311
|
+
#### Purpose
|
|
312
|
+
Optimize session recovery by returning compressed messages.
|
|
313
|
+
|
|
314
|
+
#### Current Implementation
|
|
315
|
+
|
|
316
|
+
```typescript
|
|
317
|
+
getMessagesForLLM(): Message[] {
|
|
318
|
+
if (!this.currentSession) return [];
|
|
319
|
+
|
|
320
|
+
// If no summaries, return all messages
|
|
321
|
+
if (!this.currentSession.summaries?.length) {
|
|
322
|
+
return this.currentSession.messages;
|
|
323
|
+
}
|
|
324
|
+
|
|
325
|
+
// Messages already include summary (built in performCompaction)
|
|
326
|
+
return this.currentSession.messages;
|
|
327
|
+
}
|
|
328
|
+
```
|
|
329
|
+
|
|
330
|
+
#### Status
|
|
331
|
+
|
|
332
|
+
- ✅ Basic support: Returns compressed message array
|
|
333
|
+
- ✅ Storage optimization: Summaries embedded in session JSON
|
|
334
|
+
- ⚠️ Not implemented: Streaming load, CompactionPart markers (OpenCode feature)
|
|
335
|
+
|
|
336
|
+
---
|
|
337
|
+
|
|
338
|
+
## Testing
|
|
339
|
+
|
|
340
|
+
### Automated Tests
|
|
341
|
+
|
|
342
|
+
Run the test suite:
|
|
343
|
+
|
|
344
|
+
```bash
|
|
345
|
+
# Make script executable
|
|
346
|
+
chmod +x scripts/test-compression.sh
|
|
347
|
+
|
|
348
|
+
# Run all tests
|
|
349
|
+
./scripts/test-compression.sh
|
|
350
|
+
```
|
|
351
|
+
|
|
352
|
+
### Unit Test Structure
|
|
353
|
+
|
|
354
|
+
```typescript
|
|
355
|
+
describe('CompressionEngine', () => {
|
|
356
|
+
describe('estimateTokens', () => {
|
|
357
|
+
it('uses 4:1 char-to-token ratio');
|
|
358
|
+
it('handles empty strings');
|
|
359
|
+
});
|
|
360
|
+
|
|
361
|
+
describe('needsCompression', () => {
|
|
362
|
+
it('returns none when under limit');
|
|
363
|
+
it('returns prune when >20k tokens');
|
|
364
|
+
it('returns compact when <20k but over limit');
|
|
365
|
+
it('respects enabled flag');
|
|
366
|
+
});
|
|
367
|
+
|
|
368
|
+
describe('pruneToolOutputs', () => {
|
|
369
|
+
it('does not prune when <20k tokens');
|
|
370
|
+
it('protects recent 40k tokens');
|
|
371
|
+
it('clears old tool results with pruned marker');
|
|
372
|
+
it('returns accurate stats');
|
|
373
|
+
});
|
|
374
|
+
|
|
375
|
+
describe('compact', () => {
|
|
376
|
+
it('extracts files modified from Write/Edit');
|
|
377
|
+
it('counts tool usage');
|
|
378
|
+
it('extracts key decisions');
|
|
379
|
+
it('calls LLM with continuation prompt');
|
|
380
|
+
it('returns complete ConversationSummary');
|
|
381
|
+
});
|
|
382
|
+
});
|
|
383
|
+
```
|
|
384
|
+
|
|
385
|
+
### Manual Testing
|
|
386
|
+
|
|
387
|
+
#### Test 1: Long Conversation
|
|
388
|
+
|
|
389
|
+
```bash
|
|
390
|
+
npm start
|
|
391
|
+
```
|
|
392
|
+
|
|
393
|
+
```text
|
|
394
|
+
> Help me create 50 test files with different content
|
|
395
|
+
|
|
396
|
+
// After agent completes...
|
|
397
|
+
|
|
398
|
+
> Show compression stats
|
|
399
|
+
// Expected:
|
|
400
|
+
// totalMessages > activeMessages
|
|
401
|
+
// summaryCount > 0
|
|
402
|
+
// compressionRatio < 1.0
|
|
403
|
+
```
|
|
404
|
+
|
|
405
|
+
#### Test 2: Tool Output Pruning
|
|
406
|
+
|
|
407
|
+
```bash
|
|
408
|
+
# Start session
|
|
409
|
+
npm start
|
|
410
|
+
|
|
411
|
+
> Read all TypeScript files in the project and summarize them
|
|
412
|
+
|
|
413
|
+
# In another terminal
|
|
414
|
+
SESSION_FILE=$(ls -t ~/.gen/sessions/*.json | head -1)
|
|
415
|
+
jq '.messages[] | select(.content[]?.pruned == true)' "$SESSION_FILE"
|
|
416
|
+
|
|
417
|
+
# Expected: Messages with pruned: true and prunedAt timestamp
|
|
418
|
+
```
|
|
419
|
+
|
|
420
|
+
#### Test 3: Session Recovery
|
|
421
|
+
|
|
422
|
+
```bash
|
|
423
|
+
npm start
|
|
424
|
+
|
|
425
|
+
> Implement a user authentication system using JWT
|
|
426
|
+
|
|
427
|
+
# After implementation
|
|
428
|
+
> /save
|
|
429
|
+
> /exit
|
|
430
|
+
|
|
431
|
+
# Restart
|
|
432
|
+
npm start
|
|
433
|
+
> /resume
|
|
434
|
+
|
|
435
|
+
> What authentication scheme did we choose?
|
|
436
|
+
|
|
437
|
+
# Expected: Agent correctly recalls "JWT"
|
|
438
|
+
```
|
|
439
|
+
|
|
440
|
+
### Performance Metrics
|
|
441
|
+
|
|
442
|
+
| Metric | Target | How to Measure |
|
|
443
|
+
|--------|--------|----------------|
|
|
444
|
+
| Compression ratio | 60-70% reduction | Compare totalMessages vs activeMessages |
|
|
445
|
+
| Summary generation | < 5 seconds | Time the `compact()` call |
|
|
446
|
+
| Token savings | 50-60% | Compare before/after token estimates |
|
|
447
|
+
| Context quality | 100% key info retained | Manual verification of summaries |
|
|
448
|
+
| Pruning speed | < 100ms | Time the `pruneToolOutputs()` call |
|
|
449
|
+
|
|
450
|
+
### Session File Verification
|
|
451
|
+
|
|
452
|
+
```bash
|
|
453
|
+
# Find latest session
|
|
454
|
+
SESSION_FILE=$(ls -t ~/.gen/sessions/*.json | head -1)
|
|
455
|
+
|
|
456
|
+
# Check structure
|
|
457
|
+
jq '{
|
|
458
|
+
totalMessages: .fullMessageCount,
|
|
459
|
+
activeMessages: (.messages | length),
|
|
460
|
+
summaryCount: (.summaries | length)
|
|
461
|
+
}' "$SESSION_FILE"
|
|
462
|
+
|
|
463
|
+
# View summary content
|
|
464
|
+
jq '.summaries[0]' "$SESSION_FILE"
|
|
465
|
+
|
|
466
|
+
# Check for pruned messages
|
|
467
|
+
jq '.messages[] | select(.content[]?.pruned == true)' "$SESSION_FILE"
|
|
468
|
+
```
|
|
469
|
+
|
|
470
|
+
---
|
|
471
|
+
|
|
472
|
+
## Configuration
|
|
473
|
+
|
|
474
|
+
### Default Settings
|
|
475
|
+
|
|
476
|
+
```typescript
|
|
477
|
+
// src/session/compression/types.ts
|
|
478
|
+
export const DEFAULT_COMPRESSION_CONFIG: CompressionConfig = {
|
|
479
|
+
enabled: true,
|
|
480
|
+
enablePruning: true,
|
|
481
|
+
enableCompaction: true,
|
|
482
|
+
pruneMinimum: 20_000, // Min tokens to trigger pruning
|
|
483
|
+
pruneProtect: 40_000, // Protect recent 40k tokens
|
|
484
|
+
reservedOutputTokens: 32_000, // Reserve 32k for output
|
|
485
|
+
};
|
|
486
|
+
```
|
|
487
|
+
|
|
488
|
+
### User Configuration
|
|
489
|
+
|
|
490
|
+
Add to `~/.gencode/settings.json`:
|
|
491
|
+
|
|
492
|
+
```jsonc
|
|
493
|
+
{
|
|
494
|
+
"compression": {
|
|
495
|
+
"enabled": true,
|
|
496
|
+
"enablePruning": true,
|
|
497
|
+
"enableCompaction": true,
|
|
498
|
+
"pruneMinimum": 20000,
|
|
499
|
+
"pruneProtect": 40000,
|
|
500
|
+
"reservedOutputTokens": 32000,
|
|
501
|
+
"model": "claude-haiku-3-5" // Optional: cheaper model for summaries
|
|
502
|
+
}
|
|
503
|
+
}
|
|
504
|
+
```
|
|
505
|
+
|
|
506
|
+
### Tuning Guide
|
|
507
|
+
|
|
508
|
+
| Scenario | pruneMinimum | pruneProtect | model | Effect |
|
|
509
|
+
|----------|--------------|--------------|-------|--------|
|
|
510
|
+
| **Low cost** | 10k | 20k | haiku | More frequent compression, lower cost |
|
|
511
|
+
| **Balanced** | 20k | 40k | current | Default, balanced quality/cost |
|
|
512
|
+
| **High quality** | 30k | 60k | sonnet | More context retained, higher cost |
|
|
513
|
+
|
|
514
|
+
### Model Context Windows
|
|
515
|
+
|
|
516
|
+
| Model | Context Window | Output Limit | Usable Context |
|
|
517
|
+
|-------|----------------|--------------|----------------|
|
|
518
|
+
| Claude Sonnet | 200,000 | 8,192 | 191,808 |
|
|
519
|
+
| GPT-4o | 128,000 | 4,096 | 123,904 |
|
|
520
|
+
| Gemini 2.0 | 1,000,000 | 8,192 | 991,808 |
|
|
521
|
+
|
|
522
|
+
---
|
|
523
|
+
|
|
524
|
+
## Implementation Highlights
|
|
525
|
+
|
|
526
|
+
### ADR-1: 4:1 Token Estimation
|
|
527
|
+
|
|
528
|
+
**Context**: Need fast token estimation without heavy dependencies
|
|
529
|
+
|
|
530
|
+
**Decision**: Use 4:1 character-to-token ratio (OpenCode's validated approach)
|
|
531
|
+
|
|
532
|
+
**Pros**:
|
|
533
|
+
- Zero dependencies, instant calculation
|
|
534
|
+
- ±10% accuracy for English text
|
|
535
|
+
- Production-validated
|
|
536
|
+
|
|
537
|
+
**Cons**:
|
|
538
|
+
- 20-30% underestimate for Chinese
|
|
539
|
+
- 15-20% overestimate for code
|
|
540
|
+
|
|
541
|
+
**Mitigation**:
|
|
542
|
+
- Use conservative thresholds
|
|
543
|
+
- Prefer actual TokenUsage when available
|
|
544
|
+
- Future: Optional tiktoken support
|
|
545
|
+
|
|
546
|
+
**Code**: `src/session/compression/engine.ts:36-48`
|
|
547
|
+
|
|
548
|
+
### ADR-2: Backward Iteration for Protection
|
|
549
|
+
|
|
550
|
+
**Context**: Tool outputs consume many tokens, and the most recent ones matter most
|
|
551
|
+
|
|
552
|
+
**Decision**: Iterate backward, protect last 40k tokens of tool outputs
|
|
553
|
+
|
|
554
|
+
**Pros**:
|
|
555
|
+
- Guarantees most recent context preserved
|
|
556
|
+
- 40k ≈ 10-20 conversation turns
|
|
557
|
+
- Avoids accidentally removing critical outputs
|
|
558
|
+
|
|
559
|
+
**Implementation**: O(n) time using Set for protected indices
|
|
560
|
+
|
|
561
|
+
**Code**: `src/session/compression/engine.ts:131-142`
|
|
562
|
+
|
|
563
|
+
### ADR-3: Continuation Prompt Philosophy
|
|
564
|
+
|
|
565
|
+
**Context**: Traditional summaries focus on "what was done", but new sessions need "how to continue"
|
|
566
|
+
|
|
567
|
+
**Decision**: Generate "continuation prompts" instead of summaries
|
|
568
|
+
|
|
569
|
+
**Template Structure**:
|
|
570
|
+
1. What we accomplished (past)
|
|
571
|
+
2. Current work (present)
|
|
572
|
+
3. Files modified + changes (context)
|
|
573
|
+
4. Next steps (future)
|
|
574
|
+
5. Important decisions (context)
|
|
575
|
+
|
|
576
|
+
**Effect**: More actionable summaries, so the agent can naturally continue the work
|
|
577
|
+
|
|
578
|
+
**Code**: `src/session/compression/engine.ts:196-223`
|
|
579
|
+
|
|
580
|
+
---
|
|
581
|
+
|
|
582
|
+
## Appendix
|
|
583
|
+
|
|
584
|
+
### Type Definitions
|
|
585
|
+
|
|
586
|
+
```typescript
|
|
587
|
+
export interface CompressionConfig {
|
|
588
|
+
enabled: boolean;
|
|
589
|
+
enablePruning: boolean;
|
|
590
|
+
enableCompaction: boolean;
|
|
591
|
+
pruneMinimum: number;
|
|
592
|
+
pruneProtect: number;
|
|
593
|
+
reservedOutputTokens: number;
|
|
594
|
+
model?: string;
|
|
595
|
+
}
|
|
596
|
+
|
|
597
|
+
export interface ConversationSummary {
|
|
598
|
+
id: string;
|
|
599
|
+
type: 'compaction';
|
|
600
|
+
coveringMessages: [number, number];
|
|
601
|
+
content: string;
|
|
602
|
+
keyDecisions: string[];
|
|
603
|
+
filesModified: string[];
|
|
604
|
+
toolsUsed: ToolUsageSummary[];
|
|
605
|
+
generatedAt: string;
|
|
606
|
+
estimatedTokens: number;
|
|
607
|
+
}
|
|
608
|
+
|
|
609
|
+
export interface ToolUsageSummary {
|
|
610
|
+
tool: string;
|
|
611
|
+
count: number;
|
|
612
|
+
notableUses: string[];
|
|
613
|
+
}
|
|
614
|
+
|
|
615
|
+
export interface TokenUsage {
|
|
616
|
+
input: number;
|
|
617
|
+
output: number;
|
|
618
|
+
reasoning?: number;
|
|
619
|
+
cache?: {
|
|
620
|
+
read: number;
|
|
621
|
+
write: number;
|
|
622
|
+
};
|
|
623
|
+
}
|
|
624
|
+
```
|
|
625
|
+
|
|
626
|
+
### Session JSON Example
|
|
627
|
+
|
|
628
|
+
```json
|
|
629
|
+
{
|
|
630
|
+
"metadata": {
|
|
631
|
+
"id": "abc123",
|
|
632
|
+
"title": "Implement Auth System",
|
|
633
|
+
"messageCount": 42,
|
|
634
|
+
"fullMessageCount": 156
|
|
635
|
+
},
|
|
636
|
+
"messages": [
|
|
637
|
+
{ "role": "system", "content": "..." },
|
|
638
|
+
{
|
|
639
|
+
"role": "system",
|
|
640
|
+
"content": "[Earlier conversation - 100 messages summarized]\n\nWe accomplished implementing JWT authentication..."
|
|
641
|
+
},
|
|
642
|
+
{ "role": "user", "content": "..." }
|
|
643
|
+
],
|
|
644
|
+
"summaries": [
|
|
645
|
+
{
|
|
646
|
+
"id": "sum-1737202200000-abc123",
|
|
647
|
+
"type": "compaction",
|
|
648
|
+
"coveringMessages": [1, 100],
|
|
649
|
+
"content": "We accomplished implementing JWT authentication...",
|
|
650
|
+
"keyDecisions": ["Decided to use JWT with refresh tokens"],
|
|
651
|
+
"filesModified": ["src/auth.ts", "src/login.tsx"],
|
|
652
|
+
"toolsUsed": [
|
|
653
|
+
{ "tool": "Write", "count": 5, "notableUses": ["Modified src/auth.ts"] }
|
|
654
|
+
],
|
|
655
|
+
"generatedAt": "2026-01-18T10:30:00.000Z",
|
|
656
|
+
"estimatedTokens": 350
|
|
657
|
+
}
|
|
658
|
+
]
|
|
659
|
+
}
|
|
660
|
+
```
|
|
661
|
+
|
|
662
|
+
### Quick Reference
|
|
663
|
+
|
|
664
|
+
**Check if compression is working:**
|
|
665
|
+
```typescript
|
|
666
|
+
const stats = agent.getCompressionStats();
|
|
667
|
+
console.log(stats);
|
|
668
|
+
// {
|
|
669
|
+
// totalMessages: 156,
|
|
670
|
+
// activeMessages: 42,
|
|
671
|
+
// summaryCount: 2,
|
|
672
|
+
// compressionRatio: 0.269
|
|
673
|
+
// }
|
|
674
|
+
```
|
|
675
|
+
|
|
676
|
+
**Manually trigger compression:**
|
|
677
|
+
```typescript
|
|
678
|
+
// Compression happens automatically in SessionManager.addMessage()
|
|
679
|
+
// when tokens exceed usable context
|
|
680
|
+
```
|
|
681
|
+
|
|
682
|
+
**Disable compression:**
|
|
683
|
+
```json
|
|
684
|
+
{
|
|
685
|
+
"compression": {
|
|
686
|
+
"enabled": false
|
|
687
|
+
}
|
|
688
|
+
}
|
|
689
|
+
```
|
|
690
|
+
|
|
691
|
+
---
|
|
692
|
+
|
|
693
|
+
**Document Version**: 1.0
|
|
694
|
+
**Last Updated**: 2026-01-18
|
|
695
|
+
**Maintained By**: GenCode Team
|