gencode-ai 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116) hide show
  1. package/RELEASE_NOTES_v0.4.0.md +140 -0
  2. package/dist/agent/agent.d.ts +17 -2
  3. package/dist/agent/agent.d.ts.map +1 -1
  4. package/dist/agent/agent.js +279 -49
  5. package/dist/agent/agent.js.map +1 -1
  6. package/dist/agent/types.d.ts +15 -1
  7. package/dist/agent/types.d.ts.map +1 -1
  8. package/dist/checkpointing/checkpoint-manager.d.ts +24 -0
  9. package/dist/checkpointing/checkpoint-manager.d.ts.map +1 -1
  10. package/dist/checkpointing/checkpoint-manager.js +28 -0
  11. package/dist/checkpointing/checkpoint-manager.js.map +1 -1
  12. package/dist/cli/components/App.d.ts +8 -0
  13. package/dist/cli/components/App.d.ts.map +1 -1
  14. package/dist/cli/components/App.js +478 -36
  15. package/dist/cli/components/App.js.map +1 -1
  16. package/dist/cli/components/CommandSuggestions.d.ts.map +1 -1
  17. package/dist/cli/components/CommandSuggestions.js +2 -0
  18. package/dist/cli/components/CommandSuggestions.js.map +1 -1
  19. package/dist/cli/components/Header.d.ts +6 -1
  20. package/dist/cli/components/Header.d.ts.map +1 -1
  21. package/dist/cli/components/Header.js +3 -3
  22. package/dist/cli/components/Header.js.map +1 -1
  23. package/dist/cli/components/Messages.d.ts.map +1 -1
  24. package/dist/cli/components/Messages.js +7 -9
  25. package/dist/cli/components/Messages.js.map +1 -1
  26. package/dist/cli/index.js +3 -2
  27. package/dist/cli/index.js.map +1 -1
  28. package/dist/config/types.d.ts +20 -1
  29. package/dist/config/types.d.ts.map +1 -1
  30. package/dist/config/types.js.map +1 -1
  31. package/dist/index.d.ts +2 -2
  32. package/dist/index.js +2 -2
  33. package/dist/input/history-manager.d.ts +78 -0
  34. package/dist/input/history-manager.d.ts.map +1 -0
  35. package/dist/input/history-manager.js +224 -0
  36. package/dist/input/history-manager.js.map +1 -0
  37. package/dist/input/index.d.ts +6 -0
  38. package/dist/input/index.d.ts.map +1 -0
  39. package/dist/input/index.js +5 -0
  40. package/dist/input/index.js.map +1 -0
  41. package/dist/prompts/index.js +3 -3
  42. package/dist/prompts/index.js.map +1 -1
  43. package/dist/providers/gemini.d.ts.map +1 -1
  44. package/dist/providers/gemini.js +33 -2
  45. package/dist/providers/gemini.js.map +1 -1
  46. package/dist/providers/google.d.ts +22 -0
  47. package/dist/providers/google.d.ts.map +1 -0
  48. package/dist/providers/google.js +297 -0
  49. package/dist/providers/google.js.map +1 -0
  50. package/dist/providers/index.d.ts +4 -4
  51. package/dist/providers/index.js +11 -11
  52. package/dist/providers/index.js.map +1 -1
  53. package/dist/providers/openai.d.ts.map +1 -1
  54. package/dist/providers/openai.js +6 -0
  55. package/dist/providers/openai.js.map +1 -1
  56. package/dist/providers/registry.js +3 -3
  57. package/dist/providers/registry.js.map +1 -1
  58. package/dist/providers/types.d.ts +30 -4
  59. package/dist/providers/types.d.ts.map +1 -1
  60. package/dist/session/compression/engine.d.ts +109 -0
  61. package/dist/session/compression/engine.d.ts.map +1 -0
  62. package/dist/session/compression/engine.js +311 -0
  63. package/dist/session/compression/engine.js.map +1 -0
  64. package/dist/session/compression/index.d.ts +12 -0
  65. package/dist/session/compression/index.d.ts.map +1 -0
  66. package/dist/session/compression/index.js +11 -0
  67. package/dist/session/compression/index.js.map +1 -0
  68. package/dist/session/compression/types.d.ts +90 -0
  69. package/dist/session/compression/types.d.ts.map +1 -0
  70. package/dist/session/compression/types.js +17 -0
  71. package/dist/session/compression/types.js.map +1 -0
  72. package/dist/session/manager.d.ts +64 -3
  73. package/dist/session/manager.d.ts.map +1 -1
  74. package/dist/session/manager.js +254 -2
  75. package/dist/session/manager.js.map +1 -1
  76. package/dist/session/types.d.ts +16 -0
  77. package/dist/session/types.d.ts.map +1 -1
  78. package/dist/session/types.js.map +1 -1
  79. package/docs/README.md +1 -0
  80. package/docs/diagrams/compression-decision.mmd +30 -0
  81. package/docs/diagrams/compression-workflow.mmd +54 -0
  82. package/docs/diagrams/layer1-pruning.mmd +45 -0
  83. package/docs/diagrams/layer2-compaction.mmd +42 -0
  84. package/docs/proposals/0007-context-management.md +252 -2
  85. package/docs/proposals/README.md +4 -3
  86. package/docs/providers.md +3 -3
  87. package/docs/session-compression.md +695 -0
  88. package/examples/agent-demo.ts +23 -1
  89. package/examples/basic.ts +3 -3
  90. package/package.json +3 -4
  91. package/src/agent/agent.ts +314 -52
  92. package/src/agent/types.ts +19 -1
  93. package/src/checkpointing/checkpoint-manager.ts +48 -0
  94. package/src/cli/components/App.tsx +553 -34
  95. package/src/cli/components/CommandSuggestions.tsx +2 -0
  96. package/src/cli/components/Header.tsx +16 -1
  97. package/src/cli/components/Messages.tsx +20 -14
  98. package/src/cli/index.tsx +3 -2
  99. package/src/config/types.ts +26 -1
  100. package/src/index.ts +3 -3
  101. package/src/input/history-manager.ts +289 -0
  102. package/src/input/index.ts +6 -0
  103. package/src/prompts/index.test.ts +2 -1
  104. package/src/prompts/index.ts +3 -3
  105. package/src/providers/{gemini.ts → google.ts} +69 -18
  106. package/src/providers/index.ts +14 -14
  107. package/src/providers/openai.ts +7 -0
  108. package/src/providers/registry.ts +3 -3
  109. package/src/providers/types.ts +33 -3
  110. package/src/session/compression/engine.ts +406 -0
  111. package/src/session/compression/index.ts +18 -0
  112. package/src/session/compression/types.ts +102 -0
  113. package/src/session/manager.ts +326 -3
  114. package/src/session/types.ts +21 -0
  115. package/tests/input-history-manager.test.ts +335 -0
  116. package/tests/session-checkpoint-persistence.test.ts +198 -0
@@ -0,0 +1,695 @@
1
+ # Session Compression Implementation Guide
2
+
3
+ Complete documentation for the three-layer session compression system in GenCode.
4
+
5
+ ## Table of Contents
6
+
7
+ 1. [Overview](#overview)
8
+ 2. [Architecture](#architecture)
9
+ 3. [Flowcharts](#flowcharts)
10
+ 4. [Strategy Details](#strategy-details)
11
+ 5. [Testing](#testing)
12
+ 6. [Configuration](#configuration)
13
+
14
+ ---
15
+
16
+ ## Overview
17
+
18
+ ### Goal
19
+
20
+ Session compression enables GenCode to handle long conversations efficiently by:
21
+ - Reducing context size when approaching model limits
22
+ - Preserving critical information (decisions, file changes)
23
+ - Minimizing API costs
24
+
25
+ ### Three-Layer Strategy
26
+
27
+ | Layer | Name | Trigger | Cost | Effect |
28
+ |-------|------|---------|------|--------|
29
+ | **Layer 1** | Tool Output Pruning | Tokens > 20k | 🟢 Free | Remove old tool outputs, keep recent 40k |
30
+ | **Layer 2** | Compaction | Tokens > usable context | 🟡 Medium | LLM generates continuation prompt |
31
+ | **Layer 3** | Message Filtering | Session recovery | 🟢 Free | Return compressed message array |
32
+
33
+ ### File Structure
34
+
35
+ ```
36
+ src/session/compression/
37
+ ├── types.ts # Type definitions and defaults (102 lines)
38
+ ├── engine.ts # Three-layer compression engine (406 lines)
39
+ └── index.ts # Public API exports
40
+
41
+ Integration points:
42
+ ├── src/session/manager.ts # SessionManager integration (lines 310-343, 407-554)
43
+ ├── src/agent/agent.ts # Agent integration (lines 66, 448, 504, 543, 597-604)
44
+ └── src/config/types.ts # Config system integration
45
+ ```
46
+
47
+ ---
48
+
49
+ ## Architecture
50
+
51
+ ### Design Principles
52
+
53
+ 1. **OpenCode Alignment**: Three-layer strategy matches OpenCode's proven approach
54
+ 2. **Provider Agnostic**: Works with any LLM provider (Anthropic, OpenAI, Gemini)
55
+ 3. **Quality First**: Structured metadata (decisions, files modified, tool usage) preserves critical information even if the LLM-generated summary is incomplete
56
+ 4. **Cost Conscious**: Free pruning before expensive summarization
57
+
58
+ ### Key Components
59
+
60
+ **CompressionEngine** (`src/session/compression/engine.ts`)
61
+ - Token estimation (4:1 char-to-token ratio)
62
+ - Compression strategy selection
63
+ - Tool output pruning
64
+ - LLM-based compaction
65
+ - Information extraction (files, tools, decisions)
66
+
67
+ **SessionManager** (`src/session/manager.ts`)
68
+ - Triggers compression on `addMessage()`
69
+ - Stores summaries in session JSON
70
+ - Replaces message array with compressed version
71
+ - Provides `getMessagesForLLM()` for context
72
+
73
+ **Agent** (`src/agent/agent.ts`)
74
+ - Passes `modelInfo` for compression decisions
75
+ - Uses compressed context transparently
76
+ - Exposes `getCompressionStats()`
77
+
78
+ ---
79
+
80
+ ## Flowcharts
81
+
82
+ ### Complete Workflow
83
+
84
+ ```mermaid
85
+ flowchart TD
86
+ Start([User Input]) --> AddToAgent[Agent adds message]
87
+ AddToAgent --> CallSM[SessionManager.addMessage<br/>with modelInfo]
88
+
89
+ CallSM --> PushMsg[Push to messages array]
90
+ PushMsg --> UpdateCount[Update fullMessageCount]
91
+ UpdateCount --> CheckCompress{Needs compression?<br/>needsCompression}
92
+
93
+ CheckCompress -->|No| SaveSession[Save session]
94
+ CheckCompress -->|Yes| DetermineStrategy{Determine strategy}
95
+
96
+ DetermineStrategy -->|prune| Layer1[Layer 1: Pruning]
97
+ DetermineStrategy -->|compact| Layer2[Layer 2: Compaction]
98
+
99
+ %% Layer 1 detailed flow
100
+ Layer1 --> EstTokens1[Estimate total tokens]
101
+ EstTokens1 --> CollectRecent[Collect recent tool outputs<br/>within last 40k tokens<br/>backward iteration]
102
+ CollectRecent --> MarkProtected[Mark as protected indices]
103
+ MarkProtected --> ClearOld[Clear unprotected<br/>old tool results]
104
+ ClearOld --> MarkPruned[Mark: Old tool result cleared<br/>pruned: true<br/>prunedAt: timestamp]
105
+ MarkPruned --> SaveSession
106
+
107
+ %% Layer 2 detailed flow
108
+ Layer2 --> DetermineRange[Determine summary range<br/>start: after last summary<br/>end: length - 10]
109
+ DetermineRange --> ExtractInfo[Extract structured info in parallel]
110
+
111
+ ExtractInfo --> Files[Files modified<br/>Write/Edit tools]
112
+ ExtractInfo --> Tools[Tool usage stats<br/>count + top 3 uses]
113
+ ExtractInfo --> Decisions[Key decisions<br/>decided/chose/will use]
114
+
115
+ Files --> GenPrompt[Generate continuation prompt]
116
+ Tools --> GenPrompt
117
+ Decisions --> GenPrompt
118
+
119
+ GenPrompt --> LLMCall[Call LLM<br/>max_tokens: 1500<br/>generate continuation prompt]
120
+ LLMCall --> CreateSummary[Create ConversationSummary<br/>id, range, content, metadata]
121
+ CreateSummary --> StoreSummary[Store to session.summaries array]
122
+ StoreSummary --> ReplaceMessages[Replace messages array]
123
+
124
+ ReplaceMessages --> BuildNew[Build new messages array:<br/>System prompt<br/>Summary system message<br/>Last 10 messages]
125
+ BuildNew --> SaveSession
126
+
127
+ SaveSession --> GetContext[Get LLM context<br/>getMessagesForLLM]
128
+ GetContext --> AgentContinue[Agent continues<br/>call provider.complete]
129
+ AgentContinue --> LLMResponse[LLM returns response]
130
+ LLMResponse --> End([Done])
131
+
132
+ style Layer1 fill:#ffd93d,stroke:#333,stroke-width:2px
133
+ style Layer2 fill:#74c0fc,stroke:#333,stroke-width:2px
134
+ style LLMCall fill:#ff6b6b,stroke:#333,stroke-width:2px
135
+ style GetContext fill:#51cf66,stroke:#333,stroke-width:2px
136
+ ```
137
+
138
+ ### Compression Decision Logic
139
+
140
+ ```mermaid
141
+ flowchart TD
142
+ Start([Check if compression needed]) --> CheckEnabled{compression.enabled?}
143
+ CheckEnabled -->|No| NoCompression[Return: strategy = none]
144
+ CheckEnabled -->|Yes| CalcTokens[Calculate total tokens]
145
+
146
+ CalcTokens --> UseActual{Has actual TokenUsage?}
147
+ UseActual -->|Yes| SumActual[input + cache.read +<br/>output + reasoning]
148
+ UseActual -->|No| EstimateAll[Estimate all messages<br/>chars / 4]
149
+
150
+ SumActual --> GetUsable[Calculate usable context]
151
+ EstimateAll --> GetUsable
152
+
153
+ GetUsable --> CalcUsable[usable = contextWindow -<br/>min of outputLimit and 32k]
154
+ CalcUsable --> CheckOverflow{total > usable?}
155
+
156
+ CheckOverflow -->|No| NoCompression
157
+ CheckOverflow -->|Yes| CheckPruneMin{total > 20k AND<br/>enablePruning?}
158
+
159
+ CheckPruneMin -->|Yes| UsePrune[Return: strategy = prune]
160
+ CheckPruneMin -->|No| UseCompact[Return: strategy = compact]
161
+
162
+ style CheckEnabled fill:#e3f2fd
163
+ style CheckOverflow fill:#fff3e0
164
+ style CheckPruneMin fill:#fff3e0
165
+ style UsePrune fill:#ffd93d
166
+ style UseCompact fill:#74c0fc
167
+ style NoCompression fill:#f1f3f4
168
+ ```
169
+
170
+ ---
171
+
172
+ ## Strategy Details
173
+
174
+ ### Layer 1: Tool Output Pruning
175
+
176
+ **Implementation**: `src/session/compression/engine.ts:117-159`
177
+
178
+ #### Purpose
179
+ Quick, cost-free reduction of context size by removing old tool outputs.
180
+
181
+ #### Algorithm
182
+
183
+ 1. **Check threshold**: Exit if total tokens < 20k
184
+ 2. **Backward iteration**: Collect recent tool results
185
+ - Start from end of messages array
186
+ - Accumulate up to 40k tokens
187
+ - Store protected indices in Set
188
+ 3. **Forward iteration**: Clear unprotected results
189
+ - Skip messages in protected set
190
+ - Replace `content` with `"[Old tool result cleared]"`
191
+ - Mark with `pruned: true` and `prunedAt: timestamp`
192
+ 4. **Return stats**: Count and saved tokens
193
+
194
+ #### Design Decisions
195
+
196
+ | Decision | Rationale |
197
+ |----------|-----------|
198
+ | 20k threshold | Balance efficiency vs necessity |
199
+ | 40k protection | ~10-20 conversation turns of context |
200
+ | Backward iteration | Prioritize recent outputs |
201
+ | Mark not delete | Preserve message structure for debugging |
202
+ | Timestamp tracking | Auditability and transparency |
203
+
204
+ #### Example
205
+
206
+ ```typescript
207
+ // Before pruning
208
+ {
209
+ role: 'user',
210
+ content: [
211
+ { type: 'tool_result', toolUseId: 'old-1', content: '... 5000 chars ...' }
212
+ ]
213
+ }
214
+
215
+ // After pruning
216
+ {
217
+ role: 'user',
218
+ content: [
219
+ {
220
+ type: 'tool_result',
221
+ toolUseId: 'old-1',
222
+ content: '[Old tool result cleared]',
223
+ pruned: true,
224
+ prunedAt: '2026-01-18T10:30:00.000Z'
225
+ }
226
+ ]
227
+ }
228
+ ```
229
+
230
+ ### Layer 2: Compaction
231
+
232
+ **Implementation**: `src/session/compression/engine.ts:165-223`
233
+
234
+ #### Purpose
235
+ Generate a "continuation prompt" that preserves context for future work.
236
+
237
+ #### Algorithm
238
+
239
+ 1. **Determine range**:
240
+ - Start: After last summary (or beginning)
241
+ - End: Current length - 10 (preserve recent)
242
+ 2. **Extract structured info** (parallel):
243
+ - Files modified (from Write/Edit tools)
244
+ - Tool usage statistics
245
+ - Key decisions (contains "decided", "chose", "will use")
246
+ 3. **Generate continuation prompt**:
247
+ - Call LLM with specialized prompt
248
+ - max_tokens: 1500
249
+ - Focus: What's needed to continue, not just what was done
250
+ 4. **Create summary object**:
251
+ - Unique ID: `sum-{timestamp}-{random}`
252
+ - Type: "compaction"
253
+ - Range: `[start, end]`
254
+ - Content: Continuation prompt
255
+ - Metadata: decisions, files, tools
256
+ 5. **Replace messages**:
257
+ - Keep: System prompt
258
+ - Add: Summary as system message
259
+ - Keep: Last 10 messages
260
+
261
+ #### Continuation Prompt Template
262
+
263
+ ```
264
+ Provide a detailed prompt for continuing our conversation above.
265
+
266
+ Focus on information that would be helpful for continuing the conversation:
267
+ 1. What we accomplished so far
268
+ 2. What we're currently working on
269
+ 3. Which files we modified and key changes made
270
+ 4. What we plan to do next
271
+ 5. Any important context or decisions that would be needed
272
+
273
+ Remember: The new session will NOT have access to our full conversation history,
274
+ so include all essential context needed to continue working effectively.
275
+
276
+ Be technical and specific. Use structured bullet points.
277
+ ```
278
+
279
+ #### Design Decisions
280
+
281
+ | Decision | Rationale |
282
+ |----------|-----------|
283
+ | "Continuation" not "summary" | Focus on future needs, not past actions |
284
+ | Structured extraction | Ensure no critical info lost even if LLM summary incomplete |
285
+ | max_tokens: 1500 | Detailed but not excessive |
286
+ | Keep last 10 messages | Maintain conversation continuity |
287
+ | Configurable model | Allow cheaper model (e.g., Haiku) for summarization |
288
+
289
+ #### Summary Format
290
+
291
+ ```typescript
292
+ {
293
+ id: "sum-1737202200000-abc123",
294
+ type: "compaction",
295
+ coveringMessages: [1, 100],
296
+ content: "We accomplished implementing JWT authentication...",
297
+ keyDecisions: ["Decided to use JWT with refresh tokens"],
298
+ filesModified: ["src/auth.ts", "src/login.tsx"],
299
+ toolsUsed: [
300
+ { tool: "Write", count: 5, notableUses: ["Modified src/auth.ts"] }
301
+ ],
302
+ generatedAt: "2026-01-18T10:30:00.000Z",
303
+ estimatedTokens: 350
304
+ }
305
+ ```
306
+
307
+ ### Layer 3: Message Filtering
308
+
309
+ **Implementation**: `src/session/manager.ts:520-530`
310
+
311
+ #### Purpose
312
+ Optimize session recovery by returning compressed messages.
313
+
314
+ #### Current Implementation
315
+
316
+ ```typescript
317
+ getMessagesForLLM(): Message[] {
318
+ if (!this.currentSession) return [];
319
+
320
+ // If no summaries, return all messages
321
+ if (!this.currentSession.summaries?.length) {
322
+ return this.currentSession.messages;
323
+ }
324
+
325
+ // Messages already include summary (built in performCompaction)
326
+ return this.currentSession.messages;
327
+ }
328
+ ```
329
+
330
+ #### Status
331
+
332
+ - ✅ Basic support: Returns compressed message array
333
+ - ✅ Storage optimization: Summaries embedded in session JSON
334
+ - ⚠️ Not implemented: Streaming load, CompactionPart markers (OpenCode feature)
335
+
336
+ ---
337
+
338
+ ## Testing
339
+
340
+ ### Automated Tests
341
+
342
+ Run the test suite:
343
+
344
+ ```bash
345
+ # Make script executable
346
+ chmod +x scripts/test-compression.sh
347
+
348
+ # Run all tests
349
+ ./scripts/test-compression.sh
350
+ ```
351
+
352
+ ### Unit Test Structure
353
+
354
+ ```typescript
355
+ describe('CompressionEngine', () => {
356
+ describe('estimateTokens', () => {
357
+ it('uses 4:1 char-to-token ratio');
358
+ it('handles empty strings');
359
+ });
360
+
361
+ describe('needsCompression', () => {
362
+ it('returns none when under limit');
363
+ it('returns prune when >20k tokens');
364
+ it('returns compact when <20k but over limit');
365
+ it('respects enabled flag');
366
+ });
367
+
368
+ describe('pruneToolOutputs', () => {
369
+ it('does not prune when <20k tokens');
370
+ it('protects recent 40k tokens');
371
+ it('clears old tool results with pruned marker');
372
+ it('returns accurate stats');
373
+ });
374
+
375
+ describe('compact', () => {
376
+ it('extracts files modified from Write/Edit');
377
+ it('counts tool usage');
378
+ it('extracts key decisions');
379
+ it('calls LLM with continuation prompt');
380
+ it('returns complete ConversationSummary');
381
+ });
382
+ });
383
+ ```
384
+
385
+ ### Manual Testing
386
+
387
+ #### Test 1: Long Conversation
388
+
389
+ ```bash
390
+ npm start
391
+ ```
392
+
393
+ ```javascript
394
+ > Help me create 50 test files with different content
395
+
396
+ // After agent completes...
397
+
398
+ > Show compression stats
399
+ // Expected:
400
+ // totalMessages > activeMessages
401
+ // summaryCount > 0
402
+ // compressionRatio < 1.0
403
+ ```
404
+
405
+ #### Test 2: Tool Output Pruning
406
+
407
+ ```bash
408
+ # Start session
409
+ npm start
410
+
411
+ > Read all TypeScript files in the project and summarize them
412
+
413
+ # In another terminal
414
+ SESSION_FILE=$(ls -t ~/.gen/sessions/*.json | head -1)
415
+ jq '.messages[] | select(.content[]?.pruned == true)' "$SESSION_FILE"
416
+
417
+ # Expected: Messages with pruned: true and prunedAt timestamp
418
+ ```
419
+
420
+ #### Test 3: Session Recovery
421
+
422
+ ```bash
423
+ npm start
424
+
425
+ > Implement a user authentication system using JWT
426
+
427
+ # After implementation
428
+ > /save
429
+ > /exit
430
+
431
+ # Restart
432
+ npm start
433
+ > /resume
434
+
435
+ > What authentication scheme did we choose?
436
+
437
+ # Expected: Agent correctly recalls "JWT"
438
+ ```
439
+
440
+ ### Performance Metrics
441
+
442
+ | Metric | Target | How to Measure |
443
+ |--------|--------|----------------|
444
+ | Compression ratio | 60-70% reduction | Compare totalMessages vs activeMessages |
445
+ | Summary generation | < 5 seconds | Time the `compact()` call |
446
+ | Token savings | 50-60% | Compare before/after token estimates |
447
+ | Context quality | 100% key info retained | Manual verification of summaries |
448
+ | Pruning speed | < 100ms | Time the `pruneToolOutputs()` call |
449
+
450
+ ### Session File Verification
451
+
452
+ ```bash
453
+ # Find latest session
454
+ SESSION_FILE=$(ls -t ~/.gen/sessions/*.json | head -1)
455
+
456
+ # Check structure
457
+ jq '{
458
+ totalMessages: .fullMessageCount,
459
+ activeMessages: (.messages | length),
460
+ summaryCount: (.summaries | length)
461
+ }' "$SESSION_FILE"
462
+
463
+ # View summary content
464
+ jq '.summaries[0]' "$SESSION_FILE"
465
+
466
+ # Check for pruned messages
467
+ jq '.messages[] | select(.content[]?.pruned == true)' "$SESSION_FILE"
468
+ ```
469
+
470
+ ---
471
+
472
+ ## Configuration
473
+
474
+ ### Default Settings
475
+
476
+ ```typescript
477
+ // src/session/compression/types.ts
478
+ export const DEFAULT_COMPRESSION_CONFIG: CompressionConfig = {
479
+ enabled: true,
480
+ enablePruning: true,
481
+ enableCompaction: true,
482
+ pruneMinimum: 20_000, // Min tokens to trigger pruning
483
+ pruneProtect: 40_000, // Protect recent 40k tokens
484
+ reservedOutputTokens: 32_000, // Reserve 32k for output
485
+ };
486
+ ```
487
+
488
+ ### User Configuration
489
+
490
+ Add to `~/.gencode/settings.json` (`model` is optional; set it to use a cheaper model for summaries):
491
+
492
+ ```json
493
+ {
494
+ "compression": {
495
+ "enabled": true,
496
+ "enablePruning": true,
497
+ "enableCompaction": true,
498
+ "pruneMinimum": 20000,
499
+ "pruneProtect": 40000,
500
+ "reservedOutputTokens": 32000,
501
+ "model": "claude-haiku-3-5"
502
+ }
503
+ }
504
+ ```
505
+
506
+ ### Tuning Guide
507
+
508
+ | Scenario | pruneMinimum | pruneProtect | model | Effect |
509
+ |----------|--------------|--------------|-------|--------|
510
+ | **Low cost** | 10k | 20k | haiku | More frequent compression, lower cost |
511
+ | **Balanced** | 20k | 40k | current | Default, balanced quality/cost |
512
+ | **High quality** | 30k | 60k | sonnet | More context retained, higher cost |
513
+
514
+ ### Model Context Windows
515
+
516
+ | Model | Context Window | Output Limit | Usable Context |
517
+ |-------|----------------|--------------|----------------|
518
+ | Claude Sonnet | 200,000 | 8,192 | 191,808 |
519
+ | GPT-4o | 128,000 | 4,096 | 123,904 |
520
+ | Gemini 2.0 | 1,000,000 | 8,192 | 991,808 |
521
+
522
+ ---
523
+
524
+ ## Implementation Highlights
525
+
526
+ ### ADR-1: 4:1 Token Estimation
527
+
528
+ **Context**: Need fast token estimation without heavy dependencies
529
+
530
+ **Decision**: Use 4:1 character-to-token ratio (OpenCode's validated approach)
531
+
532
+ **Pros**:
533
+ - Zero dependencies, instant calculation
534
+ - ±10% accuracy for English text
535
+ - Production-validated
536
+
537
+ **Cons**:
538
+ - 20-30% underestimate for Chinese
539
+ - 15-20% overestimate for code
540
+
541
+ **Mitigation**:
542
+ - Use conservative thresholds
543
+ - Prefer actual TokenUsage when available
544
+ - Future: Optional tiktoken support
545
+
546
+ **Code**: `src/session/compression/engine.ts:36-48`
547
+
548
+ ### ADR-2: Backward Iteration for Protection
549
+
550
+ **Context**: Tool outputs consume many tokens, recent ones most important
551
+
552
+ **Decision**: Iterate backward, protect last 40k tokens of tool outputs
553
+
554
+ **Pros**:
555
+ - Guarantees most recent context preserved
556
+ - 40k ≈ 10-20 conversation turns
557
+ - Avoids accidentally removing critical outputs
558
+
559
+ **Implementation**: O(n) time using Set for protected indices
560
+
561
+ **Code**: `src/session/compression/engine.ts:131-142`
562
+
563
+ ### ADR-3: Continuation Prompt Philosophy
564
+
565
+ **Context**: Traditional summaries focus on "what was done", but new sessions need "how to continue"
566
+
567
+ **Decision**: Generate "continuation prompts" instead of summaries
568
+
569
+ **Template Structure**:
570
+ 1. What we accomplished (past)
571
+ 2. Current work (present)
572
+ 3. Files modified + changes (context)
573
+ 4. Next steps (future)
574
+ 5. Important decisions (context)
575
+
576
+ **Effect**: More actionable summaries, Agent can naturally continue work
577
+
578
+ **Code**: `src/session/compression/engine.ts:196-223`
579
+
580
+ ---
581
+
582
+ ## Appendix
583
+
584
+ ### Type Definitions
585
+
586
+ ```typescript
587
+ export interface CompressionConfig {
588
+ enabled: boolean;
589
+ enablePruning: boolean;
590
+ enableCompaction: boolean;
591
+ pruneMinimum: number;
592
+ pruneProtect: number;
593
+ reservedOutputTokens: number;
594
+ model?: string;
595
+ }
596
+
597
+ export interface ConversationSummary {
598
+ id: string;
599
+ type: 'compaction';
600
+ coveringMessages: [number, number];
601
+ content: string;
602
+ keyDecisions: string[];
603
+ filesModified: string[];
604
+ toolsUsed: ToolUsageSummary[];
605
+ generatedAt: string;
606
+ estimatedTokens: number;
607
+ }
608
+
609
+ export interface ToolUsageSummary {
610
+ tool: string;
611
+ count: number;
612
+ notableUses: string[];
613
+ }
614
+
615
+ export interface TokenUsage {
616
+ input: number;
617
+ output: number;
618
+ reasoning?: number;
619
+ cache?: {
620
+ read: number;
621
+ write: number;
622
+ };
623
+ }
624
+ ```
625
+
626
+ ### Session JSON Example
627
+
628
+ ```json
629
+ {
630
+ "metadata": {
631
+ "id": "abc123",
632
+ "title": "Implement Auth System",
633
+ "messageCount": 42,
634
+ "fullMessageCount": 156
635
+ },
636
+ "messages": [
637
+ { "role": "system", "content": "..." },
638
+ {
639
+ "role": "system",
640
+ "content": "[Earlier conversation - 100 messages summarized]\n\nWe accomplished implementing JWT authentication..."
641
+ },
642
+ { "role": "user", "content": "..." }
643
+ ],
644
+ "summaries": [
645
+ {
646
+ "id": "sum-1737202200000-abc123",
647
+ "type": "compaction",
648
+ "coveringMessages": [1, 100],
649
+ "content": "We accomplished implementing JWT authentication...",
650
+ "keyDecisions": ["Decided to use JWT with refresh tokens"],
651
+ "filesModified": ["src/auth.ts", "src/login.tsx"],
652
+ "toolsUsed": [
653
+ { "tool": "Write", "count": 5, "notableUses": ["Modified src/auth.ts"] }
654
+ ],
655
+ "generatedAt": "2026-01-18T10:30:00.000Z",
656
+ "estimatedTokens": 350
657
+ }
658
+ ]
659
+ }
660
+ ```
661
+
662
+ ### Quick Reference
663
+
664
+ **Check if compression is working:**
665
+ ```typescript
666
+ const stats = agent.getCompressionStats();
667
+ console.log(stats);
668
+ // {
669
+ // totalMessages: 156,
670
+ // activeMessages: 42,
671
+ // summaryCount: 2,
672
+ // compressionRatio: 0.269
673
+ // }
674
+ ```
675
+
676
+ **Manually trigger compression:**
677
+ ```typescript
678
+ // Compression happens automatically in SessionManager.addMessage()
679
+ // when tokens exceed usable context
680
+ ```
681
+
682
+ **Disable compression:**
683
+ ```json
684
+ {
685
+ "compression": {
686
+ "enabled": false
687
+ }
688
+ }
689
+ ```
690
+
691
+ ---
692
+
693
+ **Document Version**: 1.0
694
+ **Last Updated**: 2026-01-18
695
+ **Maintained By**: GenCode Team