@mariozechner/pi-coding-agent 0.16.0 → 0.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100) hide show
  1. package/CHANGELOG.md +38 -0
  2. package/README.md +58 -1
  3. package/dist/cli/args.d.ts +1 -0
  4. package/dist/cli/args.d.ts.map +1 -1
  5. package/dist/cli/args.js +5 -0
  6. package/dist/cli/args.js.map +1 -1
  7. package/dist/config.d.ts +2 -0
  8. package/dist/config.d.ts.map +1 -1
  9. package/dist/config.js +4 -0
  10. package/dist/config.js.map +1 -1
  11. package/dist/core/agent-session.d.ts +30 -2
  12. package/dist/core/agent-session.d.ts.map +1 -1
  13. package/dist/core/agent-session.js +181 -21
  14. package/dist/core/agent-session.js.map +1 -1
  15. package/dist/core/compaction.d.ts +30 -5
  16. package/dist/core/compaction.d.ts.map +1 -1
  17. package/dist/core/compaction.js +194 -61
  18. package/dist/core/compaction.js.map +1 -1
  19. package/dist/core/hooks/index.d.ts +5 -0
  20. package/dist/core/hooks/index.d.ts.map +1 -0
  21. package/dist/core/hooks/index.js +4 -0
  22. package/dist/core/hooks/index.js.map +1 -0
  23. package/dist/core/hooks/loader.d.ts +56 -0
  24. package/dist/core/hooks/loader.d.ts.map +1 -0
  25. package/dist/core/hooks/loader.js +158 -0
  26. package/dist/core/hooks/loader.js.map +1 -0
  27. package/dist/core/hooks/runner.d.ts +69 -0
  28. package/dist/core/hooks/runner.d.ts.map +1 -0
  29. package/dist/core/hooks/runner.js +203 -0
  30. package/dist/core/hooks/runner.js.map +1 -0
  31. package/dist/core/hooks/tool-wrapper.d.ts +16 -0
  32. package/dist/core/hooks/tool-wrapper.d.ts.map +1 -0
  33. package/dist/core/hooks/tool-wrapper.js +71 -0
  34. package/dist/core/hooks/tool-wrapper.js.map +1 -0
  35. package/dist/core/hooks/types.d.ts +220 -0
  36. package/dist/core/hooks/types.d.ts.map +1 -0
  37. package/dist/core/hooks/types.js +8 -0
  38. package/dist/core/hooks/types.js.map +1 -0
  39. package/dist/core/index.d.ts +1 -0
  40. package/dist/core/index.d.ts.map +1 -1
  41. package/dist/core/index.js +1 -0
  42. package/dist/core/index.js.map +1 -1
  43. package/dist/core/session-manager.d.ts +10 -3
  44. package/dist/core/session-manager.d.ts.map +1 -1
  45. package/dist/core/session-manager.js +78 -28
  46. package/dist/core/session-manager.js.map +1 -1
  47. package/dist/core/settings-manager.d.ts +6 -0
  48. package/dist/core/settings-manager.d.ts.map +1 -1
  49. package/dist/core/settings-manager.js +14 -0
  50. package/dist/core/settings-manager.js.map +1 -1
  51. package/dist/core/system-prompt.d.ts.map +1 -1
  52. package/dist/core/system-prompt.js +5 -3
  53. package/dist/core/system-prompt.js.map +1 -1
  54. package/dist/core/tools/truncate.d.ts +6 -2
  55. package/dist/core/tools/truncate.d.ts.map +1 -1
  56. package/dist/core/tools/truncate.js +11 -1
  57. package/dist/core/tools/truncate.js.map +1 -1
  58. package/dist/index.d.ts +1 -0
  59. package/dist/index.d.ts.map +1 -1
  60. package/dist/index.js.map +1 -1
  61. package/dist/main.d.ts.map +1 -1
  62. package/dist/main.js +23 -12
  63. package/dist/main.js.map +1 -1
  64. package/dist/modes/interactive/components/bash-execution.d.ts +1 -0
  65. package/dist/modes/interactive/components/bash-execution.d.ts.map +1 -1
  66. package/dist/modes/interactive/components/bash-execution.js +17 -6
  67. package/dist/modes/interactive/components/bash-execution.js.map +1 -1
  68. package/dist/modes/interactive/components/hook-input.d.ts +12 -0
  69. package/dist/modes/interactive/components/hook-input.d.ts.map +1 -0
  70. package/dist/modes/interactive/components/hook-input.js +46 -0
  71. package/dist/modes/interactive/components/hook-input.js.map +1 -0
  72. package/dist/modes/interactive/components/hook-selector.d.ts +16 -0
  73. package/dist/modes/interactive/components/hook-selector.d.ts.map +1 -0
  74. package/dist/modes/interactive/components/hook-selector.js +76 -0
  75. package/dist/modes/interactive/components/hook-selector.js.map +1 -0
  76. package/dist/modes/interactive/components/tool-execution.d.ts.map +1 -1
  77. package/dist/modes/interactive/components/tool-execution.js +12 -7
  78. package/dist/modes/interactive/components/tool-execution.js.map +1 -1
  79. package/dist/modes/interactive/interactive-mode.d.ts +37 -0
  80. package/dist/modes/interactive/interactive-mode.d.ts.map +1 -1
  81. package/dist/modes/interactive/interactive-mode.js +190 -7
  82. package/dist/modes/interactive/interactive-mode.js.map +1 -1
  83. package/dist/modes/print-mode.d.ts.map +1 -1
  84. package/dist/modes/print-mode.js +15 -0
  85. package/dist/modes/print-mode.js.map +1 -1
  86. package/dist/modes/rpc/rpc-mode.d.ts +2 -1
  87. package/dist/modes/rpc/rpc-mode.d.ts.map +1 -1
  88. package/dist/modes/rpc/rpc-mode.js +118 -3
  89. package/dist/modes/rpc/rpc-mode.js.map +1 -1
  90. package/dist/modes/rpc/rpc-types.d.ts +41 -0
  91. package/dist/modes/rpc/rpc-types.d.ts.map +1 -1
  92. package/dist/modes/rpc/rpc-types.js.map +1 -1
  93. package/docs/compaction.md +519 -0
  94. package/docs/hooks.md +609 -0
  95. package/docs/rpc.md +870 -0
  96. package/docs/session.md +89 -0
  97. package/docs/theme.md +586 -0
  98. package/docs/truncation.md +235 -0
  99. package/docs/undercompaction.md +313 -0
  100. package/package.json +18 -6
@@ -0,0 +1,235 @@
1
+ # Tool Output Truncation
2
+
3
+ ## Limits
4
+
5
+ - **Line limit**: 2000 lines
6
+ - **Byte limit**: 30KB
7
+ - **Grep line limit**: 500 chars per match line
8
+
9
+ Whichever limit is hit first wins. **Never return partial lines** (except bash edge case).
10
+
11
+ ---
12
+
13
+ ## read
14
+
15
+ Head truncation (first N lines). Has offset/limit params for continuation.
16
+
17
+ ### Scenarios
18
+
19
+ **First line > 30KB:**
20
+ ```
21
+ LLM sees:
22
+ [Line 1 is 50KB, exceeds 30KB limit. Use bash to read: head -c 30000 path/to/file]
23
+
24
+ Details:
25
+ { truncation: { truncated: true, truncatedBy: "bytes", outputLines: 0, ... } }
26
+ ```
27
+
28
+ **Hit line limit (2000 lines, < 30KB):**
29
+ ```
30
+ LLM sees:
31
+ [lines 1-2000 content]
32
+
33
+ [Showing lines 1-2000 of 5000. Use offset=2001 to continue]
34
+
35
+ Details:
36
+ { truncation: { truncated: true, truncatedBy: "lines", outputLines: 2000, totalLines: 5000 } }
37
+ ```
38
+
39
+ **Hit byte limit (< 2000 lines, 30KB):**
40
+ ```
41
+ LLM sees:
42
+ [lines 1-500 content]
43
+
44
+ [Showing lines 1-500 of 5000 (30KB limit). Use offset=501 to continue]
45
+
46
+ Details:
47
+ { truncation: { truncated: true, truncatedBy: "bytes", outputLines: 500, totalLines: 5000 } }
48
+ ```
49
+
50
+ **With offset, hit line limit (e.g., offset=1000):**
51
+ ```
52
+ LLM sees:
53
+ [lines 1000-2999 content]
54
+
55
+ [Showing lines 1000-2999 of 5000. Use offset=3000 to continue]
56
+
57
+ Details:
58
+ { truncation: { truncatedBy: "lines", ... } }
59
+ ```
60
+
61
+ **With offset, hit byte limit (e.g., offset=1000, 30KB after 500 lines):**
62
+ ```
63
+ LLM sees:
64
+ [lines 1000-1499 content]
65
+
66
+ [Showing lines 1000-1499 of 5000 (30KB limit). Use offset=1500 to continue]
67
+
68
+ Details:
69
+ { truncation: { truncatedBy: "bytes", outputLines: 500, ... } }
70
+ ```
71
+
72
+ **With offset, first line at offset > 30KB (e.g., offset=1000, line 1000 is 50KB):**
73
+ ```
74
+ LLM sees:
75
+ [Line 1000 is 50KB, exceeds 30KB limit. Use bash: sed -n '1000p' file | head -c 30000]
76
+
77
+ Details:
78
+ { truncation: { truncated: true, truncatedBy: "bytes", outputLines: 0 } }
79
+ ```
80
+
81
+ ---
82
+
83
+ ## bash
84
+
85
+ Tail truncation (last N lines). Writes full output to temp file if truncated.
86
+
87
+ ### Scenarios
88
+
89
+ **Hit line limit (2000 lines):**
90
+ ```
91
+ LLM sees:
92
+ [lines 48001-50000 content]
93
+
94
+ [Showing lines 48001-50000 of 50000. Full output: /tmp/pi-bash-xxx.log]
95
+
96
+ Details:
97
+ { truncation: { truncated: true, truncatedBy: "lines", outputLines: 2000, totalLines: 50000 }, fullOutputPath: "/tmp/..." }
98
+ ```
99
+
100
+ **Hit byte limit (< 2000 lines, 30KB):**
101
+ ```
102
+ LLM sees:
103
+ [lines 49501-50000 content]
104
+
105
+ [Showing lines 49501-50000 of 50000 (30KB limit). Full output: /tmp/pi-bash-xxx.log]
106
+
107
+ Details:
108
+ { truncation: { truncatedBy: "bytes", ... }, fullOutputPath: "/tmp/..." }
109
+ ```
110
+
111
+ **Last line alone > 30KB (edge case, partial OK here):**
112
+ ```
113
+ LLM sees:
114
+ [last 30KB of final line]
115
+
116
+ [Showing last 30KB of line 50000 (line is 100KB). Full output: /tmp/pi-bash-xxx.log]
117
+
118
+ Details:
119
+ { truncation: { truncatedBy: "bytes", lastLinePartial: true }, fullOutputPath: "/tmp/..." }
120
+ ```
121
+
122
+ ---
123
+
124
+ ## grep
125
+
126
+ Head truncation. Primary limit: 100 matches. Each match line truncated to 500 chars.
127
+
128
+ ### Scenarios
129
+
130
+ **Hit match limit (100 matches):**
131
+ ```
132
+ LLM sees:
133
+ file.ts:10: matching content here...
134
+ file.ts:25: another match...
135
+ ...
136
+
137
+ [100 matches limit reached. Use limit=200 for more, or refine pattern]
138
+
139
+ Details:
140
+ { matchLimitReached: 100 }
141
+ ```
142
+
143
+ **Hit byte limit (< 100 matches, 30KB):**
144
+ ```
145
+ LLM sees:
146
+ [matches that fit in 30KB]
147
+
148
+ [30KB limit reached (50 of 100+ matches shown)]
149
+
150
+ Details:
151
+ { truncation: { truncatedBy: "bytes", ... } }
152
+ ```
153
+
154
+ **Match lines truncated (any line > 500 chars):**
155
+ ```
156
+ LLM sees:
157
+ file.ts:10: very long matching content that exceeds 500 chars gets cut off here... [truncated]
158
+ file.ts:25: normal match
159
+
160
+ [Some lines truncated to 500 chars. Use read tool to see full lines]
161
+
162
+ Details:
163
+ { linesTruncated: true }
164
+ ```
165
+
166
+ ---
167
+
168
+ ## find
169
+
170
+ Head truncation. Primary limit: 1000 results. File paths only (never > 30KB each).
171
+
172
+ ### Scenarios
173
+
174
+ **Hit result limit (1000 results):**
175
+ ```
176
+ LLM sees:
177
+ src/file1.ts
178
+ src/file2.ts
179
+ [998 more paths]
180
+
181
+ [1000 results limit reached. Use limit=2000 for more, or refine pattern]
182
+
183
+ Details:
184
+ { resultLimitReached: 1000 }
185
+ ```
186
+
187
+ **Hit byte limit (unlikely, < 1000 results, 30KB):**
188
+ ```
189
+ LLM sees:
190
+ [paths that fit]
191
+
192
+ [30KB limit reached]
193
+
194
+ Details:
195
+ { truncation: { truncatedBy: "bytes", ... } }
196
+ ```
197
+
198
+ ---
199
+
200
+ ## ls
201
+
202
+ Head truncation. Primary limit: 500 entries. Entry names only (never > 30KB each).
203
+
204
+ ### Scenarios
205
+
206
+ **Hit entry limit (500 entries):**
207
+ ```
208
+ LLM sees:
209
+ .gitignore
210
+ README.md
211
+ src/
212
+ [497 more entries]
213
+
214
+ [500 entries limit reached. Use limit=1000 for more]
215
+
216
+ Details:
217
+ { entryLimitReached: 500 }
218
+ ```
219
+
220
+ **Hit byte limit (unlikely):**
221
+ ```
222
+ LLM sees:
223
+ [entries that fit]
224
+
225
+ [30KB limit reached]
226
+
227
+ Details:
228
+ { truncation: { truncatedBy: "bytes", ... } }
229
+ ```
230
+
231
+ ---
232
+
233
+ ## TUI Display
234
+
235
+ `tool-execution.ts` reads `details.truncation` and related fields to display truncation notices in warning color. The LLM text content and TUI display show the same information.
@@ -0,0 +1,313 @@
1
+ # Under-Compaction Analysis
2
+
3
+ ## Problem Statement
4
+
5
+ Auto-compaction triggers too late, causing context window overflows that result in failed LLM calls with `stopReason == "length"`.
6
+
7
+ ## Architecture Overview
8
+
9
+ ### Event Flow
10
+
11
+ ```
12
+ User prompt
13
+ │
14
+ ▼
15
+ agent.prompt()
16
+ │
17
+ ▼
18
+ agentLoop() in packages/ai/src/agent/agent-loop.ts
19
+
20
+ ├─► streamAssistantResponse()
21
+ │ │
22
+ │ ▼
23
+ │ LLM provider (Anthropic, OpenAI, etc.)
24
+ │ │
25
+ │ ▼
26
+ │ Events: message_start → message_update* → message_end
27
+ │ │
28
+ │ ▼
29
+ │ AssistantMessage with usage stats (input, output, cacheRead, cacheWrite)
30
+
31
+ ├─► If assistant has tool calls:
32
+ │ │
33
+ │ ▼
34
+ │ executeToolCalls()
35
+ │ │
36
+ │ ├─► tool_execution_start (toolCallId, toolName, args)
37
+ │ │
38
+ │ ├─► tool.execute() runs (read, bash, write, edit, etc.)
39
+ │ │
40
+ │ ├─► tool_execution_end (toolCallId, toolName, result, isError)
41
+ │ │
42
+ │ └─► message_start + message_end for ToolResultMessage
43
+
44
+ └─► Loop continues until no more tool calls
45
+ │
46
+ ▼
47
+ agent_end
48
+ ```
49
+
50
+ ### Token Usage Reporting
51
+
52
+ Token usage is ONLY available in `AssistantMessage.usage` after the LLM responds:
53
+
54
+ ```typescript
55
+ // From packages/ai/src/types.ts
56
+ export interface Usage {
57
+ input: number; // Tokens in the request
58
+ output: number; // Tokens generated
59
+ cacheRead: number; // Cached tokens read
60
+ cacheWrite: number; // Cached tokens written
61
+ cost: Cost;
62
+ }
63
+ ```
64
+
65
+ The `input` field represents the total context size sent to the LLM, which includes:
66
+ - System prompt
67
+ - All conversation messages
68
+ - All tool results from previous calls
69
+
70
+ ### Current Compaction Check
71
+
72
+ Both TUI (`tui-renderer.ts`) and RPC (`main.ts`) modes check compaction identically:
73
+
74
+ ```typescript
75
+ // In agent.subscribe() callback:
76
+ if (event.type === "message_end") {
77
+ // ...
78
+ if (event.message.role === "assistant") {
79
+ await checkAutoCompaction();
80
+ }
81
+ }
82
+
83
+ async function checkAutoCompaction() {
84
+ // Get last non-aborted assistant message
85
+ const messages = agent.state.messages;
86
+ let lastAssistant = findLastNonAbortedAssistant(messages);
87
+ if (!lastAssistant) return;
88
+
89
+ const contextTokens = calculateContextTokens(lastAssistant.usage);
90
+ const contextWindow = agent.state.model.contextWindow;
91
+
92
+ if (!shouldCompact(contextTokens, contextWindow, settings)) return;
93
+
94
+ // Trigger compaction...
95
+ }
96
+ ```
97
+
98
+ **The check happens on `message_end` for assistant messages only.**
99
+
100
+ ## The Under-Compaction Problem
101
+
102
+ ### Failure Scenario
103
+
104
+ ```
105
+ Context window: 200,000 tokens
106
+ Reserve tokens: 16,384 (default)
107
+ Threshold: 200,000 - 16,384 = 183,616
108
+
109
+ Turn N:
110
+ 1. Assistant message received, usage shows 180,000 tokens
111
+ 2. shouldCompact(180000, 200000, settings) → 180000 > 183616 → FALSE
112
+ 3. Tool executes: `cat large-file.txt` → outputs 100KB (~25,000 tokens)
113
+ 4. Context now effectively 205,000 tokens, but we don't know this
114
+ 5. Next LLM call fails: context exceeds 200,000 window
115
+ ```
116
+
117
+ The problem occurs when:
118
+ 1. Context is below threshold (so compaction doesn't trigger)
119
+ 2. A tool adds enough content to push it over the window limit
120
+ 3. We only discover this when the next LLM call fails
121
+
122
+ ### Root Cause
123
+
124
+ 1. **Token counts are retrospective**: We only learn the context size AFTER the LLM processes it
125
+ 2. **Tool results are blind spots**: When a tool executes and returns a large result, we don't know how many tokens it adds until the next LLM call
126
+ 3. **No estimation before submission**: We submit the context and hope it fits
127
+
128
+ ## Current Tool Output Limits
129
+
130
+ | Tool | Our Limit | Worst Case |
131
+ |------|-----------|------------|
132
+ | bash | 10MB per stream | 20MB (~5M tokens) |
133
+ | read | 2000 lines × 2000 chars | 4MB (~1M tokens) |
134
+ | write | Byte count only | Minimal |
135
+ | edit | Diff output | Variable |
136
+
137
+ ## How Other Tools Handle This
138
+
139
+ ### SST/OpenCode
140
+
141
+ **Tool Output Limits (during execution):**
142
+
143
+ | Tool | Limit | Details |
144
+ |------|-------|---------|
145
+ | bash | 30KB chars | `MAX_OUTPUT_LENGTH = 30_000`, truncates with notice |
146
+ | read | 2000 lines × 2000 chars/line | No total cap, theoretically 4MB |
147
+ | grep | 100 matches, 2000 chars/line | Truncates with notice |
148
+ | ls | 100 files | Truncates with notice |
149
+ | glob | 100 results | Truncates with notice |
150
+ | webfetch | 5MB | `MAX_RESPONSE_SIZE` |
151
+
152
+ **Overflow Detection:**
153
+ - `isOverflow()` runs BEFORE each turn (not during)
154
+ - Uses last LLM-reported token count: `tokens.input + tokens.cache.read + tokens.output`
155
+ - Triggers if `count > context - maxOutput`
156
+ - Does NOT detect overflow from tool results in current turn
157
+
158
+ **Recovery - Pruning:**
159
+ - `prune()` runs AFTER each turn completes
160
+ - Walks backwards through completed tool results
161
+ - Keeps last 40k tokens of tool outputs (`PRUNE_PROTECT`)
162
+ - Removes content from older tool results (marks `time.compacted`)
163
+ - Only prunes if savings > 20k tokens (`PRUNE_MINIMUM`)
164
+ - Token estimation: `chars / 4`
165
+
166
+ **Recovery - Compaction:**
167
+ - Triggered when `isOverflow()` returns true before a turn
168
+ - LLM generates summary of conversation
169
+ - Replaces old messages with summary
170
+
171
+ **Gap:** No mid-turn protection. A single read returning 4MB would overflow. The 30KB bash limit is their primary practical protection.
172
+
173
+ ### OpenAI/Codex
174
+
175
+ **Tool Output Limits (during execution):**
176
+
177
+ | Tool | Limit | Details |
178
+ |------|-------|---------|
179
+ | shell/exec | 10k tokens or 10k bytes | Per-model `TruncationPolicy`, user-configurable |
180
+ | read_file | 2000 lines, 500 chars/line | `MAX_LINE_LENGTH = 500`, ~1MB max |
181
+ | grep_files | 100 matches | Default limit |
182
+ | list_dir | Configurable | BFS with depth limits |
183
+
184
+ **Truncation Policy:**
185
+ - Per-model family setting: `TruncationPolicy::Bytes(10_000)` or `TruncationPolicy::Tokens(10_000)`
186
+ - User can override via `tool_output_token_limit` config
187
+ - Applied to ALL tool outputs uniformly via `truncate_function_output_items_with_policy()`
188
+ - Preserves beginning and end, removes middle with `"…N tokens truncated…"` marker
189
+
190
+ **Overflow Detection:**
191
+ - After each successful turn: `if total_usage_tokens >= auto_compact_token_limit { compact() }`
192
+ - Per-model thresholds (e.g., 180k for 200k context window)
193
+ - `ContextWindowExceeded` error caught and handled
194
+
195
+ **Recovery - Compaction:**
196
+ - If tokens exceed threshold after turn, triggers `run_inline_auto_compact_task()`
197
+ - During compaction, if `ContextWindowExceeded`: removes oldest history item and retries
198
+ - Loop: `history.remove_first_item()` until it fits
199
+ - Notifies user: "Trimmed N older conversation item(s)"
200
+
201
+ **Recovery - Turn Error:**
202
+ - On `ContextWindowExceeded` during normal turn: marks tokens as full, returns error to user
203
+ - Does NOT auto-retry the failed turn
204
+ - User must manually continue
205
+
206
+ **Gap:** Still no mid-turn protection, but aggressive 10k token truncation on all tool outputs prevents most issues in practice.
207
+
208
+ ### Comparison
209
+
210
+ | Feature | pi-coding-agent | OpenCode | Codex |
211
+ |---------|-----------------|----------|-------|
212
+ | Bash limit | 10MB | 30KB | ~40KB (10k tokens) |
213
+ | Read limit | 2000×2000 (4MB) | 2000×2000 (4MB) | 2000×500 (1MB) |
214
+ | Truncation policy | None | Per-tool | Per-model, uniform |
215
+ | Token estimation | None | chars/4 | chars/4 |
216
+ | Pre-turn check | No | Yes (last tokens) | Yes (threshold) |
217
+ | Mid-turn check | No | No | No |
218
+ | Post-turn pruning | No | Yes (removes old tool output) | No |
219
+ | Overflow recovery | No | Compaction | Trim oldest + compact |
220
+
221
+ **Key insight:** None of these tools protect against mid-turn overflow. Their practical protection is aggressive static limits on tool output, especially bash. OpenCode's 30KB bash limit vs our 10MB is the critical difference.
222
+
223
+ ## Recommended Solution
224
+
225
+ ### Phase 1: Static Limits (immediate)
226
+
227
+ Add hard limits to tool outputs matching industry practice:
228
+
229
+ ```typescript
230
+ // packages/coding-agent/src/tools/limits.ts
231
+ export const MAX_TOOL_OUTPUT_CHARS = 30_000; // ~7.5k tokens, matches OpenCode bash
232
+ export const MAX_TOOL_OUTPUT_NOTICE = "\n\n...(truncated, output exceeded limit)...";
233
+ ```
234
+
235
+ Apply to all tools:
236
+ - bash: 10MB → 30KB
237
+ - read: Add 100KB total output cap
238
+ - edit: Cap diff output
239
+
240
+ ### Phase 2: Post-Tool Estimation
241
+
242
+ After `tool_execution_end`, estimate and flag:
243
+
244
+ ```typescript
245
+ let needsCompactionAfterTurn = false;
246
+
247
+ agent.subscribe(async (event) => {
248
+ if (event.type === "tool_execution_end") {
249
+ const resultChars = extractTextLength(event.result);
250
+ const estimatedTokens = Math.ceil(resultChars / 4);
251
+
252
+ const lastUsage = getLastAssistantUsage(agent.state.messages);
253
+ if (lastUsage) {
254
+ const current = calculateContextTokens(lastUsage);
255
+ const projected = current + estimatedTokens;
256
+ const threshold = agent.state.model.contextWindow - settings.reserveTokens;
257
+ if (projected > threshold) {
258
+ needsCompactionAfterTurn = true;
259
+ }
260
+ }
261
+ }
262
+
263
+ if (event.type === "turn_end" && needsCompactionAfterTurn) {
264
+ needsCompactionAfterTurn = false;
265
+ await triggerCompaction();
266
+ }
267
+ });
268
+ ```
269
+
270
+ ### Phase 3: Overflow Recovery (like Codex)
271
+
272
+ Handle `stopReason === "length"` gracefully:
273
+
274
+ ```typescript
275
+ if (event.type === "message_end" && event.message.role === "assistant") {
276
+ if (event.message.stopReason === "length") {
277
+ // Context overflow occurred
278
+ await triggerCompaction();
279
+ // Optionally: retry the turn
280
+ }
281
+ }
282
+ ```
283
+
284
+ During compaction, if it also overflows, trim oldest messages:
285
+
286
+ ```typescript
287
+ async function compactWithRetry() {
288
+ while (true) {
289
+ try {
290
+ await compact();
291
+ break;
292
+ } catch (e) {
293
+ if (isContextOverflow(e) && messages.length > 1) {
294
+ messages.shift(); // Remove oldest
295
+ continue;
296
+ }
297
+ throw e;
298
+ }
299
+ }
300
+ }
301
+ ```
302
+
303
+ ## Summary
304
+
305
+ The under-compaction problem occurs because:
306
+ 1. We only check context size after assistant messages
307
+ 2. Tool results can add arbitrary amounts of content
308
+ 3. We discover overflows only when the next LLM call fails
309
+
310
+ The fix requires:
311
+ 1. Aggressive static limits on tool output (immediate safety net)
312
+ 2. Token estimation after tool execution (proactive detection)
313
+ 3. Graceful handling of overflow errors (fallback recovery)
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@mariozechner/pi-coding-agent",
3
- "version": "0.16.0",
3
+ "version": "0.18.0",
4
4
  "description": "Coding agent CLI with read, bash, edit, write tools and session management",
5
5
  "type": "module",
6
6
  "piConfig": {
@@ -12,8 +12,19 @@
12
12
  },
13
13
  "main": "./dist/index.js",
14
14
  "types": "./dist/index.d.ts",
15
+ "exports": {
16
+ ".": {
17
+ "types": "./dist/index.d.ts",
18
+ "import": "./dist/index.js"
19
+ },
20
+ "./hooks": {
21
+ "types": "./dist/core/hooks/index.d.ts",
22
+ "import": "./dist/core/hooks/index.js"
23
+ }
24
+ },
15
25
  "files": [
16
26
  "dist",
27
+ "docs",
17
28
  "CHANGELOG.md"
18
29
  ],
19
30
  "scripts": {
@@ -21,19 +32,20 @@
21
32
  "build": "tsgo -p tsconfig.build.json && chmod +x dist/cli.js && npm run copy-assets",
22
33
  "build:binary": "npm run build && bun build --compile ./dist/cli.js --outfile dist/pi && npm run copy-binary-assets",
23
34
  "copy-assets": "mkdir -p dist/modes/interactive/theme && cp src/modes/interactive/theme/*.json dist/modes/interactive/theme/",
24
- "copy-binary-assets": "cp package.json dist/ && cp README.md dist/ && cp CHANGELOG.md dist/ && mkdir -p dist/theme && cp src/modes/interactive/theme/*.json dist/theme/",
35
+ "copy-binary-assets": "cp package.json dist/ && cp README.md dist/ && cp CHANGELOG.md dist/ && mkdir -p dist/theme && cp src/modes/interactive/theme/*.json dist/theme/ && cp -r docs dist/",
25
36
  "dev": "tsgo -p tsconfig.build.json --watch --preserveWatchOutput",
26
37
  "check": "tsgo --noEmit",
27
38
  "test": "vitest --run",
28
39
  "prepublishOnly": "npm run clean && npm run build"
29
40
  },
30
41
  "dependencies": {
31
- "@mariozechner/pi-agent-core": "^0.16.0",
32
- "@mariozechner/pi-ai": "^0.16.0",
33
- "@mariozechner/pi-tui": "^0.16.0",
42
+ "@mariozechner/pi-agent-core": "^0.18.0",
43
+ "@mariozechner/pi-ai": "^0.18.0",
44
+ "@mariozechner/pi-tui": "^0.18.0",
34
45
  "chalk": "^5.5.0",
35
46
  "diff": "^8.0.2",
36
- "glob": "^11.0.3"
47
+ "glob": "^11.0.3",
48
+ "jiti": "^2.6.1"
37
49
  },
38
50
  "devDependencies": {
39
51
  "@types/diff": "^7.0.2",