@mariozechner/pi-coding-agent 0.16.0 → 0.18.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +38 -0
- package/README.md +58 -1
- package/dist/cli/args.d.ts +1 -0
- package/dist/cli/args.d.ts.map +1 -1
- package/dist/cli/args.js +5 -0
- package/dist/cli/args.js.map +1 -1
- package/dist/config.d.ts +2 -0
- package/dist/config.d.ts.map +1 -1
- package/dist/config.js +4 -0
- package/dist/config.js.map +1 -1
- package/dist/core/agent-session.d.ts +30 -2
- package/dist/core/agent-session.d.ts.map +1 -1
- package/dist/core/agent-session.js +181 -21
- package/dist/core/agent-session.js.map +1 -1
- package/dist/core/compaction.d.ts +30 -5
- package/dist/core/compaction.d.ts.map +1 -1
- package/dist/core/compaction.js +194 -61
- package/dist/core/compaction.js.map +1 -1
- package/dist/core/hooks/index.d.ts +5 -0
- package/dist/core/hooks/index.d.ts.map +1 -0
- package/dist/core/hooks/index.js +4 -0
- package/dist/core/hooks/index.js.map +1 -0
- package/dist/core/hooks/loader.d.ts +56 -0
- package/dist/core/hooks/loader.d.ts.map +1 -0
- package/dist/core/hooks/loader.js +158 -0
- package/dist/core/hooks/loader.js.map +1 -0
- package/dist/core/hooks/runner.d.ts +69 -0
- package/dist/core/hooks/runner.d.ts.map +1 -0
- package/dist/core/hooks/runner.js +203 -0
- package/dist/core/hooks/runner.js.map +1 -0
- package/dist/core/hooks/tool-wrapper.d.ts +16 -0
- package/dist/core/hooks/tool-wrapper.d.ts.map +1 -0
- package/dist/core/hooks/tool-wrapper.js +71 -0
- package/dist/core/hooks/tool-wrapper.js.map +1 -0
- package/dist/core/hooks/types.d.ts +220 -0
- package/dist/core/hooks/types.d.ts.map +1 -0
- package/dist/core/hooks/types.js +8 -0
- package/dist/core/hooks/types.js.map +1 -0
- package/dist/core/index.d.ts +1 -0
- package/dist/core/index.d.ts.map +1 -1
- package/dist/core/index.js +1 -0
- package/dist/core/index.js.map +1 -1
- package/dist/core/session-manager.d.ts +10 -3
- package/dist/core/session-manager.d.ts.map +1 -1
- package/dist/core/session-manager.js +78 -28
- package/dist/core/session-manager.js.map +1 -1
- package/dist/core/settings-manager.d.ts +6 -0
- package/dist/core/settings-manager.d.ts.map +1 -1
- package/dist/core/settings-manager.js +14 -0
- package/dist/core/settings-manager.js.map +1 -1
- package/dist/core/system-prompt.d.ts.map +1 -1
- package/dist/core/system-prompt.js +5 -3
- package/dist/core/system-prompt.js.map +1 -1
- package/dist/core/tools/truncate.d.ts +6 -2
- package/dist/core/tools/truncate.d.ts.map +1 -1
- package/dist/core/tools/truncate.js +11 -1
- package/dist/core/tools/truncate.js.map +1 -1
- package/dist/index.d.ts +1 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js.map +1 -1
- package/dist/main.d.ts.map +1 -1
- package/dist/main.js +23 -12
- package/dist/main.js.map +1 -1
- package/dist/modes/interactive/components/bash-execution.d.ts +1 -0
- package/dist/modes/interactive/components/bash-execution.d.ts.map +1 -1
- package/dist/modes/interactive/components/bash-execution.js +17 -6
- package/dist/modes/interactive/components/bash-execution.js.map +1 -1
- package/dist/modes/interactive/components/hook-input.d.ts +12 -0
- package/dist/modes/interactive/components/hook-input.d.ts.map +1 -0
- package/dist/modes/interactive/components/hook-input.js +46 -0
- package/dist/modes/interactive/components/hook-input.js.map +1 -0
- package/dist/modes/interactive/components/hook-selector.d.ts +16 -0
- package/dist/modes/interactive/components/hook-selector.d.ts.map +1 -0
- package/dist/modes/interactive/components/hook-selector.js +76 -0
- package/dist/modes/interactive/components/hook-selector.js.map +1 -0
- package/dist/modes/interactive/components/tool-execution.d.ts.map +1 -1
- package/dist/modes/interactive/components/tool-execution.js +12 -7
- package/dist/modes/interactive/components/tool-execution.js.map +1 -1
- package/dist/modes/interactive/interactive-mode.d.ts +37 -0
- package/dist/modes/interactive/interactive-mode.d.ts.map +1 -1
- package/dist/modes/interactive/interactive-mode.js +190 -7
- package/dist/modes/interactive/interactive-mode.js.map +1 -1
- package/dist/modes/print-mode.d.ts.map +1 -1
- package/dist/modes/print-mode.js +15 -0
- package/dist/modes/print-mode.js.map +1 -1
- package/dist/modes/rpc/rpc-mode.d.ts +2 -1
- package/dist/modes/rpc/rpc-mode.d.ts.map +1 -1
- package/dist/modes/rpc/rpc-mode.js +118 -3
- package/dist/modes/rpc/rpc-mode.js.map +1 -1
- package/dist/modes/rpc/rpc-types.d.ts +41 -0
- package/dist/modes/rpc/rpc-types.d.ts.map +1 -1
- package/dist/modes/rpc/rpc-types.js.map +1 -1
- package/docs/compaction.md +519 -0
- package/docs/hooks.md +609 -0
- package/docs/rpc.md +870 -0
- package/docs/session.md +89 -0
- package/docs/theme.md +586 -0
- package/docs/truncation.md +235 -0
- package/docs/undercompaction.md +313 -0
- package/package.json +18 -6
|
@@ -0,0 +1,235 @@
|
|
|
1
|
+
# Tool Output Truncation
|
|
2
|
+
|
|
3
|
+
## Limits
|
|
4
|
+
|
|
5
|
+
- **Line limit**: 2000 lines
|
|
6
|
+
- **Byte limit**: 30KB
|
|
7
|
+
- **Grep line limit**: 500 chars per match line
|
|
8
|
+
|
|
9
|
+
Truncation stops at whichever limit is hit first. **Never return partial lines** (except in the bash edge case where the last line alone exceeds the byte limit).
|
|
10
|
+
|
|
11
|
+
---
|
|
12
|
+
|
|
13
|
+
## read
|
|
14
|
+
|
|
15
|
+
Head truncation (keeps the first N lines). The tool accepts `offset`/`limit` parameters so the LLM can continue reading from where the output was cut off.
|
|
16
|
+
|
|
17
|
+
### Scenarios
|
|
18
|
+
|
|
19
|
+
**First line > 30KB:**
|
|
20
|
+
```
|
|
21
|
+
LLM sees:
|
|
22
|
+
[Line 1 is 50KB, exceeds 30KB limit. Use bash to read: head -c 30000 path/to/file]
|
|
23
|
+
|
|
24
|
+
Details:
|
|
25
|
+
{ truncation: { truncated: true, truncatedBy: "bytes", outputLines: 0, ... } }
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
**Hit line limit (2000 lines, < 30KB):**
|
|
29
|
+
```
|
|
30
|
+
LLM sees:
|
|
31
|
+
[lines 1-2000 content]
|
|
32
|
+
|
|
33
|
+
[Showing lines 1-2000 of 5000. Use offset=2001 to continue]
|
|
34
|
+
|
|
35
|
+
Details:
|
|
36
|
+
{ truncation: { truncated: true, truncatedBy: "lines", outputLines: 2000, totalLines: 5000 } }
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
**Hit byte limit (< 2000 lines, 30KB):**
|
|
40
|
+
```
|
|
41
|
+
LLM sees:
|
|
42
|
+
[lines 1-500 content]
|
|
43
|
+
|
|
44
|
+
[Showing lines 1-500 of 5000 (30KB limit). Use offset=501 to continue]
|
|
45
|
+
|
|
46
|
+
Details:
|
|
47
|
+
{ truncation: { truncated: true, truncatedBy: "bytes", outputLines: 500, totalLines: 5000 } }
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
**With offset, hit line limit (e.g., offset=1000):**
|
|
51
|
+
```
|
|
52
|
+
LLM sees:
|
|
53
|
+
[lines 1000-2999 content]
|
|
54
|
+
|
|
55
|
+
[Showing lines 1000-2999 of 5000. Use offset=3000 to continue]
|
|
56
|
+
|
|
57
|
+
Details:
|
|
58
|
+
{ truncation: { truncatedBy: "lines", ... } }
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
**With offset, hit byte limit (e.g., offset=1000, 30KB after 500 lines):**
|
|
62
|
+
```
|
|
63
|
+
LLM sees:
|
|
64
|
+
[lines 1000-1499 content]
|
|
65
|
+
|
|
66
|
+
[Showing lines 1000-1499 of 5000 (30KB limit). Use offset=1500 to continue]
|
|
67
|
+
|
|
68
|
+
Details:
|
|
69
|
+
{ truncation: { truncatedBy: "bytes", outputLines: 500, ... } }
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
**With offset, first line at offset > 30KB (e.g., offset=1000, line 1000 is 50KB):**
|
|
73
|
+
```
|
|
74
|
+
LLM sees:
|
|
75
|
+
[Line 1000 is 50KB, exceeds 30KB limit. Use bash: sed -n '1000p' file | head -c 30000]
|
|
76
|
+
|
|
77
|
+
Details:
|
|
78
|
+
{ truncation: { truncated: true, truncatedBy: "bytes", outputLines: 0 } }
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
---
|
|
82
|
+
|
|
83
|
+
## bash
|
|
84
|
+
|
|
85
|
+
Tail truncation (keeps the last N lines). If the output is truncated, the full output is written to a temp file.
|
|
86
|
+
|
|
87
|
+
### Scenarios
|
|
88
|
+
|
|
89
|
+
**Hit line limit (2000 lines):**
|
|
90
|
+
```
|
|
91
|
+
LLM sees:
|
|
92
|
+
[lines 48001-50000 content]
|
|
93
|
+
|
|
94
|
+
[Showing lines 48001-50000 of 50000. Full output: /tmp/pi-bash-xxx.log]
|
|
95
|
+
|
|
96
|
+
Details:
|
|
97
|
+
{ truncation: { truncated: true, truncatedBy: "lines", outputLines: 2000, totalLines: 50000 }, fullOutputPath: "/tmp/..." }
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
**Hit byte limit (< 2000 lines, 30KB):**
|
|
101
|
+
```
|
|
102
|
+
LLM sees:
|
|
103
|
+
[lines 49501-50000 content]
|
|
104
|
+
|
|
105
|
+
[Showing lines 49501-50000 of 50000 (30KB limit). Full output: /tmp/pi-bash-xxx.log]
|
|
106
|
+
|
|
107
|
+
Details:
|
|
108
|
+
{ truncation: { truncatedBy: "bytes", ... }, fullOutputPath: "/tmp/..." }
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
**Last line alone > 30KB (edge case, partial OK here):**
|
|
112
|
+
```
|
|
113
|
+
LLM sees:
|
|
114
|
+
[last 30KB of final line]
|
|
115
|
+
|
|
116
|
+
[Showing last 30KB of line 50000 (line is 100KB). Full output: /tmp/pi-bash-xxx.log]
|
|
117
|
+
|
|
118
|
+
Details:
|
|
119
|
+
{ truncation: { truncatedBy: "bytes", lastLinePartial: true }, fullOutputPath: "/tmp/..." }
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
---
|
|
123
|
+
|
|
124
|
+
## grep
|
|
125
|
+
|
|
126
|
+
Head truncation. Primary limit: 100 matches. Each match line truncated to 500 chars.
|
|
127
|
+
|
|
128
|
+
### Scenarios
|
|
129
|
+
|
|
130
|
+
**Hit match limit (100 matches):**
|
|
131
|
+
```
|
|
132
|
+
LLM sees:
|
|
133
|
+
file.ts:10: matching content here...
|
|
134
|
+
file.ts:25: another match...
|
|
135
|
+
...
|
|
136
|
+
|
|
137
|
+
[100 matches limit reached. Use limit=200 for more, or refine pattern]
|
|
138
|
+
|
|
139
|
+
Details:
|
|
140
|
+
{ matchLimitReached: 100 }
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
**Hit byte limit (< 100 matches, 30KB):**
|
|
144
|
+
```
|
|
145
|
+
LLM sees:
|
|
146
|
+
[matches that fit in 30KB]
|
|
147
|
+
|
|
148
|
+
[30KB limit reached (50 of 100+ matches shown)]
|
|
149
|
+
|
|
150
|
+
Details:
|
|
151
|
+
{ truncation: { truncatedBy: "bytes", ... } }
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
**Match lines truncated (any line > 500 chars):**
|
|
155
|
+
```
|
|
156
|
+
LLM sees:
|
|
157
|
+
file.ts:10: very long matching content that exceeds 500 chars gets cut off here... [truncated]
|
|
158
|
+
file.ts:25: normal match
|
|
159
|
+
|
|
160
|
+
[Some lines truncated to 500 chars. Use read tool to see full lines]
|
|
161
|
+
|
|
162
|
+
Details:
|
|
163
|
+
{ linesTruncated: true }
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
---
|
|
167
|
+
|
|
168
|
+
## find
|
|
169
|
+
|
|
170
|
+
Head truncation. Primary limit: 1000 results. File paths only (never > 30KB each).
|
|
171
|
+
|
|
172
|
+
### Scenarios
|
|
173
|
+
|
|
174
|
+
**Hit result limit (1000 results):**
|
|
175
|
+
```
|
|
176
|
+
LLM sees:
|
|
177
|
+
src/file1.ts
|
|
178
|
+
src/file2.ts
|
|
179
|
+
[998 more paths]
|
|
180
|
+
|
|
181
|
+
[1000 results limit reached. Use limit=2000 for more, or refine pattern]
|
|
182
|
+
|
|
183
|
+
Details:
|
|
184
|
+
{ resultLimitReached: 1000 }
|
|
185
|
+
```
|
|
186
|
+
|
|
187
|
+
**Hit byte limit (unlikely, < 1000 results, 30KB):**
|
|
188
|
+
```
|
|
189
|
+
LLM sees:
|
|
190
|
+
[paths that fit]
|
|
191
|
+
|
|
192
|
+
[30KB limit reached]
|
|
193
|
+
|
|
194
|
+
Details:
|
|
195
|
+
{ truncation: { truncatedBy: "bytes", ... } }
|
|
196
|
+
```
|
|
197
|
+
|
|
198
|
+
---
|
|
199
|
+
|
|
200
|
+
## ls
|
|
201
|
+
|
|
202
|
+
Head truncation. Primary limit: 500 entries. Entry names only (never > 30KB each).
|
|
203
|
+
|
|
204
|
+
### Scenarios
|
|
205
|
+
|
|
206
|
+
**Hit entry limit (500 entries):**
|
|
207
|
+
```
|
|
208
|
+
LLM sees:
|
|
209
|
+
.gitignore
|
|
210
|
+
README.md
|
|
211
|
+
src/
|
|
212
|
+
[497 more entries]
|
|
213
|
+
|
|
214
|
+
[500 entries limit reached. Use limit=1000 for more]
|
|
215
|
+
|
|
216
|
+
Details:
|
|
217
|
+
{ entryLimitReached: 500 }
|
|
218
|
+
```
|
|
219
|
+
|
|
220
|
+
**Hit byte limit (unlikely):**
|
|
221
|
+
```
|
|
222
|
+
LLM sees:
|
|
223
|
+
[entries that fit]
|
|
224
|
+
|
|
225
|
+
[30KB limit reached]
|
|
226
|
+
|
|
227
|
+
Details:
|
|
228
|
+
{ truncation: { truncatedBy: "bytes", ... } }
|
|
229
|
+
```
|
|
230
|
+
|
|
231
|
+
---
|
|
232
|
+
|
|
233
|
+
## TUI Display
|
|
234
|
+
|
|
235
|
+
`tool-execution.ts` reads `details.truncation` and related fields to display truncation notices in warning color. The LLM text content and TUI display show the same information.
|
|
@@ -0,0 +1,313 @@
|
|
|
1
|
+
# Under-Compaction Analysis
|
|
2
|
+
|
|
3
|
+
## Problem Statement
|
|
4
|
+
|
|
5
|
+
Auto-compaction triggers too late, causing context window overflows that result in failed LLM calls with `stopReason == "length"`.
|
|
6
|
+
|
|
7
|
+
## Architecture Overview
|
|
8
|
+
|
|
9
|
+
### Event Flow
|
|
10
|
+
|
|
11
|
+
```
|
|
12
|
+
User prompt
|
|
13
|
+
│
|
|
14
|
+
▼
|
|
15
|
+
agent.prompt()
|
|
16
|
+
│
|
|
17
|
+
▼
|
|
18
|
+
agentLoop() in packages/ai/src/agent/agent-loop.ts
|
|
19
|
+
│
|
|
20
|
+
├─► streamAssistantResponse()
|
|
21
|
+
│ │
|
|
22
|
+
│ ▼
|
|
23
|
+
│ LLM provider (Anthropic, OpenAI, etc.)
|
|
24
|
+
│ │
|
|
25
|
+
│ ▼
|
|
26
|
+
│ Events: message_start → message_update* → message_end
|
|
27
|
+
│ │
|
|
28
|
+
│ ▼
|
|
29
|
+
│ AssistantMessage with usage stats (input, output, cacheRead, cacheWrite)
|
|
30
|
+
│
|
|
31
|
+
├─► If assistant has tool calls:
|
|
32
|
+
│ │
|
|
33
|
+
│ ▼
|
|
34
|
+
│ executeToolCalls()
|
|
35
|
+
│ │
|
|
36
|
+
│ ├─► tool_execution_start (toolCallId, toolName, args)
|
|
37
|
+
│ │
|
|
38
|
+
│ ├─► tool.execute() runs (read, bash, write, edit, etc.)
|
|
39
|
+
│ │
|
|
40
|
+
│ ├─► tool_execution_end (toolCallId, toolName, result, isError)
|
|
41
|
+
│ │
|
|
42
|
+
│ └─► message_start + message_end for ToolResultMessage
|
|
43
|
+
│
|
|
44
|
+
└─► Loop continues until no more tool calls
|
|
45
|
+
│
|
|
46
|
+
▼
|
|
47
|
+
agent_end
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
### Token Usage Reporting
|
|
51
|
+
|
|
52
|
+
Token usage is ONLY available in `AssistantMessage.usage` after the LLM responds:
|
|
53
|
+
|
|
54
|
+
```typescript
|
|
55
|
+
// From packages/ai/src/types.ts
|
|
56
|
+
export interface Usage {
|
|
57
|
+
input: number; // Tokens in the request
|
|
58
|
+
output: number; // Tokens generated
|
|
59
|
+
cacheRead: number; // Cached tokens read
|
|
60
|
+
cacheWrite: number; // Cached tokens written
|
|
61
|
+
cost: Cost;
|
|
62
|
+
}
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
The `input` field represents the total context size sent to the LLM, which includes:
|
|
66
|
+
- System prompt
|
|
67
|
+
- All conversation messages
|
|
68
|
+
- All tool results from previous calls
|
|
69
|
+
|
|
70
|
+
### Current Compaction Check
|
|
71
|
+
|
|
72
|
+
Both TUI (`tui-renderer.ts`) and RPC (`main.ts`) modes check compaction identically:
|
|
73
|
+
|
|
74
|
+
```typescript
|
|
75
|
+
// In agent.subscribe() callback:
|
|
76
|
+
if (event.type === "message_end") {
|
|
77
|
+
// ...
|
|
78
|
+
if (event.message.role === "assistant") {
|
|
79
|
+
await checkAutoCompaction();
|
|
80
|
+
}
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
async function checkAutoCompaction() {
|
|
84
|
+
// Get last non-aborted assistant message
|
|
85
|
+
const messages = agent.state.messages;
|
|
86
|
+
let lastAssistant = findLastNonAbortedAssistant(messages);
|
|
87
|
+
if (!lastAssistant) return;
|
|
88
|
+
|
|
89
|
+
const contextTokens = calculateContextTokens(lastAssistant.usage);
|
|
90
|
+
const contextWindow = agent.state.model.contextWindow;
|
|
91
|
+
|
|
92
|
+
if (!shouldCompact(contextTokens, contextWindow, settings)) return;
|
|
93
|
+
|
|
94
|
+
// Trigger compaction...
|
|
95
|
+
}
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
**The check happens on `message_end` for assistant messages only.**
|
|
99
|
+
|
|
100
|
+
## The Under-Compaction Problem
|
|
101
|
+
|
|
102
|
+
### Failure Scenario
|
|
103
|
+
|
|
104
|
+
```
|
|
105
|
+
Context window: 200,000 tokens
|
|
106
|
+
Reserve tokens: 16,384 (default)
|
|
107
|
+
Threshold: 200,000 - 16,384 = 183,616
|
|
108
|
+
|
|
109
|
+
Turn N:
|
|
110
|
+
1. Assistant message received, usage shows 180,000 tokens
|
|
111
|
+
2. shouldCompact(180000, 200000, settings) → 180000 > 183616 → FALSE (compaction is not triggered)
|
|
112
|
+
3. Tool executes: `cat large-file.txt` → outputs 100KB (~25,000 tokens)
|
|
113
|
+
4. Context now effectively 205,000 tokens, but we don't know this
|
|
114
|
+
5. Next LLM call fails: context exceeds 200,000 window
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
The problem occurs when:
|
|
118
|
+
1. Context is below threshold (so compaction doesn't trigger)
|
|
119
|
+
2. A tool adds enough content to push it over the window limit
|
|
120
|
+
3. We only discover this when the next LLM call fails
|
|
121
|
+
|
|
122
|
+
### Root Cause
|
|
123
|
+
|
|
124
|
+
1. **Token counts are retrospective**: We only learn the context size AFTER the LLM processes it
|
|
125
|
+
2. **Tool results are blind spots**: When a tool executes and returns a large result, we don't know how many tokens it adds until the next LLM call
|
|
126
|
+
3. **No estimation before submission**: We submit the context and hope it fits
|
|
127
|
+
|
|
128
|
+
## Current Tool Output Limits
|
|
129
|
+
|
|
130
|
+
| Tool | Our Limit | Worst Case |
|
|
131
|
+
|------|-----------|------------|
|
|
132
|
+
| bash | 10MB per stream | 20MB (~5M tokens) |
|
|
133
|
+
| read | 2000 lines × 2000 chars | 4MB (~1M tokens) |
|
|
134
|
+
| write | Byte count only | Minimal |
|
|
135
|
+
| edit | Diff output | Variable |
|
|
136
|
+
|
|
137
|
+
## How Other Tools Handle This
|
|
138
|
+
|
|
139
|
+
### SST/OpenCode
|
|
140
|
+
|
|
141
|
+
**Tool Output Limits (during execution):**
|
|
142
|
+
|
|
143
|
+
| Tool | Limit | Details |
|
|
144
|
+
|------|-------|---------|
|
|
145
|
+
| bash | 30KB chars | `MAX_OUTPUT_LENGTH = 30_000`, truncates with notice |
|
|
146
|
+
| read | 2000 lines × 2000 chars/line | No total cap, theoretically 4MB |
|
|
147
|
+
| grep | 100 matches, 2000 chars/line | Truncates with notice |
|
|
148
|
+
| ls | 100 files | Truncates with notice |
|
|
149
|
+
| glob | 100 results | Truncates with notice |
|
|
150
|
+
| webfetch | 5MB | `MAX_RESPONSE_SIZE` |
|
|
151
|
+
|
|
152
|
+
**Overflow Detection:**
|
|
153
|
+
- `isOverflow()` runs BEFORE each turn (not during)
|
|
154
|
+
- Uses last LLM-reported token count: `tokens.input + tokens.cache.read + tokens.output`
|
|
155
|
+
- Triggers if `count > context - maxOutput`
|
|
156
|
+
- Does NOT detect overflow from tool results in current turn
|
|
157
|
+
|
|
158
|
+
**Recovery - Pruning:**
|
|
159
|
+
- `prune()` runs AFTER each turn completes
|
|
160
|
+
- Walks backwards through completed tool results
|
|
161
|
+
- Keeps last 40k tokens of tool outputs (`PRUNE_PROTECT`)
|
|
162
|
+
- Removes content from older tool results (marks `time.compacted`)
|
|
163
|
+
- Only prunes if savings > 20k tokens (`PRUNE_MINIMUM`)
|
|
164
|
+
- Token estimation: `chars / 4`
|
|
165
|
+
|
|
166
|
+
**Recovery - Compaction:**
|
|
167
|
+
- Triggered when `isOverflow()` returns true before a turn
|
|
168
|
+
- LLM generates summary of conversation
|
|
169
|
+
- Replaces old messages with summary
|
|
170
|
+
|
|
171
|
+
**Gap:** No mid-turn protection. A single read returning 4MB would overflow. The 30KB bash limit is their primary practical protection.
|
|
172
|
+
|
|
173
|
+
### OpenAI/Codex
|
|
174
|
+
|
|
175
|
+
**Tool Output Limits (during execution):**
|
|
176
|
+
|
|
177
|
+
| Tool | Limit | Details |
|
|
178
|
+
|------|-------|---------|
|
|
179
|
+
| shell/exec | 10k tokens or 10k bytes | Per-model `TruncationPolicy`, user-configurable |
|
|
180
|
+
| read_file | 2000 lines, 500 chars/line | `MAX_LINE_LENGTH = 500`, ~1MB max |
|
|
181
|
+
| grep_files | 100 matches | Default limit |
|
|
182
|
+
| list_dir | Configurable | BFS with depth limits |
|
|
183
|
+
|
|
184
|
+
**Truncation Policy:**
|
|
185
|
+
- Per-model family setting: `TruncationPolicy::Bytes(10_000)` or `TruncationPolicy::Tokens(10_000)`
|
|
186
|
+
- User can override via `tool_output_token_limit` config
|
|
187
|
+
- Applied to ALL tool outputs uniformly via `truncate_function_output_items_with_policy()`
|
|
188
|
+
- Preserves beginning and end, removes middle with `"…N tokens truncated…"` marker
|
|
189
|
+
|
|
190
|
+
**Overflow Detection:**
|
|
191
|
+
- After each successful turn: `if total_usage_tokens >= auto_compact_token_limit { compact() }`
|
|
192
|
+
- Per-model thresholds (e.g., 180k for 200k context window)
|
|
193
|
+
- `ContextWindowExceeded` error caught and handled
|
|
194
|
+
|
|
195
|
+
**Recovery - Compaction:**
|
|
196
|
+
- If tokens exceed threshold after turn, triggers `run_inline_auto_compact_task()`
|
|
197
|
+
- During compaction, if `ContextWindowExceeded`: removes oldest history item and retries
|
|
198
|
+
- Loop: `history.remove_first_item()` until it fits
|
|
199
|
+
- Notifies user: "Trimmed N older conversation item(s)"
|
|
200
|
+
|
|
201
|
+
**Recovery - Turn Error:**
|
|
202
|
+
- On `ContextWindowExceeded` during normal turn: marks tokens as full, returns error to user
|
|
203
|
+
- Does NOT auto-retry the failed turn
|
|
204
|
+
- User must manually continue
|
|
205
|
+
|
|
206
|
+
**Gap:** Still no mid-turn protection, but aggressive 10k token truncation on all tool outputs prevents most issues in practice.
|
|
207
|
+
|
|
208
|
+
### Comparison
|
|
209
|
+
|
|
210
|
+
| Feature | pi-coding-agent | OpenCode | Codex |
|
|
211
|
+
|---------|-----------------|----------|-------|
|
|
212
|
+
| Bash limit | 10MB | 30KB | ~40KB (10k tokens) |
|
|
213
|
+
| Read limit | 2000×2000 (4MB) | 2000×2000 (4MB) | 2000×500 (1MB) |
|
|
214
|
+
| Truncation policy | None | Per-tool | Per-model, uniform |
|
|
215
|
+
| Token estimation | None | chars/4 | chars/4 |
|
|
216
|
+
| Pre-turn check | No | Yes (last tokens) | Yes (threshold) |
|
|
217
|
+
| Mid-turn check | No | No | No |
|
|
218
|
+
| Post-turn pruning | No | Yes (removes old tool output) | No |
|
|
219
|
+
| Overflow recovery | No | Compaction | Trim oldest + compact |
|
|
220
|
+
|
|
221
|
+
**Key insight:** None of these tools protect against mid-turn overflow. Their practical protection is aggressive static limits on tool output, especially bash. OpenCode's 30KB bash limit vs our 10MB is the critical difference.
|
|
222
|
+
|
|
223
|
+
## Recommended Solution
|
|
224
|
+
|
|
225
|
+
### Phase 1: Static Limits (immediate)
|
|
226
|
+
|
|
227
|
+
Add hard limits to tool outputs matching industry practice:
|
|
228
|
+
|
|
229
|
+
```typescript
|
|
230
|
+
// packages/coding-agent/src/tools/limits.ts
|
|
231
|
+
export const MAX_TOOL_OUTPUT_CHARS = 30_000; // ~7.5k tokens, matches OpenCode bash
|
|
232
|
+
export const MAX_TOOL_OUTPUT_NOTICE = "\n\n...(truncated, output exceeded limit)...";
|
|
233
|
+
```
|
|
234
|
+
|
|
235
|
+
Apply to all tools:
|
|
236
|
+
- bash: 10MB → 30KB
|
|
237
|
+
- read: Add 100KB total output cap
|
|
238
|
+
- edit: Cap diff output
|
|
239
|
+
|
|
240
|
+
### Phase 2: Post-Tool Estimation
|
|
241
|
+
|
|
242
|
+
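After each `tool_execution_end`, estimate the tokens the tool result adds (chars / 4) and flag the turn for compaction if the projected context size exceeds the threshold: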
|
|
243
|
+
|
|
244
|
+
```typescript
|
|
245
|
+
let needsCompactionAfterTurn = false;
|
|
246
|
+
|
|
247
|
+
agent.subscribe(async (event) => {
|
|
248
|
+
if (event.type === "tool_execution_end") {
|
|
249
|
+
const resultChars = extractTextLength(event.result);
|
|
250
|
+
const estimatedTokens = Math.ceil(resultChars / 4);
|
|
251
|
+
|
|
252
|
+
const lastUsage = getLastAssistantUsage(agent.state.messages);
|
|
253
|
+
if (lastUsage) {
|
|
254
|
+
const current = calculateContextTokens(lastUsage);
|
|
255
|
+
const projected = current + estimatedTokens;
|
|
256
|
+
const threshold = agent.state.model.contextWindow - settings.reserveTokens;
|
|
257
|
+
if (projected > threshold) {
|
|
258
|
+
needsCompactionAfterTurn = true;
|
|
259
|
+
}
|
|
260
|
+
}
|
|
261
|
+
}
|
|
262
|
+
|
|
263
|
+
if (event.type === "turn_end" && needsCompactionAfterTurn) {
|
|
264
|
+
needsCompactionAfterTurn = false;
|
|
265
|
+
await triggerCompaction();
|
|
266
|
+
}
|
|
267
|
+
});
|
|
268
|
+
```
|
|
269
|
+
|
|
270
|
+
### Phase 3: Overflow Recovery (like Codex)
|
|
271
|
+
|
|
272
|
+
Handle `stopReason === "length"` gracefully:
|
|
273
|
+
|
|
274
|
+
```typescript
|
|
275
|
+
if (event.type === "message_end" && event.message.role === "assistant") {
|
|
276
|
+
if (event.message.stopReason === "length") {
|
|
277
|
+
// Context overflow occurred
|
|
278
|
+
await triggerCompaction();
|
|
279
|
+
// Optionally: retry the turn
|
|
280
|
+
}
|
|
281
|
+
}
|
|
282
|
+
```
|
|
283
|
+
|
|
284
|
+
If the compaction request itself overflows the context window, remove the oldest message and retry until it fits:
|
|
285
|
+
|
|
286
|
+
```typescript
|
|
287
|
+
async function compactWithRetry() {
|
|
288
|
+
while (true) {
|
|
289
|
+
try {
|
|
290
|
+
await compact();
|
|
291
|
+
break;
|
|
292
|
+
} catch (e) {
|
|
293
|
+
if (isContextOverflow(e) && messages.length > 1) {
|
|
294
|
+
messages.shift(); // Remove oldest
|
|
295
|
+
continue;
|
|
296
|
+
}
|
|
297
|
+
throw e;
|
|
298
|
+
}
|
|
299
|
+
}
|
|
300
|
+
}
|
|
301
|
+
```
|
|
302
|
+
|
|
303
|
+
## Summary
|
|
304
|
+
|
|
305
|
+
The under-compaction problem occurs because:
|
|
306
|
+
1. We only check context size after assistant messages
|
|
307
|
+
2. Tool results can add arbitrary amounts of content
|
|
308
|
+
3. We discover overflows only when the next LLM call fails
|
|
309
|
+
|
|
310
|
+
The fix requires:
|
|
311
|
+
1. Aggressive static limits on tool output (immediate safety net)
|
|
312
|
+
2. Token estimation after tool execution (proactive detection)
|
|
313
|
+
3. Graceful handling of overflow errors (fallback recovery)
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@mariozechner/pi-coding-agent",
|
|
3
|
-
"version": "0.16.0",
|
|
3
|
+
"version": "0.18.0",
|
|
4
4
|
"description": "Coding agent CLI with read, bash, edit, write tools and session management",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"piConfig": {
|
|
@@ -12,8 +12,19 @@
|
|
|
12
12
|
},
|
|
13
13
|
"main": "./dist/index.js",
|
|
14
14
|
"types": "./dist/index.d.ts",
|
|
15
|
+
"exports": {
|
|
16
|
+
".": {
|
|
17
|
+
"types": "./dist/index.d.ts",
|
|
18
|
+
"import": "./dist/index.js"
|
|
19
|
+
},
|
|
20
|
+
"./hooks": {
|
|
21
|
+
"types": "./dist/core/hooks/index.d.ts",
|
|
22
|
+
"import": "./dist/core/hooks/index.js"
|
|
23
|
+
}
|
|
24
|
+
},
|
|
15
25
|
"files": [
|
|
16
26
|
"dist",
|
|
27
|
+
"docs",
|
|
17
28
|
"CHANGELOG.md"
|
|
18
29
|
],
|
|
19
30
|
"scripts": {
|
|
@@ -21,19 +32,20 @@
|
|
|
21
32
|
"build": "tsgo -p tsconfig.build.json && chmod +x dist/cli.js && npm run copy-assets",
|
|
22
33
|
"build:binary": "npm run build && bun build --compile ./dist/cli.js --outfile dist/pi && npm run copy-binary-assets",
|
|
23
34
|
"copy-assets": "mkdir -p dist/modes/interactive/theme && cp src/modes/interactive/theme/*.json dist/modes/interactive/theme/",
|
|
24
|
-
"copy-binary-assets": "cp package.json dist/ && cp README.md dist/ && cp CHANGELOG.md dist/ && mkdir -p dist/theme && cp src/modes/interactive/theme/*.json dist/theme/",
|
|
35
|
+
"copy-binary-assets": "cp package.json dist/ && cp README.md dist/ && cp CHANGELOG.md dist/ && mkdir -p dist/theme && cp src/modes/interactive/theme/*.json dist/theme/ && cp -r docs dist/",
|
|
25
36
|
"dev": "tsgo -p tsconfig.build.json --watch --preserveWatchOutput",
|
|
26
37
|
"check": "tsgo --noEmit",
|
|
27
38
|
"test": "vitest --run",
|
|
28
39
|
"prepublishOnly": "npm run clean && npm run build"
|
|
29
40
|
},
|
|
30
41
|
"dependencies": {
|
|
31
|
-
"@mariozechner/pi-agent-core": "^0.
|
|
32
|
-
"@mariozechner/pi-ai": "^0.
|
|
33
|
-
"@mariozechner/pi-tui": "^0.
|
|
42
|
+
"@mariozechner/pi-agent-core": "^0.18.0",
|
|
43
|
+
"@mariozechner/pi-ai": "^0.18.0",
|
|
44
|
+
"@mariozechner/pi-tui": "^0.18.0",
|
|
34
45
|
"chalk": "^5.5.0",
|
|
35
46
|
"diff": "^8.0.2",
|
|
36
|
-
"glob": "^11.0.3"
|
|
47
|
+
"glob": "^11.0.3",
|
|
48
|
+
"jiti": "^2.6.1"
|
|
37
49
|
},
|
|
38
50
|
"devDependencies": {
|
|
39
51
|
"@types/diff": "^7.0.2",
|