@agi-cli/server 0.1.160 → 0.1.161

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@agi-cli/server",
3
- "version": "0.1.160",
3
+ "version": "0.1.161",
4
4
  "description": "HTTP API server for AGI CLI",
5
5
  "type": "module",
6
6
  "main": "./src/index.ts",
@@ -29,8 +29,8 @@
29
29
  "typecheck": "tsc --noEmit"
30
30
  },
31
31
  "dependencies": {
32
- "@agi-cli/sdk": "0.1.160",
33
- "@agi-cli/database": "0.1.160",
32
+ "@agi-cli/sdk": "0.1.161",
33
+ "@agi-cli/database": "0.1.161",
34
34
  "drizzle-orm": "^0.44.5",
35
35
  "hono": "^4.9.9",
36
36
  "zod": "^4.1.8"
@@ -187,6 +187,13 @@ async function runAssistant(opts: RunOpts) {
187
187
  if (part.type === 'text-delta') {
188
188
  const delta = part.text;
189
189
  if (!delta) continue;
190
+
191
+ accumulated += delta;
192
+
193
+ if (!currentPartId && !accumulated.trim()) {
194
+ continue;
195
+ }
196
+
190
197
  if (!firstDeltaSeen) {
191
198
  firstDeltaSeen = true;
192
199
  streamStartTimer.end();
@@ -208,7 +215,7 @@ async function runAssistant(opts: RunOpts) {
208
215
  index: await sharedCtx.nextIndex(),
209
216
  stepIndex: null,
210
217
  type: 'text',
211
- content: JSON.stringify({ text: '' }),
218
+ content: JSON.stringify({ text: accumulated }),
212
219
  agent: opts.agent,
213
220
  provider: opts.provider,
214
221
  model: opts.model,
@@ -216,7 +223,6 @@ async function runAssistant(opts: RunOpts) {
216
223
  });
217
224
  }
218
225
 
219
- accumulated += delta;
220
226
  publish({
221
227
  type: 'message.part.delta',
222
228
  sessionId: opts.sessionId,
@@ -1,6 +1,6 @@
1
1
  import type { getDb } from '@agi-cli/database';
2
2
  import { messages, messageParts } from '@agi-cli/database/schema';
3
- import { eq, asc } from 'drizzle-orm';
3
+ import { eq, asc, desc } from 'drizzle-orm';
4
4
 
5
5
  export async function buildCompactionContext(
6
6
  db: Awaited<ReturnType<typeof getDb>>,
@@ -11,17 +11,22 @@ export async function buildCompactionContext(
11
11
  .select()
12
12
  .from(messages)
13
13
  .where(eq(messages.sessionId, sessionId))
14
- .orderBy(asc(messages.createdAt));
14
+ .orderBy(desc(messages.createdAt));
15
15
 
16
- const lines: string[] = [];
17
- let totalChars = 0;
18
16
  const maxChars = contextTokenLimit ? contextTokenLimit * 4 : 60000;
17
+ const recentBudget = Math.floor(maxChars * 0.65);
18
+ const olderBudget = maxChars - recentBudget;
19
+
20
+ const recentLines: string[] = [];
21
+ const olderLines: string[] = [];
22
+ let recentChars = 0;
23
+ let olderChars = 0;
24
+ let userTurns = 0;
25
+ let inRecent = true;
19
26
 
20
27
  for (const msg of allMessages) {
21
- if (totalChars > maxChars) {
22
- lines.unshift('[...earlier content truncated...]');
23
- break;
24
- }
28
+ if (msg.role === 'user') userTurns++;
29
+ if (userTurns > 3 && inRecent) inRecent = false;
25
30
 
26
31
  const parts = await db
27
32
  .select()
@@ -37,28 +42,60 @@ export async function buildCompactionContext(
37
42
 
38
43
  if (part.type === 'text' && content.text) {
39
44
  const text = `[${msg.role.toUpperCase()}]: ${content.text}`;
40
- lines.push(text.slice(0, 3000));
41
- totalChars += text.length;
45
+ const limit = inRecent ? 3000 : 1000;
46
+ const line = text.slice(0, limit);
47
+
48
+ if (inRecent && recentChars < recentBudget) {
49
+ recentLines.unshift(line);
50
+ recentChars += line.length;
51
+ } else if (olderChars < olderBudget) {
52
+ olderLines.unshift(line);
53
+ olderChars += line.length;
54
+ }
42
55
  } else if (part.type === 'tool_call' && content.name) {
43
- const argsStr =
44
- typeof content.args === 'object'
45
- ? JSON.stringify(content.args).slice(0, 500)
46
- : '';
47
- const text = `[TOOL ${content.name}]: ${argsStr}`;
48
- lines.push(text);
49
- totalChars += text.length;
56
+ if (inRecent && recentChars < recentBudget) {
57
+ const argsStr =
58
+ typeof content.args === 'object'
59
+ ? JSON.stringify(content.args).slice(0, 1000)
60
+ : '';
61
+ const line = `[TOOL ${content.name}]: ${argsStr}`;
62
+ recentLines.unshift(line);
63
+ recentChars += line.length;
64
+ } else if (olderChars < olderBudget) {
65
+ const line = `[TOOL ${content.name}]`;
66
+ olderLines.unshift(line);
67
+ olderChars += line.length;
68
+ }
50
69
  } else if (part.type === 'tool_result' && content.result !== null) {
51
70
  const resultStr =
52
71
  typeof content.result === 'string'
53
- ? content.result.slice(0, 1500)
54
- : JSON.stringify(content.result ?? '').slice(0, 1500);
55
- const text = `[RESULT]: ${resultStr}`;
56
- lines.push(text);
57
- totalChars += text.length;
72
+ ? content.result
73
+ : JSON.stringify(content.result ?? '');
74
+
75
+ if (inRecent && recentChars < recentBudget) {
76
+ const line = `[RESULT]: ${resultStr.slice(0, 2000)}`;
77
+ recentLines.unshift(line);
78
+ recentChars += line.length;
79
+ } else if (olderChars < olderBudget) {
80
+ const line = `[RESULT]: ${resultStr.slice(0, 150)}...`;
81
+ olderLines.unshift(line);
82
+ olderChars += line.length;
83
+ }
58
84
  }
59
85
  } catch {}
60
86
  }
87
+
88
+ if (olderChars >= olderBudget) break;
89
+ }
90
+
91
+ const result: string[] = [];
92
+ if (olderLines.length > 0) {
93
+ result.push('[...older conversation (tool data truncated)...]');
94
+ result.push(...olderLines);
95
+ result.push('');
96
+ result.push('[--- Recent conversation (full detail) ---]');
61
97
  }
98
+ result.push(...recentLines);
62
99
 
63
- return lines.join('\n');
100
+ return result.join('\n');
64
101
  }
@@ -5,13 +5,20 @@ export function isCompactCommand(content: string): boolean {
5
5
 
6
6
  export function getCompactionSystemPrompt(): string {
7
7
  return `
8
- The user has requested to compact the conversation. Generate a comprehensive summary that captures:
8
+ The conversation context is being compacted. The provided context is structured with
9
+ RECENT conversation in full detail at the end, and OLDER conversation (with truncated tool data) at the start.
9
10
 
10
- 1. **Main Goals**: What was the user trying to accomplish?
11
- 2. **Key Actions**: What files were created, modified, or deleted?
12
- 3. **Important Decisions**: What approaches or solutions were chosen and why?
13
- 4. **Current State**: What is done and what might be pending?
14
- 5. **Critical Context**: Any gotchas, errors encountered, or important details for continuing.
11
+ Generate a comprehensive summary that captures:
12
+
13
+ 1. **Current State**: What was the most recent task? What is the current state of the work RIGHT NOW?
14
+ 2. **Key Changes Made**: What files were created, modified, or deleted? Summarize recent code changes.
15
+ 3. **Main Goals**: What is the user trying to accomplish overall?
16
+ 4. **Important Decisions**: What approaches or solutions were chosen and why?
17
+ 5. **Pending Work**: What remains to be done? Any known issues or blockers?
18
+ 6. **Critical Context**: Any gotchas, errors encountered, or important details for continuing.
19
+
20
+ IMPORTANT: Prioritize the RECENT conversation. The summary must allow seamless continuation
21
+ of work. Focus on what was just done and what comes next — not the early parts of the conversation.
15
22
 
16
23
  Format your response as a clear, structured summary. Start with "📦 **Context Compacted**" header.
17
24
  Keep under 2000 characters but be thorough. This summary will replace detailed tool history.
@@ -1,6 +1,6 @@
1
1
  import type { getDb } from '@agi-cli/database';
2
2
  import { messages, messageParts } from '@agi-cli/database/schema';
3
- import { eq, desc, and, lt } from 'drizzle-orm';
3
+ import { eq, asc, and, lt } from 'drizzle-orm';
4
4
  import { debugLog } from '../debug/index.ts';
5
5
  import { estimateTokens, PRUNE_PROTECT } from './compaction-limits.ts';
6
6
 
@@ -35,33 +35,22 @@ export async function markSessionCompacted(
35
35
  lt(messages.createdAt, cutoffTime),
36
36
  ),
37
37
  )
38
- .orderBy(desc(messages.createdAt));
38
+ .orderBy(asc(messages.createdAt));
39
39
 
40
- let totalTokens = 0;
41
- let compactedTokens = 0;
42
- const toCompact: Array<{ id: string; content: string }> = [];
43
- let turns = 0;
40
+ type PartInfo = { id: string; tokens: number };
41
+ const allToolParts: PartInfo[] = [];
42
+ let totalToolTokens = 0;
44
43
 
45
44
  for (const msg of oldMessages) {
46
- if (msg.role === 'user') {
47
- turns++;
48
- }
49
-
50
- if (turns < 2) continue;
51
-
52
45
  const parts = await db
53
46
  .select()
54
47
  .from(messageParts)
55
48
  .where(eq(messageParts.messageId, msg.id))
56
- .orderBy(desc(messageParts.index));
49
+ .orderBy(asc(messageParts.index));
57
50
 
58
51
  for (const part of parts) {
59
52
  if (part.type !== 'tool_call' && part.type !== 'tool_result') continue;
60
-
61
- if (part.toolName && PROTECTED_TOOLS.includes(part.toolName)) {
62
- continue;
63
- }
64
-
53
+ if (part.toolName && PROTECTED_TOOLS.includes(part.toolName)) continue;
65
54
  if (part.compactedAt) continue;
66
55
 
67
56
  let content: { result?: unknown; args?: unknown };
@@ -78,18 +67,25 @@ export async function markSessionCompacted(
78
67
  : JSON.stringify(content.result ?? '')
79
68
  : JSON.stringify(content.args ?? '');
80
69
 
81
- const estimate = estimateTokens(contentStr);
82
- totalTokens += estimate;
83
-
84
- if (totalTokens > PRUNE_PROTECT) {
85
- compactedTokens += estimate;
86
- toCompact.push({ id: part.id, content: part.content ?? '{}' });
87
- }
70
+ const tokens = estimateTokens(contentStr);
71
+ totalToolTokens += tokens;
72
+ allToolParts.push({ id: part.id, tokens });
88
73
  }
89
74
  }
90
75
 
76
+ const tokensToFree = Math.max(0, totalToolTokens - PRUNE_PROTECT);
77
+
78
+ const toCompact: PartInfo[] = [];
79
+ let freedTokens = 0;
80
+
81
+ for (const part of allToolParts) {
82
+ if (freedTokens >= tokensToFree) break;
83
+ freedTokens += part.tokens;
84
+ toCompact.push(part);
85
+ }
86
+
91
87
  debugLog(
92
- `[compaction] Found ${toCompact.length} parts to compact, saving ~${compactedTokens} tokens`,
88
+ `[compaction] Found ${toCompact.length} parts to compact (oldest first), saving ~${freedTokens} tokens`,
93
89
  );
94
90
 
95
91
  if (toCompact.length > 0) {
@@ -111,5 +107,5 @@ export async function markSessionCompacted(
111
107
  debugLog(`[compaction] Marked ${toCompact.length} parts as compacted`);
112
108
  }
113
109
 
114
- return { compacted: toCompact.length, saved: compactedTokens };
110
+ return { compacted: toCompact.length, saved: freedTokens };
115
111
  }
@@ -337,7 +337,7 @@ export async function cleanupEmptyTextParts(
337
337
  try {
338
338
  t = JSON.parse(p.content || '{}')?.text || '';
339
339
  } catch {}
340
- if (!t || t.length === 0) {
340
+ if (!t || !t.trim()) {
341
341
  await db.delete(messageParts).where(eq(messageParts.id, p.id));
342
342
  }
343
343
  }
@@ -16,6 +16,7 @@ export type RunOpts = {
16
16
  isCompactCommand?: boolean;
17
17
  compactionContext?: string;
18
18
  toolApprovalMode?: ToolApprovalMode;
19
+ compactionRetries?: number;
19
20
  };
20
21
 
21
22
  export type QueuedMessage = {
@@ -187,66 +187,103 @@ export function createErrorHandler(
187
187
  debugLog(
188
188
  '[stream-handlers] Prompt too long detected, auto-compacting...',
189
189
  );
190
- let compactionSucceeded = false;
191
- try {
192
- const publishWrapper = (event: {
193
- type: string;
194
- sessionId: string;
195
- payload: Record<string, unknown>;
196
- }) => {
197
- publish(event as Parameters<typeof publish>[0]);
198
- };
199
- const compactResult = await performAutoCompaction(
200
- db,
201
- opts.sessionId,
202
- opts.assistantMessageId,
203
- publishWrapper,
204
- opts.provider,
205
- opts.model,
190
+
191
+ const retries = opts.compactionRetries ?? 0;
192
+ if (retries >= 2) {
193
+ debugLog(
194
+ '[stream-handlers] Compaction retry limit reached, surfacing error',
206
195
  );
207
- if (compactResult.success) {
208
- debugLog(
209
- `[stream-handlers] Auto-compaction succeeded: ${compactResult.summary?.slice(0, 100)}...`,
210
- );
211
- compactionSucceeded = true;
212
- } else {
213
- debugLog(
214
- `[stream-handlers] Auto-compaction failed: ${compactResult.error}, falling back to prune`,
196
+ } else {
197
+ await db
198
+ .update(messages)
199
+ .set({ status: 'completed', completedAt: Date.now() })
200
+ .where(eq(messages.id, opts.assistantMessageId));
201
+
202
+ publish({
203
+ type: 'message.completed',
204
+ sessionId: opts.sessionId,
205
+ payload: {
206
+ id: opts.assistantMessageId,
207
+ autoCompacted: true,
208
+ },
209
+ });
210
+
211
+ const compactMessageId = crypto.randomUUID();
212
+ const compactMessageTime = Date.now();
213
+ await db.insert(messages).values({
214
+ id: compactMessageId,
215
+ sessionId: opts.sessionId,
216
+ role: 'assistant',
217
+ status: 'pending',
218
+ agent: opts.agent,
219
+ provider: opts.provider,
220
+ model: opts.model,
221
+ createdAt: compactMessageTime,
222
+ });
223
+
224
+ publish({
225
+ type: 'message.created',
226
+ sessionId: opts.sessionId,
227
+ payload: { id: compactMessageId, role: 'assistant' },
228
+ });
229
+
230
+ let compactionSucceeded = false;
231
+ try {
232
+ const publishWrapper = (event: {
233
+ type: string;
234
+ sessionId: string;
235
+ payload: Record<string, unknown>;
236
+ }) => {
237
+ publish(event as Parameters<typeof publish>[0]);
238
+ };
239
+ const compactResult = await performAutoCompaction(
240
+ db,
241
+ opts.sessionId,
242
+ compactMessageId,
243
+ publishWrapper,
244
+ opts.provider,
245
+ opts.model,
215
246
  );
216
- const pruneResult = await pruneSession(db, opts.sessionId);
247
+ if (compactResult.success) {
248
+ debugLog(
249
+ `[stream-handlers] Auto-compaction succeeded: ${compactResult.summary?.slice(0, 100)}...`,
250
+ );
251
+ compactionSucceeded = true;
252
+ } else {
253
+ debugLog(
254
+ `[stream-handlers] Auto-compaction failed: ${compactResult.error}, falling back to prune`,
255
+ );
256
+ const pruneResult = await pruneSession(db, opts.sessionId);
257
+ debugLog(
258
+ `[stream-handlers] Fallback pruned ${pruneResult.pruned} parts, saved ~${pruneResult.saved} tokens`,
259
+ );
260
+ compactionSucceeded = pruneResult.pruned > 0;
261
+ }
262
+ } catch (compactErr) {
217
263
  debugLog(
218
- `[stream-handlers] Fallback pruned ${pruneResult.pruned} parts, saved ~${pruneResult.saved} tokens`,
264
+ `[stream-handlers] Auto-compact error: ${compactErr instanceof Error ? compactErr.message : String(compactErr)}`,
219
265
  );
220
- compactionSucceeded = pruneResult.pruned > 0;
221
266
  }
222
- } catch (compactErr) {
223
- debugLog(
224
- `[stream-handlers] Auto-compact error: ${compactErr instanceof Error ? compactErr.message : String(compactErr)}`,
225
- );
226
- }
227
267
 
228
- if (compactionSucceeded) {
229
268
  await db
230
269
  .update(messages)
231
270
  .set({
232
- status: 'completed',
271
+ status: compactionSucceeded ? 'completed' : 'error',
272
+ completedAt: Date.now(),
233
273
  })
234
- .where(eq(messages.id, opts.assistantMessageId));
274
+ .where(eq(messages.id, compactMessageId));
235
275
 
236
276
  publish({
237
277
  type: 'message.completed',
238
278
  sessionId: opts.sessionId,
239
- payload: {
240
- id: opts.assistantMessageId,
241
- autoCompacted: true,
242
- },
279
+ payload: { id: compactMessageId, autoCompacted: true },
243
280
  });
244
281
 
245
- if (retryCallback) {
282
+ if (compactionSucceeded && retryCallback) {
246
283
  debugLog('[stream-handlers] Triggering retry after compaction...');
247
- const newAssistantMessageId = crypto.randomUUID();
284
+ const retryMessageId = crypto.randomUUID();
248
285
  await db.insert(messages).values({
249
- id: newAssistantMessageId,
286
+ id: retryMessageId,
250
287
  sessionId: opts.sessionId,
251
288
  role: 'assistant',
252
289
  status: 'pending',
@@ -259,23 +296,26 @@ export function createErrorHandler(
259
296
  publish({
260
297
  type: 'message.created',
261
298
  sessionId: opts.sessionId,
262
- payload: { id: newAssistantMessageId, role: 'assistant' },
299
+ payload: { id: retryMessageId, role: 'assistant' },
263
300
  });
264
301
 
265
302
  enqueueAssistantRun(
266
303
  {
267
304
  ...opts,
268
- assistantMessageId: newAssistantMessageId,
305
+ assistantMessageId: retryMessageId,
306
+ compactionRetries: retries + 1,
269
307
  },
270
308
  retryCallback,
271
309
  );
272
- } else {
310
+ return;
311
+ }
312
+
313
+ if (compactionSucceeded) {
273
314
  debugLog(
274
315
  '[stream-handlers] No retryCallback provided, cannot auto-retry',
275
316
  );
317
+ return;
276
318
  }
277
-
278
- return;
279
319
  }
280
320
  }
281
321