@agi-cli/server 0.1.120 → 0.1.121

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. package/package.json +3 -3
  2. package/src/index.ts +5 -5
  3. package/src/openapi/paths/git.ts +4 -0
  4. package/src/routes/ask.ts +13 -14
  5. package/src/routes/branch.ts +2 -2
  6. package/src/routes/config/agents.ts +1 -1
  7. package/src/routes/config/cwd.ts +1 -1
  8. package/src/routes/config/main.ts +1 -1
  9. package/src/routes/config/models.ts +32 -4
  10. package/src/routes/config/providers.ts +1 -1
  11. package/src/routes/config/utils.ts +14 -1
  12. package/src/routes/files.ts +1 -1
  13. package/src/routes/git/commit.ts +23 -6
  14. package/src/routes/git/schemas.ts +1 -0
  15. package/src/routes/session-files.ts +1 -1
  16. package/src/routes/session-messages.ts +2 -2
  17. package/src/routes/sessions.ts +8 -6
  18. package/src/runtime/agent/registry.ts +333 -0
  19. package/src/runtime/agent/runner-reasoning.ts +108 -0
  20. package/src/runtime/agent/runner-setup.ts +265 -0
  21. package/src/runtime/agent/runner.ts +356 -0
  22. package/src/runtime/agent-registry.ts +6 -333
  23. package/src/runtime/{ask-service.ts → ask/service.ts} +5 -5
  24. package/src/runtime/{debug.ts → debug/index.ts} +1 -1
  25. package/src/runtime/{api-error.ts → errors/api-error.ts} +2 -2
  26. package/src/runtime/message/compaction-auto.ts +137 -0
  27. package/src/runtime/message/compaction-context.ts +64 -0
  28. package/src/runtime/message/compaction-detect.ts +19 -0
  29. package/src/runtime/message/compaction-limits.ts +58 -0
  30. package/src/runtime/message/compaction-mark.ts +115 -0
  31. package/src/runtime/message/compaction-prune.ts +75 -0
  32. package/src/runtime/message/compaction.ts +23 -0
  33. package/src/runtime/{history-builder.ts → message/history-builder.ts} +2 -2
  34. package/src/runtime/{message-service.ts → message/service.ts} +8 -14
  35. package/src/runtime/{history → message}/tool-history-tracker.ts +1 -1
  36. package/src/runtime/{prompt.ts → prompt/builder.ts} +1 -1
  37. package/src/runtime/{provider.ts → provider/anthropic.ts} +4 -219
  38. package/src/runtime/provider/google.ts +12 -0
  39. package/src/runtime/provider/index.ts +44 -0
  40. package/src/runtime/provider/openai.ts +26 -0
  41. package/src/runtime/provider/opencode.ts +61 -0
  42. package/src/runtime/provider/openrouter.ts +11 -0
  43. package/src/runtime/provider/solforge.ts +22 -0
  44. package/src/runtime/provider/zai.ts +53 -0
  45. package/src/runtime/{branch.ts → session/branch.ts} +1 -1
  46. package/src/runtime/{db-operations.ts → session/db-operations.ts} +1 -1
  47. package/src/runtime/{session-manager.ts → session/manager.ts} +1 -1
  48. package/src/runtime/{session-queue.ts → session/queue.ts} +2 -2
  49. package/src/runtime/stream/abort-handler.ts +65 -0
  50. package/src/runtime/stream/error-handler.ts +200 -0
  51. package/src/runtime/stream/finish-handler.ts +123 -0
  52. package/src/runtime/stream/handlers.ts +5 -0
  53. package/src/runtime/stream/step-finish.ts +93 -0
  54. package/src/runtime/stream/types.ts +17 -0
  55. package/src/runtime/{tool-context.ts → tools/context.ts} +1 -1
  56. package/src/runtime/{tool-context-setup.ts → tools/setup.ts} +3 -3
  57. package/src/runtime/{token-utils.ts → utils/token.ts} +2 -2
  58. package/src/tools/adapter.ts +4 -4
  59. package/src/runtime/compaction.ts +0 -536
  60. package/src/runtime/runner.ts +0 -654
  61. package/src/runtime/stream-handlers.ts +0 -508
  62. package/src/runtime/{cache-optimizer.ts → context/cache-optimizer.ts} +0 -0
  63. package/src/runtime/{environment.ts → context/environment.ts} +0 -0
  64. package/src/runtime/{context-optimizer.ts → context/optimizer.ts} +0 -0
  65. package/src/runtime/{debug-state.ts → debug/state.ts} +0 -0
  66. package/src/runtime/{error-handling.ts → errors/handling.ts} +0 -0
  67. package/src/runtime/{history-truncator.ts → message/history-truncator.ts} +0 -0
  68. package/src/runtime/{provider-selection.ts → provider/selection.ts} +0 -0
  69. package/src/runtime/{tool-mapping.ts → tools/mapping.ts} +0 -0
  70. package/src/runtime/{cwd.ts → utils/cwd.ts} +0 -0
package/src/runtime/runner.ts
@@ -1,654 +0,0 @@
- import { hasToolCall, streamText } from 'ai';
- import { loadConfig } from '@agi-cli/sdk';
- import { getDb } from '@agi-cli/database';
- import { messageParts, sessions } from '@agi-cli/database/schema';
- import { eq } from 'drizzle-orm';
- import { resolveModel } from './provider.ts';
- import { resolveAgentConfig } from './agent-registry.ts';
- import { composeSystemPrompt } from './prompt.ts';
- import { discoverProjectTools } from '@agi-cli/sdk';
- import { adaptTools } from '../tools/adapter.ts';
- import { publish, subscribe } from '../events/bus.ts';
- import { debugLog, time } from './debug.ts';
- import { buildHistoryMessages } from './history-builder.ts';
- import { toErrorPayload } from './error-handling.ts';
- import { getMaxOutputTokens } from './token-utils.ts';
- import {
-   type RunOpts,
-   setRunning,
-   dequeueJob,
-   cleanupSession,
- } from './session-queue.ts';
- import { setupToolContext } from './tool-context-setup.ts';
- import {
-   updateSessionTokensIncremental,
-   updateMessageTokensIncremental,
-   completeAssistantMessage,
-   cleanupEmptyTextParts,
- } from './db-operations.ts';
- import {
-   createStepFinishHandler,
-   createErrorHandler,
-   createAbortHandler,
-   createFinishHandler,
- } from './stream-handlers.ts';
- import { getCompactionSystemPrompt, pruneSession } from './compaction.ts';
-
- export {
-   enqueueAssistantRun,
-   abortSession,
-   abortMessage,
-   removeFromQueue,
-   getQueueState,
-   getRunnerState,
- } from './session-queue.ts';
-
- /**
-  * Main loop that processes the queue for a given session.
-  */
- export async function runSessionLoop(sessionId: string) {
-   setRunning(sessionId, true);
-
-   while (true) {
-     const job = await dequeueJob(sessionId);
-     if (!job) break;
-
-     try {
-       await runAssistant(job);
-     } catch (_err) {
-       // Swallow to keep the loop alive; event published by runner
-     }
-   }
-
-   setRunning(sessionId, false);
-   cleanupSession(sessionId);
- }
-
- /**
-  * Main function to run the assistant for a given request.
-  */
- async function runAssistant(opts: RunOpts) {
-   const separator = '='.repeat(72);
-   debugLog(separator);
-   debugLog(
-     `[RUNNER] Starting turn for session ${opts.sessionId}, message ${opts.assistantMessageId}`,
-   );
-
-   const cfgTimer = time('runner:loadConfig+db');
-   const cfg = await loadConfig(opts.projectRoot);
-   const db = await getDb(cfg.projectRoot);
-   cfgTimer.end();
-
-   const agentTimer = time('runner:resolveAgentConfig');
-   const agentCfg = await resolveAgentConfig(cfg.projectRoot, opts.agent);
-   agentTimer.end({ agent: opts.agent });
-
-   const agentPrompt = agentCfg.prompt || '';
-
-   // For /compact command, use minimal history - the compaction context has everything needed
-   const historyTimer = time('runner:buildHistory');
-   let history: Awaited<ReturnType<typeof buildHistoryMessages>>;
-   if (opts.isCompactCommand && opts.compactionContext) {
-     debugLog('[RUNNER] Using minimal history for /compact command');
-     history = [];
-   } else {
-     history = await buildHistoryMessages(db, opts.sessionId);
-   }
-   historyTimer.end({ messages: history.length });
-
-   // Fetch session to get context summary for compaction
-   const sessionRows = await db
-     .select()
-     .from(sessions)
-     .where(eq(sessions.id, opts.sessionId))
-     .limit(1);
-   const contextSummary = sessionRows[0]?.contextSummary ?? undefined;
-   if (contextSummary) {
-     debugLog(
-       `[RUNNER] Using context summary from compaction (${contextSummary.length} chars)`,
-     );
-   }
-
-   // FIX: For OAuth, we need to check if this is the first ASSISTANT message
-   // The user message is already in history by this point, so history.length will be > 0
-   // We need to add additionalSystemMessages on the first assistant turn
-   const isFirstMessage = !history.some((m) => m.role === 'assistant');
-
-   debugLog(`[RUNNER] isFirstMessage: ${isFirstMessage}`);
-   debugLog(`[RUNNER] userContext provided: ${opts.userContext ? 'YES' : 'NO'}`);
-   if (opts.userContext) {
-     debugLog(
-       `[RUNNER] userContext value: ${opts.userContext.substring(0, 100)}${opts.userContext.length > 100 ? '...' : ''}`,
-     );
-   }
-
-   const systemTimer = time('runner:composeSystemPrompt');
-   const { getAuth } = await import('@agi-cli/sdk');
-   const { getProviderSpoofPrompt } = await import('./prompt.ts');
-   const auth = await getAuth(opts.provider, cfg.projectRoot);
-   const needsSpoof = auth?.type === 'oauth';
-   const spoofPrompt = needsSpoof
-     ? getProviderSpoofPrompt(opts.provider)
-     : undefined;
-
-   debugLog(`[RUNNER] needsSpoof (OAuth): ${needsSpoof}`);
-   debugLog(
-     `[RUNNER] spoofPrompt: ${spoofPrompt ? `present (${opts.provider})` : 'none'}`,
-   );
-
-   let system: string;
-   let systemComponents: string[] = [];
-   let oauthFullPromptComponents: string[] | undefined;
-   let additionalSystemMessages: Array<{ role: 'system'; content: string }> = [];
-
-   if (spoofPrompt) {
-     // OAuth mode: short spoof in system field, full instructions in messages array
-     system = spoofPrompt;
-     systemComponents = [`spoof:${opts.provider || 'unknown'}`];
-     const fullPrompt = await composeSystemPrompt({
-       provider: opts.provider,
-       model: opts.model,
-       projectRoot: cfg.projectRoot,
-       agentPrompt,
-       oneShot: opts.oneShot,
-       spoofPrompt: undefined,
-       includeProjectTree: isFirstMessage,
-       userContext: opts.userContext,
-       contextSummary,
-     });
-     oauthFullPromptComponents = fullPrompt.components;
-
-     // FIX: Always add the system message for OAuth because:
-     // 1. System messages are NOT stored in the database
-     // 2. buildHistoryMessages only returns user/assistant messages
-     // 3. We need the full instructions on every turn
-     additionalSystemMessages = [{ role: 'system', content: fullPrompt.prompt }];
-
-     debugLog('[RUNNER] OAuth mode: additionalSystemMessages created');
-     const includesUserContext =
-       !!opts.userContext && fullPrompt.prompt.includes(opts.userContext);
-     debugLog(
-       `[system] oauth-full summary: ${JSON.stringify({
-         components: oauthFullPromptComponents ?? [],
-         length: fullPrompt.prompt.length,
-         includesUserContext,
-       })}`,
-     );
-   } else {
-     // API key mode: full instructions in system field
-     const composed = await composeSystemPrompt({
-       provider: opts.provider,
-       model: opts.model,
-       projectRoot: cfg.projectRoot,
-       agentPrompt,
-       oneShot: opts.oneShot,
-       spoofPrompt: undefined,
-       includeProjectTree: isFirstMessage,
-       userContext: opts.userContext,
-       contextSummary,
-     });
-     system = composed.prompt;
-     systemComponents = composed.components;
-   }
-   systemTimer.end();
-   debugLog(
-     `[system] summary: ${JSON.stringify({
-       components: systemComponents,
-       length: system.length,
-     })}`,
-   );
-
-   // Inject compaction prompt if this is a /compact command
-   if (opts.isCompactCommand && opts.compactionContext) {
-     debugLog('[RUNNER] Injecting compaction context for /compact command');
-     const compactPrompt = getCompactionSystemPrompt();
-     // Add compaction instructions as system message
-     // Don't modify `system` directly as it may contain OAuth spoof prompt
-     additionalSystemMessages.push({
-       role: 'system',
-       content: compactPrompt,
-     });
-     // Add the conversation context as a USER message (Anthropic requires at least one user message)
-     additionalSystemMessages.push({
-       role: 'user',
-       content: `Please summarize this conversation:\n\n<conversation-to-summarize>\n${opts.compactionContext}\n</conversation-to-summarize>`,
-     });
-   }
-
-   const toolsTimer = time('runner:discoverTools');
-   const allTools = await discoverProjectTools(cfg.projectRoot);
-   toolsTimer.end({ count: allTools.length });
-   const allowedNames = new Set([...(agentCfg.tools || []), 'finish']);
-   const gated = allTools.filter((tool) => allowedNames.has(tool.name));
-   debugLog(`[tools] ${gated.length} allowed tools`);
-
-   // FIX: For OAuth, ALWAYS prepend the system message because it's never in history
-   // For API key mode, only add on first message (when additionalSystemMessages is empty)
-   const messagesWithSystemInstructions: Array<{
-     role: string;
-     content: string | Array<unknown>;
-   }> = [
-     ...additionalSystemMessages, // Always add for OAuth, empty for API key mode
-     ...history,
-   ];
-
-   // Inject a reminder for subsequent turns to prevent "abrupt stops"
-   // This reinforces the instruction to call finish and maintain context
-   if (!isFirstMessage) {
-     messagesWithSystemInstructions.push({
-       role: 'user',
-       content:
-         'SYSTEM REMINDER: You are continuing an existing session. When you have completed the task, you MUST stream a text summary of what you did to the user, and THEN call the `finish` tool. Do not call `finish` without a summary.',
-     });
-   }
-
-   debugLog(`[RUNNER] About to create model with provider: ${opts.provider}`);
-   debugLog(`[RUNNER] About to create model ID: ${opts.model}`);
-   debugLog(
-     `[RUNNER] messagesWithSystemInstructions length: ${messagesWithSystemInstructions.length}`,
-   );
-   debugLog(
-     `[RUNNER] additionalSystemMessages length: ${additionalSystemMessages.length}`,
-   );
-   if (additionalSystemMessages.length > 0) {
-     debugLog(
-       '[RUNNER] ✅ additionalSystemMessages ADDED to messagesWithSystemInstructions',
-     );
-     debugLog(
-       `[RUNNER] This happens on EVERY turn for OAuth (system messages not stored in DB)`,
-     );
-   }
-
-   // For OpenAI OAuth, pass the full system prompt as instructions
-   const oauthSystemPrompt =
-     needsSpoof && opts.provider === 'openai' && additionalSystemMessages[0]
-       ? additionalSystemMessages[0].content
-       : undefined;
-   const model = await resolveModel(opts.provider, opts.model, cfg, {
-     systemPrompt: oauthSystemPrompt,
-   });
-   debugLog(
-     `[RUNNER] Model created: ${JSON.stringify({ id: model.modelId, provider: model.provider })}`,
-   );
-
-   const maxOutputTokens = getMaxOutputTokens(opts.provider, opts.model);
-   debugLog(`[RUNNER] maxOutputTokens for ${opts.model}: ${maxOutputTokens}`);
-
-   // Setup tool context
-   const { sharedCtx, firstToolTimer, firstToolSeen } = await setupToolContext(
-     opts,
-     db,
-   );
-
-   // Get auth type for Claude Code OAuth detection
-   const providerAuth = await getAuth(opts.provider, opts.projectRoot);
-   const authType = providerAuth?.type;
-   const toolset = adaptTools(gated, sharedCtx, opts.provider, authType);
-
-   let _finishObserved = false;
-   const unsubscribeFinish = subscribe(opts.sessionId, (evt) => {
-     if (evt.type !== 'tool.result') return;
-     try {
-       const name = (evt.payload as { name?: string } | undefined)?.name;
-       if (name === 'finish') _finishObserved = true;
-     } catch {}
-   });
-
-   const streamStartTimer = time('runner:first-delta');
-   let firstDeltaSeen = false;
-   debugLog(`[streamText] Calling with maxOutputTokens: ${maxOutputTokens}`);
-
-   // State management helpers
-   const getCurrentPartId = () => currentPartId;
-   const getStepIndex = () => stepIndex;
-   const updateCurrentPartId = (id: string | null) => {
-     currentPartId = id;
-   };
-   const updateAccumulated = (text: string) => {
-     accumulated = text;
-   };
-   const incrementStepIndex = () => {
-     stepIndex += 1;
-     return stepIndex;
-   };
-
-   type ReasoningState = {
-     partId: string;
-     text: string;
-     providerMetadata?: unknown;
-   };
-   const reasoningStates = new Map<string, ReasoningState>();
-   const serializeReasoningContent = (state: ReasoningState) =>
-     JSON.stringify(
-       state.providerMetadata != null
-         ? { text: state.text, providerMetadata: state.providerMetadata }
-         : { text: state.text },
-     );
-
-   // Create stream handlers
-   const onStepFinish = createStepFinishHandler(
-     opts,
-     db,
-     getStepIndex,
-     incrementStepIndex,
-     getCurrentPartId,
-     updateCurrentPartId,
-     updateAccumulated,
-     sharedCtx,
-     updateSessionTokensIncremental,
-     updateMessageTokensIncremental,
-   );
-
-   const onError = createErrorHandler(
-     opts,
-     db,
-     getStepIndex,
-     sharedCtx,
-     runSessionLoop,
-   );
-
-   const onAbort = createAbortHandler(opts, db, getStepIndex, sharedCtx);
-
-   const onFinish = createFinishHandler(opts, db, completeAssistantMessage);
-
-   // Use messages directly without truncation or optimization
-   const optimizedMessages = messagesWithSystemInstructions;
-   const cachedSystem = system;
-
-   // Part tracking - will be created on first text-delta
-   let currentPartId: string | null = null;
-   let accumulated = '';
-   let stepIndex = 0;
-
-   // Build provider options for reasoning/extended thinking
-   const providerOptions: Record<string, unknown> = {};
-   const THINKING_BUDGET = 16000;
-   // When reasoning is enabled for Anthropic, the API requires max_tokens to fit
-   // both thinking tokens AND response tokens. AI SDK adds budgetTokens to maxOutputTokens,
-   // so we need to reduce maxOutputTokens to leave room for thinking.
-   let effectiveMaxOutputTokens = maxOutputTokens;
-
-   if (opts.reasoning) {
-     if (opts.provider === 'anthropic') {
-       providerOptions.anthropic = {
-         thinking: { type: 'enabled', budgetTokens: THINKING_BUDGET },
-       };
-       // Reduce max output to leave room for thinking budget
-       if (maxOutputTokens && maxOutputTokens > THINKING_BUDGET) {
-         effectiveMaxOutputTokens = maxOutputTokens - THINKING_BUDGET;
-       }
-     } else if (opts.provider === 'openai') {
-       providerOptions.openai = {
-         reasoningSummary: 'auto',
-       };
-     } else if (opts.provider === 'google') {
-       providerOptions.google = {
-         thinkingConfig: { thinkingBudget: THINKING_BUDGET },
-       };
-     }
-   }
-
-   try {
-     const result = streamText({
-       model,
-       tools: toolset,
-       ...(cachedSystem ? { system: cachedSystem } : {}),
-       messages: optimizedMessages,
-       ...(effectiveMaxOutputTokens
-         ? { maxOutputTokens: effectiveMaxOutputTokens }
-         : {}),
-       ...(Object.keys(providerOptions).length > 0 ? { providerOptions } : {}),
-       abortSignal: opts.abortSignal,
-       stopWhen: hasToolCall('finish'),
-       onStepFinish,
-       onError,
-       onAbort,
-       onFinish,
-     });
-
-     for await (const part of result.fullStream) {
-       if (!part) continue;
-       if (part.type === 'text-delta') {
-         const delta = part.text;
-         if (!delta) continue;
-         if (!firstDeltaSeen) {
-           firstDeltaSeen = true;
-           streamStartTimer.end();
-         }
-
-         // Create text part on first delta
-         if (!currentPartId) {
-           currentPartId = crypto.randomUUID();
-           sharedCtx.assistantPartId = currentPartId;
-           await db.insert(messageParts).values({
-             id: currentPartId,
-             messageId: opts.assistantMessageId,
-             index: sharedCtx.nextIndex(),
-             stepIndex: null,
-             type: 'text',
-             content: JSON.stringify({ text: '' }),
-             agent: opts.agent,
-             provider: opts.provider,
-             model: opts.model,
-             startedAt: Date.now(),
-           });
-         }
-
-         accumulated += delta;
-         publish({
-           type: 'message.part.delta',
-           sessionId: opts.sessionId,
-           payload: {
-             messageId: opts.assistantMessageId,
-             partId: currentPartId,
-             stepIndex,
-             delta,
-           },
-         });
-         await db
-           .update(messageParts)
-           .set({ content: JSON.stringify({ text: accumulated }) })
-           .where(eq(messageParts.id, currentPartId));
-         continue;
-       }
-
-       if (part.type === 'reasoning-start') {
-         const reasoningId = part.id;
-         if (!reasoningId) continue;
-         const reasoningPartId = crypto.randomUUID();
-         const state: ReasoningState = {
-           partId: reasoningPartId,
-           text: '',
-           providerMetadata: part.providerMetadata,
-         };
-         reasoningStates.set(reasoningId, state);
-         try {
-           await db.insert(messageParts).values({
-             id: reasoningPartId,
-             messageId: opts.assistantMessageId,
-             index: sharedCtx.nextIndex(),
-             stepIndex: getStepIndex(),
-             type: 'reasoning',
-             content: serializeReasoningContent(state),
-             agent: opts.agent,
-             provider: opts.provider,
-             model: opts.model,
-             startedAt: Date.now(),
-           });
-         } catch {}
-         continue;
-       }
-
-       if (part.type === 'reasoning-delta') {
-         const state = reasoningStates.get(part.id);
-         if (!state) continue;
-         state.text += part.text;
-         if (part.providerMetadata != null) {
-           state.providerMetadata = part.providerMetadata;
-         }
-         publish({
-           type: 'reasoning.delta',
-           sessionId: opts.sessionId,
-           payload: {
-             messageId: opts.assistantMessageId,
-             partId: state.partId,
-             stepIndex: getStepIndex(),
-             delta: part.text,
-           },
-         });
-         try {
-           await db
-             .update(messageParts)
-             .set({ content: serializeReasoningContent(state) })
-             .where(eq(messageParts.id, state.partId));
-         } catch {}
-         continue;
-       }
-
-       if (part.type === 'reasoning-end') {
-         const state = reasoningStates.get(part.id);
-         if (!state) continue;
-         // Delete the reasoning part if it's empty
-         if (!state.text || state.text.trim() === '') {
-           try {
-             await db
-               .delete(messageParts)
-               .where(eq(messageParts.id, state.partId));
-           } catch {}
-           reasoningStates.delete(part.id);
-           continue;
-         }
-         try {
-           await db
-             .update(messageParts)
-             .set({ completedAt: Date.now() })
-             .where(eq(messageParts.id, state.partId));
-         } catch {}
-         reasoningStates.delete(part.id);
-       }
-     }
-
-     // Emit finish-step at the end if there were no tool calls and no finish
-     const fs = firstToolSeen();
-     if (!fs && !_finishObserved) {
-       publish({
-         type: 'finish-step',
-         sessionId: opts.sessionId,
-         payload: { reason: 'no-tool-calls' },
-       });
-     }
-
-     unsubscribeFinish();
-
-     await cleanupEmptyTextParts(opts, db);
-
-     firstToolTimer.end({ seen: firstToolSeen() });
-
-     debugLog(
-       `[RUNNER] Stream finished. finishSeen=${_finishObserved}, firstToolSeen=${fs}`,
-     );
-   } catch (err) {
-     unsubscribeFinish();
-     const payload = toErrorPayload(err);
-
-     // Check if this is a "prompt too long" error and auto-compact
-     const errorMessage = err instanceof Error ? err.message : String(err);
-     const errorCode = (err as { code?: string })?.code ?? '';
-     const responseBody = (err as { responseBody?: string })?.responseBody ?? '';
-     const apiErrorType = (err as { apiErrorType?: string })?.apiErrorType ?? '';
-     const combinedError = `${errorMessage} ${responseBody}`.toLowerCase();
-     debugLog(`[RUNNER] Error caught - message: ${errorMessage.slice(0, 100)}`);
-     debugLog(
-       `[RUNNER] Error caught - code: ${errorCode}, apiErrorType: ${apiErrorType}`,
-     );
-     debugLog(
-       `[RUNNER] Error caught - responseBody: ${responseBody.slice(0, 200)}`,
-     );
-     const isPromptTooLong =
-       combinedError.includes('prompt is too long') ||
-       combinedError.includes('maximum context length') ||
-       combinedError.includes('too many tokens') ||
-       combinedError.includes('context_length_exceeded') ||
-       combinedError.includes('request too large') ||
-       combinedError.includes('exceeds the model') ||
-       combinedError.includes('input is too long') ||
-       errorCode === 'context_length_exceeded' ||
-       apiErrorType === 'invalid_request_error';
-     debugLog(
-       `[RUNNER] isPromptTooLong: ${isPromptTooLong}, isCompactCommand: ${opts.isCompactCommand}`,
-     );
-
-     if (isPromptTooLong && !opts.isCompactCommand) {
-       debugLog(
-         '[RUNNER] Prompt too long - auto-compacting and will retry on next user message',
-       );
-       try {
-         const pruneResult = await pruneSession(db, opts.sessionId);
-         debugLog(
-           `[RUNNER] Auto-pruned ${pruneResult.pruned} parts, saved ~${pruneResult.saved} tokens`,
-         );
-
-         // Publish a system message to inform the user
-         publish({
-           type: 'error',
-           sessionId: opts.sessionId,
-           payload: {
-             ...payload,
-             message: `Context too large (${errorMessage.match(/\d+/)?.[0] || 'many'} tokens). Auto-compacted old tool results. Please retry your message.`,
-             name: 'ContextOverflow',
-           },
-         });
-
-         // Complete the message as failed
-         try {
-           await completeAssistantMessage({}, opts, db);
-         } catch {}
-         return;
-       } catch (pruneErr) {
-         debugLog(
-           `[RUNNER] Auto-prune failed: ${pruneErr instanceof Error ? pruneErr.message : String(pruneErr)}`,
-         );
-       }
-     }
-
-     debugLog(`[RUNNER] Error during stream: ${payload.message}`);
-     debugLog(
-       `[RUNNER] Error stack: ${err instanceof Error ? err.stack : 'no stack'}`,
-     );
-     debugLog(
-       `[RUNNER] db is: ${typeof db}, db.select is: ${typeof db?.select}`,
-     );
-     publish({
-       type: 'error',
-       sessionId: opts.sessionId,
-       payload,
-     });
-     try {
-       await updateSessionTokensIncremental(
-         {
-           inputTokens: 0,
-           outputTokens: 0,
-         },
-         undefined,
-         opts,
-         db,
-       );
-       await updateMessageTokensIncremental(
-         {
-           inputTokens: 0,
-           outputTokens: 0,
-         },
-         undefined,
-         opts,
-         db,
-       );
-       await completeAssistantMessage({}, opts, db);
-     } catch {}
-     throw err;
-   } finally {
-     debugLog(
-       `[RUNNER] Turn complete for session ${opts.sessionId}, message ${opts.assistantMessageId}`,
-     );
-     debugLog(separator);
-   }
- }