@agi-cli/server 0.1.119 → 0.1.121
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +3 -3
- package/src/index.ts +9 -5
- package/src/openapi/paths/git.ts +4 -0
- package/src/routes/ask.ts +13 -14
- package/src/routes/branch.ts +106 -0
- package/src/routes/config/agents.ts +1 -1
- package/src/routes/config/cwd.ts +1 -1
- package/src/routes/config/main.ts +1 -1
- package/src/routes/config/models.ts +32 -4
- package/src/routes/config/providers.ts +1 -1
- package/src/routes/config/utils.ts +14 -1
- package/src/routes/files.ts +1 -1
- package/src/routes/git/commit.ts +23 -6
- package/src/routes/git/schemas.ts +1 -0
- package/src/routes/session-files.ts +1 -1
- package/src/routes/session-messages.ts +2 -2
- package/src/routes/sessions.ts +8 -6
- package/src/runtime/agent/registry.ts +333 -0
- package/src/runtime/agent/runner-reasoning.ts +108 -0
- package/src/runtime/agent/runner-setup.ts +265 -0
- package/src/runtime/agent/runner.ts +356 -0
- package/src/runtime/agent-registry.ts +6 -333
- package/src/runtime/{ask-service.ts → ask/service.ts} +5 -5
- package/src/runtime/{debug.ts → debug/index.ts} +1 -1
- package/src/runtime/{api-error.ts → errors/api-error.ts} +2 -2
- package/src/runtime/message/compaction-auto.ts +137 -0
- package/src/runtime/message/compaction-context.ts +64 -0
- package/src/runtime/message/compaction-detect.ts +19 -0
- package/src/runtime/message/compaction-limits.ts +58 -0
- package/src/runtime/message/compaction-mark.ts +115 -0
- package/src/runtime/message/compaction-prune.ts +75 -0
- package/src/runtime/message/compaction.ts +23 -0
- package/src/runtime/{history-builder.ts → message/history-builder.ts} +2 -2
- package/src/runtime/{message-service.ts → message/service.ts} +8 -14
- package/src/runtime/{history → message}/tool-history-tracker.ts +1 -1
- package/src/runtime/{prompt.ts → prompt/builder.ts} +1 -1
- package/src/runtime/{provider.ts → provider/anthropic.ts} +4 -219
- package/src/runtime/provider/google.ts +12 -0
- package/src/runtime/provider/index.ts +44 -0
- package/src/runtime/provider/openai.ts +26 -0
- package/src/runtime/provider/opencode.ts +61 -0
- package/src/runtime/provider/openrouter.ts +11 -0
- package/src/runtime/provider/solforge.ts +22 -0
- package/src/runtime/provider/zai.ts +53 -0
- package/src/runtime/session/branch.ts +277 -0
- package/src/runtime/{db-operations.ts → session/db-operations.ts} +1 -1
- package/src/runtime/{session-manager.ts → session/manager.ts} +1 -1
- package/src/runtime/{session-queue.ts → session/queue.ts} +2 -2
- package/src/runtime/stream/abort-handler.ts +65 -0
- package/src/runtime/stream/error-handler.ts +200 -0
- package/src/runtime/stream/finish-handler.ts +123 -0
- package/src/runtime/stream/handlers.ts +5 -0
- package/src/runtime/stream/step-finish.ts +93 -0
- package/src/runtime/stream/types.ts +17 -0
- package/src/runtime/{tool-context.ts → tools/context.ts} +1 -1
- package/src/runtime/{tool-context-setup.ts → tools/setup.ts} +3 -3
- package/src/runtime/{token-utils.ts → utils/token.ts} +2 -2
- package/src/tools/adapter.ts +4 -4
- package/src/runtime/compaction.ts +0 -536
- package/src/runtime/runner.ts +0 -654
- package/src/runtime/stream-handlers.ts +0 -508
- /package/src/runtime/{cache-optimizer.ts → context/cache-optimizer.ts} +0 -0
- /package/src/runtime/{environment.ts → context/environment.ts} +0 -0
- /package/src/runtime/{context-optimizer.ts → context/optimizer.ts} +0 -0
- /package/src/runtime/{debug-state.ts → debug/state.ts} +0 -0
- /package/src/runtime/{error-handling.ts → errors/handling.ts} +0 -0
- /package/src/runtime/{history-truncator.ts → message/history-truncator.ts} +0 -0
- /package/src/runtime/{provider-selection.ts → provider/selection.ts} +0 -0
- /package/src/runtime/{tool-mapping.ts → tools/mapping.ts} +0 -0
- /package/src/runtime/{cwd.ts → utils/cwd.ts} +0 -0
|
@@ -1,508 +0,0 @@
|
|
|
1
|
-
import type { getDb } from '@agi-cli/database';
|
|
2
|
-
import { messages, messageParts } from '@agi-cli/database/schema';
|
|
3
|
-
import { eq } from 'drizzle-orm';
|
|
4
|
-
import { APICallError } from 'ai';
|
|
5
|
-
import { publish } from '../events/bus.ts';
|
|
6
|
-
import { estimateModelCostUsd } from '@agi-cli/sdk';
|
|
7
|
-
import { toErrorPayload } from './error-handling.ts';
|
|
8
|
-
import type { RunOpts } from './session-queue.ts';
|
|
9
|
-
import type { ToolAdapterContext } from '../tools/adapter.ts';
|
|
10
|
-
import type { ProviderMetadata, UsageData } from './db-operations.ts';
|
|
11
|
-
import {
|
|
12
|
-
pruneSession,
|
|
13
|
-
isOverflow,
|
|
14
|
-
getModelLimits,
|
|
15
|
-
type TokenUsage,
|
|
16
|
-
markSessionCompacted,
|
|
17
|
-
performAutoCompaction,
|
|
18
|
-
} from './compaction.ts';
|
|
19
|
-
import { debugLog } from './debug.ts';
|
|
20
|
-
import { enqueueAssistantRun } from './session-queue.ts';
|
|
21
|
-
|
|
22
|
-
/**
 * Shape of the event received by the step-finish handler.
 * All fields are optional; the handler only forwards what is present.
 */
type StepFinishEvent = {
  /** Reason reported for the end of the step; republished on `finish-step`. */
  finishReason?: string;
  /** Token usage for the step; drives the incremental token updates and the `usage` event. */
  usage?: UsageData;
  /** Provider-specific metadata handed to the token update functions alongside `usage`. */
  experimental_providerMetadata?: ProviderMetadata;
  /** Opaque response object; republished unchanged on `finish-step`. */
  response?: unknown;
};
|
|
28
|
-
|
|
29
|
-
/**
 * Shape of the event received by the finish handler.
 * `usage` is only used as a fallback when no stored token counts are found.
 */
type FinishEvent = {
  /** Reason reported for the end of the stream; republished on `message.completed`. */
  finishReason?: string;
  /** Input/output/total token counts reported with the finish event. */
  usage?: Pick<
    UsageData,
    'inputTokens' | 'outputTokens' | 'totalTokens'
  >;
};
|
|
33
|
-
|
|
34
|
-
/**
 * Shape of the event received by the abort handler.
 * Only the number of entries in `steps` is read by this file.
 */
type AbortEvent = {
  /** Steps reported with the abort event. */
  steps: Array<unknown>;
};
|
|
37
|
-
|
|
38
|
-
/**
 * Creates the onStepFinish handler for the stream.
 *
 * The returned callback runs these stages in order:
 *  1. sets `completedAt` on the current message part, when a part id is set;
 *  2. passes the step's usage to the session and message token updaters;
 *  3. publishes a `finish-step` event, plus a `usage` event when usage is present;
 *  4. increments the step index, mirrors it onto `sharedCtx.stepIndex`, and
 *     resets the current part id and the accumulated text.
 *
 * Every stage is best-effort: a failure in one stage is reported through
 * `debugLog` and does not prevent the following stages from running.
 *
 * @param opts - Run options; `sessionId` is used for published events and the
 *   whole object is forwarded to the token updaters.
 * @param db - Database handle used to update `messageParts`.
 * @param getStepIndex - Returns the index of the step that just finished.
 * @param incrementStepIndex - Advances the step counter and returns the new index.
 * @param getCurrentPartId - Returns the id of the message part being written, or `null`.
 * @param updateCurrentPartId - Setter for the current part id.
 * @param updateAccumulated - Setter for the accumulated text buffer.
 * @param sharedCtx - Tool adapter context whose `stepIndex` is kept in sync.
 * @param updateSessionTokensIncrementalFn - Called with `(usage, providerMetadata, opts, db)`.
 * @param updateMessageTokensIncrementalFn - Called with `(usage, providerMetadata, opts, db)`.
 * @returns The async step-finish callback.
 */
export function createStepFinishHandler(
  opts: RunOpts,
  db: Awaited<ReturnType<typeof getDb>>,
  getStepIndex: () => number,
  incrementStepIndex: () => number,
  getCurrentPartId: () => string | null,
  updateCurrentPartId: (id: string | null) => void,
  updateAccumulated: (text: string) => void,
  sharedCtx: ToolAdapterContext,
  updateSessionTokensIncrementalFn: (
    usage: UsageData,
    providerMetadata: ProviderMetadata | undefined,
    opts: RunOpts,
    db: Awaited<ReturnType<typeof getDb>>,
  ) => Promise<void>,
  updateMessageTokensIncrementalFn: (
    usage: UsageData,
    providerMetadata: ProviderMetadata | undefined,
    opts: RunOpts,
    db: Awaited<ReturnType<typeof getDb>>,
  ) => Promise<void>,
) {
  // Reports a stage failure instead of silently discarding it.
  const logStageFailure = (stage: string, err: unknown): void => {
    debugLog(
      `[stream-handlers] onStepFinish ${stage} failed: ${err instanceof Error ? err.message : String(err)}`,
    );
  };

  return async (step: StepFinishEvent) => {
    const finishedAt = Date.now();
    const currentPartId = getCurrentPartId();
    const stepIndex = getStepIndex();

    // Stage 1: close the part that was being written, if any.
    if (currentPartId) {
      try {
        await db
          .update(messageParts)
          .set({ completedAt: finishedAt })
          .where(eq(messageParts.id, currentPartId));
      } catch (err) {
        logStageFailure('part completion', err);
      }
    }

    // Stage 2: update token counts incrementally after each step.
    // The two updaters are isolated so one failing does not skip the other.
    if (step.usage) {
      try {
        await updateSessionTokensIncrementalFn(
          step.usage,
          step.experimental_providerMetadata,
          opts,
          db,
        );
      } catch (err) {
        logStageFailure('session token update', err);
      }

      try {
        await updateMessageTokensIncrementalFn(
          step.usage,
          step.experimental_providerMetadata,
          opts,
          db,
        );
      } catch (err) {
        logStageFailure('message token update', err);
      }
    }

    // Stage 3: notify subscribers.
    try {
      publish({
        type: 'finish-step',
        sessionId: opts.sessionId,
        payload: {
          stepIndex,
          usage: step.usage,
          finishReason: step.finishReason,
          response: step.response,
        },
      });
      if (step.usage) {
        publish({
          type: 'usage',
          sessionId: opts.sessionId,
          payload: { stepIndex, ...step.usage },
        });
      }
    } catch (err) {
      logStageFailure('event publish', err);
    }

    // Stage 4: increment the step index but defer creating the new text part
    // until we actually get a text-delta (so reasoning blocks can complete first).
    try {
      const newStepIndex = incrementStepIndex();
      sharedCtx.stepIndex = newStepIndex;
      updateCurrentPartId(null); // Signal that next text-delta should create new part
      updateAccumulated('');
    } catch (err) {
      logStageFailure('step advance', err);
    }
  };
}
|
|
128
|
-
|
|
129
|
-
/**
 * Creates the onError handler for the stream.
 *
 * The returned callback:
 *  1. classifies the error as "prompt too long" by scanning its serialized
 *     form and message for known provider phrases, and by checking its
 *     `code` / `apiErrorType` (also looked up under `error.error`);
 *  2. for prompt-too-long errors outside a /compact run, attempts
 *     `performAutoCompaction`, falling back to `pruneSession` when that
 *     reports failure;
 *  3. when compaction succeeded, marks the current assistant message as
 *     completed, publishes `message.completed`, and - if `retryCallback` is
 *     given - inserts a fresh pending assistant message and enqueues a retry.
 *     No error is surfaced in that case;
 *  4. otherwise stores an `error` message part, marks the assistant message
 *     as errored, and publishes an `error` event.
 *
 * @param opts - Run options (session, assistant message, agent, provider, model,
 *   `isCompactCommand`).
 * @param db - Database handle used for `messages` / `messageParts` writes.
 * @param getStepIndex - Returns the step index recorded on the error part.
 * @param sharedCtx - Tool adapter context; `nextIndex()` supplies the part index.
 * @param retryCallback - Runner passed to `enqueueAssistantRun` for the retried
 *   message. When omitted, a successful compaction is not followed by a retry.
 * @returns The async error callback.
 */
export function createErrorHandler(
  opts: RunOpts,
  db: Awaited<ReturnType<typeof getDb>>,
  getStepIndex: () => number,
  sharedCtx: ToolAdapterContext,
  retryCallback?: (sessionId: string) => Promise<void>,
) {
  // Lower-case phrases matched against the error text to detect an oversized prompt.
  const promptTooLongMarkers = [
    'prompt is too long',
    'maximum context length',
    'too many tokens',
    'context_length_exceeded',
    'request too large',
    'exceeds the model',
    'context window',
    'input is too long',
  ];

  return async (err: unknown) => {
    const errorPayload = toErrorPayload(err);
    const isApiError = APICallError.isInstance(err);
    const stepIndex = getStepIndex();

    // Check if this is a prompt-too-long error and auto-compact.
    // Handle nested error structures from AI SDK.
    const errObj = err as Record<string, unknown> | null | undefined;
    const nestedError = (
      errObj?.error as Record<string, unknown> | null | undefined
    )?.error as Record<string, unknown> | undefined;
    const errorCode =
      (errObj?.code as string) ?? (nestedError?.code as string) ?? '';
    const errorType =
      (errObj?.apiErrorType as string) ?? (nestedError?.type as string) ?? '';

    // JSON.stringify returns undefined for undefined/functions/symbols and
    // throws on circular structures or BigInt values; neither case may crash
    // the error handler itself.
    let serializedError = '';
    try {
      serializedError = JSON.stringify(err) ?? '';
    } catch {
      serializedError = '';
    }
    // Error#message is non-enumerable, so a plain Error serializes to "{}";
    // include the message explicitly so its text is still inspected.
    const rawMessage = err instanceof Error ? err.message : '';
    const fullErrorStr = `${serializedError}\n${rawMessage}`.toLowerCase();

    // NOTE(review): every 'invalid_request_error' is treated as prompt-too-long,
    // which looks broader than intended - confirm against provider error types.
    const isPromptTooLong =
      promptTooLongMarkers.some((marker) => fullErrorStr.includes(marker)) ||
      errorCode === 'context_length_exceeded' ||
      errorType === 'invalid_request_error';

    debugLog(
      `[stream-handlers] isPromptTooLong: ${isPromptTooLong}, errorCode: ${errorCode}, errorType: ${errorType}`,
    );

    const shouldAutoCompact = isPromptTooLong && !opts.isCompactCommand;

    if (shouldAutoCompact) {
      debugLog(
        '[stream-handlers] Prompt too long detected, auto-compacting...',
      );
      let compactionSucceeded = false;
      try {
        // Stream the compaction summary with proper publish function
        const compactResult = await performAutoCompaction(
          db,
          opts.sessionId,
          opts.assistantMessageId,
          publish,
          opts.provider,
          opts.model,
        );
        if (compactResult.success) {
          debugLog(
            `[stream-handlers] Auto-compaction succeeded: ${compactResult.summary?.slice(0, 100)}...`,
          );
          compactionSucceeded = true;
        } else {
          debugLog(
            `[stream-handlers] Auto-compaction failed: ${compactResult.error}, falling back to prune`,
          );
          // Fall back to simple prune
          const pruneResult = await pruneSession(db, opts.sessionId);
          debugLog(
            `[stream-handlers] Fallback pruned ${pruneResult.pruned} parts, saved ~${pruneResult.saved} tokens`,
          );
          compactionSucceeded = pruneResult.pruned > 0;
        }
      } catch (compactErr) {
        debugLog(
          `[stream-handlers] Auto-compact error: ${compactErr instanceof Error ? compactErr.message : String(compactErr)}`,
        );
      }

      // If compaction succeeded, complete this message and trigger retry
      if (compactionSucceeded) {
        // Mark this compaction message as completed
        await db
          .update(messages)
          .set({
            status: 'completed',
          })
          .where(eq(messages.id, opts.assistantMessageId));

        // Publish completion event for the compaction message
        publish({
          type: 'message.completed',
          sessionId: opts.sessionId,
          payload: {
            id: opts.assistantMessageId,
            autoCompacted: true,
          },
        });

        // Trigger retry - create a new assistant message and enqueue the run
        if (retryCallback) {
          debugLog('[stream-handlers] Triggering retry after compaction...');
          const newAssistantMessageId = crypto.randomUUID();
          await db.insert(messages).values({
            id: newAssistantMessageId,
            sessionId: opts.sessionId,
            role: 'assistant',
            status: 'pending',
            agent: opts.agent,
            provider: opts.provider,
            model: opts.model,
            createdAt: Date.now(),
          });

          publish({
            type: 'message.created',
            sessionId: opts.sessionId,
            payload: { id: newAssistantMessageId, role: 'assistant' },
          });

          // Enqueue the retry with the new assistant message
          enqueueAssistantRun(
            {
              ...opts,
              assistantMessageId: newAssistantMessageId,
            },
            retryCallback,
          );
        } else {
          debugLog(
            '[stream-handlers] No retryCallback provided, cannot auto-retry',
          );
        }

        return; // Don't show error, compaction and retry handled it
      }
    }

    // Past this point an attempted auto-compaction has NOT succeeded (the
    // success path returned above), so the stored error must not claim that
    // the context was compacted.
    const autoCompactFailed = shouldAutoCompact;
    const autoCompacted = false;

    // Create error part for UI display
    const errorPartId = crypto.randomUUID();
    const displayMessage = autoCompactFailed
      ? `${errorPayload.message}. Automatic context compaction failed - please compact the session manually and retry.`
      : errorPayload.message;
    await db.insert(messageParts).values({
      id: errorPartId,
      messageId: opts.assistantMessageId,
      index: await sharedCtx.nextIndex(),
      stepIndex,
      type: 'error',
      content: JSON.stringify({
        message: displayMessage,
        type: errorPayload.type,
        details: errorPayload.details,
        isAborted: false,
      }),
      agent: opts.agent,
      provider: opts.provider,
      model: opts.model,
      startedAt: Date.now(),
      completedAt: Date.now(),
    });

    // Update message status
    await db
      .update(messages)
      .set({
        status: 'error',
        error: displayMessage,
        errorType: errorPayload.type,
        errorDetails: JSON.stringify({
          ...errorPayload.details,
          isApiError,
          autoCompacted,
        }),
        isAborted: false,
      })
      .where(eq(messages.id, opts.assistantMessageId));

    // Publish enhanced error event
    publish({
      type: 'error',
      sessionId: opts.sessionId,
      payload: {
        messageId: opts.assistantMessageId,
        partId: errorPartId,
        error: displayMessage,
        errorType: errorPayload.type,
        details: errorPayload.details,
        isAborted: false,
        autoCompacted,
      },
    });
  };
}
|
|
325
|
-
|
|
326
|
-
/**
 * Creates the onAbort handler for the stream.
 *
 * The returned callback records a user-initiated stop in three places:
 * an `error`-typed message part, the assistant message row (status `error`,
 * `isAborted: true`), and a published `error` event. Each record carries the
 * number of steps reported with the abort event.
 *
 * @param opts - Run options (session, assistant message, agent, provider, model).
 * @param db - Database handle used for `messages` / `messageParts` writes.
 * @param getStepIndex - Returns the step index stored on the abort part.
 * @param sharedCtx - Tool adapter context; `nextIndex()` supplies the part index.
 * @returns The async abort callback.
 */
export function createAbortHandler(
  opts: RunOpts,
  db: Awaited<ReturnType<typeof getDb>>,
  getStepIndex: () => number,
  sharedCtx: ToolAdapterContext,
) {
  return async (event: AbortEvent) => {
    const { steps } = event;
    const stepIndex = getStepIndex();

    // 1. Persist an abort part so the UI can render the stop.
    const abortPartId = crypto.randomUUID();
    const partInsert = db.insert(messageParts);
    const partIndex = await sharedCtx.nextIndex();
    const partContent = JSON.stringify({
      message: 'Generation stopped by user',
      type: 'abort',
      isAborted: true,
      stepsCompleted: steps.length,
    });
    await partInsert.values({
      id: abortPartId,
      messageId: opts.assistantMessageId,
      index: partIndex,
      stepIndex,
      type: 'error',
      content: partContent,
      agent: opts.agent,
      provider: opts.provider,
      model: opts.model,
      startedAt: Date.now(),
      completedAt: Date.now(),
    });

    // 2. Record the abort on the assistant message itself.
    const messageUpdate = db.update(messages);
    const abortDetails = JSON.stringify({
      stepsCompleted: steps.length,
      abortedAt: Date.now(),
    });
    await messageUpdate
      .set({
        status: 'error',
        error: 'Generation stopped by user',
        errorType: 'abort',
        errorDetails: abortDetails,
        isAborted: true,
      })
      .where(eq(messages.id, opts.assistantMessageId));

    // 3. Tell subscribers about the abort.
    const abortPayload = {
      messageId: opts.assistantMessageId,
      partId: abortPartId,
      error: 'Generation stopped by user',
      errorType: 'abort',
      isAborted: true,
      stepsCompleted: steps.length,
    };
    publish({
      type: 'error',
      sessionId: opts.sessionId,
      payload: abortPayload,
    });
  };
}
|
|
389
|
-
|
|
390
|
-
/**
 * Creates the onFinish handler for the stream.
 *
 * The returned callback:
 *  1. calls `completeAssistantMessageFn(fin, opts, db)`;
 *  2. for a /compact run that did not finish with reason `error`, marks the
 *     session compacted - but only if the assistant message contains a
 *     non-empty text part (the summary);
 *  3. reads the token counts stored on the assistant message row, falling
 *     back to `fin.usage` when the row is missing or cannot be read, and
 *     estimates the cost from them;
 *  4. starts a background `pruneSession` when the usage overflows the model limits;
 *  5. publishes `message.completed` with usage, cost and finish reason.
 *
 * Steps 1-4 are best-effort: failures are reported through `debugLog` so
 * that the completion event in step 5 is still published.
 *
 * Token totals are NOT updated here; that happens incrementally in
 * onStepFinish, and adding `fin.usage` again would double-count.
 *
 * @param opts - Run options (session, assistant message, provider, model,
 *   `isCompactCommand`).
 * @param db - Database handle used to read `messages` / `messageParts`.
 * @param completeAssistantMessageFn - Called with `(fin, opts, db)` before anything else.
 * @returns The async finish callback.
 */
export function createFinishHandler(
  opts: RunOpts,
  db: Awaited<ReturnType<typeof getDb>>,
  completeAssistantMessageFn: (
    fin: FinishEvent,
    opts: RunOpts,
    db: Awaited<ReturnType<typeof getDb>>,
  ) => Promise<void>,
) {
  // Reads the token counts stored on the assistant message row, if the row exists.
  const readStoredUsage = async () => {
    const messageRows = await db
      .select()
      .from(messages)
      .where(eq(messages.id, opts.assistantMessageId));
    const row = messageRows[0];
    if (!row) return undefined;
    return {
      inputTokens: Number(row.promptTokens ?? 0),
      outputTokens: Number(row.completionTokens ?? 0),
      totalTokens: Number(row.totalTokens ?? 0),
      cachedInputTokens: Number(row.cachedInputTokens ?? 0),
    };
  };

  return async (fin: FinishEvent) => {
    // Note: Token updates are handled incrementally in onStepFinish
    // Do NOT add fin.usage here as it would cause double-counting

    try {
      await completeAssistantMessageFn(fin, opts, db);
    } catch (err) {
      debugLog(
        `[stream-handlers] completeAssistantMessage failed: ${err instanceof Error ? err.message : String(err)}`,
      );
    }

    // If this was a /compact command, mark old parts as compacted.
    // Only mark as compacted if the response was successful and has content.
    if (opts.isCompactCommand && fin.finishReason !== 'error') {
      try {
        // Verify the assistant actually generated text content (the summary)
        const assistantParts = await db
          .select()
          .from(messageParts)
          .where(eq(messageParts.messageId, opts.assistantMessageId));
        const hasTextContent = assistantParts.some(
          (p) => p.type === 'text' && p.content && p.content !== '{"text":""}',
        );

        if (!hasTextContent) {
          debugLog(
            '[stream-handlers] /compact finished but no summary generated, skipping compaction marking',
          );
        } else {
          debugLog(
            `[stream-handlers] /compact complete, marking session compacted`,
          );
          const result = await markSessionCompacted(
            db,
            opts.sessionId,
            opts.assistantMessageId,
          );
          debugLog(
            `[stream-handlers] Compacted ${result.compacted} parts, saved ~${result.saved} tokens`,
          );
        }
      } catch (err) {
        debugLog(
          `[stream-handlers] Compaction failed: ${err instanceof Error ? err.message : String(err)}`,
        );
      }
    }

    // Prefer the totals stored on the assistant message row for the cost
    // calculation; fall back to the finish event's usage when the row is
    // missing or the read fails.
    let storedUsage: Awaited<ReturnType<typeof readStoredUsage>> = undefined;
    try {
      storedUsage = await readStoredUsage();
    } catch (err) {
      debugLog(
        `[stream-handlers] Usage lookup failed: ${err instanceof Error ? err.message : String(err)}`,
      );
    }
    const usage = storedUsage ?? fin.usage;

    const costUsd = usage
      ? estimateModelCostUsd(opts.provider, opts.model, usage)
      : undefined;

    // Check for context overflow and prune if needed
    if (usage) {
      try {
        const limits = getModelLimits(opts.provider, opts.model);
        if (limits) {
          const tokenUsage: TokenUsage = {
            input: usage.inputTokens ?? 0,
            output: usage.outputTokens ?? 0,
            cacheRead:
              (usage as { cachedInputTokens?: number }).cachedInputTokens ?? 0,
          };

          if (isOverflow(tokenUsage, limits)) {
            debugLog(
              `[stream-handlers] Context overflow detected, triggering prune for session ${opts.sessionId}`,
            );
            // Prune asynchronously - don't block the finish handler.
            // `void` marks the promise as intentionally not awaited.
            void pruneSession(db, opts.sessionId).catch((err) => {
              debugLog(
                `[stream-handlers] Prune failed: ${err instanceof Error ? err.message : String(err)}`,
              );
            });
          }
        }
      } catch (err) {
        debugLog(
          `[stream-handlers] Overflow check failed: ${err instanceof Error ? err.message : String(err)}`,
        );
      }
    }

    publish({
      type: 'message.completed',
      sessionId: opts.sessionId,
      payload: {
        id: opts.assistantMessageId,
        usage,
        costUsd,
        finishReason: fin.finishReason,
      },
    });
  };
}
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|