@agi-cli/server 0.1.58 → 0.1.61

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,4 @@
1
- import { hasToolCall, streamText } from 'ai';
1
+ import { streamText } from 'ai';
2
2
  import { loadConfig } from '@agi-cli/sdk';
3
3
  import { getDb } from '@agi-cli/database';
4
4
  import { messageParts } from '@agi-cli/database/schema';
@@ -7,11 +7,8 @@ import { resolveModel } from './provider.ts';
7
7
  import { resolveAgentConfig } from './agent-registry.ts';
8
8
  import { composeSystemPrompt } from './prompt.ts';
9
9
  import { discoverProjectTools } from '@agi-cli/sdk';
10
- import { adaptTools } from '../tools/adapter.ts';
11
- import { publish, subscribe } from '../events/bus.ts';
12
- import { debugLog, time } from './debug.ts';
10
+ import { publish } from '../events/bus.ts';
13
11
  import { buildHistoryMessages } from './history-builder.ts';
14
- import { toErrorPayload } from './error-handling.ts';
15
12
  import { getMaxOutputTokens } from './token-utils.ts';
16
13
  import {
17
14
  type RunOpts,
@@ -22,14 +19,11 @@ import {
22
19
  dequeueJob,
23
20
  cleanupSession,
24
21
  } from './session-queue.ts';
22
+ import { setupToolContext } from './tool-context-setup.ts';
25
23
  import {
26
- setupToolContext,
27
- type RunnerToolContext,
28
- } from './tool-context-setup.ts';
29
- import {
30
- updateSessionTokens,
24
+ updateSessionTokensIncremental,
25
+ updateMessageTokensIncremental,
31
26
  completeAssistantMessage,
32
- cleanupEmptyTextParts,
33
27
  } from './db-operations.ts';
34
28
  import {
35
29
  createStepFinishHandler,
@@ -37,175 +31,38 @@ import {
37
31
  createAbortHandler,
38
32
  createFinishHandler,
39
33
  } from './stream-handlers.ts';
34
+ import { addCacheControl } from './cache-optimizer.ts';
35
+ import { optimizeContext } from './context-optimizer.ts';
36
+ import { truncateHistory } from './history-truncator.ts';
40
37
 
41
38
  /**
42
- * Enqueues an assistant run for processing.
43
- */
44
- export function enqueueAssistantRun(opts: Omit<RunOpts, 'abortSignal'>) {
45
- enqueueRun(opts, processQueue);
46
- }
47
-
48
- /**
49
- * Aborts an active session.
39
+ * Main runner that executes the LLM streaming loop with tools
50
40
  */
51
- export function abortSession(sessionId: string) {
52
- abortSessionQueue(sessionId);
53
- }
54
-
55
- /**
56
- * Processes the queue of assistant runs for a session.
57
- */
58
- async function processQueue(sessionId: string) {
59
- const state = getRunnerState(sessionId);
60
- if (!state) return;
61
- if (state.running) return;
62
- setRunning(sessionId, true);
63
-
64
- while (state.queue.length > 0) {
65
- const job = dequeueJob(sessionId);
66
- if (!job) break;
67
- try {
68
- await runAssistant(job);
69
- } catch (_err) {
70
- // Swallow to keep the loop alive; event published by runner
71
- }
72
- }
73
-
74
- setRunning(sessionId, false);
75
- cleanupSession(sessionId);
76
- }
77
-
78
- /**
79
- * Ensures the finish tool is called if not already observed.
80
- */
81
- async function ensureFinishToolCalled(
82
- finishObserved: boolean,
83
- toolset: ReturnType<typeof adaptTools>,
84
- sharedCtx: RunnerToolContext,
85
- stepIndex: number,
86
- ) {
87
- if (finishObserved || !toolset?.finish?.execute) return;
88
-
89
- const finishInput = {} as const;
90
- const callOptions = { input: finishInput } as const;
91
-
92
- sharedCtx.stepIndex = stepIndex;
93
-
94
- try {
95
- await toolset.finish.onInputStart?.(callOptions as never);
96
- } catch {}
97
-
98
- try {
99
- await toolset.finish.onInputAvailable?.(callOptions as never);
100
- } catch {}
101
-
102
- await toolset.finish.execute(finishInput, {} as never);
103
- }
104
-
105
- /**
106
- * Main function to run the assistant for a given request.
107
- */
108
- async function runAssistant(opts: RunOpts) {
109
- const cfgTimer = time('runner:loadConfig+db');
110
- const cfg = await loadConfig(opts.projectRoot);
111
- const db = await getDb(cfg.projectRoot);
112
- cfgTimer.end();
113
-
114
- const agentTimer = time('runner:resolveAgentConfig');
115
- const agentCfg = await resolveAgentConfig(cfg.projectRoot, opts.agent);
116
- agentTimer.end({ agent: opts.agent });
117
-
118
- const agentPrompt = agentCfg.prompt || '';
119
-
120
- const historyTimer = time('runner:buildHistory');
121
- const history = await buildHistoryMessages(db, opts.sessionId);
122
- historyTimer.end({ messages: history.length });
123
-
124
- const isFirstMessage = history.length === 0;
125
-
126
- const systemTimer = time('runner:composeSystemPrompt');
127
- const { getAuth } = await import('@agi-cli/sdk');
128
- const { getProviderSpoofPrompt } = await import('./prompt.ts');
129
- const auth = await getAuth(opts.provider, cfg.projectRoot);
130
- const needsSpoof = auth?.type === 'oauth';
131
- const spoofPrompt = needsSpoof
132
- ? getProviderSpoofPrompt(opts.provider)
133
- : undefined;
134
-
135
- let system: string;
136
- let additionalSystemMessages: Array<{ role: 'system'; content: string }> = [];
137
-
138
- if (spoofPrompt) {
139
- system = spoofPrompt;
140
- const fullPrompt = await composeSystemPrompt({
141
- provider: opts.provider,
142
- model: opts.model,
143
- projectRoot: cfg.projectRoot,
144
- agentPrompt,
145
- oneShot: opts.oneShot,
146
- spoofPrompt: undefined,
147
- includeProjectTree: isFirstMessage,
148
- });
149
- additionalSystemMessages = [{ role: 'system', content: fullPrompt }];
150
- } else {
151
- system = await composeSystemPrompt({
152
- provider: opts.provider,
153
- model: opts.model,
154
- projectRoot: cfg.projectRoot,
155
- agentPrompt,
156
- oneShot: opts.oneShot,
157
- spoofPrompt: undefined,
158
- includeProjectTree: isFirstMessage,
159
- });
160
- }
161
- systemTimer.end();
162
- debugLog('[system] composed prompt (provider+base+agent):');
163
- debugLog(system);
164
-
165
- const toolsTimer = time('runner:discoverTools');
166
- const allTools = await discoverProjectTools(cfg.projectRoot);
167
- toolsTimer.end({ count: allTools.length });
168
- const allowedNames = new Set([
169
- ...(agentCfg.tools || []),
170
- 'finish',
171
- 'progress_update',
172
- ]);
173
- const gated = allTools.filter((t) => allowedNames.has(t.name));
174
- const messagesWithSystemInstructions = [
175
- ...(isFirstMessage ? additionalSystemMessages : []),
176
- ...history,
177
- ];
178
-
179
- const { sharedCtx, firstToolTimer, firstToolSeen } = await setupToolContext(
180
- opts,
181
- db,
182
- );
183
- const toolset = adaptTools(gated, sharedCtx);
184
-
185
- const modelTimer = time('runner:resolveModel');
186
- const model = await resolveModel(opts.provider, opts.model, cfg);
187
- modelTimer.end();
188
-
189
- const maxOutputTokens = getMaxOutputTokens(opts.provider, opts.model);
190
-
191
- let currentPartId = opts.assistantPartId;
41
+ export async function runAssistant(opts: RunOpts) {
42
+ const db = await getDb();
43
+ const config = await loadConfig();
44
+ const [provider, modelName] = opts.model.split('/', 2);
45
+ const model = resolveModel(provider, modelName);
46
+
47
+ // Build agent + system prompt
48
+ const agentConfig = resolveAgentConfig(opts.agent);
49
+ const availableTools = await discoverProjectTools(config.project.root);
50
+ const system = composeSystemPrompt(agentConfig, availableTools);
51
+
52
+ // Build message history
53
+ const history = await buildHistoryMessages(opts, db);
54
+
55
+ // Setup tool context
56
+ const toolContext = await setupToolContext(opts, db);
57
+ const { tools, sharedCtx } = toolContext;
58
+
59
+ // State
60
+ let currentPartId = sharedCtx.assistantPartId;
61
+ let stepIndex = sharedCtx.stepIndex;
192
62
  let accumulated = '';
193
- let stepIndex = 0;
63
+ const abortController = new AbortController();
194
64
 
195
- let finishObserved = false;
196
- const unsubscribeFinish = subscribe(opts.sessionId, (evt) => {
197
- if (evt.type !== 'tool.result') return;
198
- try {
199
- const name = (evt.payload as { name?: string } | undefined)?.name;
200
- if (name === 'finish') finishObserved = true;
201
- } catch {}
202
- });
203
-
204
- const streamStartTimer = time('runner:first-delta');
205
- let firstDeltaSeen = false;
206
- debugLog(`[streamText] Calling with maxOutputTokens: ${maxOutputTokens}`);
207
-
208
- // State management helpers
65
+ // State getters/setters
209
66
  const getCurrentPartId = () => currentPartId;
210
67
  const getStepIndex = () => stepIndex;
211
68
  const updateCurrentPartId = (id: string) => {
@@ -214,12 +71,10 @@ async function runAssistant(opts: RunOpts) {
214
71
  const updateAccumulated = (text: string) => {
215
72
  accumulated = text;
216
73
  };
217
- const incrementStepIndex = () => {
218
- stepIndex += 1;
219
- return stepIndex;
220
- };
74
+ const getAccumulated = () => accumulated;
75
+ const incrementStepIndex = () => ++stepIndex;
221
76
 
222
- // Create stream handlers
77
+ // Handlers
223
78
  const onStepFinish = createStepFinishHandler(
224
79
  opts,
225
80
  db,
@@ -229,85 +84,106 @@ async function runAssistant(opts: RunOpts) {
229
84
  updateCurrentPartId,
230
85
  updateAccumulated,
231
86
  incrementStepIndex,
87
+ updateSessionTokensIncremental,
88
+ updateMessageTokensIncremental,
232
89
  );
233
90
 
234
- const onError = createErrorHandler(opts, db, getStepIndex, sharedCtx);
235
-
236
- const onAbort = createAbortHandler(opts, db, getStepIndex, sharedCtx);
237
-
238
91
  const onFinish = createFinishHandler(
239
92
  opts,
240
93
  db,
241
- () => ensureFinishToolCalled(finishObserved, toolset, sharedCtx, stepIndex),
242
- updateSessionTokens,
243
94
  completeAssistantMessage,
95
+ getAccumulated,
96
+ abortController,
97
+ );
98
+
99
+ const _onAbort = createAbortHandler(opts, db, abortController);
100
+ const onError = createErrorHandler(opts, db);
101
+
102
+ // Context optimization
103
+ const contextOptimized = optimizeContext(history, {
104
+ deduplicateFiles: true,
105
+ maxToolResults: 30,
106
+ });
107
+
108
+ // Truncate history
109
+ const truncatedMessages = truncateHistory(contextOptimized, 20);
110
+
111
+ // Add cache control
112
+ const { system: cachedSystem, messages: optimizedMessages } = addCacheControl(
113
+ opts.provider,
114
+ system,
115
+ truncatedMessages,
244
116
  );
245
117
 
246
118
  try {
247
- const result = streamText({
119
+ const maxTokens = getMaxOutputTokens(provider, modelName);
120
+ const result = await streamText({
248
121
  model,
249
- tools: toolset,
250
- ...(String(system || '').trim() ? { system } : {}),
251
- messages: messagesWithSystemInstructions,
252
- ...(maxOutputTokens ? { maxOutputTokens } : {}),
253
- abortSignal: opts.abortSignal,
254
- stopWhen: hasToolCall('finish'),
122
+ system: cachedSystem,
123
+ messages: optimizedMessages,
124
+ tools,
125
+ maxSteps: 50,
126
+ maxTokens,
127
+ temperature: agentConfig.temperature ?? 0.7,
128
+ abortSignal: abortController.signal,
255
129
  onStepFinish,
256
- onError,
257
- onAbort,
258
130
  onFinish,
131
+ experimental_continueSteps: true,
259
132
  });
260
133
 
134
+ // Process the stream
261
135
  for await (const delta of result.textStream) {
262
- if (!delta) continue;
263
- if (!firstDeltaSeen) {
264
- firstDeltaSeen = true;
265
- streamStartTimer.end();
266
- }
136
+ if (abortController.signal.aborted) break;
137
+
267
138
  accumulated += delta;
268
- publish({
269
- type: 'message.part.delta',
139
+ if (currentPartId) {
140
+ await db
141
+ .update(messageParts)
142
+ .set({ content: accumulated })
143
+ .where(eq(messageParts.id, currentPartId));
144
+ }
145
+
146
+ publish('stream:text-delta', {
270
147
  sessionId: opts.sessionId,
271
- payload: {
272
- messageId: opts.assistantMessageId,
273
- partId: currentPartId,
274
- stepIndex,
275
- delta,
276
- },
148
+ messageId: opts.assistantMessageId,
149
+ assistantMessageId: opts.assistantMessageId,
150
+ stepIndex,
151
+ textDelta: delta,
152
+ fullText: accumulated,
277
153
  });
278
- await db
279
- .update(messageParts)
280
- .set({ content: JSON.stringify({ text: accumulated }) })
281
- .where(eq(messageParts.id, currentPartId));
282
154
  }
283
- } catch (error) {
284
- const errorPayload = toErrorPayload(error);
285
- await db
286
- .update(messageParts)
287
- .set({
288
- content: JSON.stringify({
289
- text: accumulated,
290
- error: errorPayload.message,
291
- }),
292
- })
293
- .where(eq(messageParts.messageId, opts.assistantMessageId));
294
- publish({
295
- type: 'error',
296
- sessionId: opts.sessionId,
297
- payload: {
298
- messageId: opts.assistantMessageId,
299
- error: errorPayload.message,
300
- details: errorPayload.details,
301
- },
302
- });
303
- throw error;
155
+ } catch (err) {
156
+ await onError(err);
304
157
  } finally {
305
- if (!firstToolSeen()) firstToolTimer.end({ skipped: true });
306
- try {
307
- unsubscribeFinish();
308
- } catch {}
309
- try {
310
- await cleanupEmptyTextParts(opts, db);
311
- } catch {}
158
+ setRunning(opts.sessionId, false);
159
+ dequeueJob(opts.sessionId);
312
160
  }
313
161
  }
162
+
163
+ /**
164
+ * Enqueues an assistant run by forwarding opts to enqueueRun and returning its result. NOTE(review): the previous version also passed a processQueue callback to enqueueRun; confirm it is no longer required.
165
+ */
166
+ export async function enqueueAssistantRun(opts: RunOpts) {
167
+ return enqueueRun(opts);
168
+ }
169
+
170
+ /**
171
+ * Aborts a running session by delegating to abortSessionQueue and returning its result. NOTE(review): sessionId is typed as number here, whereas the previous abortSession took a string; confirm against session-queue.
172
+ */
173
+ export async function abortSession(sessionId: number) {
174
+ return abortSessionQueue(sessionId);
175
+ }
176
+
177
+ /**
178
+ * Returns whatever getRunnerState yields for the given session. NOTE(review): the removed processQueue guarded against a falsy state, so callers presumably need to handle a missing state; confirm.
179
+ */
180
+ export function getSessionState(sessionId: number) {
181
+ return getRunnerState(sessionId);
182
+ }
183
+
184
+ /**
185
+ * Cleans up session resources by delegating to cleanupSession and returning its result.
186
+ */
187
+ export function cleanupSessionResources(sessionId: number) {
188
+ return cleanupSession(sessionId);
189
+ }
@@ -50,6 +50,8 @@ export async function createSession({
50
50
  lastActiveAt: null,
51
51
  totalInputTokens: null,
52
52
  totalOutputTokens: null,
53
+ totalCachedTokens: null,
54
+ totalReasoningTokens: null,
53
55
  totalToolTimeMs: null,
54
56
  toolCountsJson: null,
55
57
  };