@dex-ai/sdk 0.1.30

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85) hide show
  1. package/README.md +308 -0
  2. package/dist/agent.d.ts +181 -0
  3. package/dist/agent.d.ts.map +1 -0
  4. package/dist/agent.js +41 -0
  5. package/dist/agent.js.map +1 -0
  6. package/dist/context.d.ts +68 -0
  7. package/dist/context.d.ts.map +1 -0
  8. package/dist/context.js +8 -0
  9. package/dist/context.js.map +1 -0
  10. package/dist/create-agent.d.ts +7 -0
  11. package/dist/create-agent.d.ts.map +1 -0
  12. package/dist/create-agent.js +205 -0
  13. package/dist/create-agent.js.map +1 -0
  14. package/dist/extension.d.ts +162 -0
  15. package/dist/extension.d.ts.map +1 -0
  16. package/dist/extension.js +20 -0
  17. package/dist/extension.js.map +1 -0
  18. package/dist/generate.d.ts +10 -0
  19. package/dist/generate.d.ts.map +1 -0
  20. package/dist/generate.js +839 -0
  21. package/dist/generate.js.map +1 -0
  22. package/dist/index.d.ts +26 -0
  23. package/dist/index.d.ts.map +1 -0
  24. package/dist/index.js +16 -0
  25. package/dist/index.js.map +1 -0
  26. package/dist/message.d.ts +89 -0
  27. package/dist/message.d.ts.map +1 -0
  28. package/dist/message.js +17 -0
  29. package/dist/message.js.map +1 -0
  30. package/dist/messages.d.ts +98 -0
  31. package/dist/messages.d.ts.map +1 -0
  32. package/dist/messages.js +339 -0
  33. package/dist/messages.js.map +1 -0
  34. package/dist/model.d.ts +39 -0
  35. package/dist/model.d.ts.map +1 -0
  36. package/dist/model.js +11 -0
  37. package/dist/model.js.map +1 -0
  38. package/dist/provider.d.ts +157 -0
  39. package/dist/provider.d.ts.map +1 -0
  40. package/dist/provider.js +39 -0
  41. package/dist/provider.js.map +1 -0
  42. package/dist/resolve-schema.d.ts +44 -0
  43. package/dist/resolve-schema.d.ts.map +1 -0
  44. package/dist/resolve-schema.js +367 -0
  45. package/dist/resolve-schema.js.map +1 -0
  46. package/dist/schema.d.ts +80 -0
  47. package/dist/schema.d.ts.map +1 -0
  48. package/dist/schema.js +90 -0
  49. package/dist/schema.js.map +1 -0
  50. package/dist/tool-dispatch.d.ts +24 -0
  51. package/dist/tool-dispatch.d.ts.map +1 -0
  52. package/dist/tool-dispatch.js +120 -0
  53. package/dist/tool-dispatch.js.map +1 -0
  54. package/dist/tool-result-cache.d.ts +43 -0
  55. package/dist/tool-result-cache.d.ts.map +1 -0
  56. package/dist/tool-result-cache.js +118 -0
  57. package/dist/tool-result-cache.js.map +1 -0
  58. package/dist/tool.d.ts +96 -0
  59. package/dist/tool.d.ts.map +1 -0
  60. package/dist/tool.js +29 -0
  61. package/dist/tool.js.map +1 -0
  62. package/dist/util.d.ts +26 -0
  63. package/dist/util.d.ts.map +1 -0
  64. package/dist/util.js +104 -0
  65. package/dist/util.js.map +1 -0
  66. package/package.json +41 -0
  67. package/src/agent.ts +235 -0
  68. package/src/context.ts +82 -0
  69. package/src/create-agent.ts +237 -0
  70. package/src/extension.ts +244 -0
  71. package/src/generate.ts +943 -0
  72. package/src/index.ts +113 -0
  73. package/src/message.ts +114 -0
  74. package/src/messages.test.ts +299 -0
  75. package/src/messages.ts +423 -0
  76. package/src/model.ts +43 -0
  77. package/src/provider.ts +187 -0
  78. package/src/resolve-schema.test.ts +351 -0
  79. package/src/resolve-schema.ts +426 -0
  80. package/src/schema.ts +131 -0
  81. package/src/tool-dispatch.ts +166 -0
  82. package/src/tool-result-cache.test.ts +182 -0
  83. package/src/tool-result-cache.ts +164 -0
  84. package/src/tool.ts +110 -0
  85. package/src/util.ts +110 -0
@@ -0,0 +1,943 @@
1
+ /**
2
+ * The generate loop — event-driven.
3
+ *
4
+ * Emits events to extensions via ext.on['event-name']. Extensions subscribe
5
+ * to events they care about. The loop orchestrates model calls, tool dispatch,
6
+ * and context injection purely through events.
7
+ */
8
+
9
+ import type {
10
+ AgentContext,
11
+ AgentStream,
12
+ AgentStreamPart,
13
+ AnyTool,
14
+ Content,
15
+ ErrorSource,
16
+ Extension,
17
+ FinishReason,
18
+ GenerateContext,
19
+ GenerateOptions,
20
+ GenerateResult,
21
+ Message,
22
+ ModelRequest,
23
+ ToolCall,
24
+ ToolCallContent,
25
+ ToolResult,
26
+ Usage,
27
+ } from "./index";
28
+ import { mergeSignals, PushStream, formatError } from "./util";
29
+ import { dispatchTool } from "./tool-dispatch";
30
+ import {
31
+ TOOL_RESULT_CACHE_KEY,
32
+ type ResolvedCacheConfig,
33
+ } from "./tool-result-cache";
34
+ import { validateToolSchemas } from "./resolve-schema";
35
+
36
/** Zeroed usage record. Callers copy it (`{ ...EMPTY_USAGE }`) instead of sharing the instance. */
const EMPTY_USAGE: Usage = { inputTokens: 0, outputTokens: 0 };

/** Per-tool execution budget used when `GenerateOptions.toolTimeoutMs` is not set: 2 minutes. */
const DEFAULT_TOOL_TIMEOUT_MS = 2 * 60 * 1000;
38
+
39
+ /* ------------------------------------------------------------------ */
40
+ /* Structured Error Logging */
41
+ /* ------------------------------------------------------------------ */
42
+
43
/**
 * Render an ErrorSource as a compact, space-separated log prefix.
 *
 * The `[error:<kind>]` tag always comes first. `ext=`, `event=` and `tool=`
 * fields follow in that order, each only when the matching property is truthy.
 */
function formatErrorSource(source: ErrorSource): string {
  let line = `[error:${source.kind}]`;
  if (source.extensionName) {
    line += ` ext=${source.extensionName}`;
  }
  if (source.event) {
    line += ` event=${source.event}`;
  }
  if (source.call) {
    line += ` tool=${source.call.toolName}`;
  }
  return line;
}
53
+
54
/**
 * Sum two usage records field by field.
 *
 * `inputTokens` and `outputTokens` are always present in the result. The
 * optional counters (`totalTokens`, `cachedInputTokens`,
 * `cacheCreationInputTokens`, `reasoningTokens`) are included only when their
 * sum is greater than zero.
 *
 * `totalTokens`: when at least one operand reports it, an operand that does
 * not is counted as `inputTokens + outputTokens` rather than 0, so the running
 * total is not under-counted when only some responses carry the field. When
 * neither operand reports it, the field is omitted.
 *
 * @param a - First usage record (typically the running total).
 * @param b - Second usage record (typically the latest iteration).
 * @returns A new usage record; neither argument is mutated.
 */
function addUsage(a: Usage, b: Usage): Usage {
  const totalOf = (u: Usage): number =>
    u.totalTokens ?? u.inputTokens + u.outputTokens;
  const anyTotalReported =
    a.totalTokens !== undefined || b.totalTokens !== undefined;

  const total = anyTotalReported ? totalOf(a) + totalOf(b) : 0;
  const cached = (a.cachedInputTokens ?? 0) + (b.cachedInputTokens ?? 0);
  const cacheCreation =
    (a.cacheCreationInputTokens ?? 0) + (b.cacheCreationInputTokens ?? 0);
  const reasoning = (a.reasoningTokens ?? 0) + (b.reasoningTokens ?? 0);

  return {
    inputTokens: a.inputTokens + b.inputTokens,
    outputTokens: a.outputTokens + b.outputTokens,
    ...(total > 0 ? { totalTokens: total } : {}),
    ...(cached > 0 ? { cachedInputTokens: cached } : {}),
    ...(cacheCreation > 0 ? { cacheCreationInputTokens: cacheCreation } : {}),
    ...(reasoning > 0 ? { reasoningTokens: reasoning } : {}),
  };
}
69
+
70
+ /* ------------------------------------------------------------------ */
71
+ /* Tool collection */
72
+ /* ------------------------------------------------------------------ */
73
+
74
/**
 * Keys of tool-schema messages already logged, so repeated generate() calls
 * do not print the same diagnostic or exclusion notice again.
 */
const _warnedTools = new Set<string>();

/**
 * Build the name -> tool map offered to the model from all extensions.
 *
 * For each extension (in order):
 *  - `ext.tools` may be a single tool or an array; `undefined` is skipped.
 *  - Schemas are validated via `validateToolSchemas`; every diagnostic is
 *    logged once per extension/tool/severity.
 *  - Tools with an "error"-severity diagnostic are left out of the map. The
 *    exclusion notice is logged once per extension/tool, not on every call.
 *  - A tool name already registered by an extension with the same name is
 *    treated as a duplicate extension instance and skipped silently.
 *
 * @param extensions - Extensions to scan, in priority order.
 * @returns Map of tool name to tool; the first registration wins.
 * @throws Error when two differently named extensions register the same tool name.
 */
function collectTools(
  extensions: ReadonlyArray<Extension>,
): Map<string, AnyTool> {
  const byName = new Map<string, AnyTool>();
  const toolSources = new Map<string, string>(); // tool name -> first extension name

  for (const ext of extensions) {
    const raw = ext.tools;
    if (raw === undefined) continue;
    const list: ReadonlyArray<AnyTool> = Array.isArray(raw)
      ? raw
      : [raw as AnyTool];

    // Validate eagerly so a broken schema never reaches the provider.
    const diagnostics = validateToolSchemas(list, { extensionName: ext.name });
    const errorTools = new Set(
      diagnostics.filter((d) => d.severity === "error").map((d) => d.toolName),
    );

    // Log each diagnostic only the first time it is seen.
    for (const d of diagnostics) {
      const key = `${ext.name}:${d.toolName}:${d.severity}`;
      if (_warnedTools.has(key)) continue;
      _warnedTools.add(key);
      const prefix = d.severity === "error" ? "ERROR" : "WARN";
      const src = d.extensionName ? ` [${d.extensionName}]` : "";
      console.error(`[tool-schema:${prefix}] ${d.toolName}${src}: ${d.issue}`);
    }

    for (const tool of list) {
      if (errorTools.has(tool.name)) {
        // Same once-only policy as the diagnostics above.
        const excludedKey = `${ext.name}:${tool.name}:excluded`;
        if (!_warnedTools.has(excludedKey)) {
          _warnedTools.add(excludedKey);
          console.error(
            `[tool-schema] Excluding tool "${tool.name}" from extension "${ext.name}" due to schema errors`,
          );
        }
        continue;
      }

      if (byName.has(tool.name)) {
        const firstSource = toolSources.get(tool.name);
        // Same extension name => the extension was registered twice; keep the first copy.
        if (firstSource === ext.name) continue;
        // Different extensions claiming one name is a real conflict.
        throw new Error(
          `Duplicate tool name: ${tool.name} (from extension ${ext.name}, already registered by ${firstSource})`,
        );
      }

      byName.set(tool.name, tool);
      toolSources.set(tool.name, ext.name);
    }
  }

  return byName;
}
132
+
133
+ /* ------------------------------------------------------------------ */
134
+ /* Context collection */
135
+ /* ------------------------------------------------------------------ */
136
+
137
/**
 * Run each extension's `generate-start` handler, in extension order, and
 * gather the Content items produced by handlers that return an async iterable.
 *
 * Handlers returning anything else contribute nothing. A handler that throws
 * (or whose iterable throws) is logged with `console.warn` and skipped; items
 * it yielded before failing are kept.
 */
async function collectContext(
  extensions: ReadonlyArray<Extension>,
  gctx: GenerateContext,
): Promise<Content[]> {
  const gathered: Content[] = [];

  for (const ext of extensions) {
    const onGenerateStart = ext.on?.["generate-start"];
    if (onGenerateStart) {
      try {
        const produced = await onGenerateStart(gctx);
        const isAsyncIterable =
          Boolean(produced) && Symbol.asyncIterator in (produced as object);
        if (isAsyncIterable) {
          for await (const item of produced as AsyncIterable<Content>) {
            gathered.push(item);
          }
        }
      } catch (err) {
        console.warn(`[${ext.name}] generate-start context failed:`, err);
      }
    }
  }

  return gathered;
}
159
+
160
+ /* ------------------------------------------------------------------ */
161
+ /* Helpers */
162
+ /* ------------------------------------------------------------------ */
163
+
164
/** Wrap content parts in an assistant-role message; the array is used as-is, not copied. */
function toAssistantMessage(content: ReadonlyArray<Content>): Message {
  const message: Message = { role: "assistant", content };
  return message;
}
167
+
168
/**
 * Append a message to the agent history and announce it on the stream.
 *
 * When `generateId` is truthy the message is copied with that id attached;
 * otherwise it is appended unchanged. The emitted `message-committed` part
 * carries whatever now sits at the end of `actx.messages`, plus its index.
 */
function commitMessage(
  actx: AgentContext,
  message: Message,
  emit: (p: AgentStreamPart) => void,
  generateId?: string,
) {
  let outgoing: Message = message;
  if (generateId) {
    outgoing = { ...message, generateId };
  }
  actx.appendMessage(outgoing);

  const index = actx.messages.length - 1;
  const committed = actx.messages[index]!;
  emit({ type: "message-committed", message: committed, index });
}
183
+
184
/**
 * Create the per-call GenerateContext handed to extension handlers.
 *
 * Starts with empty content, an empty state map, step 0 and a fresh copy of
 * the zeroed usage record. `maxSteps` falls back to 10 when the caller does
 * not provide one.
 */
function newGenerateContext(
  actx: AgentContext,
  opts: GenerateOptions,
  signal: AbortSignal,
  generateId: string,
): GenerateContext {
  const maxSteps = opts.maxSteps ?? 10;
  const fresh: GenerateContext = {
    agent: actx,
    generateId,
    content: [],
    state: new Map(),
    stepCount: 0,
    maxSteps,
    usage: { ...EMPTY_USAGE },
    signal,
  };
  return fresh;
}
201
+
202
/**
 * Assemble the base ModelRequest for one model call.
 *
 * `messages` (the live agent history) and `signal` are always set. `tools` is
 * added only when the list is non-empty, and each sampling/provider option is
 * copied only when the caller supplied a value other than `undefined`, so
 * absent options never appear as explicit `undefined` keys.
 */
function buildRequest(
  actx: AgentContext,
  tools: ReadonlyArray<AnyTool>,
  opts: GenerateOptions,
  signal: AbortSignal,
): ModelRequest {
  // Writable view of ModelRequest so optional fields can be filled in one by one.
  type Draft = { -readonly [K in keyof ModelRequest]: ModelRequest[K] };

  const draft: Draft = { messages: actx.messages, signal };

  if (tools.length) {
    draft.tools = tools;
  }
  if (opts.temperature !== undefined) {
    draft.temperature = opts.temperature;
  }
  if (opts.topP !== undefined) {
    draft.topP = opts.topP;
  }
  if (opts.maxTokens !== undefined) {
    draft.maxTokens = opts.maxTokens;
  }
  if (opts.stopSequences !== undefined) {
    draft.stopSequences = opts.stopSequences;
  }
  if (opts.seed !== undefined) {
    draft.seed = opts.seed;
  }
  if (opts.providerOptions !== undefined) {
    draft.providerOptions = opts.providerOptions;
  }
  if (opts.thinking !== undefined) {
    draft.thinking = opts.thinking;
  }

  return draft;
}
227
+
228
/**
 * Compute cache breakpoint indices (positions in `messages`) for providers
 * with explicit prompt caching.
 *
 * Only real user messages count as turn boundaries: role "user" with a `type`
 * other than "context-turn". Tool results have role "tool" and are ignored.
 *
 * Strategy:
 * - Step 0: the newest user message is new content, so the primary breakpoint
 *   is the second-to-last user message (the previous turn's boundary).
 * - Step N > 0: the current turn's user message was already sent on the
 *   previous step, so the primary breakpoint is the last user message.
 * - With 6 or more user messages, an extra breakpoint is placed earlier, at
 *   the user message roughly one third of the way through the turns, provided
 *   it is not index 0 and lies before the primary breakpoint.
 *
 * Respects a limit of 2 message breakpoints (Anthropic allows 4 total,
 * but 1 is used for system and 1 for tools).
 *
 * @param messages - Final request messages, after extensions have edited them.
 * @param step - Zero-based iteration index within the current generate() call.
 * @returns Up to two ascending indices; empty when there are fewer than 4
 *   messages or fewer than 2 user messages.
 */
function computeCacheBreakpoints(
  messages: ReadonlyArray<Message>,
  step: number,
): number[] {
  if (messages.length < 4) return [];

  // Indices of user messages that start a turn.
  const userIndices: number[] = [];
  messages.forEach((m, i) => {
    if (m.role === "user" && m.type !== "context-turn") {
      userIndices.push(i);
    }
  });

  if (userIndices.length < 2) return [];

  // Previously the second-to-last user message was used for every step, which
  // on step N > 0 pointed at the previous turn instead of the current one.
  const primary =
    step > 0
      ? userIndices[userIndices.length - 1]!
      : userIndices[userIndices.length - 2]!;
  const breakpoints: number[] = [primary];

  // Long conversations: keep an early prefix cached as history grows.
  if (userIndices.length >= 6) {
    const earlyIdx = userIndices[Math.floor(userIndices.length / 3)]!;
    if (earlyIdx > 0 && earlyIdx < primary) {
      breakpoints.unshift(earlyIdx);
    }
  }

  // Limit to 2 breakpoints for messages (system + tools use the other 2).
  return breakpoints.slice(0, 2);
}
280
+
281
+ /* ------------------------------------------------------------------ */
282
+ /* Entry point */
283
+ /* ------------------------------------------------------------------ */
284
+
285
/**
 * Start a generate() run and return its stream immediately.
 *
 * The loop is started on a microtask, so the caller receives the stream (and
 * can begin iterating it) before any part is pushed. The returned object is
 * the PushStream itself with an added `result` promise:
 *  - on success `result` resolves with the GenerateResult;
 *  - on failure the stream is failed with the error and `result` rejects
 *    with the same error.
 *
 * A no-op rejection handler is attached to `result` so that callers who only
 * consume the stream (and never touch `result`) do not trigger an unhandled
 * promise rejection. Callers that await `result` still observe the rejection.
 *
 * @param actx - Agent context the run operates on.
 * @param opts - Options for this run.
 * @returns The stream of parts, augmented with `result`.
 */
export function runGenerate(
  actx: AgentContext,
  opts: GenerateOptions,
): AgentStream {
  const stream = new PushStream<AgentStreamPart>();

  let resolveResult!: (r: GenerateResult) => void;
  let rejectResult!: (err: unknown) => void;
  const resultPromise = new Promise<GenerateResult>((resolve, reject) => {
    resolveResult = resolve;
    rejectResult = reject;
  });
  // The error is also delivered through stream.fail(); mark the promise as handled.
  resultPromise.catch(() => {});

  queueMicrotask(() => {
    loop(actx, opts, stream).then(resolveResult, (err) => {
      stream.fail(err);
      rejectResult(err);
    });
  });

  return Object.assign(stream, { result: resultPromise }) as AgentStream;
}
306
+
307
+ /* ------------------------------------------------------------------ */
308
+ /* The loop */
309
+ /* ------------------------------------------------------------------ */
310
+
311
/**
 * Settle with `work` unless the timeout elapses or `signal` aborts first.
 *
 * Rejects with `Error("TOOL_TIMEOUT")` when `timeoutMs` elapses first, or with
 * `signal.reason` when the signal aborts first (or is already aborted). Once
 * the race settles the timer is cleared and the abort listener removed, so a
 * finished tool call leaves no pending timer or listener behind.
 */
function raceWithTimeoutAndAbort<T>(
  work: Promise<T>,
  timeoutMs: number,
  signal: AbortSignal,
): Promise<T> {
  let timer: ReturnType<typeof setTimeout> | undefined;
  let onAbort: (() => void) | undefined;

  const timeout = new Promise<never>((_, reject) => {
    timer = setTimeout(() => reject(new Error(`TOOL_TIMEOUT`)), timeoutMs);
  });
  const aborted = new Promise<never>((_, reject) => {
    if (signal.aborted) {
      reject(signal.reason);
      return;
    }
    onAbort = () => reject(signal.reason);
    signal.addEventListener("abort", onAbort, { once: true });
  });

  return Promise.race([work, timeout, aborted]).finally(() => {
    if (timer !== undefined) clearTimeout(timer);
    if (onAbort) signal.removeEventListener("abort", onAbort);
  });
}

/**
 * Drive a single generate() call from input to result.
 *
 * Phases:
 *  1. `generate-input` — extensions may return extra content, which is
 *     appended to the last user message of the input.
 *  2. Input messages are stamped with the generateId and committed.
 *  3. `generate-start` — extensions may stream ephemeral context; it is added
 *     to the step-0 request only and never committed to history.
 *  4. Iterations, each of which:
 *       a. builds the request and lets `model-start` handlers replace it,
 *       b. computes cache breakpoints on the final message list,
 *       c. streams the model, forwarding parts and accumulating content,
 *       d. commits the assistant message and fires `message-stop` / `model-stop`,
 *       e. dispatches tool calls sequentially (`tool-start` -> execute -> `tool-stop`),
 *       f. continues only while the model asked for tools, tools were
 *          dispatched, the step budget allows it, and nothing was aborted.
 *  5. `generate-stop`, then the `generate-finish` part, and the stream is closed.
 *
 * Error policy: handlers for notification events (deltas, message-start/stop,
 * model-stop, generate-stop) are best-effort and only logged. Errors from the
 * provider stream, `model-start` and `tool-start` handlers are reported and
 * rethrown. Tool failures and timeouts become error tool-results so every
 * tool-call has a matching tool-result.
 *
 * Abort: once the merged signal aborts, the reported finishReason is "abort"
 * and no further step is started.
 *
 * @param actx - Agent context (history, model, extensions, state).
 * @param opts - Options for this run.
 * @param stream - Stream that receives every emitted part; closed on success.
 * @returns The final GenerateResult.
 * @throws Rethrows the failure after reporting it; the caller fails the stream.
 */
async function loop(
  actx: AgentContext,
  opts: GenerateOptions,
  stream: PushStream<AgentStreamPart>,
): Promise<GenerateResult> {
  const extensions = actx.extensions;
  const tools = collectTools(extensions);
  const toolList = Array.from(tools.values());
  const generateId = crypto.randomUUID();
  const emit = (p: AgentStreamPart) => stream.push(p);
  const signals = mergeSignals(actx.signal, opts.signal);
  const signal = signals.signal;
  const gctx = newGenerateContext(actx, opts, signal, generateId);
  const cacheConfig = actx.state.get(TOOL_RESULT_CACHE_KEY) as
    | ResolvedCacheConfig
    | undefined;

  /** Log an error, surface it on the stream, and fan it out to `error` handlers. */
  const reportError = async (err: unknown, source: ErrorSource) => {
    // Always log structured error with stack trace for troubleshooting.
    const formatted = formatError(err);
    const sourceCtx = formatErrorSource(source);
    console.error(`${sourceCtx} ${formatted}`);

    // Emit extension-info error part so TUI/headless can display it.
    try {
      emit({
        type: "extension-info",
        extension: "generate-loop",
        level: "error",
        text: `${sourceCtx} ${err instanceof Error ? err.message : String(err)}`,
      });
    } catch {
      /* stream may be closed */
    }

    for (const ext of extensions) {
      const handler = ext.on?.["error"];
      if (!handler) continue;
      try {
        await handler(err, source, gctx);
      } catch (handlerErr) {
        // Log but don't propagate — error handlers must not crash the loop.
        console.error(
          `[error-handler:${ext.name}] handler threw: ${formatError(handlerErr)}`,
        );
      }
    }
  };

  /**
   * Best-effort notification: call the picked handler of every extension in
   * order, awaiting each; a throwing handler is logged and does not stop the
   * others. Handlers are invoked as plain functions (no `this`).
   */
  const notify = async <H>(
    event: string,
    pick: (ext: Extension) => H | undefined,
    invoke: (handler: H) => unknown,
  ): Promise<void> => {
    for (const ext of extensions) {
      const handler = pick(ext);
      if (!handler) continue;
      try {
        await invoke(handler);
      } catch (err) {
        console.error(`[${ext.name}:${event}] ${formatError(err)}`);
      }
    }
  };

  /** Extend the trailing text/reasoning part of the current content, or start a new one. */
  const appendDelta = (kind: "text" | "reasoning", delta: string): void => {
    const parts = gctx.content as Content[];
    const last = parts[parts.length - 1];
    if (last && last.type === kind) {
      parts[parts.length - 1] = { ...last, text: last.text + delta };
    } else {
      parts.push({ type: kind, text: delta });
    }
  };

  const toolResultMessages: Message[] = [];

  /** Commit a tool-role message carrying one tool-result and record it for the final result. */
  const commitToolResult = (
    toolCallId: ToolResult["toolCallId"],
    toolName: ToolResult["toolName"],
    output: ToolResult["output"],
  ): void => {
    const msg: Message = {
      role: "tool",
      content: [{ type: "tool-result", toolCallId, toolName, output }],
    };
    commitMessage(actx, msg, emit, generateId);
    toolResultMessages.push(msg);
  };

  let finalAssistant: Message | null = null;
  let finishReason: FinishReason = "stop";
  let generateStopFired = false;

  signal.addEventListener(
    "abort",
    () => {
      finishReason = "abort";
      emit({ type: "abort", reason: signal.reason });
    },
    { once: true },
  );

  try {
    // 1. Fire generate-input — let extensions augment input messages.
    const inputMessages = [...opts.input] as Message[];
    for (const ext of extensions) {
      const handler = ext.on?.["generate-input"];
      if (!handler) continue;
      try {
        const extra = await handler(inputMessages, gctx);
        if (extra && extra.length > 0) {
          // Append to the last user message's content.
          let lastUserIdx = -1;
          for (let i = inputMessages.length - 1; i >= 0; i--) {
            if (inputMessages[i]!.role === "user") {
              lastUserIdx = i;
              break;
            }
          }
          if (lastUserIdx >= 0) {
            const msg = inputMessages[lastUserIdx]!;
            inputMessages[lastUserIdx] = {
              ...msg,
              content: [...msg.content, ...extra],
            };
          }
        }
      } catch (err) {
        // Input augmentation is optional: report and carry on.
        await reportError(err, {
          kind: "extension",
          extensionName: ext.name,
          event: "generate-input",
        });
      }
    }

    // 2. Append input messages (stamped with generateId for turn tracking).
    for (const m of inputMessages) {
      const stamped: Message = { ...m, generateId };
      actx.appendMessage(stamped);
      emit({
        type: "message-committed",
        message: stamped,
        index: actx.messages.length - 1,
      });
    }

    // 3. emit('generate-start') — collect ephemeral context.
    emit({
      type: "generate-start",
      maxSteps: gctx.maxSteps,
      startedAt: Date.now(),
      generateId,
    });
    const turnContext = await collectContext(extensions, gctx);

    // Build an ephemeral context message that is injected into the model
    // request but never committed to actx.messages or persisted to session.
    // The model sees it on the current turn but it does not pollute history
    // on resume.
    const ephemeralCtxMsg: Message | null =
      turnContext.length > 0
        ? { role: "user", content: [...turnContext], type: "context-turn" }
        : null;

    // 4. Iteration loop.
    let step = 0;
    (gctx as { stepCount: number }).stepCount = step;

    while (true) {
      if (signal.aborted) {
        // Also covers a signal that was aborted before the listener was attached.
        finishReason = "abort";
        break;
      }

      // 4a. emit('model-start') — reducer: transform request.
      let req = buildRequest(actx, toolList, opts, signal);

      // Inject ephemeral context into request messages (not persisted).
      // Only on step 0 — on later steps, injecting a user message after
      // tool results creates a role-order violation that makes the model
      // think a new turn started, causing it to repeat itself.
      if (ephemeralCtxMsg && step === 0) {
        req = { ...req, messages: [...req.messages, ephemeralCtxMsg] };
      }
      for (const ext of extensions) {
        const handler = ext.on?.["model-start"];
        if (!handler) continue;
        try {
          const out = await handler(req, gctx);
          if (
            out !== undefined &&
            out !== null &&
            typeof out === "object" &&
            "messages" in out
          ) {
            req = out as ModelRequest;
          }
        } catch (err) {
          await reportError(err, {
            kind: "extension",
            extensionName: ext.name,
            event: "model-start",
          });
          throw err;
        }
      }

      // 4b. Compute cache breakpoints for providers that support explicit caching.
      // Placed after extensions modify messages so breakpoints target final positions.
      const cacheBreakpoints = computeCacheBreakpoints(req.messages, step);
      if (cacheBreakpoints.length > 0) {
        req = { ...req, cacheBreakpoints };
      }

      // 4c. Stream from model.
      const model = actx.model;
      (gctx as { content: Content[] }).content = [];
      const pendingToolCalls: ToolCallContent[] = [];
      let iterFinishReason: FinishReason | null = null;
      let iterUsage: Usage = { ...EMPTY_USAGE };
      let committedAssistant: Message | null = null;

      emit({ type: "iteration-start", step, startedAt: Date.now() });
      await notify(
        "message-start",
        (ext) => ext.on?.["message-start"],
        (h) => h(gctx),
      );

      try {
        for await (const part of model.stream(req)) {
          if (signal.aborted) break;
          emit(part);

          if (part.type === "text-delta") {
            await notify(
              "text-delta",
              (ext) => ext.on?.["text-delta"],
              (h) => h(part.delta, gctx),
            );
            appendDelta("text", part.delta);
          } else if (part.type === "reasoning-delta") {
            await notify(
              "reasoning-delta",
              (ext) => ext.on?.["reasoning-delta"],
              (h) => h(part.delta, gctx),
            );
            appendDelta("reasoning", part.delta);
          } else if (part.type === "tool-call-delta") {
            await notify(
              "tool-call-delta",
              (ext) => ext.on?.["tool-call-delta"],
              (h) =>
                h(
                  {
                    toolCallId: part.toolCallId,
                    toolName: part.toolName,
                    inputDelta: part.inputDelta,
                  },
                  gctx,
                ),
            );
          } else if (part.type === "tool-call") {
            const tc: ToolCallContent = {
              type: "tool-call",
              toolCallId: part.toolCallId,
              toolName: part.toolName,
              input: part.input,
            };
            gctx.content.push(tc);
            pendingToolCalls.push(tc);
          } else if (part.type === "message-stop") {
            committedAssistant = part.message;
          } else if (part.type === "finish") {
            iterFinishReason = part.reason;
            iterUsage = part.usage;
          } else if (part.type === "error") {
            // Provider emitted a structured error (non-200 response, malformed body, etc.).
            // Treat as a fatal stream error — surface it to the caller.
            const providerErr =
              part.error instanceof Error
                ? part.error
                : new Error(String(part.error));
            throw providerErr;
          } else if (part.type === "abort") {
            // Record the abort as this iteration's outcome so the step-finish
            // bookkeeping below does not overwrite it with "stop".
            iterFinishReason = "abort";
            finishReason = "abort";
            break;
          }
        }
      } catch (err) {
        await reportError(err, { kind: "provider", request: req });

        // Commit partial/error assistant message to maintain sequence validity.
        // The user message was already committed — if we throw without an
        // assistant message, the next generate() call will fail with
        // "user follows user" validation error.
        const errorText = err instanceof Error ? err.message : String(err);
        const errorContent: Content[] =
          gctx.content.length > 0
            ? [...gctx.content] // Preserve any partial text streamed before failure
            : [{ type: "text", text: `[Error: ${errorText}]` }];
        const errorMsg = toAssistantMessage(errorContent);
        commitMessage(actx, errorMsg, emit, generateId);
        finalAssistant = errorMsg;

        throw err;
      }

      if (signal.aborted) {
        finishReason = "abort";
        break;
      }

      // 4d. Commit assistant message + emit('message-stop').
      const asstMsg = committedAssistant ?? toAssistantMessage(gctx.content);
      commitMessage(actx, asstMsg, emit, generateId);
      finalAssistant = asstMsg;
      gctx.usage = addUsage(gctx.usage, iterUsage);

      await notify(
        "message-stop",
        (ext) => ext.on?.["message-stop"],
        (h) => h(asstMsg, gctx),
      );

      // Update actx.tokenCount to reflect current session size.
      // Each API call sends the full conversation, so the last call's
      // totalTokens IS the session size.
      actx.tokenCount =
        iterUsage.totalTokens ?? iterUsage.inputTokens + iterUsage.outputTokens;

      await notify(
        "model-stop",
        (ext) => ext.on?.["model-stop"],
        (h) => h(gctx),
      );

      // 4e. Dispatch tool calls, one at a time, in the order the model issued them.
      if (pendingToolCalls.length > 0) {
        for (const tc of pendingToolCalls) {
          if (signal.aborted) break;
          const tool = tools.get(tc.toolName);
          if (!tool) {
            commitToolResult(tc.toolCallId, tc.toolName, {
              type: "error-text",
              value: `Unknown tool: ${tc.toolName}`,
            });
            continue;
          }

          // emit('tool-start') — intercept/modify/reject.
          const call: ToolCall = {
            toolCallId: tc.toolCallId,
            toolName: tc.toolName,
            input: tc.input,
          };
          let effectiveCall = call;
          let shortCircuited: ToolResult | null = null;

          for (const ext of extensions) {
            const handler = ext.on?.["tool-start"];
            if (!handler) continue;
            try {
              const out = await handler(effectiveCall, gctx);
              if (!out) continue;
              if ("output" in out) {
                // A handler supplied the result itself: skip execution.
                shortCircuited = out as ToolResult;
                break;
              }
              effectiveCall = out as ToolCall;
            } catch (err) {
              await reportError(err, {
                kind: "extension",
                extensionName: ext.name,
                event: "tool-start",
              });
              throw err;
            }
          }

          let result: ToolResult;
          const toolStartedAt = Date.now();
          emit({
            type: "tool-execute-start",
            call: effectiveCall,
            startedAt: toolStartedAt,
          });

          if (shortCircuited) {
            result = shortCircuited;
          } else {
            const timeoutMs = opts.toolTimeoutMs ?? DEFAULT_TOOL_TIMEOUT_MS;
            try {
              // Race timeout and abort so execution is interrupted promptly;
              // the helper releases the timer and listener afterwards.
              const outcome = await raceWithTimeoutAndAbort(
                dispatchTool({
                  call: effectiveCall,
                  tool,
                  extensions,
                  gctx,
                  reportError,
                  ...(cacheConfig ? { cacheConfig } : {}),
                }),
                timeoutMs,
                signal,
              );
              result = outcome.result;
            } catch (err) {
              if (signal.aborted) {
                // Abort won the race — break out of tool loop.
                break;
              }
              // Synthesize an error result for ANY failure (timeout,
              // network, unexpected throw). This ensures every tool-call
              // always gets a matching tool-result — no orphaned state.
              const errorValue =
                err instanceof Error && err.message === "TOOL_TIMEOUT"
                  ? `Tool "${effectiveCall.toolName}" timed out after ${timeoutMs}ms. Retry with a longer timeout or simplify the request.`
                  : err instanceof Error
                    ? `Tool execution failed: ${err.message}`
                    : "Tool call failed (internal error during execution).";
              result = {
                toolCallId: effectiveCall.toolCallId,
                toolName: effectiveCall.toolName,
                output: {
                  type: "error-text",
                  value: errorValue,
                },
              };
              await reportError(err, {
                kind: "tool",
                call: effectiveCall,
              });
            }
          }

          // emit('tool-stop') — reducer: a handler may replace the result.
          for (const ext of extensions) {
            const handler = ext.on?.["tool-stop"];
            if (!handler) continue;
            try {
              const out = await handler(result, gctx);
              if (out !== undefined) result = out;
            } catch (err) {
              await reportError(err, {
                kind: "extension",
                extensionName: ext.name,
                event: "tool-stop",
              });
            }
          }

          emit({
            type: "tool-execute-finish",
            call: effectiveCall,
            result,
            startedAt: toolStartedAt,
            endedAt: Date.now(),
          });

          commitToolResult(result.toolCallId, result.toolName, result.output);
        }

        // Cleanup: if abort interrupted the tool loop, synthesize error
        // results for any tool calls that never got a response. This keeps
        // the message history valid (every tool-call has a tool-result).
        if (signal.aborted) {
          const fulfilled = new Set(
            toolResultMessages.map((m) => {
              const c = m.content[0];
              return c && c.type === "tool-result" ? c.toolCallId : "";
            }),
          );
          for (const tc of pendingToolCalls) {
            if (fulfilled.has(tc.toolCallId)) continue;
            commitToolResult(tc.toolCallId, tc.toolName, {
              type: "error-text",
              value: "Tool call aborted.",
            });
          }
        }
      }

      // 4f. Step finished.
      const iterReason = iterFinishReason ?? "stop";
      emit({
        type: "iteration-finish",
        step,
        endedAt: Date.now(),
        finishReason: iterReason,
        usage: iterUsage,
      });

      // An abort during tool dispatch must win over the model's own finish
      // reason, and must not start (or count) another step.
      const aborted = signal.aborted;
      finishReason = aborted ? "abort" : iterReason;

      const shouldContinue =
        !aborted &&
        iterReason === "tool-calls" &&
        pendingToolCalls.length > 0 &&
        step + 1 < gctx.maxSteps;
      if (!shouldContinue) break;
      step++;
      (gctx as { stepCount: number }).stepCount = step;
    }

    // 5. emit('generate-stop').
    generateStopFired = true;
    await notify(
      "generate-stop",
      (ext) => ext.on?.["generate-stop"],
      (h) => h(gctx),
    );

    // 6. Result.
    const result: GenerateResult = {
      generateId,
      message: finalAssistant ?? { role: "assistant", content: [] },
      toolResultMessages,
      usage: gctx.usage,
      finishReason,
      steps: step + 1,
    };
    // actx.tokenCount already updated after each model call above.
    emit({ type: "generate-finish", result });
    stream.close();
    signals.dispose();
    return result;
  } catch (err) {
    // Fire generate-stop even on error so extensions (e.g. session persistence)
    // can flush any committed messages before the error propagates.
    if (!generateStopFired) {
      await notify(
        "generate-stop",
        (ext) => ext.on?.["generate-stop"],
        (h) => h(gctx),
      );
    }

    // Safety net: if we committed user message(s) this turn but no assistant
    // message followed (e.g. provider timeout/error before any response),
    // commit a synthetic assistant to maintain sequence validity.
    // Without this, the next generate() call would fail with
    // "user follows user" validation error.
    if (!finalAssistant) {
      const lastMsg = actx.messages[actx.messages.length - 1];
      if (lastMsg && lastMsg.role !== "assistant" && lastMsg.role !== "system") {
        const errorText = signal.aborted
          ? "The operation was aborted."
          : err instanceof Error
            ? err.message
            : String(err);
        const fallbackMsg: Message = {
          role: "assistant",
          content: [{ type: "text", text: `[Error: ${errorText}]` }],
          ...(generateId ? { generateId } : {}),
        };
        try {
          actx.appendMessage(fallbackMsg);
        } catch {
          // If even the fallback commit fails (shouldn't happen), don't mask the original error.
        }
      }
    }

    await reportError(err, { kind: "loop" });
    signals.dispose();
    throw err;
  }
}