@rudderjs/ai 1.0.1 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/agent.js CHANGED
@@ -13,14 +13,17 @@ function _buildObserverSteps(steps, modelString) {
13
13
  model: modelString,
14
14
  tokens: { prompt: step.usage.promptTokens, completion: step.usage.completionTokens, total: step.usage.totalTokens },
15
15
  finishReason: step.finishReason,
16
- toolCalls: step.toolCalls.map(tc => ({
17
- id: tc.id,
18
- name: tc.name,
19
- args: tc.arguments,
20
- result: step.toolResults.find(r => r.toolCallId === tc.id)?.result,
21
- duration: 0,
22
- needsApproval: false,
23
- })),
16
+ toolCalls: step.toolCalls.map(tc => {
17
+ const tr = step.toolResults.find(r => r.toolCallId === tc.id);
18
+ return {
19
+ id: tc.id,
20
+ name: tc.name,
21
+ args: tc.arguments,
22
+ result: tr?.result,
23
+ duration: tr?.duration ?? 0,
24
+ needsApproval: false,
25
+ };
26
+ }),
24
27
  }));
25
28
  }
26
29
  // ─── Stop Condition Combinators ──────────────────────────
@@ -51,6 +54,13 @@ export class Agent {
51
54
  temperature() { return undefined; }
52
55
  /** Max tokens for response */
53
56
  maxTokens() { return undefined; }
57
+ /**
58
+ * Default for `AgentPromptOptions.parallelTools`. When `true` (default),
59
+ * multiple tool calls within a single step run their `execute()` functions
60
+ * concurrently. Override on a subclass to flip the default for an agent
61
+ * whose tools share non-idempotent state. Per-call options still win.
62
+ */
63
+ parallelTools() { return true; }
54
64
  /** Run the agent with a prompt (non-streaming) */
55
65
  async prompt(input, options) {
56
66
  return runAgentLoop(this, input, options);
@@ -284,8 +294,587 @@ function buildMiddlewareConfig(messages, a) {
284
294
  config.maxTokens = maxTok;
285
295
  return config;
286
296
  }
287
- // ─── Agent Loop (non-streaming) ──────────────────────────
288
- async function runAgentLoop(a, input, options) {
/**
 * Try `currentModel` first, then every other model returned by
 * `agent.failover()`, invoking `call` against the resolved provider adapter
 * until one attempt succeeds.
 *
 * Each failed attempt increments `loopCtx.failoverAttempts` so the observer
 * event reflects the real number of attempts. If the caller-supplied
 * `AbortSignal` is aborted when an attempt fails, the signal's reason is
 * re-thrown immediately instead of moving on to the next failover model.
 *
 * @param loopCtx Shared loop state. Reads `agent`, `messages`, `toolSchemas`
 *   and `options`; mutates `failoverAttempts`.
 * @param currentModel Model string to try first.
 * @param call Callback `(adapter, modelId, reqOptions)` performing the
 *   provider request. Its resolved value is returned unchanged.
 * @returns Whatever `call` resolves to for the first model that succeeds.
 * @throws The abort reason on caller abort; otherwise the error of the last
 *   attempt (wrapped in `Error` when a non-Error value was thrown).
 */
async function runFailover(loopCtx, currentModel, call) {
    const failoverModels = [currentModel, ...loopCtx.agent.failover().filter(m => m !== currentModel)];
    let lastError;
    for (const tryModel of failoverModels) {
        try {
            const adapter = AiRegistry.resolve(tryModel);
            const [, modelId] = AiRegistry.parseModelString(tryModel);
            const reqOptions = {
                model: modelId,
                messages: loopCtx.messages,
                tools: loopCtx.toolSchemas.length > 0 ? loopCtx.toolSchemas : undefined,
                temperature: loopCtx.agent.temperature(),
                maxTokens: loopCtx.agent.maxTokens(),
                signal: loopCtx.options?.signal,
            };
            return await call(adapter, modelId, reqOptions);
        }
        catch (err) {
            // If the abort came from the caller, don't try the next failover
            // model — re-throw so `prompt()` / the stream rejects immediately.
            if (loopCtx.options?.signal?.aborted)
                throw loopCtx.options.signal.reason;
            lastError = err instanceof Error ? err : new Error(String(err));
            loopCtx.failoverAttempts++;
            // Deliberately no "is this the last model?" comparison here.
            // Matching on the model string would stop early whenever
            // `failover()` lists the same model twice, skipping the entries in
            // between. Exhaustion is handled by the throw after the loop.
        }
    }
    throw lastError ?? new Error('No provider available');
}
/**
 * Publish an `agent.failed` observer event built from the shared loop state.
 *
 * Does nothing when `_getAiObservers()` returns a falsy value. The `input`
 * field is an empty string when the caller supplied `options.messages`,
 * otherwise it is `loopCtx.input`.
 *
 * @param loopCtx Shared loop state (agent, model, steps, usage, timings).
 * @param err The failure; its `message` is reported when it is an `Error`,
 *   otherwise its string form.
 * @param streaming Flag forwarded verbatim on the event.
 */
function emitObserverFailed(loopCtx, err, streaming) {
    const observers = _getAiObservers();
    if (!observers) {
        return;
    }
    let inputText = '';
    if (!loopCtx.options?.messages) {
        inputText = loopCtx.input;
    }
    const usage = loopCtx.totalUsage;
    const event = {
        kind: 'agent.failed',
        agentName: loopCtx.agent.constructor.name,
        model: loopCtx.modelString,
        provider: loopCtx.providerName,
        input: inputText,
        output: '',
        steps: _buildObserverSteps(loopCtx.steps, loopCtx.modelString),
        tokens: {
            prompt: usage.promptTokens,
            completion: usage.completionTokens,
            total: usage.totalTokens,
        },
        // Whole milliseconds elapsed since the loop started.
        duration: Math.round(performance.now() - loopCtx.loopStart),
        finishReason: 'error',
        streaming,
        conversationId: null,
        failoverAttempts: loopCtx.failoverAttempts,
        error: err instanceof Error ? err.message : String(err),
    };
    observers.emit(event);
}
/**
 * Publish the per-step `agent.step.completed` observer event for the step
 * most recently appended to `loopCtx.steps`.
 *
 * The step payload is produced by `_buildObserverSteps`, the same mapper the
 * terminal events use, so consumers see one consistent step shape. Nothing is
 * emitted when there is no observer registry, no recorded step, or the mapper
 * returns no entry.
 *
 * @param loopCtx Shared loop state.
 * @param iteration Zero-based loop iteration; reported as `iteration + 1`.
 * @param streaming Flag forwarded verbatim on the event.
 */
function emitObserverStepCompleted(loopCtx, iteration, streaming) {
    const observers = _getAiObservers();
    if (!observers) {
        return;
    }
    const { steps } = loopCtx;
    const latest = steps[steps.length - 1];
    if (!latest) {
        return;
    }
    // Map just the newest step; the mapper only ever sees a one-element array.
    const [stepEvent] = _buildObserverSteps([latest], loopCtx.modelString);
    if (!stepEvent) {
        return;
    }
    // The mapper was handed a single step, so its own numbering cannot know
    // the position within the whole run — overwrite with the loop's counter.
    const stepNumber = iteration + 1;
    stepEvent.iteration = stepNumber;
    const usage = loopCtx.totalUsage;
    observers.emit({
        kind: 'agent.step.completed',
        agentName: loopCtx.agent.constructor.name,
        model: loopCtx.modelString,
        provider: loopCtx.providerName,
        iteration: stepNumber,
        step: stepEvent,
        tokens: {
            prompt: usage.promptTokens,
            completion: usage.completionTokens,
            total: usage.totalTokens,
        },
        duration: Math.round(performance.now() - loopCtx.loopStart),
        streaming,
        conversationId: null,
    });
}
/**
 * Publish the terminal `agent.completed` observer event from the shared loop
 * state and the final response.
 *
 * Does nothing when `_getAiObservers()` returns a falsy value. The reported
 * `finishReason` prefers `result.finishReason`, then the last recorded step's
 * `finishReason`, and finally `'stop'`.
 *
 * @param loopCtx Shared loop state.
 * @param result Final response; `text` and `finishReason` are read.
 * @param streaming Flag forwarded verbatim on the event.
 */
function emitObserverCompleted(loopCtx, result, streaming) {
    const observers = _getAiObservers();
    if (!observers) {
        return;
    }
    const { steps, totalUsage: usage } = loopCtx;
    let inputText = '';
    if (!loopCtx.options?.messages) {
        inputText = loopCtx.input;
    }
    const finalStep = steps[steps.length - 1];
    const event = {
        kind: 'agent.completed',
        agentName: loopCtx.agent.constructor.name,
        model: loopCtx.modelString,
        provider: loopCtx.providerName,
        input: inputText,
        output: result.text,
        steps: _buildObserverSteps(steps, loopCtx.modelString),
        tokens: {
            prompt: usage.promptTokens,
            completion: usage.completionTokens,
            total: usage.totalTokens,
        },
        duration: Math.round(performance.now() - loopCtx.loopStart),
        finishReason: result.finishReason ?? finalStep?.finishReason ?? 'stop',
        streaming,
        conversationId: null,
        failoverAttempts: loopCtx.failoverAttempts,
    };
    observers.emit(event);
}
/**
 * Assemble the final `AgentResponse` from the accumulated loop state.
 *
 * `text` is taken from the last recorded step's message content (empty
 * string when no step exists). `finishReason`, `pendingClientToolCalls`,
 * `pendingApprovalToolCall` and `resumedToolMessages` are present only when
 * the corresponding loop state is set / non-empty.
 *
 * @param loopCtx Shared loop state.
 * @returns The response object; `steps` and `usage` are the same references
 *   held by `loopCtx`.
 */
function buildAgentResponse(loopCtx) {
    const {
        steps,
        totalUsage,
        loopFinishReason,
        pendingClientToolCalls,
        pendingApprovalToolCall,
        resumedToolMessages,
    } = loopCtx;
    const finalStep = steps[steps.length - 1];
    let text = '';
    if (finalStep) {
        text = getMessageText(finalStep.message.content);
    }
    return {
        text,
        steps,
        usage: totalUsage,
        // Optional fields are omitted entirely rather than set to undefined.
        ...(loopFinishReason ? { finishReason: loopFinishReason } : {}),
        ...(pendingClientToolCalls.length > 0 ? { pendingClientToolCalls } : {}),
        ...(pendingApprovalToolCall ? { pendingApprovalToolCall } : {}),
        ...(resumedToolMessages.length > 0 ? { resumedToolMessages } : {}),
    };
}
/**
 * Run the tool phase of one agent step.
 *
 * Pushes `assistantMessage` onto the conversation, then delegates to the
 * parallel or serial runner and re-yields every `StreamChunk` it produces.
 * Callers that do not stream can drive the generator with `.next()` and
 * ignore the yielded chunks; the side effects on `loopCtx` are performed
 * either way.
 *
 * Parallel mode is chosen when the per-call `options.parallelTools` (or, if
 * that is nullish, `agent.parallelTools()`) is truthy AND the step contains
 * more than one tool call. After the runner finishes, `onToolPhaseComplete`
 * middleware runs when any middleware is registered.
 *
 * @param loopCtx Shared loop state.
 * @param toolCalls Tool calls requested by the model in this step.
 * @param assistantMessage Assistant message to append before executing tools.
 * @returns (generator return value) the step's `ToolResult[]`.
 */
async function* executeToolPhase(loopCtx, toolCalls, assistantMessage) {
    const { messages, middlewares, options, ctx } = loopCtx;
    const toolResults = [];
    messages.push(assistantMessage);
    // Per-call option wins over the agent-level default.
    const parallelRequested = options?.parallelTools ?? loopCtx.agent.parallelTools();
    // A single call gains nothing from the parallel path, so it stays serial.
    const runPhase = parallelRequested && toolCalls.length > 1
        ? runToolPhaseParallel
        : runToolPhaseSerial;
    yield* runPhase(loopCtx, toolCalls, toolResults);
    if (middlewares.length > 0) {
        await runSequential(middlewares, 'onToolPhaseComplete', ctx);
    }
    return toolResults;
}
/**
 * Serial tool execution. Handles the tool calls one after another: each
 * call's prelude (tool lookup, approval, `onBeforeToolCall` middleware, arg
 * validation) and then its `execute()`, streaming `tool-update` chunks live
 * as the tool emits them.
 *
 * Side effects per call: a `ToolResult` is appended to `toolResults`, a
 * `role: 'tool'` message is appended to `loopCtx.messages`, and — for the
 * stop / approval / pause cases — the pending-state fields on `loopCtx` are
 * updated. A pending approval or a middleware abort ends the phase; later
 * calls are not processed.
 *
 * Only the tool execution itself is wrapped in `try`. Bookkeeping that runs
 * after a successful execution (`applyToModelOutput`, `onAfterToolCall`)
 * sits outside it, so an exception there propagates instead of being
 * re-recorded as a tool failure. Otherwise it would append a second result
 * and a second tool message for the same `toolCallId`.
 *
 * @param loopCtx Shared loop state.
 * @param toolCalls Tool calls to process, in model order.
 * @param toolResults Output array; results are appended in processing order.
 */
async function* runToolPhaseSerial(loopCtx, toolCalls, toolResults) {
    const { messages, middlewares, toolMap, options, ctx } = loopCtx;
    for (const tc of toolCalls) {
        const tool = toolMap.get(tc.name);
        if (!tool) {
            const unknownResult = `Error: Unknown tool "${tc.name}"`;
            toolResults.push({ toolCallId: tc.id, result: unknownResult });
            messages.push({ role: 'tool', content: unknownResult, toolCallId: tc.id });
            yield { type: 'tool-result', toolCall: tc, result: unknownResult };
            continue;
        }
        if (!tool.execute) {
            // Client tool — no server-side handler.
            if (options?.toolCallStreamingMode === 'stop-on-client-tool') {
                loopCtx.pendingClientToolCalls.push(tc);
                loopCtx.loopFinishReason = 'client_tool_calls';
                loopCtx.stopForClientTools = true;
                yield { type: 'tool-call', toolCall: tc };
                continue;
            }
            const placeholder = '[client tool — execute on client]';
            toolResults.push({ toolCallId: tc.id, result: placeholder });
            messages.push({ role: 'tool', content: placeholder, toolCallId: tc.id });
            yield { type: 'tool-call', toolCall: tc };
            yield { type: 'tool-result', toolCall: tc, result: placeholder };
            continue;
        }
        // needsApproval enforcement
        const approvalDecision = await evaluateApproval(tool, tc, options);
        if (approvalDecision === 'rejected') {
            const rejectionResult = { rejected: true, reason: 'User rejected this tool call' };
            toolResults.push({ toolCallId: tc.id, result: rejectionResult });
            messages.push({ role: 'tool', content: JSON.stringify(rejectionResult), toolCallId: tc.id });
            yield { type: 'tool-result', toolCall: tc, result: rejectionResult };
            continue;
        }
        if (approvalDecision === 'pending') {
            loopCtx.pendingApprovalToolCall = { toolCall: tc, isClientTool: false };
            loopCtx.loopFinishReason = 'tool_approval_required';
            loopCtx.stopForApproval = true;
            yield { type: 'tool-call', toolCall: tc };
            break;
        }
        // onBeforeToolCall
        let toolArgs = tc.arguments;
        if (middlewares.length > 0) {
            const beforeResult = await runOnBeforeToolCall(middlewares, ctx, tc.name, toolArgs);
            if (beforeResult) {
                if (beforeResult.type === 'skip') {
                    const resultStr = typeof beforeResult.result === 'string' ? beforeResult.result : JSON.stringify(beforeResult.result);
                    toolResults.push({ toolCallId: tc.id, result: beforeResult.result });
                    messages.push({ role: 'tool', content: resultStr, toolCallId: tc.id });
                    yield { type: 'tool-result', toolCall: tc, result: beforeResult.result };
                    await runOnAfterToolCall(middlewares, ctx, tc.name, toolArgs, beforeResult.result);
                    continue;
                }
                if (beforeResult.type === 'abort') {
                    await runOnAbort(middlewares, ctx, beforeResult.reason);
                    break;
                }
                if (beforeResult.type === 'transformArgs') {
                    toolArgs = beforeResult.args;
                }
            }
        }
        // Validate args against the tool's inputSchema. Runs after middleware
        // transforms so transforms can reshape malformed model output before
        // it is judged. The tool-call chunk is emitted even on validation
        // failure so streaming UIs see a paired tool-call → tool-result(error)
        // sequence; non-streaming callers discard the chunk.
        const validation = validateToolArgs(tool, toolArgs);
        if (!validation.ok) {
            yield { type: 'tool-call', toolCall: tc };
            toolResults.push({ toolCallId: tc.id, result: validation.error });
            messages.push({ role: 'tool', content: JSON.stringify(validation.error), toolCallId: tc.id });
            yield { type: 'tool-result', toolCall: tc, result: validation.error };
            if (middlewares.length > 0)
                await runOnAfterToolCall(middlewares, ctx, tc.name, toolArgs, validation.error);
            continue;
        }
        const validatedArgs = validation.value;
        const toolStart = performance.now();
        let result;
        let paused = false;
        // Set only when the execution itself throws; holds the error text.
        let failure = null;
        try {
            // Emit the tool-call marker before execution so streaming UIs see
            // tool-call → tool-update* → tool-result in order. Values yielded
            // by the execution are surfaced live as tool-update chunks.
            yield { type: 'tool-call', toolCall: tc };
            const execGen = executeMaybeStreaming(tool, validatedArgs, { toolCallId: tc.id });
            while (true) {
                const step = await execGen.next();
                if (step.done) {
                    result = step.value;
                    break;
                }
                // Pause detection: a yielded pause-for-client-tools control
                // chunk halts iteration and moves the nested calls onto the
                // parent's pending list. No tool result is recorded for this
                // call; it stays unanswered in the message history.
                if (isPauseForClientToolsChunk(step.value)) {
                    for (const pending of step.value.toolCalls) {
                        loopCtx.pendingClientToolCalls.push(pending);
                    }
                    loopCtx.loopFinishReason = 'client_tool_calls';
                    loopCtx.stopForClientTools = true;
                    paused = true;
                    break;
                }
                const updateChunk = { type: 'tool-update', toolCall: tc, update: step.value };
                if (middlewares.length > 0) {
                    const transformed = runOnChunk(middlewares, ctx, updateChunk);
                    if (transformed)
                        yield transformed;
                }
                else {
                    yield updateChunk;
                }
            }
        }
        catch (err) {
            failure = { message: err instanceof Error ? err.message : String(err) };
        }
        const duration = performance.now() - toolStart;
        if (failure) {
            const errResult = `Error: ${failure.message}`;
            toolResults.push({ toolCallId: tc.id, result: errResult, duration });
            messages.push({ role: 'tool', content: errResult, toolCallId: tc.id });
            yield { type: 'tool-result', toolCall: tc, result: errResult };
            // onAfterToolCall (error case)
            if (middlewares.length > 0)
                await runOnAfterToolCall(middlewares, ctx, tc.name, toolArgs, errResult);
            continue;
        }
        if (paused)
            continue; // skip tool_result emission + message push for this tc
        // toolResults preserves the ORIGINAL value; only the message content
        // pushed onto `messages` (next-step model input) goes through
        // applyToModelOutput. The streamed `tool-result` chunk also carries
        // the ORIGINAL value.
        toolResults.push({ toolCallId: tc.id, result, duration });
        const resultStr = await applyToModelOutput(tool, result, middlewares.length > 0 ? (e) => runOnError(middlewares, ctx, e) : undefined);
        messages.push({ role: 'tool', content: resultStr, toolCallId: tc.id });
        yield { type: 'tool-result', toolCall: tc, result };
        // onAfterToolCall
        if (middlewares.length > 0)
            await runOnAfterToolCall(middlewares, ctx, tc.name, toolArgs, result);
    }
}
/**
 * Parallel tool execution, in three phases:
 *
 * 1. **Prelude (serial):** `classifyToolCalls` decides each call's outcome
 *    kind in tool-call order.
 * 2. **Execution (concurrent):** every `ready` outcome is run through
 *    `runToolExecution` under one `Promise.all`; each execution's
 *    `tool-update` chunks arrive buffered in its `updates` array.
 * 3. **Replay (serial):** outcomes are walked in order, and only here are
 *    chunks yielded, tool results / tool messages appended, and
 *    `onAfterToolCall` middleware invoked. Output order therefore follows
 *    the tool-call order, not the order in which executions finished.
 *
 * A `pending-approval` outcome ends the replay after its `tool-call` chunk.
 *
 * @param loopCtx Shared loop state.
 * @param toolCalls Tool calls to process, in model order.
 * @param toolResults Output array; results are appended in replay order.
 */
async function* runToolPhaseParallel(loopCtx, toolCalls, toolResults) {
    const { messages, middlewares, ctx } = loopCtx;
    // ─── Phase 1: prelude ──────────────────────────────────
    const outcomes = await classifyToolCalls(loopCtx, toolCalls);
    // ─── Phase 2: dispatch ready executions concurrently ──
    const readyOutcomes = outcomes.filter((candidate) => candidate.kind === 'ready');
    const settled = await Promise.all(readyOutcomes.map((candidate) => runToolExecution(loopCtx, candidate)));
    const executionByCallId = new Map(readyOutcomes.map((candidate, index) => [candidate.tc.id, settled[index]]));
    const pushToolMessage = (callId, content) => {
        messages.push({ role: 'tool', content, toolCallId: callId });
    };
    // ─── Phase 3: replay chunks + side-effects in order ───
    for (const outcome of outcomes) {
        const call = outcome.tc;
        switch (outcome.kind) {
            case 'unknown-tool':
                toolResults.push({ toolCallId: call.id, result: outcome.result });
                pushToolMessage(call.id, outcome.result);
                yield { type: 'tool-result', toolCall: call, result: outcome.result };
                continue;
            case 'client-tool-stop':
                // loopCtx mutations already applied during the prelude.
                yield { type: 'tool-call', toolCall: call };
                continue;
            case 'client-tool-placeholder':
                toolResults.push({ toolCallId: call.id, result: outcome.result });
                pushToolMessage(call.id, outcome.result);
                yield { type: 'tool-call', toolCall: call };
                yield { type: 'tool-result', toolCall: call, result: outcome.result };
                continue;
            case 'rejected':
                toolResults.push({ toolCallId: call.id, result: outcome.result });
                pushToolMessage(call.id, JSON.stringify(outcome.result));
                yield { type: 'tool-result', toolCall: call, result: outcome.result };
                continue;
            case 'pending-approval':
                // loopCtx mutations already applied during the prelude.
                yield { type: 'tool-call', toolCall: call };
                // Nothing follows the replay loop, so returning here is the
                // same as breaking out of it.
                return;
            case 'mw-skip': {
                const content = typeof outcome.result === 'string' ? outcome.result : JSON.stringify(outcome.result);
                toolResults.push({ toolCallId: call.id, result: outcome.result });
                pushToolMessage(call.id, content);
                yield { type: 'tool-result', toolCall: call, result: outcome.result };
                if (middlewares.length > 0)
                    await runOnAfterToolCall(middlewares, ctx, call.name, outcome.toolArgs, outcome.result);
                continue;
            }
            case 'validation-error':
                yield { type: 'tool-call', toolCall: call };
                toolResults.push({ toolCallId: call.id, result: outcome.error });
                pushToolMessage(call.id, JSON.stringify(outcome.error));
                yield { type: 'tool-result', toolCall: call, result: outcome.error };
                if (middlewares.length > 0)
                    await runOnAfterToolCall(middlewares, ctx, call.name, outcome.toolArgs, outcome.error);
                continue;
            default:
                // 'ready' — handled below.
                break;
        }
        const exec = executionByCallId.get(call.id);
        yield { type: 'tool-call', toolCall: call };
        for (const buffered of exec.updates)
            yield buffered;
        if (exec.kind === 'paused') {
            // The pause already moved its nested calls onto `loopCtx` during
            // execution. No tool result / message is recorded for this call.
            continue;
        }
        if (exec.kind === 'error') {
            const failureText = `Error: ${exec.error.message}`;
            toolResults.push({ toolCallId: call.id, result: failureText, duration: exec.duration });
            pushToolMessage(call.id, failureText);
            yield { type: 'tool-result', toolCall: call, result: failureText };
            if (middlewares.length > 0)
                await runOnAfterToolCall(middlewares, ctx, call.name, outcome.toolArgs, failureText);
            continue;
        }
        // exec.kind === 'ok'
        toolResults.push({ toolCallId: call.id, result: exec.result, duration: exec.duration });
        const onOutputError = middlewares.length > 0 ? (e) => runOnError(middlewares, ctx, e) : undefined;
        const modelContent = await applyToModelOutput(outcome.tool, exec.result, onOutputError);
        pushToolMessage(call.id, modelContent);
        yield { type: 'tool-result', toolCall: call, result: exec.result };
        if (middlewares.length > 0)
            await runOnAfterToolCall(middlewares, ctx, call.name, outcome.toolArgs, exec.result);
    }
}
/**
 * Walk `toolCalls` in order and decide what should happen to each one,
 * returning one outcome record per classified call.
 *
 * Outcome kinds produced here: `unknown-tool`, `client-tool-stop`,
 * `client-tool-placeholder`, `rejected`, `pending-approval`, `mw-skip`,
 * `validation-error` and `ready`.
 *
 * The `loopCtx` mutations for `client-tool-stop` and `pending-approval`, and
 * the `runOnAbort` call for a middleware abort, happen during this walk. A
 * `pending-approval` outcome and a middleware abort both end the walk: later
 * calls are never classified. An aborted call itself produces no outcome.
 *
 * @param loopCtx Shared loop state.
 * @param toolCalls Tool calls to classify, in model order.
 * @returns The outcomes, in tool-call order.
 */
async function classifyToolCalls(loopCtx, toolCalls) {
    const { middlewares, toolMap, options, ctx } = loopCtx;
    const classified = [];
    walk: for (const call of toolCalls) {
        const tool = toolMap.get(call.name);
        if (!tool) {
            classified.push({ kind: 'unknown-tool', tc: call, result: `Error: Unknown tool "${call.name}"` });
            continue;
        }
        if (!tool.execute) {
            // Client tool — no server-side handler.
            const stopOnClientTool = options?.toolCallStreamingMode === 'stop-on-client-tool';
            if (!stopOnClientTool) {
                classified.push({ kind: 'client-tool-placeholder', tc: call, result: '[client tool — execute on client]' });
                continue;
            }
            loopCtx.pendingClientToolCalls.push(call);
            loopCtx.loopFinishReason = 'client_tool_calls';
            loopCtx.stopForClientTools = true;
            classified.push({ kind: 'client-tool-stop', tc: call });
            continue;
        }
        const decision = await evaluateApproval(tool, call, options);
        if (decision === 'rejected') {
            classified.push({ kind: 'rejected', tc: call, result: { rejected: true, reason: 'User rejected this tool call' } });
            continue;
        }
        if (decision === 'pending') {
            loopCtx.pendingApprovalToolCall = { toolCall: call, isClientTool: false };
            loopCtx.loopFinishReason = 'tool_approval_required';
            loopCtx.stopForApproval = true;
            classified.push({ kind: 'pending-approval', tc: call });
            break;
        }
        let toolArgs = call.arguments;
        if (middlewares.length > 0) {
            const verdict = await runOnBeforeToolCall(middlewares, ctx, call.name, toolArgs);
            const verdictType = verdict ? verdict.type : undefined;
            switch (verdictType) {
                case 'skip':
                    classified.push({ kind: 'mw-skip', tc: call, toolArgs, result: verdict.result });
                    continue walk;
                case 'abort':
                    await runOnAbort(middlewares, ctx, verdict.reason);
                    // Outcomes gathered so far are kept so the replay still
                    // emits them; the aborted call itself adds nothing.
                    break walk;
                case 'transformArgs':
                    toolArgs = verdict.args;
                    break;
                default:
                    break;
            }
        }
        // Validation runs after any middleware arg transform.
        const validation = validateToolArgs(tool, toolArgs);
        if (validation.ok) {
            classified.push({ kind: 'ready', tc: call, tool, toolArgs, validatedArgs: validation.value });
        }
        else {
            classified.push({ kind: 'validation-error', tc: call, toolArgs, error: validation.error });
        }
    }
    return classified;
}
/**
 * Drive one tool's `executeMaybeStreaming` iterator to completion and report
 * how it ended. Never rejects: a thrown error is returned as an `error`
 * record.
 *
 * Intermediate values are wrapped as `tool-update` chunks and buffered in
 * `updates` rather than yielded, so the caller can replay them later. When
 * middleware is registered each chunk first goes through `runOnChunk`, and a
 * falsy return drops that chunk. A pause-for-client-tools chunk stops the
 * iteration, applies its pending-state mutations to `loopCtx` immediately,
 * and yields a `paused` record.
 *
 * NOTE: `loopCtx.ctx` is the same object for every concurrent invocation, so
 * middleware that writes through it in `runOnChunk` may see updates from
 * sibling tool calls interleaved.
 *
 * @param loopCtx Shared loop state.
 * @param outcome A `ready` outcome carrying `tool`, `tc` and `validatedArgs`.
 * @returns `{ kind: 'ok', result, updates, duration }`,
 *   `{ kind: 'paused', updates, duration }` or
 *   `{ kind: 'error', error, updates, duration }`; `duration` is in
 *   `performance.now()` milliseconds.
 */
async function runToolExecution(loopCtx, outcome) {
    const { middlewares, ctx } = loopCtx;
    const { tool, tc: call, validatedArgs } = outcome;
    const updates = [];
    const startedAt = performance.now();
    const elapsed = () => performance.now() - startedAt;
    try {
        const iterator = executeMaybeStreaming(tool, validatedArgs, { toolCallId: call.id });
        let step = await iterator.next();
        while (!step.done) {
            const emitted = step.value;
            if (isPauseForClientToolsChunk(emitted)) {
                for (const pending of emitted.toolCalls) {
                    loopCtx.pendingClientToolCalls.push(pending);
                }
                loopCtx.loopFinishReason = 'client_tool_calls';
                loopCtx.stopForClientTools = true;
                return { kind: 'paused', updates, duration: elapsed() };
            }
            const chunk = { type: 'tool-update', toolCall: call, update: emitted };
            const visible = middlewares.length > 0 ? runOnChunk(middlewares, ctx, chunk) : chunk;
            if (visible) {
                updates.push(visible);
            }
            step = await iterator.next();
        }
        // `step.value` of the final (done) result is the tool's return value.
        return { kind: 'ok', result: step.value, updates, duration: elapsed() };
    }
    catch (err) {
        const error = err instanceof Error ? err : new Error(String(err));
        return { kind: 'error', error, updates, duration: elapsed() };
    }
}
868
+ /**
869
+ * Build the shared `LoopContext` for a `prompt()` / `stream()` call, run
870
+ * approval-resume, and fire `onConfig(init)` + `onStart`. After this returns,
871
+ * the iteration loop can run with the same setup regardless of streaming
872
+ * mode.
873
+ */
874
+ async function initializeLoop(a, input, options) {
875
+ // Honor caller-supplied AbortSignal as early as possible — if the signal
876
+ // is already aborted on entry, do no work at all.
877
+ options?.signal?.throwIfAborted();
289
878
  const loopStart = performance.now();
290
879
  const modelString = a.model() ?? AiRegistry.getDefault();
291
880
  const [providerName] = AiRegistry.parseModelString(modelString);
@@ -293,7 +882,6 @@ async function runAgentLoop(a, input, options) {
293
882
  const middlewares = getMiddleware(a);
294
883
  const toolSchemas = buildToolSchemas(tools);
295
884
  const toolMap = buildToolMap(tools);
296
- let failoverAttempts = 0;
297
885
  const messages = options?.messages
298
886
  ? [{ role: 'system', content: a.instructions() }, ...options.messages]
299
887
  : [
@@ -304,26 +892,42 @@ async function runAgentLoop(a, input, options) {
304
892
  const steps = [];
305
893
  const stopConditions = normalizeStopConditions(a.stopWhen());
306
894
  const totalUsage = { promptTokens: 0, completionTokens: 0, totalTokens: 0 };
307
- // State for client-tool-stopping and approval-stopping
308
- const pendingClientToolCalls = [];
309
- let pendingApprovalToolCall;
310
- let loopFinishReason;
311
- let stopForClientTools = false;
312
- let stopForApproval = false;
313
- let resumedToolMessages = []; // eslint-disable-line no-useless-assignment
895
+ // Create middleware context (resume below mutates `messages`, captured by
896
+ // reference here, so order is safe).
897
+ const ctx = createMiddlewareContext(messages, modelString, tools, 0);
898
+ const loopCtx = {
899
+ agent: a,
900
+ input,
901
+ options,
902
+ modelString,
903
+ providerName,
904
+ tools,
905
+ toolMap,
906
+ toolSchemas,
907
+ middlewares,
908
+ loopStart,
909
+ ctx,
910
+ messages,
911
+ steps,
912
+ totalUsage,
913
+ pendingClientToolCalls: [],
914
+ pendingApprovalToolCall: undefined,
915
+ loopFinishReason: undefined,
916
+ stopForClientTools: false,
917
+ stopForApproval: false,
918
+ resumedToolMessages: [],
919
+ failoverAttempts: 0,
920
+ };
314
921
  // Resume server tools left pending by a previous approval round-trip.
315
- // (Must run before middleware context creation since `messages` may grow.)
316
922
  {
317
923
  const resume = await resumePendingToolCalls({ messages, toolMap, options });
318
- resumedToolMessages = resume.resumed;
924
+ loopCtx.resumedToolMessages = resume.resumed;
319
925
  if (resume.approvalStillRequired) {
320
- pendingApprovalToolCall = resume.approvalStillRequired;
321
- loopFinishReason = 'tool_approval_required';
322
- stopForApproval = true;
926
+ loopCtx.pendingApprovalToolCall = resume.approvalStillRequired;
927
+ loopCtx.loopFinishReason = 'tool_approval_required';
928
+ loopCtx.stopForApproval = true;
323
929
  }
324
930
  }
325
- // Create middleware context
326
- const ctx = createMiddlewareContext(messages, modelString, tools, 0);
327
931
  // onConfig — init phase
328
932
  if (middlewares.length > 0) {
329
933
  const configResult = runOnConfig(middlewares, ctx, buildMiddlewareConfig(messages, a), 'init');
@@ -333,179 +937,78 @@ async function runAgentLoop(a, input, options) {
333
937
  // onStart
334
938
  if (middlewares.length > 0)
335
939
  await runSequential(middlewares, 'onStart', ctx);
940
+ return { loopCtx, stopConditions };
941
+ }
942
+ /**
943
+ * Run the per-iteration prelude — caller-abort check, middleware-abort
944
+ * check, `onIteration`, `prepareStep`, `onConfig(beforeModel)`. Returns the
945
+ * resolved model for this step or `{ aborted: true }` if middleware
946
+ * cancelled the run (caller should `break`). Throws the abort reason if a
947
+ * caller-supplied AbortSignal fired between iterations.
948
+ */
949
+ async function runIterationPrelude(loopCtx, iteration) {
950
+ const { agent, options, ctx, middlewares, messages, modelString, steps } = loopCtx;
951
+ ctx.iteration = iteration;
952
+ // Reset the streaming chunk index for middlewares that key off it. Harmless
953
+ // in non-streaming mode where no chunks flow through `onChunk`.
954
+ ctx.chunkIndex = 0;
955
+ // Honor caller-supplied AbortSignal between iterations.
956
+ options?.signal?.throwIfAborted();
957
+ if (ctx._aborted) {
958
+ await runOnAbort(middlewares, ctx, ctx._abortReason);
959
+ return { aborted: true };
960
+ }
961
+ if (middlewares.length > 0)
962
+ await runSequential(middlewares, 'onIteration', ctx);
963
+ let currentModel = modelString;
964
+ if (agent.prepareStep) {
965
+ const prep = await agent.prepareStep({ stepNumber: iteration, steps, messages });
966
+ if (prep.model)
967
+ currentModel = prep.model;
968
+ if (prep.messages)
969
+ messages.splice(0, messages.length, ...prep.messages);
970
+ if (prep.system)
971
+ messages[0] = { role: 'system', content: prep.system };
972
+ }
973
+ if (middlewares.length > 0) {
974
+ const configResult = runOnConfig(middlewares, ctx, buildMiddlewareConfig(messages, agent), 'beforeModel');
975
+ if (configResult.messages)
976
+ messages.splice(0, messages.length, ...configResult.messages);
977
+ }
978
+ return { currentModel };
979
+ }
980
+ // ─── Agent Loop (non-streaming) ──────────────────────────
981
+ async function runAgentLoop(a, input, options) {
982
+ const { loopCtx, stopConditions } = await initializeLoop(a, input, options);
983
+ const { ctx, middlewares, messages, steps, totalUsage } = loopCtx;
336
984
  try {
337
- if (stopForApproval) {
985
+ if (loopCtx.stopForApproval) {
338
986
  // Approval is still required from the resume — skip the model loop.
339
987
  }
340
988
  else {
341
989
  for (let iteration = 0; iteration < a.maxSteps(); iteration++) {
342
- ctx.iteration = iteration;
343
- // Check if middleware aborted
344
- if (ctx._aborted) {
345
- await runOnAbort(middlewares, ctx, ctx._abortReason);
990
+ const prelude = await runIterationPrelude(loopCtx, iteration);
991
+ if ('aborted' in prelude)
346
992
  break;
347
- }
348
- // onIteration
349
- if (middlewares.length > 0)
350
- await runSequential(middlewares, 'onIteration', ctx);
351
- let currentModel = modelString;
352
- const currentToolSchemas = toolSchemas;
353
- // prepareStep hook
354
- if (a.prepareStep) {
355
- const prep = await a.prepareStep({ stepNumber: iteration, steps, messages });
356
- if (prep.model)
357
- currentModel = prep.model;
358
- if (prep.messages)
359
- messages.splice(0, messages.length, ...prep.messages);
360
- if (prep.system)
361
- messages[0] = { role: 'system', content: prep.system };
362
- }
363
- // onConfig — beforeModel phase
364
- if (middlewares.length > 0) {
365
- const configResult = runOnConfig(middlewares, ctx, buildMiddlewareConfig(messages, a), 'beforeModel');
366
- if (configResult.messages)
367
- messages.splice(0, messages.length, ...configResult.messages);
368
- }
369
- const failoverModels = [currentModel, ...a.failover().filter(m => m !== currentModel)];
370
- let response;
371
- let lastError;
372
- for (const tryModel of failoverModels) {
373
- try {
374
- const adapter = AiRegistry.resolve(tryModel);
375
- const [, modelId] = AiRegistry.parseModelString(tryModel);
376
- const reqOptions = {
377
- model: modelId,
378
- messages,
379
- tools: currentToolSchemas.length > 0 ? currentToolSchemas : undefined,
380
- temperature: a.temperature(),
381
- maxTokens: a.maxTokens(),
382
- };
383
- response = await adapter.generate(reqOptions);
384
- break;
385
- }
386
- catch (err) {
387
- lastError = err instanceof Error ? err : new Error(String(err));
388
- failoverAttempts++;
389
- if (tryModel === failoverModels[failoverModels.length - 1])
390
- throw lastError;
391
- }
392
- }
393
- if (!response)
394
- throw lastError ?? new Error('No provider available');
993
+ const { currentModel } = prelude;
994
+ const response = await runFailover(loopCtx, currentModel, (adapter, _, opts) => adapter.generate(opts));
395
995
  addUsage(totalUsage, response.usage);
396
996
  // onUsage
397
997
  if (middlewares.length > 0)
398
998
  await runOnUsage(middlewares, ctx, response.usage);
399
999
  const toolCalls = response.message.toolCalls ?? [];
400
- const toolResults = [];
1000
+ let toolResults = [];
401
1001
  if (toolCalls.length > 0) {
402
- messages.push(response.message);
403
- for (const tc of toolCalls) {
404
- const tool = toolMap.get(tc.name);
405
- if (!tool) {
406
- toolResults.push({ toolCallId: tc.id, result: `Error: Unknown tool "${tc.name}"` });
407
- messages.push({ role: 'tool', content: `Error: Unknown tool "${tc.name}"`, toolCallId: tc.id });
408
- continue;
409
- }
410
- if (!tool.execute) {
411
- // Client tool — no server-side handler.
412
- if (options?.toolCallStreamingMode === 'stop-on-client-tool') {
413
- pendingClientToolCalls.push(tc);
414
- loopFinishReason = 'client_tool_calls';
415
- stopForClientTools = true;
416
- continue;
417
- }
418
- toolResults.push({ toolCallId: tc.id, result: '[client tool — execute on client]' });
419
- messages.push({ role: 'tool', content: '[client tool — execute on client]', toolCallId: tc.id });
420
- continue;
421
- }
422
- // needsApproval enforcement
423
- const approvalDecision = await evaluateApproval(tool, tc, options);
424
- if (approvalDecision === 'rejected') {
425
- const rejectionResult = { rejected: true, reason: 'User rejected this tool call' };
426
- toolResults.push({ toolCallId: tc.id, result: rejectionResult });
427
- messages.push({ role: 'tool', content: JSON.stringify(rejectionResult), toolCallId: tc.id });
428
- continue;
429
- }
430
- if (approvalDecision === 'pending') {
431
- pendingApprovalToolCall = { toolCall: tc, isClientTool: false };
432
- loopFinishReason = 'tool_approval_required';
433
- stopForApproval = true;
1002
+ // Drain `executeToolPhase` to completion, discarding the streamed
1003
+ // chunks non-streaming callers don't surface them.
1004
+ const phaseGen = executeToolPhase(loopCtx, toolCalls, response.message);
1005
+ while (true) {
1006
+ const next = await phaseGen.next();
1007
+ if (next.done) {
1008
+ toolResults = next.value;
434
1009
  break;
435
1010
  }
436
- // onBeforeToolCall
437
- let toolArgs = tc.arguments;
438
- if (middlewares.length > 0) {
439
- const beforeResult = await runOnBeforeToolCall(middlewares, ctx, tc.name, toolArgs);
440
- if (beforeResult) {
441
- if (beforeResult.type === 'skip') {
442
- const resultStr = typeof beforeResult.result === 'string' ? beforeResult.result : JSON.stringify(beforeResult.result);
443
- toolResults.push({ toolCallId: tc.id, result: beforeResult.result });
444
- messages.push({ role: 'tool', content: resultStr, toolCallId: tc.id });
445
- await runOnAfterToolCall(middlewares, ctx, tc.name, toolArgs, beforeResult.result);
446
- continue;
447
- }
448
- if (beforeResult.type === 'abort') {
449
- await runOnAbort(middlewares, ctx, beforeResult.reason);
450
- break;
451
- }
452
- if (beforeResult.type === 'transformArgs') {
453
- toolArgs = beforeResult.args;
454
- }
455
- }
456
- }
457
- try {
458
- // Drain generator yields silently in the non-streaming loop —
459
- // the same tool definition must work in both prompt() and stream().
460
- // Exception: a `pause_for_client_tools` control chunk yield
461
- // halts iteration, propagates the nested calls to the parent's
462
- // pending list, and skips tool_result recording (see tool.ts
463
- // `pauseForClientTools` for rationale).
464
- const execGen = executeMaybeStreaming(tool, toolArgs, { toolCallId: tc.id });
465
- let result;
466
- let paused = false;
467
- while (true) {
468
- const step = await execGen.next();
469
- if (step.done) {
470
- result = step.value;
471
- break;
472
- }
473
- if (isPauseForClientToolsChunk(step.value)) {
474
- for (const pending of step.value.toolCalls) {
475
- pendingClientToolCalls.push(pending);
476
- }
477
- loopFinishReason = 'client_tool_calls';
478
- stopForClientTools = true;
479
- paused = true;
480
- break;
481
- }
482
- // Plain tool-update yields are silently dropped in the
483
- // non-streaming loop — only the final return value matters.
484
- }
485
- if (paused)
486
- continue; // skip toolResults + message push for this tc
487
- // toolResults preserves the ORIGINAL value; only the tool message
488
- // pushed onto `messages` (what the next model step sees) is
489
- // narrowed by toModelOutput.
490
- toolResults.push({ toolCallId: tc.id, result });
491
- const resultStr = await applyToModelOutput(tool, result, middlewares.length > 0 ? (e) => runOnError(middlewares, ctx, e) : undefined);
492
- messages.push({ role: 'tool', content: resultStr, toolCallId: tc.id });
493
- // onAfterToolCall
494
- if (middlewares.length > 0)
495
- await runOnAfterToolCall(middlewares, ctx, tc.name, toolArgs, result);
496
- }
497
- catch (err) {
498
- const msg = err instanceof Error ? err.message : String(err);
499
- toolResults.push({ toolCallId: tc.id, result: `Error: ${msg}` });
500
- messages.push({ role: 'tool', content: `Error: ${msg}`, toolCallId: tc.id });
501
- // onAfterToolCall (error case)
502
- if (middlewares.length > 0)
503
- await runOnAfterToolCall(middlewares, ctx, tc.name, toolArgs, `Error: ${msg}`);
504
- }
505
1011
  }
506
- // onToolPhaseComplete
507
- if (middlewares.length > 0)
508
- await runSequential(middlewares, 'onToolPhaseComplete', ctx);
509
1012
  }
510
1013
  else {
511
1014
  messages.push(response.message);
@@ -518,7 +1021,8 @@ async function runAgentLoop(a, input, options) {
518
1021
  finishReason: response.finishReason,
519
1022
  };
520
1023
  steps.push(step);
521
- if (stopForClientTools || stopForApproval)
1024
+ emitObserverStepCompleted(loopCtx, iteration, false);
1025
+ if (loopCtx.stopForClientTools || loopCtx.stopForApproval)
522
1026
  break;
523
1027
  const shouldStop = stopConditions.some(cond => cond({ steps, iteration, lastMessage: response.message }));
524
1028
  if (shouldStop || response.finishReason !== 'tool_calls') {
@@ -531,177 +1035,38 @@ async function runAgentLoop(a, input, options) {
531
1035
  // onError
532
1036
  if (middlewares.length > 0)
533
1037
  await runOnError(middlewares, ctx, err);
534
- // Emit observer event on failure
535
- const obs = _getAiObservers();
536
- if (obs) {
537
- const inputText = options?.messages ? '' : input;
538
- obs.emit({
539
- kind: 'agent.failed',
540
- agentName: a.constructor.name,
541
- model: modelString,
542
- provider: providerName,
543
- input: inputText,
544
- output: '',
545
- steps: _buildObserverSteps(steps, modelString),
546
- tokens: { prompt: totalUsage.promptTokens, completion: totalUsage.completionTokens, total: totalUsage.totalTokens },
547
- duration: Math.round(performance.now() - loopStart),
548
- finishReason: 'error',
549
- streaming: false,
550
- conversationId: null,
551
- failoverAttempts,
552
- error: err instanceof Error ? err.message : String(err),
553
- });
554
- }
1038
+ emitObserverFailed(loopCtx, err, false);
555
1039
  throw err;
556
1040
  }
557
1041
  // onFinish
558
1042
  if (middlewares.length > 0)
559
1043
  await runSequential(middlewares, 'onFinish', ctx);
560
- const lastStep = steps[steps.length - 1];
561
- const result = {
562
- text: lastStep ? getMessageText(lastStep.message.content) : '',
563
- steps,
564
- usage: totalUsage,
565
- };
566
- if (loopFinishReason)
567
- result.finishReason = loopFinishReason;
568
- if (pendingClientToolCalls.length > 0)
569
- result.pendingClientToolCalls = pendingClientToolCalls;
570
- if (pendingApprovalToolCall)
571
- result.pendingApprovalToolCall = pendingApprovalToolCall;
572
- if (resumedToolMessages.length > 0)
573
- result.resumedToolMessages = resumedToolMessages;
574
- // Emit observer event on success
575
- const obs = _getAiObservers();
576
- if (obs) {
577
- const inputText = options?.messages ? '' : input;
578
- obs.emit({
579
- kind: 'agent.completed',
580
- agentName: a.constructor.name,
581
- model: modelString,
582
- provider: providerName,
583
- input: inputText,
584
- output: result.text,
585
- steps: _buildObserverSteps(steps, modelString),
586
- tokens: { prompt: totalUsage.promptTokens, completion: totalUsage.completionTokens, total: totalUsage.totalTokens },
587
- duration: Math.round(performance.now() - loopStart),
588
- finishReason: result.finishReason ?? lastStep?.finishReason ?? 'stop',
589
- streaming: false,
590
- conversationId: null,
591
- failoverAttempts,
592
- });
593
- }
1044
+ const result = buildAgentResponse(loopCtx);
1045
+ emitObserverCompleted(loopCtx, result, false);
594
1046
  return result;
595
1047
  }
596
1048
  // ─── Agent Loop (streaming) ──────────────────────────────
597
1049
  function runAgentLoopStreaming(a, input, options) {
598
1050
  let resolveResponse;
599
- const responsePromise = new Promise((resolve) => { resolveResponse = resolve; });
1051
+ let rejectResponse;
1052
+ const responsePromise = new Promise((resolve, reject) => {
1053
+ resolveResponse = resolve;
1054
+ rejectResponse = reject;
1055
+ });
600
1056
  async function* generateStream() {
601
- const loopStart = performance.now();
602
- const modelString = a.model() ?? AiRegistry.getDefault();
603
- const [providerName] = AiRegistry.parseModelString(modelString);
604
- const tools = getTools(a);
605
- const middlewares = getMiddleware(a);
606
- const toolSchemas = buildToolSchemas(tools);
607
- const toolMap = buildToolMap(tools);
608
- let failoverAttempts = 0;
609
- const messages = options?.messages
610
- ? [{ role: 'system', content: a.instructions() }, ...options.messages]
611
- : [
612
- { role: 'system', content: a.instructions() },
613
- ...(options?.history ?? []),
614
- buildUserMessage(input, options?.attachments),
615
- ];
616
- const steps = [];
617
- const stopConditions = normalizeStopConditions(a.stopWhen());
618
- const totalUsage = { promptTokens: 0, completionTokens: 0, totalTokens: 0 };
619
- // State for client-tool-stopping and approval-stopping
620
- const pendingClientToolCalls = [];
621
- let pendingApprovalToolCall;
622
- let loopFinishReason;
623
- let stopForClientTools = false;
624
- let stopForApproval = false;
625
- let resumedToolMessages = []; // eslint-disable-line no-useless-assignment
626
- // Resume server tools left pending by a previous approval round-trip.
627
- {
628
- const resume = await resumePendingToolCalls({ messages, toolMap, options });
629
- resumedToolMessages = resume.resumed;
630
- if (resume.approvalStillRequired) {
631
- pendingApprovalToolCall = resume.approvalStillRequired;
632
- loopFinishReason = 'tool_approval_required';
633
- stopForApproval = true;
634
- }
635
- }
636
- // Create middleware context
637
- const ctx = createMiddlewareContext(messages, modelString, tools, 0);
638
- // onConfig — init phase
639
- if (middlewares.length > 0) {
640
- const configResult = runOnConfig(middlewares, ctx, buildMiddlewareConfig(messages, a), 'init');
641
- if (configResult.messages)
642
- messages.splice(0, messages.length, ...configResult.messages);
643
- }
644
- // onStart
645
- if (middlewares.length > 0)
646
- await runSequential(middlewares, 'onStart', ctx);
1057
+ const { loopCtx, stopConditions } = await initializeLoop(a, input, options);
1058
+ const { ctx, middlewares, messages, steps, totalUsage } = loopCtx;
647
1059
  try {
648
- if (stopForApproval) {
1060
+ if (loopCtx.stopForApproval) {
649
1061
  // Resume detected unfulfilled approval — skip the model loop entirely.
650
1062
  }
651
1063
  else {
652
1064
  for (let iteration = 0; iteration < a.maxSteps(); iteration++) {
653
- ctx.iteration = iteration;
654
- ctx.chunkIndex = 0;
655
- // Check if middleware aborted
656
- if (ctx._aborted) {
657
- await runOnAbort(middlewares, ctx, ctx._abortReason);
1065
+ const prelude = await runIterationPrelude(loopCtx, iteration);
1066
+ if ('aborted' in prelude)
658
1067
  break;
659
- }
660
- // onIteration
661
- if (middlewares.length > 0)
662
- await runSequential(middlewares, 'onIteration', ctx);
663
- let currentModel = modelString;
664
- if (a.prepareStep) {
665
- const prep = await a.prepareStep({ stepNumber: iteration, steps, messages });
666
- if (prep.model)
667
- currentModel = prep.model;
668
- if (prep.messages)
669
- messages.splice(0, messages.length, ...prep.messages);
670
- if (prep.system)
671
- messages[0] = { role: 'system', content: prep.system };
672
- }
673
- // onConfig — beforeModel phase
674
- if (middlewares.length > 0) {
675
- const configResult = runOnConfig(middlewares, ctx, buildMiddlewareConfig(messages, a), 'beforeModel');
676
- if (configResult.messages)
677
- messages.splice(0, messages.length, ...configResult.messages);
678
- }
679
- const failoverModels = [currentModel, ...a.failover().filter(m => m !== currentModel)];
680
- let streamSource;
681
- let lastError;
682
- for (const tryModel of failoverModels) {
683
- try {
684
- const adapter = AiRegistry.resolve(tryModel);
685
- const [, modelId] = AiRegistry.parseModelString(tryModel);
686
- const opts = {
687
- model: modelId,
688
- messages,
689
- tools: toolSchemas.length > 0 ? toolSchemas : undefined,
690
- temperature: a.temperature(),
691
- maxTokens: a.maxTokens(),
692
- };
693
- streamSource = adapter.stream(opts);
694
- break;
695
- }
696
- catch (err) {
697
- lastError = err instanceof Error ? err : new Error(String(err));
698
- failoverAttempts++;
699
- if (tryModel === failoverModels[failoverModels.length - 1])
700
- throw lastError;
701
- }
702
- }
703
- if (!streamSource)
704
- throw lastError ?? new Error('No provider available');
1068
+ const { currentModel } = prelude;
1069
+ const streamSource = await runFailover(loopCtx, currentModel, (adapter, _, opts) => adapter.stream(opts));
705
1070
  let text = '';
706
1071
  let currentToolCalls = [];
707
1072
  let stepUsage = { promptTokens: 0, completionTokens: 0, totalTokens: 0 };
@@ -760,141 +1125,20 @@ function runAgentLoopStreaming(a, input, options) {
760
1125
  // onUsage
761
1126
  if (middlewares.length > 0)
762
1127
  await runOnUsage(middlewares, ctx, stepUsage);
763
- const toolResults = [];
1128
+ let toolResults = [];
764
1129
  if (currentToolCalls.length > 0) {
765
1130
  const assistantMsg = { role: 'assistant', content: text, toolCalls: currentToolCalls };
766
- messages.push(assistantMsg);
767
- for (const tc of currentToolCalls) {
768
- const tool = toolMap.get(tc.name);
769
- if (!tool) {
770
- const unknownResult = `Error: Unknown tool "${tc.name}"`;
771
- toolResults.push({ toolCallId: tc.id, result: unknownResult });
772
- messages.push({ role: 'tool', content: unknownResult, toolCallId: tc.id });
773
- yield { type: 'tool-result', toolCall: tc, result: unknownResult };
774
- continue;
775
- }
776
- if (!tool.execute) {
777
- // Client tool — no server-side handler.
778
- if (options?.toolCallStreamingMode === 'stop-on-client-tool') {
779
- pendingClientToolCalls.push(tc);
780
- loopFinishReason = 'client_tool_calls';
781
- stopForClientTools = true;
782
- yield { type: 'tool-call', toolCall: tc };
783
- continue;
784
- }
785
- const placeholder = '[client tool — execute on client]';
786
- toolResults.push({ toolCallId: tc.id, result: placeholder });
787
- messages.push({ role: 'tool', content: placeholder, toolCallId: tc.id });
788
- yield { type: 'tool-call', toolCall: tc };
789
- yield { type: 'tool-result', toolCall: tc, result: placeholder };
790
- continue;
791
- }
792
- // needsApproval enforcement
793
- const approvalDecision = await evaluateApproval(tool, tc, options);
794
- if (approvalDecision === 'rejected') {
795
- const rejectionResult = { rejected: true, reason: 'User rejected this tool call' };
796
- toolResults.push({ toolCallId: tc.id, result: rejectionResult });
797
- messages.push({ role: 'tool', content: JSON.stringify(rejectionResult), toolCallId: tc.id });
798
- yield { type: 'tool-result', toolCall: tc, result: rejectionResult };
799
- continue;
800
- }
801
- if (approvalDecision === 'pending') {
802
- pendingApprovalToolCall = { toolCall: tc, isClientTool: false };
803
- loopFinishReason = 'tool_approval_required';
804
- stopForApproval = true;
805
- yield { type: 'tool-call', toolCall: tc };
1131
+ // Forward chunks from the shared tool-phase generator straight
1132
+ // through to the stream consumer.
1133
+ const phaseGen = executeToolPhase(loopCtx, currentToolCalls, assistantMsg);
1134
+ while (true) {
1135
+ const next = await phaseGen.next();
1136
+ if (next.done) {
1137
+ toolResults = next.value;
806
1138
  break;
807
1139
  }
808
- // onBeforeToolCall
809
- let toolArgs = tc.arguments;
810
- if (middlewares.length > 0) {
811
- const beforeResult = await runOnBeforeToolCall(middlewares, ctx, tc.name, toolArgs);
812
- if (beforeResult) {
813
- if (beforeResult.type === 'skip') {
814
- const resultStr = typeof beforeResult.result === 'string' ? beforeResult.result : JSON.stringify(beforeResult.result);
815
- toolResults.push({ toolCallId: tc.id, result: beforeResult.result });
816
- messages.push({ role: 'tool', content: resultStr, toolCallId: tc.id });
817
- yield { type: 'tool-result', toolCall: tc, result: beforeResult.result };
818
- await runOnAfterToolCall(middlewares, ctx, tc.name, toolArgs, beforeResult.result);
819
- continue;
820
- }
821
- if (beforeResult.type === 'abort') {
822
- await runOnAbort(middlewares, ctx, beforeResult.reason);
823
- break;
824
- }
825
- if (beforeResult.type === 'transformArgs') {
826
- toolArgs = beforeResult.args;
827
- }
828
- }
829
- }
830
- try {
831
- // Emit the tool-call marker before execution so the UI sees
832
- // tool-call → tool-update* → tool-result in order. Async-
833
- // generator executes stream their yields as tool-update chunks
834
- // live; plain executes yield nothing here.
835
- //
836
- // Pause detection: a yielded `pause_for_client_tools` control
837
- // chunk halts iteration, propagates the nested calls to the
838
- // parent's pending list, and SKIPS the tool_result emission
839
- // — the yielding tool's own call stays orphaned in the parent
840
- // message history until the caller resolves it on resume.
841
- yield { type: 'tool-call', toolCall: tc };
842
- const execGen = executeMaybeStreaming(tool, toolArgs, { toolCallId: tc.id });
843
- let result;
844
- let paused = false;
845
- while (true) {
846
- const step = await execGen.next();
847
- if (step.done) {
848
- result = step.value;
849
- break;
850
- }
851
- if (isPauseForClientToolsChunk(step.value)) {
852
- for (const pending of step.value.toolCalls) {
853
- pendingClientToolCalls.push(pending);
854
- }
855
- loopFinishReason = 'client_tool_calls';
856
- stopForClientTools = true;
857
- paused = true;
858
- break;
859
- }
860
- const updateChunk = { type: 'tool-update', toolCall: tc, update: step.value };
861
- if (middlewares.length > 0) {
862
- const transformed = runOnChunk(middlewares, ctx, updateChunk);
863
- if (transformed)
864
- yield transformed;
865
- }
866
- else {
867
- yield updateChunk;
868
- }
869
- }
870
- if (paused)
871
- continue; // skip tool_result emission + message push for this tc
872
- // The streamed `tool-result` chunk and `step.toolResults`
873
- // both carry the ORIGINAL value; only the message content
874
- // pushed onto `messages` (next-step model input) is narrowed
875
- // by toModelOutput.
876
- toolResults.push({ toolCallId: tc.id, result });
877
- const resultStr = await applyToModelOutput(tool, result, middlewares.length > 0 ? (e) => runOnError(middlewares, ctx, e) : undefined);
878
- messages.push({ role: 'tool', content: resultStr, toolCallId: tc.id });
879
- yield { type: 'tool-result', toolCall: tc, result };
880
- // onAfterToolCall
881
- if (middlewares.length > 0)
882
- await runOnAfterToolCall(middlewares, ctx, tc.name, toolArgs, result);
883
- }
884
- catch (err) {
885
- const msg = err instanceof Error ? err.message : String(err);
886
- const errResult = `Error: ${msg}`;
887
- toolResults.push({ toolCallId: tc.id, result: errResult });
888
- messages.push({ role: 'tool', content: errResult, toolCallId: tc.id });
889
- yield { type: 'tool-result', toolCall: tc, result: errResult };
890
- // onAfterToolCall (error case)
891
- if (middlewares.length > 0)
892
- await runOnAfterToolCall(middlewares, ctx, tc.name, toolArgs, errResult);
893
- }
1140
+ yield next.value;
894
1141
  }
895
- // onToolPhaseComplete
896
- if (middlewares.length > 0)
897
- await runSequential(middlewares, 'onToolPhaseComplete', ctx);
898
1142
  }
899
1143
  else {
900
1144
  messages.push({ role: 'assistant', content: text });
@@ -907,7 +1151,8 @@ function runAgentLoopStreaming(a, input, options) {
907
1151
  finishReason,
908
1152
  };
909
1153
  steps.push(step);
910
- if (stopForClientTools || stopForApproval)
1154
+ emitObserverStepCompleted(loopCtx, iteration, true);
1155
+ if (loopCtx.stopForClientTools || loopCtx.stopForApproval)
911
1156
  break;
912
1157
  const shouldStop = stopConditions.some(cond => cond({ steps, iteration, lastMessage: step.message }));
913
1158
  if (shouldStop || finishReason !== 'tool_calls')
@@ -922,77 +1167,38 @@ function runAgentLoopStreaming(a, input, options) {
922
1167
  // onError
923
1168
  if (middlewares.length > 0)
924
1169
  await runOnError(middlewares, ctx, err);
925
- // Emit observer event on failure
926
- const obs = _getAiObservers();
927
- if (obs) {
928
- const inputText = options?.messages ? '' : input;
929
- obs.emit({
930
- kind: 'agent.failed',
931
- agentName: a.constructor.name,
932
- model: modelString,
933
- provider: providerName,
934
- input: inputText,
935
- output: '',
936
- steps: _buildObserverSteps(steps, modelString),
937
- tokens: { prompt: totalUsage.promptTokens, completion: totalUsage.completionTokens, total: totalUsage.totalTokens },
938
- duration: Math.round(performance.now() - loopStart),
939
- finishReason: 'error',
940
- streaming: true,
941
- conversationId: null,
942
- failoverAttempts,
943
- error: err instanceof Error ? err.message : String(err),
944
- });
945
- }
1170
+ emitObserverFailed(loopCtx, err, true);
946
1171
  throw err;
947
1172
  }
948
1173
  // onFinish
949
1174
  if (middlewares.length > 0)
950
1175
  await runSequential(middlewares, 'onFinish', ctx);
951
1176
  // Emit pending state to consumers via dedicated chunk types
952
- if (pendingClientToolCalls.length > 0) {
953
- yield { type: 'pending-client-tools', toolCalls: pendingClientToolCalls };
1177
+ if (loopCtx.pendingClientToolCalls.length > 0) {
1178
+ yield { type: 'pending-client-tools', toolCalls: loopCtx.pendingClientToolCalls };
954
1179
  }
955
- if (pendingApprovalToolCall) {
956
- yield { type: 'pending-approval', toolCall: pendingApprovalToolCall.toolCall, isClientTool: pendingApprovalToolCall.isClientTool };
957
- }
958
- const lastStep = steps[steps.length - 1];
959
- const result = {
960
- text: lastStep ? getMessageText(lastStep.message.content) : '',
961
- steps,
962
- usage: totalUsage,
963
- };
964
- if (loopFinishReason)
965
- result.finishReason = loopFinishReason;
966
- if (pendingClientToolCalls.length > 0)
967
- result.pendingClientToolCalls = pendingClientToolCalls;
968
- if (pendingApprovalToolCall)
969
- result.pendingApprovalToolCall = pendingApprovalToolCall;
970
- if (resumedToolMessages.length > 0)
971
- result.resumedToolMessages = resumedToolMessages;
972
- // Emit observer event on success
973
- const obs = _getAiObservers();
974
- if (obs) {
975
- const inputText = options?.messages ? '' : input;
976
- obs.emit({
977
- kind: 'agent.completed',
978
- agentName: a.constructor.name,
979
- model: modelString,
980
- provider: providerName,
981
- input: inputText,
982
- output: result.text,
983
- steps: _buildObserverSteps(steps, modelString),
984
- tokens: { prompt: totalUsage.promptTokens, completion: totalUsage.completionTokens, total: totalUsage.totalTokens },
985
- duration: Math.round(performance.now() - loopStart),
986
- finishReason: result.finishReason ?? lastStep?.finishReason ?? 'stop',
987
- streaming: true,
988
- conversationId: null,
989
- failoverAttempts,
990
- });
1180
+ if (loopCtx.pendingApprovalToolCall) {
1181
+ yield { type: 'pending-approval', toolCall: loopCtx.pendingApprovalToolCall.toolCall, isClientTool: loopCtx.pendingApprovalToolCall.isClientTool };
991
1182
  }
1183
+ const result = buildAgentResponse(loopCtx);
1184
+ emitObserverCompleted(loopCtx, result, true);
992
1185
  resolveResponse(result);
993
1186
  }
1187
+ // Outer wrapper: if `generateStream` throws (e.g. the caller's
1188
+ // AbortSignal fired), reject the `response` promise with the same
1189
+ // reason BEFORE re-throwing into the for-await consumer. Without this,
1190
+ // `await response` would hang forever after a mid-stream abort.
1191
+ async function* withRejectOnError() {
1192
+ try {
1193
+ yield* generateStream();
1194
+ }
1195
+ catch (err) {
1196
+ rejectResponse(err);
1197
+ throw err;
1198
+ }
1199
+ }
994
1200
  return {
995
- stream: generateStream(),
1201
+ stream: withRejectOnError(),
996
1202
  response: responsePromise,
997
1203
  };
998
1204
  }
@@ -1056,11 +1262,21 @@ async function resumePendingToolCalls(deps) {
1056
1262
  approvalStillRequired = { toolCall: tc, isClientTool: false };
1057
1263
  break;
1058
1264
  }
1265
+ // Validate args before executing on resume. Approval-resume bypasses
1266
+ // middleware so we use the raw tc.arguments. On failure, feed the
1267
+ // structured error to the model so it can correct itself.
1268
+ const validation = validateToolArgs(tool, tc.arguments);
1269
+ if (!validation.ok) {
1270
+ const m = { role: 'tool', content: JSON.stringify(validation.error), toolCallId: tc.id };
1271
+ messages.push(m);
1272
+ resumed.push(m);
1273
+ continue;
1274
+ }
1059
1275
  try {
1060
1276
  // Drain generator yields silently — approval-resume runs outside the
1061
1277
  // stream, so any preliminary updates are discarded; only the final
1062
1278
  // return value is captured.
1063
- const execGen = executeMaybeStreaming(tool, tc.arguments, { toolCallId: tc.id });
1279
+ const execGen = executeMaybeStreaming(tool, validation.value, { toolCallId: tc.id });
1064
1280
  let result;
1065
1281
  while (true) {
1066
1282
  const step = await execGen.next();
@@ -1127,6 +1343,30 @@ async function* executeMaybeStreaming(tool, args, ctx) {
1127
1343
  }
1128
1344
  return await ret;
1129
1345
  }
1346
+ /**
1347
+ * Validate a tool call's arguments against the tool's `inputSchema`. On
1348
+ * success, the parsed value is returned — zod transforms (`.transform`,
1349
+ * `.default`, type coercion) are applied, so `execute` receives the
1350
+ * canonical shape the schema describes. On failure, a structured error
1351
+ * suitable for feeding back to the model is returned.
1352
+ */
1353
+ function validateToolArgs(tool, args) {
1354
+ const parsed = tool.definition.inputSchema.safeParse(args);
1355
+ if (parsed.success) {
1356
+ return { ok: true, value: parsed.data };
1357
+ }
1358
+ return {
1359
+ ok: false,
1360
+ error: {
1361
+ error: 'invalid_arguments',
1362
+ message: `Tool "${tool.definition.name}" received arguments that did not match its inputSchema.`,
1363
+ issues: parsed.error.issues.map(i => ({
1364
+ path: i.path.map(seg => String(seg)).join('.'),
1365
+ message: i.message,
1366
+ })),
1367
+ },
1368
+ };
1369
+ }
1130
1370
  /**
1131
1371
  * Default stringification used for the `tool` role message content when a
1132
1372
  * tool has no `toModelOutput` transform: pass through strings, JSON-encode