@oh-my-pi/pi-agent-core 15.0.2 → 15.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/agent-loop.ts CHANGED
@@ -7,9 +7,12 @@ import {
7
7
  type AssistantMessageEvent,
8
8
  type Context,
9
9
  EventStream,
10
+ isZodSchema,
10
11
  streamSimple,
11
12
  type ToolResultMessage,
13
+ type TSchema,
12
14
  validateToolArguments,
15
+ zodToWireSchema,
13
16
  } from "@oh-my-pi/pi-ai";
14
17
  import { sanitizeText } from "@oh-my-pi/pi-natives";
15
18
  import {
@@ -19,6 +22,23 @@ import {
19
22
  isHarmonyLeakMitigationTarget,
20
23
  signalListLabel,
21
24
  } from "./harmony-leak";
25
+ import { type AgentRunCoverage, type AgentRunSummary, ToolCallBlockedError } from "./run-collector";
26
+ import {
27
+ type AgentTelemetry,
28
+ failChatSpan,
29
+ finishChatSpan,
30
+ finishExecuteToolSpan,
31
+ finishInvokeAgentSpan,
32
+ fireOnRunEnd,
33
+ PiGenAIAttr,
34
+ recordSkippedTool,
35
+ resolveTelemetry,
36
+ runInActiveSpan,
37
+ type Span,
38
+ startChatSpan,
39
+ startExecuteToolSpan,
40
+ startInvokeAgentSpan,
41
+ } from "./telemetry";
22
42
  import type {
23
43
  AgentContext,
24
44
  AgentEvent,
@@ -173,6 +193,113 @@ function createAgentStream(): EventStream<AgentEvent, AgentMessage[]> {
173
193
  );
174
194
  }
175
195
 
/**
 * Assemble the payload for the `agent_end` event.
 *
 * With telemetry present, the run collector is snapshotted (passing the
 * current step count) and the snapshot's summary and coverage are attached
 * to the event as `telemetry` and `coverage`, so consumers get the
 * {@link AgentRunSummary} / {@link AgentRunCoverage} rollup next to the
 * messages. `fireOnRunEnd` is invoked only when the collector's
 * `markRunEnded()` returns a truthy value.
 *
 * Without telemetry, the event carries just the messages, which keeps the
 * pre-telemetry event shape.
 *
 * @param messages - Messages to place on the event.
 * @param telemetry - Resolved telemetry handle, or `undefined` when telemetry is off.
 * @param stepCount - Step count forwarded to the collector snapshot.
 * @returns The `agent_end` event to push onto the stream.
 */
function buildAgentEndEvent(
	messages: AgentMessage[],
	telemetry: AgentTelemetry | undefined,
	stepCount: number,
): Extract<AgentEvent, { type: "agent_end" }> {
	if (telemetry) {
		const { collector } = telemetry;
		// Snapshot first, then mark the run ended; the hook receives that same snapshot.
		const { summary, coverage } = collector.snapshot({ stepCount });
		if (collector.markRunEnded()) {
			fireOnRunEnd(telemetry, summary, coverage);
		}
		return { type: "agent_end", messages, telemetry: summary, coverage };
	}
	return { type: "agent_end", messages };
}
214
+
/**
 * Result shape produced by the `detailed()` handle of
 * {@link agentLoopDetailed}. It carries the same `AgentMessage[]` that
 * `stream.result()` resolves to, plus the run-level telemetry and coverage
 * rollup. The resolved type of `stream.result()` itself is not changed.
 */
export interface AgentLoopDetailedResult {
	/** Messages the stream's `result()` resolved to. */
	readonly messages: AgentMessage[];
	/** Run summary, or `undefined` if none was captured. */
	readonly telemetry: AgentRunSummary | undefined;
	/** Run coverage, or `undefined` if none was captured. */
	readonly coverage: AgentRunCoverage | undefined;
}
225
+
/**
 * Variant of {@link agentLoop} that also surfaces the run-level summary and
 * coverage next to the messages.
 *
 * The returned `stream` is the `EventStream` that {@link agentLoop} returns,
 * and its `result()` still resolves to `AgentMessage[]`. Calling `detailed()`
 * waits for the stream result and returns it with the additive rollup fields.
 * Prefer plain {@link agentLoop} when the rollup is not needed.
 *
 * @param prompts - Prompt messages forwarded to {@link agentLoop}.
 * @param context - Agent context forwarded to {@link agentLoop}.
 * @param config - Loop configuration; a capture hook is wired onto a copy of it.
 * @param signal - Optional abort signal forwarded to {@link agentLoop}.
 * @param streamFn - Optional stream function forwarded to {@link agentLoop}.
 * @returns The event stream plus a `detailed()` accessor for the rollup.
 */
export function agentLoopDetailed(
	prompts: AgentMessage[],
	context: AgentContext,
	config: AgentLoopConfig,
	signal?: AbortSignal,
	streamFn?: StreamFn,
): {
	readonly stream: EventStream<AgentEvent, AgentMessage[]>;
	readonly detailed: () => Promise<AgentLoopDetailedResult>;
} {
	const capture = createDetailedCapture(config);
	// Run the loop with the wired config so the capture hook sees the run end.
	const stream = agentLoop(prompts, context, capture.config, signal, streamFn);
	const detailed = (): Promise<AgentLoopDetailedResult> => capture.detailed(stream);
	return { stream, detailed };
}
250
+
/**
 * Counterpart of {@link agentLoopDetailed} built on
 * {@link agentLoopContinue}: returns the same stream that
 * {@link agentLoopContinue} produces, plus a `detailed()` accessor that
 * resolves to the messages together with the run summary and coverage.
 *
 * @param context - Agent context forwarded to {@link agentLoopContinue}.
 * @param config - Loop configuration; a capture hook is wired onto a copy of it.
 * @param signal - Optional abort signal forwarded to {@link agentLoopContinue}.
 * @param streamFn - Optional stream function forwarded to {@link agentLoopContinue}.
 * @returns The event stream plus a `detailed()` accessor for the rollup.
 */
export function agentLoopContinueDetailed(
	context: AgentContext,
	config: AgentLoopConfig,
	signal?: AbortSignal,
	streamFn?: StreamFn,
): {
	readonly stream: EventStream<AgentEvent, AgentMessage[]>;
	readonly detailed: () => Promise<AgentLoopDetailedResult>;
} {
	const capture = createDetailedCapture(config);
	// Run the continuation with the wired config so the capture hook sees the run end.
	const stream = agentLoopContinue(context, capture.config, signal, streamFn);
	const detailed = (): Promise<AgentLoopDetailedResult> => capture.detailed(stream);
	return { stream, detailed };
}
268
+
/**
 * Produce a copy of `config` whose `telemetry.onRunEnd` hook records the run
 * summary and coverage, so the detailed helpers can return them without
 * consuming the event stream. An `onRunEnd` hook already present on the
 * caller's config is still invoked, after the values have been recorded.
 *
 * @param config - Caller-supplied loop configuration; it is not mutated.
 * @returns The wired config and a `detailed(stream)` function that awaits
 *   `stream.result()` and pairs the messages with whatever was recorded
 *   (`undefined` for both fields if the hook never fired).
 */
function createDetailedCapture(config: AgentLoopConfig): {
	readonly config: AgentLoopConfig;
	readonly detailed: (stream: EventStream<AgentEvent, AgentMessage[]>) => Promise<AgentLoopDetailedResult>;
} {
	const callerTelemetry = config.telemetry;
	const callerHook = callerTelemetry?.onRunEnd;
	let recordedSummary: AgentRunSummary | undefined;
	let recordedCoverage: AgentRunCoverage | undefined;

	const wiredConfig: AgentLoopConfig = {
		...config,
		telemetry: {
			// Spreading `undefined` contributes no properties, so a missing telemetry block is fine.
			...callerTelemetry,
			onRunEnd: (summary, coverage) => {
				recordedSummary = summary;
				recordedCoverage = coverage;
				callerHook?.(summary, coverage);
			},
		},
	};

	// `messages` is awaited first; the recorded values are read only after the stream has resolved.
	const detailed = async (stream: EventStream<AgentEvent, AgentMessage[]>): Promise<AgentLoopDetailedResult> => ({
		messages: await stream.result(),
		telemetry: recordedSummary,
		coverage: recordedCoverage,
	});

	return { config: wiredConfig, detailed };
}
302
+
176
303
  function normalizeMessagesForProvider(
177
304
  messages: Context["messages"],
178
305
  model: AgentLoopConfig["model"],
@@ -240,10 +367,15 @@ function normalizeTools(tools: AgentContext["tools"], injectIntent: boolean): Co
240
367
  injectIntent = injectIntent && Bun.env.PI_NO_INTENT !== "1";
241
368
  return tools?.map(t => {
242
369
  const intentMode = resolveIntentMode(t.intent);
243
- const parameters =
244
- injectIntent && intentMode !== "omit"
245
- ? (injectIntentIntoSchema(t.parameters, intentMode) as typeof t.parameters)
246
- : t.parameters;
370
+ let parameters: TSchema = t.parameters;
371
+ if (injectIntent && intentMode !== "omit") {
372
+ if (isZodSchema(parameters)) {
373
+ const wired = zodToWireSchema(parameters);
374
+ parameters = injectIntentIntoSchema(wired, intentMode) as TSchema;
375
+ } else {
376
+ parameters = injectIntentIntoSchema(parameters, intentMode) as TSchema;
377
+ }
378
+ }
247
379
  const description = t.description ?? "";
248
380
  return { ...t, parameters, description };
249
381
  });
@@ -274,6 +406,50 @@ async function runLoop(
274
406
  signal: AbortSignal | undefined,
275
407
  stream: EventStream<AgentEvent, AgentMessage[]>,
276
408
  streamFn?: StreamFn,
409
+ ): Promise<void> {
410
+ const telemetry = resolveTelemetry(config.telemetry, config.sessionId);
411
+ const invokeAgentSpan = startInvokeAgentSpan(telemetry, config.model);
412
+ const stepCounter = { count: 0 };
413
+ let caughtError: unknown;
414
+ try {
415
+ await runInActiveSpan(invokeAgentSpan, () =>
416
+ runLoopBody(
417
+ currentContext,
418
+ newMessages,
419
+ config,
420
+ signal,
421
+ stream,
422
+ telemetry,
423
+ invokeAgentSpan,
424
+ stepCounter,
425
+ streamFn,
426
+ ),
427
+ );
428
+ } catch (err) {
429
+ caughtError = err;
430
+ throw err;
431
+ } finally {
432
+ finishInvokeAgentSpan(telemetry, invokeAgentSpan, {
433
+ stepCount: stepCounter.count,
434
+ errorObject: caughtError,
435
+ });
436
+ }
437
+ }
438
+
/**
 * Mutable step counter passed by reference through the loop. It is created at
 * zero in `runLoop`, and its `count` is read when the invoke-agent span is
 * finished and when the `agent_end` event is built.
 */
interface StepCounter {
	/** Current step count. */
	count: number;
}
442
+
443
+ async function runLoopBody(
444
+ currentContext: AgentContext,
445
+ newMessages: AgentMessage[],
446
+ config: AgentLoopConfig,
447
+ signal: AbortSignal | undefined,
448
+ stream: EventStream<AgentEvent, AgentMessage[]>,
449
+ telemetry: AgentTelemetry | undefined,
450
+ invokeAgentSpan: Span | undefined,
451
+ stepCounter: StepCounter,
452
+ streamFn?: StreamFn,
277
453
  ): Promise<void> {
278
454
  let firstTurn = true;
279
455
  // Check for steering messages at start (user may have typed while waiting)
@@ -318,6 +494,9 @@ async function runLoop(
318
494
  config,
319
495
  signal,
320
496
  stream,
497
+ telemetry,
498
+ invokeAgentSpan,
499
+ stepCounter,
321
500
  streamFn,
322
501
  harmonyRetryAttempt,
323
502
  );
@@ -362,9 +541,19 @@ async function runLoop(
362
541
  currentContext.messages.push(result);
363
542
  newMessages.push(result);
364
543
  toolResults.push(result);
544
+ // The placeholder result above keeps the API's tool_use/tool_result
545
+ // pairing intact, but no execute_tool span is started for these
546
+ // calls. Mirror the run-collector entry directly so the run
547
+ // summary's tool counters and `coverage.toolsInvoked` reflect
548
+ // what the user actually saw on the wire.
549
+ recordSkippedTool(telemetry, {
550
+ toolCallId: toolCall.id,
551
+ toolName: toolCall.name,
552
+ status: message.stopReason === "aborted" ? "aborted" : "error",
553
+ });
365
554
  }
366
555
  stream.push({ type: "turn_end", message, toolResults });
367
- stream.push({ type: "agent_end", messages: newMessages });
556
+ stream.push(buildAgentEndEvent(newMessages, telemetry, stepCounter.count));
368
557
  stream.end(newMessages);
369
558
  return;
370
559
  }
@@ -375,7 +564,15 @@ async function runLoop(
375
564
 
376
565
  const toolResults: ToolResultMessage[] = [];
377
566
  if (hasMoreToolCalls) {
378
- const executionResult = await executeToolCalls(currentContext, message, signal, stream, config);
567
+ const executionResult = await executeToolCalls(
568
+ currentContext,
569
+ message,
570
+ signal,
571
+ stream,
572
+ config,
573
+ telemetry,
574
+ invokeAgentSpan,
575
+ );
379
576
 
380
577
  toolResults.push(...executionResult.toolResults);
381
578
  steeringMessagesFromExecution = executionResult.steeringMessages;
@@ -403,7 +600,7 @@ async function runLoop(
403
600
  break;
404
601
  }
405
602
 
406
- stream.push({ type: "agent_end", messages: newMessages });
603
+ stream.push(buildAgentEndEvent(newMessages, telemetry, stepCounter.count));
407
604
  stream.end(newMessages);
408
605
  }
409
606
 
@@ -433,6 +630,9 @@ async function streamAssistantResponse(
433
630
  config: AgentLoopConfig,
434
631
  signal: AbortSignal | undefined,
435
632
  stream: EventStream<AgentEvent, AgentMessage[]>,
633
+ telemetry: AgentTelemetry | undefined,
634
+ invokeAgentSpan: Span | undefined,
635
+ stepCounter: StepCounter,
436
636
  streamFn?: StreamFn,
437
637
  harmonyRetryAttempt = 0,
438
638
  ): Promise<AssistantMessage> {
@@ -474,111 +674,158 @@ async function streamAssistantResponse(
474
674
  ? AbortSignal.any([signal, harmonyAbortController.signal])
475
675
  : harmonyAbortController.signal
476
676
  : signal;
477
- const response = await streamFunction(config.model, llmContext, {
478
- ...config,
479
- apiKey: resolvedApiKey,
480
- metadata: resolvedMetadata,
481
- toolChoice: dynamicToolChoice ?? config.toolChoice,
482
- reasoning: dynamicReasoning ?? config.reasoning,
483
- temperature:
484
- harmonyRetryAttempt > 0 && config.temperature !== undefined ? config.temperature + 0.05 : config.temperature,
485
- signal: requestSignal,
677
+ const effectiveTemperature =
678
+ harmonyRetryAttempt > 0 && config.temperature !== undefined ? config.temperature + 0.05 : config.temperature;
679
+ const effectiveToolChoice = dynamicToolChoice ?? config.toolChoice;
680
+ const effectiveReasoning = dynamicReasoning ?? config.reasoning;
681
+
682
+ const chatStepNumber = stepCounter.count;
683
+ stepCounter.count += 1;
684
+ const chatSpan = startChatSpan(telemetry, config.model, {
685
+ parent: invokeAgentSpan,
686
+ stepNumber: chatStepNumber,
687
+ request: {
688
+ maxTokens: config.maxTokens,
689
+ temperature: effectiveTemperature,
690
+ topP: config.topP,
691
+ topK: config.topK,
692
+ presencePenalty: config.presencePenalty,
693
+ serviceTier: config.serviceTier,
694
+ reasoningEffort: typeof effectiveReasoning === "string" ? effectiveReasoning : undefined,
695
+ toolChoice: effectiveToolChoice,
696
+ tools: llmContext.tools,
697
+ systemPrompt: llmContext.systemPrompt,
698
+ messages: llmContext.messages,
699
+ },
486
700
  });
487
701
 
488
- let partialMessage: AssistantMessage | null = null;
489
- let addedPartial = false;
490
-
491
- const responseIterator = response[Symbol.asyncIterator]();
492
-
493
- // Set up a single abort race: register the abort listener once for the whole
494
- // stream and reuse the same race promise for every iterator.next() instead of
495
- // allocating Promise.withResolvers and add/removeEventListener per event.
496
- let abortRacePromise: Promise<typeof ABORTED> | undefined;
497
- let detachAbortListener: (() => void) | undefined;
498
- if (requestSignal) {
499
- if (requestSignal.aborted) {
500
- return emitAbortedAssistantMessage(partialMessage, addedPartial, context, config, stream);
501
- }
502
- const { promise, resolve } = Promise.withResolvers<typeof ABORTED>();
503
- const onAbort = () => resolve(ABORTED);
504
- requestSignal.addEventListener("abort", onAbort, { once: true });
505
- abortRacePromise = promise;
506
- detachAbortListener = () => requestSignal.removeEventListener("abort", onAbort);
507
- }
702
+ const finishChat = async (message: AssistantMessage): Promise<void> => {
703
+ await finishChatSpan(telemetry, chatSpan, message, {
704
+ stepNumber: chatStepNumber,
705
+ serviceTier: config.serviceTier,
706
+ });
707
+ };
508
708
 
509
709
  try {
510
- while (true) {
511
- let next: IteratorResult<AssistantMessageEvent>;
512
- if (abortRacePromise) {
513
- const result = await Promise.race([responseIterator.next(), abortRacePromise]);
514
- if (result === ABORTED) {
515
- responseIterator.return?.()?.catch(() => {});
516
- return emitAbortedAssistantMessage(partialMessage, addedPartial, context, config, stream);
710
+ return await runInActiveSpan(chatSpan, async () => {
711
+ const response = await streamFunction(config.model, llmContext, {
712
+ ...config,
713
+ apiKey: resolvedApiKey,
714
+ metadata: resolvedMetadata,
715
+ toolChoice: effectiveToolChoice,
716
+ reasoning: effectiveReasoning,
717
+ temperature: effectiveTemperature,
718
+ signal: requestSignal,
719
+ });
720
+
721
+ let partialMessage: AssistantMessage | null = null;
722
+ let addedPartial = false;
723
+
724
+ const responseIterator = response[Symbol.asyncIterator]();
725
+
726
+ // Set up a single abort race: register the abort listener once for the whole
727
+ // stream and reuse the same race promise for every iterator.next() instead of
728
+ // allocating Promise.withResolvers and add/removeEventListener per event.
729
+ let abortRacePromise: Promise<typeof ABORTED> | undefined;
730
+ let detachAbortListener: (() => void) | undefined;
731
+ if (requestSignal) {
732
+ if (requestSignal.aborted) {
733
+ const aborted = emitAbortedAssistantMessage(partialMessage, addedPartial, context, config, stream);
734
+ await finishChat(aborted);
735
+ return aborted;
517
736
  }
518
- next = result;
519
- } else {
520
- next = await responseIterator.next();
737
+ const { promise, resolve } = Promise.withResolvers<typeof ABORTED>();
738
+ const onAbort = () => resolve(ABORTED);
739
+ requestSignal.addEventListener("abort", onAbort, { once: true });
740
+ abortRacePromise = promise;
741
+ detachAbortListener = () => requestSignal.removeEventListener("abort", onAbort);
521
742
  }
522
- if (requestSignal?.aborted) {
523
- return emitAbortedAssistantMessage(partialMessage, addedPartial, context, config, stream);
524
- }
525
- if (next.done) break;
526
-
527
- const event = next.value;
528
-
529
- switch (event.type) {
530
- case "start":
531
- partialMessage = event.partial;
532
- context.messages.push(partialMessage);
533
- addedPartial = true;
534
- stream.push({ type: "message_start", message: { ...partialMessage } });
535
- break;
536
-
537
- case "text_start":
538
- case "text_delta":
539
- case "text_end":
540
- case "thinking_start":
541
- case "thinking_delta":
542
- case "thinking_end":
543
- case "toolcall_start":
544
- case "toolcall_delta":
545
- case "toolcall_end":
546
- if (partialMessage) {
547
- partialMessage = event.partial;
548
- context.messages[context.messages.length - 1] = partialMessage;
549
- config.onAssistantMessageEvent?.(partialMessage, event);
550
- if (signal?.aborted) {
551
- continue;
552
- }
553
- stream.push({
554
- type: "message_update",
555
- assistantMessageEvent: event,
556
- message: { ...partialMessage },
557
- });
558
- }
559
- break;
560
743
 
561
- case "done":
562
- case "error": {
563
- const finalMessage = await response.result();
564
- if (addedPartial) {
565
- context.messages[context.messages.length - 1] = finalMessage;
744
+ try {
745
+ while (true) {
746
+ let next: IteratorResult<AssistantMessageEvent>;
747
+ if (abortRacePromise) {
748
+ const result = await Promise.race([responseIterator.next(), abortRacePromise]);
749
+ if (result === ABORTED) {
750
+ responseIterator.return?.()?.catch(() => {});
751
+ const aborted = emitAbortedAssistantMessage(partialMessage, addedPartial, context, config, stream);
752
+ await finishChat(aborted);
753
+ return aborted;
754
+ }
755
+ next = result;
566
756
  } else {
567
- context.messages.push(finalMessage);
757
+ next = await responseIterator.next();
568
758
  }
569
- if (!addedPartial) {
570
- stream.push({ type: "message_start", message: { ...finalMessage } });
759
+ if (requestSignal?.aborted) {
760
+ const aborted = emitAbortedAssistantMessage(partialMessage, addedPartial, context, config, stream);
761
+ await finishChat(aborted);
762
+ return aborted;
763
+ }
764
+ if (next.done) break;
765
+
766
+ const event = next.value;
767
+
768
+ switch (event.type) {
769
+ case "start":
770
+ partialMessage = event.partial;
771
+ context.messages.push(partialMessage);
772
+ addedPartial = true;
773
+ stream.push({ type: "message_start", message: { ...partialMessage } });
774
+ break;
775
+
776
+ case "text_start":
777
+ case "text_delta":
778
+ case "text_end":
779
+ case "thinking_start":
780
+ case "thinking_delta":
781
+ case "thinking_end":
782
+ case "toolcall_start":
783
+ case "toolcall_delta":
784
+ case "toolcall_end":
785
+ if (partialMessage) {
786
+ partialMessage = event.partial;
787
+ context.messages[context.messages.length - 1] = partialMessage;
788
+ config.onAssistantMessageEvent?.(partialMessage, event);
789
+ if (signal?.aborted) {
790
+ continue;
791
+ }
792
+ stream.push({
793
+ type: "message_update",
794
+ assistantMessageEvent: event,
795
+ message: { ...partialMessage },
796
+ });
797
+ }
798
+ break;
799
+
800
+ case "done":
801
+ case "error": {
802
+ const finalMessage = await response.result();
803
+ if (addedPartial) {
804
+ context.messages[context.messages.length - 1] = finalMessage;
805
+ } else {
806
+ context.messages.push(finalMessage);
807
+ }
808
+ if (!addedPartial) {
809
+ stream.push({ type: "message_start", message: { ...finalMessage } });
810
+ }
811
+ stream.push({ type: "message_end", message: finalMessage });
812
+ await finishChat(finalMessage);
813
+ return finalMessage;
814
+ }
571
815
  }
572
- stream.push({ type: "message_end", message: finalMessage });
573
- return finalMessage;
574
816
  }
817
+ } finally {
818
+ detachAbortListener?.();
575
819
  }
576
- }
577
- } finally {
578
- detachAbortListener?.();
579
- }
580
820
 
581
- return await response.result();
821
+ const trailing = await response.result();
822
+ await finishChat(trailing);
823
+ return trailing;
824
+ });
825
+ } catch (err) {
826
+ failChatSpan(telemetry, chatSpan, { errorObject: err });
827
+ throw err;
828
+ }
582
829
  }
583
830
 
584
831
  function emitAbortedAssistantMessage(
@@ -628,6 +875,8 @@ async function executeToolCalls(
628
875
  signal: AbortSignal | undefined,
629
876
  stream: EventStream<AgentEvent, AgentMessage[]>,
630
877
  config: AgentLoopConfig,
878
+ telemetry: AgentTelemetry | undefined,
879
+ invokeAgentSpan: Span | undefined,
631
880
  ): Promise<{ toolResults: ToolResultMessage[]; steeringMessages?: AgentMessage[] }> {
632
881
  const tools = currentContext.tools;
633
882
  const {
@@ -733,6 +982,11 @@ async function executeToolCalls(
733
982
 
734
983
  const runTool = async (record: (typeof records)[number], index: number): Promise<void> => {
735
984
  if (interruptState.triggered) {
985
+ // Skip both span emission and the collector orphan record here. The
986
+ // tail sweep below (after `Promise.allSettled`) is the single path
987
+ // that handles "no result message was produced" — it calls
988
+ // `recordSkippedTool` and `emitToolResult` once per record, so any
989
+ // work we did here would double-count.
736
990
  record.skipped = true;
737
991
  return;
738
992
  }
@@ -765,111 +1019,148 @@ async function executeToolCalls(
765
1019
  intent: toolCall.intent,
766
1020
  });
767
1021
 
768
- let result: AgentToolResult<any>;
769
- let isError = false;
1022
+ const toolSpan = startExecuteToolSpan(telemetry, {
1023
+ tool,
1024
+ toolName: toolCall.name,
1025
+ toolCallId: toolCall.id,
1026
+ args: argsForExecution,
1027
+ parent: invokeAgentSpan,
1028
+ });
1029
+ if (toolSpan && toolCall.intent) {
1030
+ toolSpan.setAttribute(PiGenAIAttr.ToolCallIntent, toolCall.intent);
1031
+ }
770
1032
 
771
- try {
772
- if (!tool) throw new Error(`Tool ${toolCall.name} not found`);
1033
+ let result: AgentToolResult<any> = { content: [], details: {} };
1034
+ let isError = false;
1035
+ let caughtError: unknown;
773
1036
 
774
- let effectiveArgs: Record<string, unknown>;
1037
+ await runInActiveSpan(toolSpan, async () => {
775
1038
  try {
776
- effectiveArgs = validateToolArguments(tool, { ...toolCall, arguments: argsForExecution });
777
- } catch (validationError) {
778
- if (tool.lenientArgValidation) {
779
- effectiveArgs = argsForExecution;
780
- } else {
781
- throw validationError;
782
- }
783
- }
1039
+ if (!tool) throw new Error(`Tool ${toolCall.name} not found`);
784
1040
 
785
- if (beforeToolCall) {
786
- const beforeResult = await beforeToolCall(
787
- {
788
- assistantMessage,
789
- toolCall,
790
- args: effectiveArgs,
791
- context: currentContext,
792
- },
793
- toolSignal,
794
- );
795
- if (beforeResult?.block) {
796
- throw new Error(beforeResult.reason || "Tool execution was blocked");
1041
+ let effectiveArgs: Record<string, unknown>;
1042
+ try {
1043
+ effectiveArgs = validateToolArguments(tool, { ...toolCall, arguments: argsForExecution });
1044
+ } catch (validationError) {
1045
+ if (tool.lenientArgValidation) {
1046
+ effectiveArgs = argsForExecution;
1047
+ } else {
1048
+ throw validationError;
1049
+ }
797
1050
  }
798
- }
799
- // Reflect post-hook args so emitted tool results / afterToolCall see what actually executed.
800
- record.args = effectiveArgs;
801
-
802
- const toolContext = getToolContext
803
- ? getToolContext({
804
- batchId,
805
- index,
806
- total: toolCalls.length,
807
- toolCalls: toolCallInfos,
808
- })
809
- : undefined;
810
- const rawResult = await tool.execute(
811
- toolCall.id,
812
- transformToolCallArguments ? transformToolCallArguments(effectiveArgs, toolCall.name) : effectiveArgs,
813
- tool.nonAbortable ? undefined : toolSignal,
814
- partialResult => {
815
- stream.push({
816
- type: "tool_execution_update",
817
- toolCallId: toolCall.id,
818
- toolName: toolCall.name,
819
- args: effectiveArgs,
820
- partialResult: coerceToolResult(partialResult).result,
821
- });
822
- },
823
- toolContext,
824
- );
825
- const coerced = coerceToolResult(rawResult);
826
- result = coerced.result;
827
- if (coerced.malformed || result.isError) isError = true;
828
- } catch (e) {
829
- result = {
830
- content: [{ type: "text", text: e instanceof Error ? e.message : String(e) }],
831
- details: {},
832
- };
833
- isError = true;
834
- }
835
1051
 
836
- if (afterToolCall) {
837
- try {
838
- const after = await afterToolCall(
839
- {
840
- assistantMessage,
841
- toolCall,
842
- args: record.args,
843
- result,
844
- isError,
845
- context: currentContext,
1052
+ if (beforeToolCall) {
1053
+ const beforeResult = await beforeToolCall(
1054
+ {
1055
+ assistantMessage,
1056
+ toolCall,
1057
+ args: effectiveArgs,
1058
+ context: currentContext,
1059
+ },
1060
+ toolSignal,
1061
+ );
1062
+ if (beforeResult?.block) {
1063
+ throw new ToolCallBlockedError(beforeResult.reason);
1064
+ }
1065
+ }
1066
+ // Reflect post-hook args so emitted tool results / afterToolCall see what actually executed.
1067
+ record.args = effectiveArgs;
1068
+
1069
+ const toolContext = getToolContext
1070
+ ? getToolContext({
1071
+ batchId,
1072
+ index,
1073
+ total: toolCalls.length,
1074
+ toolCalls: toolCallInfos,
1075
+ })
1076
+ : undefined;
1077
+ const rawResult = await tool.execute(
1078
+ toolCall.id,
1079
+ transformToolCallArguments ? transformToolCallArguments(effectiveArgs, toolCall.name) : effectiveArgs,
1080
+ tool.nonAbortable ? undefined : toolSignal,
1081
+ partialResult => {
1082
+ stream.push({
1083
+ type: "tool_execution_update",
1084
+ toolCallId: toolCall.id,
1085
+ toolName: toolCall.name,
1086
+ args: effectiveArgs,
1087
+ partialResult: coerceToolResult(partialResult).result,
1088
+ });
846
1089
  },
847
- toolSignal,
1090
+ toolContext,
848
1091
  );
849
- if (after) {
850
- result = {
851
- content: after.content ?? result.content,
852
- details: after.details ?? result.details,
853
- isError: after.isError ?? result.isError,
854
- };
855
- isError = after.isError ?? isError;
856
- }
1092
+ const coerced = coerceToolResult(rawResult);
1093
+ result = coerced.result;
1094
+ if (coerced.malformed || result.isError) isError = true;
857
1095
  } catch (e) {
1096
+ caughtError = e;
858
1097
  result = {
859
1098
  content: [{ type: "text", text: e instanceof Error ? e.message : String(e) }],
860
1099
  details: {},
861
1100
  };
862
1101
  isError = true;
863
1102
  }
864
- }
865
1103
 
866
- if (interruptState.triggered) {
1104
+ if (afterToolCall) {
1105
+ try {
1106
+ const after = await afterToolCall(
1107
+ {
1108
+ assistantMessage,
1109
+ toolCall,
1110
+ args: record.args,
1111
+ result,
1112
+ isError,
1113
+ context: currentContext,
1114
+ },
1115
+ toolSignal,
1116
+ );
1117
+ if (after) {
1118
+ result = {
1119
+ content: after.content ?? result.content,
1120
+ details: after.details ?? result.details,
1121
+ isError: after.isError ?? result.isError,
1122
+ };
1123
+ isError = after.isError ?? isError;
1124
+ }
1125
+ } catch (e) {
1126
+ caughtError = e;
1127
+ result = {
1128
+ content: [{ type: "text", text: e instanceof Error ? e.message : String(e) }],
1129
+ details: {},
1130
+ };
1131
+ isError = true;
1132
+ }
1133
+ }
1134
+ });
1135
+
1136
+ const interrupted = interruptState.triggered;
1137
+ if (interrupted) {
867
1138
  record.skipped = true;
868
1139
  emitToolResult(record, createSkippedToolResult(), true);
869
1140
  } else {
870
1141
  emitToolResult(record, result, isError);
871
1142
  }
872
1143
 
1144
+ const firstTextBlock = result.content?.[0];
1145
+ const errorMessageForSpan =
1146
+ caughtError === undefined && isError && firstTextBlock?.type === "text" ? firstTextBlock.text : undefined;
1147
+ const status = interrupted
1148
+ ? "aborted"
1149
+ : caughtError instanceof ToolCallBlockedError
1150
+ ? "blocked"
1151
+ : isError
1152
+ ? "error"
1153
+ : "ok";
1154
+ finishExecuteToolSpan(telemetry, toolSpan, {
1155
+ result,
1156
+ isError,
1157
+ status,
1158
+ errorMessage: errorMessageForSpan,
1159
+ errorObject: caughtError,
1160
+ toolCallId: toolCall.id,
1161
+ toolName: toolCall.name,
1162
+ });
1163
+
873
1164
  await checkSteering();
874
1165
  };
875
1166
 
@@ -896,6 +1187,11 @@ async function executeToolCalls(
896
1187
  for (const record of records) {
897
1188
  if (!record.toolResultMessage) {
898
1189
  record.skipped = true;
1190
+ recordSkippedTool(telemetry, {
1191
+ toolCallId: record.toolCall.id,
1192
+ toolName: record.toolCall.name,
1193
+ status: "skipped",
1194
+ });
899
1195
  emitToolResult(record, createSkippedToolResult(), true);
900
1196
  }
901
1197
  }