@gajae-code/agent-core 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. package/CHANGELOG.md +482 -0
  2. package/README.md +473 -0
  3. package/dist/types/agent-loop.d.ts +55 -0
  4. package/dist/types/agent.d.ts +334 -0
  5. package/dist/types/append-only-context.d.ts +113 -0
  6. package/dist/types/compaction/branch-summarization.d.ts +94 -0
  7. package/dist/types/compaction/compaction.d.ts +166 -0
  8. package/dist/types/compaction/entries.d.ts +103 -0
  9. package/dist/types/compaction/errors.d.ts +26 -0
  10. package/dist/types/compaction/index.d.ts +11 -0
  11. package/dist/types/compaction/messages.d.ts +61 -0
  12. package/dist/types/compaction/openai.d.ts +58 -0
  13. package/dist/types/compaction/pruning.d.ts +18 -0
  14. package/dist/types/compaction/utils.d.ts +32 -0
  15. package/dist/types/compaction.d.ts +1 -0
  16. package/dist/types/harmony-leak.d.ts +99 -0
  17. package/dist/types/index.d.ts +10 -0
  18. package/dist/types/proxy.d.ts +84 -0
  19. package/dist/types/run-collector.d.ts +196 -0
  20. package/dist/types/telemetry.d.ts +588 -0
  21. package/dist/types/thinking.d.ts +17 -0
  22. package/dist/types/types.d.ts +407 -0
  23. package/package.json +75 -0
  24. package/src/agent-loop.ts +1279 -0
  25. package/src/agent.ts +1399 -0
  26. package/src/append-only-context.ts +297 -0
  27. package/src/compaction/branch-summarization.ts +339 -0
  28. package/src/compaction/compaction.ts +1065 -0
  29. package/src/compaction/entries.ts +133 -0
  30. package/src/compaction/errors.ts +31 -0
  31. package/src/compaction/index.ts +12 -0
  32. package/src/compaction/messages.ts +212 -0
  33. package/src/compaction/openai.ts +552 -0
  34. package/src/compaction/prompts/auto-handoff-threshold-focus.md +1 -0
  35. package/src/compaction/prompts/branch-summary-context.md +5 -0
  36. package/src/compaction/prompts/branch-summary-preamble.md +2 -0
  37. package/src/compaction/prompts/branch-summary.md +30 -0
  38. package/src/compaction/prompts/compaction-short-summary.md +9 -0
  39. package/src/compaction/prompts/compaction-summary-context.md +5 -0
  40. package/src/compaction/prompts/compaction-summary.md +38 -0
  41. package/src/compaction/prompts/compaction-turn-prefix.md +17 -0
  42. package/src/compaction/prompts/compaction-update-summary.md +45 -0
  43. package/src/compaction/prompts/file-operations.md +10 -0
  44. package/src/compaction/prompts/handoff-document.md +49 -0
  45. package/src/compaction/prompts/summarization-system.md +3 -0
  46. package/src/compaction/pruning.ts +92 -0
  47. package/src/compaction/utils.ts +185 -0
  48. package/src/compaction.ts +1 -0
  49. package/src/harmony-leak.ts +427 -0
  50. package/src/index.ts +19 -0
  51. package/src/proxy.ts +326 -0
  52. package/src/run-collector.ts +631 -0
  53. package/src/telemetry.ts +2018 -0
  54. package/src/thinking.ts +19 -0
  55. package/src/types.ts +467 -0
@@ -0,0 +1,631 @@
1
+ /**
2
+ * Per-invocation run aggregator. Buffers per-chat and per-tool records as the
3
+ * loop executes and folds them into a single {@link AgentRunSummary} +
4
+ * {@link AgentRunCoverage} value at the end.
5
+ *
6
+ * One collector lives on each {@link AgentTelemetry} handle, which is
7
+ * constructed once per `agentLoop` invocation in {@link resolveTelemetry}.
8
+ * Start state for an in-flight span is stored on the live `Span` itself under
9
+ * a module-private symbol — bounded memory, no cross-invoke leakage.
10
+ *
11
+ * The collector is fed exclusively by helpers in `./telemetry.ts`. Loop
12
+ * authors do not interact with it directly except via the public
13
+ * `recordSkippedTool` helper used for the two skip paths that bypass spans
14
+ * entirely (pre-run interrupt and the tail-sweep for tool calls that never
15
+ * produced a result message).
16
+ */
17
+
18
+ import type { AssistantMessage, Model, StopReason } from "@gajae-code/ai";
19
+ import type { Span } from "@opentelemetry/api";
20
+
21
/**
 * Terminal status reported by an `execute_tool` span. Each value has a
 * matching counter in {@link ToolCounters}.
 */
export type ToolStatus =
	| "ok"
	| "error"
	| "skipped"
	| "blocked"
	| "timeout"
	| "aborted";
23
+
24
/**
 * Raw record for a single `chat` step, finalized by `finishChatSpan`.
 *
 * Records are appended by `AgentRunCollector.endChat` (normal completion) and
 * `AgentRunCollector.failChat` (no finalized assistant message).
 */
export interface ChatRecord {
	/** Step index supplied to `beginChat`; `-1` when no matching begin was seen. */
	readonly stepNumber: number;
	/** Model id captured at begin; falls back to the message's model (or `""` on `failChat`). */
	readonly model: string;
	/** Provider captured at begin; falls back to the message's provider (or `""` on `failChat`). */
	readonly provider: string;
	readonly stopReason: StopReason | undefined;
	/** Wall-clock duration between begin and finish; `0` when no matching begin was seen. */
	readonly latencyMs: number;
	/** Total input tokens, including the cache-read and cache-write buckets below. */
	readonly inputTokens: number;
	readonly outputTokens: number;
	/** Cache-read portion of {@link ChatRecord.inputTokens}. */
	readonly cachedInputTokens: number;
	/** Cache-write portion of {@link ChatRecord.inputTokens}. */
	readonly cacheWriteTokens: number;
	readonly reasoningOutputTokens: number;
	/** Provider-reported total when present, otherwise `inputTokens + outputTokens`. */
	readonly totalTokens: number;
	readonly costUsd: number | undefined;
	readonly costUnavailableReason: string | undefined;
	/** `"error"` / `"aborted"` stop reason, or the type passed to `failChat`; otherwise `undefined`. */
	readonly errorType: string | undefined;
}
41
+
42
/**
 * Raw record for a single `execute_tool` invocation.
 *
 * Appended by `AgentRunCollector.endTool`, or by
 * `AgentRunCollector.recordOrphanTool` for calls that never produced a span.
 */
export interface ToolRecord {
	/** Id of the tool call; `""` when `endTool` ran without a matching `beginTool`. */
	readonly toolCallId: string;
	/** Name of the invoked tool; `""` when `endTool` ran without a matching `beginTool`. */
	readonly toolName: string;
	readonly status: ToolStatus;
	/** Wall-clock duration of the call; `0` for orphan records and unmatched finishes. */
	readonly latencyMs: number;
	readonly errorType: string | undefined;
}
50
+
51
/**
 * Per-tool counters surfaced under {@link AgentRunSummary.tools.byName}.
 *
 * `total` counts every record for the tool; the remaining integer fields count
 * records by terminal {@link ToolStatus}.
 */
export interface ToolCounters {
	/** Number of records for this tool, regardless of status. */
	readonly total: number;
	readonly ok: number;
	readonly error: number;
	readonly skipped: number;
	readonly blocked: number;
	readonly timeout: number;
	readonly aborted: number;
	/** Sum of `latencyMs` over all records for this tool. */
	readonly totalLatencyMs: number;
}
62
+
63
/**
 * Run-level rollup returned in the `agent_end` event and passed to
 * {@link AgentTelemetryConfig.onRunEnd}. Pure aggregation — no references to
 * spans, no callbacks, no live state. Safe to persist / diff / assert.
 */
export interface AgentRunSummary {
	/** Rollup over every recorded chat step. */
	readonly chats: {
		readonly total: number;
		/** Bucketed by raw {@link StopReason}; absent reasons omitted. */
		readonly byStopReason: Readonly<Record<string, number>>;
		readonly totalLatencyMs: number;
	};

	/** Rollup over every recorded tool call, overall and per tool name. */
	readonly tools: {
		readonly total: number;
		readonly ok: number;
		readonly error: number;
		readonly skipped: number;
		readonly blocked: number;
		readonly timeout: number;
		readonly aborted: number;
		readonly totalLatencyMs: number;
		/** Per-tool-name counters; keys sorted by name on snapshot. */
		readonly byName: Readonly<Record<string, ToolCounters>>;
	};

	/** Token usage summed field-by-field across chat steps. */
	readonly usage: {
		readonly inputTokens: number;
		readonly outputTokens: number;
		readonly cachedInputTokens: number;
		readonly cacheWriteTokens: number;
		readonly reasoningOutputTokens: number;
		readonly totalTokens: number;
	};

	/** Cost estimate plus the reasons a cost could not be computed. */
	readonly cost: {
		/** Sum of the per-chat costs that were available. */
		readonly estimatedUsd: number;
		/** Sorted, deduped. */
		readonly unavailableReasons: readonly string[];
	};

	/** Error counts gathered from both chat and tool records. */
	readonly errors: {
		readonly total: number;
		readonly byType: Readonly<Record<string, number>>;
	};

	/** Step count supplied by the caller when the snapshot is taken. */
	readonly stepCount: number;
}
106
+
107
/**
 * Coverage rollup: registered-vs-invoked across the run. All arrays are
 * sorted ascending and deduped so the value is stable for diffing.
 */
export interface AgentRunCoverage {
	/** Names of every tool exposed on at least one chat step. */
	readonly toolsAvailable: readonly string[];
	/** Names of every tool that was invoked, whether or not it was declared as available. */
	readonly toolsInvoked: readonly string[];
	/** Entries of `toolsAvailable` that never appear in `toolsInvoked`. */
	readonly toolsUnused: readonly string[];
	/** Ids of the models that chat steps were started with. */
	readonly modelsUsed: readonly string[];
	/** Non-empty provider names that chat steps were started with. */
	readonly providersUsed: readonly string[];
}
118
+
119
/** Start-of-step state captured by `beginChat` and consumed by `endChat` / `failChat`. */
interface ChatStart {
	readonly stepNumber: number;
	/** `performance.now()` reading taken when the chat step began. */
	readonly startedAtMs: number;
	/** Id of the model the step was started with. */
	readonly model: string;
	readonly provider: string;
}
125
+
126
/** Start-of-call state captured by `beginTool` and consumed by `endTool`. */
interface ToolStart {
	readonly toolCallId: string;
	readonly toolName: string;
	/** `performance.now()` reading taken when the tool call began. */
	readonly startedAtMs: number;
}
131
+
132
/** Symbol key under which `beginChat` stores a chat step's start state on its span. */
const kChatStart = Symbol("agent.run-collector.chatStart");
/** Symbol key under which `beginTool` stores a tool call's start state on its span. */
const kToolStart = Symbol("agent.run-collector.toolStart");
type SpanWithChatStart = Span & { [kChatStart]?: ChatStart };
type SpanWithToolStart = Span & { [kToolStart]?: ToolStart };

/**
 * Per-invocation event buffer. Constructed unconditionally inside
 * {@link resolveTelemetry}; cost is one allocation per `agentLoop` call.
 *
 * Methods are intentionally non-throwing — telemetry must never turn a
 * successful agent run into a failed one. Start state for an in-flight chat
 * or tool call is stored on the span object itself under a module-private
 * symbol and cleared when the span finishes, so the collector holds no
 * per-span table. If a finish path is somehow reached without a matching
 * begin (provider crash, tracer swap mid-run), the corresponding record is
 * still emitted with `latencyMs: 0` rather than throwing.
 */
export class AgentRunCollector {
	readonly #chats: ChatRecord[] = [];
	readonly #tools: ToolRecord[] = [];
	readonly #availableTools = new Set<string>();
	readonly #invokedTools = new Set<string>();
	readonly #modelsUsed = new Set<string>();
	readonly #providersUsed = new Set<string>();
	#runEnded = false;

	/** True once `markRunEnded()` has been called for this invocation. */
	get runEnded(): boolean {
		return this.#runEnded;
	}

	/**
	 * Mark this run as logically ended. Callers use this to coordinate the
	 * `onRunEnd` hook between the success path (fires inside
	 * `buildAgentEndEvent`, before `stream.end()`) and the error path (fires
	 * inside `finishInvokeAgentSpan`'s finally). Idempotent — returns `true`
	 * the first time, `false` on subsequent calls.
	 */
	markRunEnded(): boolean {
		if (this.#runEnded) return false;
		this.#runEnded = true;
		return true;
	}

	/** Record the tool names exposed on a single chat step. A missing list is ignored. */
	noteAvailableTools(tools: readonly { readonly name: string }[] | undefined): void {
		if (!tools) return;
		for (const tool of tools) this.#availableTools.add(tool.name);
	}

	/**
	 * Stamp the start of a chat step on `span` and note the model / provider
	 * for coverage. `init.provider` overrides the model's own provider.
	 */
	beginChat(
		span: Span,
		init: { readonly stepNumber: number; readonly model: Model; readonly provider?: string },
	): void {
		const provider = init.provider ?? init.model.provider;
		(span as SpanWithChatStart)[kChatStart] = {
			stepNumber: init.stepNumber,
			startedAtMs: performance.now(),
			model: init.model.id,
			provider,
		};
		this.#modelsUsed.add(init.model.id);
		// An empty provider string is not a meaningful coverage entry.
		if (provider) this.#providersUsed.add(provider);
	}

	/**
	 * Finalize a chat step from its assistant message and append a
	 * {@link ChatRecord}. Clears the start state stored on `span`; when none
	 * is present the record falls back to the message's model / provider,
	 * `stepNumber: -1` and `latencyMs: 0`.
	 */
	endChat(
		span: Span,
		message: AssistantMessage,
		fields: {
			readonly costUsd: number | undefined;
			readonly costUnavailableReason: string | undefined;
		},
	): void {
		const start = (span as SpanWithChatStart)[kChatStart];
		(span as SpanWithChatStart)[kChatStart] = undefined;
		const usage = message.usage;
		// Public surface: `inputTokens` is the total cost-bearing input the
		// provider charged for, so it must include cache_read + cache_write.
		// The per-bucket fields below preserve the breakdown for callers that
		// want it. `aggregateAgentRunSummaries` sums each field independently
		// and never re-derives `inputTokens` from the buckets, so this stays
		// consistent across run merges.
		const inputBase = usage?.input ?? 0;
		const cachedInputTokens = usage?.cacheRead ?? 0;
		const cacheWriteTokens = usage?.cacheWrite ?? 0;
		const inputTokens = inputBase + cachedInputTokens + cacheWriteTokens;
		const outputTokens = usage?.output ?? 0;
		const reasoningOutputTokens = usage?.reasoningTokens ?? 0;
		const totalTokens = usage?.totalTokens ?? inputTokens + outputTokens;
		this.#chats.push({
			stepNumber: start?.stepNumber ?? -1,
			model: start?.model ?? message.model,
			provider: start?.provider ?? message.provider,
			stopReason: message.stopReason,
			latencyMs: start ? Math.max(0, performance.now() - start.startedAtMs) : 0,
			inputTokens,
			outputTokens,
			cachedInputTokens,
			cacheWriteTokens,
			reasoningOutputTokens,
			totalTokens,
			costUsd: fields.costUsd,
			costUnavailableReason: fields.costUnavailableReason,
			errorType: message.stopReason === "error" || message.stopReason === "aborted" ? message.stopReason : undefined,
		});
	}

	/**
	 * Stamp the chat span as failed without a finalized AssistantMessage. Used
	 * by the `catch` arm of `streamAssistantResponse` so error chats still
	 * appear in the run summary. The record carries zero usage and no cost.
	 */
	failChat(span: Span, fields: { readonly errorType: string }): void {
		const start = (span as SpanWithChatStart)[kChatStart];
		(span as SpanWithChatStart)[kChatStart] = undefined;
		this.#chats.push({
			stepNumber: start?.stepNumber ?? -1,
			model: start?.model ?? "",
			provider: start?.provider ?? "",
			stopReason: "error",
			latencyMs: start ? Math.max(0, performance.now() - start.startedAtMs) : 0,
			inputTokens: 0,
			outputTokens: 0,
			cachedInputTokens: 0,
			cacheWriteTokens: 0,
			reasoningOutputTokens: 0,
			totalTokens: 0,
			costUsd: undefined,
			costUnavailableReason: undefined,
			errorType: fields.errorType,
		});
	}

	/** Stamp the start of a tool call on `span` and count the tool as invoked. */
	beginTool(span: Span, init: { readonly toolCallId: string; readonly toolName: string }): void {
		(span as SpanWithToolStart)[kToolStart] = {
			toolCallId: init.toolCallId,
			toolName: init.toolName,
			startedAtMs: performance.now(),
		};
		this.#invokedTools.add(init.toolName);
	}

	/**
	 * Finalize a tool call and append a {@link ToolRecord}. Clears the start
	 * state stored on `span`; when none is present the record is emitted with
	 * empty id / name and `latencyMs: 0`.
	 */
	endTool(span: Span, fields: { readonly status: ToolStatus; readonly errorType: string | undefined }): void {
		const start = (span as SpanWithToolStart)[kToolStart];
		(span as SpanWithToolStart)[kToolStart] = undefined;
		this.#tools.push({
			toolCallId: start?.toolCallId ?? "",
			toolName: start?.toolName ?? "",
			status: fields.status,
			latencyMs: start ? Math.max(0, performance.now() - start.startedAtMs) : 0,
			errorType: fields.errorType,
		});
	}

	/**
	 * Record a tool that never produced a span — pre-run interrupt or tail
	 * sweep. The LLM still asked for it, so it counts toward
	 * {@link AgentRunCoverage.toolsInvoked}.
	 */
	recordOrphanTool(record: {
		readonly toolCallId: string;
		readonly toolName: string;
		readonly status: ToolStatus;
	}): void {
		this.#invokedTools.add(record.toolName);
		this.#tools.push({
			toolCallId: record.toolCallId,
			toolName: record.toolName,
			status: record.status,
			latencyMs: 0,
			errorType: undefined,
		});
	}

	/** Build the immutable summary value from buffered records. */
	snapshot(opts: { readonly stepCount: number }): {
		readonly summary: AgentRunSummary;
		readonly coverage: AgentRunCoverage;
	} {
		return {
			summary: this.#buildSummary(opts.stepCount),
			coverage: this.#buildCoverage(),
		};
	}

	/** Fold the buffered chat and tool records into an {@link AgentRunSummary}. */
	#buildSummary(stepCount: number): AgentRunSummary {
		// Accumulators are keyed by stop reasons, error types and tool names.
		// They are created without a prototype so that a key such as
		// "toString" or "constructor" starts from `undefined` instead of
		// resolving to an inherited `Object.prototype` member.
		const byStopReason = Object.create(null) as Record<string, number>;
		const errorsByType = Object.create(null) as Record<string, number>;
		const byName = Object.create(null) as Record<string, ToolCounters>;

		let chatLatency = 0;
		let inputTokens = 0;
		let outputTokens = 0;
		let cachedInputTokens = 0;
		let cacheWriteTokens = 0;
		let reasoningOutputTokens = 0;
		let totalTokens = 0;
		let estimatedUsd = 0;
		const unavailableReasons = new Set<string>();

		for (const chat of this.#chats) {
			chatLatency += chat.latencyMs;
			inputTokens += chat.inputTokens;
			outputTokens += chat.outputTokens;
			cachedInputTokens += chat.cachedInputTokens;
			cacheWriteTokens += chat.cacheWriteTokens;
			reasoningOutputTokens += chat.reasoningOutputTokens;
			totalTokens += chat.totalTokens;
			if (chat.stopReason) byStopReason[chat.stopReason] = (byStopReason[chat.stopReason] ?? 0) + 1;
			if (chat.costUsd != null) estimatedUsd += chat.costUsd;
			if (chat.costUnavailableReason) unavailableReasons.add(chat.costUnavailableReason);
			if (chat.errorType) errorsByType[chat.errorType] = (errorsByType[chat.errorType] ?? 0) + 1;
		}

		const counts: Record<ToolStatus, number> = {
			ok: 0,
			error: 0,
			skipped: 0,
			blocked: 0,
			timeout: 0,
			aborted: 0,
		};
		let toolLatency = 0;
		for (const tool of this.#tools) {
			counts[tool.status] += 1;
			toolLatency += tool.latencyMs;
			const existing: ToolCounters = byName[tool.toolName] ?? {
				total: 0,
				ok: 0,
				error: 0,
				skipped: 0,
				blocked: 0,
				timeout: 0,
				aborted: 0,
				totalLatencyMs: 0,
			};
			byName[tool.toolName] = {
				total: existing.total + 1,
				ok: existing.ok + (tool.status === "ok" ? 1 : 0),
				error: existing.error + (tool.status === "error" ? 1 : 0),
				skipped: existing.skipped + (tool.status === "skipped" ? 1 : 0),
				blocked: existing.blocked + (tool.status === "blocked" ? 1 : 0),
				timeout: existing.timeout + (tool.status === "timeout" ? 1 : 0),
				aborted: existing.aborted + (tool.status === "aborted" ? 1 : 0),
				totalLatencyMs: existing.totalLatencyMs + tool.latencyMs,
			};
			if (tool.errorType) errorsByType[tool.errorType] = (errorsByType[tool.errorType] ?? 0) + 1;
		}

		let errorTotal = 0;
		for (const v of Object.values(errorsByType)) errorTotal += v;

		return {
			chats: {
				total: this.#chats.length,
				byStopReason: sortedRecord(byStopReason),
				totalLatencyMs: chatLatency,
			},
			tools: {
				total: this.#tools.length,
				ok: counts.ok,
				error: counts.error,
				skipped: counts.skipped,
				blocked: counts.blocked,
				timeout: counts.timeout,
				aborted: counts.aborted,
				totalLatencyMs: toolLatency,
				byName: sortedRecord(byName),
			},
			usage: {
				inputTokens,
				outputTokens,
				cachedInputTokens,
				cacheWriteTokens,
				reasoningOutputTokens,
				totalTokens,
			},
			cost: {
				estimatedUsd,
				unavailableReasons: [...unavailableReasons].sort(),
			},
			errors: {
				total: errorTotal,
				byType: sortedRecord(errorsByType),
			},
			stepCount,
		};
	}

	/** Turn the name sets gathered during the run into sorted coverage arrays. */
	#buildCoverage(): AgentRunCoverage {
		const toolsAvailable = [...this.#availableTools].sort();
		const toolsInvoked = [...this.#invokedTools].sort();
		const toolsUnused = toolsAvailable.filter(name => !this.#invokedTools.has(name));
		// Tools the LLM invoked that were never declared on any request remain
		// present in `toolsInvoked` but absent from `toolsAvailable`. Callers
		// diff to detect this case if they care.
		return {
			toolsAvailable,
			toolsInvoked,
			toolsUnused,
			modelsUsed: [...this.#modelsUsed].sort(),
			providersUsed: [...this.#providersUsed].sort(),
		};
	}
}
434
+
435
/**
 * Fold multiple per-run summaries into one. Pure aggregation — useful when a
 * caller (verify pass, benchmark harness) drives the agent loop N times and
 * needs a single rollup across all invocations.
 *
 * Counters sum element-wise. Sets (cost reasons, error types, per-tool
 * counters) merge by key. Numeric totals sum. The output is in the same
 * shape as a single `AgentRunSummary`, so all dashboards and persistence
 * layers handle it uniformly.
 *
 * @param summaries Summaries to merge; the inputs are never mutated.
 * @returns The shared empty summary for an empty list, the sole element
 *   itself for a single-element list, otherwise a freshly built rollup.
 */
export function aggregateAgentRunSummaries(summaries: readonly AgentRunSummary[]): AgentRunSummary {
	if (summaries.length === 0) return EMPTY_SUMMARY;
	if (summaries.length === 1) return summaries[0];

	// Keyed accumulators are created without a prototype so that a key such
	// as "toString" or "constructor" (tool names and error types are
	// arbitrary strings) starts from `undefined` rather than resolving to an
	// inherited `Object.prototype` member.
	const byStopReason = Object.create(null) as Record<string, number>;
	const byName = Object.create(null) as Record<string, ToolCounters>;
	const errorsByType = Object.create(null) as Record<string, number>;

	let chatTotal = 0;
	let chatLatency = 0;

	let toolTotal = 0;
	let toolOk = 0;
	let toolError = 0;
	let toolSkipped = 0;
	let toolBlocked = 0;
	let toolTimeout = 0;
	let toolAborted = 0;
	let toolLatency = 0;

	let inputTokens = 0;
	let outputTokens = 0;
	let cachedInputTokens = 0;
	let cacheWriteTokens = 0;
	let reasoningOutputTokens = 0;
	let totalTokens = 0;

	let estimatedUsd = 0;
	const unavailableReasons = new Set<string>();

	let errorsTotal = 0;
	let stepCount = 0;

	for (const s of summaries) {
		chatTotal += s.chats.total;
		chatLatency += s.chats.totalLatencyMs;
		for (const [reason, count] of Object.entries(s.chats.byStopReason)) {
			byStopReason[reason] = (byStopReason[reason] ?? 0) + count;
		}

		toolTotal += s.tools.total;
		toolOk += s.tools.ok;
		toolError += s.tools.error;
		toolSkipped += s.tools.skipped;
		toolBlocked += s.tools.blocked;
		toolTimeout += s.tools.timeout;
		toolAborted += s.tools.aborted;
		toolLatency += s.tools.totalLatencyMs;
		for (const [name, counters] of Object.entries(s.tools.byName)) {
			const existing: ToolCounters | undefined = byName[name];
			// First sighting of a tool reuses the input counters as-is; they
			// are only read, never written.
			byName[name] =
				existing === undefined
					? counters
					: {
							total: existing.total + counters.total,
							ok: existing.ok + counters.ok,
							error: existing.error + counters.error,
							skipped: existing.skipped + counters.skipped,
							blocked: existing.blocked + counters.blocked,
							timeout: existing.timeout + counters.timeout,
							aborted: existing.aborted + counters.aborted,
							totalLatencyMs: existing.totalLatencyMs + counters.totalLatencyMs,
						};
		}

		// Each usage field is summed independently; `inputTokens` is never
		// re-derived from the cache buckets.
		inputTokens += s.usage.inputTokens;
		outputTokens += s.usage.outputTokens;
		cachedInputTokens += s.usage.cachedInputTokens;
		cacheWriteTokens += s.usage.cacheWriteTokens;
		reasoningOutputTokens += s.usage.reasoningOutputTokens;
		totalTokens += s.usage.totalTokens;

		estimatedUsd += s.cost.estimatedUsd;
		for (const r of s.cost.unavailableReasons) unavailableReasons.add(r);

		for (const [type, count] of Object.entries(s.errors.byType)) {
			errorsByType[type] = (errorsByType[type] ?? 0) + count;
		}
		errorsTotal += s.errors.total;
		stepCount += s.stepCount;
	}

	return {
		chats: { total: chatTotal, byStopReason: sortedRecord(byStopReason), totalLatencyMs: chatLatency },
		tools: {
			total: toolTotal,
			ok: toolOk,
			error: toolError,
			skipped: toolSkipped,
			blocked: toolBlocked,
			timeout: toolTimeout,
			aborted: toolAborted,
			totalLatencyMs: toolLatency,
			byName: sortedRecord(byName),
		},
		usage: { inputTokens, outputTokens, cachedInputTokens, cacheWriteTokens, reasoningOutputTokens, totalTokens },
		cost: { estimatedUsd, unavailableReasons: [...unavailableReasons].sort() },
		errors: { total: errorsTotal, byType: sortedRecord(errorsByType) },
		stepCount,
	};
}
544
+
545
/**
 * Union-merge multiple coverage values, preserving the sorted+deduped invariant.
 *
 * @param coverages Coverage values to merge; the inputs are never mutated.
 * @returns The shared empty coverage for an empty list, the sole element
 *   itself for a single-element list, otherwise a freshly built union in
 *   which `toolsUnused` is recomputed from the merged sets.
 */
export function aggregateAgentRunCoverage(coverages: readonly AgentRunCoverage[]): AgentRunCoverage {
	switch (coverages.length) {
		case 0:
			return EMPTY_COVERAGE;
		case 1:
			return coverages[0];
	}

	// Collect one field across every input into a deduplicating set.
	const unionOf = (pick: (coverage: AgentRunCoverage) => readonly string[]): Set<string> => {
		const names = new Set<string>();
		for (const coverage of coverages) {
			for (const name of pick(coverage)) names.add(name);
		}
		return names;
	};

	const invokedSet = unionOf(coverage => coverage.toolsInvoked);
	const toolsAvailable = [...unionOf(coverage => coverage.toolsAvailable)].sort();
	// A tool counts as unused only if no run invoked it.
	const toolsUnused = toolsAvailable.filter(name => !invokedSet.has(name));

	return {
		toolsAvailable,
		toolsInvoked: [...invokedSet].sort(),
		toolsUnused,
		modelsUsed: [...unionOf(coverage => coverage.modelsUsed)].sort(),
		providersUsed: [...unionOf(coverage => coverage.providersUsed)].sort(),
	};
}
568
+
569
/**
 * Shared all-zero summary. Frozen at every nesting level so the single
 * instance can be handed to any number of callers.
 */
const EMPTY_SUMMARY: AgentRunSummary = (() => {
	const chats = Object.freeze({ total: 0, byStopReason: Object.freeze({}), totalLatencyMs: 0 });
	const tools = Object.freeze({
		total: 0,
		ok: 0,
		error: 0,
		skipped: 0,
		blocked: 0,
		timeout: 0,
		aborted: 0,
		totalLatencyMs: 0,
		byName: Object.freeze({}),
	});
	const usage = Object.freeze({
		inputTokens: 0,
		outputTokens: 0,
		cachedInputTokens: 0,
		cacheWriteTokens: 0,
		reasoningOutputTokens: 0,
		totalTokens: 0,
	});
	const cost = Object.freeze({ estimatedUsd: 0, unavailableReasons: Object.freeze([]) as readonly string[] });
	const errors = Object.freeze({ total: 0, byType: Object.freeze({}) });
	return Object.freeze({ chats, tools, usage, cost, errors, stepCount: 0 }) as AgentRunSummary;
})();
594
+
595
/**
 * Shared empty coverage value. The object and each of its (distinct) arrays
 * are frozen so the single instance can be handed to any number of callers.
 */
const EMPTY_COVERAGE: AgentRunCoverage = (() => {
	// A fresh frozen array per field, so no two fields alias each other.
	const noNames = (): readonly string[] => Object.freeze([]) as readonly string[];
	return Object.freeze({
		toolsAvailable: noNames(),
		toolsInvoked: noNames(),
		toolsUnused: noNames(),
		modelsUsed: noNames(),
		providersUsed: noNames(),
	}) as AgentRunCoverage;
})();
602
+
603
/**
 * Accessor for the empty `AgentRunSummary` constant. Exported for tests and
 * default-initializers.
 *
 * @returns The shared `EMPTY_SUMMARY` instance — the same object on every call.
 */
export function emptyAgentRunSummary(): AgentRunSummary {
	return EMPTY_SUMMARY;
}
607
+
608
/**
 * Accessor for the empty `AgentRunCoverage` constant. Exported for tests and
 * default-initializers.
 *
 * @returns The shared `EMPTY_COVERAGE` instance — the same object on every call.
 */
export function emptyAgentRunCoverage(): AgentRunCoverage {
	return EMPTY_COVERAGE;
}
612
+
613
/**
 * Distinguishable error class thrown when `beforeToolCall` returns
 * `{ block: true }`. Lets the catch arm of `runTool` set the terminal status
 * on the execute_tool span to `"blocked"` instead of conflating with a real
 * tool exception.
 */
export class ToolCallBlockedError extends Error {
	override readonly name = "ToolCallBlockedError";

	/**
	 * @param reason Message for the error; a generic message is used when it
	 *   is omitted.
	 */
	constructor(reason?: string) {
		super(reason == null ? "Tool execution was blocked" : reason);
	}
}
625
+
626
/**
 * Return a new object whose own keys are listed in ascending order.
 *
 * The result is built with `Object.fromEntries`, which defines each entry as
 * an own data property. Plain assignment would route a `"__proto__"` key
 * through the inherited setter — silently dropping the entry, or replacing
 * the result's prototype when the value is an object.
 *
 * Note: JavaScript always enumerates integer-like keys (`"0"`, `"42"`) first
 * and in numeric order, so such keys are not placed by string comparison.
 *
 * @param record Source record; only its own enumerable string keys are copied.
 * @returns A fresh plain object with the same entries, inserted in sorted key order.
 */
function sortedRecord<V>(record: Record<string, V>): Record<string, V> {
	const sortedKeys = Object.keys(record).sort();
	return Object.fromEntries(sortedKeys.map((key): [string, V] => [key, record[key]]));
}