@tuttiai/core 0.7.0 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1,108 +1,193 @@
1
- // src/logger.ts
2
- import pino from "pino";
3
- var createLogger = (name) => pino({
4
- name,
5
- level: process.env.TUTTI_LOG_LEVEL ?? "info",
6
- transport: process.env.NODE_ENV === "production" ? void 0 : {
7
- target: "pino-pretty",
8
- options: {
9
- colorize: true,
10
- translateTime: "HH:MM:ss",
11
- ignore: "pid,hostname"
12
- }
1
+ // src/errors.ts
2
/**
 * Base class for the structured errors defined in this module.
 *
 * Each instance carries a machine-readable `code` and a free-form `context`
 * object in addition to the usual `message`, and `name` is set to the name of
 * the concrete class that was constructed.
 *
 * @param {string} code - Stable identifier for the failure (e.g. "SCORE_INVALID").
 * @param {string} message - Human-readable description.
 * @param {object} [context={}] - Extra structured details about the failure.
 */
var TuttiError = class extends Error {
  code;
  context;
  constructor(code, message, context = {}) {
    super(message);
    this.code = code;
    this.context = context;
    this.name = this.constructor.name;
    // captureStackTrace is a V8 extension; calling it unconditionally makes
    // the constructor itself throw on engines that do not provide it.
    if (typeof Error.captureStackTrace === "function") {
      Error.captureStackTrace(this, this.constructor);
    }
  }
};
13
+ var ScoreValidationError = class extends TuttiError {
14
+ constructor(message, context = {}) {
15
+ super("SCORE_INVALID", message, context);
13
16
  }
14
- });
15
- var logger = createLogger("tutti");
16
-
17
- // src/telemetry.ts
18
- import { trace, SpanStatusCode } from "@opentelemetry/api";
19
- var tracer = trace.getTracer("tutti", "1.0.0");
20
- var TuttiTracer = {
21
- agentRun(agentName, sessionId, fn) {
22
- return tracer.startActiveSpan("agent.run", async (span) => {
23
- span.setAttribute("agent.name", agentName);
24
- span.setAttribute("session.id", sessionId);
25
- try {
26
- const result = await fn();
27
- span.setStatus({ code: SpanStatusCode.OK });
28
- return result;
29
- } catch (err) {
30
- span.setStatus({
31
- code: SpanStatusCode.ERROR,
32
- message: err instanceof Error ? err.message : String(err)
33
- });
34
- throw err;
35
- } finally {
36
- span.end();
37
- }
38
- });
39
- },
40
- llmCall(model, fn) {
41
- return tracer.startActiveSpan("llm.call", async (span) => {
42
- span.setAttribute("llm.model", model);
43
- try {
44
- const result = await fn();
45
- span.setStatus({ code: SpanStatusCode.OK });
46
- return result;
47
- } catch (err) {
48
- span.setStatus({
49
- code: SpanStatusCode.ERROR,
50
- message: err instanceof Error ? err.message : String(err)
51
- });
52
- throw err;
53
- } finally {
54
- span.end();
55
- }
56
- });
57
- },
58
- toolCall(toolName, fn) {
59
- return tracer.startActiveSpan("tool.call", async (span) => {
60
- span.setAttribute("tool.name", toolName);
61
- try {
62
- const result = await fn();
63
- span.setStatus({ code: SpanStatusCode.OK });
64
- return result;
65
- } catch (err) {
66
- span.setStatus({
67
- code: SpanStatusCode.ERROR,
68
- message: err instanceof Error ? err.message : String(err)
69
- });
70
- throw err;
71
- } finally {
72
- span.end();
73
- }
74
- });
17
+ };
18
+ var AgentNotFoundError = class extends TuttiError {
19
+ constructor(agentId, available) {
20
+ super(
21
+ "AGENT_NOT_FOUND",
22
+ `Agent "${agentId}" not found in your score.
23
+ Available agents: ${available.join(", ")}
24
+ Check your tutti.score.ts \u2014 the agent ID must match the key in the agents object.`,
25
+ { agent_id: agentId, available }
26
+ );
27
+ }
28
+ };
29
+ var PermissionError = class extends TuttiError {
30
+ constructor(voice, required, granted) {
31
+ const missing = required.filter((p) => !granted.includes(p));
32
+ super(
33
+ "PERMISSION_DENIED",
34
+ `Voice "${voice}" requires permissions not granted: ${missing.join(", ")}
35
+ Grant them in your score file:
36
+ permissions: [${missing.map((p) => "'" + p + "'").join(", ")}]`,
37
+ { voice, required, granted }
38
+ );
39
+ }
40
+ };
41
+ var BudgetExceededError = class extends TuttiError {
42
+ constructor(tokens, costUsd, limit) {
43
+ super(
44
+ "BUDGET_EXCEEDED",
45
+ `Token budget exceeded: ${tokens.toLocaleString()} tokens, $${costUsd.toFixed(4)} (limit: ${limit}).`,
46
+ { tokens, cost_usd: costUsd, limit }
47
+ );
48
+ }
49
+ };
50
+ var ToolTimeoutError = class extends TuttiError {
51
+ constructor(tool, timeoutMs) {
52
+ super(
53
+ "TOOL_TIMEOUT",
54
+ `Tool "${tool}" timed out after ${timeoutMs}ms.
55
+ Increase tool_timeout_ms in your agent config, or check if the tool is hanging.`,
56
+ { tool, timeout_ms: timeoutMs }
57
+ );
58
+ }
59
+ };
60
+ var ProviderError = class extends TuttiError {
61
+ constructor(message, context = { provider: "unknown" }) {
62
+ super("PROVIDER_ERROR", message, context);
63
+ }
64
+ };
65
+ var AuthenticationError = class extends ProviderError {
66
+ constructor(provider) {
67
+ super(
68
+ `Authentication failed for ${provider}.
69
+ Check that the API key is set correctly in your .env file.`,
70
+ { provider }
71
+ );
72
+ Object.defineProperty(this, "code", { value: "AUTH_ERROR" });
73
+ }
74
+ };
75
+ var RateLimitError = class extends ProviderError {
76
+ retryAfter;
77
+ constructor(provider, retryAfter) {
78
+ const msg = retryAfter ? `Rate limited by ${provider}. Retry after ${retryAfter}s.` : `Rate limited by ${provider}.`;
79
+ super(msg, { provider, retryAfter });
80
+ Object.defineProperty(this, "code", { value: "RATE_LIMIT" });
81
+ this.retryAfter = retryAfter;
82
+ }
83
+ };
84
+ var ContextWindowError = class extends ProviderError {
85
+ maxTokens;
86
+ constructor(provider, maxTokens) {
87
+ super(
88
+ `Context window exceeded for ${provider}.` + (maxTokens ? ` Max: ${maxTokens.toLocaleString()} tokens.` : "") + `
89
+ Reduce message history or use a model with a larger context window.`,
90
+ { provider, max_tokens: maxTokens }
91
+ );
92
+ Object.defineProperty(this, "code", { value: "CONTEXT_WINDOW" });
93
+ this.maxTokens = maxTokens;
94
+ }
95
+ };
96
+ var VoiceError = class extends TuttiError {
97
+ constructor(message, context) {
98
+ super("VOICE_ERROR", message, context);
99
+ }
100
+ };
101
+ var PathTraversalError = class extends VoiceError {
102
+ constructor(path) {
103
+ super(
104
+ `Path traversal detected: "${path}" is not allowed.
105
+ All file paths must stay within the allowed directory.`,
106
+ { voice: "filesystem", path }
107
+ );
108
+ Object.defineProperty(this, "code", { value: "PATH_TRAVERSAL" });
109
+ }
110
+ };
111
+ var UrlValidationError = class extends VoiceError {
112
+ constructor(url) {
113
+ super(
114
+ `URL blocked: "${url}".
115
+ Only http:// and https:// URLs to public hosts are allowed.`,
116
+ { voice: "playwright", url }
117
+ );
118
+ Object.defineProperty(this, "code", { value: "URL_BLOCKED" });
75
119
  }
76
120
  };
77
121
 
78
- // src/telemetry-setup.ts
79
- import { NodeSDK } from "@opentelemetry/sdk-node";
80
- import { OTLPTraceExporter } from "@opentelemetry/exporter-trace-otlp-http";
81
- import { getNodeAutoInstrumentations } from "@opentelemetry/auto-instrumentations-node";
82
- var sdk;
83
- function initTelemetry(config) {
84
- if (!config.enabled || sdk) return;
85
- const endpoint = config.endpoint ?? "http://localhost:4318";
86
- const exporter = new OTLPTraceExporter({
87
- url: `${endpoint}/v1/traces`,
88
- headers: config.headers
89
- });
90
- sdk = new NodeSDK({
91
- traceExporter: exporter,
92
- instrumentations: [getNodeAutoInstrumentations({ "@opentelemetry/instrumentation-fs": { enabled: false } })],
93
- serviceName: process.env.OTEL_SERVICE_NAME ?? "tutti"
94
- });
95
- sdk.start();
96
- logger.info({ endpoint }, "OpenTelemetry tracing enabled");
122
+ // src/hooks/index.ts
123
/**
 * Build a hook set that reports every LLM and tool lifecycle event through
 * `log.info` and otherwise leaves the run untouched.
 *
 * @param {{ info: Function }} log - Logger; only its `info(fields, message)` method is used.
 * @returns {object} Hooks `beforeLLMCall`, `afterLLMCall`, `beforeToolCall`
 *   and `afterToolCall`. Each resolves to the value it was handed (request,
 *   input, result), except `afterLLMCall`, which resolves to `undefined`.
 */
function createLoggingHook(log) {
  const passThrough = (value) => Promise.resolve(value);
  return {
    beforeLLMCall: (ctx, request) => {
      const fields = { agent: ctx.agent_name, turn: ctx.turn, model: request.model };
      log.info(fields, "LLM call");
      return passThrough(request);
    },
    afterLLMCall: (ctx, response) => {
      const fields = { agent: ctx.agent_name, turn: ctx.turn, usage: response.usage };
      log.info(fields, "LLM response");
      return passThrough();
    },
    beforeToolCall: (ctx, tool, input) => {
      const fields = { agent: ctx.agent_name, tool, input };
      log.info(fields, "Tool call");
      return passThrough(input);
    },
    afterToolCall: (ctx, tool, result) => {
      const fields = { agent: ctx.agent_name, tool, is_error: result.is_error };
      log.info(fields, "Tool result");
      return passThrough(result);
    }
  };
}
98
- async function shutdownTelemetry() {
99
- if (sdk) {
100
- await sdk.shutdown();
101
- sdk = void 0;
143
/**
 * Build tool hooks backed by a key/value `store` (anything with `get`/`set`).
 *
 * - `beforeToolCall` looks up `"<tool>:<JSON of input>"`; a truthy hit is
 *   resolved in place of the input, otherwise the input is resolved unchanged.
 * - `afterToolCall` stores the content of every non-error result and resolves
 *   the result unchanged.
 *
 * NOTE(review): the write side keys on `result.content` while the read side
 * keys on the tool input, so a stored entry is only found again when a later
 * input serialises to the same JSON as an earlier output. `afterToolCall` is
 * not given the input, so this cannot be corrected inside this function
 * without knowing the intended hook contract. Confirm with the hook owners.
 * NOTE(review): a cache hit is returned through the "input" channel of
 * `beforeToolCall`; verify that callers treat that value as a cached result.
 *
 * @param {{ get: Function, set: Function }} store - Cache backend.
 * @returns {object} Hooks `beforeToolCall` and `afterToolCall`.
 */
function createCacheHook(store) {
  const keyFor = (tool, payload) => tool + ":" + JSON.stringify(payload);
  return {
    beforeToolCall: (_ctx, tool, input) => {
      const hit = store.get(keyFor(tool, input));
      return Promise.resolve(hit ? hit : input);
    },
    afterToolCall: (_ctx, tool, result) => {
      if (result.is_error) {
        return Promise.resolve(result);
      }
      const { content } = result;
      store.set(keyFor(tool, content), content);
      return Promise.resolve(result);
    }
  };
}
161
/**
 * Build a `beforeToolCall` hook that vetoes every tool named in `blockedTools`.
 *
 * A blocked tool resolves to `false`. An allowed tool resolves to the input
 * it was given, matching the other `beforeToolCall` hooks in this module,
 * which resolve to the tool input. When no input is passed the hook still
 * resolves to `true`, so existing two-argument callers see the same value as
 * before.
 *
 * @param {Iterable<string>} [blockedTools] - Names of tools that must not run.
 * @returns {{ beforeToolCall: Function }} Hook object.
 */
function createBlocklistHook(blockedTools) {
  const blocked = new Set(blockedTools);
  return {
    beforeToolCall(_ctx, tool, input) {
      if (blocked.has(tool)) {
        return Promise.resolve(false);
      }
      // Pass the input through rather than a bare boolean, so a caller that
      // uses the resolved value as the input does not receive `true` instead.
      return Promise.resolve(input ?? true);
    }
  };
}
169
/**
 * Build hooks that track the running LLM cost and reject further LLM calls
 * once it reaches `maxUsd`.
 *
 * `afterLLMCall` adds the cost of each response to a running total;
 * `beforeLLMCall` rejects when that total has reached the limit and otherwise
 * resolves to the request unchanged.
 *
 * NOTE(review): the visible `AgentRunner.safeHook` logs and swallows hook
 * rejections, so on its own this rejection may not stop the run. Confirm how
 * the limit is meant to be enforced.
 *
 * @param {number} maxUsd - Spending limit in US dollars.
 * @param {{ inputPerM?: number, outputPerM?: number }} [pricing] - Price per
 *   million input/output tokens; defaults to 3 and 15, the rates that were
 *   previously hard-coded.
 * @returns {{ afterLLMCall: Function, beforeLLMCall: Function }} Hook object.
 */
function createMaxCostHook(maxUsd, pricing = {}) {
  const { inputPerM = 3, outputPerM = 15 } = pricing;
  // Missing or non-numeric counts are treated as zero. Adding NaN would make
  // every later `totalCost >= maxUsd` comparison false and disable the limit.
  const countOf = (value) => (Number.isFinite(value) ? value : 0);
  let totalCost = 0;
  return {
    afterLLMCall(_ctx, response) {
      const usage = response?.usage;
      totalCost += countOf(usage?.input_tokens) / 1e6 * inputPerM + countOf(usage?.output_tokens) / 1e6 * outputPerM;
      return Promise.resolve();
    },
    beforeLLMCall(ctx, request) {
      if (totalCost >= maxUsd) {
        return Promise.reject(new Error(
          "Max cost hook: $" + totalCost.toFixed(4) + " exceeds limit $" + maxUsd.toFixed(2) + " for agent " + ctx.agent_name
        ));
      }
      return Promise.resolve(request);
    }
  };
}
104
188
 
105
189
  // src/agent-runner.ts
190
+ import { z } from "zod";
106
191
  import { zodToJsonSchema } from "zod-to-json-schema";
107
192
 
108
193
  // src/secrets.ts
@@ -228,21 +313,144 @@ var TokenBudget = class {
228
313
  }
229
314
  };
230
315
 
316
+ // src/logger.ts
317
+ import pino from "pino";
318
+ var createLogger = (name) => pino({
319
+ name,
320
+ level: process.env.TUTTI_LOG_LEVEL ?? "info",
321
+ transport: process.env.NODE_ENV === "production" ? void 0 : {
322
+ target: "pino-pretty",
323
+ options: {
324
+ colorize: true,
325
+ translateTime: "HH:MM:ss",
326
+ ignore: "pid,hostname"
327
+ }
328
+ }
329
+ });
330
+ var logger = createLogger("tutti");
331
+
332
+ // src/telemetry.ts
333
+ import { trace, SpanStatusCode } from "@opentelemetry/api";
334
+ var tracer = trace.getTracer("tutti", "1.0.0");
335
+ var TuttiTracer = {
336
+ agentRun(agentName, sessionId, fn) {
337
+ return tracer.startActiveSpan("agent.run", async (span) => {
338
+ span.setAttribute("agent.name", agentName);
339
+ span.setAttribute("session.id", sessionId);
340
+ try {
341
+ const result = await fn();
342
+ span.setStatus({ code: SpanStatusCode.OK });
343
+ return result;
344
+ } catch (err) {
345
+ span.setStatus({
346
+ code: SpanStatusCode.ERROR,
347
+ message: err instanceof Error ? err.message : String(err)
348
+ });
349
+ throw err;
350
+ } finally {
351
+ span.end();
352
+ }
353
+ });
354
+ },
355
+ llmCall(model, fn) {
356
+ return tracer.startActiveSpan("llm.call", async (span) => {
357
+ span.setAttribute("llm.model", model);
358
+ try {
359
+ const result = await fn();
360
+ span.setStatus({ code: SpanStatusCode.OK });
361
+ return result;
362
+ } catch (err) {
363
+ span.setStatus({
364
+ code: SpanStatusCode.ERROR,
365
+ message: err instanceof Error ? err.message : String(err)
366
+ });
367
+ throw err;
368
+ } finally {
369
+ span.end();
370
+ }
371
+ });
372
+ },
373
+ toolCall(toolName, fn) {
374
+ return tracer.startActiveSpan("tool.call", async (span) => {
375
+ span.setAttribute("tool.name", toolName);
376
+ try {
377
+ const result = await fn();
378
+ span.setStatus({ code: SpanStatusCode.OK });
379
+ return result;
380
+ } catch (err) {
381
+ span.setStatus({
382
+ code: SpanStatusCode.ERROR,
383
+ message: err instanceof Error ? err.message : String(err)
384
+ });
385
+ throw err;
386
+ } finally {
387
+ span.end();
388
+ }
389
+ });
390
+ }
391
+ };
392
+
231
393
  // src/agent-runner.ts
232
394
  var DEFAULT_MAX_TURNS = 10;
233
395
  var DEFAULT_MAX_TOOL_CALLS = 20;
234
396
  var DEFAULT_TOOL_TIMEOUT_MS = 3e4;
397
+ var DEFAULT_HITL_TIMEOUT_S = 300;
398
+ var MAX_PROVIDER_RETRIES = 3;
399
+ var hitlRequestSchema = z.object({
400
+ question: z.string().describe("The question to ask the human"),
401
+ options: z.array(z.string()).optional().describe("If provided, the human picks one of these"),
402
+ timeout_seconds: z.number().optional().describe("How long to wait before timing out (default 300)")
403
+ });
404
/**
 * Run `fn`, retrying transient provider failures up to
 * `MAX_PROVIDER_RETRIES` attempts in total.
 *
 * Only `ProviderError`s are retried. `AuthenticationError` and
 * `ContextWindowError` are excluded: their own messages tell the caller to
 * fix the API key or shrink the request, so repeating the identical call only
 * delays the failure. A `RateLimitError` carrying `retryAfter` waits that many
 * seconds; any other retryable error waits 1s, 2s, 4s, ... capped at 8s.
 *
 * @param {() => Promise<*>} fn - Operation to attempt.
 * @returns {Promise<*>} Whatever `fn` resolves to.
 * @throws The last error from `fn` once retries are exhausted, or immediately
 *   for errors that are not retryable.
 */
async function withRetry(fn) {
  const isRetryable = (err) => err instanceof ProviderError && !(err instanceof AuthenticationError) && !(err instanceof ContextWindowError);
  for (let attempt = 1; ; attempt++) {
    try {
      return await fn();
    } catch (err) {
      if (attempt >= MAX_PROVIDER_RETRIES || !isRetryable(err)) {
        throw err;
      }
      if (err instanceof RateLimitError && err.retryAfter) {
        logger.warn({ attempt, retryAfter: err.retryAfter }, "Rate limited, waiting before retry");
        await new Promise((r) => setTimeout(r, err.retryAfter * 1e3));
      } else {
        const delayMs = Math.min(1e3 * 2 ** (attempt - 1), 8e3);
        logger.warn({ attempt, delayMs }, "Provider error, retrying with backoff");
        await new Promise((r) => setTimeout(r, delayMs));
      }
    }
  }
}
235
423
  var AgentRunner = class {
236
- constructor(provider, events, sessions, semanticMemory) {
424
+ constructor(provider, events, sessions, semanticMemory, globalHooks) {
237
425
  this.provider = provider;
238
426
  this.events = events;
239
427
  this.sessions = sessions;
240
428
  this.semanticMemory = semanticMemory;
429
+ this.globalHooks = globalHooks;
241
430
  }
242
431
  provider;
243
432
  events;
244
433
  sessions;
245
434
  semanticMemory;
435
+ globalHooks;
436
+ pendingHitl = /* @__PURE__ */ new Map();
437
+ async safeHook(fn) {
438
+ if (!fn) return void 0;
439
+ try {
440
+ return await fn() ?? void 0;
441
+ } catch (err) {
442
+ logger.warn({ error: err instanceof Error ? err.message : String(err) }, "Hook error (non-fatal)");
443
+ return void 0;
444
+ }
445
+ }
446
+ /** Resolve a pending human-in-the-loop request for a session. */
447
+ answer(sessionId, answer) {
448
+ const resolve2 = this.pendingHitl.get(sessionId);
449
+ if (resolve2) {
450
+ this.pendingHitl.delete(sessionId);
451
+ resolve2(answer);
452
+ }
453
+ }
246
454
  async run(agent, input, session_id) {
247
455
  const session = session_id ? this.sessions.get(session_id) : this.sessions.create(agent.name);
248
456
  if (!session) {
@@ -253,13 +461,31 @@ Omit session_id to start a new conversation.`
253
461
  );
254
462
  }
255
463
  return TuttiTracer.agentRun(agent.name, session.id, async () => {
464
+ const agentHooks = agent.hooks;
465
+ const hookCtx = {
466
+ agent_name: agent.name,
467
+ session_id: session.id,
468
+ turn: 0,
469
+ metadata: {}
470
+ };
471
+ await this.safeHook(() => this.globalHooks?.beforeAgentRun?.(hookCtx));
472
+ await this.safeHook(() => agentHooks?.beforeAgentRun?.(hookCtx));
256
473
  logger.info({ agent: agent.name, session: session.id }, "Agent started");
257
474
  this.events.emit({
258
475
  type: "agent:start",
259
476
  agent_name: agent.name,
260
477
  session_id: session.id
261
478
  });
262
- const allTools = agent.voices.flatMap((v) => v.tools);
479
+ const voiceCtx = { session_id: session.id, agent_name: agent.name };
480
+ for (const voice of agent.voices) {
481
+ if (voice.setup) {
482
+ await voice.setup(voiceCtx);
483
+ }
484
+ }
485
+ const allTools = [...agent.voices.flatMap((v) => v.tools)];
486
+ if (agent.allow_human_input) {
487
+ allTools.push(this.createHitlTool(agent.name, session.id));
488
+ }
263
489
  const toolDefs = allTools.map(toolToDefinition);
264
490
  const messages = [
265
491
  ...session.messages,
@@ -297,12 +523,17 @@ Omit session_id to start a new conversation.`
297
523
  }
298
524
  }
299
525
  }
300
- const request = {
526
+ let request = {
301
527
  model: agent.model,
302
528
  system: systemPrompt,
303
529
  messages,
304
530
  tools: toolDefs.length > 0 ? toolDefs : void 0
305
531
  };
532
+ hookCtx.turn = turns;
533
+ const globalReq = await this.safeHook(() => this.globalHooks?.beforeLLMCall?.(hookCtx, request));
534
+ if (globalReq) request = globalReq;
535
+ const agentReq = await this.safeHook(() => agentHooks?.beforeLLMCall?.(hookCtx, request));
536
+ if (agentReq) request = agentReq;
306
537
  logger.debug({ agent: agent.name, model: agent.model }, "LLM request");
307
538
  this.events.emit({
308
539
  type: "llm:request",
@@ -311,7 +542,9 @@ Omit session_id to start a new conversation.`
311
542
  });
312
543
  const response = await TuttiTracer.llmCall(
313
544
  agent.model ?? "unknown",
314
- () => agent.streaming ? this.streamToResponse(agent.name, request) : this.provider.chat(request)
545
+ () => withRetry(
546
+ () => agent.streaming ? this.streamToResponse(agent.name, request) : this.provider.chat(request)
547
+ )
315
548
  );
316
549
  logger.debug(
317
550
  { agent: agent.name, stopReason: response.stop_reason, usage: response.usage },
@@ -322,6 +555,8 @@ Omit session_id to start a new conversation.`
322
555
  agent_name: agent.name,
323
556
  response
324
557
  });
558
+ await this.safeHook(() => this.globalHooks?.afterLLMCall?.(hookCtx, response));
559
+ await this.safeHook(() => agentHooks?.afterLLMCall?.(hookCtx, response));
325
560
  totalUsage.input_tokens += response.usage.input_tokens;
326
561
  totalUsage.output_tokens += response.usage.output_tokens;
327
562
  if (budget) {
@@ -402,7 +637,7 @@ Omit session_id to start a new conversation.`
402
637
  }
403
638
  const toolResults = await Promise.all(
404
639
  toolUseBlocks.map(
405
- (block) => this.executeTool(allTools, block, toolContext, toolTimeoutMs)
640
+ (block) => this.executeTool(allTools, block, toolContext, toolTimeoutMs, hookCtx, agentHooks)
406
641
  )
407
642
  );
408
643
  messages.push({ role: "user", content: toolResults });
@@ -419,13 +654,16 @@ Omit session_id to start a new conversation.`
419
654
  agent_name: agent.name,
420
655
  session_id: session.id
421
656
  });
422
- return {
657
+ const agentResult = {
423
658
  session_id: session.id,
424
659
  output,
425
660
  messages,
426
661
  turns,
427
662
  usage: totalUsage
428
663
  };
664
+ await this.safeHook(() => this.globalHooks?.afterAgentRun?.(hookCtx, agentResult));
665
+ await this.safeHook(() => agentHooks?.afterAgentRun?.(hookCtx, agentResult));
666
+ return agentResult;
429
667
  });
430
668
  }
431
669
  async executeWithTimeout(fn, timeoutMs, toolName) {
@@ -433,12 +671,7 @@ Omit session_id to start a new conversation.`
433
671
  fn(),
434
672
  new Promise(
435
673
  (_, reject) => setTimeout(
436
- () => reject(
437
- new Error(
438
- `Tool "${toolName}" timed out after ${timeoutMs}ms.
439
- Increase tool_timeout_ms in your agent config, or check if the tool is hanging.`
440
- )
441
- ),
674
+ () => reject(new ToolTimeoutError(toolName, timeoutMs)),
442
675
  timeoutMs
443
676
  )
444
677
  )
@@ -476,7 +709,42 @@ Increase tool_timeout_ms in your agent config, or check if the tool is hanging.`
476
709
  }
477
710
  return { id: "", content, stop_reason: stopReason, usage };
478
711
  }
479
- async executeTool(tools, block, context, timeoutMs) {
712
+ createHitlTool(agentName, sessionId) {
713
+ return {
714
+ name: "request_human_input",
715
+ description: "Pause and ask the human for guidance or approval before proceeding.",
716
+ parameters: hitlRequestSchema,
717
+ execute: async (input) => {
718
+ const timeout = (input.timeout_seconds ?? DEFAULT_HITL_TIMEOUT_S) * 1e3;
719
+ logger.info({ agent: agentName, question: input.question }, "Waiting for human input");
720
+ const answer = await new Promise((resolve2) => {
721
+ this.pendingHitl.set(sessionId, resolve2);
722
+ this.events.emit({
723
+ type: "hitl:requested",
724
+ agent_name: agentName,
725
+ session_id: sessionId,
726
+ question: input.question,
727
+ options: input.options
728
+ });
729
+ setTimeout(() => {
730
+ if (this.pendingHitl.has(sessionId)) {
731
+ this.pendingHitl.delete(sessionId);
732
+ this.events.emit({ type: "hitl:timeout", agent_name: agentName, session_id: sessionId });
733
+ resolve2("[timeout: human did not respond within " + timeout / 1e3 + "s]");
734
+ }
735
+ }, timeout);
736
+ });
737
+ this.events.emit({
738
+ type: "hitl:answered",
739
+ agent_name: agentName,
740
+ session_id: sessionId,
741
+ answer
742
+ });
743
+ return { content: "Human responded: " + answer };
744
+ }
745
+ };
746
+ }
747
+ async executeTool(tools, block, context, timeoutMs, hookCtx, agentHooks) {
480
748
  const tool = tools.find((t) => t.name === block.name);
481
749
  if (!tool) {
482
750
  const available = tools.map((t) => t.name).join(", ") || "(none)";
@@ -488,6 +756,16 @@ Increase tool_timeout_ms in your agent config, or check if the tool is hanging.`
488
756
  };
489
757
  }
490
758
  return TuttiTracer.toolCall(block.name, async () => {
759
+ if (hookCtx) {
760
+ const globalResult = await this.safeHook(() => this.globalHooks?.beforeToolCall?.(hookCtx, block.name, block.input));
761
+ if (globalResult === false) {
762
+ return { type: "tool_result", tool_use_id: block.id, content: "Tool call blocked by hook", is_error: true };
763
+ }
764
+ const agentResult = await this.safeHook(() => agentHooks?.beforeToolCall?.(hookCtx, block.name, block.input));
765
+ if (agentResult === false) {
766
+ return { type: "tool_result", tool_use_id: block.id, content: "Tool call blocked by hook", is_error: true };
767
+ }
768
+ }
491
769
  logger.debug({ tool: block.name, input: block.input }, "Tool called");
492
770
  this.events.emit({
493
771
  type: "tool:start",
@@ -497,11 +775,17 @@ Increase tool_timeout_ms in your agent config, or check if the tool is hanging.`
497
775
  });
498
776
  try {
499
777
  const parsed = tool.parameters.parse(block.input);
500
- const result = await this.executeWithTimeout(
778
+ let result = await this.executeWithTimeout(
501
779
  () => tool.execute(parsed, context),
502
780
  timeoutMs,
503
781
  block.name
504
782
  );
783
+ if (hookCtx) {
784
+ const globalMod = await this.safeHook(() => this.globalHooks?.afterToolCall?.(hookCtx, block.name, result));
785
+ if (globalMod) result = globalMod;
786
+ const agentMod = await this.safeHook(() => agentHooks?.afterToolCall?.(hookCtx, block.name, result));
787
+ if (agentMod) result = agentMod;
788
+ }
505
789
  logger.debug({ tool: block.name, result: result.content }, "Tool completed");
506
790
  this.events.emit({
507
791
  type: "tool:end",
@@ -731,18 +1015,18 @@ var PostgresSessionStore = class {
731
1015
  import { randomUUID as randomUUID3 } from "crypto";
732
1016
  var InMemorySemanticStore = class {
733
1017
  entries = [];
734
- async add(entry) {
1018
+ add(entry) {
735
1019
  const full = {
736
1020
  ...entry,
737
1021
  id: randomUUID3(),
738
1022
  created_at: /* @__PURE__ */ new Date()
739
1023
  };
740
1024
  this.entries.push(full);
741
- return full;
1025
+ return Promise.resolve(full);
742
1026
  }
743
- async search(query, agent_name, limit = 5) {
1027
+ search(query, agent_name, limit = 5) {
744
1028
  const queryTokens = tokenize(query);
745
- if (queryTokens.size === 0) return [];
1029
+ if (queryTokens.size === 0) return Promise.resolve([]);
746
1030
  const agentEntries = this.entries.filter(
747
1031
  (e) => e.agent_name === agent_name
748
1032
  );
@@ -755,13 +1039,17 @@ var InMemorySemanticStore = class {
755
1039
  const score = overlap / queryTokens.size;
756
1040
  return { entry, score };
757
1041
  });
758
- return scored.filter((s) => s.score > 0).sort((a, b) => b.score - a.score).slice(0, limit).map((s) => s.entry);
1042
+ return Promise.resolve(
1043
+ scored.filter((s) => s.score > 0).sort((a, b) => b.score - a.score).slice(0, limit).map((s) => s.entry)
1044
+ );
759
1045
  }
760
- async delete(id) {
1046
+ delete(id) {
761
1047
  this.entries = this.entries.filter((e) => e.id !== id);
1048
+ return Promise.resolve();
762
1049
  }
763
- async clear(agent_name) {
1050
+ clear(agent_name) {
764
1051
  this.entries = this.entries.filter((e) => e.agent_name !== agent_name);
1052
+ return Promise.resolve();
765
1053
  }
766
1054
  };
767
1055
  function tokenize(text) {
@@ -777,9 +1065,7 @@ var PermissionGuard = class {
777
1065
  (p) => !granted.includes(p)
778
1066
  );
779
1067
  if (missing.length > 0) {
780
- throw new Error(
781
- "Voice " + voice.name + " requires permissions not granted: " + missing.join(", ") + "\n\nGrant them in your score file:\n permissions: [" + missing.map((p) => "'" + p + "'").join(", ") + "]"
782
- );
1068
+ throw new PermissionError(voice.name, voice.required_permissions, granted);
783
1069
  }
784
1070
  }
785
1071
  static warn(voice) {
@@ -795,6 +1081,33 @@ var PermissionGuard = class {
795
1081
  }
796
1082
  };
797
1083
 
1084
+ // src/telemetry-setup.ts
1085
+ import { NodeSDK } from "@opentelemetry/sdk-node";
1086
+ import { OTLPTraceExporter } from "@opentelemetry/exporter-trace-otlp-http";
1087
+ import { getNodeAutoInstrumentations } from "@opentelemetry/auto-instrumentations-node";
1088
+ var sdk;
1089
/**
 * Start OpenTelemetry tracing once per process.
 *
 * Does nothing when `config.enabled` is falsy or an SDK instance is already
 * active. Traces are exported over OTLP/HTTP to `<endpoint>/v1/traces`, with
 * the filesystem auto-instrumentation disabled.
 *
 * @param {{ enabled?: boolean, endpoint?: string, headers?: object }} config
 *   `endpoint` defaults to "http://localhost:4318"; `headers` are passed to
 *   the exporter unchanged.
 */
function initTelemetry(config) {
  if (!config.enabled || sdk) return;
  // Strip trailing slashes so "http://host:4318/" does not produce "//v1/traces".
  const endpoint = (config.endpoint ?? "http://localhost:4318").replace(/\/+$/, "");
  const exporter = new OTLPTraceExporter({
    url: `${endpoint}/v1/traces`,
    headers: config.headers
  });
  const instance = new NodeSDK({
    traceExporter: exporter,
    instrumentations: [getNodeAutoInstrumentations({ "@opentelemetry/instrumentation-fs": { enabled: false } })],
    serviceName: process.env.OTEL_SERVICE_NAME ?? "tutti"
  });
  instance.start();
  // Publish the instance only after a successful start, so a failed start
  // does not block every later initialisation attempt.
  sdk = instance;
  logger.info({ endpoint }, "OpenTelemetry tracing enabled");
}
1104
/**
 * Shut down the active OpenTelemetry SDK, if any.
 *
 * The module-level reference is cleared before awaiting the shutdown. If
 * `shutdown()` rejects, telemetry can still be initialised again, and
 * concurrent callers do not shut the same instance down twice.
 *
 * @returns {Promise<void>} Settles when the SDK has shut down; rejects with
 *   whatever `sdk.shutdown()` rejects with.
 */
async function shutdownTelemetry() {
  if (!sdk) return;
  const active = sdk;
  sdk = void 0;
  await active.shutdown();
}
1110
+
798
1111
  // src/runtime.ts
799
1112
  var TuttiRuntime = class _TuttiRuntime {
800
1113
  events;
@@ -811,7 +1124,8 @@ var TuttiRuntime = class _TuttiRuntime {
811
1124
  score.provider,
812
1125
  this.events,
813
1126
  this._sessions,
814
- this.semanticMemory
1127
+ this.semanticMemory,
1128
+ score.hooks
815
1129
  );
816
1130
  if (score.telemetry) {
817
1131
  initTelemetry(score.telemetry);
@@ -837,15 +1151,17 @@ var TuttiRuntime = class _TuttiRuntime {
837
1151
  if (memory.provider === "postgres") {
838
1152
  const url = memory.url ?? process.env.DATABASE_URL;
839
1153
  if (!url) {
840
- throw new Error(
841
- "PostgreSQL session store requires a connection URL.\nSet memory.url in your score, or DATABASE_URL in your .env file."
1154
+ throw new ScoreValidationError(
1155
+ "PostgreSQL session store requires a connection URL.\nSet memory.url in your score, or DATABASE_URL in your .env file.",
1156
+ { field: "memory.url" }
842
1157
  );
843
1158
  }
844
1159
  return new PostgresSessionStore(url);
845
1160
  }
846
- throw new Error(
1161
+ throw new ScoreValidationError(
847
1162
  `Unsupported memory provider: "${memory.provider}".
848
- Supported: "in-memory", "postgres"`
1163
+ Supported: "in-memory", "postgres"`,
1164
+ { field: "memory.provider", value: memory.provider }
849
1165
  );
850
1166
  }
851
1167
  /** The score configuration this runtime was created with. */
@@ -859,12 +1175,7 @@ Supported: "in-memory", "postgres"`
859
1175
  async run(agent_name, input, session_id) {
860
1176
  const agent = this._score.agents[agent_name];
861
1177
  if (!agent) {
862
- const available = Object.keys(this._score.agents).join(", ");
863
- throw new Error(
864
- `Agent "${agent_name}" not found in your score.
865
- Available agents: ${available}
866
- Check your tutti.score.ts \u2014 the agent ID must match the key in the agents object.`
867
- );
1178
+ throw new AgentNotFoundError(agent_name, Object.keys(this._score.agents));
868
1179
  }
869
1180
  const granted = agent.permissions ?? [];
870
1181
  for (const voice of agent.voices) {
@@ -874,14 +1185,224 @@ Check your tutti.score.ts \u2014 the agent ID must match the key in the agents o
874
1185
  const resolvedAgent = agent.model ? agent : { ...agent, model: this._score.default_model ?? "claude-sonnet-4-20250514" };
875
1186
  return this._runner.run(resolvedAgent, input, session_id);
876
1187
  }
1188
+ /**
1189
+ * Provide an answer to a pending human-in-the-loop request.
1190
+ * Call this when a `hitl:requested` event fires to resume the agent.
1191
+ */
1192
+ answer(sessionId, answer) {
1193
+ this._runner.answer(sessionId, answer);
1194
+ }
877
1195
  /** Retrieve an existing session. */
878
1196
  getSession(id) {
879
1197
  return this._sessions.get(id);
880
1198
  }
881
1199
  };
882
1200
 
1201
+ // src/eval/runner.ts
1202
+ var INPUT_PER_M = 3;
1203
+ var OUTPUT_PER_M = 15;
1204
+ function estimateCost(inputTokens, outputTokens) {
1205
+ return inputTokens / 1e6 * INPUT_PER_M + outputTokens / 1e6 * OUTPUT_PER_M;
1206
+ }
1207
+ var EvalRunner = class {
1208
+ runtime;
1209
+ constructor(score) {
1210
+ this.runtime = new TuttiRuntime(score);
1211
+ }
1212
+ async run(suite) {
1213
+ const results = [];
1214
+ for (const testCase of suite.cases) {
1215
+ const result = await this.runCase(testCase);
1216
+ results.push(result);
1217
+ }
1218
+ const summary = this.summarize(results);
1219
+ return { suite_name: suite.name, results, summary };
1220
+ }
1221
+ async runCase(testCase) {
1222
+ const toolsCalled = [];
1223
+ const unsubscribeToolStart = this.runtime.events.on("tool:start", (e) => {
1224
+ toolsCalled.push(e.tool_name);
1225
+ });
1226
+ const start = Date.now();
1227
+ let output = "";
1228
+ let turns = 0;
1229
+ let usage = { input_tokens: 0, output_tokens: 0 };
1230
+ let error;
1231
+ try {
1232
+ const result = await this.runtime.run(testCase.agent_id, testCase.input);
1233
+ output = result.output;
1234
+ turns = result.turns;
1235
+ usage = result.usage;
1236
+ } catch (err) {
1237
+ error = err instanceof Error ? err.message : String(err);
1238
+ output = "[error] " + error;
1239
+ }
1240
+ unsubscribeToolStart();
1241
+ const durationMs = Date.now() - start;
1242
+ const costUsd = estimateCost(usage.input_tokens, usage.output_tokens);
1243
+ const assertionResults = testCase.assertions.map(
1244
+ (assertion) => this.checkAssertion(assertion, output, toolsCalled, turns, costUsd)
1245
+ );
1246
+ const passedCount = assertionResults.filter((a) => a.passed).length;
1247
+ const score = testCase.assertions.length > 0 ? passedCount / testCase.assertions.length : error ? 0 : 1;
1248
+ return {
1249
+ case_id: testCase.id,
1250
+ case_name: testCase.name,
1251
+ passed: assertionResults.every((a) => a.passed) && !error,
1252
+ score,
1253
+ output,
1254
+ turns,
1255
+ usage,
1256
+ cost_usd: costUsd,
1257
+ duration_ms: durationMs,
1258
+ assertions: assertionResults,
1259
+ error
1260
+ };
1261
+ }
1262
+ checkAssertion(assertion, output, toolsCalled, turns, costUsd) {
1263
+ const val = assertion.value;
1264
+ switch (assertion.type) {
1265
+ case "contains":
1266
+ return {
1267
+ assertion,
1268
+ passed: output.toLowerCase().includes(String(val).toLowerCase()),
1269
+ actual: output.slice(0, 200)
1270
+ };
1271
+ case "not_contains":
1272
+ return {
1273
+ assertion,
1274
+ passed: !output.toLowerCase().includes(String(val).toLowerCase()),
1275
+ actual: output.slice(0, 200)
1276
+ };
1277
+ case "matches_regex": {
1278
+ const regex = new RegExp(String(val), "i");
1279
+ return {
1280
+ assertion,
1281
+ passed: regex.test(output),
1282
+ actual: output.slice(0, 200)
1283
+ };
1284
+ }
1285
+ case "tool_called":
1286
+ return {
1287
+ assertion,
1288
+ passed: toolsCalled.includes(String(val)),
1289
+ actual: toolsCalled.join(", ") || "(none)"
1290
+ };
1291
+ case "tool_not_called":
1292
+ return {
1293
+ assertion,
1294
+ passed: !toolsCalled.includes(String(val)),
1295
+ actual: toolsCalled.join(", ") || "(none)"
1296
+ };
1297
+ case "turns_lte":
1298
+ return {
1299
+ assertion,
1300
+ passed: turns <= Number(val),
1301
+ actual: turns
1302
+ };
1303
+ case "cost_lte":
1304
+ return {
1305
+ assertion,
1306
+ passed: costUsd <= Number(val),
1307
+ actual: Number(costUsd.toFixed(4))
1308
+ };
1309
+ default:
1310
+ logger.warn({ type: assertion.type }, "Unknown assertion type");
1311
+ return { assertion, passed: false, actual: "unknown assertion type" };
1312
+ }
1313
+ }
1314
+ summarize(results) {
1315
+ const passed = results.filter((r) => r.passed).length;
1316
+ const scores = results.map((r) => r.score);
1317
+ const avgScore = scores.length > 0 ? scores.reduce((a, b) => a + b, 0) / scores.length : 0;
1318
+ const totalCost = results.reduce((s, r) => s + r.cost_usd, 0);
1319
+ const totalDuration = results.reduce((s, r) => s + r.duration_ms, 0);
1320
+ return {
1321
+ total: results.length,
1322
+ passed,
1323
+ failed: results.length - passed,
1324
+ avg_score: Number(avgScore.toFixed(2)),
1325
+ total_cost_usd: Number(totalCost.toFixed(4)),
1326
+ total_duration_ms: totalDuration
1327
+ };
1328
+ }
1329
+ };
1330
+
1331
+ // src/eval/report.ts
1332
+ function pad(str, len) {
1333
+ return str.length >= len ? str.slice(0, len) : str + " ".repeat(len - str.length);
1334
+ }
1335
+ function printTable(report) {
1336
+ const { results, summary } = report;
1337
+ console.log();
1338
+ console.log(" Eval suite: " + report.suite_name + " (" + summary.total + " cases)");
1339
+ console.log();
1340
+ for (const r of results) {
1341
+ const icon = r.passed ? "\x1B[32m\u2714\x1B[0m" : "\x1B[31m\u2717\x1B[0m";
1342
+ const score = r.score.toFixed(2);
1343
+ const cost = "$" + r.cost_usd.toFixed(3);
1344
+ const line = " " + icon + " " + pad(r.case_id, 10) + " " + pad(r.case_name, 28) + " " + pad(score, 6) + " " + r.turns + " turns " + cost;
1345
+ console.log(line);
1346
+ if (!r.passed) {
1347
+ for (const a of r.assertions) {
1348
+ if (!a.passed) {
1349
+ const desc = a.assertion.description ?? a.assertion.type + ": " + String(a.assertion.value);
1350
+ console.log(" \x1B[31m\u21B3 FAIL: " + desc + " (actual: " + String(a.actual).slice(0, 60) + ")\x1B[0m");
1351
+ }
1352
+ }
1353
+ if (r.error) {
1354
+ console.log(" \x1B[31m\u21B3 ERROR: " + r.error.slice(0, 80) + "\x1B[0m");
1355
+ }
1356
+ }
1357
+ }
1358
+ const pct = summary.total > 0 ? Math.round(summary.passed / summary.total * 100) : 0;
1359
+ console.log();
1360
+ console.log(
1361
+ " Results: " + summary.passed + "/" + summary.total + " passed (" + pct + "%) | Avg: " + summary.avg_score.toFixed(2) + " | Total: $" + summary.total_cost_usd.toFixed(3)
1362
+ );
1363
+ console.log();
1364
+ }
1365
+ function toJSON(report) {
1366
+ return JSON.stringify(report, null, 2);
1367
+ }
1368
+ function toMarkdown(report) {
1369
+ const { results, summary } = report;
1370
+ const lines = [];
1371
+ lines.push("## Eval: " + report.suite_name);
1372
+ lines.push("");
1373
+ lines.push("| Status | ID | Name | Score | Turns | Cost |");
1374
+ lines.push("|--------|-----|------|-------|-------|------|");
1375
+ for (const r of results) {
1376
+ const icon = r.passed ? "pass" : "FAIL";
1377
+ lines.push(
1378
+ "| " + icon + " | " + r.case_id + " | " + r.case_name + " | " + r.score.toFixed(2) + " | " + r.turns + " | $" + r.cost_usd.toFixed(3) + " |"
1379
+ );
1380
+ }
1381
+ lines.push("");
1382
+ const pct = summary.total > 0 ? Math.round(summary.passed / summary.total * 100) : 0;
1383
+ lines.push(
1384
+ "**Results:** " + summary.passed + "/" + summary.total + " passed (" + pct + "%) | Avg score: " + summary.avg_score.toFixed(2) + " | Total cost: $" + summary.total_cost_usd.toFixed(3)
1385
+ );
1386
+ const failed = results.filter((r) => !r.passed);
1387
+ if (failed.length > 0) {
1388
+ lines.push("");
1389
+ lines.push("### Failures");
1390
+ lines.push("");
1391
+ for (const r of failed) {
1392
+ lines.push("**" + r.case_id + "** \u2014 " + r.case_name);
1393
+ for (const a of r.assertions.filter((x) => !x.passed)) {
1394
+ const desc = a.assertion.description ?? a.assertion.type + ": " + String(a.assertion.value);
1395
+ lines.push("- " + desc + " (actual: `" + String(a.actual).slice(0, 80) + "`)");
1396
+ }
1397
+ if (r.error) lines.push("- Error: " + r.error);
1398
+ lines.push("");
1399
+ }
1400
+ }
1401
+ return lines.join("\n");
1402
+ }
1403
+
883
1404
  // src/agent-router.ts
884
- import { z } from "zod";
1405
+ import { z as z2 } from "zod";
885
1406
  var AgentRouter = class {
886
1407
  constructor(_score) {
887
1408
  this._score = _score;
@@ -957,9 +1478,9 @@ When the user's request matches a specialist's expertise, delegate to them with
957
1478
  const runtime = () => this.runtime;
958
1479
  const events = () => this.runtime.events;
959
1480
  const entryName = score.agents[score.entry ?? "orchestrator"]?.name ?? "orchestrator";
960
- const parameters = z.object({
961
- agent_id: z.enum(delegateIds).describe("Which specialist agent to delegate to"),
962
- task: z.string().describe("The specific task description to pass to the specialist")
1481
+ const parameters = z2.object({
1482
+ agent_id: z2.enum(delegateIds).describe("Which specialist agent to delegate to"),
1483
+ task: z2.string().describe("The specific task description to pass to the specialist")
963
1484
  });
964
1485
  return {
965
1486
  name: "delegate_to_agent",
@@ -1000,50 +1521,51 @@ import { pathToFileURL } from "url";
1000
1521
  import { resolve } from "path";
1001
1522
 
1002
1523
  // src/score-schema.ts
1003
- import { z as z2 } from "zod";
1004
- var PermissionSchema = z2.enum(["network", "filesystem", "shell", "browser"]);
1005
- var VoiceSchema = z2.object({
1006
- name: z2.string().min(1, "Voice name cannot be empty"),
1007
- tools: z2.array(z2.any()),
1008
- required_permissions: z2.array(PermissionSchema)
1524
+ import { z as z3 } from "zod";
1525
+ var PermissionSchema = z3.enum(["network", "filesystem", "shell", "browser"]);
1526
+ var VoiceSchema = z3.object({
1527
+ name: z3.string().min(1, "Voice name cannot be empty"),
1528
+ tools: z3.array(z3.any()),
1529
+ required_permissions: z3.array(PermissionSchema)
1009
1530
  }).passthrough();
1010
- var BudgetSchema = z2.object({
1011
- max_tokens: z2.number().positive().optional(),
1012
- max_cost_usd: z2.number().positive().optional(),
1013
- warn_at_percent: z2.number().min(1).max(100).optional()
1531
+ var BudgetSchema = z3.object({
1532
+ max_tokens: z3.number().positive().optional(),
1533
+ max_cost_usd: z3.number().positive().optional(),
1534
+ warn_at_percent: z3.number().min(1).max(100).optional()
1014
1535
  }).strict();
1015
- var AgentSchema = z2.object({
1016
- name: z2.string().min(1, "Agent name cannot be empty"),
1017
- system_prompt: z2.string().min(1, "Agent system_prompt cannot be empty"),
1018
- voices: z2.array(VoiceSchema),
1019
- model: z2.string().optional(),
1020
- description: z2.string().optional(),
1021
- permissions: z2.array(PermissionSchema).optional(),
1022
- max_turns: z2.number().int().positive("max_turns must be a positive number").optional(),
1023
- max_tool_calls: z2.number().int().positive("max_tool_calls must be a positive number").optional(),
1024
- tool_timeout_ms: z2.number().int().positive("tool_timeout_ms must be a positive number").optional(),
1536
+ var AgentSchema = z3.object({
1537
+ name: z3.string().min(1, "Agent name cannot be empty"),
1538
+ system_prompt: z3.string().min(1, "Agent system_prompt cannot be empty"),
1539
+ voices: z3.array(VoiceSchema),
1540
+ model: z3.string().optional(),
1541
+ description: z3.string().optional(),
1542
+ permissions: z3.array(PermissionSchema).optional(),
1543
+ max_turns: z3.number().int().positive("max_turns must be a positive number").optional(),
1544
+ max_tool_calls: z3.number().int().positive("max_tool_calls must be a positive number").optional(),
1545
+ tool_timeout_ms: z3.number().int().positive("tool_timeout_ms must be a positive number").optional(),
1025
1546
  budget: BudgetSchema.optional(),
1026
- streaming: z2.boolean().optional(),
1027
- delegates: z2.array(z2.string()).optional(),
1028
- role: z2.enum(["orchestrator", "specialist"]).optional()
1547
+ streaming: z3.boolean().optional(),
1548
+ allow_human_input: z3.boolean().optional(),
1549
+ delegates: z3.array(z3.string()).optional(),
1550
+ role: z3.enum(["orchestrator", "specialist"]).optional()
1029
1551
  }).passthrough();
1030
- var TelemetrySchema = z2.object({
1031
- enabled: z2.boolean(),
1032
- endpoint: z2.string().url("telemetry.endpoint must be a valid URL").optional(),
1033
- headers: z2.record(z2.string(), z2.string()).optional()
1552
+ var TelemetrySchema = z3.object({
1553
+ enabled: z3.boolean(),
1554
+ endpoint: z3.string().url("telemetry.endpoint must be a valid URL").optional(),
1555
+ headers: z3.record(z3.string(), z3.string()).optional()
1034
1556
  }).strict();
1035
- var ScoreSchema = z2.object({
1036
- provider: z2.object({ chat: z2.function() }).passthrough().refine((p) => typeof p.chat === "function", {
1557
+ var ScoreSchema = z3.object({
1558
+ provider: z3.object({ chat: z3.function() }).passthrough().refine((p) => typeof p.chat === "function", {
1037
1559
  message: "provider must have a chat() method \u2014 did you forget to pass a provider instance?"
1038
1560
  }),
1039
- agents: z2.record(z2.string(), AgentSchema).refine(
1561
+ agents: z3.record(z3.string(), AgentSchema).refine(
1040
1562
  (agents) => Object.keys(agents).length > 0,
1041
1563
  { message: "Score must define at least one agent" }
1042
1564
  ),
1043
- name: z2.string().optional(),
1044
- description: z2.string().optional(),
1045
- default_model: z2.string().optional(),
1046
- entry: z2.string().optional(),
1565
+ name: z3.string().optional(),
1566
+ description: z3.string().optional(),
1567
+ default_model: z3.string().optional(),
1568
+ entry: z3.string().optional(),
1047
1569
  telemetry: TelemetrySchema.optional()
1048
1570
  }).passthrough();
1049
1571
  function validateScore(config) {
@@ -1053,7 +1575,7 @@ function validateScore(config) {
1053
1575
  const path = issue.path.length > 0 ? issue.path.join(".") : "(root)";
1054
1576
  return ` - ${path}: ${issue.message}`;
1055
1577
  });
1056
- throw new Error(
1578
+ throw new ScoreValidationError(
1057
1579
  "Invalid score file:\n" + issues.join("\n")
1058
1580
  );
1059
1581
  }
@@ -1063,18 +1585,20 @@ function validateScore(config) {
1063
1585
  if (agent.delegates) {
1064
1586
  for (const delegateId of agent.delegates) {
1065
1587
  if (!agentKeys.includes(delegateId)) {
1066
- throw new Error(
1588
+ throw new ScoreValidationError(
1067
1589
  `Invalid score file:
1068
- - agents.${key}.delegates: references unknown agent "${delegateId}". Available: ${agentKeys.join(", ")}`
1590
+ - agents.${key}.delegates: references unknown agent "${delegateId}". Available: ${agentKeys.join(", ")}`,
1591
+ { field: `agents.${key}.delegates`, value: delegateId }
1069
1592
  );
1070
1593
  }
1071
1594
  }
1072
1595
  }
1073
1596
  }
1074
1597
  if (data.entry && !agentKeys.includes(data.entry)) {
1075
- throw new Error(
1598
+ throw new ScoreValidationError(
1076
1599
  `Invalid score file:
1077
- - entry: references unknown agent "${data.entry}". Available: ${agentKeys.join(", ")}`
1600
+ - entry: references unknown agent "${data.entry}". Available: ${agentKeys.join(", ")}`,
1601
+ { field: "entry", value: data.entry }
1078
1602
  );
1079
1603
  }
1080
1604
  }
@@ -1117,8 +1641,9 @@ var AnthropicProvider = class {
1117
1641
  }
1118
1642
  async chat(request) {
1119
1643
  if (!request.model) {
1120
- throw new Error(
1121
- "AnthropicProvider requires a model on ChatRequest.\nSet model on the agent or default_model on the score."
1644
+ throw new ProviderError(
1645
+ "AnthropicProvider requires a model on ChatRequest.\nSet model on the agent or default_model on the score.",
1646
+ { provider: "anthropic" }
1122
1647
  );
1123
1648
  }
1124
1649
  let response;
@@ -1142,10 +1667,10 @@ var AnthropicProvider = class {
1142
1667
  } catch (error) {
1143
1668
  const msg = error instanceof Error ? error.message : String(error);
1144
1669
  logger.error({ error: msg, provider: "anthropic" }, "Provider request failed");
1145
- throw new Error(
1146
- `Anthropic API error: ${msg}
1147
- Check that ANTHROPIC_API_KEY is set correctly in your .env file.`
1148
- );
1670
+ if (msg.includes("authentication") || msg.includes("apiKey") || msg.includes("authToken")) {
1671
+ throw new AuthenticationError("anthropic");
1672
+ }
1673
+ throw new ProviderError(`Anthropic API error: ${msg}`, { provider: "anthropic" });
1149
1674
  }
1150
1675
  const content = response.content.map((block) => {
1151
1676
  if (block.type === "text") {
@@ -1173,8 +1698,9 @@ Check that ANTHROPIC_API_KEY is set correctly in your .env file.`
1173
1698
  }
1174
1699
  async *stream(request) {
1175
1700
  if (!request.model) {
1176
- throw new Error(
1177
- "AnthropicProvider requires a model on ChatRequest.\nSet model on the agent or default_model on the score."
1701
+ throw new ProviderError(
1702
+ "AnthropicProvider requires a model on ChatRequest.\nSet model on the agent or default_model on the score.",
1703
+ { provider: "anthropic" }
1178
1704
  );
1179
1705
  }
1180
1706
  let raw;
@@ -1199,10 +1725,10 @@ Check that ANTHROPIC_API_KEY is set correctly in your .env file.`
1199
1725
  } catch (error) {
1200
1726
  const msg = error instanceof Error ? error.message : String(error);
1201
1727
  logger.error({ error: msg, provider: "anthropic" }, "Provider stream failed");
1202
- throw new Error(
1203
- `Anthropic API error: ${msg}
1204
- Check that ANTHROPIC_API_KEY is set correctly in your .env file.`
1205
- );
1728
+ if (msg.includes("authentication") || msg.includes("apiKey") || msg.includes("authToken")) {
1729
+ throw new AuthenticationError("anthropic");
1730
+ }
1731
+ throw new ProviderError(`Anthropic API error: ${msg}`, { provider: "anthropic" });
1206
1732
  }
1207
1733
  const toolBlocks = /* @__PURE__ */ new Map();
1208
1734
  let inputTokens = 0;
@@ -1269,8 +1795,9 @@ var OpenAIProvider = class {
1269
1795
  }
1270
1796
  async chat(request) {
1271
1797
  if (!request.model) {
1272
- throw new Error(
1273
- "OpenAIProvider requires a model on ChatRequest.\nSet model on the agent or default_model on the score."
1798
+ throw new ProviderError(
1799
+ "OpenAIProvider requires a model on ChatRequest.\nSet model on the agent or default_model on the score.",
1800
+ { provider: "openai" }
1274
1801
  );
1275
1802
  }
1276
1803
  const messages = [];
@@ -1339,10 +1866,10 @@ var OpenAIProvider = class {
1339
1866
  } catch (error) {
1340
1867
  const msg = error instanceof Error ? error.message : String(error);
1341
1868
  logger.error({ error: msg, provider: "openai" }, "Provider request failed");
1342
- throw new Error(
1343
- `OpenAI API error: ${msg}
1344
- Check that OPENAI_API_KEY is set correctly in your .env file.`
1345
- );
1869
+ if (msg.includes("Incorrect API key") || msg.includes("authentication")) {
1870
+ throw new AuthenticationError("openai");
1871
+ }
1872
+ throw new ProviderError(`OpenAI API error: ${msg}`, { provider: "openai" });
1346
1873
  }
1347
1874
  const choice = response.choices[0];
1348
1875
  const content = [];
@@ -1385,8 +1912,9 @@ Check that OPENAI_API_KEY is set correctly in your .env file.`
1385
1912
  }
1386
1913
  async *stream(request) {
1387
1914
  if (!request.model) {
1388
- throw new Error(
1389
- "OpenAIProvider requires a model on ChatRequest.\nSet model on the agent or default_model on the score."
1915
+ throw new ProviderError(
1916
+ "OpenAIProvider requires a model on ChatRequest.\nSet model on the agent or default_model on the score.",
1917
+ { provider: "openai" }
1390
1918
  );
1391
1919
  }
1392
1920
  const messages = [];
@@ -1501,9 +2029,7 @@ var GeminiProvider = class {
1501
2029
  constructor(options = {}) {
1502
2030
  const apiKey = options.api_key ?? SecretsManager.optional("GEMINI_API_KEY");
1503
2031
  if (!apiKey) {
1504
- throw new Error(
1505
- "GeminiProvider requires an API key.\nSet GEMINI_API_KEY in your .env file, or pass api_key to the constructor:\n new GeminiProvider({ api_key: 'your-key' })"
1506
- );
2032
+ throw new AuthenticationError("gemini");
1507
2033
  }
1508
2034
  this.client = new GoogleGenerativeAI(apiKey);
1509
2035
  }
@@ -1582,10 +2108,7 @@ var GeminiProvider = class {
1582
2108
  } catch (error) {
1583
2109
  const msg = error instanceof Error ? error.message : String(error);
1584
2110
  logger.error({ error: msg, provider: "gemini" }, "Provider request failed");
1585
- throw new Error(
1586
- `Gemini API error: ${msg}
1587
- Check that GEMINI_API_KEY is set correctly in your .env file.`
1588
- );
2111
+ throw new ProviderError(`Gemini API error: ${msg}`, { provider: "gemini" });
1589
2112
  }
1590
2113
  const response = result.response;
1591
2114
  const candidate = response.candidates?.[0];
@@ -1725,26 +2248,47 @@ function convertJsonSchemaToGemini(schema) {
1725
2248
  };
1726
2249
  }
1727
2250
  export {
2251
+ AgentNotFoundError,
1728
2252
  AgentRouter,
1729
2253
  AgentRunner,
1730
2254
  AnthropicProvider,
2255
+ AuthenticationError,
2256
+ BudgetExceededError,
2257
+ ContextWindowError,
2258
+ EvalRunner,
1731
2259
  EventBus,
1732
2260
  GeminiProvider,
1733
2261
  InMemorySemanticStore,
1734
2262
  InMemorySessionStore,
1735
2263
  OpenAIProvider,
2264
+ PathTraversalError,
2265
+ PermissionError,
1736
2266
  PermissionGuard,
1737
2267
  PostgresSessionStore,
1738
2268
  PromptGuard,
2269
+ ProviderError,
2270
+ RateLimitError,
1739
2271
  ScoreLoader,
2272
+ ScoreValidationError,
1740
2273
  SecretsManager,
1741
2274
  TokenBudget,
2275
+ ToolTimeoutError,
2276
+ TuttiError,
1742
2277
  TuttiRuntime,
1743
2278
  TuttiTracer,
2279
+ UrlValidationError,
2280
+ VoiceError,
2281
+ createBlocklistHook,
2282
+ createCacheHook,
1744
2283
  createLogger,
2284
+ createLoggingHook,
2285
+ createMaxCostHook,
1745
2286
  defineScore,
2287
+ toJSON as evalToJSON,
2288
+ toMarkdown as evalToMarkdown,
1746
2289
  initTelemetry,
1747
2290
  logger,
2291
+ printTable as printEvalTable,
1748
2292
  shutdownTelemetry,
1749
2293
  validateScore
1750
2294
  };