npm - @tuttiai/core - Versions diffs - 0.7.0 → 0.9.0 - Mend

@tuttiai/core 0.7.0 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4) hide show

package/dist/index.js CHANGED Viewed

@@ -1,108 +1,193 @@
-// src/logger.ts
-import pino from "pino";
-var createLogger = (name) => pino({
-  name,
-  level: process.env.TUTTI_LOG_LEVEL ?? "info",
-  transport: process.env.NODE_ENV === "production" ? void 0 : {
-    target: "pino-pretty",
-    options: {
-      colorize: true,
-      translateTime: "HH:MM:ss",
-      ignore: "pid,hostname"
-    }
+// src/errors.ts
+/**
+ * Base class for the errors defined in this module.
+ *
+ * @param code - Machine-readable error code, stored on `code`.
+ * @param message - Human-readable text handed to the `Error` constructor.
+ * @param context - Extra structured details, stored on `context` (defaults to `{}`).
+ */
+var TuttiError = class extends Error {
+  constructor(code, message, context = {}) {
+    super(message);
+    this.code = code;
+    this.context = context;
+    // Report the concrete subclass name rather than the inherited "Error".
+    this.name = this.constructor.name;
+    // Error.captureStackTrace is a V8 extension. Guard it so that building an
+    // error does not itself throw a TypeError on engines that do not ship it.
+    if (typeof Error.captureStackTrace === "function") {
+      Error.captureStackTrace(this, this.constructor);
+    }
+  }
+  code;
+  context;
+};
+var ScoreValidationError = class extends TuttiError {
+  constructor(message, context = {}) {
+    super("SCORE_INVALID", message, context);
   }
-});
-var logger = createLogger("tutti");
-// src/telemetry.ts
-import { trace, SpanStatusCode } from "@opentelemetry/api";
-var tracer = trace.getTracer("tutti", "1.0.0");
-var TuttiTracer = {
-  agentRun(agentName, sessionId, fn) {
-    return tracer.startActiveSpan("agent.run", async (span) => {
-      span.setAttribute("agent.name", agentName);
-      span.setAttribute("session.id", sessionId);
-      try {
-        const result = await fn();
-        span.setStatus({ code: SpanStatusCode.OK });
-        return result;
-      } catch (err) {
-        span.setStatus({
-          code: SpanStatusCode.ERROR,
-          message: err instanceof Error ? err.message : String(err)
-        });
-        throw err;
-      } finally {
-        span.end();
-      }
-    });
-  },
-  llmCall(model, fn) {
-    return tracer.startActiveSpan("llm.call", async (span) => {
-      span.setAttribute("llm.model", model);
-      try {
-        const result = await fn();
-        span.setStatus({ code: SpanStatusCode.OK });
-        return result;
-      } catch (err) {
-        span.setStatus({
-          code: SpanStatusCode.ERROR,
-          message: err instanceof Error ? err.message : String(err)
-        });
-        throw err;
-      } finally {
-        span.end();
-      }
-    });
-  },
-  toolCall(toolName, fn) {
-    return tracer.startActiveSpan("tool.call", async (span) => {
-      span.setAttribute("tool.name", toolName);
-      try {
-        const result = await fn();
-        span.setStatus({ code: SpanStatusCode.OK });
-        return result;
-      } catch (err) {
-        span.setStatus({
-          code: SpanStatusCode.ERROR,
-          message: err instanceof Error ? err.message : String(err)
-        });
-        throw err;
-      } finally {
-        span.end();
-      }
-    });
+};
+/**
+ * Error with code "AGENT_NOT_FOUND".
+ * The message names the requested ID and lists `available` joined by ", ";
+ * both values are also kept in the error context.
+ */
+var AgentNotFoundError = class extends TuttiError {
+  constructor(agentId, available) {
+    const text = `Agent "${agentId}" not found in your score.
+Available agents: ${available.join(", ")}
+Check your tutti.score.ts \u2014 the agent ID must match the key in the agents object.`;
+    const details = { agent_id: agentId, available };
+    super("AGENT_NOT_FOUND", text, details);
+  }
+};
+/**
+ * Error with code "PERMISSION_DENIED".
+ * The message lists the entries of `required` that are absent from `granted`
+ * and shows them again, single-quoted, as a `permissions: [...]` snippet.
+ */
+var PermissionError = class extends TuttiError {
+  constructor(voice, required, granted) {
+    const notGranted = required.filter((perm) => !granted.includes(perm));
+    const text = `Voice "${voice}" requires permissions not granted: ${notGranted.join(", ")}
+Grant them in your score file:
+  permissions: [${notGranted.map((perm) => "'" + perm + "'").join(", ")}]`;
+    super("PERMISSION_DENIED", text, { voice, required, granted });
+  }
+};
+/**
+ * Error with code "BUDGET_EXCEEDED".
+ * The message shows the locale-formatted token count, the cost to four
+ * decimals and the limit; the raw values are kept in the error context.
+ */
+var BudgetExceededError = class extends TuttiError {
+  constructor(tokens, costUsd, limit) {
+    const text = `Token budget exceeded: ${tokens.toLocaleString()} tokens, $${costUsd.toFixed(4)} (limit: ${limit}).`;
+    const details = { tokens, cost_usd: costUsd, limit };
+    super("BUDGET_EXCEEDED", text, details);
+  }
+};
+/**
+ * Error with code "TOOL_TIMEOUT".
+ * Carries the tool name and the timeout (in milliseconds, per the message
+ * text) in both the message and the error context.
+ */
+var ToolTimeoutError = class extends TuttiError {
+  constructor(tool, timeoutMs) {
+    const text = `Tool "${tool}" timed out after ${timeoutMs}ms.
+Increase tool_timeout_ms in your agent config, or check if the tool is hanging.`;
+    super("TOOL_TIMEOUT", text, { tool, timeout_ms: timeoutMs });
+  }
+};
+/**
+ * Error with code "PROVIDER_ERROR"; parent of the provider-specific errors
+ * declared after it. When no context is passed it defaults to
+ * `{ provider: "unknown" }`.
+ */
+var ProviderError = class extends TuttiError {
+  constructor(message, context = { provider: "unknown" }) {
+    super("PROVIDER_ERROR", message, context);
+  }
+};
+/**
+ * Provider error whose `code` is replaced with "AUTH_ERROR" after the parent
+ * constructor has run. The message names the provider and points at the
+ * .env API key.
+ */
+var AuthenticationError = class extends ProviderError {
+  constructor(provider) {
+    const text = `Authentication failed for ${provider}.
+Check that the API key is set correctly in your .env file.`;
+    super(text, { provider });
+    // Overwrite the "PROVIDER_ERROR" value assigned by the base constructor.
+    Object.defineProperty(this, "code", { value: "AUTH_ERROR" });
+  }
+};
+/**
+ * Provider error whose `code` is replaced with "RATE_LIMIT".
+ * `retryAfter` is optional; when truthy it is mentioned in the message
+ * (rendered with an "s" suffix) and it is always stored on the instance and
+ * in the error context.
+ */
+var RateLimitError = class extends ProviderError {
+  retryAfter;
+  constructor(provider, retryAfter) {
+    let text;
+    if (retryAfter) {
+      text = `Rate limited by ${provider}. Retry after ${retryAfter}s.`;
+    } else {
+      text = `Rate limited by ${provider}.`;
+    }
+    super(text, { provider, retryAfter });
+    // Overwrite the "PROVIDER_ERROR" value assigned by the base constructor.
+    Object.defineProperty(this, "code", { value: "RATE_LIMIT" });
+    this.retryAfter = retryAfter;
+  }
+};
+/**
+ * Provider error whose `code` is replaced with "CONTEXT_WINDOW".
+ * `maxTokens` is optional; when truthy the message includes it,
+ * locale-formatted. It is stored on the instance and, as `max_tokens`, in
+ * the error context.
+ */
+var ContextWindowError = class extends ProviderError {
+  maxTokens;
+  constructor(provider, maxTokens) {
+    const headline = `Context window exceeded for ${provider}.`;
+    const limitNote = maxTokens ? ` Max: ${maxTokens.toLocaleString()} tokens.` : "";
+    const advice = `
+Reduce message history or use a model with a larger context window.`;
+    super(headline + limitNote + advice, { provider, max_tokens: maxTokens });
+    // Overwrite the "PROVIDER_ERROR" value assigned by the base constructor.
+    Object.defineProperty(this, "code", { value: "CONTEXT_WINDOW" });
+    this.maxTokens = maxTokens;
+  }
+};
+/**
+ * Error with code "VOICE_ERROR"; parent of the voice-specific errors declared
+ * after it. `context` is forwarded unchanged, so omitting it falls back to
+ * the base class default of `{}`.
+ */
+var VoiceError = class extends TuttiError {
+  constructor(message, context) {
+    super("VOICE_ERROR", message, context);
+  }
+};
+/**
+ * Voice error whose `code` is replaced with "PATH_TRAVERSAL".
+ * The context records the offending path under the "filesystem" voice.
+ */
+var PathTraversalError = class extends VoiceError {
+  constructor(path) {
+    const text = `Path traversal detected: "${path}" is not allowed.
+All file paths must stay within the allowed directory.`;
+    super(text, { voice: "filesystem", path });
+    // Overwrite the "VOICE_ERROR" value assigned by the base constructor.
+    Object.defineProperty(this, "code", { value: "PATH_TRAVERSAL" });
+  }
+};
+/**
+ * Voice error whose `code` is replaced with "URL_BLOCKED".
+ * The context records the rejected URL under the "playwright" voice.
+ */
+var UrlValidationError = class extends VoiceError {
+  constructor(url) {
+    const text = `URL blocked: "${url}".
+Only http:// and https:// URLs to public hosts are allowed.`;
+    super(text, { voice: "playwright", url });
+    // Overwrite the "VOICE_ERROR" value assigned by the base constructor.
+    Object.defineProperty(this, "code", { value: "URL_BLOCKED" });
   }
 };
-// src/telemetry-setup.ts
-import { NodeSDK } from "@opentelemetry/sdk-node";
-import { OTLPTraceExporter } from "@opentelemetry/exporter-trace-otlp-http";
-import { getNodeAutoInstrumentations } from "@opentelemetry/auto-instrumentations-node";
-var sdk;
-function initTelemetry(config) {
-  if (!config.enabled || sdk) return;
-  const endpoint = config.endpoint ?? "http://localhost:4318";
-  const exporter = new OTLPTraceExporter({
-    url: `${endpoint}/v1/traces`,
-    headers: config.headers
-  });
-  sdk = new NodeSDK({
-    traceExporter: exporter,
-    instrumentations: [getNodeAutoInstrumentations({ "@opentelemetry/instrumentation-fs": { enabled: false } })],
-    serviceName: process.env.OTEL_SERVICE_NAME ?? "tutti"
-  });
-  sdk.start();
-  logger.info({ endpoint }, "OpenTelemetry tracing enabled");
+// src/hooks/index.ts
+/**
+ * Builds a hook set that writes one `log.info` record per LLM call, LLM
+ * response, tool call and tool result, then passes the value through.
+ *
+ * @param log - Object exposing `info(fields, message)`.
+ * @returns Hooks resolving to the unchanged request / input / result
+ *   (`afterLLMCall` resolves to `undefined`).
+ */
+function createLoggingHook(log) {
+  const hooks = {
+    beforeLLMCall(ctx, request) {
+      const fields = { agent: ctx.agent_name, turn: ctx.turn, model: request.model };
+      log.info(fields, "LLM call");
+      return Promise.resolve(request);
+    },
+    afterLLMCall(ctx, response) {
+      const fields = { agent: ctx.agent_name, turn: ctx.turn, usage: response.usage };
+      log.info(fields, "LLM response");
+      return Promise.resolve();
+    },
+    beforeToolCall(ctx, tool, input) {
+      const fields = { agent: ctx.agent_name, tool, input };
+      log.info(fields, "Tool call");
+      return Promise.resolve(input);
+    },
+    afterToolCall(ctx, tool, result) {
+      const fields = { agent: ctx.agent_name, tool, is_error: result.is_error };
+      log.info(fields, "Tool result");
+      return Promise.resolve(result);
+    }
+  };
+  return hooks;
 }
-async function shutdownTelemetry() {
-  if (sdk) {
-    await sdk.shutdown();
-    sdk = void 0;
+/**
+ * Builds tool-call hooks around `store`, an object exposing `get(key)` and
+ * `set(key, value)`.
+ *
+ * NOTE(review): the two hooks do not agree on the cache key. `beforeToolCall`
+ * looks up `cacheKey(tool, input)`, while `afterToolCall` stores under
+ * `cacheKey(tool, result.content)`. A lookup by input therefore only hits
+ * when some earlier result's content serialises to the same JSON as that
+ * input. `afterToolCall` is not handed the input, so correcting this needs a
+ * change beyond this function - confirm the intended behaviour.
+ */
+function createCacheHook(store) {
+  // Key format: tool name, a colon, then the JSON text of the second argument.
+  function cacheKey(tool, input) {
+    return tool + ":" + JSON.stringify(input);
   }
+  return {
+    beforeToolCall(_ctx, tool, input) {
+      const cached = store.get(cacheKey(tool, input));
+      // Truthiness test: a falsy stored value is treated as a miss. On a hit
+      // the stored value is resolved in place of the input.
+      if (cached) return Promise.resolve(cached);
+      return Promise.resolve(input);
+    },
+    afterToolCall(_ctx, tool, result) {
+      // Only non-error results are stored; the result is always passed through.
+      if (!result.is_error) {
+        store.set(cacheKey(tool, result.content), result.content);
+      }
+      return Promise.resolve(result);
+    }
+  };
+}
+/**
+ * Builds a `beforeToolCall` hook that resolves to `false` for every tool name
+ * listed in `blockedTools` and to `true` for any other name.
+ */
+function createBlocklistHook(blockedTools) {
+  const denied = new Set(blockedTools);
+  const isAllowed = (toolName) => !denied.has(toolName);
+  return {
+    beforeToolCall(_ctx, tool) {
+      return Promise.resolve(isAllowed(tool));
+    }
+  };
+}
+/**
+ * Builds hooks that add up an estimated cost across LLM responses and reject
+ * the next LLM call once the running total has reached `maxUsd`.
+ *
+ * @param maxUsd - Spending cap compared against the running total.
+ * @param pricing - Optional rates applied per one million tokens:
+ *   `inputPerM` (default 3) and `outputPerM` (default 15). Omitting it keeps
+ *   the previously hard-coded rates.
+ * @returns `afterLLMCall` accumulates the cost from `response.usage`;
+ *   `beforeLLMCall` resolves to the request, or rejects with an `Error` when
+ *   the total is at or above `maxUsd`.
+ *
+ * NOTE(review): `AgentRunner.safeHook` in this bundle logs and ignores hook
+ * rejections, so confirm the rejection actually stops a run.
+ */
+function createMaxCostHook(maxUsd, pricing = {}) {
+  const inputPerM = pricing.inputPerM ?? 3;
+  const outputPerM = pricing.outputPerM ?? 15;
+  let totalCost = 0;
+  return {
+    afterLLMCall(_ctx, response) {
+      totalCost += response.usage.input_tokens / 1e6 * inputPerM + response.usage.output_tokens / 1e6 * outputPerM;
+      return Promise.resolve();
+    },
+    beforeLLMCall(ctx, request) {
+      if (totalCost >= maxUsd) {
+        return Promise.reject(new Error(
+          "Max cost hook: $" + totalCost.toFixed(4) + " exceeds limit $" + maxUsd.toFixed(2) + " for agent " + ctx.agent_name
+        ));
+      }
+      return Promise.resolve(request);
+    }
+  };
 }
 // src/agent-runner.ts
+import { z } from "zod";
 import { zodToJsonSchema } from "zod-to-json-schema";
 // src/secrets.ts
@@ -228,21 +313,144 @@ var TokenBudget = class {
   }
 };
+// src/logger.ts
+import pino from "pino";
+/**
+ * Creates a pino logger with the given name.
+ * The level comes from TUTTI_LOG_LEVEL (default "info"). Unless NODE_ENV is
+ * "production", output goes through the "pino-pretty" transport.
+ */
+var createLogger = (name) => {
+  const isProduction = process.env.NODE_ENV === "production";
+  const prettyTransport = {
+    target: "pino-pretty",
+    options: {
+      colorize: true,
+      translateTime: "HH:MM:ss",
+      ignore: "pid,hostname"
+    }
+  };
+  return pino({
+    name,
+    level: process.env.TUTTI_LOG_LEVEL ?? "info",
+    transport: isProduction ? void 0 : prettyTransport
+  });
+};
+var logger = createLogger("tutti");
+// src/telemetry.ts
+import { trace, SpanStatusCode } from "@opentelemetry/api";
+var tracer = trace.getTracer("tutti", "1.0.0");
+/**
+ * Runs `fn` inside an active span called `spanName`.
+ * Each entry of `attributes` is set on the span first. The span status is set
+ * to OK when `fn` resolves, or to ERROR (carrying the error message) when it
+ * throws; the error is rethrown and the span is always ended.
+ */
+function runInTracedSpan(spanName, attributes, fn) {
+  return tracer.startActiveSpan(spanName, async (span) => {
+    for (const [key, value] of Object.entries(attributes)) {
+      span.setAttribute(key, value);
+    }
+    try {
+      const result = await fn();
+      span.setStatus({ code: SpanStatusCode.OK });
+      return result;
+    } catch (err) {
+      span.setStatus({
+        code: SpanStatusCode.ERROR,
+        message: err instanceof Error ? err.message : String(err)
+      });
+      throw err;
+    } finally {
+      span.end();
+    }
+  });
+}
+/**
+ * Span wrappers for the three traced operations. Each one resolves to
+ * whatever `fn` resolves to and rethrows whatever `fn` throws.
+ */
+var TuttiTracer = {
+  /** Span "agent.run" with attributes "agent.name" and "session.id". */
+  agentRun(agentName, sessionId, fn) {
+    return runInTracedSpan("agent.run", { "agent.name": agentName, "session.id": sessionId }, fn);
+  },
+  /** Span "llm.call" with attribute "llm.model". */
+  llmCall(model, fn) {
+    return runInTracedSpan("llm.call", { "llm.model": model }, fn);
+  },
+  /** Span "tool.call" with attribute "tool.name". */
+  toolCall(toolName, fn) {
+    return runInTracedSpan("tool.call", { "tool.name": toolName }, fn);
+  }
+};
 // src/agent-runner.ts
 var DEFAULT_MAX_TURNS = 10;
 var DEFAULT_MAX_TOOL_CALLS = 20;
 var DEFAULT_TOOL_TIMEOUT_MS = 3e4;
+var DEFAULT_HITL_TIMEOUT_S = 300;
+var MAX_PROVIDER_RETRIES = 3;
+var hitlRequestSchema = z.object({
+  question: z.string().describe("The question to ask the human"),
+  options: z.array(z.string()).optional().describe("If provided, the human picks one of these"),
+  timeout_seconds: z.number().optional().describe("How long to wait before timing out (default 300)")
+});
+/**
+ * Calls `fn`, retrying on provider errors until MAX_PROVIDER_RETRIES attempts
+ * have been made.
+ *
+ * Only `ProviderError` instances are retried. `AuthenticationError` and
+ * `ContextWindowError` are rethrown at once: the same request cannot succeed
+ * on a second try, so waiting through the backoff only delays the failure.
+ * A `RateLimitError` with a truthy `retryAfter` waits that many seconds;
+ * every other retry waits 1s, 2s, 4s, ... capped at 8s.
+ *
+ * @returns Whatever `fn` resolves to.
+ * @throws The last error from `fn` when it is not retryable or attempts ran out.
+ */
+async function withRetry(fn) {
+  for (let attempt = 1; ; attempt++) {
+    try {
+      return await fn();
+    } catch (err) {
+      const retryable = err instanceof ProviderError && !(err instanceof AuthenticationError) && !(err instanceof ContextWindowError);
+      if (attempt >= MAX_PROVIDER_RETRIES || !retryable) {
+        throw err;
+      }
+      if (err instanceof RateLimitError && err.retryAfter) {
+        logger.warn({ attempt, retryAfter: err.retryAfter }, "Rate limited, waiting before retry");
+        await new Promise((r) => setTimeout(r, err.retryAfter * 1e3));
+      } else {
+        const delayMs = Math.min(1e3 * 2 ** (attempt - 1), 8e3);
+        logger.warn({ attempt, delayMs }, "Provider error, retrying with backoff");
+        await new Promise((r) => setTimeout(r, delayMs));
+      }
+    }
+  }
+}
 var AgentRunner = class {
-  constructor(provider, events, sessions, semanticMemory) {
+  constructor(provider, events, sessions, semanticMemory, globalHooks) {
     this.provider = provider;
     this.events = events;
     this.sessions = sessions;
     this.semanticMemory = semanticMemory;
+    this.globalHooks = globalHooks;
   }
   provider;
   events;
   sessions;
   semanticMemory;
+  globalHooks;
+  pendingHitl = /* @__PURE__ */ new Map();
+  /**
+   * Runs an optional hook thunk without letting its failure propagate.
+   * Resolves to the thunk's value (with `null` mapped to `undefined`), or to
+   * `undefined` when no thunk is given or it throws; a throw is logged as a
+   * warning.
+   */
+  async safeHook(fn) {
+    if (!fn) {
+      return void 0;
+    }
+    let outcome;
+    try {
+      outcome = await fn();
+    } catch (err) {
+      const reason = err instanceof Error ? err.message : String(err);
+      logger.warn({ error: reason }, "Hook error (non-fatal)");
+      return void 0;
+    }
+    return outcome ?? void 0;
+  }
+  /**
+   * Hands `answer` to the resolver waiting on `sessionId`, if there is one,
+   * after removing it from the pending map. Does nothing when no request is
+   * pending for that session.
+   */
+  answer(sessionId, answer) {
+    const pending = this.pendingHitl.get(sessionId);
+    if (!pending) {
+      return;
+    }
+    this.pendingHitl.delete(sessionId);
+    pending(answer);
+  }
   async run(agent, input, session_id) {
     const session = session_id ? this.sessions.get(session_id) : this.sessions.create(agent.name);
     if (!session) {
@@ -253,13 +461,31 @@ Omit session_id to start a new conversation.`
       );
     }
     return TuttiTracer.agentRun(agent.name, session.id, async () => {
+      const agentHooks = agent.hooks;
+      const hookCtx = {
+        agent_name: agent.name,
+        session_id: session.id,
+        turn: 0,
+        metadata: {}
+      };
+      await this.safeHook(() => this.globalHooks?.beforeAgentRun?.(hookCtx));
+      await this.safeHook(() => agentHooks?.beforeAgentRun?.(hookCtx));
       logger.info({ agent: agent.name, session: session.id }, "Agent started");
       this.events.emit({
         type: "agent:start",
         agent_name: agent.name,
         session_id: session.id
       });
-      const allTools = agent.voices.flatMap((v) => v.tools);
+      const voiceCtx = { session_id: session.id, agent_name: agent.name };
+      for (const voice of agent.voices) {
+        if (voice.setup) {
+          await voice.setup(voiceCtx);
+        }
+      }
+      const allTools = [...agent.voices.flatMap((v) => v.tools)];
+      if (agent.allow_human_input) {
+        allTools.push(this.createHitlTool(agent.name, session.id));
+      }
       const toolDefs = allTools.map(toolToDefinition);
       const messages = [
         ...session.messages,
@@ -297,12 +523,17 @@ Omit session_id to start a new conversation.`
             }
           }
         }
-        const request = {
+        let request = {
           model: agent.model,
           system: systemPrompt,
           messages,
           tools: toolDefs.length > 0 ? toolDefs : void 0
         };
+        hookCtx.turn = turns;
+        const globalReq = await this.safeHook(() => this.globalHooks?.beforeLLMCall?.(hookCtx, request));
+        if (globalReq) request = globalReq;
+        const agentReq = await this.safeHook(() => agentHooks?.beforeLLMCall?.(hookCtx, request));
+        if (agentReq) request = agentReq;
         logger.debug({ agent: agent.name, model: agent.model }, "LLM request");
         this.events.emit({
           type: "llm:request",
@@ -311,7 +542,9 @@ Omit session_id to start a new conversation.`
         });
         const response = await TuttiTracer.llmCall(
           agent.model ?? "unknown",
-          () => agent.streaming ? this.streamToResponse(agent.name, request) : this.provider.chat(request)
+          () => withRetry(
+            () => agent.streaming ? this.streamToResponse(agent.name, request) : this.provider.chat(request)
+          )
         );
         logger.debug(
           { agent: agent.name, stopReason: response.stop_reason, usage: response.usage },
@@ -322,6 +555,8 @@ Omit session_id to start a new conversation.`
           agent_name: agent.name,
           response
         });
+        await this.safeHook(() => this.globalHooks?.afterLLMCall?.(hookCtx, response));
+        await this.safeHook(() => agentHooks?.afterLLMCall?.(hookCtx, response));
         totalUsage.input_tokens += response.usage.input_tokens;
         totalUsage.output_tokens += response.usage.output_tokens;
         if (budget) {
@@ -402,7 +637,7 @@ Omit session_id to start a new conversation.`
         }
         const toolResults = await Promise.all(
           toolUseBlocks.map(
-            (block) => this.executeTool(allTools, block, toolContext, toolTimeoutMs)
+            (block) => this.executeTool(allTools, block, toolContext, toolTimeoutMs, hookCtx, agentHooks)
           )
         );
         messages.push({ role: "user", content: toolResults });
@@ -419,13 +654,16 @@ Omit session_id to start a new conversation.`
         agent_name: agent.name,
         session_id: session.id
       });
-      return {
+      const agentResult = {
         session_id: session.id,
         output,
         messages,
         turns,
         usage: totalUsage
       };
+      await this.safeHook(() => this.globalHooks?.afterAgentRun?.(hookCtx, agentResult));
+      await this.safeHook(() => agentHooks?.afterAgentRun?.(hookCtx, agentResult));
+      return agentResult;
     });
   }
   async executeWithTimeout(fn, timeoutMs, toolName) {
@@ -433,12 +671,7 @@ Omit session_id to start a new conversation.`
       fn(),
       new Promise(
         (_, reject) => setTimeout(
-          () => reject(
-            new Error(
-              `Tool "${toolName}" timed out after ${timeoutMs}ms.
-Increase tool_timeout_ms in your agent config, or check if the tool is hanging.`
-            )
-          ),
+          () => reject(new ToolTimeoutError(toolName, timeoutMs)),
           timeoutMs
         )
       )
@@ -476,7 +709,42 @@ Increase tool_timeout_ms in your agent config, or check if the tool is hanging.`
     }
     return { id: "", content, stop_reason: stopReason, usage };
   }
-  async executeTool(tools, block, context, timeoutMs) {
+  /**
+   * Builds the "request_human_input" tool for one agent/session pair.
+   *
+   * Executing the tool registers a resolver in `pendingHitl` under the session
+   * ID, emits "hitl:requested" and waits until `answer()` supplies a reply or
+   * the timeout elapses (`input.timeout_seconds`, default
+   * DEFAULT_HITL_TIMEOUT_S). Either way "hitl:answered" is emitted and the
+   * text is returned as the tool result.
+   */
+  createHitlTool(agentName, sessionId) {
+    return {
+      name: "request_human_input",
+      description: "Pause and ask the human for guidance or approval before proceeding.",
+      parameters: hitlRequestSchema,
+      execute: async (input) => {
+        const timeout = (input.timeout_seconds ?? DEFAULT_HITL_TIMEOUT_S) * 1e3;
+        logger.info({ agent: agentName, question: input.question }, "Waiting for human input");
+        const answer = await new Promise((resolve2) => {
+          let timer;
+          // Every path that settles the request also cancels the timer, so an
+          // answered request leaves no timer behind to keep the event loop
+          // alive or to fire against a later request for the same session.
+          const settle = (value) => {
+            clearTimeout(timer);
+            resolve2(value);
+          };
+          // Armed before the event is emitted so that a listener answering
+          // synchronously still cancels it.
+          timer = setTimeout(() => {
+            // Remove the map entry only when it still belongs to this request;
+            // a newer request for the session may have replaced it.
+            if (this.pendingHitl.get(sessionId) === settle) {
+              this.pendingHitl.delete(sessionId);
+            }
+            this.events.emit({ type: "hitl:timeout", agent_name: agentName, session_id: sessionId });
+            settle("[timeout: human did not respond within " + timeout / 1e3 + "s]");
+          }, timeout);
+          this.pendingHitl.set(sessionId, settle);
+          this.events.emit({
+            type: "hitl:requested",
+            agent_name: agentName,
+            session_id: sessionId,
+            question: input.question,
+            options: input.options
+          });
+        });
+        this.events.emit({
+          type: "hitl:answered",
+          agent_name: agentName,
+          session_id: sessionId,
+          answer
+        });
+        return { content: "Human responded: " + answer };
+      }
+    };
+  }
+  async executeTool(tools, block, context, timeoutMs, hookCtx, agentHooks) {
     const tool = tools.find((t) => t.name === block.name);
     if (!tool) {
       const available = tools.map((t) => t.name).join(", ") || "(none)";
@@ -488,6 +756,16 @@ Increase tool_timeout_ms in your agent config, or check if the tool is hanging.`
       };
     }
     return TuttiTracer.toolCall(block.name, async () => {
+      if (hookCtx) {
+        const globalResult = await this.safeHook(() => this.globalHooks?.beforeToolCall?.(hookCtx, block.name, block.input));
+        if (globalResult === false) {
+          return { type: "tool_result", tool_use_id: block.id, content: "Tool call blocked by hook", is_error: true };
+        }
+        const agentResult = await this.safeHook(() => agentHooks?.beforeToolCall?.(hookCtx, block.name, block.input));
+        if (agentResult === false) {
+          return { type: "tool_result", tool_use_id: block.id, content: "Tool call blocked by hook", is_error: true };
+        }
+      }
       logger.debug({ tool: block.name, input: block.input }, "Tool called");
       this.events.emit({
         type: "tool:start",
@@ -497,11 +775,17 @@ Increase tool_timeout_ms in your agent config, or check if the tool is hanging.`
       });
       try {
         const parsed = tool.parameters.parse(block.input);
-        const result = await this.executeWithTimeout(
+        let result = await this.executeWithTimeout(
           () => tool.execute(parsed, context),
           timeoutMs,
           block.name
         );
+        if (hookCtx) {
+          const globalMod = await this.safeHook(() => this.globalHooks?.afterToolCall?.(hookCtx, block.name, result));
+          if (globalMod) result = globalMod;
+          const agentMod = await this.safeHook(() => agentHooks?.afterToolCall?.(hookCtx, block.name, result));
+          if (agentMod) result = agentMod;
+        }
         logger.debug({ tool: block.name, result: result.content }, "Tool completed");
         this.events.emit({
           type: "tool:end",
@@ -731,18 +1015,18 @@ var PostgresSessionStore = class {
 import { randomUUID as randomUUID3 } from "crypto";
 var InMemorySemanticStore = class {
   entries = [];
-  async add(entry) {
+  add(entry) {
     const full = {
       ...entry,
       id: randomUUID3(),
       created_at: /* @__PURE__ */ new Date()
     };
     this.entries.push(full);
-    return full;
+    return Promise.resolve(full);
   }
-  async search(query, agent_name, limit = 5) {
+  search(query, agent_name, limit = 5) {
     const queryTokens = tokenize(query);
-    if (queryTokens.size === 0) return [];
+    if (queryTokens.size === 0) return Promise.resolve([]);
     const agentEntries = this.entries.filter(
       (e) => e.agent_name === agent_name
     );
@@ -755,13 +1039,17 @@ var InMemorySemanticStore = class {
       const score = overlap / queryTokens.size;
       return { entry, score };
     });
-    return scored.filter((s) => s.score > 0).sort((a, b) => b.score - a.score).slice(0, limit).map((s) => s.entry);
+    return Promise.resolve(
+      scored.filter((s) => s.score > 0).sort((a, b) => b.score - a.score).slice(0, limit).map((s) => s.entry)
+    );
   }
-  async delete(id) {
+  delete(id) {
     this.entries = this.entries.filter((e) => e.id !== id);
+    return Promise.resolve();
   }
-  async clear(agent_name) {
+  clear(agent_name) {
     this.entries = this.entries.filter((e) => e.agent_name !== agent_name);
+    return Promise.resolve();
   }
 };
 function tokenize(text) {
@@ -777,9 +1065,7 @@ var PermissionGuard = class {
       (p) => !granted.includes(p)
     );
     if (missing.length > 0) {
-      throw new Error(
-        "Voice " + voice.name + " requires permissions not granted: " + missing.join(", ") + "\n\nGrant them in your score file:\n  permissions: [" + missing.map((p) => "'" + p + "'").join(", ") + "]"
-      );
+      throw new PermissionError(voice.name, voice.required_permissions, granted);
     }
   }
   static warn(voice) {
@@ -795,6 +1081,33 @@ var PermissionGuard = class {
   }
 };
+// src/telemetry-setup.ts
+import { NodeSDK } from "@opentelemetry/sdk-node";
+import { OTLPTraceExporter } from "@opentelemetry/exporter-trace-otlp-http";
+import { getNodeAutoInstrumentations } from "@opentelemetry/auto-instrumentations-node";
+var sdk;
+/**
+ * Starts OpenTelemetry tracing once.
+ * Does nothing when `config.enabled` is falsy or an SDK instance already
+ * exists. Traces are exported to `<endpoint>/v1/traces`, where the endpoint
+ * defaults to "http://localhost:4318"; trailing slashes on the endpoint are
+ * dropped so the export URL never contains "//v1/traces". The service name
+ * comes from OTEL_SERVICE_NAME (default "tutti").
+ */
+function initTelemetry(config) {
+  if (!config.enabled || sdk) return;
+  const endpoint = String(config.endpoint ?? "http://localhost:4318").replace(/\/+$/, "");
+  const exporter = new OTLPTraceExporter({
+    url: `${endpoint}/v1/traces`,
+    headers: config.headers
+  });
+  sdk = new NodeSDK({
+    traceExporter: exporter,
+    // File-system auto-instrumentation is switched off explicitly.
+    instrumentations: [getNodeAutoInstrumentations({ "@opentelemetry/instrumentation-fs": { enabled: false } })],
+    serviceName: process.env.OTEL_SERVICE_NAME ?? "tutti"
+  });
+  sdk.start();
+  logger.info({ endpoint }, "OpenTelemetry tracing enabled");
+}
+/**
+ * Shuts the telemetry SDK down and forgets it, so a later `initTelemetry`
+ * call can start a fresh one. Does nothing when no SDK is running.
+ */
+async function shutdownTelemetry() {
+  if (!sdk) {
+    return;
+  }
+  await sdk.shutdown();
+  sdk = void 0;
+}
 // src/runtime.ts
 var TuttiRuntime = class _TuttiRuntime {
   events;
@@ -811,7 +1124,8 @@ var TuttiRuntime = class _TuttiRuntime {
       score.provider,
       this.events,
       this._sessions,
-      this.semanticMemory
+      this.semanticMemory,
+      score.hooks
     );
     if (score.telemetry) {
       initTelemetry(score.telemetry);
@@ -837,15 +1151,17 @@ var TuttiRuntime = class _TuttiRuntime {
     if (memory.provider === "postgres") {
       const url = memory.url ?? process.env.DATABASE_URL;
       if (!url) {
-        throw new Error(
-          "PostgreSQL session store requires a connection URL.\nSet memory.url in your score, or DATABASE_URL in your .env file."
+        throw new ScoreValidationError(
+          "PostgreSQL session store requires a connection URL.\nSet memory.url in your score, or DATABASE_URL in your .env file.",
+          { field: "memory.url" }
         );
       }
       return new PostgresSessionStore(url);
     }
-    throw new Error(
+    throw new ScoreValidationError(
       `Unsupported memory provider: "${memory.provider}".
-Supported: "in-memory", "postgres"`
+Supported: "in-memory", "postgres"`,
+      { field: "memory.provider", value: memory.provider }
     );
   }
   /** The score configuration this runtime was created with. */
@@ -859,12 +1175,7 @@ Supported: "in-memory", "postgres"`
   async run(agent_name, input, session_id) {
     const agent = this._score.agents[agent_name];
     if (!agent) {
-      const available = Object.keys(this._score.agents).join(", ");
-      throw new Error(
-        `Agent "${agent_name}" not found in your score.
-Available agents: ${available}
-Check your tutti.score.ts \u2014 the agent ID must match the key in the agents object.`
-      );
+      throw new AgentNotFoundError(agent_name, Object.keys(this._score.agents));
     }
     const granted = agent.permissions ?? [];
     for (const voice of agent.voices) {
@@ -874,14 +1185,224 @@ Check your tutti.score.ts \u2014 the agent ID must match the key in the agents o
     const resolvedAgent = agent.model ? agent : { ...agent, model: this._score.default_model ?? "claude-sonnet-4-20250514" };
     return this._runner.run(resolvedAgent, input, session_id);
   }
+  /**
+   * Provide an answer to a pending human-in-the-loop request.
+   * Call this when a `hitl:requested` event fires to resume the agent.
+   */
+  answer(sessionId, answer) {
+    this._runner.answer(sessionId, answer);
+  }
   /** Retrieve an existing session. */
   getSession(id) {
     return this._sessions.get(id);
   }
 };
+// src/eval/runner.ts
+// Rates applied per one million input / output tokens.
+var INPUT_PER_M = 3;
+var OUTPUT_PER_M = 15;
+/**
+ * Estimates a cost from token counts: each count is divided by one million
+ * and multiplied by its rate, and the two parts are summed.
+ */
+function estimateCost(inputTokens, outputTokens) {
+  const inputCost = inputTokens / 1e6 * INPUT_PER_M;
+  const outputCost = outputTokens / 1e6 * OUTPUT_PER_M;
+  return inputCost + outputCost;
+}
+/**
+ * Runs eval suites against a runtime built from a score.
+ *
+ * `run(suite)` executes the cases one after another and resolves to
+ * `{ suite_name, results, summary }`.
+ */
+var EvalRunner = class {
+  runtime;
+  constructor(score) {
+    this.runtime = new TuttiRuntime(score);
+  }
+  /** Runs every case of `suite` sequentially and summarises the outcomes. */
+  async run(suite) {
+    const results = [];
+    for (const testCase of suite.cases) {
+      const result = await this.runCase(testCase);
+      results.push(result);
+    }
+    const summary = this.summarize(results);
+    return { suite_name: suite.name, results, summary };
+  }
+  /**
+   * Runs one case: records the names of the tools started during the run,
+   * measures the duration, estimates the cost and evaluates each assertion.
+   * An error thrown by the agent run is captured into `error` and `output`
+   * rather than propagated.
+   */
+  async runCase(testCase) {
+    const toolsCalled = [];
+    const unsubscribeToolStart = this.runtime.events.on("tool:start", (e) => {
+      toolsCalled.push(e.tool_name);
+    });
+    const start = Date.now();
+    let output = "";
+    let turns = 0;
+    let usage = { input_tokens: 0, output_tokens: 0 };
+    let error;
+    try {
+      const result = await this.runtime.run(testCase.agent_id, testCase.input);
+      output = result.output;
+      turns = result.turns;
+      usage = result.usage;
+    } catch (err) {
+      error = err instanceof Error ? err.message : String(err);
+      output = "[error] " + error;
+    } finally {
+      // Always detach the listener so later cases do not record into this list.
+      unsubscribeToolStart();
+    }
+    const durationMs = Date.now() - start;
+    const costUsd = estimateCost(usage.input_tokens, usage.output_tokens);
+    const assertionResults = testCase.assertions.map(
+      (assertion) => this.checkAssertion(assertion, output, toolsCalled, turns, costUsd)
+    );
+    const passedCount = assertionResults.filter((a) => a.passed).length;
+    // With no assertions the score is 1 unless the run itself failed.
+    const score = testCase.assertions.length > 0 ? passedCount / testCase.assertions.length : error ? 0 : 1;
+    return {
+      case_id: testCase.id,
+      case_name: testCase.name,
+      passed: assertionResults.every((a) => a.passed) && !error,
+      score,
+      output,
+      turns,
+      usage,
+      cost_usd: costUsd,
+      duration_ms: durationMs,
+      assertions: assertionResults,
+      error
+    };
+  }
+  /**
+   * Evaluates one assertion and returns `{ assertion, passed, actual }`.
+   * Text checks ("contains", "not_contains", "matches_regex") are
+   * case-insensitive and report the first 200 characters of the output.
+   * An unknown type, or a "matches_regex" pattern that is not a valid regular
+   * expression, yields a failed assertion instead of throwing.
+   */
+  checkAssertion(assertion, output, toolsCalled, turns, costUsd) {
+    const val = assertion.value;
+    switch (assertion.type) {
+      case "contains":
+        return {
+          assertion,
+          passed: output.toLowerCase().includes(String(val).toLowerCase()),
+          actual: output.slice(0, 200)
+        };
+      case "not_contains":
+        return {
+          assertion,
+          passed: !output.toLowerCase().includes(String(val).toLowerCase()),
+          actual: output.slice(0, 200)
+        };
+      case "matches_regex": {
+        let regex;
+        try {
+          regex = new RegExp(String(val), "i");
+        } catch (err) {
+          // A malformed pattern fails this one assertion; it must not abort
+          // the rest of the suite.
+          return {
+            assertion,
+            passed: false,
+            actual: "invalid regex: " + (err instanceof Error ? err.message : String(err))
+          };
+        }
+        return {
+          assertion,
+          passed: regex.test(output),
+          actual: output.slice(0, 200)
+        };
+      }
+      case "tool_called":
+        return {
+          assertion,
+          passed: toolsCalled.includes(String(val)),
+          actual: toolsCalled.join(", ") || "(none)"
+        };
+      case "tool_not_called":
+        return {
+          assertion,
+          passed: !toolsCalled.includes(String(val)),
+          actual: toolsCalled.join(", ") || "(none)"
+        };
+      case "turns_lte":
+        return {
+          assertion,
+          passed: turns <= Number(val),
+          actual: turns
+        };
+      case "cost_lte":
+        return {
+          assertion,
+          passed: costUsd <= Number(val),
+          actual: Number(costUsd.toFixed(4))
+        };
+      default:
+        logger.warn({ type: assertion.type }, "Unknown assertion type");
+        return { assertion, passed: false, actual: "unknown assertion type" };
+    }
+  }
+  /**
+   * Aggregates case results into totals: pass/fail counts, the average score
+   * rounded to two decimals, the total cost rounded to four decimals and the
+   * summed duration.
+   */
+  summarize(results) {
+    const passed = results.filter((r) => r.passed).length;
+    const scores = results.map((r) => r.score);
+    const avgScore = scores.length > 0 ? scores.reduce((a, b) => a + b, 0) / scores.length : 0;
+    const totalCost = results.reduce((s, r) => s + r.cost_usd, 0);
+    const totalDuration = results.reduce((s, r) => s + r.duration_ms, 0);
+    return {
+      total: results.length,
+      passed,
+      failed: results.length - passed,
+      avg_score: Number(avgScore.toFixed(2)),
+      total_cost_usd: Number(totalCost.toFixed(4)),
+      total_duration_ms: totalDuration
+    };
+  }
+};
+// src/eval/report.ts
+/**
+ * Fits `str` to exactly `len` characters: longer strings are cut to the
+ * first `len` characters, shorter ones are padded on the right with spaces.
+ */
+function pad(str, len) {
+  if (str.length >= len) {
+    return str.slice(0, len);
+  }
+  const missing = len - str.length;
+  return str + " ".repeat(missing);
+}
+/**
+ * Prints an eval report to the console: a heading, one line per case and a
+ * totals line. Failed cases are followed by their failing assertions and,
+ * when present, the run error. Returns nothing.
+ */
+function printTable(report) {
+  const { results, summary } = report;
+  console.log();
+  console.log("  Eval suite: " + report.suite_name + " (" + summary.total + " cases)");
+  console.log();
+  for (const r of results) {
+    // Check mark in ANSI green for a pass, cross in ANSI red for a failure.
+    const icon = r.passed ? "\x1B[32m\u2714\x1B[0m" : "\x1B[31m\u2717\x1B[0m";
+    const score = r.score.toFixed(2);
+    const cost = "$" + r.cost_usd.toFixed(3);
+    // Fixed-width columns: id (10), name (28), score (6).
+    const line = "  " + icon + " " + pad(r.case_id, 10) + " " + pad(r.case_name, 28) + " " + pad(score, 6) + " " + r.turns + " turns  " + cost;
+    console.log(line);
+    if (!r.passed) {
+      for (const a of r.assertions) {
+        if (!a.passed) {
+          // Fall back to "type: value" when the assertion has no description.
+          const desc = a.assertion.description ?? a.assertion.type + ": " + String(a.assertion.value);
+          console.log("    \x1B[31m\u21B3 FAIL: " + desc + " (actual: " + String(a.actual).slice(0, 60) + ")\x1B[0m");
+        }
+      }
+      if (r.error) {
+        console.log("    \x1B[31m\u21B3 ERROR: " + r.error.slice(0, 80) + "\x1B[0m");
+      }
+    }
+  }
+  // Pass rate as a whole percentage; 0 when the suite has no cases.
+  const pct = summary.total > 0 ? Math.round(summary.passed / summary.total * 100) : 0;
+  console.log();
+  console.log(
+    "  Results: " + summary.passed + "/" + summary.total + " passed (" + pct + "%) | Avg: " + summary.avg_score.toFixed(2) + " | Total: $" + summary.total_cost_usd.toFixed(3)
+  );
+  console.log();
+}
+/** Serialises a report as JSON indented by two spaces. */
+function toJSON(report) {
+  const indent = 2;
+  return JSON.stringify(report, null, indent);
+}
+/**
+ * Renders an eval report as Markdown: a heading, a results table, a totals
+ * line and, when any case failed, a "Failures" section listing the failing
+ * assertions and run errors.
+ *
+ * Case IDs and names are escaped for use inside table cells: "|" becomes
+ * "\|" and line breaks become spaces, so such values cannot split a row.
+ *
+ * @returns The Markdown text, lines joined with "\n".
+ */
+function toMarkdown(report) {
+  const { results, summary } = report;
+  // Keeps a value on one table row and inside one column.
+  const cell = (value) => String(value).replace(/\|/g, "\\|").replace(/\r?\n/g, " ");
+  const lines = [];
+  lines.push("## Eval: " + report.suite_name);
+  lines.push("");
+  lines.push("| Status | ID | Name | Score | Turns | Cost |");
+  lines.push("|--------|-----|------|-------|-------|------|");
+  for (const r of results) {
+    const icon = r.passed ? "pass" : "FAIL";
+    lines.push(
+      "| " + icon + " | " + cell(r.case_id) + " | " + cell(r.case_name) + " | " + r.score.toFixed(2) + " | " + r.turns + " | $" + r.cost_usd.toFixed(3) + " |"
+    );
+  }
+  lines.push("");
+  // Pass rate as a whole percentage; 0 when the suite has no cases.
+  const pct = summary.total > 0 ? Math.round(summary.passed / summary.total * 100) : 0;
+  lines.push(
+    "**Results:** " + summary.passed + "/" + summary.total + " passed (" + pct + "%) | Avg score: " + summary.avg_score.toFixed(2) + " | Total cost: $" + summary.total_cost_usd.toFixed(3)
+  );
+  const failed = results.filter((r) => !r.passed);
+  if (failed.length > 0) {
+    lines.push("");
+    lines.push("### Failures");
+    lines.push("");
+    for (const r of failed) {
+      lines.push("**" + r.case_id + "** \u2014 " + r.case_name);
+      for (const a of r.assertions.filter((x) => !x.passed)) {
+        // Fall back to "type: value" when the assertion has no description.
+        const desc = a.assertion.description ?? a.assertion.type + ": " + String(a.assertion.value);
+        lines.push("- " + desc + " (actual: `" + String(a.actual).slice(0, 80) + "`)");
+      }
+      if (r.error) lines.push("- Error: " + r.error);
+      lines.push("");
+    }
+  }
+  return lines.join("\n");
+}
 // src/agent-router.ts
-import { z } from "zod";
+import { z as z2 } from "zod";
 var AgentRouter = class {
   constructor(_score) {
     this._score = _score;
@@ -957,9 +1478,9 @@ When the user's request matches a specialist's expertise, delegate to them with
     const runtime = () => this.runtime;
     const events = () => this.runtime.events;
     const entryName = score.agents[score.entry ?? "orchestrator"]?.name ?? "orchestrator";
-    const parameters = z.object({
-      agent_id: z.enum(delegateIds).describe("Which specialist agent to delegate to"),
-      task: z.string().describe("The specific task description to pass to the specialist")
+    const parameters = z2.object({
+      agent_id: z2.enum(delegateIds).describe("Which specialist agent to delegate to"),
+      task: z2.string().describe("The specific task description to pass to the specialist")
     });
     return {
       name: "delegate_to_agent",
@@ -1000,50 +1521,51 @@ import { pathToFileURL } from "url";
 import { resolve } from "path";
 // src/score-schema.ts
-import { z as z2 } from "zod";
-var PermissionSchema = z2.enum(["network", "filesystem", "shell", "browser"]);
-var VoiceSchema = z2.object({
-  name: z2.string().min(1, "Voice name cannot be empty"),
-  tools: z2.array(z2.any()),
-  required_permissions: z2.array(PermissionSchema)
+import { z as z3 } from "zod";
+var PermissionSchema = z3.enum(["network", "filesystem", "shell", "browser"]);
+var VoiceSchema = z3.object({
+  name: z3.string().min(1, "Voice name cannot be empty"),
+  tools: z3.array(z3.any()),
+  required_permissions: z3.array(PermissionSchema)
 }).passthrough();
-var BudgetSchema = z2.object({
-  max_tokens: z2.number().positive().optional(),
-  max_cost_usd: z2.number().positive().optional(),
-  warn_at_percent: z2.number().min(1).max(100).optional()
+var BudgetSchema = z3.object({
+  max_tokens: z3.number().positive().optional(),
+  max_cost_usd: z3.number().positive().optional(),
+  warn_at_percent: z3.number().min(1).max(100).optional()
 }).strict();
-var AgentSchema = z2.object({
-  name: z2.string().min(1, "Agent name cannot be empty"),
-  system_prompt: z2.string().min(1, "Agent system_prompt cannot be empty"),
-  voices: z2.array(VoiceSchema),
-  model: z2.string().optional(),
-  description: z2.string().optional(),
-  permissions: z2.array(PermissionSchema).optional(),
-  max_turns: z2.number().int().positive("max_turns must be a positive number").optional(),
-  max_tool_calls: z2.number().int().positive("max_tool_calls must be a positive number").optional(),
-  tool_timeout_ms: z2.number().int().positive("tool_timeout_ms must be a positive number").optional(),
+var AgentSchema = z3.object({
+  name: z3.string().min(1, "Agent name cannot be empty"),
+  system_prompt: z3.string().min(1, "Agent system_prompt cannot be empty"),
+  voices: z3.array(VoiceSchema),
+  model: z3.string().optional(),
+  description: z3.string().optional(),
+  permissions: z3.array(PermissionSchema).optional(),
+  max_turns: z3.number().int().positive("max_turns must be a positive number").optional(),
+  max_tool_calls: z3.number().int().positive("max_tool_calls must be a positive number").optional(),
+  tool_timeout_ms: z3.number().int().positive("tool_timeout_ms must be a positive number").optional(),
   budget: BudgetSchema.optional(),
-  streaming: z2.boolean().optional(),
-  delegates: z2.array(z2.string()).optional(),
-  role: z2.enum(["orchestrator", "specialist"]).optional()
+  streaming: z3.boolean().optional(),
+  allow_human_input: z3.boolean().optional(),
+  delegates: z3.array(z3.string()).optional(),
+  role: z3.enum(["orchestrator", "specialist"]).optional()
 }).passthrough();
-var TelemetrySchema = z2.object({
-  enabled: z2.boolean(),
-  endpoint: z2.string().url("telemetry.endpoint must be a valid URL").optional(),
-  headers: z2.record(z2.string(), z2.string()).optional()
+var TelemetrySchema = z3.object({
+  enabled: z3.boolean(),
+  endpoint: z3.string().url("telemetry.endpoint must be a valid URL").optional(),
+  headers: z3.record(z3.string(), z3.string()).optional()
 }).strict();
-var ScoreSchema = z2.object({
-  provider: z2.object({ chat: z2.function() }).passthrough().refine((p) => typeof p.chat === "function", {
+var ScoreSchema = z3.object({
+  provider: z3.object({ chat: z3.function() }).passthrough().refine((p) => typeof p.chat === "function", {
     message: "provider must have a chat() method \u2014 did you forget to pass a provider instance?"
   }),
-  agents: z2.record(z2.string(), AgentSchema).refine(
+  agents: z3.record(z3.string(), AgentSchema).refine(
     (agents) => Object.keys(agents).length > 0,
     { message: "Score must define at least one agent" }
   ),
-  name: z2.string().optional(),
-  description: z2.string().optional(),
-  default_model: z2.string().optional(),
-  entry: z2.string().optional(),
+  name: z3.string().optional(),
+  description: z3.string().optional(),
+  default_model: z3.string().optional(),
+  entry: z3.string().optional(),
   telemetry: TelemetrySchema.optional()
 }).passthrough();
 function validateScore(config) {
@@ -1053,7 +1575,7 @@ function validateScore(config) {
       const path = issue.path.length > 0 ? issue.path.join(".") : "(root)";
       return `  - ${path}: ${issue.message}`;
     });
-    throw new Error(
+    throw new ScoreValidationError(
       "Invalid score file:\n" + issues.join("\n")
     );
   }
@@ -1063,18 +1585,20 @@ function validateScore(config) {
     if (agent.delegates) {
       for (const delegateId of agent.delegates) {
         if (!agentKeys.includes(delegateId)) {
-          throw new Error(
+          throw new ScoreValidationError(
             `Invalid score file:
-  - agents.${key}.delegates: references unknown agent "${delegateId}". Available: ${agentKeys.join(", ")}`
+  - agents.${key}.delegates: references unknown agent "${delegateId}". Available: ${agentKeys.join(", ")}`,
+            { field: `agents.${key}.delegates`, value: delegateId }
           );
         }
       }
     }
   }
   if (data.entry && !agentKeys.includes(data.entry)) {
-    throw new Error(
+    throw new ScoreValidationError(
       `Invalid score file:
-  - entry: references unknown agent "${data.entry}". Available: ${agentKeys.join(", ")}`
+  - entry: references unknown agent "${data.entry}". Available: ${agentKeys.join(", ")}`,
+      { field: "entry", value: data.entry }
     );
   }
 }
@@ -1117,8 +1641,9 @@ var AnthropicProvider = class {
   }
   async chat(request) {
     if (!request.model) {
-      throw new Error(
-        "AnthropicProvider requires a model on ChatRequest.\nSet model on the agent or default_model on the score."
+      throw new ProviderError(
+        "AnthropicProvider requires a model on ChatRequest.\nSet model on the agent or default_model on the score.",
+        { provider: "anthropic" }
       );
     }
     let response;
@@ -1142,10 +1667,10 @@ var AnthropicProvider = class {
     } catch (error) {
       const msg = error instanceof Error ? error.message : String(error);
       logger.error({ error: msg, provider: "anthropic" }, "Provider request failed");
-      throw new Error(
-        `Anthropic API error: ${msg}
-Check that ANTHROPIC_API_KEY is set correctly in your .env file.`
-      );
+      if (msg.includes("authentication") || msg.includes("apiKey") || msg.includes("authToken")) {
+        throw new AuthenticationError("anthropic");
+      }
+      throw new ProviderError(`Anthropic API error: ${msg}`, { provider: "anthropic" });
     }
     const content = response.content.map((block) => {
       if (block.type === "text") {
@@ -1173,8 +1698,9 @@ Check that ANTHROPIC_API_KEY is set correctly in your .env file.`
   }
   async *stream(request) {
     if (!request.model) {
-      throw new Error(
-        "AnthropicProvider requires a model on ChatRequest.\nSet model on the agent or default_model on the score."
+      throw new ProviderError(
+        "AnthropicProvider requires a model on ChatRequest.\nSet model on the agent or default_model on the score.",
+        { provider: "anthropic" }
       );
     }
     let raw;
@@ -1199,10 +1725,10 @@ Check that ANTHROPIC_API_KEY is set correctly in your .env file.`
     } catch (error) {
       const msg = error instanceof Error ? error.message : String(error);
       logger.error({ error: msg, provider: "anthropic" }, "Provider stream failed");
-      throw new Error(
-        `Anthropic API error: ${msg}
-Check that ANTHROPIC_API_KEY is set correctly in your .env file.`
-      );
+      if (msg.includes("authentication") || msg.includes("apiKey") || msg.includes("authToken")) {
+        throw new AuthenticationError("anthropic");
+      }
+      throw new ProviderError(`Anthropic API error: ${msg}`, { provider: "anthropic" });
     }
     const toolBlocks = /* @__PURE__ */ new Map();
     let inputTokens = 0;
@@ -1269,8 +1795,9 @@ var OpenAIProvider = class {
   }
   async chat(request) {
     if (!request.model) {
-      throw new Error(
-        "OpenAIProvider requires a model on ChatRequest.\nSet model on the agent or default_model on the score."
+      throw new ProviderError(
+        "OpenAIProvider requires a model on ChatRequest.\nSet model on the agent or default_model on the score.",
+        { provider: "openai" }
       );
     }
     const messages = [];
@@ -1339,10 +1866,10 @@ var OpenAIProvider = class {
     } catch (error) {
       const msg = error instanceof Error ? error.message : String(error);
       logger.error({ error: msg, provider: "openai" }, "Provider request failed");
-      throw new Error(
-        `OpenAI API error: ${msg}
-Check that OPENAI_API_KEY is set correctly in your .env file.`
-      );
+      if (msg.includes("Incorrect API key") || msg.includes("authentication")) {
+        throw new AuthenticationError("openai");
+      }
+      throw new ProviderError(`OpenAI API error: ${msg}`, { provider: "openai" });
     }
     const choice = response.choices[0];
     const content = [];
@@ -1385,8 +1912,9 @@ Check that OPENAI_API_KEY is set correctly in your .env file.`
   }
   async *stream(request) {
     if (!request.model) {
-      throw new Error(
-        "OpenAIProvider requires a model on ChatRequest.\nSet model on the agent or default_model on the score."
+      throw new ProviderError(
+        "OpenAIProvider requires a model on ChatRequest.\nSet model on the agent or default_model on the score.",
+        { provider: "openai" }
       );
     }
     const messages = [];
@@ -1501,9 +2029,7 @@ var GeminiProvider = class {
   constructor(options = {}) {
     const apiKey = options.api_key ?? SecretsManager.optional("GEMINI_API_KEY");
     if (!apiKey) {
-      throw new Error(
-        "GeminiProvider requires an API key.\nSet GEMINI_API_KEY in your .env file, or pass api_key to the constructor:\n  new GeminiProvider({ api_key: 'your-key' })"
-      );
+      throw new AuthenticationError("gemini");
     }
     this.client = new GoogleGenerativeAI(apiKey);
   }
@@ -1582,10 +2108,7 @@ var GeminiProvider = class {
     } catch (error) {
       const msg = error instanceof Error ? error.message : String(error);
       logger.error({ error: msg, provider: "gemini" }, "Provider request failed");
-      throw new Error(
-        `Gemini API error: ${msg}
-Check that GEMINI_API_KEY is set correctly in your .env file.`
-      );
+      throw new ProviderError(`Gemini API error: ${msg}`, { provider: "gemini" });
     }
     const response = result.response;
     const candidate = response.candidates?.[0];
@@ -1725,26 +2248,47 @@ function convertJsonSchemaToGemini(schema) {
   };
 }
 export {
+  AgentNotFoundError,
   AgentRouter,
   AgentRunner,
   AnthropicProvider,
+  AuthenticationError,
+  BudgetExceededError,
+  ContextWindowError,
+  EvalRunner,
   EventBus,
   GeminiProvider,
   InMemorySemanticStore,
   InMemorySessionStore,
   OpenAIProvider,
+  PathTraversalError,
+  PermissionError,
   PermissionGuard,
   PostgresSessionStore,
   PromptGuard,
+  ProviderError,
+  RateLimitError,
   ScoreLoader,
+  ScoreValidationError,
   SecretsManager,
   TokenBudget,
+  ToolTimeoutError,
+  TuttiError,
   TuttiRuntime,
   TuttiTracer,
+  UrlValidationError,
+  VoiceError,
+  createBlocklistHook,
+  createCacheHook,
   createLogger,
+  createLoggingHook,
+  createMaxCostHook,
   defineScore,
+  toJSON as evalToJSON,
+  toMarkdown as evalToMarkdown,
   initTelemetry,
   logger,
+  printTable as printEvalTable,
   shutdownTelemetry,
   validateScore
 };