@tuttiai/core 0.7.0 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +167 -4
- package/dist/index.js +745 -201
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -1,108 +1,193 @@
|
|
|
1
|
-
// src/
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
1
|
+
// src/errors.ts
|
|
2
|
+
/**
 * Base class for the structured errors defined in this module.
 *
 * @param {string} code - Machine-readable error code, stored on `code`.
 * @param {string} message - Human-readable message passed to `Error`.
 * @param {object} [context={}] - Extra structured details, stored on `context`.
 */
var TuttiError = class extends Error {
  constructor(code, message, context = {}) {
    super(message);
    this.code = code;
    this.context = context;
    // Use the concrete subclass name so logs show e.g. "AgentNotFoundError".
    this.name = this.constructor.name;
    // Error.captureStackTrace is a V8 extension; calling it unguarded throws
    // a TypeError on engines that do not provide it.
    if (typeof Error.captureStackTrace === "function") {
      Error.captureStackTrace(this, this.constructor);
    }
  }
  code;
  context;
};
|
|
13
|
+
/**
 * TuttiError subclass whose code is always "SCORE_INVALID".
 *
 * @param {string} message - Human-readable description of the problem.
 * @param {object} [context={}] - Structured details forwarded to TuttiError.
 */
var ScoreValidationError = class extends TuttiError {
  constructor(message, context = {}) {
    const code = "SCORE_INVALID";
    super(code, message, context);
  }
};
|
|
18
|
+
/**
 * TuttiError with code "AGENT_NOT_FOUND".
 *
 * @param {string} agentId - The agent ID that was requested.
 * @param {string[]} available - Agent IDs that do exist; echoed in the
 *   message and stored on `context.available`.
 */
var AgentNotFoundError = class extends TuttiError {
  constructor(agentId, available) {
    // An empty list would otherwise leave a dangling "Available agents: " line.
    const listed = available.length > 0 ? available.join(", ") : "(none)";
    super(
      "AGENT_NOT_FOUND",
      `Agent "${agentId}" not found in your score.\nAvailable agents: ${listed}\nCheck your tutti.score.ts \u2014 the agent ID must match the key in the agents object.`,
      { agent_id: agentId, available }
    );
  }
};
|
|
29
|
+
/**
 * TuttiError with code "PERMISSION_DENIED".
 *
 * @param {string} voice - Name of the voice whose permissions are missing.
 * @param {string[]} required - Permissions the voice needs.
 * @param {string[]} granted - Permissions that were actually granted.
 */
var PermissionError = class extends TuttiError {
  constructor(voice, required, granted) {
    // Only the permissions that are required but not granted are reported.
    const missing = required.filter((permission) => granted.includes(permission) === false);
    const missingList = missing.join(", ");
    const quotedList = missing.map((permission) => "'" + permission + "'").join(", ");
    super(
      "PERMISSION_DENIED",
      `Voice "${voice}" requires permissions not granted: ${missingList}\nGrant them in your score file:\npermissions: [${quotedList}]`,
      { voice, required, granted }
    );
  }
};
|
|
41
|
+
/**
 * TuttiError with code "BUDGET_EXCEEDED".
 *
 * @param {number} tokens - Token count, rendered with `toLocaleString()`.
 * @param {number} costUsd - Cost in USD, rendered with four decimals.
 * @param {*} limit - The limit, interpolated into the message as-is.
 */
var BudgetExceededError = class extends TuttiError {
  constructor(tokens, costUsd, limit) {
    const tokenText = tokens.toLocaleString();
    const costText = costUsd.toFixed(4);
    const details = { tokens, cost_usd: costUsd, limit };
    super(
      "BUDGET_EXCEEDED",
      `Token budget exceeded: ${tokenText} tokens, $${costText} (limit: ${limit}).`,
      details
    );
  }
};
|
|
50
|
+
/**
 * TuttiError with code "TOOL_TIMEOUT".
 *
 * @param {string} tool - Name of the tool that timed out.
 * @param {number} timeoutMs - The timeout, in milliseconds, that elapsed.
 */
var ToolTimeoutError = class extends TuttiError {
  constructor(tool, timeoutMs) {
    const headline = `Tool "${tool}" timed out after ${timeoutMs}ms.`;
    const advice = "Increase tool_timeout_ms in your agent config, or check if the tool is hanging.";
    super("TOOL_TIMEOUT", headline + "\n" + advice, { tool, timeout_ms: timeoutMs });
  }
};
|
|
60
|
+
/**
 * TuttiError with code "PROVIDER_ERROR".
 *
 * @param {string} message - Human-readable description.
 * @param {object} [context={ provider: "unknown" }] - Structured details;
 *   defaults to an object naming the provider as "unknown".
 */
var ProviderError = class extends TuttiError {
  constructor(message, context = { provider: "unknown" }) {
    const code = "PROVIDER_ERROR";
    super(code, message, context);
  }
};
|
|
65
|
+
/**
 * ProviderError whose `code` is overridden to "AUTH_ERROR".
 *
 * @param {string} provider - Name of the provider that rejected the credentials.
 */
var AuthenticationError = class extends ProviderError {
  constructor(provider) {
    const lines = [
      `Authentication failed for ${provider}.`,
      "Check that the API key is set correctly in your .env file."
    ];
    super(lines.join("\n"), { provider });
    // The parent constructor has already set a code; replace its value.
    Object.defineProperty(this, "code", { value: "AUTH_ERROR" });
  }
};
|
|
75
|
+
/**
 * ProviderError whose `code` is overridden to "RATE_LIMIT".
 *
 * @param {string} provider - Name of the provider that rate-limited the call.
 * @param {number} [retryAfter] - Seconds to wait before retrying; stored on
 *   `retryAfter` and mentioned in the message only when truthy.
 */
var RateLimitError = class extends ProviderError {
  retryAfter;
  constructor(provider, retryAfter) {
    let text;
    if (retryAfter) {
      text = `Rate limited by ${provider}. Retry after ${retryAfter}s.`;
    } else {
      text = `Rate limited by ${provider}.`;
    }
    super(text, { provider, retryAfter });
    Object.defineProperty(this, "code", { value: "RATE_LIMIT" });
    this.retryAfter = retryAfter;
  }
};
|
|
84
|
+
/**
 * ProviderError whose `code` is overridden to "CONTEXT_WINDOW".
 *
 * @param {string} provider - Name of the provider.
 * @param {number} [maxTokens] - Maximum token count; stored on `maxTokens`
 *   and included in the message only when truthy.
 */
var ContextWindowError = class extends ProviderError {
  maxTokens;
  constructor(provider, maxTokens) {
    const parts = [`Context window exceeded for ${provider}.`];
    if (maxTokens) {
      parts.push(` Max: ${maxTokens.toLocaleString()} tokens.`);
    }
    parts.push("\nReduce message history or use a model with a larger context window.");
    super(parts.join(""), { provider, max_tokens: maxTokens });
    Object.defineProperty(this, "code", { value: "CONTEXT_WINDOW" });
    this.maxTokens = maxTokens;
  }
};
|
|
96
|
+
/**
 * TuttiError with code "VOICE_ERROR".
 *
 * @param {string} message - Human-readable description.
 * @param {object} [context] - Structured details, forwarded unchanged to
 *   TuttiError.
 */
var VoiceError = class extends TuttiError {
  constructor(message, context) {
    const code = "VOICE_ERROR";
    super(code, message, context);
  }
};
|
|
101
|
+
/**
 * VoiceError whose `code` is overridden to "PATH_TRAVERSAL".
 * The context always names the voice as "filesystem".
 *
 * @param {string} path - The rejected path.
 */
var PathTraversalError = class extends VoiceError {
  constructor(path) {
    const lines = [
      `Path traversal detected: "${path}" is not allowed.`,
      "All file paths must stay within the allowed directory."
    ];
    super(lines.join("\n"), { voice: "filesystem", path });
    Object.defineProperty(this, "code", { value: "PATH_TRAVERSAL" });
  }
};
|
|
111
|
+
/**
 * VoiceError whose `code` is overridden to "URL_BLOCKED".
 * The context always names the voice as "playwright".
 *
 * @param {string} url - The rejected URL.
 */
var UrlValidationError = class extends VoiceError {
  constructor(url) {
    const lines = [
      `URL blocked: "${url}".`,
      "Only http:// and https:// URLs to public hosts are allowed."
    ];
    super(lines.join("\n"), { voice: "playwright", url });
    Object.defineProperty(this, "code", { value: "URL_BLOCKED" });
  }
};
|
|
77
121
|
|
|
78
|
-
// src/
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
122
|
+
// src/hooks/index.ts
|
|
123
|
+
/**
 * Build a hook set that logs each LLM call and tool call via `log.info`
 * and passes every value through unchanged.
 *
 * @param {{ info: Function }} log - Logger; `info(fields, message)` is called.
 * @returns {object} Hooks: beforeLLMCall, afterLLMCall, beforeToolCall,
 *   afterToolCall. Each returns a promise.
 */
function createLoggingHook(log) {
  return {
    beforeLLMCall: (ctx, request) => {
      const fields = { agent: ctx.agent_name, turn: ctx.turn, model: request.model };
      log.info(fields, "LLM call");
      return Promise.resolve(request);
    },
    afterLLMCall: (ctx, response) => {
      const fields = { agent: ctx.agent_name, turn: ctx.turn, usage: response.usage };
      log.info(fields, "LLM response");
      return Promise.resolve();
    },
    beforeToolCall: (ctx, tool, input) => {
      const fields = { agent: ctx.agent_name, tool, input };
      log.info(fields, "Tool call");
      return Promise.resolve(input);
    },
    afterToolCall: (ctx, tool, result) => {
      const fields = { agent: ctx.agent_name, tool, is_error: result.is_error };
      log.info(fields, "Tool result");
      return Promise.resolve(result);
    }
  };
}
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
sdk = void 0;
|
|
143
|
+
/**
 * Build a tool hook backed by a key/value store.
 *
 * Keys have the form `<tool>:<JSON of payload>`.
 *
 * NOTE(review): the lookup key is built from the tool *input*, while the
 * stored key is built from `result.content`, so a lookup only hits when an
 * input serialises identically to an earlier result. Confirm whether this
 * is intended.
 *
 * @param {{ get: Function, set: Function }} store - Store with Map-like
 *   `get(key)` and `set(key, value)`.
 * @returns {object} Hooks: beforeToolCall and afterToolCall.
 */
function createCacheHook(store) {
  const keyFor = (tool, payload) => `${tool}:${JSON.stringify(payload)}`;
  return {
    beforeToolCall(_ctx, tool, input) {
      const hit = store.get(keyFor(tool, input));
      // Any truthy stored value is returned in place of the input.
      return Promise.resolve(hit ? hit : input);
    },
    afterToolCall(_ctx, tool, result) {
      if (result.is_error) {
        // Failed results are never stored.
        return Promise.resolve(result);
      }
      const { content } = result;
      store.set(keyFor(tool, content), content);
      return Promise.resolve(result);
    }
  };
}
|
|
161
|
+
/**
 * Build a tool hook whose `beforeToolCall` resolves to `false` for any tool
 * named in `blockedTools` and to `true` for every other tool.
 *
 * @param {Iterable<string>} blockedTools - Tool names to block.
 * @returns {{ beforeToolCall: Function }} The hook object.
 */
function createBlocklistHook(blockedTools) {
  const denied = new Set(blockedTools);
  const isAllowed = (tool) => denied.has(tool) === false;
  return {
    beforeToolCall: (_ctx, tool) => Promise.resolve(isAllowed(tool))
  };
}
|
|
169
|
+
/**
 * Build an LLM hook that accumulates an estimated cost and rejects further
 * LLM calls once that cost reaches `maxUsd`.
 *
 * The estimate uses fixed rates of 3 per million input tokens and 15 per
 * million output tokens.
 *
 * @param {number} maxUsd - Spending limit.
 * @returns {object} Hooks: afterLLMCall adds to the total, beforeLLMCall
 *   checks it.
 */
function createMaxCostHook(maxUsd) {
  const INPUT_RATE = 3;
  const OUTPUT_RATE = 15;
  let spent = 0;
  return {
    afterLLMCall(_ctx, response) {
      const { input_tokens, output_tokens } = response.usage;
      const callCost = input_tokens / 1e6 * INPUT_RATE + output_tokens / 1e6 * OUTPUT_RATE;
      spent += callCost;
      return Promise.resolve();
    },
    beforeLLMCall(ctx, request) {
      if (spent < maxUsd) {
        return Promise.resolve(request);
      }
      const text = `Max cost hook: $${spent.toFixed(4)} exceeds limit $${maxUsd.toFixed(2)} for agent ${ctx.agent_name}`;
      return Promise.reject(new Error(text));
    }
  };
}
|
|
104
188
|
|
|
105
189
|
// src/agent-runner.ts
|
|
190
|
+
import { z } from "zod";
|
|
106
191
|
import { zodToJsonSchema } from "zod-to-json-schema";
|
|
107
192
|
|
|
108
193
|
// src/secrets.ts
|
|
@@ -228,21 +313,144 @@ var TokenBudget = class {
|
|
|
228
313
|
}
|
|
229
314
|
};
|
|
230
315
|
|
|
316
|
+
// src/logger.ts
|
|
317
|
+
import pino from "pino";
|
|
318
|
+
/**
 * Create a pino logger.
 *
 * The level comes from TUTTI_LOG_LEVEL (default "info"). Unless NODE_ENV is
 * "production", output goes through the "pino-pretty" transport.
 *
 * @param {string} name - Logger name passed to pino.
 * @returns {*} Whatever `pino(options)` returns.
 */
var createLogger = (name) => {
  const isProduction = process.env.NODE_ENV === "production";
  const prettyTransport = {
    target: "pino-pretty",
    options: {
      colorize: true,
      translateTime: "HH:MM:ss",
      ignore: "pid,hostname"
    }
  };
  return pino({
    name,
    level: process.env.TUTTI_LOG_LEVEL ?? "info",
    transport: isProduction ? void 0 : prettyTransport
  });
};
|
|
330
|
+
var logger = createLogger("tutti");
|
|
331
|
+
|
|
332
|
+
// src/telemetry.ts
|
|
333
|
+
import { trace, SpanStatusCode } from "@opentelemetry/api";
|
|
334
|
+
var tracer = trace.getTracer("tutti", "1.0.0");
|
|
335
|
+
/**
 * Run `fn` inside an active span named `spanName`.
 *
 * Sets the given attributes, marks the span OK when `fn` resolves or ERROR
 * (with the error message) when it throws, and always ends the span.
 * The error is rethrown unchanged.
 */
function runInTracedSpan(spanName, attributes, fn) {
  return tracer.startActiveSpan(spanName, async (span) => {
    for (const [key, value] of Object.entries(attributes)) {
      span.setAttribute(key, value);
    }
    try {
      const result = await fn();
      span.setStatus({ code: SpanStatusCode.OK });
      return result;
    } catch (err) {
      span.setStatus({
        code: SpanStatusCode.ERROR,
        message: err instanceof Error ? err.message : String(err)
      });
      throw err;
    } finally {
      span.end();
    }
  });
}

/**
 * Tracing wrappers for the three traced operations. Each method runs `fn`
 * inside its own span and returns whatever `fn` resolves to.
 */
var TuttiTracer = {
  /** Span "agent.run" with attributes agent.name and session.id. */
  agentRun(agentName, sessionId, fn) {
    return runInTracedSpan("agent.run", { "agent.name": agentName, "session.id": sessionId }, fn);
  },
  /** Span "llm.call" with attribute llm.model. */
  llmCall(model, fn) {
    return runInTracedSpan("llm.call", { "llm.model": model }, fn);
  },
  /** Span "tool.call" with attribute tool.name. */
  toolCall(toolName, fn) {
    return runInTracedSpan("tool.call", { "tool.name": toolName }, fn);
  }
};
|
|
392
|
+
|
|
231
393
|
// src/agent-runner.ts
|
|
232
394
|
var DEFAULT_MAX_TURNS = 10;
|
|
233
395
|
var DEFAULT_MAX_TOOL_CALLS = 20;
|
|
234
396
|
var DEFAULT_TOOL_TIMEOUT_MS = 3e4;
|
|
397
|
+
var DEFAULT_HITL_TIMEOUT_S = 300;
|
|
398
|
+
var MAX_PROVIDER_RETRIES = 3;
|
|
399
|
+
var hitlRequestSchema = z.object({
|
|
400
|
+
question: z.string().describe("The question to ask the human"),
|
|
401
|
+
options: z.array(z.string()).optional().describe("If provided, the human picks one of these"),
|
|
402
|
+
timeout_seconds: z.number().optional().describe("How long to wait before timing out (default 300)")
|
|
403
|
+
});
|
|
404
|
+
/**
 * Call `fn`, retrying when it fails with a ProviderError.
 *
 * At most MAX_PROVIDER_RETRIES attempts are made. Any other error, or a
 * ProviderError on the final attempt, is rethrown unchanged. Between
 * attempts the wait is `retryAfter` seconds for a RateLimitError that
 * carries one, otherwise an exponential backoff of 1s, 2s, 4s, capped at 8s.
 *
 * @param {Function} fn - Async operation to run.
 * @returns {Promise<*>} Whatever `fn` resolves to.
 */
async function withRetry(fn) {
  const pause = (ms) => new Promise((done) => setTimeout(done, ms));
  let attempt = 0;
  while (true) {
    attempt += 1;
    try {
      return await fn();
    } catch (err) {
      const canRetry = attempt < MAX_PROVIDER_RETRIES && err instanceof ProviderError;
      if (!canRetry) {
        throw err;
      }
      if (err instanceof RateLimitError && err.retryAfter) {
        logger.warn({ attempt, retryAfter: err.retryAfter }, "Rate limited, waiting before retry");
        await pause(err.retryAfter * 1e3);
        continue;
      }
      const delayMs = Math.min(1e3 * 2 ** (attempt - 1), 8e3);
      logger.warn({ attempt, delayMs }, "Provider error, retrying with backoff");
      await pause(delayMs);
    }
  }
}
|
|
235
423
|
var AgentRunner = class {
|
|
236
|
-
constructor(provider, events, sessions, semanticMemory) {
|
|
424
|
+
constructor(provider, events, sessions, semanticMemory, globalHooks) {
|
|
237
425
|
this.provider = provider;
|
|
238
426
|
this.events = events;
|
|
239
427
|
this.sessions = sessions;
|
|
240
428
|
this.semanticMemory = semanticMemory;
|
|
429
|
+
this.globalHooks = globalHooks;
|
|
241
430
|
}
|
|
242
431
|
provider;
|
|
243
432
|
events;
|
|
244
433
|
sessions;
|
|
245
434
|
semanticMemory;
|
|
435
|
+
globalHooks;
|
|
436
|
+
pendingHitl = /* @__PURE__ */ new Map();
|
|
437
|
+
async safeHook(fn) {
|
|
438
|
+
if (!fn) return void 0;
|
|
439
|
+
try {
|
|
440
|
+
return await fn() ?? void 0;
|
|
441
|
+
} catch (err) {
|
|
442
|
+
logger.warn({ error: err instanceof Error ? err.message : String(err) }, "Hook error (non-fatal)");
|
|
443
|
+
return void 0;
|
|
444
|
+
}
|
|
445
|
+
}
|
|
446
|
+
/** Resolve a pending human-in-the-loop request for a session. */
|
|
447
|
+
answer(sessionId, answer) {
|
|
448
|
+
const resolve2 = this.pendingHitl.get(sessionId);
|
|
449
|
+
if (resolve2) {
|
|
450
|
+
this.pendingHitl.delete(sessionId);
|
|
451
|
+
resolve2(answer);
|
|
452
|
+
}
|
|
453
|
+
}
|
|
246
454
|
async run(agent, input, session_id) {
|
|
247
455
|
const session = session_id ? this.sessions.get(session_id) : this.sessions.create(agent.name);
|
|
248
456
|
if (!session) {
|
|
@@ -253,13 +461,31 @@ Omit session_id to start a new conversation.`
|
|
|
253
461
|
);
|
|
254
462
|
}
|
|
255
463
|
return TuttiTracer.agentRun(agent.name, session.id, async () => {
|
|
464
|
+
const agentHooks = agent.hooks;
|
|
465
|
+
const hookCtx = {
|
|
466
|
+
agent_name: agent.name,
|
|
467
|
+
session_id: session.id,
|
|
468
|
+
turn: 0,
|
|
469
|
+
metadata: {}
|
|
470
|
+
};
|
|
471
|
+
await this.safeHook(() => this.globalHooks?.beforeAgentRun?.(hookCtx));
|
|
472
|
+
await this.safeHook(() => agentHooks?.beforeAgentRun?.(hookCtx));
|
|
256
473
|
logger.info({ agent: agent.name, session: session.id }, "Agent started");
|
|
257
474
|
this.events.emit({
|
|
258
475
|
type: "agent:start",
|
|
259
476
|
agent_name: agent.name,
|
|
260
477
|
session_id: session.id
|
|
261
478
|
});
|
|
262
|
-
const
|
|
479
|
+
const voiceCtx = { session_id: session.id, agent_name: agent.name };
|
|
480
|
+
for (const voice of agent.voices) {
|
|
481
|
+
if (voice.setup) {
|
|
482
|
+
await voice.setup(voiceCtx);
|
|
483
|
+
}
|
|
484
|
+
}
|
|
485
|
+
const allTools = [...agent.voices.flatMap((v) => v.tools)];
|
|
486
|
+
if (agent.allow_human_input) {
|
|
487
|
+
allTools.push(this.createHitlTool(agent.name, session.id));
|
|
488
|
+
}
|
|
263
489
|
const toolDefs = allTools.map(toolToDefinition);
|
|
264
490
|
const messages = [
|
|
265
491
|
...session.messages,
|
|
@@ -297,12 +523,17 @@ Omit session_id to start a new conversation.`
|
|
|
297
523
|
}
|
|
298
524
|
}
|
|
299
525
|
}
|
|
300
|
-
|
|
526
|
+
let request = {
|
|
301
527
|
model: agent.model,
|
|
302
528
|
system: systemPrompt,
|
|
303
529
|
messages,
|
|
304
530
|
tools: toolDefs.length > 0 ? toolDefs : void 0
|
|
305
531
|
};
|
|
532
|
+
hookCtx.turn = turns;
|
|
533
|
+
const globalReq = await this.safeHook(() => this.globalHooks?.beforeLLMCall?.(hookCtx, request));
|
|
534
|
+
if (globalReq) request = globalReq;
|
|
535
|
+
const agentReq = await this.safeHook(() => agentHooks?.beforeLLMCall?.(hookCtx, request));
|
|
536
|
+
if (agentReq) request = agentReq;
|
|
306
537
|
logger.debug({ agent: agent.name, model: agent.model }, "LLM request");
|
|
307
538
|
this.events.emit({
|
|
308
539
|
type: "llm:request",
|
|
@@ -311,7 +542,9 @@ Omit session_id to start a new conversation.`
|
|
|
311
542
|
});
|
|
312
543
|
const response = await TuttiTracer.llmCall(
|
|
313
544
|
agent.model ?? "unknown",
|
|
314
|
-
() =>
|
|
545
|
+
() => withRetry(
|
|
546
|
+
() => agent.streaming ? this.streamToResponse(agent.name, request) : this.provider.chat(request)
|
|
547
|
+
)
|
|
315
548
|
);
|
|
316
549
|
logger.debug(
|
|
317
550
|
{ agent: agent.name, stopReason: response.stop_reason, usage: response.usage },
|
|
@@ -322,6 +555,8 @@ Omit session_id to start a new conversation.`
|
|
|
322
555
|
agent_name: agent.name,
|
|
323
556
|
response
|
|
324
557
|
});
|
|
558
|
+
await this.safeHook(() => this.globalHooks?.afterLLMCall?.(hookCtx, response));
|
|
559
|
+
await this.safeHook(() => agentHooks?.afterLLMCall?.(hookCtx, response));
|
|
325
560
|
totalUsage.input_tokens += response.usage.input_tokens;
|
|
326
561
|
totalUsage.output_tokens += response.usage.output_tokens;
|
|
327
562
|
if (budget) {
|
|
@@ -402,7 +637,7 @@ Omit session_id to start a new conversation.`
|
|
|
402
637
|
}
|
|
403
638
|
const toolResults = await Promise.all(
|
|
404
639
|
toolUseBlocks.map(
|
|
405
|
-
(block) => this.executeTool(allTools, block, toolContext, toolTimeoutMs)
|
|
640
|
+
(block) => this.executeTool(allTools, block, toolContext, toolTimeoutMs, hookCtx, agentHooks)
|
|
406
641
|
)
|
|
407
642
|
);
|
|
408
643
|
messages.push({ role: "user", content: toolResults });
|
|
@@ -419,13 +654,16 @@ Omit session_id to start a new conversation.`
|
|
|
419
654
|
agent_name: agent.name,
|
|
420
655
|
session_id: session.id
|
|
421
656
|
});
|
|
422
|
-
|
|
657
|
+
const agentResult = {
|
|
423
658
|
session_id: session.id,
|
|
424
659
|
output,
|
|
425
660
|
messages,
|
|
426
661
|
turns,
|
|
427
662
|
usage: totalUsage
|
|
428
663
|
};
|
|
664
|
+
await this.safeHook(() => this.globalHooks?.afterAgentRun?.(hookCtx, agentResult));
|
|
665
|
+
await this.safeHook(() => agentHooks?.afterAgentRun?.(hookCtx, agentResult));
|
|
666
|
+
return agentResult;
|
|
429
667
|
});
|
|
430
668
|
}
|
|
431
669
|
async executeWithTimeout(fn, timeoutMs, toolName) {
|
|
@@ -433,12 +671,7 @@ Omit session_id to start a new conversation.`
|
|
|
433
671
|
fn(),
|
|
434
672
|
new Promise(
|
|
435
673
|
(_, reject) => setTimeout(
|
|
436
|
-
() => reject(
|
|
437
|
-
new Error(
|
|
438
|
-
`Tool "${toolName}" timed out after ${timeoutMs}ms.
|
|
439
|
-
Increase tool_timeout_ms in your agent config, or check if the tool is hanging.`
|
|
440
|
-
)
|
|
441
|
-
),
|
|
674
|
+
() => reject(new ToolTimeoutError(toolName, timeoutMs)),
|
|
442
675
|
timeoutMs
|
|
443
676
|
)
|
|
444
677
|
)
|
|
@@ -476,7 +709,42 @@ Increase tool_timeout_ms in your agent config, or check if the tool is hanging.`
|
|
|
476
709
|
}
|
|
477
710
|
return { id: "", content, stop_reason: stopReason, usage };
|
|
478
711
|
}
|
|
479
|
-
|
|
712
|
+
createHitlTool(agentName, sessionId) {
|
|
713
|
+
return {
|
|
714
|
+
name: "request_human_input",
|
|
715
|
+
description: "Pause and ask the human for guidance or approval before proceeding.",
|
|
716
|
+
parameters: hitlRequestSchema,
|
|
717
|
+
execute: async (input) => {
|
|
718
|
+
const timeout = (input.timeout_seconds ?? DEFAULT_HITL_TIMEOUT_S) * 1e3;
|
|
719
|
+
logger.info({ agent: agentName, question: input.question }, "Waiting for human input");
|
|
720
|
+
const answer = await new Promise((resolve2) => {
|
|
721
|
+
this.pendingHitl.set(sessionId, resolve2);
|
|
722
|
+
this.events.emit({
|
|
723
|
+
type: "hitl:requested",
|
|
724
|
+
agent_name: agentName,
|
|
725
|
+
session_id: sessionId,
|
|
726
|
+
question: input.question,
|
|
727
|
+
options: input.options
|
|
728
|
+
});
|
|
729
|
+
setTimeout(() => {
|
|
730
|
+
if (this.pendingHitl.has(sessionId)) {
|
|
731
|
+
this.pendingHitl.delete(sessionId);
|
|
732
|
+
this.events.emit({ type: "hitl:timeout", agent_name: agentName, session_id: sessionId });
|
|
733
|
+
resolve2("[timeout: human did not respond within " + timeout / 1e3 + "s]");
|
|
734
|
+
}
|
|
735
|
+
}, timeout);
|
|
736
|
+
});
|
|
737
|
+
this.events.emit({
|
|
738
|
+
type: "hitl:answered",
|
|
739
|
+
agent_name: agentName,
|
|
740
|
+
session_id: sessionId,
|
|
741
|
+
answer
|
|
742
|
+
});
|
|
743
|
+
return { content: "Human responded: " + answer };
|
|
744
|
+
}
|
|
745
|
+
};
|
|
746
|
+
}
|
|
747
|
+
async executeTool(tools, block, context, timeoutMs, hookCtx, agentHooks) {
|
|
480
748
|
const tool = tools.find((t) => t.name === block.name);
|
|
481
749
|
if (!tool) {
|
|
482
750
|
const available = tools.map((t) => t.name).join(", ") || "(none)";
|
|
@@ -488,6 +756,16 @@ Increase tool_timeout_ms in your agent config, or check if the tool is hanging.`
|
|
|
488
756
|
};
|
|
489
757
|
}
|
|
490
758
|
return TuttiTracer.toolCall(block.name, async () => {
|
|
759
|
+
if (hookCtx) {
|
|
760
|
+
const globalResult = await this.safeHook(() => this.globalHooks?.beforeToolCall?.(hookCtx, block.name, block.input));
|
|
761
|
+
if (globalResult === false) {
|
|
762
|
+
return { type: "tool_result", tool_use_id: block.id, content: "Tool call blocked by hook", is_error: true };
|
|
763
|
+
}
|
|
764
|
+
const agentResult = await this.safeHook(() => agentHooks?.beforeToolCall?.(hookCtx, block.name, block.input));
|
|
765
|
+
if (agentResult === false) {
|
|
766
|
+
return { type: "tool_result", tool_use_id: block.id, content: "Tool call blocked by hook", is_error: true };
|
|
767
|
+
}
|
|
768
|
+
}
|
|
491
769
|
logger.debug({ tool: block.name, input: block.input }, "Tool called");
|
|
492
770
|
this.events.emit({
|
|
493
771
|
type: "tool:start",
|
|
@@ -497,11 +775,17 @@ Increase tool_timeout_ms in your agent config, or check if the tool is hanging.`
|
|
|
497
775
|
});
|
|
498
776
|
try {
|
|
499
777
|
const parsed = tool.parameters.parse(block.input);
|
|
500
|
-
|
|
778
|
+
let result = await this.executeWithTimeout(
|
|
501
779
|
() => tool.execute(parsed, context),
|
|
502
780
|
timeoutMs,
|
|
503
781
|
block.name
|
|
504
782
|
);
|
|
783
|
+
if (hookCtx) {
|
|
784
|
+
const globalMod = await this.safeHook(() => this.globalHooks?.afterToolCall?.(hookCtx, block.name, result));
|
|
785
|
+
if (globalMod) result = globalMod;
|
|
786
|
+
const agentMod = await this.safeHook(() => agentHooks?.afterToolCall?.(hookCtx, block.name, result));
|
|
787
|
+
if (agentMod) result = agentMod;
|
|
788
|
+
}
|
|
505
789
|
logger.debug({ tool: block.name, result: result.content }, "Tool completed");
|
|
506
790
|
this.events.emit({
|
|
507
791
|
type: "tool:end",
|
|
@@ -731,18 +1015,18 @@ var PostgresSessionStore = class {
|
|
|
731
1015
|
import { randomUUID as randomUUID3 } from "crypto";
|
|
732
1016
|
var InMemorySemanticStore = class {
|
|
733
1017
|
entries = [];
|
|
734
|
-
|
|
1018
|
+
add(entry) {
|
|
735
1019
|
const full = {
|
|
736
1020
|
...entry,
|
|
737
1021
|
id: randomUUID3(),
|
|
738
1022
|
created_at: /* @__PURE__ */ new Date()
|
|
739
1023
|
};
|
|
740
1024
|
this.entries.push(full);
|
|
741
|
-
return full;
|
|
1025
|
+
return Promise.resolve(full);
|
|
742
1026
|
}
|
|
743
|
-
|
|
1027
|
+
search(query, agent_name, limit = 5) {
|
|
744
1028
|
const queryTokens = tokenize(query);
|
|
745
|
-
if (queryTokens.size === 0) return [];
|
|
1029
|
+
if (queryTokens.size === 0) return Promise.resolve([]);
|
|
746
1030
|
const agentEntries = this.entries.filter(
|
|
747
1031
|
(e) => e.agent_name === agent_name
|
|
748
1032
|
);
|
|
@@ -755,13 +1039,17 @@ var InMemorySemanticStore = class {
|
|
|
755
1039
|
const score = overlap / queryTokens.size;
|
|
756
1040
|
return { entry, score };
|
|
757
1041
|
});
|
|
758
|
-
return
|
|
1042
|
+
return Promise.resolve(
|
|
1043
|
+
scored.filter((s) => s.score > 0).sort((a, b) => b.score - a.score).slice(0, limit).map((s) => s.entry)
|
|
1044
|
+
);
|
|
759
1045
|
}
|
|
760
|
-
|
|
1046
|
+
delete(id) {
|
|
761
1047
|
this.entries = this.entries.filter((e) => e.id !== id);
|
|
1048
|
+
return Promise.resolve();
|
|
762
1049
|
}
|
|
763
|
-
|
|
1050
|
+
clear(agent_name) {
|
|
764
1051
|
this.entries = this.entries.filter((e) => e.agent_name !== agent_name);
|
|
1052
|
+
return Promise.resolve();
|
|
765
1053
|
}
|
|
766
1054
|
};
|
|
767
1055
|
function tokenize(text) {
|
|
@@ -777,9 +1065,7 @@ var PermissionGuard = class {
|
|
|
777
1065
|
(p) => !granted.includes(p)
|
|
778
1066
|
);
|
|
779
1067
|
if (missing.length > 0) {
|
|
780
|
-
throw new
|
|
781
|
-
"Voice " + voice.name + " requires permissions not granted: " + missing.join(", ") + "\n\nGrant them in your score file:\n permissions: [" + missing.map((p) => "'" + p + "'").join(", ") + "]"
|
|
782
|
-
);
|
|
1068
|
+
throw new PermissionError(voice.name, voice.required_permissions, granted);
|
|
783
1069
|
}
|
|
784
1070
|
}
|
|
785
1071
|
static warn(voice) {
|
|
@@ -795,6 +1081,33 @@ var PermissionGuard = class {
|
|
|
795
1081
|
}
|
|
796
1082
|
};
|
|
797
1083
|
|
|
1084
|
+
// src/telemetry-setup.ts
|
|
1085
|
+
import { NodeSDK } from "@opentelemetry/sdk-node";
|
|
1086
|
+
import { OTLPTraceExporter } from "@opentelemetry/exporter-trace-otlp-http";
|
|
1087
|
+
import { getNodeAutoInstrumentations } from "@opentelemetry/auto-instrumentations-node";
|
|
1088
|
+
var sdk;
|
|
1089
|
+
/**
 * Start OpenTelemetry tracing once.
 *
 * Does nothing when `config.enabled` is falsy or an SDK is already running.
 * Traces are exported over OTLP/HTTP to `<endpoint>/v1/traces`.
 *
 * @param {object} config
 * @param {boolean} config.enabled - Whether tracing should start.
 * @param {string} [config.endpoint="http://localhost:4318"] - Collector base URL.
 * @param {object} [config.headers] - Headers passed to the exporter.
 */
function initTelemetry(config) {
  if (!config.enabled || sdk) return;
  // Strip trailing slashes so "http://host:4318/" does not become
  // "http://host:4318//v1/traces".
  const endpoint = String(config.endpoint ?? "http://localhost:4318").replace(/\/+$/, "");
  const exporter = new OTLPTraceExporter({
    url: `${endpoint}/v1/traces`,
    headers: config.headers
  });
  sdk = new NodeSDK({
    traceExporter: exporter,
    // Filesystem instrumentation is switched off explicitly.
    instrumentations: [getNodeAutoInstrumentations({ "@opentelemetry/instrumentation-fs": { enabled: false } })],
    serviceName: process.env.OTEL_SERVICE_NAME ?? "tutti"
  });
  sdk.start();
  logger.info({ endpoint }, "OpenTelemetry tracing enabled");
}
|
|
1104
|
+
/**
 * Shut down the running telemetry SDK, if any.
 *
 * The module-level reference is cleared before awaiting, so a failed
 * shutdown does not leave a dead SDK registered (which would stop
 * initTelemetry from ever starting a new one), and overlapping calls shut
 * the SDK down only once. A shutdown error still propagates to the caller.
 *
 * @returns {Promise<void>}
 */
async function shutdownTelemetry() {
  if (!sdk) return;
  const active = sdk;
  sdk = void 0;
  await active.shutdown();
}
|
|
1110
|
+
|
|
798
1111
|
// src/runtime.ts
|
|
799
1112
|
var TuttiRuntime = class _TuttiRuntime {
|
|
800
1113
|
events;
|
|
@@ -811,7 +1124,8 @@ var TuttiRuntime = class _TuttiRuntime {
|
|
|
811
1124
|
score.provider,
|
|
812
1125
|
this.events,
|
|
813
1126
|
this._sessions,
|
|
814
|
-
this.semanticMemory
|
|
1127
|
+
this.semanticMemory,
|
|
1128
|
+
score.hooks
|
|
815
1129
|
);
|
|
816
1130
|
if (score.telemetry) {
|
|
817
1131
|
initTelemetry(score.telemetry);
|
|
@@ -837,15 +1151,17 @@ var TuttiRuntime = class _TuttiRuntime {
|
|
|
837
1151
|
if (memory.provider === "postgres") {
|
|
838
1152
|
const url = memory.url ?? process.env.DATABASE_URL;
|
|
839
1153
|
if (!url) {
|
|
840
|
-
throw new
|
|
841
|
-
"PostgreSQL session store requires a connection URL.\nSet memory.url in your score, or DATABASE_URL in your .env file."
|
|
1154
|
+
throw new ScoreValidationError(
|
|
1155
|
+
"PostgreSQL session store requires a connection URL.\nSet memory.url in your score, or DATABASE_URL in your .env file.",
|
|
1156
|
+
{ field: "memory.url" }
|
|
842
1157
|
);
|
|
843
1158
|
}
|
|
844
1159
|
return new PostgresSessionStore(url);
|
|
845
1160
|
}
|
|
846
|
-
throw new
|
|
1161
|
+
throw new ScoreValidationError(
|
|
847
1162
|
`Unsupported memory provider: "${memory.provider}".
|
|
848
|
-
Supported: "in-memory", "postgres"
|
|
1163
|
+
Supported: "in-memory", "postgres"`,
|
|
1164
|
+
{ field: "memory.provider", value: memory.provider }
|
|
849
1165
|
);
|
|
850
1166
|
}
|
|
851
1167
|
/** The score configuration this runtime was created with. */
|
|
@@ -859,12 +1175,7 @@ Supported: "in-memory", "postgres"`
|
|
|
859
1175
|
async run(agent_name, input, session_id) {
|
|
860
1176
|
const agent = this._score.agents[agent_name];
|
|
861
1177
|
if (!agent) {
|
|
862
|
-
|
|
863
|
-
throw new Error(
|
|
864
|
-
`Agent "${agent_name}" not found in your score.
|
|
865
|
-
Available agents: ${available}
|
|
866
|
-
Check your tutti.score.ts \u2014 the agent ID must match the key in the agents object.`
|
|
867
|
-
);
|
|
1178
|
+
throw new AgentNotFoundError(agent_name, Object.keys(this._score.agents));
|
|
868
1179
|
}
|
|
869
1180
|
const granted = agent.permissions ?? [];
|
|
870
1181
|
for (const voice of agent.voices) {
|
|
@@ -874,14 +1185,224 @@ Check your tutti.score.ts \u2014 the agent ID must match the key in the agents o
|
|
|
874
1185
|
const resolvedAgent = agent.model ? agent : { ...agent, model: this._score.default_model ?? "claude-sonnet-4-20250514" };
|
|
875
1186
|
return this._runner.run(resolvedAgent, input, session_id);
|
|
876
1187
|
}
|
|
1188
|
+
/**
|
|
1189
|
+
* Provide an answer to a pending human-in-the-loop request.
|
|
1190
|
+
* Call this when a `hitl:requested` event fires to resume the agent.
|
|
1191
|
+
*/
|
|
1192
|
+
answer(sessionId, answer) {
|
|
1193
|
+
this._runner.answer(sessionId, answer);
|
|
1194
|
+
}
|
|
877
1195
|
/** Retrieve an existing session. */
|
|
878
1196
|
getSession(id) {
|
|
879
1197
|
return this._sessions.get(id);
|
|
880
1198
|
}
|
|
881
1199
|
};
|
|
882
1200
|
|
|
1201
|
+
// src/eval/runner.ts
|
|
1202
|
+
var INPUT_PER_M = 3;
|
|
1203
|
+
var OUTPUT_PER_M = 15;
|
|
1204
|
+
/**
 * Estimate a cost from token counts, using the module-level per-million
 * rates INPUT_PER_M and OUTPUT_PER_M.
 *
 * @param {number} inputTokens - Number of input tokens.
 * @param {number} outputTokens - Number of output tokens.
 * @returns {number} Input cost plus output cost.
 */
function estimateCost(inputTokens, outputTokens) {
  const inputCost = inputTokens / 1e6 * INPUT_PER_M;
  const outputCost = outputTokens / 1e6 * OUTPUT_PER_M;
  return inputCost + outputCost;
}
|
|
1207
|
+
/**
 * Runs an eval suite against a TuttiRuntime and grades every case
 * against its assertions.
 */
var EvalRunner = class {
  runtime;
  /**
   * @param score - Score definition passed straight to `new TuttiRuntime(score)`.
   */
  constructor(score) {
    this.runtime = new TuttiRuntime(score);
  }
  /**
   * Run every case of the suite and build the report.
   *
   * Cases run one after another: runCase() records every "tool:start"
   * event seen on the runtime's event bus, so overlapping cases would
   * record each other's tool calls.
   *
   * @param suite - Object with `name` and `cases`.
   * @returns {Promise<{suite_name: *, results: Array, summary: object}>}
   */
  async run(suite) {
    const results = [];
    for (const testCase of suite.cases) {
      const result = await this.runCase(testCase);
      results.push(result);
    }
    const summary = this.summarize(results);
    return { suite_name: suite.name, results, summary };
  }
  /**
   * Run a single case and evaluate its assertions.
   *
   * A failure of `runtime.run` is captured rather than rethrown: the case is
   * reported as failed, `error` holds the message and `output` is
   * "[error] <message>".
   *
   * @param testCase - Object with `id`, `name`, `agent_id`, `input` and
   *   (optionally) `assertions`.
   * @returns {Promise<object>} Per-case result record.
   */
  async runCase(testCase) {
    const toolsCalled = [];
    const unsubscribeToolStart = this.runtime.events.on("tool:start", (e) => {
      toolsCalled.push(e.tool_name);
    });
    const start = Date.now();
    let output = "";
    let turns = 0;
    let usage = { input_tokens: 0, output_tokens: 0 };
    let error;
    try {
      const result = await this.runtime.run(testCase.agent_id, testCase.input);
      output = result.output;
      turns = result.turns;
      // Keep the zeroed default when the runtime reports no usage.
      usage = result.usage ?? usage;
    } catch (err) {
      const message = err instanceof Error ? err.message : String(err);
      // An empty message must still mark the case as errored; the report
      // helpers test `error` for truthiness.
      error = message || "Unknown error";
      output = "[error] " + error;
    } finally {
      // Always detach the listener, even if error formatting itself throws.
      unsubscribeToolStart();
    }
    const failed = error !== undefined;
    const durationMs = Date.now() - start;
    const costUsd = estimateCost(usage.input_tokens, usage.output_tokens);
    const assertions = testCase.assertions ?? [];
    const assertionResults = assertions.map(
      (assertion) => this.checkAssertion(assertion, output, toolsCalled, turns, costUsd)
    );
    const passedCount = assertionResults.filter((a) => a.passed).length;
    // With no assertions the score reflects only whether the run errored.
    const score = assertions.length > 0 ? passedCount / assertions.length : failed ? 0 : 1;
    return {
      case_id: testCase.id,
      case_name: testCase.name,
      passed: assertionResults.every((a) => a.passed) && !failed,
      score,
      output,
      turns,
      usage,
      cost_usd: costUsd,
      duration_ms: durationMs,
      assertions: assertionResults,
      error
    };
  }
  /**
   * Evaluate one assertion against the outcome of a run.
   *
   * @param assertion - Object with `type` and `value`.
   * @param {string} output - Final agent output.
   * @param {string[]} toolsCalled - Tool names recorded during the run.
   * @param {number} turns - Number of turns the run took.
   * @param {number} costUsd - Estimated cost of the run.
   * @returns {{assertion: *, passed: boolean, actual: *}} Never throws for an
   *   invalid regex pattern or unknown type; both yield `passed: false`.
   */
  checkAssertion(assertion, output, toolsCalled, turns, costUsd) {
    const val = assertion.value;
    switch (assertion.type) {
      case "contains":
        return {
          assertion,
          passed: output.toLowerCase().includes(String(val).toLowerCase()),
          actual: output.slice(0, 200)
        };
      case "not_contains":
        return {
          assertion,
          passed: !output.toLowerCase().includes(String(val).toLowerCase()),
          actual: output.slice(0, 200)
        };
      case "matches_regex": {
        let regex;
        try {
          regex = new RegExp(String(val), "i");
        } catch (err) {
          // A malformed pattern fails this assertion instead of aborting
          // the whole suite run.
          const reason = err instanceof Error ? err.message : String(err);
          return { assertion, passed: false, actual: "invalid regex: " + reason };
        }
        return {
          assertion,
          passed: regex.test(output),
          actual: output.slice(0, 200)
        };
      }
      case "tool_called":
        return {
          assertion,
          passed: toolsCalled.includes(String(val)),
          actual: toolsCalled.join(", ") || "(none)"
        };
      case "tool_not_called":
        return {
          assertion,
          passed: !toolsCalled.includes(String(val)),
          actual: toolsCalled.join(", ") || "(none)"
        };
      case "turns_lte":
        return {
          assertion,
          passed: turns <= Number(val),
          actual: turns
        };
      case "cost_lte":
        return {
          assertion,
          passed: costUsd <= Number(val),
          actual: Number(costUsd.toFixed(4))
        };
      default:
        logger.warn({ type: assertion.type }, "Unknown assertion type");
        return { assertion, passed: false, actual: "unknown assertion type" };
    }
  }
  /**
   * Aggregate per-case results into suite totals.
   *
   * @param {Array} results - Records produced by runCase().
   * @returns {{total: number, passed: number, failed: number, avg_score: number,
   *   total_cost_usd: number, total_duration_ms: number}} `avg_score` is
   *   rounded to 2 decimals and `total_cost_usd` to 4.
   */
  summarize(results) {
    const passed = results.filter((r) => r.passed).length;
    const scores = results.map((r) => r.score);
    const avgScore = scores.length > 0 ? scores.reduce((a, b) => a + b, 0) / scores.length : 0;
    const totalCost = results.reduce((s, r) => s + r.cost_usd, 0);
    const totalDuration = results.reduce((s, r) => s + r.duration_ms, 0);
    return {
      total: results.length,
      passed,
      failed: results.length - passed,
      avg_score: Number(avgScore.toFixed(2)),
      total_cost_usd: Number(totalCost.toFixed(4)),
      total_duration_ms: totalDuration
    };
  }
};
|
|
1330
|
+
|
|
1331
|
+
// src/eval/report.ts
|
|
1332
|
+
/**
 * Fit a value into a fixed-width column: truncate it when it is too long,
 * right-pad it with spaces when it is too short.
 *
 * @param {*} str - Cell content. Non-strings are coerced with String();
 *   null/undefined become an empty (all-space) cell instead of throwing.
 * @param {number} len - Target width in characters.
 * @returns {string} The content truncated or padded to `len` characters.
 */
function pad(str, len) {
  const text = String(str ?? "");
  return text.length >= len ? text.slice(0, len) : text.padEnd(len, " ");
}
|
|
1335
|
+
/**
 * Print an eval report to stdout as an ANSI-coloured table: one row per
 * case, the failing assertions and error beneath each failed case, and a
 * closing summary line.
 *
 * @param report - Object with `suite_name`, `results` and `summary`.
 */
function printTable(report) {
  const { results, summary } = report;
  const describe = ({ assertion }) => assertion.description ?? `${assertion.type}: ${String(assertion.value)}`;
  console.log();
  console.log(` Eval suite: ${report.suite_name} (${summary.total} cases)`);
  console.log();
  for (const r of results) {
    const icon = r.passed ? "\x1B[32m\u2714\x1B[0m" : "\x1B[31m\u2717\x1B[0m";
    const score = r.score.toFixed(2);
    const cost = `$${r.cost_usd.toFixed(3)}`;
    console.log(` ${icon} ${pad(r.case_id, 10)} ${pad(r.case_name, 28)} ${pad(score, 6)} ${r.turns} turns ${cost}`);
    if (r.passed) {
      continue;
    }
    // Details are shown only for failed cases: failing assertions first, then the error.
    r.assertions
      .filter((a) => !a.passed)
      .forEach((a) => {
        console.log(` \x1B[31m\u21B3 FAIL: ${describe(a)} (actual: ${String(a.actual).slice(0, 60)})\x1B[0m`);
      });
    if (r.error) {
      console.log(` \x1B[31m\u21B3 ERROR: ${r.error.slice(0, 80)}\x1B[0m`);
    }
  }
  const pct = summary.total > 0 ? Math.round((summary.passed / summary.total) * 100) : 0;
  console.log();
  console.log(
    ` Results: ${summary.passed}/${summary.total} passed (${pct}%) | Avg: ${summary.avg_score.toFixed(2)} | Total: $${summary.total_cost_usd.toFixed(3)}`
  );
  console.log();
}
|
|
1365
|
+
/**
 * Serialize an eval report as pretty-printed JSON.
 *
 * @param report - Any JSON-serializable report object.
 * @returns {string} JSON text indented with two spaces.
 */
function toJSON(report) {
  const replacer = null;
  const indent = 2;
  return JSON.stringify(report, replacer, indent);
}
|
|
1368
|
+
/**
 * Render an eval report as Markdown: a results table, a summary line and,
 * when any case failed, a "Failures" section listing the failing
 * assertions and error of each failed case.
 *
 * @param report - Object with `suite_name`, `results` and `summary`.
 * @returns {string} Markdown text, lines joined with "\n".
 */
function toMarkdown(report) {
  const { results, summary } = report;
  // A literal "|" or a line break inside a cell would split the table row,
  // so escape pipes and flatten line breaks in table cells.
  const cell = (value) => String(value).replace(/\|/g, "\\|").replace(/\r?\n/g, " ");
  const lines = [];
  lines.push("## Eval: " + report.suite_name);
  lines.push("");
  lines.push("| Status | ID | Name | Score | Turns | Cost |");
  lines.push("|--------|-----|------|-------|-------|------|");
  for (const r of results) {
    const icon = r.passed ? "pass" : "FAIL";
    lines.push(
      "| " + icon + " | " + cell(r.case_id) + " | " + cell(r.case_name) + " | " + r.score.toFixed(2) + " | " + r.turns + " | $" + r.cost_usd.toFixed(3) + " |"
    );
  }
  lines.push("");
  const pct = summary.total > 0 ? Math.round(summary.passed / summary.total * 100) : 0;
  lines.push(
    "**Results:** " + summary.passed + "/" + summary.total + " passed (" + pct + "%) | Avg score: " + summary.avg_score.toFixed(2) + " | Total cost: $" + summary.total_cost_usd.toFixed(3)
  );
  const failed = results.filter((r) => !r.passed);
  if (failed.length > 0) {
    lines.push("");
    lines.push("### Failures");
    lines.push("");
    for (const r of failed) {
      lines.push("**" + r.case_id + "** \u2014 " + r.case_name);
      for (const a of r.assertions.filter((x) => !x.passed)) {
        const desc = a.assertion.description ?? a.assertion.type + ": " + String(a.assertion.value);
        lines.push("- " + desc + " (actual: `" + String(a.actual).slice(0, 80) + "`)");
      }
      if (r.error) lines.push("- Error: " + r.error);
      lines.push("");
    }
  }
  return lines.join("\n");
}
|
|
1403
|
+
|
|
883
1404
|
// src/agent-router.ts
|
|
884
|
-
import { z } from "zod";
|
|
1405
|
+
import { z as z2 } from "zod";
|
|
885
1406
|
var AgentRouter = class {
|
|
886
1407
|
constructor(_score) {
|
|
887
1408
|
this._score = _score;
|
|
@@ -957,9 +1478,9 @@ When the user's request matches a specialist's expertise, delegate to them with
|
|
|
957
1478
|
const runtime = () => this.runtime;
|
|
958
1479
|
const events = () => this.runtime.events;
|
|
959
1480
|
const entryName = score.agents[score.entry ?? "orchestrator"]?.name ?? "orchestrator";
|
|
960
|
-
const parameters =
|
|
961
|
-
agent_id:
|
|
962
|
-
task:
|
|
1481
|
+
const parameters = z2.object({
|
|
1482
|
+
agent_id: z2.enum(delegateIds).describe("Which specialist agent to delegate to"),
|
|
1483
|
+
task: z2.string().describe("The specific task description to pass to the specialist")
|
|
963
1484
|
});
|
|
964
1485
|
return {
|
|
965
1486
|
name: "delegate_to_agent",
|
|
@@ -1000,50 +1521,51 @@ import { pathToFileURL } from "url";
|
|
|
1000
1521
|
import { resolve } from "path";
|
|
1001
1522
|
|
|
1002
1523
|
// src/score-schema.ts
|
|
1003
|
-
import { z as
|
|
1004
|
-
var PermissionSchema =
|
|
1005
|
-
var VoiceSchema =
|
|
1006
|
-
name:
|
|
1007
|
-
tools:
|
|
1008
|
-
required_permissions:
|
|
1524
|
+
import { z as z3 } from "zod";
|
|
1525
|
+
var PermissionSchema = z3.enum(["network", "filesystem", "shell", "browser"]);
|
|
1526
|
+
var VoiceSchema = z3.object({
|
|
1527
|
+
name: z3.string().min(1, "Voice name cannot be empty"),
|
|
1528
|
+
tools: z3.array(z3.any()),
|
|
1529
|
+
required_permissions: z3.array(PermissionSchema)
|
|
1009
1530
|
}).passthrough();
|
|
1010
|
-
var BudgetSchema =
|
|
1011
|
-
max_tokens:
|
|
1012
|
-
max_cost_usd:
|
|
1013
|
-
warn_at_percent:
|
|
1531
|
+
var BudgetSchema = z3.object({
|
|
1532
|
+
max_tokens: z3.number().positive().optional(),
|
|
1533
|
+
max_cost_usd: z3.number().positive().optional(),
|
|
1534
|
+
warn_at_percent: z3.number().min(1).max(100).optional()
|
|
1014
1535
|
}).strict();
|
|
1015
|
-
var AgentSchema =
|
|
1016
|
-
name:
|
|
1017
|
-
system_prompt:
|
|
1018
|
-
voices:
|
|
1019
|
-
model:
|
|
1020
|
-
description:
|
|
1021
|
-
permissions:
|
|
1022
|
-
max_turns:
|
|
1023
|
-
max_tool_calls:
|
|
1024
|
-
tool_timeout_ms:
|
|
1536
|
+
var AgentSchema = z3.object({
|
|
1537
|
+
name: z3.string().min(1, "Agent name cannot be empty"),
|
|
1538
|
+
system_prompt: z3.string().min(1, "Agent system_prompt cannot be empty"),
|
|
1539
|
+
voices: z3.array(VoiceSchema),
|
|
1540
|
+
model: z3.string().optional(),
|
|
1541
|
+
description: z3.string().optional(),
|
|
1542
|
+
permissions: z3.array(PermissionSchema).optional(),
|
|
1543
|
+
max_turns: z3.number().int().positive("max_turns must be a positive number").optional(),
|
|
1544
|
+
max_tool_calls: z3.number().int().positive("max_tool_calls must be a positive number").optional(),
|
|
1545
|
+
tool_timeout_ms: z3.number().int().positive("tool_timeout_ms must be a positive number").optional(),
|
|
1025
1546
|
budget: BudgetSchema.optional(),
|
|
1026
|
-
streaming:
|
|
1027
|
-
|
|
1028
|
-
|
|
1547
|
+
streaming: z3.boolean().optional(),
|
|
1548
|
+
allow_human_input: z3.boolean().optional(),
|
|
1549
|
+
delegates: z3.array(z3.string()).optional(),
|
|
1550
|
+
role: z3.enum(["orchestrator", "specialist"]).optional()
|
|
1029
1551
|
}).passthrough();
|
|
1030
|
-
var TelemetrySchema =
|
|
1031
|
-
enabled:
|
|
1032
|
-
endpoint:
|
|
1033
|
-
headers:
|
|
1552
|
+
var TelemetrySchema = z3.object({
|
|
1553
|
+
enabled: z3.boolean(),
|
|
1554
|
+
endpoint: z3.string().url("telemetry.endpoint must be a valid URL").optional(),
|
|
1555
|
+
headers: z3.record(z3.string(), z3.string()).optional()
|
|
1034
1556
|
}).strict();
|
|
1035
|
-
var ScoreSchema =
|
|
1036
|
-
provider:
|
|
1557
|
+
var ScoreSchema = z3.object({
|
|
1558
|
+
provider: z3.object({ chat: z3.function() }).passthrough().refine((p) => typeof p.chat === "function", {
|
|
1037
1559
|
message: "provider must have a chat() method \u2014 did you forget to pass a provider instance?"
|
|
1038
1560
|
}),
|
|
1039
|
-
agents:
|
|
1561
|
+
agents: z3.record(z3.string(), AgentSchema).refine(
|
|
1040
1562
|
(agents) => Object.keys(agents).length > 0,
|
|
1041
1563
|
{ message: "Score must define at least one agent" }
|
|
1042
1564
|
),
|
|
1043
|
-
name:
|
|
1044
|
-
description:
|
|
1045
|
-
default_model:
|
|
1046
|
-
entry:
|
|
1565
|
+
name: z3.string().optional(),
|
|
1566
|
+
description: z3.string().optional(),
|
|
1567
|
+
default_model: z3.string().optional(),
|
|
1568
|
+
entry: z3.string().optional(),
|
|
1047
1569
|
telemetry: TelemetrySchema.optional()
|
|
1048
1570
|
}).passthrough();
|
|
1049
1571
|
function validateScore(config) {
|
|
@@ -1053,7 +1575,7 @@ function validateScore(config) {
|
|
|
1053
1575
|
const path = issue.path.length > 0 ? issue.path.join(".") : "(root)";
|
|
1054
1576
|
return ` - ${path}: ${issue.message}`;
|
|
1055
1577
|
});
|
|
1056
|
-
throw new
|
|
1578
|
+
throw new ScoreValidationError(
|
|
1057
1579
|
"Invalid score file:\n" + issues.join("\n")
|
|
1058
1580
|
);
|
|
1059
1581
|
}
|
|
@@ -1063,18 +1585,20 @@ function validateScore(config) {
|
|
|
1063
1585
|
if (agent.delegates) {
|
|
1064
1586
|
for (const delegateId of agent.delegates) {
|
|
1065
1587
|
if (!agentKeys.includes(delegateId)) {
|
|
1066
|
-
throw new
|
|
1588
|
+
throw new ScoreValidationError(
|
|
1067
1589
|
`Invalid score file:
|
|
1068
|
-
- agents.${key}.delegates: references unknown agent "${delegateId}". Available: ${agentKeys.join(", ")}
|
|
1590
|
+
- agents.${key}.delegates: references unknown agent "${delegateId}". Available: ${agentKeys.join(", ")}`,
|
|
1591
|
+
{ field: `agents.${key}.delegates`, value: delegateId }
|
|
1069
1592
|
);
|
|
1070
1593
|
}
|
|
1071
1594
|
}
|
|
1072
1595
|
}
|
|
1073
1596
|
}
|
|
1074
1597
|
if (data.entry && !agentKeys.includes(data.entry)) {
|
|
1075
|
-
throw new
|
|
1598
|
+
throw new ScoreValidationError(
|
|
1076
1599
|
`Invalid score file:
|
|
1077
|
-
- entry: references unknown agent "${data.entry}". Available: ${agentKeys.join(", ")}
|
|
1600
|
+
- entry: references unknown agent "${data.entry}". Available: ${agentKeys.join(", ")}`,
|
|
1601
|
+
{ field: "entry", value: data.entry }
|
|
1078
1602
|
);
|
|
1079
1603
|
}
|
|
1080
1604
|
}
|
|
@@ -1117,8 +1641,9 @@ var AnthropicProvider = class {
|
|
|
1117
1641
|
}
|
|
1118
1642
|
async chat(request) {
|
|
1119
1643
|
if (!request.model) {
|
|
1120
|
-
throw new
|
|
1121
|
-
"AnthropicProvider requires a model on ChatRequest.\nSet model on the agent or default_model on the score."
|
|
1644
|
+
throw new ProviderError(
|
|
1645
|
+
"AnthropicProvider requires a model on ChatRequest.\nSet model on the agent or default_model on the score.",
|
|
1646
|
+
{ provider: "anthropic" }
|
|
1122
1647
|
);
|
|
1123
1648
|
}
|
|
1124
1649
|
let response;
|
|
@@ -1142,10 +1667,10 @@ var AnthropicProvider = class {
|
|
|
1142
1667
|
} catch (error) {
|
|
1143
1668
|
const msg = error instanceof Error ? error.message : String(error);
|
|
1144
1669
|
logger.error({ error: msg, provider: "anthropic" }, "Provider request failed");
|
|
1145
|
-
|
|
1146
|
-
|
|
1147
|
-
|
|
1148
|
-
);
|
|
1670
|
+
if (msg.includes("authentication") || msg.includes("apiKey") || msg.includes("authToken")) {
|
|
1671
|
+
throw new AuthenticationError("anthropic");
|
|
1672
|
+
}
|
|
1673
|
+
throw new ProviderError(`Anthropic API error: ${msg}`, { provider: "anthropic" });
|
|
1149
1674
|
}
|
|
1150
1675
|
const content = response.content.map((block) => {
|
|
1151
1676
|
if (block.type === "text") {
|
|
@@ -1173,8 +1698,9 @@ Check that ANTHROPIC_API_KEY is set correctly in your .env file.`
|
|
|
1173
1698
|
}
|
|
1174
1699
|
async *stream(request) {
|
|
1175
1700
|
if (!request.model) {
|
|
1176
|
-
throw new
|
|
1177
|
-
"AnthropicProvider requires a model on ChatRequest.\nSet model on the agent or default_model on the score."
|
|
1701
|
+
throw new ProviderError(
|
|
1702
|
+
"AnthropicProvider requires a model on ChatRequest.\nSet model on the agent or default_model on the score.",
|
|
1703
|
+
{ provider: "anthropic" }
|
|
1178
1704
|
);
|
|
1179
1705
|
}
|
|
1180
1706
|
let raw;
|
|
@@ -1199,10 +1725,10 @@ Check that ANTHROPIC_API_KEY is set correctly in your .env file.`
|
|
|
1199
1725
|
} catch (error) {
|
|
1200
1726
|
const msg = error instanceof Error ? error.message : String(error);
|
|
1201
1727
|
logger.error({ error: msg, provider: "anthropic" }, "Provider stream failed");
|
|
1202
|
-
|
|
1203
|
-
|
|
1204
|
-
|
|
1205
|
-
);
|
|
1728
|
+
if (msg.includes("authentication") || msg.includes("apiKey") || msg.includes("authToken")) {
|
|
1729
|
+
throw new AuthenticationError("anthropic");
|
|
1730
|
+
}
|
|
1731
|
+
throw new ProviderError(`Anthropic API error: ${msg}`, { provider: "anthropic" });
|
|
1206
1732
|
}
|
|
1207
1733
|
const toolBlocks = /* @__PURE__ */ new Map();
|
|
1208
1734
|
let inputTokens = 0;
|
|
@@ -1269,8 +1795,9 @@ var OpenAIProvider = class {
|
|
|
1269
1795
|
}
|
|
1270
1796
|
async chat(request) {
|
|
1271
1797
|
if (!request.model) {
|
|
1272
|
-
throw new
|
|
1273
|
-
"OpenAIProvider requires a model on ChatRequest.\nSet model on the agent or default_model on the score."
|
|
1798
|
+
throw new ProviderError(
|
|
1799
|
+
"OpenAIProvider requires a model on ChatRequest.\nSet model on the agent or default_model on the score.",
|
|
1800
|
+
{ provider: "openai" }
|
|
1274
1801
|
);
|
|
1275
1802
|
}
|
|
1276
1803
|
const messages = [];
|
|
@@ -1339,10 +1866,10 @@ var OpenAIProvider = class {
|
|
|
1339
1866
|
} catch (error) {
|
|
1340
1867
|
const msg = error instanceof Error ? error.message : String(error);
|
|
1341
1868
|
logger.error({ error: msg, provider: "openai" }, "Provider request failed");
|
|
1342
|
-
|
|
1343
|
-
|
|
1344
|
-
|
|
1345
|
-
);
|
|
1869
|
+
if (msg.includes("Incorrect API key") || msg.includes("authentication")) {
|
|
1870
|
+
throw new AuthenticationError("openai");
|
|
1871
|
+
}
|
|
1872
|
+
throw new ProviderError(`OpenAI API error: ${msg}`, { provider: "openai" });
|
|
1346
1873
|
}
|
|
1347
1874
|
const choice = response.choices[0];
|
|
1348
1875
|
const content = [];
|
|
@@ -1385,8 +1912,9 @@ Check that OPENAI_API_KEY is set correctly in your .env file.`
|
|
|
1385
1912
|
}
|
|
1386
1913
|
async *stream(request) {
|
|
1387
1914
|
if (!request.model) {
|
|
1388
|
-
throw new
|
|
1389
|
-
"OpenAIProvider requires a model on ChatRequest.\nSet model on the agent or default_model on the score."
|
|
1915
|
+
throw new ProviderError(
|
|
1916
|
+
"OpenAIProvider requires a model on ChatRequest.\nSet model on the agent or default_model on the score.",
|
|
1917
|
+
{ provider: "openai" }
|
|
1390
1918
|
);
|
|
1391
1919
|
}
|
|
1392
1920
|
const messages = [];
|
|
@@ -1501,9 +2029,7 @@ var GeminiProvider = class {
|
|
|
1501
2029
|
constructor(options = {}) {
|
|
1502
2030
|
const apiKey = options.api_key ?? SecretsManager.optional("GEMINI_API_KEY");
|
|
1503
2031
|
if (!apiKey) {
|
|
1504
|
-
throw new
|
|
1505
|
-
"GeminiProvider requires an API key.\nSet GEMINI_API_KEY in your .env file, or pass api_key to the constructor:\n new GeminiProvider({ api_key: 'your-key' })"
|
|
1506
|
-
);
|
|
2032
|
+
throw new AuthenticationError("gemini");
|
|
1507
2033
|
}
|
|
1508
2034
|
this.client = new GoogleGenerativeAI(apiKey);
|
|
1509
2035
|
}
|
|
@@ -1582,10 +2108,7 @@ var GeminiProvider = class {
|
|
|
1582
2108
|
} catch (error) {
|
|
1583
2109
|
const msg = error instanceof Error ? error.message : String(error);
|
|
1584
2110
|
logger.error({ error: msg, provider: "gemini" }, "Provider request failed");
|
|
1585
|
-
throw new
|
|
1586
|
-
`Gemini API error: ${msg}
|
|
1587
|
-
Check that GEMINI_API_KEY is set correctly in your .env file.`
|
|
1588
|
-
);
|
|
2111
|
+
throw new ProviderError(`Gemini API error: ${msg}`, { provider: "gemini" });
|
|
1589
2112
|
}
|
|
1590
2113
|
const response = result.response;
|
|
1591
2114
|
const candidate = response.candidates?.[0];
|
|
@@ -1725,26 +2248,47 @@ function convertJsonSchemaToGemini(schema) {
|
|
|
1725
2248
|
};
|
|
1726
2249
|
}
|
|
1727
2250
|
export {
|
|
2251
|
+
AgentNotFoundError,
|
|
1728
2252
|
AgentRouter,
|
|
1729
2253
|
AgentRunner,
|
|
1730
2254
|
AnthropicProvider,
|
|
2255
|
+
AuthenticationError,
|
|
2256
|
+
BudgetExceededError,
|
|
2257
|
+
ContextWindowError,
|
|
2258
|
+
EvalRunner,
|
|
1731
2259
|
EventBus,
|
|
1732
2260
|
GeminiProvider,
|
|
1733
2261
|
InMemorySemanticStore,
|
|
1734
2262
|
InMemorySessionStore,
|
|
1735
2263
|
OpenAIProvider,
|
|
2264
|
+
PathTraversalError,
|
|
2265
|
+
PermissionError,
|
|
1736
2266
|
PermissionGuard,
|
|
1737
2267
|
PostgresSessionStore,
|
|
1738
2268
|
PromptGuard,
|
|
2269
|
+
ProviderError,
|
|
2270
|
+
RateLimitError,
|
|
1739
2271
|
ScoreLoader,
|
|
2272
|
+
ScoreValidationError,
|
|
1740
2273
|
SecretsManager,
|
|
1741
2274
|
TokenBudget,
|
|
2275
|
+
ToolTimeoutError,
|
|
2276
|
+
TuttiError,
|
|
1742
2277
|
TuttiRuntime,
|
|
1743
2278
|
TuttiTracer,
|
|
2279
|
+
UrlValidationError,
|
|
2280
|
+
VoiceError,
|
|
2281
|
+
createBlocklistHook,
|
|
2282
|
+
createCacheHook,
|
|
1744
2283
|
createLogger,
|
|
2284
|
+
createLoggingHook,
|
|
2285
|
+
createMaxCostHook,
|
|
1745
2286
|
defineScore,
|
|
2287
|
+
toJSON as evalToJSON,
|
|
2288
|
+
toMarkdown as evalToMarkdown,
|
|
1746
2289
|
initTelemetry,
|
|
1747
2290
|
logger,
|
|
2291
|
+
printTable as printEvalTable,
|
|
1748
2292
|
shutdownTelemetry,
|
|
1749
2293
|
validateScore
|
|
1750
2294
|
};
|