@lightdash-tools/mcp 0.2.5 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,759 @@
1
+ /**
2
+ * MCP tools: AI agents (admin + project-scoped), threads, and evaluations.
3
+ */
4
+
5
+ import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
6
+ import type { LightdashClient } from '@lightdash-tools/client';
7
+ import { z } from 'zod';
8
+ import {
9
+ wrapTool,
10
+ registerToolSafe,
11
+ READ_ONLY_DEFAULT,
12
+ WRITE_IDEMPOTENT,
13
+ WRITE_DESTRUCTIVE,
14
+ } from './shared.js';
15
+
16
/**
 * Registers the AI-agent MCP tools on `server`.
 *
 * Tools are grouped as organization-wide admin views (agents, threads, AI settings), then
 * project-scoped agent CRUD, conversation threads, and evaluations. Every handler forwards
 * its arguments to a `client.v1.aiAgents` method through `wrapTool` and replies with a
 * single text content item (pretty-printed JSON, or a short confirmation message for calls
 * whose result is not returned).
 *
 * @param server - MCP server the tools are registered on (via `registerToolSafe`).
 * @param client - Lightdash client handed to `wrapTool` for every handler.
 */
export function registerAiAgentTools(server: McpServer, client: LightdashClient): void {
  // ─── Shared handler argument shapes ──────────────────────────────────────────

  type AgentScope = { projectUuid: string; agentUuid: string };
  type ThreadScope = AgentScope & { threadUuid: string };
  type EvaluationScope = AgentScope & { evalUuid: string };
  type TextResult = { content: [{ type: 'text'; text: string }] };

  // ─── Shared result builders ──────────────────────────────────────────────────

  /** Wraps a plain message in the single-item text result every tool returns. */
  const textResult = (text: string): TextResult => ({ content: [{ type: 'text', text }] });

  /** Serializes an API response as 2-space-indented JSON inside a text result. */
  const jsonResult = (value: unknown): TextResult => textResult(JSON.stringify(value, null, 2));

  // ─── Shared input schema fragments ───────────────────────────────────────────

  const projectScope = { projectUuid: z.string().describe('Project UUID') };
  const agentScope = { ...projectScope, agentUuid: z.string().describe('Agent UUID') };
  const evaluationScope = { ...agentScope, evalUuid: z.string().describe('Evaluation UUID') };

  // A prompt is either free text or a reference to a prompt in an existing thread.
  // Defined once so the create/update/append tools cannot drift apart.
  const evaluationPrompts = z.array(
    z.union([
      z.object({
        prompt: z.string().describe('Test prompt text'),
        expectedResponse: z.string().nullable().describe('Expected response (optional)'),
      }),
      z.object({
        threadUuid: z.string().describe('Existing thread UUID'),
        promptUuid: z.string().describe('Existing prompt UUID within the thread'),
        expectedResponse: z.string().nullable().describe('Expected response (optional)'),
      }),
    ]),
  );

  // Tools that create or append something on every call must not advertise themselves as
  // idempotent, otherwise a client may treat them as safe to retry and duplicate data.
  // NOTE(review): assumes WRITE_IDEMPOTENT is a plain MCP annotations object — confirm in
  // ./shared.js (or replace with a dedicated preset there if one exists).
  const writeNonIdempotent = { ...WRITE_IDEMPOTENT, idempotentHint: false };

  // ─── Admin: agents ───────────────────────────────────────────────────────────

  registerToolSafe(
    server,
    'list_admin_agents',
    {
      title: 'List AI agents (admin)',
      description: 'List all AI agents across the organization (admin view)',
      inputSchema: {},
      annotations: READ_ONLY_DEFAULT,
    },
    wrapTool(client, (c) => async () => jsonResult(await c.v1.aiAgents.listAdminAgents())),
  );

  // ─── Admin: threads ──────────────────────────────────────────────────────────

  registerToolSafe(
    server,
    'list_admin_agent_threads',
    {
      title: 'List AI agent threads (admin)',
      description:
        'List AI agent conversation threads across the organization with optional filters',
      inputSchema: {
        // Paging values are counts: reject fractions, zero and negatives up front.
        page: z.number().int().positive().optional().describe('Page number (1-based)'),
        pageSize: z.number().int().positive().optional().describe('Number of results per page'),
        agentUuids: z.array(z.string()).optional().describe('Filter by agent UUIDs'),
        projectUuids: z.array(z.string()).optional().describe('Filter by project UUIDs'),
        // Only the three documented scores are meaningful.
        humanScore: z
          .number()
          .int()
          .min(-1)
          .max(1)
          .optional()
          .describe('Filter by human score: -1 (negative), 0 (neutral), 1 (positive)'),
        dateFrom: z.string().optional().describe('Start date filter (YYYY-MM-DD)'),
        dateTo: z.string().optional().describe('End date filter (YYYY-MM-DD)'),
        sortField: z.enum(['createdAt', 'title']).optional().describe('Sort field'),
        sortDirection: z.enum(['asc', 'desc']).optional().describe('Sort direction'),
      },
      annotations: READ_ONLY_DEFAULT,
    },
    wrapTool(
      client,
      (c) =>
        async (params: {
          page?: number;
          pageSize?: number;
          agentUuids?: string[];
          projectUuids?: string[];
          humanScore?: number;
          dateFrom?: string;
          dateTo?: string;
          sortField?: 'createdAt' | 'title';
          sortDirection?: 'asc' | 'desc';
        }) =>
          jsonResult(await c.v1.aiAgents.getAdminThreads(params)),
    ),
  );

  // ─── Admin: settings ─────────────────────────────────────────────────────────

  registerToolSafe(
    server,
    'get_ai_organization_settings',
    {
      title: 'Get AI organization settings',
      description: 'Get the AI settings for the current organization (admin)',
      inputSchema: {},
      annotations: READ_ONLY_DEFAULT,
    },
    wrapTool(
      client,
      (c) => async () => jsonResult(await c.v1.aiAgents.getAiOrganizationSettings()),
    ),
  );

  registerToolSafe(
    server,
    'update_ai_organization_settings',
    {
      title: 'Update AI organization settings',
      description: 'Update the AI settings for the current organization (admin)',
      inputSchema: {
        aiAgentsVisible: z
          .boolean()
          .optional()
          .describe('Whether AI agents feature is visible to users'),
      },
      annotations: WRITE_IDEMPOTENT,
    },
    wrapTool(client, (c) => async (params: { aiAgentsVisible?: boolean }) => {
      const result = await c.v1.aiAgents.updateAiOrganizationSettings(
        params as Parameters<typeof c.v1.aiAgents.updateAiOrganizationSettings>[0],
      );
      return jsonResult(result);
    }),
  );

  // ─── Project-scoped: agent CRUD ──────────────────────────────────────────────

  registerToolSafe(
    server,
    'list_project_agents',
    {
      title: 'List agents in a project',
      description: 'List all AI agents configured for a specific project',
      inputSchema: { ...projectScope },
      annotations: READ_ONLY_DEFAULT,
    },
    wrapTool(
      client,
      (c) =>
        async ({ projectUuid }: { projectUuid: string }) =>
          jsonResult(await c.v1.aiAgents.listAgents(projectUuid)),
    ),
  );

  registerToolSafe(
    server,
    'get_project_agent',
    {
      title: 'Get agent',
      description: 'Get details of a specific AI agent in a project',
      inputSchema: { ...agentScope },
      annotations: READ_ONLY_DEFAULT,
    },
    wrapTool(
      client,
      (c) =>
        async ({ projectUuid, agentUuid }: AgentScope) =>
          jsonResult(await c.v1.aiAgents.getAgent(projectUuid, agentUuid)),
    ),
  );

  registerToolSafe(
    server,
    'create_project_agent',
    {
      title: 'Create agent',
      description: 'Create a new AI agent in a project',
      inputSchema: {
        ...projectScope,
        name: z.string().describe('Agent name'),
        description: z.string().optional().describe('Agent description'),
        instruction: z.string().optional().describe('System instruction for the agent'),
      },
      // Each call creates another agent.
      annotations: writeNonIdempotent,
    },
    wrapTool(
      client,
      (c) =>
        async ({
          projectUuid,
          name,
          description,
          instruction,
        }: {
          projectUuid: string;
          name: string;
          description?: string;
          instruction?: string;
        }) => {
          // Optional fields are left out of the body entirely when not supplied.
          const body = {
            name,
            projectUuid,
            ...(description != null ? { description } : {}),
            ...(instruction != null ? { instruction } : {}),
          } as Parameters<typeof c.v1.aiAgents.createAgent>[1];
          return jsonResult(await c.v1.aiAgents.createAgent(projectUuid, body));
        },
    ),
  );

  registerToolSafe(
    server,
    'update_project_agent',
    {
      title: 'Update agent',
      description: 'Update an existing AI agent',
      inputSchema: {
        ...agentScope,
        name: z.string().optional().describe('New name'),
        description: z.string().optional().describe('New description'),
        instruction: z.string().optional().describe('New system instruction'),
      },
      annotations: WRITE_IDEMPOTENT,
    },
    wrapTool(
      client,
      (c) =>
        async ({
          projectUuid,
          agentUuid,
          ...body
        }: AgentScope & {
          name?: string;
          description?: string;
          instruction?: string;
        }) => {
          const result = await c.v1.aiAgents.updateAgent(
            projectUuid,
            agentUuid,
            body as Parameters<typeof c.v1.aiAgents.updateAgent>[2],
          );
          return jsonResult(result);
        },
    ),
  );

  registerToolSafe(
    server,
    'delete_project_agent',
    {
      title: 'Delete agent',
      description: 'Delete an AI agent from a project',
      inputSchema: { ...agentScope },
      annotations: WRITE_DESTRUCTIVE,
    },
    wrapTool(client, (c) => async ({ projectUuid, agentUuid }: AgentScope) => {
      await c.v1.aiAgents.deleteAgent(projectUuid, agentUuid);
      return textResult(`Agent ${agentUuid} deleted successfully`);
    }),
  );

  // ─── Project-scoped: threads ─────────────────────────────────────────────────

  registerToolSafe(
    server,
    'list_agent_threads',
    {
      title: 'List agent threads',
      description: 'List all conversation threads for an agent',
      inputSchema: { ...agentScope },
      annotations: READ_ONLY_DEFAULT,
    },
    wrapTool(
      client,
      (c) =>
        async ({ projectUuid, agentUuid }: AgentScope) =>
          jsonResult(await c.v1.aiAgents.listAgentThreads(projectUuid, agentUuid)),
    ),
  );

  registerToolSafe(
    server,
    'get_agent_thread',
    {
      title: 'Get agent thread',
      description: 'Get a conversation thread with all its messages',
      inputSchema: {
        ...agentScope,
        threadUuid: z.string().describe('Thread UUID'),
      },
      annotations: READ_ONLY_DEFAULT,
    },
    wrapTool(
      client,
      (c) =>
        async ({ projectUuid, agentUuid, threadUuid }: ThreadScope) =>
          jsonResult(await c.v1.aiAgents.getAgentThread(projectUuid, agentUuid, threadUuid)),
    ),
  );

  registerToolSafe(
    server,
    'generate_agent_message',
    {
      title: 'Generate agent message',
      description:
        'Start a new conversation thread and generate the first agent response for a given prompt',
      inputSchema: {
        ...agentScope,
        prompt: z.string().describe('User prompt to send to the agent'),
      },
      // Each call opens a brand-new thread.
      annotations: writeNonIdempotent,
    },
    wrapTool(
      client,
      (c) =>
        async ({ projectUuid, agentUuid, prompt }: AgentScope & { prompt: string }) => {
          const thread = await c.v1.aiAgents.createAgentThread(projectUuid, agentUuid);
          const result = await c.v1.aiAgents.generateAgentThreadResponse(
            projectUuid,
            agentUuid,
            thread.uuid,
            { prompt },
          );
          // Surface the new thread's UUID so the caller can continue the conversation.
          return jsonResult({ threadUuid: thread.uuid, ...result });
        },
    ),
  );

  registerToolSafe(
    server,
    'continue_agent_thread',
    {
      title: 'Continue agent thread',
      description: 'Continue an existing conversation thread with a new prompt',
      inputSchema: {
        ...agentScope,
        threadUuid: z.string().describe('Thread UUID to continue'),
        prompt: z.string().describe('Follow-up prompt'),
      },
      // Each call adds another exchange to the thread.
      annotations: writeNonIdempotent,
    },
    wrapTool(
      client,
      (c) =>
        async ({ projectUuid, agentUuid, threadUuid, prompt }: ThreadScope & { prompt: string }) => {
          const result = await c.v1.aiAgents.generateAgentThreadResponse(
            projectUuid,
            agentUuid,
            threadUuid,
            { prompt },
          );
          return jsonResult(result);
        },
    ),
  );

  // ─── Project-scoped: evaluations ─────────────────────────────────────────────

  registerToolSafe(
    server,
    'list_agent_evaluations',
    {
      title: 'List agent evaluations',
      description: 'List all evaluations for an agent',
      inputSchema: { ...agentScope },
      annotations: READ_ONLY_DEFAULT,
    },
    wrapTool(
      client,
      (c) =>
        async ({ projectUuid, agentUuid }: AgentScope) =>
          jsonResult(await c.v1.aiAgents.listEvaluations(projectUuid, agentUuid)),
    ),
  );

  registerToolSafe(
    server,
    'get_agent_evaluation',
    {
      title: 'Get agent evaluation',
      description: 'Get a full evaluation including its test prompts',
      inputSchema: { ...evaluationScope },
      annotations: READ_ONLY_DEFAULT,
    },
    wrapTool(
      client,
      (c) =>
        async ({ projectUuid, agentUuid, evalUuid }: EvaluationScope) =>
          jsonResult(await c.v1.aiAgents.getEvaluation(projectUuid, agentUuid, evalUuid)),
    ),
  );

  registerToolSafe(
    server,
    'create_agent_evaluation',
    {
      title: 'Create agent evaluation',
      description:
        'Create a new evaluation test suite for an agent with a title and optional prompts',
      inputSchema: {
        ...agentScope,
        title: z.string().describe('Evaluation title'),
        description: z.string().optional().describe('Evaluation description'),
        prompts: evaluationPrompts.optional().describe('Test prompts for the evaluation'),
      },
      // Each call creates another evaluation.
      annotations: writeNonIdempotent,
    },
    wrapTool(
      client,
      (c) =>
        async ({
          projectUuid,
          agentUuid,
          title,
          description,
          prompts,
        }: AgentScope & {
          title: string;
          description?: string;
          prompts?: Parameters<typeof c.v1.aiAgents.createEvaluation>[2]['prompts'];
        }) => {
          // The request always carries a prompt list; default to an empty one.
          const body: Parameters<typeof c.v1.aiAgents.createEvaluation>[2] = {
            title,
            prompts: prompts ?? [],
            ...(description != null ? { description } : {}),
          };
          return jsonResult(await c.v1.aiAgents.createEvaluation(projectUuid, agentUuid, body));
        },
    ),
  );

  registerToolSafe(
    server,
    'update_agent_evaluation',
    {
      title: 'Update agent evaluation',
      description: 'Update an evaluation title, description, or replace its prompts',
      inputSchema: {
        ...evaluationScope,
        title: z.string().optional().describe('New title'),
        description: z.string().optional().describe('New description'),
        prompts: evaluationPrompts
          .optional()
          .describe('Replacement prompt list (omit to leave unchanged)'),
      },
      annotations: WRITE_IDEMPOTENT,
    },
    wrapTool(
      client,
      (c) =>
        async ({
          projectUuid,
          agentUuid,
          evalUuid,
          ...body
        }: EvaluationScope & {
          title?: string;
          description?: string;
          prompts?: Parameters<typeof c.v1.aiAgents.updateEvaluation>[3]['prompts'];
        }) => {
          const result = await c.v1.aiAgents.updateEvaluation(
            projectUuid,
            agentUuid,
            evalUuid,
            body as Parameters<typeof c.v1.aiAgents.updateEvaluation>[3],
          );
          return jsonResult(result);
        },
    ),
  );

  registerToolSafe(
    server,
    'append_agent_evaluation_prompts',
    {
      title: 'Append evaluation prompts',
      description:
        'Append additional prompts to an existing evaluation without replacing existing ones',
      inputSchema: {
        ...evaluationScope,
        prompts: evaluationPrompts.describe('Prompts to append'),
      },
      // Each call appends the prompts again.
      annotations: writeNonIdempotent,
    },
    wrapTool(
      client,
      (c) =>
        async ({
          projectUuid,
          agentUuid,
          evalUuid,
          prompts,
        }: EvaluationScope & {
          prompts: Parameters<typeof c.v1.aiAgents.appendToEvaluation>[3]['prompts'];
        }) => {
          await c.v1.aiAgents.appendToEvaluation(projectUuid, agentUuid, evalUuid, { prompts });
          return textResult(`Prompts appended to evaluation ${evalUuid} successfully`);
        },
    ),
  );

  registerToolSafe(
    server,
    'run_agent_evaluation',
    {
      title: 'Run agent evaluation',
      description: 'Trigger a new evaluation run for an agent. Returns the run UUID and status.',
      inputSchema: {
        ...agentScope,
        evalUuid: z.string().describe('Evaluation UUID to run'),
      },
      // Each call triggers another run.
      annotations: writeNonIdempotent,
    },
    wrapTool(
      client,
      (c) =>
        async ({ projectUuid, agentUuid, evalUuid }: EvaluationScope) =>
          jsonResult(await c.v1.aiAgents.runEvaluation(projectUuid, agentUuid, evalUuid)),
    ),
  );

  registerToolSafe(
    server,
    'list_agent_evaluation_runs',
    {
      title: 'List evaluation runs',
      description: 'List all runs for an evaluation with their status and pass/fail counts',
      inputSchema: { ...evaluationScope },
      annotations: READ_ONLY_DEFAULT,
    },
    wrapTool(
      client,
      (c) =>
        async ({ projectUuid, agentUuid, evalUuid }: EvaluationScope) =>
          jsonResult(await c.v1.aiAgents.listEvaluationRuns(projectUuid, agentUuid, evalUuid)),
    ),
  );

  registerToolSafe(
    server,
    'get_agent_evaluation_run_results',
    {
      title: 'Get evaluation run results',
      description:
        'Get detailed per-prompt results for a specific evaluation run, including pass/fail and assessments',
      inputSchema: {
        ...evaluationScope,
        runUuid: z.string().describe('Run UUID'),
      },
      annotations: READ_ONLY_DEFAULT,
    },
    wrapTool(
      client,
      (c) =>
        async ({ projectUuid, agentUuid, evalUuid, runUuid }: EvaluationScope & { runUuid: string }) => {
          const result = await c.v1.aiAgents.getEvaluationRunResults(
            projectUuid,
            agentUuid,
            evalUuid,
            runUuid,
          );
          return jsonResult(result);
        },
    ),
  );

  registerToolSafe(
    server,
    'delete_agent_evaluation',
    {
      title: 'Delete agent evaluation',
      description: 'Delete an evaluation and all its runs',
      inputSchema: { ...evaluationScope },
      annotations: WRITE_DESTRUCTIVE,
    },
    wrapTool(client, (c) => async ({ projectUuid, agentUuid, evalUuid }: EvaluationScope) => {
      await c.v1.aiAgents.deleteEvaluation(projectUuid, agentUuid, evalUuid);
      return textResult(`Evaluation ${evalUuid} deleted successfully`);
    }),
  );
}