applied-cli 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- applied_cli/__init__.py +2 -0
- applied_cli/auth_store.py +263 -0
- applied_cli/commands/__init__.py +2 -0
- applied_cli/commands/_hints.py +11 -0
- applied_cli/commands/_normalize.py +79 -0
- applied_cli/commands/_parsers.py +58 -0
- applied_cli/commands/_ui.py +33 -0
- applied_cli/commands/agent.py +1231 -0
- applied_cli/commands/auth.py +739 -0
- applied_cli/commands/chat.py +379 -0
- applied_cli/commands/coverage.py +348 -0
- applied_cli/commands/discover.py +1006 -0
- applied_cli/commands/fix.py +1204 -0
- applied_cli/commands/insights.py +614 -0
- applied_cli/commands/intents.py +447 -0
- applied_cli/commands/rate.py +508 -0
- applied_cli/commands/responses.py +604 -0
- applied_cli/commands/shop.py +1757 -0
- applied_cli/commands/simulate.py +330 -0
- applied_cli/commands/spec.py +238 -0
- applied_cli/config.py +50 -0
- applied_cli/error_reporting.py +38 -0
- applied_cli/http.py +1614 -0
- applied_cli/main.py +90 -0
- applied_cli/mcp_server.py +738 -0
- applied_cli/presets/demo.yaml +170 -0
- applied_cli/runtime.py +53 -0
- applied_cli/shop_spec.py +398 -0
- applied_cli/spec_workflow.py +432 -0
- applied_cli-0.1.0.dist-info/METADATA +176 -0
- applied_cli-0.1.0.dist-info/RECORD +34 -0
- applied_cli-0.1.0.dist-info/WHEEL +5 -0
- applied_cli-0.1.0.dist-info/entry_points.txt +3 -0
- applied_cli-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,738 @@
|
|
|
1
|
+
"""
|
|
2
|
+
MCP Server for Applied Labs CLI.
|
|
3
|
+
|
|
4
|
+
This module exposes applied-cli commands as MCP tools for use with Claude.
|
|
5
|
+
|
|
6
|
+
Usage:
|
|
7
|
+
# Run directly
|
|
8
|
+
python -m applied_cli.mcp_server
|
|
9
|
+
|
|
10
|
+
# Or via the entry point
|
|
11
|
+
applied-cli-mcp
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
import json
|
|
15
|
+
import subprocess
|
|
16
|
+
import sys
|
|
17
|
+
from typing import Any
|
|
18
|
+
|
|
19
|
+
from mcp.server.fastmcp import FastMCP
|
|
20
|
+
|
|
21
|
+
# Initialize the FastMCP server instance. Every tool in this module registers
# itself on this object through the @mcp.tool() decorator.
mcp = FastMCP(
    # Server name.
    "applied-cli",
    # Free-form usage guide handed to FastMCP as the server's instructions.
    # Keep the tool names listed here in sync with the functions defined below.
    instructions="""
You have access to the Applied Labs CLI for managing AI support agents.

## Quick Reference

### Setup & Auth
- `auth_status` - Check authentication and current shop
- `auth_shops` / `auth_switch_shop` - List and switch shops
- `shop_setup` - Full shop bootstrap from YAML spec

### Agents
- `agent_list` / `agent_show` - View agents
- `agent_create` / `agent_update` - Manage agents
- `chat_send` - Test agent responses

### Knowledge Base
- `knowledge_list` - List Q&A entries and rules
- `knowledge_upsert` - Create/update entries

### Testing Workflow
1. `test_benchmarks_list` - List benchmarks
2. `test_fix_context` - Get failing scenarios with feedback
3. `knowledge_upsert` - Fix knowledge base
4. `test_fix_batch` - Re-test scenarios in bulk
5. `test_fix_status` - Track progress
6. `test_coverage_summary` - Coverage by intent

### Data
- `conversations_list` / `conversations_show` - View conversations
- `conversations_import` - Import historical CSV
- `simulate_run` - Generate test conversations
- `insights_run` / `insights_show` - Analytics reports

All tools return JSON.
""",
)
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def _run_cli(args: list[str], timeout: int = 120) -> dict[str, Any]:
|
|
63
|
+
"""Run an applied-cli command and return structured output."""
|
|
64
|
+
cmd = ["python", "-m", "applied_cli.main"] + args
|
|
65
|
+
|
|
66
|
+
try:
|
|
67
|
+
result = subprocess.run(
|
|
68
|
+
cmd,
|
|
69
|
+
capture_output=True,
|
|
70
|
+
text=True,
|
|
71
|
+
timeout=timeout,
|
|
72
|
+
)
|
|
73
|
+
|
|
74
|
+
output = result.stdout.strip()
|
|
75
|
+
error = result.stderr.strip()
|
|
76
|
+
|
|
77
|
+
# Try to parse as JSON
|
|
78
|
+
try:
|
|
79
|
+
parsed = json.loads(output)
|
|
80
|
+
return {
|
|
81
|
+
"success": result.returncode == 0,
|
|
82
|
+
"data": parsed,
|
|
83
|
+
"error": error if result.returncode != 0 else None,
|
|
84
|
+
}
|
|
85
|
+
except json.JSONDecodeError:
|
|
86
|
+
return {
|
|
87
|
+
"success": result.returncode == 0,
|
|
88
|
+
"output": output,
|
|
89
|
+
"error": error if result.returncode != 0 else None,
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
except subprocess.TimeoutExpired:
|
|
93
|
+
return {
|
|
94
|
+
"success": False,
|
|
95
|
+
"error": f"Command timed out after {timeout} seconds",
|
|
96
|
+
}
|
|
97
|
+
except Exception as e:
|
|
98
|
+
return {
|
|
99
|
+
"success": False,
|
|
100
|
+
"error": str(e),
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
# =============================================================================
|
|
105
|
+
# AUTHENTICATION
|
|
106
|
+
# =============================================================================
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
@mcp.tool()
async def auth_status() -> str:
    """
    Check authentication status and current shop.

    Returns:
        The result of the `auth status --json` CLI call as indented JSON text.
    """
    return json.dumps(_run_cli(["auth", "status", "--json"]), indent=2)
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
@mcp.tool()
async def auth_shops() -> str:
    """
    List all shops the user has access to.

    Returns:
        The result of the `auth shops --json` CLI call as indented JSON text.
    """
    return json.dumps(_run_cli(["auth", "shops", "--json"]), indent=2)
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
@mcp.tool()
async def auth_switch_shop(shop_identifier: str) -> str:
    """
    Switch the active shop.

    Args:
        shop_identifier: Shop name or UUID to switch to

    Returns:
        The result of the `auth use-shop` CLI call as indented JSON text.
    """
    # NOTE(review): unlike most tools here, no --json flag is passed, so the
    # CLI's stdout is likely returned as plain text under "output" - confirm
    # whether `auth use-shop` supports --json.
    command = ["auth", "use-shop", shop_identifier]
    return json.dumps(_run_cli(command), indent=2)
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
# =============================================================================
|
|
136
|
+
# AGENTS
|
|
137
|
+
# =============================================================================
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
@mcp.tool()
async def agent_list() -> str:
    """
    List all agents in the current shop.

    Returns:
        The result of the `agent list --json` CLI call as indented JSON text.
    """
    return json.dumps(_run_cli(["agent", "list", "--json"]), indent=2)
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
@mcp.tool()
async def agent_show(agent_id: str) -> str:
    """
    Get detailed information about a specific agent.

    Args:
        agent_id: UUID of the agent

    Returns:
        The result of the `agent show` CLI call as indented JSON text.
    """
    command = ["agent", "show", "--agent-id", agent_id, "--json"]
    return json.dumps(_run_cli(command), indent=2)
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
@mcp.tool()
async def agent_create(
    name: str,
    modality: str,
    description: str = "",
    agent_type: str = "customer_support",
) -> str:
    """
    Create a new agent.

    Args:
        name: Name for the agent
        modality: 'chat', 'email', or 'sms'
        description: Optional description; omitted from the command when empty
        agent_type: Agent type (default: customer_support)

    Returns:
        The result of the `agent create` CLI call as indented JSON text.
    """
    command = [
        "agent", "create",
        "--name", name,
        "--modality", modality,
        "--type", agent_type,
        "--json",
    ]
    # Only forward a description when one was actually supplied.
    if description:
        command += ["--description", description]
    return json.dumps(_run_cli(command), indent=2)
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
@mcp.tool()
async def agent_update(
    agent_id: str,
    name: str | None = None,
    description: str | None = None,
    guardrail: str | None = None,
) -> str:
    """
    Update an existing agent.

    Only the fields that are provided (and non-empty) are sent to the CLI.

    Args:
        agent_id: UUID of the agent
        name: New name
        description: New description
        guardrail: New guardrail text

    Returns:
        The result of the `agent update` CLI call as indented JSON text.
    """
    args = ["agent", "update", "--agent-id", agent_id, "--yes", "--json"]
    # Each optional field maps to its own flag; None or "" means "leave as is".
    for flag, value in (
        ("--name", name),
        ("--description", description),
        ("--guardrail", guardrail),
    ):
        if value:
            args.extend([flag, value])
    result = _run_cli(args)
    return json.dumps(result, indent=2)
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
# =============================================================================
|
|
210
|
+
# CHAT
|
|
211
|
+
# =============================================================================
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
@mcp.tool()
async def chat_send(agent_id: str, message: str, channel: str = "chat") -> str:
    """
    Send a message to an agent and get the response.

    Args:
        agent_id: UUID of the agent to chat with
        message: The message to send
        channel: Channel type - 'chat', 'email', or 'sms' (default: chat)

    Returns:
        The result of the `chat` CLI call as indented JSON text.
    """
    command = [
        "chat",
        "--agent-id", agent_id,
        "--channel", channel,
        "--message", message,
        "--json",
    ]
    # This call uses a 90 second limit instead of _run_cli's default.
    reply = _run_cli(command, timeout=90)
    return json.dumps(reply, indent=2)
|
|
229
|
+
|
|
230
|
+
|
|
231
|
+
# =============================================================================
|
|
232
|
+
# CONVERSATIONS
|
|
233
|
+
# =============================================================================
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
@mcp.tool()
async def conversations_list(limit: int = 20, agent_id: str | None = None) -> str:
    """
    List recent conversations.

    Args:
        limit: Maximum number of conversations to return (default: 20)
        agent_id: Optional agent UUID to filter by

    Returns:
        The result of the `conversations list` CLI call as indented JSON text.
    """
    args = ["conversations", "list", "--limit", str(limit), "--json"]
    # The agent filter is only added when an agent id was supplied.
    if agent_id:
        args.extend(["--agent-id", agent_id])
    result = _run_cli(args)
    return json.dumps(result, indent=2)
|
|
250
|
+
|
|
251
|
+
|
|
252
|
+
@mcp.tool()
async def conversations_show(conversation_id: str) -> str:
    """
    Get full details of a conversation including all messages.

    Args:
        conversation_id: UUID of the conversation

    Returns:
        The result of the `conversations show` CLI call as indented JSON text.
    """
    command = ["conversations", "show", "--conversation-id", conversation_id, "--json"]
    return json.dumps(_run_cli(command), indent=2)
|
|
262
|
+
|
|
263
|
+
|
|
264
|
+
@mcp.tool()
async def conversations_import(
    agent_id: str,
    file_path: str | None = None,
    url: str | None = None,
) -> str:
    """
    Import historical conversations from CSV.

    Provide either file_path or url. CSV should have columns:
    ID, DATE_CREATED, SENDER, BODY, TOPIC, INTENT.

    Args:
        agent_id: UUID of the agent to import for
        file_path: Local path to CSV file (takes precedence if both are given)
        url: URL to CSV file

    Returns:
        The result of the `conversations import` CLI call as indented JSON
        text, or a JSON error object if neither source was provided.
    """
    # Fail fast with a clear message instead of launching an import that has
    # no source to read from.
    if not file_path and not url:
        return json.dumps(
            {
                "success": False,
                "error": "Provide either file_path or url",
            },
            indent=2,
        )

    args = ["conversations", "import", "--agent-id", agent_id, "--yes", "--json"]
    # Only one source is forwarded; a local file wins over a URL.
    if file_path:
        args.extend(["--file-path", file_path])
    else:
        args.extend(["--url", url])
    # Imports get a longer limit (300 seconds) than _run_cli's default.
    result = _run_cli(args, timeout=300)
    return json.dumps(result, indent=2)
|
|
288
|
+
|
|
289
|
+
|
|
290
|
+
# =============================================================================
|
|
291
|
+
# KNOWLEDGE BASE
|
|
292
|
+
# =============================================================================
|
|
293
|
+
|
|
294
|
+
|
|
295
|
+
@mcp.tool()
async def knowledge_list(agent_id: str, entry_type: str | None = None) -> str:
    """
    List knowledge base entries for an agent.

    Entry types:
    - 'escalate': Triggers escalation to human
    - 'qa': Question-answer pairs
    - 'exact': Returns answer verbatim
    - 'context': Background knowledge

    Args:
        agent_id: UUID of the agent
        entry_type: Optional filter - 'escalate', 'qa', 'exact', or 'context'

    Returns:
        The result of the `knowledge list` CLI call as indented JSON text.
    """
    args = ["knowledge", "list", "--agent-id", agent_id, "--json"]
    # Without an entry type the command is sent with no --type filter.
    if entry_type:
        args.extend(["--type", entry_type])
    result = _run_cli(args)
    return json.dumps(result, indent=2)
|
|
315
|
+
|
|
316
|
+
|
|
317
|
+
@mcp.tool()
async def knowledge_upsert(
    agent_id: str,
    entry_type: str,
    question: str,
    answer: str = "",
) -> str:
    """
    Create or update a knowledge base entry.

    Args:
        agent_id: UUID of the agent
        entry_type: Type - 'escalate', 'qa', 'exact', 'context'
        question: The question/trigger to match
        answer: The answer content; omitted from the command when empty

    Returns:
        The result of the `knowledge upsert` CLI call as indented JSON text.
    """
    command = ["knowledge", "upsert"]
    command += ["--agent-id", agent_id]
    command += ["--type", entry_type]
    command += ["--question", question]
    command += ["--yes", "--json"]
    # An empty answer is not forwarded at all.
    if answer:
        command += ["--answer", answer]
    return json.dumps(_run_cli(command), indent=2)
|
|
344
|
+
|
|
345
|
+
|
|
346
|
+
# =============================================================================
|
|
347
|
+
# TAXONOMY
|
|
348
|
+
# =============================================================================
|
|
349
|
+
|
|
350
|
+
|
|
351
|
+
@mcp.tool()
async def taxonomy_list() -> str:
    """
    List all topics and intents in the shop taxonomy.

    Returns:
        The result of the `taxonomy list --json` CLI call as indented JSON text.
    """
    return json.dumps(_run_cli(["taxonomy", "list", "--json"]), indent=2)
|
|
356
|
+
|
|
357
|
+
|
|
358
|
+
# =============================================================================
|
|
359
|
+
# SHOP SETUP
|
|
360
|
+
# =============================================================================
|
|
361
|
+
|
|
362
|
+
|
|
363
|
+
@mcp.tool()
async def shop_setup(spec_path: str, shop_id: str | None = None) -> str:
    """
    Full shop setup from a YAML spec file.

    Creates shop, configures agents, uploads conversations,
    runs insights, simulation, and syncs knowledge base.

    Args:
        spec_path: Path to YAML spec file
        shop_id: Optional existing shop UUID to configure instead of creating new

    Returns:
        The result of the `shop setup` CLI call as indented JSON text.
    """
    args = ["shop", "setup", "--spec", spec_path, "--yes", "--json"]
    # --shop-id is only passed when an existing shop was specified.
    if shop_id:
        args.extend(["--shop-id", shop_id])
    # Setup is given a 600 second limit rather than _run_cli's default.
    result = _run_cli(args, timeout=600)
    return json.dumps(result, indent=2)
|
|
380
|
+
|
|
381
|
+
|
|
382
|
+
# =============================================================================
|
|
383
|
+
# SIMULATE
|
|
384
|
+
# =============================================================================
|
|
385
|
+
|
|
386
|
+
|
|
387
|
+
@mcp.tool()
async def simulate_run(
    agent_id: str,
    count: int = 10,
    conversation_type: str = "web_chat",
    is_test: bool = False,
) -> str:
    """
    Generate simulated test conversations for an agent.

    Args:
        agent_id: UUID of the agent
        count: Number of conversations to generate
        conversation_type: 'web_chat', 'email', or 'sms'
        is_test: Mark as test conversations

    Returns:
        The result of the `simulate run` CLI call as indented JSON text.
    """
    # The test marker is always stated explicitly, one way or the other.
    test_flag = "--is-test" if is_test else "--no-is-test"
    command = [
        "simulate", "run",
        "--agent-id", agent_id,
        "--count", str(count),
        "--conversation-type", conversation_type,
        "--continue-on-error",
        "--json",
        test_flag,
    ]
    outcome = _run_cli(command, timeout=600)
    return json.dumps(outcome, indent=2)
|
|
417
|
+
|
|
418
|
+
|
|
419
|
+
# =============================================================================
|
|
420
|
+
# TEST: BENCHMARKS
|
|
421
|
+
# =============================================================================
|
|
422
|
+
|
|
423
|
+
|
|
424
|
+
@mcp.tool()
async def test_benchmarks_list() -> str:
    """
    List all test benchmarks in the current shop.

    Returns:
        The result of the `test benchmarks list --json` CLI call as indented
        JSON text.
    """
    return json.dumps(_run_cli(["test", "benchmarks", "list", "--json"]), indent=2)
|
|
429
|
+
|
|
430
|
+
|
|
431
|
+
@mcp.tool()
async def test_benchmarks_create(name: str, agent_id: str) -> str:
    """
    Create a new benchmark.

    Args:
        name: Name for the benchmark
        agent_id: UUID of the agent this benchmark is for

    Returns:
        The result of the `test benchmarks create` CLI call as indented JSON text.
    """
    command = [
        "test", "benchmarks", "create",
        "--name", name,
        "--agent-id", agent_id,
        "--json",
    ]
    return json.dumps(_run_cli(command), indent=2)
|
|
442
|
+
|
|
443
|
+
|
|
444
|
+
# =============================================================================
|
|
445
|
+
# TEST: SCENARIOS
|
|
446
|
+
# =============================================================================
|
|
447
|
+
|
|
448
|
+
|
|
449
|
+
@mcp.tool()
async def test_scenarios_list(
    benchmark_id: str,
    pass_status: str | None = None,
    limit: int = 100,
) -> str:
    """
    List test scenarios in a benchmark.

    Args:
        benchmark_id: UUID of the benchmark
        pass_status: Optional filter - 'pass', 'fail', or 'unrated'
        limit: Maximum scenarios to return (default: 100)

    Returns:
        The result of the `test scenarios list` CLI call as indented JSON text.
    """
    args = [
        "test", "scenarios", "list",
        "--benchmark-id", benchmark_id,
        "--limit", str(limit),
        "--json",
    ]
    # No --pass-status flag is sent unless a filter was requested.
    if pass_status:
        args.extend(["--pass-status", pass_status])
    result = _run_cli(args)
    return json.dumps(result, indent=2)
|
|
468
|
+
|
|
469
|
+
|
|
470
|
+
@mcp.tool()
async def test_scenarios_show(scenario_id: str) -> str:
    """
    Get full details of a test scenario.

    Args:
        scenario_id: UUID of the scenario

    Returns:
        The result of the `test scenarios show` CLI call as indented JSON text.
    """
    command = ["test", "scenarios", "show", "--scenario-id", scenario_id, "--json"]
    return json.dumps(_run_cli(command), indent=2)
|
|
480
|
+
|
|
481
|
+
|
|
482
|
+
@mcp.tool()
async def test_scenarios_update(
    scenario_id: str,
    pass_status: str | None = None,
    feedback: str | None = None,
) -> str:
    """
    Update a scenario's pass status or feedback.

    Only the fields that are provided (and non-empty) are sent to the CLI.

    Args:
        scenario_id: UUID of the scenario
        pass_status: New status - 'pass' or 'fail'
        feedback: Optional feedback notes

    Returns:
        The result of the `test scenarios update` CLI call as indented JSON text.
    """
    # NOTE(review): unlike the other mutating tools, this command is sent
    # without --yes or --json. Confirm whether `test scenarios update`
    # supports them; without --json the stdout is likely returned as plain
    # text under "output".
    args = ["test", "scenarios", "update", "--scenario-id", scenario_id]
    if pass_status:
        args.extend(["--pass-status", pass_status])
    if feedback:
        args.extend(["--feedback", feedback])
    result = _run_cli(args)
    return json.dumps(result, indent=2)
|
|
503
|
+
|
|
504
|
+
|
|
505
|
+
@mcp.tool()
async def test_scenarios_rate(
    conversation_id: str,
    agent_id: str | None = None,
    auto: bool = True,
) -> str:
    """
    Rate a conversation and create a test scenario from it.

    Args:
        conversation_id: UUID of the conversation to rate
        agent_id: Optional agent UUID override
        auto: Auto-compute rating (default: True); when False the command is
            run with --manual

    Returns:
        The result of the `test scenarios rate` CLI call as indented JSON text.
    """
    args = [
        "test", "scenarios", "rate",
        "--conversation-id", conversation_id,
        "--yes", "--json",
    ]
    if agent_id:
        args.extend(["--agent-id", agent_id])
    # Automatic rating needs no flag; only the manual mode is spelled out.
    if not auto:
        args.append("--manual")
    result = _run_cli(args, timeout=60)
    return json.dumps(result, indent=2)
|
|
526
|
+
|
|
527
|
+
|
|
528
|
+
# =============================================================================
|
|
529
|
+
# TEST: COVERAGE
|
|
530
|
+
# =============================================================================
|
|
531
|
+
|
|
532
|
+
|
|
533
|
+
@mcp.tool()
async def test_coverage_summary(agent_id: str, benchmark_id: str | None = None) -> str:
    """
    Summarize test coverage by topic/intent.

    Args:
        agent_id: UUID of the agent
        benchmark_id: Optional benchmark UUID to filter by

    Returns:
        The result of the `test coverage summary` CLI call as indented JSON text.
    """
    args = ["test", "coverage", "summary", "--agent-id", agent_id, "--json"]
    # The benchmark filter is only added when a benchmark id was supplied.
    if benchmark_id:
        args.extend(["--benchmark-id", benchmark_id])
    result = _run_cli(args, timeout=120)
    return json.dumps(result, indent=2)
|
|
547
|
+
|
|
548
|
+
|
|
549
|
+
# =============================================================================
|
|
550
|
+
# TEST: FIX
|
|
551
|
+
# =============================================================================
|
|
552
|
+
|
|
553
|
+
|
|
554
|
+
@mcp.tool()
async def test_fix_context(benchmark_id: str, include_passing: bool = False) -> str:
    """
    Get all context needed to fix failing scenarios.

    Returns failing scenarios with feedback, knowledge base entries,
    and agent guardrails.

    Args:
        benchmark_id: UUID of the benchmark to analyze
        include_passing: Also include passing scenarios for reference

    Returns:
        The result of the `test fix context` CLI call as indented JSON text.
    """
    command = ["test", "fix", "context", "--benchmark-id", benchmark_id, "--json"]
    # Passing scenarios are opt-in.
    if include_passing:
        command += ["--include-passing"]
    context = _run_cli(command, timeout=180)
    return json.dumps(context, indent=2)
|
|
571
|
+
|
|
572
|
+
|
|
573
|
+
@mcp.tool()
async def test_fix_test(
    scenario_id: str,
    benchmark_id: str,
    auto_pass: bool = False,
    expect_escalation: bool | None = None,
) -> str:
    """
    Test a fix by replaying a scenario's input message.

    Args:
        scenario_id: Original scenario UUID to replay
        benchmark_id: Target benchmark UUID for the new scenario
        auto_pass: Automatically mark the new scenario as pass
        expect_escalation: If True, fail if agent doesn't escalate
            (--expect-escalation). If False, --expect-response is passed
            instead. If omitted, neither flag is sent.

    Returns:
        The result of the `test fix test` CLI call as indented JSON text.
    """
    args = [
        "test", "fix", "test",
        "--scenario-id", scenario_id,
        "--benchmark-id", benchmark_id,
        "--retry", "2", "--json",
    ]
    if auto_pass:
        args.append("--auto-pass")
    # Tri-state: identity checks keep None ("no expectation") distinct from
    # an explicit False.
    if expect_escalation is True:
        args.append("--expect-escalation")
    elif expect_escalation is False:
        args.append("--expect-response")
    result = _run_cli(args, timeout=120)
    return json.dumps(result, indent=2)
|
|
603
|
+
|
|
604
|
+
|
|
605
|
+
@mcp.tool()
async def test_fix_batch(
    source_benchmark_id: str,
    target_benchmark_id: str,
    pass_status_filter: str = "fail",
    auto_pass: bool = False,
    limit: int = 0,
    parallel: int = 1,
) -> str:
    """
    Batch test scenarios from one benchmark into another.

    Args:
        source_benchmark_id: Source benchmark with scenarios to test
        target_benchmark_id: Target benchmark for new test scenarios
        pass_status_filter: Filter - 'fail', 'pass', 'unrated', or 'all'
        auto_pass: Automatically mark successful tests as pass
        limit: Max scenarios to test, 0 = all
        parallel: Number of parallel tests 1-10 (values outside that range
            are clamped)

    Returns:
        The result of the `test fix batch` CLI call as indented JSON text.
    """
    # Keep the requested parallelism inside the 1-10 range.
    workers = min(10, max(1, parallel))
    command = [
        "test", "fix", "batch",
        "--source", source_benchmark_id,
        "--target", target_benchmark_id,
        "--pass-status", pass_status_filter,
        "--parallel", str(workers),
        "--retry", "2", "--json",
    ]
    if auto_pass:
        command += ["--auto-pass"]
    # A non-positive limit means "no limit", so the flag is left out.
    if limit > 0:
        command += ["--limit", str(limit)]
    outcome = _run_cli(command, timeout=600)
    return json.dumps(outcome, indent=2)
|
|
639
|
+
|
|
640
|
+
|
|
641
|
+
@mcp.tool()
async def test_fix_status(source_benchmark_id: str, target_benchmark_id: str) -> str:
    """
    Track fix progress between benchmarks.

    Args:
        source_benchmark_id: Source benchmark with original scenarios
        target_benchmark_id: Target benchmark with test results

    Returns:
        The result of the `test fix status` CLI call as indented JSON text.
    """
    command = ["test", "fix", "status"]
    command += ["--source", source_benchmark_id]
    command += ["--target", target_benchmark_id]
    command += ["--json"]
    return json.dumps(_run_cli(command), indent=2)
|
|
657
|
+
|
|
658
|
+
|
|
659
|
+
@mcp.tool()
async def test_fix_summary(benchmark_id: str) -> str:
    """
    Get a quick summary of benchmark pass/fail status.

    Args:
        benchmark_id: UUID of the benchmark

    Returns:
        The result of the `test fix summary` CLI call as indented JSON text.
    """
    command = ["test", "fix", "summary", "--benchmark-id", benchmark_id, "--json"]
    return json.dumps(_run_cli(command), indent=2)
|
|
669
|
+
|
|
670
|
+
|
|
671
|
+
# =============================================================================
|
|
672
|
+
# INSIGHTS
|
|
673
|
+
# =============================================================================
|
|
674
|
+
|
|
675
|
+
|
|
676
|
+
@mcp.tool()
async def insights_run(
    query: str,
    date_range: str = "relative:-30d,",
    agent_ids: str | None = None,
    wait: bool = True,
) -> str:
    """
    Generate an analytics insights report.

    Args:
        query: Natural language query like "What are the top customer issues?"
        date_range: Date range - 'relative:-30d,' for last 30 days
        agent_ids: Comma-separated agent UUIDs to filter by
        wait: Wait for report completion

    Returns:
        The result of the `insights run` CLI call as indented JSON text.
    """
    args = ["insights", "run", "--query", query, "--date-range", date_range, "--json"]
    # The agent filter is only added when agent ids were supplied.
    if agent_ids:
        args.extend(["--agent-ids", agent_ids])
    if wait:
        args.append("--wait")
    # Report generation gets a 300 second limit rather than the default.
    result = _run_cli(args, timeout=300)
    return json.dumps(result, indent=2)
|
|
699
|
+
|
|
700
|
+
|
|
701
|
+
@mcp.tool()
async def insights_list(limit: int = 10) -> str:
    """
    List recent insights reports.

    Args:
        limit: Maximum reports to return

    Returns:
        The result of the `insights list` CLI call as indented JSON text.
    """
    command = ["insights", "list", "--limit", str(limit), "--json"]
    return json.dumps(_run_cli(command), indent=2)
|
|
711
|
+
|
|
712
|
+
|
|
713
|
+
@mcp.tool()
async def insights_show(report_id: str) -> str:
    """
    Get the full content of an insights report.

    Args:
        report_id: UUID of the report

    Returns:
        The result of the `insights show` CLI call as indented JSON text.
    """
    command = ["insights", "show", "--report-id", report_id, "--json"]
    return json.dumps(_run_cli(command), indent=2)
|
|
723
|
+
|
|
724
|
+
|
|
725
|
+
# =============================================================================
|
|
726
|
+
# ENTRY POINT
|
|
727
|
+
# =============================================================================
|
|
728
|
+
|
|
729
|
+
|
|
730
|
+
def main():
    """Configure logging and start the MCP server on the stdio transport."""
    import logging

    # Log WARNING and above to stderr; presumably this keeps stdout free for
    # the stdio transport's protocol messages.
    logging.basicConfig(stream=sys.stderr, level=logging.WARNING)
    mcp.run(transport="stdio")
|
|
735
|
+
|
|
736
|
+
|
|
737
|
+
# Start the server when the module is executed directly, e.g. with
# `python -m applied_cli.mcp_server`.
if __name__ == "__main__":
    main()
|