apteva 0.4.11 → 0.4.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,6 +2,8 @@
2
2
  // This allows the meta agent (Apteva Assistant) to control the platform
3
3
 
4
4
  import { AgentDB, ProjectDB, McpServerDB, SkillDB, TelemetryDB, generateId } from "./db";
5
+ import { TestCaseDB, TestRunDB } from "./db-tests";
6
+ import { runTest, runAll } from "./test-runner";
5
7
  import { getProvidersWithStatus, PROVIDERS } from "./providers";
6
8
  import { startAgentProcess, setAgentStatus, toApiAgent, META_AGENT_ID, agentFetch } from "./routes/api/agent-utils";
7
9
  import { agentProcesses } from "./server";
@@ -48,24 +50,47 @@ const PLATFORM_TOOLS = [
48
50
  },
49
51
  {
50
52
  name: "create_agent",
51
- description: "Create a new AI agent. Requires a name, provider, and model. The provider must have an API key configured.",
53
+ description: `Create a new AI agent. The provider must have an API key configured — use list_providers first to check.
54
+
55
+ PROVIDERS & MODELS (use list_providers to see which have keys):
56
+ - anthropic: claude-sonnet-4-5 (recommended), claude-haiku-4-5 (fast/cheap)
57
+ - openai: gpt-4o (recommended), gpt-4o-mini (fast/cheap)
58
+ - groq: llama-3.3-70b-versatile (recommended), llama-3.1-8b-instant (fast)
59
+ - gemini: gemini-3-pro-preview (recommended), gemini-3-flash-preview (fast)
60
+ - xai: grok-2 (recommended), grok-2-mini (fast)
61
+ - together: moonshotai/Kimi-K2.5 (recommended), moonshotai/Kimi-K2-Thinking (reasoning)
62
+ - fireworks: accounts/fireworks/models/kimi-k2p5, accounts/fireworks/models/kimi-k2-thinking
63
+ - moonshot: moonshot-v1-128k (recommended), moonshot-v1-32k (fast)
64
+ - ollama: llama3.3, llama3.2, qwen2.5, mistral, deepseek-r1 (local, no API key needed)
65
+
66
+ FEATURES (all optional, default false):
67
+ - memory: Persistent memory across conversations — agent remembers past interactions. Requires OpenAI key for embeddings.
68
+ - tasks: Task scheduling — agent can create, schedule, and track tasks. Supports recurring tasks.
69
+ - vision: Image & PDF understanding — agent can analyze uploaded images and PDFs.
70
+ - mcp: MCP tool use — agent can use tools from assigned MCP servers. Enable this if you plan to assign MCP servers.
71
+ - files: File management — agent can read, write, and manage files in its workspace.
72
+
73
+ TIPS:
74
+ - Always provide a descriptive system_prompt that tells the agent what it does and how to behave.
75
+ - Assign to a project_id to organize agents. Use list_projects to see available projects.
76
+ - After creating, use start_agent to run it. Then assign MCP servers or skills as needed.`,
52
77
  inputSchema: {
53
78
  type: "object",
54
79
  properties: {
55
- name: { type: "string", description: "Agent name" },
56
- provider: { type: "string", description: "LLM provider ID (e.g. anthropic, openai, groq, gemini, xai, together, fireworks, ollama)" },
57
- model: { type: "string", description: "Model ID (e.g. claude-sonnet-4-5, gpt-4o, llama-3.3-70b-versatile)" },
58
- system_prompt: { type: "string", description: "System prompt for the agent (optional)" },
59
- project_id: { type: "string", description: "Project ID to assign the agent to (optional)" },
80
+ name: { type: "string", description: "Agent name (e.g. 'Customer Support', 'Code Reviewer')" },
81
+ provider: { type: "string", description: "LLM provider ID: anthropic, openai, groq, gemini, xai, together, fireworks, moonshot, ollama" },
82
+ model: { type: "string", description: "Model ID see tool description for full list per provider" },
83
+ system_prompt: { type: "string", description: "Instructions for the agent. Describe its role, personality, and capabilities. This is the most important field for agent behavior." },
84
+ project_id: { type: "string", description: "Project ID to assign the agent to (optional). Use list_projects to find IDs." },
60
85
  features: {
61
86
  type: "object",
62
- description: "Feature flags (optional). All default to false.",
87
+ description: "Feature flags to enable. All default to false. See tool description for details on each feature.",
63
88
  properties: {
64
- memory: { type: "boolean" },
65
- tasks: { type: "boolean" },
66
- vision: { type: "boolean" },
67
- mcp: { type: "boolean" },
68
- files: { type: "boolean" },
89
+ memory: { type: "boolean", description: "Persistent memory across conversations (requires OpenAI key for embeddings)" },
90
+ tasks: { type: "boolean", description: "Task scheduling and tracking" },
91
+ vision: { type: "boolean", description: "Image and PDF understanding" },
92
+ mcp: { type: "boolean", description: "MCP tool use — required if assigning MCP servers" },
93
+ files: { type: "boolean", description: "File read/write in agent workspace" },
69
94
  },
70
95
  },
71
96
  },
@@ -74,17 +99,27 @@ const PLATFORM_TOOLS = [
74
99
  },
75
100
  {
76
101
  name: "update_agent",
77
- description: "Update an existing agent's configuration. Only provide fields you want to change.",
102
+ description: "Update an existing agent's configuration. Only provide fields you want to change. If the agent is running, restart it after updating for changes to take effect.",
78
103
  inputSchema: {
79
104
  type: "object",
80
105
  properties: {
81
106
  agent_id: { type: "string", description: "The agent ID to update" },
82
- name: { type: "string", description: "New name" },
83
- model: { type: "string", description: "New model ID" },
84
- provider: { type: "string", description: "New provider ID" },
85
- system_prompt: { type: "string", description: "New system prompt" },
86
- project_id: { type: "string", description: "New project ID (or null to unassign)" },
87
- features: { type: "object", description: "Feature flags to update" },
107
+ name: { type: "string", description: "New display name" },
108
+ model: { type: "string", description: "New model ID (see create_agent for available models per provider)" },
109
+ provider: { type: "string", description: "New provider ID (the new provider must have an API key configured)" },
110
+ system_prompt: { type: "string", description: "New system prompt / instructions" },
111
+ project_id: { type: "string", description: "New project ID, or null to unassign from project" },
112
+ features: {
113
+ type: "object",
114
+ description: "Feature flags to update (only provided flags are changed, others remain as-is)",
115
+ properties: {
116
+ memory: { type: "boolean" },
117
+ tasks: { type: "boolean" },
118
+ vision: { type: "boolean" },
119
+ mcp: { type: "boolean" },
120
+ files: { type: "boolean" },
121
+ },
122
+ },
88
123
  },
89
124
  required: ["agent_id"],
90
125
  },
@@ -102,7 +137,7 @@ const PLATFORM_TOOLS = [
102
137
  },
103
138
  {
104
139
  name: "start_agent",
105
- description: "Start a stopped agent. The agent's provider must have an API key configured.",
140
+ description: "Start a stopped agent. The agent's provider must have an API key configured. Starting spawns a process, waits for health check, and pushes configuration (model, features, MCP servers, skills). Takes a few seconds.",
106
141
  inputSchema: {
107
142
  type: "object",
108
143
  properties: {
@@ -174,18 +209,26 @@ const PLATFORM_TOOLS = [
174
209
  },
175
210
  {
176
211
  name: "create_mcp_server",
177
- description: "Create a new MCP server. For HTTP (remote) servers, provide url and optional headers. For npm package servers, provide a package name.",
212
+ description: `Create a new MCP server configuration. MCP servers provide tools that agents can use (web search, file access, APIs, etc).
213
+
214
+ SERVER TYPES:
215
+ - http: Remote MCP server accessible via URL. Provide url and optional auth headers. Ready to use immediately.
216
+ - npm: Node.js MCP server from npm. Provide package name (e.g. '@modelcontextprotocol/server-filesystem'). Needs to be started.
217
+ - pip: Python MCP server from PyPI. Provide package name. Needs to be started.
218
+ - custom: Custom command. Provide command and args. Needs to be started.
219
+
220
+ After creating, assign to agents with assign_mcp_server_to_agent. HTTP servers work immediately; npm/pip/custom servers need to be started from the MCP page in the UI.`,
178
221
  inputSchema: {
179
222
  type: "object",
180
223
  properties: {
181
- name: { type: "string", description: "Server display name" },
182
- type: { type: "string", description: "Server type: 'http' (remote URL), 'npm' (npm package), 'pip' (Python package), 'custom' (custom command)" },
183
- url: { type: "string", description: "For http type: the remote MCP server URL" },
184
- headers: { type: "object", description: "For http type: auth headers (e.g. {\"Authorization\": \"Bearer ...\"})" },
185
- package: { type: "string", description: "For npm/pip type: the package name (e.g. '@modelcontextprotocol/server-filesystem')" },
186
- command: { type: "string", description: "For custom type: the command to run" },
187
- args: { type: "string", description: "Command arguments (optional)" },
188
- project_id: { type: "string", description: "Project ID to scope the server to (optional, null = global)" },
224
+ name: { type: "string", description: "Display name (e.g. 'Filesystem', 'Web Search', 'GitHub')" },
225
+ type: { type: "string", description: "Server type: http, npm, pip, or custom" },
226
+ url: { type: "string", description: "For http type: the remote MCP server URL (e.g. 'https://mcp.example.com/sse')" },
227
+ headers: { type: "object", description: "For http type: auth headers as key-value pairs" },
228
+ package: { type: "string", description: "For npm/pip type: package name" },
229
+ command: { type: "string", description: "For custom type: executable command" },
230
+ args: { type: "string", description: "Command arguments string (optional)" },
231
+ project_id: { type: "string", description: "Scope to a project (optional). null = available globally to all agents." },
189
232
  },
190
233
  required: ["name", "type"],
191
234
  },
@@ -203,7 +246,7 @@ const PLATFORM_TOOLS = [
203
246
  },
204
247
  {
205
248
  name: "assign_mcp_server_to_agent",
206
- description: "Assign an MCP server to an agent so the agent can use its tools. The agent must have MCP feature enabled.",
249
+ description: "Assign an MCP server to an agent so the agent can use its tools. This automatically enables the MCP feature on the agent. If the agent is running, restart it for changes to take effect.",
207
250
  inputSchema: {
208
251
  type: "object",
209
252
  properties: {
@@ -248,7 +291,7 @@ const PLATFORM_TOOLS = [
248
291
  // Skills management
249
292
  {
250
293
  name: "list_skills",
251
- description: "List all installed skills. Skills are reusable instruction sets that give agents specialized capabilities.",
294
+ description: "List all installed skills. Skills are reusable instruction sets (like prompt templates with tool permissions) that give agents specialized capabilities. Skills can be installed from the SkillsMP marketplace or created locally.",
252
295
  inputSchema: {
253
296
  type: "object",
254
297
  properties: {
@@ -281,7 +324,7 @@ const PLATFORM_TOOLS = [
281
324
  },
282
325
  {
283
326
  name: "assign_skill_to_agent",
284
- description: "Assign a skill to an agent so it can use those instructions.",
327
+ description: "Assign a skill to an agent. The skill's instructions and tool permissions will be pushed to the agent on next start/restart.",
285
328
  inputSchema: {
286
329
  type: "object",
287
330
  properties: {
@@ -314,6 +357,77 @@ const PLATFORM_TOOLS = [
314
357
  required: ["skill_id"],
315
358
  },
316
359
  },
360
+ // Test tools
361
+ {
362
+ name: "list_tests",
363
+ description: "List all test cases. Tests validate agent workflows by sending a message and using an LLM judge to evaluate the result.",
364
+ inputSchema: {
365
+ type: "object",
366
+ properties: {
367
+ project_id: { type: "string", description: "Optional project ID to filter tests" },
368
+ },
369
+ },
370
+ },
371
+ {
372
+ name: "create_test",
373
+ description: "Create a new test case for an agent. The test sends a message to the agent, then an LLM judge evaluates the conversation against the success criteria.",
374
+ inputSchema: {
375
+ type: "object",
376
+ properties: {
377
+ name: { type: "string", description: "Test name" },
378
+ agent_id: { type: "string", description: "Agent ID to test" },
379
+ input_message: { type: "string", description: "Message to send to the agent" },
380
+ eval_criteria: { type: "string", description: "Natural language success criteria for the LLM judge. E.g. 'The agent should use the post_tweet tool and confirm the post was made.'" },
381
+ description: { type: "string", description: "Optional description" },
382
+ timeout_ms: { type: "number", description: "Timeout in ms (default 60000)" },
383
+ },
384
+ required: ["name", "agent_id", "input_message", "eval_criteria"],
385
+ },
386
+ },
387
+ {
388
+ name: "run_test",
389
+ description: "Run a test case. The agent must be running. Returns pass/fail with LLM judge reasoning.",
390
+ inputSchema: {
391
+ type: "object",
392
+ properties: {
393
+ test_id: { type: "string", description: "Test case ID to run. Use list_tests to find IDs." },
394
+ },
395
+ required: ["test_id"],
396
+ },
397
+ },
398
+ {
399
+ name: "run_all_tests",
400
+ description: "Run all test cases (or specific ones). Returns summary of pass/fail results.",
401
+ inputSchema: {
402
+ type: "object",
403
+ properties: {
404
+ test_case_ids: { type: "array", items: { type: "string" }, description: "Optional array of test case IDs. If empty, runs all tests." },
405
+ },
406
+ },
407
+ },
408
+ {
409
+ name: "get_test_results",
410
+ description: "Get run history for a test case. Shows pass/fail status, judge reasoning, and duration.",
411
+ inputSchema: {
412
+ type: "object",
413
+ properties: {
414
+ test_id: { type: "string", description: "Test case ID" },
415
+ limit: { type: "number", description: "Max results to return (default 10)" },
416
+ },
417
+ required: ["test_id"],
418
+ },
419
+ },
420
+ {
421
+ name: "delete_test",
422
+ description: "Delete a test case and all its run history.",
423
+ inputSchema: {
424
+ type: "object",
425
+ properties: {
426
+ test_id: { type: "string", description: "Test case ID to delete" },
427
+ },
428
+ required: ["test_id"],
429
+ },
430
+ },
317
431
  ];
318
432
 
319
433
  // Tool execution handlers
@@ -725,6 +839,91 @@ async function executeTool(name: string, args: Record<string, any>): Promise<{ c
725
839
  return { content: [{ type: "text", text: `Skill "${skill.name}" deleted${agentsWithSkill.length > 0 ? ` (unassigned from ${agentsWithSkill.length} agent(s))` : ""}` }] };
726
840
  }
727
841
 
842
+ // Test tools
843
+ case "list_tests": {
844
+ const tests = TestCaseDB.findAll(args.project_id);
845
+ const result = tests.map(tc => {
846
+ const agent = AgentDB.findById(tc.agent_id);
847
+ const lastRun = TestRunDB.getLatestByTestCase(tc.id);
848
+ return {
849
+ id: tc.id,
850
+ name: tc.name,
851
+ agent_id: tc.agent_id,
852
+ agent_name: agent?.name || "Unknown",
853
+ input_message: tc.input_message,
854
+ eval_criteria: tc.eval_criteria,
855
+ timeout_ms: tc.timeout_ms,
856
+ last_status: lastRun?.status || null,
857
+ last_reasoning: lastRun?.judge_reasoning || null,
858
+ };
859
+ });
860
+ return { content: [{ type: "text", text: JSON.stringify(result, null, 2) }] };
861
+ }
862
+
863
+ case "create_test": {
864
+ const agent = AgentDB.findById(args.agent_id);
865
+ if (!agent) {
866
+ return { content: [{ type: "text", text: `Agent not found: ${args.agent_id}` }], isError: true };
867
+ }
868
+ const tc = TestCaseDB.create({
869
+ name: args.name,
870
+ agent_id: args.agent_id,
871
+ input_message: args.input_message,
872
+ eval_criteria: args.eval_criteria,
873
+ description: args.description,
874
+ timeout_ms: args.timeout_ms,
875
+ });
876
+ return { content: [{ type: "text", text: `Test "${tc.name}" created (id: ${tc.id}) for agent "${agent.name}". Use run_test to execute it.` }] };
877
+ }
878
+
879
+ case "run_test": {
880
+ const tc = TestCaseDB.findById(args.test_id);
881
+ if (!tc) {
882
+ return { content: [{ type: "text", text: `Test not found: ${args.test_id}` }], isError: true };
883
+ }
884
+ const result = await runTest(tc);
885
+ const agent = AgentDB.findById(tc.agent_id);
886
+ return { content: [{ type: "text", text: `Test "${tc.name}" (agent: ${agent?.name || tc.agent_id}): ${result.status.toUpperCase()}${result.duration_ms ? ` in ${(result.duration_ms / 1000).toFixed(1)}s` : ""}\n\nJudge: ${result.judge_reasoning || result.error || "No reasoning"}` }] };
887
+ }
888
+
889
+ case "run_all_tests": {
890
+ const results = await runAll(args.test_case_ids);
891
+ const passed = results.filter(r => r.status === "passed").length;
892
+ const failed = results.filter(r => r.status === "failed").length;
893
+ const errors = results.filter(r => r.status === "error").length;
894
+ const lines = results.map(r => {
895
+ const tc = TestCaseDB.findById(r.test_case_id);
896
+ return `- ${tc?.name || r.test_case_id}: ${r.status.toUpperCase()}${r.judge_reasoning ? ` — ${r.judge_reasoning}` : ""}${r.error ? ` — Error: ${r.error}` : ""}`;
897
+ });
898
+ return { content: [{ type: "text", text: `Test Results: ${passed} passed, ${failed} failed, ${errors} errors (${results.length} total)\n\n${lines.join("\n")}` }] };
899
+ }
900
+
901
+ case "get_test_results": {
902
+ const tc = TestCaseDB.findById(args.test_id);
903
+ if (!tc) {
904
+ return { content: [{ type: "text", text: `Test not found: ${args.test_id}` }], isError: true };
905
+ }
906
+ const runs = TestRunDB.findByTestCase(args.test_id, args.limit || 10);
907
+ const result = runs.map(r => ({
908
+ id: r.id,
909
+ status: r.status,
910
+ duration_ms: r.duration_ms,
911
+ judge_reasoning: r.judge_reasoning,
912
+ error: r.error,
913
+ created_at: r.created_at,
914
+ }));
915
+ return { content: [{ type: "text", text: `Run history for "${tc.name}":\n${JSON.stringify(result, null, 2)}` }] };
916
+ }
917
+
918
+ case "delete_test": {
919
+ const tc = TestCaseDB.findById(args.test_id);
920
+ if (!tc) {
921
+ return { content: [{ type: "text", text: `Test not found: ${args.test_id}` }], isError: true };
922
+ }
923
+ TestCaseDB.delete(args.test_id);
924
+ return { content: [{ type: "text", text: `Test "${tc.name}" deleted.` }] };
925
+ }
926
+
728
927
  default:
729
928
  return { content: [{ type: "text", text: `Unknown tool: ${name}` }], isError: true };
730
929
  }
@@ -772,7 +971,19 @@ export async function handlePlatformMcpRequest(req: Request): Promise<Response>
772
971
  name: "apteva-platform",
773
972
  version: "1.0.0",
774
973
  },
775
- instructions: "This MCP server provides tools to control the Apteva AI agent platform. You can create, start, stop, and manage agents, projects, and view system status.",
974
+ instructions: `This MCP server controls the Apteva AI agent management platform.
975
+
976
+ You can manage:
977
+ - AGENTS: Create, configure, start, stop, and delete AI agents. Each agent has a provider (LLM), model, system prompt, and optional features (memory, tasks, vision, MCP tools, files).
978
+ - PROJECTS: Organize agents into projects for grouping.
979
+ - MCP SERVERS: Tool integrations that give agents capabilities (web search, file access, APIs). Assign servers to agents.
980
+ - SKILLS: Reusable instruction sets that specialize agent behavior. Assign skills to agents.
981
+ - PROVIDERS: View which LLM providers have API keys configured.
982
+ - TESTS: Create and run automated tests for agent workflows. Tests send a message to an agent, then an LLM judge evaluates the response against success criteria. Use list_tests, create_test, run_test, run_all_tests, get_test_results, delete_test.
983
+
984
+ Typical workflow: list_providers → create_agent → assign MCP servers/skills → start_agent.
985
+ Test workflow: create_test (set agent, message, eval criteria) → run_test → check results.
986
+ Always use list_providers first to check which providers have API keys before creating agents.`,
776
987
  };
777
988
  break;
778
989
  }