apteva 0.4.12 → 0.4.15
This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/dist/App.jdzxkzm1.js +228 -0
- package/dist/index.html +1 -1
- package/dist/styles.css +1 -1
- package/package.json +1 -1
- package/src/auth/middleware.ts +42 -26
- package/src/crypto.ts +2 -2
- package/src/db-tests.ts +174 -0
- package/src/db.ts +302 -5
- package/src/integrations/agentdojo.ts +168 -42
- package/src/mcp-client.ts +15 -9
- package/src/mcp-platform.ts +160 -0
- package/src/openapi.ts +416 -21
- package/src/routes/api/agent-utils.ts +2 -2
- package/src/routes/api/api-keys.ts +95 -0
- package/src/routes/api/integrations.ts +1 -1
- package/src/routes/api/mcp.ts +2 -2
- package/src/routes/api/system.ts +10 -1
- package/src/routes/api/tests.ts +148 -0
- package/src/routes/api.ts +4 -0
- package/src/server.ts +2 -1
- package/src/test-runner.ts +598 -0
- package/src/web/App.tsx +23 -10
- package/src/web/components/agents/AgentPanel.tsx +4 -8
- package/src/web/components/common/Icons.tsx +8 -0
- package/src/web/components/dashboard/Dashboard.tsx +2 -4
- package/src/web/components/index.ts +1 -0
- package/src/web/components/layout/Sidebar.tsx +7 -1
- package/src/web/components/settings/SettingsPage.tsx +288 -5
- package/src/web/components/skills/SkillsPage.tsx +1 -1
- package/src/web/components/tasks/TasksPage.tsx +8 -3
- package/src/web/components/telemetry/TelemetryPage.tsx +2 -5
- package/src/web/components/tests/TestsPage.tsx +580 -0
- package/src/web/context/index.ts +1 -1
- package/src/web/types.ts +1 -1
- package/dist/App.9ph8javh.js +0 -228
package/src/mcp-platform.ts
CHANGED
|
@@ -2,6 +2,8 @@
|
|
|
2
2
|
// This allows the meta agent (Apteva Assistant) to control the platform
|
|
3
3
|
|
|
4
4
|
import { AgentDB, ProjectDB, McpServerDB, SkillDB, TelemetryDB, generateId } from "./db";
|
|
5
|
+
import { TestCaseDB, TestRunDB } from "./db-tests";
|
|
6
|
+
import { runTest, runAll } from "./test-runner";
|
|
5
7
|
import { getProvidersWithStatus, PROVIDERS } from "./providers";
|
|
6
8
|
import { startAgentProcess, setAgentStatus, toApiAgent, META_AGENT_ID, agentFetch } from "./routes/api/agent-utils";
|
|
7
9
|
import { agentProcesses } from "./server";
|
|
@@ -355,6 +357,77 @@ After creating, assign to agents with assign_mcp_server_to_agent. HTTP servers w
|
|
|
355
357
|
required: ["skill_id"],
|
|
356
358
|
},
|
|
357
359
|
},
|
|
360
|
+
// Test tools
|
|
361
|
+
{
|
|
362
|
+
name: "list_tests",
|
|
363
|
+
description: "List all test cases. Tests validate agent workflows by sending a message and using an LLM judge to evaluate the result.",
|
|
364
|
+
inputSchema: {
|
|
365
|
+
type: "object",
|
|
366
|
+
properties: {
|
|
367
|
+
project_id: { type: "string", description: "Optional project ID to filter tests" },
|
|
368
|
+
},
|
|
369
|
+
},
|
|
370
|
+
},
|
|
371
|
+
{
|
|
372
|
+
name: "create_test",
|
|
373
|
+
description: "Create a new test case for an agent. The test sends a message to the agent, then an LLM judge evaluates the conversation against the success criteria.",
|
|
374
|
+
inputSchema: {
|
|
375
|
+
type: "object",
|
|
376
|
+
properties: {
|
|
377
|
+
name: { type: "string", description: "Test name" },
|
|
378
|
+
agent_id: { type: "string", description: "Agent ID to test" },
|
|
379
|
+
input_message: { type: "string", description: "Message to send to the agent" },
|
|
380
|
+
eval_criteria: { type: "string", description: "Natural language success criteria for the LLM judge. E.g. 'The agent should use the post_tweet tool and confirm the post was made.'" },
|
|
381
|
+
description: { type: "string", description: "Optional description" },
|
|
382
|
+
timeout_ms: { type: "number", description: "Timeout in ms (default 60000)" },
|
|
383
|
+
},
|
|
384
|
+
required: ["name", "agent_id", "input_message", "eval_criteria"],
|
|
385
|
+
},
|
|
386
|
+
},
|
|
387
|
+
{
|
|
388
|
+
name: "run_test",
|
|
389
|
+
description: "Run a test case. The agent must be running. Returns pass/fail with LLM judge reasoning.",
|
|
390
|
+
inputSchema: {
|
|
391
|
+
type: "object",
|
|
392
|
+
properties: {
|
|
393
|
+
test_id: { type: "string", description: "Test case ID to run. Use list_tests to find IDs." },
|
|
394
|
+
},
|
|
395
|
+
required: ["test_id"],
|
|
396
|
+
},
|
|
397
|
+
},
|
|
398
|
+
{
|
|
399
|
+
name: "run_all_tests",
|
|
400
|
+
description: "Run all test cases (or specific ones). Returns summary of pass/fail results.",
|
|
401
|
+
inputSchema: {
|
|
402
|
+
type: "object",
|
|
403
|
+
properties: {
|
|
404
|
+
test_case_ids: { type: "array", items: { type: "string" }, description: "Optional array of test case IDs. If empty, runs all tests." },
|
|
405
|
+
},
|
|
406
|
+
},
|
|
407
|
+
},
|
|
408
|
+
{
|
|
409
|
+
name: "get_test_results",
|
|
410
|
+
description: "Get run history for a test case. Shows pass/fail status, judge reasoning, and duration.",
|
|
411
|
+
inputSchema: {
|
|
412
|
+
type: "object",
|
|
413
|
+
properties: {
|
|
414
|
+
test_id: { type: "string", description: "Test case ID" },
|
|
415
|
+
limit: { type: "number", description: "Max results to return (default 10)" },
|
|
416
|
+
},
|
|
417
|
+
required: ["test_id"],
|
|
418
|
+
},
|
|
419
|
+
},
|
|
420
|
+
{
|
|
421
|
+
name: "delete_test",
|
|
422
|
+
description: "Delete a test case and all its run history.",
|
|
423
|
+
inputSchema: {
|
|
424
|
+
type: "object",
|
|
425
|
+
properties: {
|
|
426
|
+
test_id: { type: "string", description: "Test case ID to delete" },
|
|
427
|
+
},
|
|
428
|
+
required: ["test_id"],
|
|
429
|
+
},
|
|
430
|
+
},
|
|
358
431
|
];
|
|
359
432
|
|
|
360
433
|
// Tool execution handlers
|
|
@@ -766,6 +839,91 @@ async function executeTool(name: string, args: Record<string, any>): Promise<{ c
|
|
|
766
839
|
return { content: [{ type: "text", text: `Skill "${skill.name}" deleted${agentsWithSkill.length > 0 ? ` (unassigned from ${agentsWithSkill.length} agent(s))` : ""}` }] };
|
|
767
840
|
}
|
|
768
841
|
|
|
842
|
+
// Test tools
|
|
843
|
+
case "list_tests": {
|
|
844
|
+
const tests = TestCaseDB.findAll(args.project_id);
|
|
845
|
+
const result = tests.map(tc => {
|
|
846
|
+
const agent = AgentDB.findById(tc.agent_id);
|
|
847
|
+
const lastRun = TestRunDB.getLatestByTestCase(tc.id);
|
|
848
|
+
return {
|
|
849
|
+
id: tc.id,
|
|
850
|
+
name: tc.name,
|
|
851
|
+
agent_id: tc.agent_id,
|
|
852
|
+
agent_name: agent?.name || "Unknown",
|
|
853
|
+
input_message: tc.input_message,
|
|
854
|
+
eval_criteria: tc.eval_criteria,
|
|
855
|
+
timeout_ms: tc.timeout_ms,
|
|
856
|
+
last_status: lastRun?.status || null,
|
|
857
|
+
last_reasoning: lastRun?.judge_reasoning || null,
|
|
858
|
+
};
|
|
859
|
+
});
|
|
860
|
+
return { content: [{ type: "text", text: JSON.stringify(result, null, 2) }] };
|
|
861
|
+
}
|
|
862
|
+
|
|
863
|
+
case "create_test": {
|
|
864
|
+
const agent = AgentDB.findById(args.agent_id);
|
|
865
|
+
if (!agent) {
|
|
866
|
+
return { content: [{ type: "text", text: `Agent not found: ${args.agent_id}` }], isError: true };
|
|
867
|
+
}
|
|
868
|
+
const tc = TestCaseDB.create({
|
|
869
|
+
name: args.name,
|
|
870
|
+
agent_id: args.agent_id,
|
|
871
|
+
input_message: args.input_message,
|
|
872
|
+
eval_criteria: args.eval_criteria,
|
|
873
|
+
description: args.description,
|
|
874
|
+
timeout_ms: args.timeout_ms,
|
|
875
|
+
});
|
|
876
|
+
return { content: [{ type: "text", text: `Test "${tc.name}" created (id: ${tc.id}) for agent "${agent.name}". Use run_test to execute it.` }] };
|
|
877
|
+
}
|
|
878
|
+
|
|
879
|
+
case "run_test": {
|
|
880
|
+
const tc = TestCaseDB.findById(args.test_id);
|
|
881
|
+
if (!tc) {
|
|
882
|
+
return { content: [{ type: "text", text: `Test not found: ${args.test_id}` }], isError: true };
|
|
883
|
+
}
|
|
884
|
+
const result = await runTest(tc);
|
|
885
|
+
const agent = AgentDB.findById(tc.agent_id);
|
|
886
|
+
return { content: [{ type: "text", text: `Test "${tc.name}" (agent: ${agent?.name || tc.agent_id}): ${result.status.toUpperCase()}${result.duration_ms ? ` in ${(result.duration_ms / 1000).toFixed(1)}s` : ""}\n\nJudge: ${result.judge_reasoning || result.error || "No reasoning"}` }] };
|
|
887
|
+
}
|
|
888
|
+
|
|
889
|
+
case "run_all_tests": {
|
|
890
|
+
const results = await runAll(args.test_case_ids);
|
|
891
|
+
const passed = results.filter(r => r.status === "passed").length;
|
|
892
|
+
const failed = results.filter(r => r.status === "failed").length;
|
|
893
|
+
const errors = results.filter(r => r.status === "error").length;
|
|
894
|
+
const lines = results.map(r => {
|
|
895
|
+
const tc = TestCaseDB.findById(r.test_case_id);
|
|
896
|
+
return `- ${tc?.name || r.test_case_id}: ${r.status.toUpperCase()}${r.judge_reasoning ? ` — ${r.judge_reasoning}` : ""}${r.error ? ` — Error: ${r.error}` : ""}`;
|
|
897
|
+
});
|
|
898
|
+
return { content: [{ type: "text", text: `Test Results: ${passed} passed, ${failed} failed, ${errors} errors (${results.length} total)\n\n${lines.join("\n")}` }] };
|
|
899
|
+
}
|
|
900
|
+
|
|
901
|
+
case "get_test_results": {
|
|
902
|
+
const tc = TestCaseDB.findById(args.test_id);
|
|
903
|
+
if (!tc) {
|
|
904
|
+
return { content: [{ type: "text", text: `Test not found: ${args.test_id}` }], isError: true };
|
|
905
|
+
}
|
|
906
|
+
const runs = TestRunDB.findByTestCase(args.test_id, args.limit || 10);
|
|
907
|
+
const result = runs.map(r => ({
|
|
908
|
+
id: r.id,
|
|
909
|
+
status: r.status,
|
|
910
|
+
duration_ms: r.duration_ms,
|
|
911
|
+
judge_reasoning: r.judge_reasoning,
|
|
912
|
+
error: r.error,
|
|
913
|
+
created_at: r.created_at,
|
|
914
|
+
}));
|
|
915
|
+
return { content: [{ type: "text", text: `Run history for "${tc.name}":\n${JSON.stringify(result, null, 2)}` }] };
|
|
916
|
+
}
|
|
917
|
+
|
|
918
|
+
case "delete_test": {
|
|
919
|
+
const tc = TestCaseDB.findById(args.test_id);
|
|
920
|
+
if (!tc) {
|
|
921
|
+
return { content: [{ type: "text", text: `Test not found: ${args.test_id}` }], isError: true };
|
|
922
|
+
}
|
|
923
|
+
TestCaseDB.delete(args.test_id);
|
|
924
|
+
return { content: [{ type: "text", text: `Test "${tc.name}" deleted.` }] };
|
|
925
|
+
}
|
|
926
|
+
|
|
769
927
|
default:
|
|
770
928
|
return { content: [{ type: "text", text: `Unknown tool: ${name}` }], isError: true };
|
|
771
929
|
}
|
|
@@ -821,8 +979,10 @@ You can manage:
|
|
|
821
979
|
- MCP SERVERS: Tool integrations that give agents capabilities (web search, file access, APIs). Assign servers to agents.
|
|
822
980
|
- SKILLS: Reusable instruction sets that specialize agent behavior. Assign skills to agents.
|
|
823
981
|
- PROVIDERS: View which LLM providers have API keys configured.
|
|
982
|
+
- TESTS: Create and run automated tests for agent workflows. Tests send a message to an agent, then an LLM judge evaluates the response against success criteria. Use list_tests, create_test, run_test, run_all_tests, get_test_results, delete_test.
|
|
824
983
|
|
|
825
984
|
Typical workflow: list_providers → create_agent → assign MCP servers/skills → start_agent.
|
|
985
|
+
Test workflow: create_test (set agent, message, eval criteria) → run_test → check results.
|
|
826
986
|
Always use list_providers first to check which providers have API keys before creating agents.`,
|
|
827
987
|
};
|
|
828
988
|
break;
|