@archon-claw/cli 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80)
  1. package/dist/agent.d.ts +2 -0
  2. package/dist/agent.js +152 -0
  3. package/dist/cli.d.ts +2 -0
  4. package/dist/cli.js +141 -0
  5. package/dist/config.d.ts +2 -0
  6. package/dist/config.js +161 -0
  7. package/dist/eval/assertions.d.ts +9 -0
  8. package/dist/eval/assertions.js +137 -0
  9. package/dist/eval/execute.d.ts +13 -0
  10. package/dist/eval/execute.js +260 -0
  11. package/dist/eval/formatter.d.ts +10 -0
  12. package/dist/eval/formatter.js +62 -0
  13. package/dist/eval/judge.d.ts +7 -0
  14. package/dist/eval/judge.js +116 -0
  15. package/dist/eval/runner.d.ts +9 -0
  16. package/dist/eval/runner.js +156 -0
  17. package/dist/eval/types.d.ts +67 -0
  18. package/dist/eval/types.js +1 -0
  19. package/dist/llm.d.ts +7 -0
  20. package/dist/llm.js +52 -0
  21. package/dist/mcp-manager.d.ts +51 -0
  22. package/dist/mcp-manager.js +268 -0
  23. package/dist/pending-tool-results.d.ts +4 -0
  24. package/dist/pending-tool-results.js +39 -0
  25. package/dist/public/assets/chat-input-BBnVJs9h.js +151 -0
  26. package/dist/public/assets/chat-input-CISJdhF2.css +1 -0
  27. package/dist/public/assets/embed-DhIUBDdf.js +1 -0
  28. package/dist/public/assets/main-Bfvj6DnV.js +16 -0
  29. package/dist/public/embed/widget.js +233 -0
  30. package/dist/public/embed.html +14 -0
  31. package/dist/public/index.html +14 -0
  32. package/dist/scaffold.d.ts +2 -0
  33. package/dist/scaffold.js +82 -0
  34. package/dist/schemas.d.ts +899 -0
  35. package/dist/schemas.js +134 -0
  36. package/dist/server.d.ts +3 -0
  37. package/dist/server.js +258 -0
  38. package/dist/session.d.ts +8 -0
  39. package/dist/session.js +70 -0
  40. package/dist/templates/agent/model.json +6 -0
  41. package/dist/templates/agent/system-prompt.md +9 -0
  42. package/dist/templates/agent/tool-impls/greeting.impl.js +9 -0
  43. package/dist/templates/agent/tools/greeting.json +14 -0
  44. package/dist/templates/workspace/.claude/skills/create-agent/SKILL.md +90 -0
  45. package/dist/templates/workspace/.claude/skills/create-dataset/SKILL.md +57 -0
  46. package/dist/templates/workspace/.claude/skills/create-eval-case/SKILL.md +159 -0
  47. package/dist/templates/workspace/.claude/skills/create-eval-judge/SKILL.md +128 -0
  48. package/dist/templates/workspace/.claude/skills/create-mcp-config/SKILL.md +151 -0
  49. package/dist/templates/workspace/.claude/skills/create-model-config/SKILL.md +45 -0
  50. package/dist/templates/workspace/.claude/skills/create-skill/SKILL.md +63 -0
  51. package/dist/templates/workspace/.claude/skills/create-system-prompt/SKILL.md +168 -0
  52. package/dist/templates/workspace/.claude/skills/create-tool/SKILL.md +56 -0
  53. package/dist/templates/workspace/.claude/skills/create-tool-impl/SKILL.md +83 -0
  54. package/dist/templates/workspace/.claude/skills/create-tool-test/SKILL.md +117 -0
  55. package/dist/templates/workspace/.claude/skills/create-tool-ui/SKILL.md +218 -0
  56. package/dist/test-runner.d.ts +22 -0
  57. package/dist/test-runner.js +166 -0
  58. package/dist/types.d.ts +75 -0
  59. package/dist/types.js +1 -0
  60. package/dist/validator/index.d.ts +16 -0
  61. package/dist/validator/index.js +54 -0
  62. package/dist/validator/plugin.d.ts +21 -0
  63. package/dist/validator/plugin.js +1 -0
  64. package/dist/validator/plugins/agent-dir.d.ts +2 -0
  65. package/dist/validator/plugins/agent-dir.js +171 -0
  66. package/dist/validator/plugins/agent-skill.d.ts +2 -0
  67. package/dist/validator/plugins/agent-skill.js +31 -0
  68. package/dist/validator/plugins/dataset.d.ts +2 -0
  69. package/dist/validator/plugins/dataset.js +20 -0
  70. package/dist/validator/plugins/mcp.d.ts +2 -0
  71. package/dist/validator/plugins/mcp.js +20 -0
  72. package/dist/validator/plugins/model.d.ts +2 -0
  73. package/dist/validator/plugins/model.js +20 -0
  74. package/dist/validator/plugins/system-prompt.d.ts +2 -0
  75. package/dist/validator/plugins/system-prompt.js +25 -0
  76. package/dist/validator/plugins/tool.d.ts +2 -0
  77. package/dist/validator/plugins/tool.js +20 -0
  78. package/dist/validator/zod-utils.d.ts +3 -0
  79. package/dist/validator/zod-utils.js +7 -0
  80. package/package.json +41 -0
package/dist/agent.d.ts ADDED
@@ -0,0 +1,2 @@
1
+ import type { AgentConfig, SSEEvent, Session } from "./types.js";
2
+ export declare function runAgentLoop(config: AgentConfig, session: Session, userMessage: string, emit: (event: SSEEvent) => void, hostContext?: Record<string, unknown> | null): Promise<void>;
package/dist/agent.js ADDED
@@ -0,0 +1,152 @@
1
+ import { createClient, toOpenAITools, streamChat } from "./llm.js";
2
+ import { waitForToolResult } from "./pending-tool-results.js";
3
+ const MAX_ITERATIONS = 20;
4
+ function buildToolTargets(tools) {
5
+ const map = new Map();
6
+ for (const t of tools) {
7
+ map.set(t.name, {
8
+ target: (t.execution_target ?? "server"),
9
+ handler: t.handler,
10
+ });
11
+ }
12
+ return map;
13
+ }
/**
 * Serialise a tool result into the string content of a `role: "tool"` message.
 *
 * `JSON.stringify` returns `undefined` for `undefined`/function values and
 * throws on circular structures or BigInt. Neither outcome is usable as
 * message content, so both are mapped to a valid JSON string here.
 */
function serializeToolResult(result) {
    try {
        return JSON.stringify(result ?? null) ?? "null";
    }
    catch (err) {
        const reason = err instanceof Error ? err.message : String(err);
        return JSON.stringify({ error: `Tool result is not JSON-serializable: ${reason}` });
    }
}
/**
 * Run one user turn of the agent against `session`.
 *
 * Appends the system prompt (first turn only, optionally extended with a
 * `<host-context>` block) and the user message to `session.messages`, then
 * alternates between streaming a model response and executing the tool calls
 * it requested until the model answers without tool calls or MAX_ITERATIONS
 * rounds have run.
 *
 * Events passed to `emit`: `tools_config` (once, when any tool is client/host
 * executed or has a custom UI), `text` per streamed text fragment,
 * `tool_call` / `tool_result` per tool invocation, `error` when the iteration
 * limit is hit, and finally `done`.
 *
 * @param config      Loaded agent configuration (model, tools, toolImpls, toolUIs, systemPrompt).
 * @param session     Conversation state; `messages` is mutated in place.
 * @param userMessage Text of the new user message.
 * @param emit        Callback receiving each event.
 * @param hostContext Optional key/value context appended to the system prompt on the first turn.
 */
export async function runAgentLoop(config, session, userMessage, emit, hostContext) {
    const client = createClient(config.model);
    const openAITools = toOpenAITools(config.tools);
    const toolTargets = buildToolTargets(config.tools);
    // Add system prompt on first message
    if (session.messages.length === 0) {
        let systemPrompt = config.systemPrompt;
        if (hostContext && Object.keys(hostContext).length > 0) {
            systemPrompt += `\n\n<host-context>\n${JSON.stringify(hostContext, null, 2)}\n</host-context>`;
        }
        session.messages.push({ role: "system", content: systemPrompt });
    }
    // Add user message
    session.messages.push({ role: "user", content: userMessage });
    // Emit tools_config for client/host tools and tools with custom UIs
    const configuredTools = config.tools
        .filter((t) => t.execution_target === "client" || t.execution_target === "host" || config.toolUIs.has(t.name))
        .map((t) => ({
        name: t.name,
        handler: t.handler,
        executionTarget: t.execution_target ?? "server",
        component: config.toolUIs.has(t.name) ? t.name : undefined,
    }));
    if (configuredTools.length > 0) {
        emit({ type: "tools_config", tools: configuredTools });
    }
    let completed = false;
    for (let i = 0; i < MAX_ITERATIONS; i++) {
        const stream = streamChat(client, config.model, session.messages, openAITools);
        let textContent = "";
        // Tool calls keyed by their stream index, in order of first appearance.
        const toolCalls = new Map();
        for await (const chunk of stream) {
            const delta = chunk.choices[0]?.delta;
            if (!delta)
                continue;
            // Accumulate text
            if (delta.content) {
                textContent += delta.content;
                emit({ type: "text", content: delta.content });
            }
            // Accumulate tool calls (streamed in fragments by index)
            if (delta.tool_calls) {
                for (const tc of delta.tool_calls) {
                    let pending = toolCalls.get(tc.index);
                    if (!pending) {
                        pending = { id: "", name: "", args: "" };
                        toolCalls.set(tc.index, pending);
                    }
                    // The id and name normally arrive with the first fragment, but
                    // accept them from any fragment so a late one is not dropped.
                    if (tc.id && !pending.id) {
                        pending.id = tc.id;
                    }
                    if (tc.function?.name && !pending.name) {
                        pending.name = tc.function.name;
                    }
                    if (tc.function?.arguments) {
                        pending.args += tc.function.arguments;
                    }
                }
            }
        }
        // Build assistant message
        const assistantToolCalls = [...toolCalls.values()].map((tc) => ({
            id: tc.id,
            type: "function",
            function: { name: tc.name, arguments: tc.args },
        }));
        const assistantMessage = assistantToolCalls.length > 0
            ? { role: "assistant", content: textContent || null, tool_calls: assistantToolCalls }
            : { role: "assistant", content: textContent };
        session.messages.push(assistantMessage);
        // No tool calls — done
        if (assistantToolCalls.length === 0) {
            completed = true;
            break;
        }
        // Execute each tool call
        for (const tc of assistantToolCalls) {
            let args;
            try {
                // A tool invoked without parameters may stream an empty argument
                // string; treat that as "no arguments" rather than invalid JSON.
                const rawArgs = tc.function.arguments.trim();
                args = rawArgs === "" ? {} : JSON.parse(rawArgs);
            }
            catch {
                const result = { error: `Invalid JSON in tool arguments: ${tc.function.arguments}` };
                emit({ type: "tool_result", name: tc.function.name, result, toolCallId: tc.id });
                session.messages.push({
                    role: "tool",
                    tool_call_id: tc.id,
                    content: JSON.stringify(result),
                });
                continue;
            }
            const targetInfo = toolTargets.get(tc.function.name);
            const target = targetInfo?.target ?? "server";
            let result;
            if (target === "server") {
                // Server execution: use local impl
                emit({ type: "tool_call", name: tc.function.name, args, toolCallId: tc.id });
                const impl = config.toolImpls.get(tc.function.name);
                if (impl) {
                    try {
                        result = await impl(args);
                    }
                    catch (err) {
                        // Report the failure to the model instead of aborting the turn.
                        result = { error: err instanceof Error ? err.message : String(err) };
                    }
                }
                else {
                    result = { error: `Unknown tool: ${tc.function.name}` };
                }
            }
            else {
                // Client or Host execution: emit tool_call with target and wait for result
                emit({
                    type: "tool_call",
                    name: tc.function.name,
                    args,
                    toolCallId: tc.id,
                    executionTarget: target,
                });
                try {
                    result = await waitForToolResult(tc.id);
                }
                catch (err) {
                    result = { error: err instanceof Error ? err.message : String(err) };
                }
            }
            emit({ type: "tool_result", name: tc.function.name, result, toolCallId: tc.id });
            session.messages.push({
                role: "tool",
                tool_call_id: tc.id,
                content: serializeToolResult(result),
            });
        }
        // Continue loop — LLM will see tool results and generate next response
    }
    if (!completed) {
        emit({ type: "error", message: `Agent stopped: reached maximum iterations (${MAX_ITERATIONS})` });
    }
    emit({ type: "done", sessionId: session.id });
}
package/dist/cli.d.ts ADDED
@@ -0,0 +1,2 @@
1
+ #!/usr/bin/env node
2
+ export {};
package/dist/cli.js ADDED
@@ -0,0 +1,141 @@
1
+ #!/usr/bin/env node
2
+ import { fileURLToPath } from "url";
3
+ import path from "path";
4
+ import { readFileSync } from "fs";
5
+ import dotenv from "dotenv";
6
+ import { Command } from "commander";
7
+ const __dirname = path.dirname(fileURLToPath(import.meta.url));
8
+ import { loadAgentConfig } from "./config.js";
9
+ import { createServer } from "./server.js";
10
+ import { initSessionStore } from "./session.js";
11
+ import { runToolTests, formatResults } from "./test-runner.js";
12
+ import { runEvals } from "./eval/runner.js";
13
+ import { scaffoldAgent, scaffoldWorkspace } from "./scaffold.js";
// Package metadata; only `version` is read, for the --version flag.
const pkg = JSON.parse(readFileSync(path.resolve(__dirname, "../package.json"), "utf-8"));
const program = new Command();
/** Print an error (message only for Error instances) and exit with status 1. */
function fail(err) {
    console.error(err instanceof Error ? err.message : err);
    process.exit(1);
}
/**
 * Parse the --port option.
 * Throws when the value is not a decimal integer in the range 0-65535.
 */
function parsePort(raw) {
    const text = String(raw);
    const port = Number.parseInt(text, 10);
    if (!/^\d+$/.test(text) || port > 65535) {
        throw new Error(`Invalid port: ${text} (expected an integer between 0 and 65535)`);
    }
    return port;
}
program
    .name("archon-claw")
    .description("AI Agent CLI")
    .version(pkg.version)
    .option("-e, --env-file <path>", "Path to .env file (default: .env in cwd)")
    // Load environment variables before any command action runs.
    .hook("preAction", () => {
    const envPath = program.opts().envFile
        ? path.resolve(program.opts().envFile)
        : path.resolve(process.cwd(), ".env");
    dotenv.config({ path: envPath });
});
program
    .command("chat")
    .description("Start an agent chat session")
    .action(() => {
    console.log("Starting agent...");
});
program
    .command("start")
    .description("Start an agent HTTP server")
    .argument("<agent-dir>", "Path to agent directory")
    .option("-p, --port <port>", "Server port", "4000")
    .action(async (agentDir, opts) => {
    try {
        // Validate the port first so a typo fails before the agent is loaded.
        const port = parsePort(opts.port);
        const config = await loadAgentConfig(agentDir);
        await initSessionStore(agentDir);
        console.log(`Agent loaded:`);
        console.log(`  Model: ${config.model.provider}/${config.model.model}`);
        console.log(`  Tools: ${config.tools.map((t) => t.name).join(", ")}`);
        if (config.mcpManager) {
            const servers = config.mcpManager.getServerNames();
            console.log(`  MCP servers: ${servers.join(", ")}`);
        }
        console.log(`  Skills: ${Object.keys(config.skills).join(", ") || "(none)"}`);
        console.log(`  System prompt: ${config.systemPrompt.length} chars`);
        const server = createServer(config, port);
        let shuttingDown = false;
        const shutdown = async () => {
            // Ignore repeated signals while a shutdown is already in progress.
            if (shuttingDown)
                return;
            shuttingDown = true;
            console.log("\nShutting down...");
            // Arm the force-exit timer before awaiting anything, so a hung MCP
            // shutdown or a server that never closes cannot keep the process alive.
            setTimeout(() => process.exit(1), 3000).unref();
            try {
                await config.mcpManager?.shutdown();
            }
            catch (err) {
                // Still close the HTTP server if MCP shutdown fails.
                console.error(err instanceof Error ? err.message : err);
            }
            server.close(() => process.exit(0));
        };
        process.on("SIGINT", shutdown);
        process.on("SIGTERM", shutdown);
    }
    catch (err) {
        fail(err);
    }
});
program
    .command("test")
    .description("Run tool implementation tests")
    .argument("<agent-dir>", "Path to agent directory")
    .action(async (agentDir) => {
    try {
        const results = await runToolTests(agentDir);
        if (results.length === 0) {
            console.log("No test files found (*.test.js in tool-impls/)");
            return;
        }
        console.log(formatResults(results));
        // Exit non-zero when any individual test result did not pass.
        const failed = results.some((t) => t.results.some((r) => !r.passed));
        if (failed)
            process.exit(1);
    }
    catch (err) {
        fail(err);
    }
});
program
    .command("eval")
    .description("Run agent evaluation cases")
    .argument("<agent-dir>", "Path to agent directory")
    .option("-f, --file <file>", "Only run a specific .eval.json file")
    .option("-t, --tag <tag>", "Only run cases with this tag")
    .option("-s, --save", "Save results to eval-results/")
    .option("--no-judge", "Skip judge evaluation")
    .action(async (agentDir, opts) => {
    try {
        const { summary, formatted } = await runEvals(agentDir, {
            file: opts.file,
            tag: opts.tag,
            save: opts.save,
            // commander exposes --no-judge as `judge === false`.
            noJudge: opts.judge === false,
        });
        console.log(formatted);
        if (summary.failed > 0)
            process.exit(1);
    }
    catch (err) {
        fail(err);
    }
});
program
    .command("init")
    .description("Initialise an agent workspace with shared .claude/ skills")
    .argument("[dir]", "Directory to initialise", "./agents")
    .action(async (dir) => {
    try {
        await scaffoldWorkspace(dir);
    }
    catch (err) {
        fail(err);
    }
});
program
    .command("create")
    .description("Create a new agent project")
    .argument("<agent-name>", "Name of the agent (used as directory name)")
    .option("-d, --dir <path>", "Parent directory for the agent", "./agents")
    .action(async (agentName, opts) => {
    try {
        await scaffoldAgent(agentName, opts.dir);
    }
    catch (err) {
        fail(err);
    }
});
// The action handlers are async, so use parseAsync and surface any rejection
// instead of leaving it unhandled.
program.parseAsync().catch(fail);
package/dist/config.d.ts ADDED
@@ -0,0 +1,2 @@
1
+ import type { AgentConfig } from "./types.js";
2
+ export declare function loadAgentConfig(agentDir: string): Promise<AgentConfig>;
package/dist/config.js ADDED
@@ -0,0 +1,161 @@
1
+ import fs from "node:fs/promises";
2
+ import path from "node:path";
3
+ import { pathToFileURL } from "node:url";
4
+ import { Liquid } from "liquidjs";
5
+ import matter from "gray-matter";
6
+ import { validateDir } from "./validator/index.js";
7
+ import { mcpConfigSchema } from "./schemas.js";
8
+ import { McpManager } from "./mcp-manager.js";
/** True when `err` is a filesystem "no such file or directory" (ENOENT) error. */
function isNotFoundError(err) {
    return err instanceof Error && "code" in err && err.code === "ENOENT";
}
/** Parse JSON text read from `filePath`, naming the file in the error when it is malformed. */
function parseJsonFile(raw, filePath) {
    try {
        return JSON.parse(raw);
    }
    catch (err) {
        const reason = err instanceof Error ? err.message : String(err);
        throw new Error(`Invalid JSON in ${filePath}: ${reason}`);
    }
}
/** List a directory, returning null when it does not exist; any other failure is rethrown. */
async function readOptionalDir(dir) {
    try {
        return await fs.readdir(dir);
    }
    catch (err) {
        if (isNotFoundError(err))
            return null;
        throw err;
    }
}
/**
 * Load an agent directory into a runnable configuration.
 *
 * Reads `model.json`, the optional `datasets/` and `skills/` directories, the
 * tool schemas in `tools/`, server-side implementations in `tool-impls/`,
 * optional MCP servers from `mcp.json`, and custom UI markers in `tool-uis/`,
 * then renders `system-prompt.md` as a Liquid template with datasets, skills,
 * tools and MCP tools available as variables.
 *
 * Optional inputs are skipped only when they are missing. Malformed JSON, a
 * failing implementation import or an MCP initialisation failure is thrown
 * rather than silently ignored.
 *
 * @param agentDir Path to the agent directory (resolved to an absolute path).
 * @returns `{ systemPrompt, model, tools, toolImpls, skills, mcpManager, agentDir, toolUIs }`.
 * @throws Error when directory validation fails, a required file or directory
 *         is missing, or any input cannot be read or parsed.
 */
export async function loadAgentConfig(agentDir) {
    const absDir = path.resolve(agentDir);
    // Validate directory structure
    const validation = await validateDir("agent-dir", absDir);
    if (!validation.valid) {
        throw new Error(`Invalid agent directory:\n${validation.errors.map((e) => `  - ${e}`).join("\n")}`);
    }
    // Load model.json
    const modelPath = path.join(absDir, "model.json");
    const model = parseJsonFile(await fs.readFile(modelPath, "utf-8"), modelPath);
    // Load datasets (optional directory)
    const datasets = {};
    const datasetsDir = path.join(absDir, "datasets");
    const datasetFiles = ((await readOptionalDir(datasetsDir)) ?? []).filter((f) => f.endsWith(".json"));
    const loadedDatasets = await Promise.all(datasetFiles.map(async (file) => {
        const filePath = path.join(datasetsDir, file);
        return {
            name: file.replace(/\.json$/, ""),
            data: parseJsonFile(await fs.readFile(filePath, "utf-8"), filePath),
        };
    }));
    for (const { name, data } of loadedDatasets) {
        datasets[name] = data;
    }
    // Load skills (optional directory)
    const skills = {};
    const skillsMeta = [];
    const skillsDir = path.join(absDir, "skills");
    const skillFiles = ((await readOptionalDir(skillsDir)) ?? []).filter((f) => f.endsWith(".md"));
    const loadedSkills = await Promise.all(skillFiles.map(async (file) => ({
        key: file.replace(/\.md$/, ""),
        content: await fs.readFile(path.join(skillsDir, file), "utf-8"),
    })));
    for (const { key, content } of loadedSkills) {
        const { data, content: body } = matter(content);
        skills[key] = body.trim();
        // Only skills whose front matter has both fields are exposed to the prompt template.
        if (data.name && data.description) {
            skillsMeta.push({ name: data.name, description: data.description });
        }
    }
    // Load tool schemas (required directory)
    const toolsDir = path.join(absDir, "tools");
    const toolFiles = (await fs.readdir(toolsDir)).filter((f) => f.endsWith(".json"));
    const tools = await Promise.all(toolFiles.map(async (file) => {
        const filePath = path.join(toolsDir, file);
        return parseJsonFile(await fs.readFile(filePath, "utf-8"), filePath);
    }));
    // Build set of tools that don't need server-side implementations
    const nonServerToolNames = new Set(tools
        .filter((t) => t.execution_target === "client" || t.execution_target === "host")
        .map((t) => t.name));
    // Load tool implementations (only for server tools)
    const toolImpls = new Map();
    const implsDir = path.join(absDir, "tool-impls");
    const implDirEntries = await readOptionalDir(implsDir);
    if (implDirEntries === null) {
        // tool-impls/ may be absent only if every tool is client/host executed
        if (tools.length > nonServerToolNames.size) {
            throw new Error("Missing required directory: tool-impls/");
        }
    }
    else {
        const implFiles = implDirEntries.filter((f) => f.endsWith(".impl.js"));
        // An import failure (syntax error, missing dependency) propagates as-is.
        const implEntries = await Promise.all(implFiles.map(async (file) => {
            const name = file.replace(/\.impl\.js$/, "");
            const implPath = pathToFileURL(path.join(implsDir, file)).href;
            const mod = await import(implPath);
            return [name, mod.default];
        }));
        for (const [name, impl] of implEntries) {
            toolImpls.set(name, impl);
        }
    }
    // Load MCP servers (optional). Only a missing mcp.json is tolerated: the read
    // is isolated so an ENOENT raised later (e.g. during MCP initialisation) is
    // not mistaken for "no mcp.json".
    let mcpManager;
    const mcpPath = path.join(absDir, "mcp.json");
    let mcpRaw;
    try {
        mcpRaw = await fs.readFile(mcpPath, "utf-8");
    }
    catch (err) {
        if (!isNotFoundError(err))
            throw err;
        // mcp.json not found — that's fine
    }
    if (mcpRaw !== undefined) {
        const mcpConfig = mcpConfigSchema.parse(parseJsonFile(mcpRaw, mcpPath));
        const manager = new McpManager(mcpConfig);
        await manager.initialize();
        mcpManager = manager;
        // Merge MCP tools into tools + toolImpls; local tools win on name clashes
        const localToolNames = new Set(tools.map((t) => t.name));
        for (const mcpTool of manager.getTools()) {
            if (localToolNames.has(mcpTool.name)) {
                console.warn(`[mcp] Skipping tool "${mcpTool.name}" — conflicts with local tool`);
                continue;
            }
            tools.push(mcpTool);
            const toolName = mcpTool.name;
            toolImpls.set(toolName, async (args) => manager.callTool(toolName, args));
        }
    }
    // Scan tool-uis/ (optional) for custom UI components matching a known tool
    const toolUIs = new Set();
    const toolUIsDir = path.join(absDir, "tool-uis");
    const uiFiles = ((await readOptionalDir(toolUIsDir)) ?? []).filter((f) => f.endsWith(".ui.js"));
    const toolNames = new Set(tools.map((t) => t.name));
    for (const file of uiFiles) {
        const name = file.replace(/\.ui\.js$/, "");
        if (toolNames.has(name)) {
            toolUIs.add(name);
        }
    }
    // Build tool template variables: tools (array) + tool (map by name)
    const toolsMeta = tools.map((t) => ({ name: t.name, description: t.description }));
    const toolMap = {};
    for (const t of toolsMeta) {
        toolMap[t.name] = t;
    }
    // Build mcp template variable: mcp.{serverName}.tools/tool + mcp.servers
    const mcpVar = { servers: [] };
    if (mcpManager) {
        const byServer = mcpManager.getToolsByServer();
        const servers = [];
        for (const serverName of mcpManager.getServerNames()) {
            const serverTools = byServer[serverName] ?? [];
            const serverToolMap = {};
            for (const t of serverTools) {
                serverToolMap[t.name] = t;
            }
            const serverObj = { name: serverName, tools: serverTools, tool: serverToolMap };
            mcpVar[serverName] = serverObj;
            servers.push(serverObj);
        }
        mcpVar.servers = servers;
    }
    // Load and render system prompt (after tools/MCP so they're available in template).
    // Datasets are spread first, so the built-in variable names take precedence.
    const promptRaw = await fs.readFile(path.join(absDir, "system-prompt.md"), "utf-8");
    const engine = new Liquid();
    const templateVars = { ...datasets, skills: skillsMeta, tools: toolsMeta, tool: toolMap, mcp: mcpVar };
    const systemPrompt = (await engine.parseAndRender(promptRaw, templateVars)).trim();
    return { systemPrompt, model: model, tools, toolImpls, skills, mcpManager, agentDir: absDir, toolUIs };
}
package/dist/eval/assertions.d.ts ADDED
@@ -0,0 +1,9 @@
1
+ import type { Assertion, AssertionResult, ToolCallRecord } from "./types.js";
2
+ /**
3
+ * Run a single assertion against a response text and tool calls.
4
+ */
5
+ export declare function runAssertion(assertion: Assertion, responseText: string, toolCalls: ToolCallRecord[]): AssertionResult;
6
+ /**
7
+ * Run multiple assertions and return all results.
8
+ */
9
+ export declare function runAssertions(assertions: Assertion[], responseText: string, toolCalls: ToolCallRecord[]): AssertionResult[];
package/dist/eval/assertions.js ADDED
@@ -0,0 +1,137 @@
/**
 * Run a single assertion against a response text and tool calls.
 *
 * Supported `assertion.type` values: "contains" / "not-contains"
 * (case-insensitive substring), "regex", "length-min" / "length-max",
 * "json-valid", "tool-called" / "tool-not-called", and "tool-called-with"
 * (`value` is a JSON string `{"tool": name, "args": {...}}`; `args` is matched
 * as a recursive subset of the recorded call's args).
 *
 * A malformed assertion value (invalid regex, non-numeric length, unparsable
 * tool spec) produces a failed result with an explanatory message instead of
 * throwing, so one bad case cannot abort a whole run.
 *
 * @returns `{ type, value, passed, message }`; `message` is undefined when the assertion passed.
 */
export function runAssertion(assertion, responseText, toolCalls) {
    const { type, value } = assertion;
    // Build the result object; the failure message is kept only on failure.
    const outcome = (passed, failureMessage) => ({
        type,
        value,
        passed,
        message: passed ? undefined : failureMessage,
    });
    switch (type) {
        case "contains": {
            const found = responseText.toLowerCase().includes(value.toLowerCase());
            return outcome(found, `"${value}" not found in response`);
        }
        case "not-contains": {
            const found = responseText.toLowerCase().includes(value.toLowerCase());
            return outcome(!found, `"${value}" found in response`);
        }
        case "regex": {
            let re;
            try {
                re = new RegExp(value);
            }
            catch (err) {
                const reason = err instanceof Error ? err.message : String(err);
                return outcome(false, `invalid regex /${value}/: ${reason}`);
            }
            return outcome(re.test(responseText), `regex /${value}/ did not match`);
        }
        case "length-min": {
            const min = parseInt(value, 10);
            if (Number.isNaN(min)) {
                return outcome(false, `invalid length-min value: "${value}"`);
            }
            return outcome(responseText.length >= min, `response length ${responseText.length} < ${min}`);
        }
        case "length-max": {
            const max = parseInt(value, 10);
            if (Number.isNaN(max)) {
                return outcome(false, `invalid length-max value: "${value}"`);
            }
            return outcome(responseText.length <= max, `response length ${responseText.length} > ${max}`);
        }
        case "json-valid": {
            let parsed = false;
            try {
                JSON.parse(responseText);
                parsed = true;
            }
            catch {
                // not valid JSON
            }
            return outcome(parsed, "response is not valid JSON");
        }
        case "tool-called": {
            const called = toolCalls.some((tc) => tc.name === value);
            return outcome(called, `tool "${value}" was not called`);
        }
        case "tool-not-called": {
            const called = toolCalls.some((tc) => tc.name === value);
            return outcome(!called, `tool "${value}" was called`);
        }
        case "tool-called-with": {
            let spec;
            try {
                spec = JSON.parse(value);
            }
            catch {
                return outcome(false, `invalid tool-called-with spec (expected JSON): ${value}`);
            }
            if (typeof spec !== "object" || spec === null) {
                return outcome(false, `invalid tool-called-with spec (expected a JSON object): ${value}`);
            }
            // A spec without `args` places no constraint on the arguments.
            const match = toolCalls.find((tc) => tc.name === spec.tool && objectContains(tc.args, spec.args));
            return outcome(!!match, `tool "${spec.tool}" was not called with matching args`);
        }
        default:
            return outcome(false, `unknown assertion type: ${type}`);
    }
}
/**
 * Run multiple assertions and return all results, in input order.
 */
export function runAssertions(assertions, responseText, toolCalls) {
    return assertions.map((a) => runAssertion(a, responseText, toolCalls));
}
/**
 * Check if `obj` contains all key-value pairs from `subset`.
 *
 * Object-valued entries (including arrays, compared by index) are matched
 * recursively; all other values must be strictly equal. A null/undefined
 * `subset` imposes no constraints, and a non-object `obj` only satisfies an
 * empty `subset`.
 */
function objectContains(obj, subset) {
    if (subset === null || subset === undefined) {
        return true;
    }
    const required = Object.entries(subset);
    if (typeof obj !== "object" || obj === null) {
        return required.length === 0;
    }
    for (const [key, val] of required) {
        if (typeof val === "object" && val !== null) {
            if (typeof obj[key] !== "object" || obj[key] === null)
                return false;
            if (!objectContains(obj[key], val)) {
                return false;
            }
        }
        else if (obj[key] !== val) {
            return false;
        }
    }
    return true;
}
package/dist/eval/execute.d.ts ADDED
@@ -0,0 +1,13 @@
1
+ import type { AgentConfig, ChatMessage } from "../types.js";
2
+ import type { EvalCase, CaseResult, JudgeResult } from "./types.js";
3
+ interface ExecuteOptions {
4
+ config: AgentConfig;
5
+ evalCase: EvalCase;
6
+ fileName: string;
7
+ judgeRunner?: (userInput: string, expectedOutput: string | undefined, actualResponse: string, conversation: ChatMessage[]) => Promise<JudgeResult>;
8
+ }
9
+ /**
10
+ * Execute a single eval case, dispatching to the appropriate mode.
11
+ */
12
+ export declare function executeCase(opts: ExecuteOptions): Promise<CaseResult>;
13
+ export {};