@archon-claw/cli 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent.d.ts +2 -0
- package/dist/agent.js +152 -0
- package/dist/cli.d.ts +2 -0
- package/dist/cli.js +141 -0
- package/dist/config.d.ts +2 -0
- package/dist/config.js +161 -0
- package/dist/eval/assertions.d.ts +9 -0
- package/dist/eval/assertions.js +137 -0
- package/dist/eval/execute.d.ts +13 -0
- package/dist/eval/execute.js +260 -0
- package/dist/eval/formatter.d.ts +10 -0
- package/dist/eval/formatter.js +62 -0
- package/dist/eval/judge.d.ts +7 -0
- package/dist/eval/judge.js +116 -0
- package/dist/eval/runner.d.ts +9 -0
- package/dist/eval/runner.js +156 -0
- package/dist/eval/types.d.ts +67 -0
- package/dist/eval/types.js +1 -0
- package/dist/llm.d.ts +7 -0
- package/dist/llm.js +52 -0
- package/dist/mcp-manager.d.ts +51 -0
- package/dist/mcp-manager.js +268 -0
- package/dist/pending-tool-results.d.ts +4 -0
- package/dist/pending-tool-results.js +39 -0
- package/dist/public/assets/chat-input-BBnVJs9h.js +151 -0
- package/dist/public/assets/chat-input-CISJdhF2.css +1 -0
- package/dist/public/assets/embed-DhIUBDdf.js +1 -0
- package/dist/public/assets/main-Bfvj6DnV.js +16 -0
- package/dist/public/embed/widget.js +233 -0
- package/dist/public/embed.html +14 -0
- package/dist/public/index.html +14 -0
- package/dist/scaffold.d.ts +2 -0
- package/dist/scaffold.js +82 -0
- package/dist/schemas.d.ts +899 -0
- package/dist/schemas.js +134 -0
- package/dist/server.d.ts +3 -0
- package/dist/server.js +258 -0
- package/dist/session.d.ts +8 -0
- package/dist/session.js +70 -0
- package/dist/templates/agent/model.json +6 -0
- package/dist/templates/agent/system-prompt.md +9 -0
- package/dist/templates/agent/tool-impls/greeting.impl.js +9 -0
- package/dist/templates/agent/tools/greeting.json +14 -0
- package/dist/templates/workspace/.claude/skills/create-agent/SKILL.md +90 -0
- package/dist/templates/workspace/.claude/skills/create-dataset/SKILL.md +57 -0
- package/dist/templates/workspace/.claude/skills/create-eval-case/SKILL.md +159 -0
- package/dist/templates/workspace/.claude/skills/create-eval-judge/SKILL.md +128 -0
- package/dist/templates/workspace/.claude/skills/create-mcp-config/SKILL.md +151 -0
- package/dist/templates/workspace/.claude/skills/create-model-config/SKILL.md +45 -0
- package/dist/templates/workspace/.claude/skills/create-skill/SKILL.md +63 -0
- package/dist/templates/workspace/.claude/skills/create-system-prompt/SKILL.md +168 -0
- package/dist/templates/workspace/.claude/skills/create-tool/SKILL.md +56 -0
- package/dist/templates/workspace/.claude/skills/create-tool-impl/SKILL.md +83 -0
- package/dist/templates/workspace/.claude/skills/create-tool-test/SKILL.md +117 -0
- package/dist/templates/workspace/.claude/skills/create-tool-ui/SKILL.md +218 -0
- package/dist/test-runner.d.ts +22 -0
- package/dist/test-runner.js +166 -0
- package/dist/types.d.ts +75 -0
- package/dist/types.js +1 -0
- package/dist/validator/index.d.ts +16 -0
- package/dist/validator/index.js +54 -0
- package/dist/validator/plugin.d.ts +21 -0
- package/dist/validator/plugin.js +1 -0
- package/dist/validator/plugins/agent-dir.d.ts +2 -0
- package/dist/validator/plugins/agent-dir.js +171 -0
- package/dist/validator/plugins/agent-skill.d.ts +2 -0
- package/dist/validator/plugins/agent-skill.js +31 -0
- package/dist/validator/plugins/dataset.d.ts +2 -0
- package/dist/validator/plugins/dataset.js +20 -0
- package/dist/validator/plugins/mcp.d.ts +2 -0
- package/dist/validator/plugins/mcp.js +20 -0
- package/dist/validator/plugins/model.d.ts +2 -0
- package/dist/validator/plugins/model.js +20 -0
- package/dist/validator/plugins/system-prompt.d.ts +2 -0
- package/dist/validator/plugins/system-prompt.js +25 -0
- package/dist/validator/plugins/tool.d.ts +2 -0
- package/dist/validator/plugins/tool.js +20 -0
- package/dist/validator/zod-utils.d.ts +3 -0
- package/dist/validator/zod-utils.js +7 -0
- package/package.json +41 -0
package/dist/agent.d.ts
ADDED
package/dist/agent.js
ADDED
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
import { createClient, toOpenAITools, streamChat } from "./llm.js";
|
|
2
|
+
import { waitForToolResult } from "./pending-tool-results.js";
|
|
3
|
+
const MAX_ITERATIONS = 20;
|
|
4
|
+
/**
 * Index tool definitions by name.
 *
 * Each entry records where the tool executes (`target`, taken from the tool's
 * `execution_target` and falling back to "server" when that is null/undefined)
 * together with the tool's `handler` value. A later tool with the same name
 * replaces an earlier one.
 *
 * @param tools Iterable of tool definitions (reads `name`, `execution_target`, `handler`).
 * @returns Map from tool name to `{ target, handler }`.
 */
function buildToolTargets(tools) {
    const targetsByName = new Map();
    for (const { name, execution_target: executionTarget, handler } of tools) {
        targetsByName.set(name, { target: executionTarget ?? "server", handler });
    }
    return targetsByName;
}
|
|
14
|
+
/**
 * Run one user turn of the agent.
 *
 * Streams the model's reply, executes every tool call the model requests,
 * appends the tool results to the conversation and asks the model again. The
 * loop ends when the model answers without tool calls or after MAX_ITERATIONS
 * rounds.
 *
 * @param config Agent configuration; reads `model`, `tools`, `toolImpls`,
 *   `toolUIs` and `systemPrompt`.
 * @param session Conversation state. `session.messages` is appended to in
 *   place; `session.id` is reported in the final "done" event.
 * @param userMessage Text of the user's message for this turn.
 * @param emit Callback that receives progress events. Event types emitted
 *   here: "tools_config", "text", "tool_call", "tool_result", "error", "done".
 * @param hostContext Optional object. When it has at least one key it is
 *   appended (as pretty-printed JSON inside a <host-context> tag) to the
 *   system prompt, but only when the session has no messages yet.
 * @returns Promise that resolves once the "done" event has been emitted.
 */
export async function runAgentLoop(config, session, userMessage, emit, hostContext) {
    const client = createClient(config.model);
    const openAITools = toOpenAITools(config.tools);
    const toolTargets = buildToolTargets(config.tools);
    // Add system prompt on first message
    if (session.messages.length === 0) {
        let systemPrompt = config.systemPrompt;
        if (hostContext && Object.keys(hostContext).length > 0) {
            systemPrompt += `\n\n<host-context>\n${JSON.stringify(hostContext, null, 2)}\n</host-context>`;
        }
        session.messages.push({ role: "system", content: systemPrompt });
    }
    // Add user message
    session.messages.push({ role: "user", content: userMessage });
    // Emit tools_config for client/host tools and tools with custom UIs
    const configuredTools = config.tools
        .filter((t) => t.execution_target === "client" || t.execution_target === "host" || config.toolUIs.has(t.name))
        .map((t) => ({
            name: t.name,
            handler: t.handler,
            executionTarget: t.execution_target ?? "server",
            component: config.toolUIs.has(t.name) ? t.name : undefined,
        }));
    if (configuredTools.length > 0) {
        emit({ type: "tools_config", tools: configuredTools });
    }
    let completed = false;
    for (let i = 0; i < MAX_ITERATIONS; i++) {
        const stream = streamChat(client, config.model, session.messages, openAITools);
        let textContent = "";
        // Tool calls under construction, keyed by the fragment's `index`
        const toolCalls = new Map();
        for await (const chunk of stream) {
            const delta = chunk.choices[0]?.delta;
            if (!delta)
                continue;
            // Accumulate text and forward each piece as it arrives
            if (delta.content) {
                textContent += delta.content;
                emit({ type: "text", content: delta.content });
            }
            // Accumulate tool calls (streamed in fragments by index)
            if (delta.tool_calls) {
                for (const tc of delta.tool_calls) {
                    let call = toolCalls.get(tc.index);
                    if (!call) {
                        call = { id: "", name: "", args: "" };
                        toolCalls.set(tc.index, call);
                    }
                    // id and name normally come with the first fragment, but
                    // accept them from a later fragment instead of dropping them.
                    if (!call.id && tc.id) {
                        call.id = tc.id;
                    }
                    if (!call.name && tc.function?.name) {
                        call.name = tc.function.name;
                    }
                    if (tc.function?.arguments) {
                        call.args += tc.function.arguments;
                    }
                }
            }
        }
        // Build assistant message
        const assistantToolCalls = [...toolCalls.values()].map((tc) => ({
            id: tc.id,
            type: "function",
            function: { name: tc.name, arguments: tc.args },
        }));
        const assistantMessage = assistantToolCalls.length > 0
            ? { role: "assistant", content: textContent || null, tool_calls: assistantToolCalls }
            : { role: "assistant", content: textContent };
        session.messages.push(assistantMessage);
        // No tool calls — done
        if (assistantToolCalls.length === 0) {
            completed = true;
            break;
        }
        // Execute each tool call, in order, appending one "tool" message per call
        for (const tc of assistantToolCalls) {
            let args;
            try {
                const rawArgs = tc.function.arguments;
                // An empty argument string is treated as "no arguments" rather
                // than as malformed JSON.
                args = rawArgs.trim() === "" ? {} : JSON.parse(rawArgs);
            }
            catch {
                const result = { error: `Invalid JSON in tool arguments: ${tc.function.arguments}` };
                emit({ type: "tool_result", name: tc.function.name, result, toolCallId: tc.id });
                session.messages.push({
                    role: "tool",
                    tool_call_id: tc.id,
                    content: JSON.stringify(result),
                });
                continue;
            }
            const targetInfo = toolTargets.get(tc.function.name);
            // Tools that are not in the target map are treated as server tools
            const target = targetInfo?.target ?? "server";
            let result;
            if (target === "server") {
                // Server execution: use local impl
                emit({ type: "tool_call", name: tc.function.name, args, toolCallId: tc.id });
                const impl = config.toolImpls.get(tc.function.name);
                if (impl) {
                    try {
                        result = await impl(args);
                    }
                    catch (err) {
                        // A failing tool becomes an error result the model can see
                        result = { error: err instanceof Error ? err.message : String(err) };
                    }
                }
                else {
                    result = { error: `Unknown tool: ${tc.function.name}` };
                }
            }
            else {
                // Client or Host execution: emit tool_call with target and wait for result
                emit({
                    type: "tool_call",
                    name: tc.function.name,
                    args,
                    toolCallId: tc.id,
                    executionTarget: target,
                });
                try {
                    result = await waitForToolResult(tc.id);
                }
                catch (err) {
                    result = { error: err instanceof Error ? err.message : String(err) };
                }
            }
            emit({ type: "tool_result", name: tc.function.name, result, toolCallId: tc.id });
            session.messages.push({
                role: "tool",
                tool_call_id: tc.id,
                content: JSON.stringify(result),
            });
        }
        // Continue loop — LLM will see tool results and generate next response
    }
    if (!completed) {
        emit({ type: "error", message: `Agent stopped: reached maximum iterations (${MAX_ITERATIONS})` });
    }
    emit({ type: "done", sessionId: session.id });
}
|
package/dist/cli.d.ts
ADDED
package/dist/cli.js
ADDED
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import { fileURLToPath } from "url";
|
|
3
|
+
import path from "path";
|
|
4
|
+
import { readFileSync } from "fs";
|
|
5
|
+
import dotenv from "dotenv";
|
|
6
|
+
import { Command } from "commander";
|
|
7
|
+
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
|
8
|
+
import { loadAgentConfig } from "./config.js";
|
|
9
|
+
import { createServer } from "./server.js";
|
|
10
|
+
import { initSessionStore } from "./session.js";
|
|
11
|
+
import { runToolTests, formatResults } from "./test-runner.js";
|
|
12
|
+
import { runEvals } from "./eval/runner.js";
|
|
13
|
+
import { scaffoldAgent, scaffoldWorkspace } from "./scaffold.js";
|
|
14
|
+
const pkg = JSON.parse(readFileSync(path.resolve(__dirname, "../package.json"), "utf-8"));
|
|
15
|
+
// Command-line entry point: declares the `archon-claw` program and its
// sub-commands (chat, start, test, eval, init, create), then parses argv.
const program = new Command();
program
    .name("archon-claw")
    .description("AI Agent CLI")
    .version(pkg.version)
    .option("-e, --env-file <path>", "Path to .env file (default: .env in cwd)")
    .hook("preAction", () => {
        // Load environment variables before any command action runs
        const envPath = program.opts().envFile
            ? path.resolve(program.opts().envFile)
            : path.resolve(process.cwd(), ".env");
        dotenv.config({ path: envPath });
    });
program
    .command("chat")
    .description("Start an agent chat session")
    .action(() => {
        console.log("Starting agent...");
    });
program
    .command("start")
    .description("Start an agent HTTP server")
    .argument("<agent-dir>", "Path to agent directory")
    .option("-p, --port <port>", "Server port", "4000")
    .action(async (agentDir, opts) => {
        try {
            // Validate the port before loading the agent, so a bad value fails
            // fast and is never handed to the server.
            const port = parseInt(opts.port, 10);
            if (!Number.isInteger(port) || port < 0 || port > 65535) {
                throw new Error(`Invalid port: ${opts.port}`);
            }
            const config = await loadAgentConfig(agentDir);
            await initSessionStore(agentDir);
            console.log(`Agent loaded:`);
            console.log(` Model: ${config.model.provider}/${config.model.model}`);
            console.log(` Tools: ${config.tools.map((t) => t.name).join(", ")}`);
            if (config.mcpManager) {
                const servers = config.mcpManager.getServerNames();
                console.log(` MCP servers: ${servers.join(", ")}`);
            }
            console.log(` Skills: ${Object.keys(config.skills).join(", ") || "(none)"}`);
            console.log(` System prompt: ${config.systemPrompt.length} chars`);
            const server = createServer(config, port);
            let shuttingDown = false;
            const shutdown = async () => {
                // Ignore repeated signals while a shutdown is already running
                if (shuttingDown)
                    return;
                shuttingDown = true;
                console.log("\nShutting down...");
                // Force exit if shutdown hasn't completed within 3s. Armed first
                // so that a hung MCP shutdown cannot keep the process alive.
                setTimeout(() => process.exit(1), 3000).unref();
                try {
                    await config.mcpManager?.shutdown();
                }
                catch (err) {
                    // Still close the HTTP server when MCP shutdown fails
                    console.error(err instanceof Error ? err.message : err);
                }
                server.close(() => process.exit(0));
            };
            process.on("SIGINT", shutdown);
            process.on("SIGTERM", shutdown);
        }
        catch (err) {
            console.error(err instanceof Error ? err.message : err);
            process.exit(1);
        }
    });
program
    .command("test")
    .description("Run tool implementation tests")
    .argument("<agent-dir>", "Path to agent directory")
    .action(async (agentDir) => {
        try {
            const results = await runToolTests(agentDir);
            if (results.length === 0) {
                console.log("No test files found (*.test.js in tool-impls/)");
                return;
            }
            console.log(formatResults(results));
            // Exit non-zero when any individual test result did not pass
            const failed = results.some((t) => t.results.some((r) => !r.passed));
            if (failed)
                process.exit(1);
        }
        catch (err) {
            console.error(err instanceof Error ? err.message : err);
            process.exit(1);
        }
    });
program
    .command("eval")
    .description("Run agent evaluation cases")
    .argument("<agent-dir>", "Path to agent directory")
    .option("-f, --file <file>", "Only run a specific .eval.json file")
    .option("-t, --tag <tag>", "Only run cases with this tag")
    .option("-s, --save", "Save results to eval-results/")
    .option("--no-judge", "Skip judge evaluation")
    .action(async (agentDir, opts) => {
        try {
            const { summary, formatted } = await runEvals(agentDir, {
                file: opts.file,
                tag: opts.tag,
                save: opts.save,
                // commander exposes `--no-judge` as `opts.judge === false`
                noJudge: opts.judge === false,
            });
            console.log(formatted);
            if (summary.failed > 0)
                process.exit(1);
        }
        catch (err) {
            console.error(err instanceof Error ? err.message : err);
            process.exit(1);
        }
    });
program
    .command("init")
    .description("Initialise an agent workspace with shared .claude/ skills")
    .argument("[dir]", "Directory to initialise", "./agents")
    .action(async (dir) => {
        try {
            await scaffoldWorkspace(dir);
        }
        catch (err) {
            console.error(err instanceof Error ? err.message : err);
            process.exit(1);
        }
    });
program
    .command("create")
    .description("Create a new agent project")
    .argument("<agent-name>", "Name of the agent (used as directory name)")
    .option("-d, --dir <path>", "Parent directory for the agent", "./agents")
    .action(async (agentName, opts) => {
        try {
            await scaffoldAgent(agentName, opts.dir);
        }
        catch (err) {
            console.error(err instanceof Error ? err.message : err);
            process.exit(1);
        }
    });
// The action handlers are async, so parse with parseAsync and report any
// rejection that escapes them instead of leaving it unhandled.
program.parseAsync().catch((err) => {
    console.error(err instanceof Error ? err.message : err);
    process.exit(1);
});
|
package/dist/config.d.ts
ADDED
package/dist/config.js
ADDED
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
import fs from "node:fs/promises";
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
import { pathToFileURL } from "node:url";
|
|
4
|
+
import { Liquid } from "liquidjs";
|
|
5
|
+
import matter from "gray-matter";
|
|
6
|
+
import { validateDir } from "./validator/index.js";
|
|
7
|
+
import { mcpConfigSchema } from "./schemas.js";
|
|
8
|
+
import { McpManager } from "./mcp-manager.js";
|
|
9
|
+
/**
 * Load everything an agent needs from its directory.
 *
 * Reads, in order: model.json, datasets/*.json (optional), skills/*.md
 * (optional), tools/*.json, tool-impls/*.impl.js, mcp.json (optional) and
 * tool-uis/*.ui.js (optional), then renders system-prompt.md as a Liquid
 * template with the datasets, skills, tools and MCP servers as variables.
 *
 * Only a missing optional directory or file is tolerated. Any other failure
 * (unreadable file, malformed JSON, an implementation module that fails to
 * import, an MCP server that fails to initialise) is thrown to the caller.
 *
 * @param agentDir Path to the agent directory; resolved to an absolute path.
 * @returns `{ systemPrompt, model, tools, toolImpls, skills, mcpManager, agentDir, toolUIs }`
 *   where `toolImpls` is a Map of tool name to implementation, `toolUIs` is a
 *   Set of tool names that have a UI file, and `mcpManager` is undefined when
 *   there is no mcp.json.
 * @throws Error when directory validation fails, or when server tools exist
 *   but tool-impls/ is missing.
 */
export async function loadAgentConfig(agentDir) {
    const absDir = path.resolve(agentDir);
    // Validate directory structure
    const validation = await validateDir("agent-dir", absDir);
    if (!validation.valid) {
        throw new Error(`Invalid agent directory:\n${validation.errors.map((e) => ` - ${e}`).join("\n")}`);
    }
    // Load model.json
    const modelRaw = await fs.readFile(path.join(absDir, "model.json"), "utf-8");
    const model = JSON.parse(modelRaw);
    // Load datasets (datasets/ is optional)
    const datasets = {};
    const datasetsDir = path.join(absDir, "datasets");
    const datasetFiles = ((await listDirIfExists(datasetsDir)) ?? []).filter((f) => f.endsWith(".json"));
    const loadedDatasets = await Promise.all(datasetFiles.map(async (file) => ({
        file,
        name: file.replace(/\.json$/, ""),
        content: await fs.readFile(path.join(datasetsDir, file), "utf-8"),
    })));
    for (const { file, name, content } of loadedDatasets) {
        try {
            datasets[name] = JSON.parse(content);
        }
        catch (err) {
            // Name the offending file; the bare parser message does not
            throw new Error(`Invalid JSON in datasets/${file}: ${err instanceof Error ? err.message : String(err)}`);
        }
    }
    // Load skills (skills/ is optional)
    const skills = {};
    const skillsMeta = [];
    const skillsDir = path.join(absDir, "skills");
    const skillFiles = ((await listDirIfExists(skillsDir)) ?? []).filter((f) => f.endsWith(".md"));
    const loadedSkills = await Promise.all(skillFiles.map(async (file) => ({
        key: file.replace(/\.md$/, ""),
        content: await fs.readFile(path.join(skillsDir, file), "utf-8"),
    })));
    for (const { key, content } of loadedSkills) {
        const { data, content: body } = matter(content);
        skills[key] = body.trim();
        // Only skills whose front matter has both fields are listed in the prompt variables
        if (data.name && data.description) {
            skillsMeta.push({ name: data.name, description: data.description });
        }
    }
    // Load tool schemas
    const toolsDir = path.join(absDir, "tools");
    const toolFiles = (await fs.readdir(toolsDir)).filter((f) => f.endsWith(".json"));
    const tools = await Promise.all(toolFiles.map(async (file) => {
        const content = await fs.readFile(path.join(toolsDir, file), "utf-8");
        return JSON.parse(content);
    }));
    // Build set of tools that don't need server-side implementations
    const nonServerToolNames = new Set(tools
        .filter((t) => t.execution_target === "client" || t.execution_target === "host")
        .map((t) => t.name));
    // Load tool implementations (only for server tools)
    const toolImpls = new Map();
    const implsDir = path.join(absDir, "tool-impls");
    const implDirEntries = await listDirIfExists(implsDir);
    if (implDirEntries === null) {
        // tool-impls/ may not exist if all tools are client/host
        if (tools.length > nonServerToolNames.size) {
            throw new Error("Missing required directory: tool-impls/");
        }
    }
    else {
        const implFiles = implDirEntries.filter((f) => f.endsWith(".impl.js"));
        const implEntries = await Promise.all(implFiles.map(async (file) => {
            const name = file.replace(/\.impl\.js$/, "");
            const implPath = pathToFileURL(path.join(implsDir, file)).href;
            const mod = await import(implPath);
            return [name, mod.default];
        }));
        for (const [name, impl] of implEntries) {
            toolImpls.set(name, impl);
        }
    }
    // Load MCP servers (optional). Only the read of mcp.json is guarded, so an
    // ENOENT raised while parsing or starting servers is not mistaken for
    // "no mcp.json".
    let mcpManager;
    let mcpRaw = null;
    try {
        mcpRaw = await fs.readFile(path.join(absDir, "mcp.json"), "utf-8");
    }
    catch (err) {
        if (!isNotFoundError(err))
            throw err;
        // mcp.json not found — that's fine
    }
    if (mcpRaw !== null) {
        const mcpData = JSON.parse(mcpRaw);
        const mcpConfig = mcpConfigSchema.parse(mcpData);
        mcpManager = new McpManager(mcpConfig);
        await mcpManager.initialize();
        // Merge MCP tools into tools + toolImpls; local tools win on name clashes
        const localToolNames = new Set(tools.map((t) => t.name));
        for (const mcpTool of mcpManager.getTools()) {
            if (localToolNames.has(mcpTool.name)) {
                console.warn(`[mcp] Skipping tool "${mcpTool.name}" — conflicts with local tool`);
                continue;
            }
            tools.push(mcpTool);
            const toolName = mcpTool.name;
            toolImpls.set(toolName, async (args) => mcpManager.callTool(toolName, args));
        }
    }
    // Scan tool-uis/ for custom UI components (tool-uis/ is optional)
    const toolUIs = new Set();
    const toolUIsDir = path.join(absDir, "tool-uis");
    const uiFiles = ((await listDirIfExists(toolUIsDir)) ?? []).filter((f) => f.endsWith(".ui.js"));
    const toolNames = new Set(tools.map((t) => t.name));
    for (const file of uiFiles) {
        const name = file.replace(/\.ui\.js$/, "");
        // A UI file only counts when a tool of the same name exists
        if (toolNames.has(name)) {
            toolUIs.add(name);
        }
    }
    // Build tool template variables: tools (array) + tool (map by name)
    const toolsMeta = tools.map((t) => ({ name: t.name, description: t.description }));
    const toolMap = {};
    for (const t of toolsMeta) {
        toolMap[t.name] = t;
    }
    // Build mcp template variable: mcp.{serverName}.tools/tool + mcp.servers
    const mcpVar = { servers: [] };
    if (mcpManager) {
        const byServer = mcpManager.getToolsByServer();
        const servers = [];
        for (const serverName of mcpManager.getServerNames()) {
            const serverTools = byServer[serverName] ?? [];
            const serverToolMap = {};
            for (const t of serverTools) {
                serverToolMap[t.name] = t;
            }
            const serverObj = { name: serverName, tools: serverTools, tool: serverToolMap };
            mcpVar[serverName] = serverObj;
            servers.push(serverObj);
        }
        mcpVar.servers = servers;
    }
    // Load and render system prompt (after tools/MCP so they're available in template)
    const promptRaw = await fs.readFile(path.join(absDir, "system-prompt.md"), "utf-8");
    const engine = new Liquid();
    const templateVars = { ...datasets, skills: skillsMeta, tools: toolsMeta, tool: toolMap, mcp: mcpVar };
    const systemPrompt = (await engine.parseAndRender(promptRaw, templateVars)).trim();
    return { systemPrompt, model: model, tools, toolImpls, skills, mcpManager, agentDir: absDir, toolUIs };
}
/** True when `err` is an Error whose `code` is "ENOENT" (file or directory not found). */
function isNotFoundError(err) {
    return err instanceof Error && "code" in err && err.code === "ENOENT";
}
/** List the entries of `dir`, or return null when the directory does not exist. */
async function listDirIfExists(dir) {
    try {
        return await fs.readdir(dir);
    }
    catch (err) {
        if (isNotFoundError(err))
            return null;
        throw err;
    }
}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import type { Assertion, AssertionResult, ToolCallRecord } from "./types.js";
|
|
2
|
+
/**
|
|
3
|
+
* Run a single assertion against a response text and tool calls.
|
|
4
|
+
*/
|
|
5
|
+
export declare function runAssertion(assertion: Assertion, responseText: string, toolCalls: ToolCallRecord[]): AssertionResult;
|
|
6
|
+
/**
|
|
7
|
+
* Run multiple assertions and return all results.
|
|
8
|
+
*/
|
|
9
|
+
export declare function runAssertions(assertions: Assertion[], responseText: string, toolCalls: ToolCallRecord[]): AssertionResult[];
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
/**
 * Run a single assertion against a response text and tool calls.
 *
 * Supported `assertion.type` values:
 *  - "contains" / "not-contains": case-insensitive substring check on the response
 *  - "regex": `value` is used as a RegExp source and tested against the response
 *  - "length-min" / "length-max": `value` is parsed as a base-10 integer and
 *    compared with the response length
 *  - "json-valid": the response must parse as JSON
 *  - "tool-called" / "tool-not-called": whether a recorded call has `name === value`
 *  - "tool-called-with": `value` is a JSON string `{ tool, args }`; passes when a
 *    recorded call to `tool` has arguments that contain `args`
 *
 * An unknown type, an invalid regex, or a malformed "tool-called-with" spec
 * yields a failed result with an explanatory message rather than an exception.
 *
 * @param assertion Object with `type` and `value`.
 * @param responseText Response text to check.
 * @param toolCalls Recorded tool calls (reads `name` and `args`).
 * @returns `{ type, value, passed, message }`; `message` is undefined on success.
 */
export function runAssertion(assertion, responseText, toolCalls) {
    const { type, value } = assertion;
    // Every case reports the same shape; the message is kept only on failure.
    const outcome = (passed, failureMessage) => ({
        type,
        value,
        passed,
        message: passed ? undefined : failureMessage,
    });
    switch (type) {
        case "contains": {
            const found = responseText.toLowerCase().includes(value.toLowerCase());
            return outcome(found, `"${value}" not found in response`);
        }
        case "not-contains": {
            const found = responseText.toLowerCase().includes(value.toLowerCase());
            return outcome(!found, `"${value}" found in response`);
        }
        case "regex": {
            let re;
            try {
                re = new RegExp(value);
            }
            catch (err) {
                // A bad pattern fails this assertion instead of aborting the caller
                return outcome(false, `invalid regex /${value}/: ${err instanceof Error ? err.message : String(err)}`);
            }
            return outcome(re.test(responseText), `regex /${value}/ did not match`);
        }
        case "length-min": {
            const min = parseInt(value, 10);
            return outcome(responseText.length >= min, `response length ${responseText.length} < ${min}`);
        }
        case "length-max": {
            const max = parseInt(value, 10);
            return outcome(responseText.length <= max, `response length ${responseText.length} > ${max}`);
        }
        case "json-valid": {
            let parsed = false;
            try {
                JSON.parse(responseText);
                parsed = true;
            }
            catch {
                // not valid JSON
            }
            return outcome(parsed, "response is not valid JSON");
        }
        case "tool-called": {
            const called = toolCalls.some((tc) => tc.name === value);
            return outcome(called, `tool "${value}" was not called`);
        }
        case "tool-not-called": {
            const called = toolCalls.some((tc) => tc.name === value);
            return outcome(!called, `tool "${value}" was called`);
        }
        case "tool-called-with": {
            let spec;
            try {
                spec = JSON.parse(value);
            }
            catch {
                return outcome(false, `invalid tool-called-with spec (expected JSON): ${value}`);
            }
            if (typeof spec !== "object" || spec === null) {
                return outcome(false, `invalid tool-called-with spec (expected JSON): ${value}`);
            }
            // A spec without `args` only requires that the tool was called
            const expectedArgs = spec.args ?? {};
            const matched = toolCalls.some((tc) => tc.name === spec.tool && objectContains(tc.args, expectedArgs));
            return outcome(matched, `tool "${spec.tool}" was not called with matching args`);
        }
        default:
            return outcome(false, `unknown assertion type: ${type}`);
    }
}
/**
 * Run multiple assertions and return all results.
 *
 * @returns One result per assertion, in the same order as `assertions`.
 */
export function runAssertions(assertions, responseText, toolCalls) {
    return assertions.map((a) => runAssertion(a, responseText, toolCalls));
}
/**
 * Check if `obj` contains all key-value pairs from `subset`.
 *
 * The comparison is recursive: a non-null object value in `subset` must be
 * contained in the corresponding object value of `obj`; any other value must
 * be strictly equal (`===`). Arrays are compared as objects keyed by index.
 *
 * A null/undefined `subset` is treated as empty (always contained). A
 * null/undefined `obj` contains only an empty `subset`.
 */
function objectContains(obj, subset) {
    if (subset === null || subset === undefined) {
        return true;
    }
    if (obj === null || obj === undefined) {
        return Object.keys(subset).length === 0;
    }
    for (const [key, expected] of Object.entries(subset)) {
        const actual = obj[key];
        if (typeof expected === "object" && expected !== null) {
            if (typeof actual !== "object" || actual === null) {
                return false;
            }
            if (!objectContains(actual, expected)) {
                return false;
            }
        }
        else if (actual !== expected) {
            return false;
        }
    }
    return true;
}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import type { AgentConfig, ChatMessage } from "../types.js";
|
|
2
|
+
import type { EvalCase, CaseResult, JudgeResult } from "./types.js";
|
|
3
|
+
interface ExecuteOptions {
|
|
4
|
+
config: AgentConfig;
|
|
5
|
+
evalCase: EvalCase;
|
|
6
|
+
fileName: string;
|
|
7
|
+
judgeRunner?: (userInput: string, expectedOutput: string | undefined, actualResponse: string, conversation: ChatMessage[]) => Promise<JudgeResult>;
|
|
8
|
+
}
|
|
9
|
+
/**
|
|
10
|
+
* Execute a single eval case, dispatching to the appropriate mode.
|
|
11
|
+
*/
|
|
12
|
+
export declare function executeCase(opts: ExecuteOptions): Promise<CaseResult>;
|
|
13
|
+
export {};
|