npm - skyloom - Versions diffs - 1.16.2 → 1.18.0 - Mend

skyloom 1.16.2 → 1.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (109) hide show

package/README.md +15 -3
package/dist/cli/loom_chat.d.ts.map +1 -1
package/dist/cli/loom_chat.js +17 -0
package/dist/cli/loom_chat.js.map +1 -1
package/dist/cli/main.js +37 -1
package/dist/cli/main.js.map +1 -1
package/dist/core/agent.d.ts +2 -0
package/dist/core/agent.d.ts.map +1 -1
package/dist/core/agent.js +21 -5
package/dist/core/agent.js.map +1 -1
package/dist/core/bgproc.d.ts +59 -0
package/dist/core/bgproc.d.ts.map +1 -0
package/dist/core/bgproc.js +135 -0
package/dist/core/bgproc.js.map +1 -0
package/dist/core/commands.d.ts.map +1 -1
package/dist/core/commands.js +20 -0
package/dist/core/commands.js.map +1 -1
package/dist/core/diagnostics.d.ts +39 -0
package/dist/core/diagnostics.d.ts.map +1 -0
package/dist/core/diagnostics.js +206 -0
package/dist/core/diagnostics.js.map +1 -0
package/dist/core/diff.d.ts +31 -0
package/dist/core/diff.d.ts.map +1 -0
package/dist/core/diff.js +82 -0
package/dist/core/diff.js.map +1 -0
package/dist/core/envcontext.d.ts +25 -0
package/dist/core/envcontext.d.ts.map +1 -0
package/dist/core/envcontext.js +112 -0
package/dist/core/envcontext.js.map +1 -0
package/dist/core/factory.d.ts +2 -0
package/dist/core/factory.d.ts.map +1 -1
package/dist/core/factory.js +35 -2
package/dist/core/factory.js.map +1 -1
package/dist/core/patch.d.ts +59 -0
package/dist/core/patch.d.ts.map +1 -0
package/dist/core/patch.js +220 -0
package/dist/core/patch.js.map +1 -0
package/dist/core/protocol.d.ts +11 -0
package/dist/core/protocol.d.ts.map +1 -0
package/dist/core/protocol.js +39 -0
package/dist/core/protocol.js.map +1 -0
package/dist/core/sandbox.d.ts +1 -0
package/dist/core/sandbox.d.ts.map +1 -1
package/dist/core/sandbox.js +1 -0
package/dist/core/sandbox.js.map +1 -1
package/dist/core/search.d.ts +41 -0
package/dist/core/search.d.ts.map +1 -0
package/dist/core/search.js +156 -0
package/dist/core/search.js.map +1 -0
package/dist/core/security.d.ts +22 -2
package/dist/core/security.d.ts.map +1 -1
package/dist/core/security.js +55 -24
package/dist/core/security.js.map +1 -1
package/dist/core/skill.d.ts +4 -0
package/dist/core/skill.d.ts.map +1 -1
package/dist/core/skill.js +1 -0
package/dist/core/skill.js.map +1 -1
package/dist/core/subagent.d.ts +75 -0
package/dist/core/subagent.d.ts.map +1 -0
package/dist/core/subagent.js +287 -0
package/dist/core/subagent.js.map +1 -0
package/dist/core/tool.d.ts +23 -1
package/dist/core/tool.d.ts.map +1 -1
package/dist/core/tool.js +95 -30
package/dist/core/tool.js.map +1 -1
package/dist/plugins/loader.d.ts +49 -8
package/dist/plugins/loader.d.ts.map +1 -1
package/dist/plugins/loader.js +129 -16
package/dist/plugins/loader.js.map +1 -1
package/dist/tools/builtin.d.ts.map +1 -1
package/dist/tools/builtin.js +183 -17
package/dist/tools/builtin.js.map +1 -1
package/dist/tools/spawn.d.ts +23 -0
package/dist/tools/spawn.d.ts.map +1 -0
package/dist/tools/spawn.js +77 -0
package/dist/tools/spawn.js.map +1 -0
package/docs/OPTIMIZATION_PLAN.md +21 -4
package/package.json +1 -1
package/src/cli/loom_chat.ts +11 -0
package/src/cli/main.ts +31 -1
package/src/core/agent.ts +20 -5
package/src/core/bgproc.ts +153 -0
package/src/core/commands.ts +20 -0
package/src/core/diagnostics.ts +178 -0
package/src/core/diff.ts +98 -0
package/src/core/envcontext.ts +79 -0
package/src/core/factory.ts +31 -2
package/src/core/patch.ts +176 -0
package/src/core/protocol.ts +36 -0
package/src/core/sandbox.ts +1 -1
package/src/core/search.ts +138 -0
package/src/core/security.ts +63 -21
package/src/core/skill.ts +1 -1
package/src/core/subagent.ts +272 -0
package/src/core/tool.ts +101 -31
package/src/plugins/loader.ts +145 -18
package/src/tools/builtin.ts +167 -17
package/src/tools/spawn.ts +92 -0
package/tests/bgproc.test.ts +65 -0
package/tests/diagnostics.test.ts +86 -0
package/tests/edit_diff.test.ts +102 -0
package/tests/envcontext.test.ts +67 -0
package/tests/patch.test.ts +128 -0
package/tests/plugins.test.ts +84 -0
package/tests/protocol.test.ts +27 -0
package/tests/search.test.ts +87 -0
package/tests/security.test.ts +87 -0
package/tests/subagent.test.ts +211 -0
package/tests/tool.test.ts +120 -0

package/tests/search.test.ts ADDED Viewed

@@ -0,0 +1,87 @@
+import { describe, it, expect, beforeEach, afterEach } from "vitest";
+import * as fs from "fs";
+import * as os from "os";
+import * as path from "path";
+import { searchCode, formatSearchResult } from "../src/core/search";
+/**
+ * Integration tests for `searchCode`, run against a temporary directory tree
+ * that is recreated before every test and removed afterwards.
+ */
+describe("search · searchCode (pure JS)", () => {
+  let root: string;
+  beforeEach(() => {
+    // Fixture: a.ts and b.js at the root, sub/c.ts one level down.
+    root = fs.mkdtempSync(path.join(os.tmpdir(), "sky-search-"));
+    fs.writeFileSync(path.join(root, "a.ts"), "const Foo = 1;\nexport function useFoo() { return Foo; }\n");
+    fs.writeFileSync(path.join(root, "b.js"), "// foo lower\nconst x = 2;\n");
+    fs.mkdirSync(path.join(root, "sub"));
+    fs.writeFileSync(path.join(root, "sub", "c.ts"), "import { useFoo } from '../a';\n");
+    // should be ignored by default
+    fs.mkdirSync(path.join(root, "node_modules", "dep"), { recursive: true });
+    fs.writeFileSync(path.join(root, "node_modules", "dep", "x.ts"), "const Foo = 999;\n");
+  });
+  // Cleanup is best-effort: a failed removal of the temp dir must not fail a test.
+  afterEach(() => { try { fs.rmSync(root, { recursive: true, force: true }); } catch {} });
+  it("finds matches with file:line", () => {
+    const res = searchCode({ pattern: "useFoo", root });
+    const files = res.matches.map((m) => m.file).sort();
+    expect(files).toContain("a.ts");
+    expect(files).toContain("sub/c.ts");
+    const a = res.matches.find((m) => m.file === "a.ts")!;
+    expect(a.line).toBe(2);
+    expect(a.text).toContain("useFoo");
+  });
+  it("skips node_modules by default", () => {
+    const res = searchCode({ pattern: "Foo", root });
+    expect(res.matches.some((m) => m.file.includes("node_modules"))).toBe(false);
+  });
+  it("restricts by glob", () => {
+    const res = searchCode({ pattern: "foo", root, glob: "**/*.ts", ignoreCase: true });
+    expect(res.matches.some((m) => m.file === "b.js")).toBe(false);
+    expect(res.matches.some((m) => m.file === "a.ts")).toBe(true);
+  });
+  it("honors ignoreCase", () => {
+    // b.js contains lowercase "foo"; capital "Foo" only matches case-insensitively.
+    expect(searchCode({ pattern: "Foo", root, glob: "b.js" }).matches.length).toBe(0);
+    expect(searchCode({ pattern: "Foo", root, glob: "b.js", ignoreCase: true }).matches.length).toBe(1);
+  });
+  it("returns context lines", () => {
+    const res = searchCode({ pattern: "useFoo", root, glob: "a.ts", context: 1 });
+    const m = res.matches[0];
+    expect(m.before).toEqual(["const Foo = 1;"]);
+  });
+  it("treats pattern as literal when regex=false", () => {
+    fs.writeFileSync(path.join(root, "d.ts"), "a.b.c\n");
+    // e.ts holds text that "a.b" matches only when '.' is a regex wildcard.
+    fs.writeFileSync(path.join(root, "e.ts"), "axb\n");
+    const asRegex = searchCode({ pattern: "a.b", root, glob: "d.ts" });          // '.' = any char
+    const literal = searchCode({ pattern: "a.b", root, glob: "d.ts", regex: false });
+    expect(asRegex.matches.length).toBe(1);
+    expect(literal.matches.length).toBe(1);
+    const noLit = searchCode({ pattern: "axb", root, glob: "d.ts", regex: false });
+    expect(noLit.matches.length).toBe(0);
+    // The two modes must disagree on "axb"; equal counts would mean regex=false is ignored.
+    expect(searchCode({ pattern: "a.b", root, glob: "e.ts" }).matches.length).toBe(1);
+    expect(searchCode({ pattern: "a.b", root, glob: "e.ts", regex: false }).matches.length).toBe(0);
+  });
+  it("caps results and flags truncation", () => {
+    fs.writeFileSync(path.join(root, "many.ts"), Array.from({ length: 50 }, () => "hit").join("\n"));
+    const res = searchCode({ pattern: "hit", root, glob: "many.ts", maxResults: 10 });
+    expect(res.matches.length).toBe(10);
+    expect(res.truncated).toBe(true);
+  });
+  it("reports an invalid regex instead of throwing", () => {
+    const res = searchCode({ pattern: "(", root });
+    expect(res.error).toContain("invalid regex");
+  });
+});
+// formatSearchResult: checks the empty-result message and the "file:line:" rendering of a hit.
+describe("search · formatSearchResult", () => {
+  it("renders file:line and a no-match message", () => {
+    const emptyText = formatSearchResult({ matches: [], filesScanned: 3, truncated: false });
+    expect(emptyText).toBe("No matches found.");
+    const rendered = formatSearchResult({
+      matches: [{ file: "a.ts", line: 2, text: "  return Foo;" }],
+      filesScanned: 1,
+      truncated: false,
+    });
+    for (const fragment of ["a.ts:2:", "return Foo"]) {
+      expect(rendered).toContain(fragment);
+    }
+  });
+});

package/tests/security.test.ts ADDED Viewed

@@ -0,0 +1,87 @@
+import { describe, it, expect } from "vitest";
+import {
+  DangerLevel,
+  decideApproval,
+  isEditTool,
+  SecurityContext,
+} from "../src/core/security";
+/**
+ * Pins the decision returned by `decideApproval` for each approval mode.
+ * Each test lists (danger level, tool name, expected decision) rows.
+ */
+describe("security · decideApproval matrix", () => {
+  it("SAFE is always allowed in every mode", () => {
+    const modes = ["auto", "interactive", "strict", "acceptEdits", "bypass"] as const;
+    modes.forEach((mode) => {
+      expect(decideApproval(DangerLevel.SAFE, mode, "read_file")).toBe("allow");
+    });
+  });
+  it("strict denies every non-SAFE tool", () => {
+    const rows = [
+      [DangerLevel.LOW, "write_file"],
+      [DangerLevel.HIGH, "run_bash"],
+    ] as const;
+    for (const [level, tool] of rows) {
+      expect(decideApproval(level, "strict", tool)).toBe("deny");
+    }
+  });
+  it("bypass allows everything (red-line is gated elsewhere)", () => {
+    const rows = [
+      [DangerLevel.CRITICAL, "run_bash"],
+      [DangerLevel.HIGH, "deploy"],
+    ] as const;
+    for (const [level, tool] of rows) {
+      expect(decideApproval(level, "bypass", tool)).toBe("allow");
+    }
+  });
+  it("interactive asks for every non-SAFE tool", () => {
+    const rows = [
+      [DangerLevel.LOW, "write_file"],
+      [DangerLevel.HIGH, "run_bash"],
+    ] as const;
+    for (const [level, tool] of rows) {
+      expect(decideApproval(level, "interactive", tool)).toBe("ask");
+    }
+  });
+  it("auto allows LOW, asks MEDIUM/HIGH, denies CRITICAL (unchanged)", () => {
+    const rows = [
+      [DangerLevel.LOW, "write_file", "allow"],
+      [DangerLevel.MEDIUM, "git_push", "ask"],
+      [DangerLevel.HIGH, "run_bash", "ask"],
+      [DangerLevel.CRITICAL, "run_bash", "deny"],
+    ] as const;
+    for (const [level, tool, decision] of rows) {
+      expect(decideApproval(level, "auto", tool)).toBe(decision);
+    }
+  });
+  it("acceptEdits waves through edit tools but asks for other risky tools", () => {
+    const rows = [
+      [DangerLevel.LOW, "write_file", "allow"],
+      [DangerLevel.MEDIUM, "delete_file", "allow"], // edit tool
+      [DangerLevel.HIGH, "run_bash", "ask"],        // not an edit
+      [DangerLevel.CRITICAL, "delete_file", "deny"],
+    ] as const;
+    for (const [level, tool, decision] of rows) {
+      expect(decideApproval(level, "acceptEdits", tool)).toBe(decision);
+    }
+  });
+});
+// isEditTool: the four file-mutating tool names are edits; read_file and run_bash are not.
+describe("security · isEditTool", () => {
+  it("recognizes filesystem-mutating tools", () => {
+    const expectations: Record<string, boolean> = {
+      write_file: true,
+      edit_file: true,
+      delete_file: true,
+      move_file: true,
+      read_file: false,
+      run_bash: false,
+    };
+    for (const [toolName, isEdit] of Object.entries(expectations)) {
+      expect(isEditTool(toolName)).toBe(isEdit);
+    }
+  });
+});
+/**
+ * End-to-end checks of `SecurityContext.checkApproval`, whose result is read
+ * here as an `[allowed, reason]` pair.
+ */
+describe("security · checkApproval integration", () => {
+  it("blocks red-line shell commands regardless of mode", async () => {
+    const ctx = new SecurityContext({ mode: "bypass" });
+    const verdict = await ctx.checkApproval("run_bash", { command: "rm -rf /" }, "fog");
+    expect(verdict[0]).toBe(false);
+    expect(verdict[1].toLowerCase()).toContain("red-line");
+  });
+  it("write_file: auto allows, strict denies, acceptEdits allows", async () => {
+    const args = { path: "a.txt", content: "x" };
+    const expectedByMode = [
+      ["auto", true],
+      ["strict", false],
+      ["acceptEdits", true],
+    ] as const;
+    for (const [mode, allowed] of expectedByMode) {
+      const outcome = await new SecurityContext({ mode }).checkApproval("write_file", args, "rain");
+      expect(outcome[0]).toBe(allowed);
+    }
+  });
+  it("ask defers to the approval callback", async () => {
+    const ctx = new SecurityContext({ mode: "interactive" });
+    let asked = false;
+    // The callback records that it was consulted and then refuses.
+    ctx.setApprovalCallback(async () => {
+      asked = true;
+      return false;
+    });
+    const outcome = await ctx.checkApproval("write_file", { path: "a", content: "b" }, "rain");
+    expect(asked).toBe(true);
+    expect(outcome[0]).toBe(false);
+  });
+  it("setMode switches behavior at runtime", () => {
+    const ctx = new SecurityContext({ mode: "auto" });
+    expect(ctx.approvalMode).toBe("auto");
+    ctx.setMode("bypass");
+    expect(ctx.approvalMode).toBe("bypass");
+  });
+});

package/tests/subagent.test.ts ADDED Viewed

@@ -0,0 +1,211 @@
+import { describe, it, expect, beforeEach, afterEach } from "vitest";
+import * as fs from "fs";
+import * as os from "os";
+import * as path from "path";
+import { MessageBus } from "../src/core/bus";
+import { ToolRegistry } from "../src/core/tool";
+import { SkillRegistry } from "../src/core/skill";
+import {
+  loadSubagentDefinitions,
+  parseSubagentFile,
+  runSubagent,
+  READ_ONLY_TOOLS,
+} from "../src/core/subagent";
+import { createSpawnAgentTool } from "../src/tools/spawn";
+/**
+ * Subagent system: definition loading/parsing + isolated-context execution,
+ * driven by a scripted mock LLM (no network), mirroring tests/agent.test.ts.
+ */
+/** One scripted model reply: optional text plus optional tool invocations. */
+interface Turn { content?: string; toolCalls?: { name: string; args?: any }[] }
+/**
+ * Scripted stand-in for the LLM client. Each call to `streamWithTools` or
+ * `complete` consumes the next turn; once the script runs out the last turn
+ * is replayed. `calls` counts how many turns have been requested.
+ */
+class MockLLM {
+  calls = 0;
+  constructor(private turns: Turn[]) {}
+  /** Picks the current turn (clamped to the last one), then bumps `calls`. */
+  private turn(): Turn {
+    const index = Math.min(this.calls, this.turns.length - 1);
+    const picked = this.turns[index];
+    this.calls += 1;
+    // An empty script yields an empty turn rather than undefined.
+    return picked || {};
+  }
+  /** Converts a turn's tool calls into function-call objects with JSON-encoded arguments. */
+  private toolCallObjs(t: Turn) {
+    const requested = t.toolCalls || [];
+    return requested.map((tc, i) => {
+      const fn = { name: tc.name, arguments: JSON.stringify(tc.args || {}) };
+      return { id: `call_${this.calls}_${i}`, type: "function", function: fn };
+    });
+  }
+  /** Streams the turn as events: optional content, one event per tool call, then a final "done". */
+  async *streamWithTools(): AsyncGenerator<any> {
+    const current = this.turn();
+    if (current.content) {
+      yield { type: "content", text: current.content };
+    }
+    for (const toolCall of this.toolCallObjs(current)) {
+      yield { type: "tool_call", toolCall };
+    }
+    yield { type: "done", usage: { promptTokens: 1, completionTokens: 1 } };
+  }
+  /** Non-streaming variant: returns the whole turn as a single response object. */
+  async complete(): Promise<any> {
+    const current = this.turn();
+    return {
+      content: current.content || "",
+      toolCalls: this.toolCallObjs(current),
+      model: "mock",
+      usage: { promptTokens: 1, completionTokens: 1 },
+      cost: 0,
+      truncated: false,
+    };
+  }
+  getTotalCost() {
+    return 0;
+  }
+  getModel() {
+    return "mock";
+  }
+  setLogger() {
+    /* noop */
+  }
+}
+/** Builds the config object passed to the subagent runner and spawn tool in these tests. */
+function baseConfig() {
+  const dbPath = path.join(os.tmpdir(), "sky-sub-test");
+  return {
+    agents: {},
+    llm: { language: "zh" },
+    memory: { shortTermLimit: 100, dbPath },
+  };
+}
+// Built-in subagent definitions, loaded with the OS temp dir as the working directory.
+describe("subagent · definitions", () => {
+  it("ships built-in general-purpose and explore agents", () => {
+    const registry = loadSubagentDefinitions(os.tmpdir());
+    for (const builtin of ["general-purpose", "explore"]) {
+      expect(registry.has(builtin)).toBe(true);
+    }
+  });
+  it("explore is read-only: includes read_file, excludes write_file", () => {
+    const registry = loadSubagentDefinitions(os.tmpdir());
+    const allowedTools = registry.get("explore")!.tools;
+    expect(allowedTools).not.toBeNull();
+    expect(allowedTools).toContain("read_file");
+    expect(allowedTools).not.toContain("write_file");
+    expect(READ_ONLY_TOOLS).toContain("grep");
+  });
+  it("general-purpose inherits the full tool set (tools = null)", () => {
+    const generalPurpose = loadSubagentDefinitions(os.tmpdir()).get("general-purpose")!;
+    expect(generalPurpose.tools).toBeNull();
+  });
+});
+/**
+ * Parsing of markdown subagent definition files (frontmatter + body) and
+ * discovery of project-level definitions under `<cwd>/.sky/agents`.
+ */
+describe("subagent · file parsing", () => {
+  let dir: string;
+  beforeEach(() => { dir = fs.mkdtempSync(path.join(os.tmpdir(), "sky-agentdefs-")); });
+  // Cleanup is best-effort: a failed removal of the temp dir must not fail a test.
+  afterEach(() => { try { fs.rmSync(dir, { recursive: true, force: true }); } catch {} });
+  it("parses frontmatter, body, and normalizes Claude tool names", () => {
+    const file = path.join(dir, "reviewer.md");
+    fs.writeFileSync(file,
+      "---\nname: reviewer\ndescription: 审查代码\ntools: Read, Grep, Bash\nmodel: gpt-4o\n---\n你是一个代码审查子智能体。\n");
+    const def = parseSubagentFile(file)!;
+    expect(def.name).toBe("reviewer");
+    expect(def.description).toBe("审查代码");
+    expect(def.model).toBe("gpt-4o");
+    expect(def.systemPrompt).toContain("代码审查");
+    // Read -> read_file, Bash -> run_bash, Grep -> grep
+    expect(def.tools).toEqual(["read_file", "grep", "run_bash"]);
+  });
+  it("omitted tools means inherit all (null)", () => {
+    const file = path.join(dir, "helper.md");
+    fs.writeFileSync(file, "---\ndescription: 万能\n---\nbody\n");
+    const def = parseSubagentFile(file)!;
+    expect(def.name).toBe("helper");      // falls back to filename
+    expect(def.tools).toBeNull();
+  });
+  it("project .sky/agents definitions are discovered and override built-ins", () => {
+    const cwd = fs.mkdtempSync(path.join(os.tmpdir(), "sky-cwd-"));
+    try {
+      const agentsDir = path.join(cwd, ".sky", "agents");
+      fs.mkdirSync(agentsDir, { recursive: true });
+      fs.writeFileSync(path.join(agentsDir, "custom.md"), "---\ndescription: 自定义\n---\nhi\n");
+      // Reuse a built-in name so the "override" half of the title is actually checked.
+      fs.writeFileSync(path.join(agentsDir, "explore.md"), "---\ndescription: 覆盖内置\n---\nhi\n");
+      const defs = loadSubagentDefinitions(cwd);
+      expect(defs.has("custom")).toBe(true);
+      expect(defs.get("custom")!.description).toBe("自定义");
+      expect(defs.get("explore")!.description).toBe("覆盖内置");
+    } finally {
+      try { fs.rmSync(cwd, { recursive: true, force: true }); } catch {}
+    }
+  });
+});
+/**
+ * Runs `runSubagent` against the scripted MockLLM (no network) and checks the
+ * returned report, tool execution, and that `spawn_agent` stays unavailable.
+ */
+describe("subagent · isolated execution (mock LLM)", () => {
+  it("runs to completion and returns the final report", async () => {
+    const defs = loadSubagentDefinitions(os.tmpdir());
+    const report = await runSubagent({
+      def: defs.get("general-purpose")!,
+      task: "say hi",
+      config: baseConfig(),
+      llm: new MockLLM([{ content: "REPORT: 完成了任务。" }]) as any,
+      bus: new MessageBus(),
+      baseToolRegistry: new ToolRegistry(),
+      baseSkillRegistry: new SkillRegistry(),
+    });
+    expect(report).toContain("REPORT: 完成了任务。");
+  });
+  it("executes inherited tools inside the isolated loop", async () => {
+    let ran = false;
+    const reg = new ToolRegistry();
+    reg.register({ name: "echo", description: "echo", handler: async (a: any) => { ran = true; return `echo:${a.text}`; } });
+    const defs = loadSubagentDefinitions(os.tmpdir());
+    const report = await runSubagent({
+      def: defs.get("general-purpose")!,
+      task: "use echo",
+      config: baseConfig(),
+      llm: new MockLLM([
+        { toolCalls: [{ name: "echo", args: { text: "hi" } }] },
+        { content: "用过 echo 了。" },
+      ]) as any,
+      bus: new MessageBus(),
+      baseToolRegistry: reg,
+      baseSkillRegistry: new SkillRegistry(),
+    });
+    expect(ran).toBe(true);
+    expect(report).toContain("echo");
+  });
+  it("never carries spawn_agent into the subagent (no recursion)", async () => {
+    // A registry that includes spawn_agent — the subagent must not see it.
+    let spawned = false;
+    const reg = new ToolRegistry();
+    reg.register({ name: "spawn_agent", description: "spawn", handler: async () => { spawned = true; return "should-not-run"; } });
+    reg.register({ name: "noop", description: "noop", handler: async () => "ok" });
+    const defs = loadSubagentDefinitions(os.tmpdir());
+    // Script the model to TRY spawn_agent; it should be reported as nonexistent.
+    const report = await runSubagent({
+      def: defs.get("general-purpose")!,
+      task: "try to spawn",
+      config: baseConfig(),
+      llm: new MockLLM([
+        { toolCalls: [{ name: "spawn_agent", args: { agent_type: "x", task: "y" } }] },
+        { content: "无法再派生。" },
+      ]) as any,
+      bus: new MessageBus(),
+      baseToolRegistry: reg,
+      baseSkillRegistry: new SkillRegistry(),
+    });
+    expect(report).toContain("无法再派生");
+    // The report alone does not prove isolation: the parent's handler must never have been invoked.
+    expect(spawned).toBe(false);
+  });
+});
+// spawn_agent tool: description contents, argument validation, and a successful run.
+describe("spawn_agent tool", () => {
+  /** Builds the tool under test; the registry and LLM can be swapped per test. */
+  function makeTool(reg = new ToolRegistry(), llm = new MockLLM([{ content: "done" }])) {
+    const deps = {
+      config: baseConfig(),
+      llm: llm as any,
+      bus: new MessageBus(),
+      baseToolRegistry: reg,
+      baseSkillRegistry: new SkillRegistry(),
+      cwd: os.tmpdir(),
+    };
+    return createSpawnAgentTool(deps);
+  }
+  it("lists available agent types in its description", () => {
+    const { description } = makeTool();
+    expect(description).toContain("general-purpose");
+    expect(description).toContain("explore");
+  });
+  it("errors on missing args", async () => {
+    const tool = makeTool();
+    const withoutTask = await tool.handler!({ agent_type: "general-purpose" });
+    expect(withoutTask).toContain("task is required");
+    const withoutType = await tool.handler!({ task: "do" });
+    expect(withoutType).toContain("agent_type is required");
+  });
+  it("errors on unknown agent_type", async () => {
+    const tool = makeTool();
+    const message = await tool.handler!({ agent_type: "nope", task: "do" });
+    for (const fragment of ["unknown agent_type", "general-purpose"]) {
+      expect(message).toContain(fragment);
+    }
+  });
+  it("runs a subagent and returns its report with a header", async () => {
+    const scripted = new MockLLM([{ content: "子任务结果。" }]);
+    const tool = makeTool(new ToolRegistry(), scripted);
+    const output = await tool.handler!({ agent_type: "general-purpose", task: "做点事" });
+    expect(output).toContain("subagent general-purpose 完成");
+    expect(output).toContain("子任务结果。");
+  });
+});

package/tests/tool.test.ts CHANGED Viewed

@@ -16,6 +16,7 @@ function makeTool(overrides: Partial<ToolDefinition> & { name: string }): ToolDe
     maxRetries: overrides.maxRetries,
     retryDelay: overrides.retryDelay,
     timeout: overrides.timeout,
+    validateOutput: overrides.validateOutput,
   };
 }
@@ -108,6 +109,125 @@ describe('ToolRegistry', () => {
   });
 });
+/**
+ * Argument handling in `ToolRegistry.execute`: string inputs are coerced to the
+ * declared parameter type, and invalid input fails the call before the handler runs.
+ */
+describe('ToolRegistry · input validation + coercion', () => {
+  let registry: ToolRegistry;
+  beforeEach(() => {
+    registry = new ToolRegistry();
+  });
+  /** Registers a tool that captures its arguments; returns a getter for the captured value. */
+  function recordTool(name: string, parameters: any[]) {
+    const seen: { args: any } = { args: null };
+    registry.register(makeTool({
+      name,
+      parameters,
+      handler: async (p: any) => {
+        seen.args = p;
+        return 'ok';
+      },
+    }));
+    return () => seen.args;
+  }
+  it('coerces a numeric string to a number for the handler', async () => {
+    const received = recordTool('n', [{ name: 'x', type: 'number', description: 'x', required: true }]);
+    await registry.execute('n', { x: '5' });
+    expect(received()).toEqual({ x: 5 });
+  });
+  it('does not truncate floats (Number, not parseInt)', async () => {
+    const received = recordTool('f', [{ name: 'x', type: 'number', description: 'x', required: true }]);
+    await registry.execute('f', { x: '3.5' });
+    expect(received()).toEqual({ x: 3.5 });
+  });
+  it('coerces boolean-like strings', async () => {
+    const received = recordTool('b', [{ name: 'flag', type: 'boolean', description: 'f', required: true }]);
+    await registry.execute('b', { flag: 'true' });
+    expect(received()).toEqual({ flag: true });
+  });
+  it('parses a JSON-string object param', async () => {
+    const received = recordTool('o', [{ name: 'cfg', type: 'object', description: 'c', required: true }]);
+    await registry.execute('o', { cfg: '{"a":1}' });
+    expect(received()).toEqual({ cfg: { a: 1 } });
+  });
+  it('rejects an uncoercible type and does not run the handler', async () => {
+    const handler = vi.fn().mockResolvedValue('ok');
+    const parameters = [{ name: 'x', type: 'number', description: 'x', required: true }];
+    registry.register(makeTool({ name: 'num', parameters, handler }));
+    const outcome = await registry.execute('num', { x: 'not-a-number' });
+    expect(outcome.success).toBe(false);
+    expect(outcome.error).toContain('expected number');
+    expect(handler).not.toHaveBeenCalled();
+  });
+  it('enforces enum membership with a helpful message', async () => {
+    const handler = vi.fn().mockResolvedValue('ok');
+    const parameters = [
+      { name: 'mode', type: 'string', description: 'm', required: true, enum: ['fast', 'slow'] },
+    ];
+    registry.register(makeTool({ name: 'pick', parameters, handler }));
+    const rejected = await registry.execute('pick', { mode: 'turbo' });
+    expect(rejected.success).toBe(false);
+    expect(rejected.error).toContain('fast, slow');
+    expect(handler).not.toHaveBeenCalled();
+    const accepted = await registry.execute('pick', { mode: 'fast' });
+    expect(accepted.success).toBe(true);
+  });
+  it('treats a present-but-null required param as missing', async () => {
+    const handler = vi.fn().mockResolvedValue('ok');
+    const parameters = [{ name: 'p', type: 'string', description: 'p', required: true }];
+    registry.register(makeTool({ name: 'req', parameters, handler }));
+    const outcome = await registry.execute('req', { p: null });
+    expect(outcome.success).toBe(false);
+    expect(outcome.error).toContain('required');
+    expect(handler).not.toHaveBeenCalled();
+  });
+});
+/**
+ * `validateOutput` hook: a non-null return value marks the handler result as
+ * invalid, while null accepts it.
+ */
+describe('ToolRegistry · output validation', () => {
+  let registry: ToolRegistry;
+  beforeEach(() => {
+    registry = new ToolRegistry();
+  });
+  it('fails the call when validateOutput rejects the result', async () => {
+    registry.register(makeTool({
+      name: 'guarded',
+      maxRetries: 0,
+      handler: async () => 'garbage',
+      validateOutput: (r) => {
+        if (r === 'garbage') return 'looks like garbage';
+        return null;
+      },
+    }));
+    const outcome = await registry.execute('guarded', {});
+    expect(outcome.success).toBe(false);
+    for (const fragment of ['invalid tool output', 'looks like garbage']) {
+      expect(outcome.error).toContain(fragment);
+    }
+  });
+  it('passes when validateOutput accepts the result', async () => {
+    registry.register(makeTool({
+      name: 'ok',
+      handler: async () => 'fine',
+      validateOutput: () => null,
+    }));
+    const outcome = await registry.execute('ok', {});
+    expect(outcome.success).toBe(true);
+    expect(outcome.result).toBe('fine');
+  });
+  it('retries a rejected output through the normal retry path', async () => {
+    // The handler returns v1, v2, ... on successive attempts; only v1 is rejected.
+    let attempts = 0;
+    registry.register(makeTool({
+      name: 'retryout',
+      maxRetries: 1,
+      retryDelay: 0,
+      handler: async () => {
+        attempts += 1;
+        return `v${attempts}`;
+      },
+      validateOutput: (r) => {
+        if (r === 'v1') return 'first is bad';
+        return null;
+      },
+    }));
+    const outcome = await registry.execute('retryout', {});
+    expect(outcome.success).toBe(true);
+    expect(outcome.result).toBe('v2');
+  });
+});
 describe('stableStringify', () => {
   it('produces an order-independent key for objects', async () => {
     const { stableStringify } = await import('../src/core/tool');