npm - @sanity/ailf - Versions diffs - 3.8.0 → 3.8.1 - Mend

@sanity/ailf 3.8.0 → 3.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (32) hide show

package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js DELETED Viewed

@@ -1,366 +0,0 @@
-/**
- * agent-harness-handler.test.ts — Tests for agent harness mode compilation.
- *
- * Tests validation, provider assembly, tool permission resolution,
- * assertion mapping, sandbox config, lifecycle extensions, and
- * end-to-end compilation of example tasks.
- *
- * Run: npx tsx --test src/pipeline/compiler/__tests__/agent-harness-handler.test.ts
- */
-import assert from "node:assert/strict";
-import { describe, it } from "node:test";
-import { LiteracyVariant } from "../../normalize-mode.js";
-import { compileAgentHarnessTask, AGENT_HARNESS_PROMPT_TEMPLATES, handler as agentHandler, validateAgentHarnessTask, } from "../mode-handlers/agent-harness/index.js";
-import { allAgentHarnessExampleTasks, scaffoldProjectTask, modifyCodeTask, multiFileRefactorTask, } from "../mode-handlers/__fixtures__/agent-harness-example-tasks.js";
-// ---------------------------------------------------------------------------
-// Helpers
-// ---------------------------------------------------------------------------
-function makeTask(overrides) {
-    return {
-        mode: "agent-harness",
-        id: "test-agent-task",
-        title: "Test Agent Task",
-        description: "A test agent harness task",
-        area: "studio",
-        ...overrides,
-    };
-}
-// ---------------------------------------------------------------------------
-// handler.getPrompts() — prompt template ownership
-// ---------------------------------------------------------------------------
-describe("AgentHarnessHandler.getPrompts", () => {
-    it("returns prompt templates", () => {
-        const prompts = agentHandler.getPrompts();
-        assert.ok(prompts, "getPrompts() should return a record");
-        assert.ok(Object.keys(prompts).length > 0, "should return at least one template");
-    });
-    it("returns templates keyed by agent-specific IDs (not literacy names)", () => {
-        const prompts = agentHandler.getPrompts();
-        const keys = Object.keys(prompts);
-        // Must not use literacy template names
-        assert.ok(!keys.includes("with-docs"), "should not use literacy key 'with-docs'");
-        assert.ok(!keys.includes("without-docs"), "should not use literacy key 'without-docs'");
-        assert.ok(!keys.includes(LiteracyVariant.AGENTIC), "should not use literacy key 'agentic'");
-        // Must have agent-appropriate key(s)
-        assert.ok(keys.includes("agent-harness"), "should include 'agent-harness' template");
-    });
-    it("agent-harness template describes task for agent execution", () => {
-        const prompts = agentHandler.getPrompts();
-        const template = prompts["agent-harness"];
-        assert.ok(template, "agent-harness template should exist");
-        assert.ok(template.template.includes("{{task}}"), "should include {{task}} placeholder");
-        // Should reference agent / sandbox / tool execution context
-        assert.ok(/sandbox|file|tool|implement|code/i.test(template.template), "template should reference agent execution concepts");
-    });
-    it("template has correct PromptTemplate shape", () => {
-        const prompts = agentHandler.getPrompts();
-        const template = prompts["agent-harness"];
-        assert.equal(template.id, "agent-harness");
-        assert.ok(template.label, "should have a human-readable label");
-        assert.ok(template.template, "should have a template string");
-        assert.ok(Array.isArray(template.variables), "should declare variables");
-        assert.ok(template.variables.includes("task"), "variables should include 'task'");
-    });
-    it("exported AGENT_HARNESS_PROMPT_TEMPLATES matches handler.getPrompts()", () => {
-        const fromHandler = agentHandler.getPrompts();
-        assert.deepEqual(fromHandler, AGENT_HARNESS_PROMPT_TEMPLATES);
-    });
-});
-// ---------------------------------------------------------------------------
-// validateAgentHarnessTask
-// ---------------------------------------------------------------------------
-describe("validateAgentHarnessTask", () => {
-    it("passes for a valid minimal task", () => {
-        const errors = validateAgentHarnessTask(makeTask());
-        assert.equal(errors.length, 0);
-    });
-    it("errors on missing ID", () => {
-        const errors = validateAgentHarnessTask(makeTask({ id: "" }));
-        assert.ok(errors.some((e) => e.field === "id"));
-    });
-    it("errors on missing title", () => {
-        const errors = validateAgentHarnessTask(makeTask({ title: "" }));
-        assert.ok(errors.some((e) => e.field === "title"));
-    });
-});
-// ---------------------------------------------------------------------------
-// compileAgentHarnessTask — provider assembly
-// ---------------------------------------------------------------------------
-describe("compileAgentHarnessTask — providers", () => {
-    it("produces a Claude Agent SDK provider", () => {
-        const result = compileAgentHarnessTask(makeTask());
-        assert.ok(result.providers.length > 0);
-        assert.equal(result.providers[0].id, "anthropic:claude-agent-sdk");
-    });
-    it("sets default agent config", () => {
-        const result = compileAgentHarnessTask(makeTask());
-        const config = result.providers[0].config;
-        assert.ok(config.model, "should set a model");
-        assert.ok(config.max_turns, "should set max_turns");
-        assert.ok(config.max_budget_usd, "should set budget cap");
-        assert.equal(config.permission_mode, "bypassPermissions");
-    });
-    it("resolves coding tool preset into custom_allowed_tools", () => {
-        const result = compileAgentHarnessTask(makeTask({ tools: ["coding"] }));
-        const config = result.providers[0].config;
-        const tools = config.custom_allowed_tools;
-        assert.ok(tools.includes("Bash"));
-        assert.ok(tools.includes("Read"));
-        assert.ok(tools.includes("Write"));
-        assert.ok(tools.includes("Edit"));
-    });
-    it("resolves read-only tool preset", () => {
-        const result = compileAgentHarnessTask(makeTask({ tools: ["read-only"] }));
-        const config = result.providers[0].config;
-        const tools = config.custom_allowed_tools;
-        assert.ok(tools.includes("Read"));
-        assert.ok(tools.includes("Grep"));
-        assert.ok(!tools.includes("Write"), "read-only should not include Write");
-    });
-    it("mixes preset and explicit tools", () => {
-        const result = compileAgentHarnessTask(makeTask({ tools: ["read-only", "WebFetch"] }));
-        const config = result.providers[0].config;
-        const tools = config.custom_allowed_tools;
-        assert.ok(tools.includes("Read"));
-        assert.ok(tools.includes("WebFetch"));
-    });
-});
-// ---------------------------------------------------------------------------
-// compileAgentHarnessTask — test cases
-// ---------------------------------------------------------------------------
-describe("compileAgentHarnessTask — test cases", () => {
-    it("produces at least one test case", () => {
-        const result = compileAgentHarnessTask(makeTask());
-        assert.ok(result.tests.length > 0);
-    });
-    it("includes task description in vars", () => {
-        const result = compileAgentHarnessTask(makeTask({ description: "Do the thing" }));
-        assert.equal(result.tests[0].vars.task, "Do the thing");
-    });
-    it("prefers prompt.vars.task over description", () => {
-        const result = compileAgentHarnessTask(makeTask({
-            description: "Description",
-            prompt: { vars: { task: "Custom prompt" } },
-        }));
-        assert.equal(result.tests[0].vars.task, "Custom prompt");
-    });
-    it("creates multi-turn test case", () => {
-        const result = compileAgentHarnessTask(makeTask({
-            multiTurn: {
-                turns: [
-                    { role: "user", content: "Hello" },
-                    { role: "assistant", content: "Hi" },
-                ],
-            },
-        }));
-        assert.equal(result.tests.length, 2);
-        assert.ok(result.tests[1].description.includes("[multi-turn]"));
-    });
-    it("sets sandbox metadata in vars", () => {
-        const result = compileAgentHarnessTask(makeTask({ sandbox: { type: "docker" } }));
-        assert.equal(result.tests[0].vars.__sandboxType, "docker");
-    });
-});
-// ---------------------------------------------------------------------------
-// compileAgentHarnessTask — assertions
-// ---------------------------------------------------------------------------
-describe("compileAgentHarnessTask — assertions", () => {
-    const RUNTIME = "file://dist/agent-harness/assertions-runtime.js";
-    it("maps file-exists to file-based javascript assertion", () => {
-        const result = compileAgentHarnessTask(makeTask({
-            assertions: [{ type: "file-exists", value: "sanity.config.ts" }],
-        }));
-        const assertion = result.tests[0].assert?.[0];
-        assert.ok(assertion);
-        assert.equal(assertion.type, "javascript");
-        assert.equal(assertion.value, `${RUNTIME}:fileExists`);
-        assert.deepEqual(assertion.config, {
-            filePath: "sanity.config.ts",
-        });
-    });
-    it("maps file-contains to file-based javascript assertion", () => {
-        const result = compileAgentHarnessTask(makeTask({
-            assertions: [
-                {
-                    type: "file-contains",
-                    value: { path: "config.ts", content: "projectId" },
-                },
-            ],
-        }));
-        const assertion = result.tests[0].assert?.[0];
-        assert.ok(assertion);
-        assert.equal(assertion.type, "javascript");
-        assert.equal(assertion.value, `${RUNTIME}:fileContains`);
-        assert.deepEqual(assertion.config, {
-            filePath: "config.ts",
-            content: "projectId",
-        });
-    });
-    it("maps command-succeeds to file-based javascript assertion", () => {
-        const result = compileAgentHarnessTask(makeTask({
-            assertions: [{ type: "command-succeeds", value: "npx tsc --noEmit" }],
-        }));
-        const assertion = result.tests[0].assert?.[0];
-        assert.ok(assertion);
-        assert.equal(assertion.type, "javascript");
-        assert.equal(assertion.value, `${RUNTIME}:commandSucceeds`);
-        assert.deepEqual(assertion.config, {
-            command: "npx tsc --noEmit",
-        });
-    });
-    it("maps diff-matches to file-based javascript assertion", () => {
-        const result = compileAgentHarnessTask(makeTask({
-            assertions: [{ type: "diff-matches", value: "createClient" }],
-        }));
-        const assertion = result.tests[0].assert?.[0];
-        assert.ok(assertion);
-        assert.equal(assertion.type, "javascript");
-        assert.equal(assertion.value, `${RUNTIME}:diffMatches`);
-        assert.deepEqual(assertion.config, {
-            expected: "createClient",
-        });
-    });
-    it("passes through standard assertions", () => {
-        const result = compileAgentHarnessTask(makeTask({
-            assertions: [{ type: "contains", value: "sanity" }],
-        }));
-        assert.equal(result.tests[0].assert?.[0]?.type, "contains");
-    });
-    it("sets grader provider on llm-rubric", () => {
-        const result = compileAgentHarnessTask(makeTask({
-            assertions: [{ type: "llm-rubric", value: "Check quality" }],
-        }), { graderProvider: "openai:chat:gpt-5" });
-        assert.equal(result.tests[0].assert?.[0]?.provider, "openai:chat:gpt-5");
-    });
-    it("resolves templated llm-rubric with rubric text and dimension metadata", () => {
-        const rubricConfig = {
-            templates: {
-                "agent-output": {
-                    dimension: "agent-output",
-                    header: "Score the agent's final output from 0 to 100:",
-                    scale: ["0: Failed", "50: Partial", "100: Complete"],
-                    criteria_label: "Check for:",
-                },
-            },
-        };
-        const result = compileAgentHarnessTask(makeTask({
-            assertions: [
-                {
-                    type: "llm-rubric",
-                    template: "agent-output",
-                    criteria: ["File created", "Correct content"],
-                },
-            ],
-        }), { rubricConfig, graderProvider: "anthropic:messages:claude-opus-4-5" });
-        const assertion = result.tests[0].assert?.[0];
-        assert.ok(assertion, "should produce an assertion");
-        assert.equal(assertion.type, "llm-rubric");
-        // Rubric text should be fully rendered (not empty)
-        assert.ok(assertion.value.includes("Score the agent"), "should contain rendered rubric header");
-        assert.ok(assertion.value.includes("File created"), "should contain task-specific criteria");
-        // Dimension metadata should be attached
-        const metadata = assertion.metadata;
-        assert.ok(metadata, "should have metadata");
-        assert.equal(metadata.dimension, "agent-output");
-        assert.equal(metadata.maxScore, 100);
-        // Grader provider should be set
-        assert.equal(assertion.provider, "anthropic:messages:claude-opus-4-5");
-    });
-    it("warns when rubric template is unknown", () => {
-        const rubricConfig = { templates: {} };
-        const result = compileAgentHarnessTask(makeTask({
-            assertions: [
-                {
-                    type: "llm-rubric",
-                    template: "nonexistent-template",
-                    criteria: ["Something"],
-                },
-            ],
-        }), { rubricConfig });
-        // Unknown template produces a warning and no assertion
-        assert.ok(result.warnings.some((w) => w.includes("nonexistent-template")), "should warn about unknown template");
-        // The assertion should be null (filtered out)
-        assert.equal(result.tests[0].assert?.length ?? 0, 0, "should not produce an assertion for unknown template");
-    });
-    it("warns when rubricConfig is not provided for templated assertion", () => {
-        const result = compileAgentHarnessTask(makeTask({
-            assertions: [
-                {
-                    type: "llm-rubric",
-                    template: "agent-output",
-                    criteria: ["Something"],
-                },
-            ],
-        })
-        // No rubricConfig in options
-        );
-        assert.ok(result.warnings.some((w) => w.includes("No rubric config")), "should warn about missing rubric config");
-    });
-});
-// ---------------------------------------------------------------------------
-// compileAgentHarnessTask — lifecycle extensions
-// ---------------------------------------------------------------------------
-describe("compileAgentHarnessTask — lifecycle", () => {
-    it("produces beforeEach and afterEach extensions", () => {
-        const result = compileAgentHarnessTask(makeTask());
-        assert.equal(result.extensions.length, 2);
-        assert.equal(result.extensions[0].type, "beforeEach");
-        assert.equal(result.extensions[1].type, "afterEach");
-    });
-    it("beforeEach hook creates working directory", () => {
-        const result = compileAgentHarnessTask(makeTask());
-        assert.ok(result.extensions[0].code.includes("mkdirSync"));
-        assert.ok(result.extensions[0].code.includes("__workingDir"));
-    });
-    it("afterEach hook cleans up", () => {
-        const result = compileAgentHarnessTask(makeTask());
-        assert.ok(result.extensions[1].code.includes("rmSync"));
-    });
-    it("sandbox config captures task settings", () => {
-        const result = compileAgentHarnessTask(makeTask({
-            sandbox: {
-                type: "docker",
-                image: "node:22",
-                limits: { cpus: 2, networkAccess: false },
-            },
-            fixtures: ["file://schema.ts"],
-        }));
-        assert.equal(result.sandboxConfig.type, "docker");
-        assert.equal(result.sandboxConfig.image, "node:22");
-        assert.deepEqual(result.sandboxConfig.fixtures, ["schema.ts"]);
-        assert.equal(result.sandboxConfig.limits?.cpus, 2);
-        assert.equal(result.sandboxConfig.limits?.networkAccess, false);
-    });
-});
-// ---------------------------------------------------------------------------
-// Example task compilation (end-to-end)
-// ---------------------------------------------------------------------------
-describe("example agent harness tasks — end-to-end", () => {
-    it("compiles all example tasks without errors", () => {
-        for (const task of allAgentHarnessExampleTasks) {
-            const result = compileAgentHarnessTask(task);
-            assert.ok(result.providers.length > 0, `${task.id}: should produce providers`);
-            assert.ok(result.tests.length > 0, `${task.id}: should produce test cases`);
-            assert.ok(result.extensions.length > 0, `${task.id}: should produce lifecycle extensions`);
-        }
-    });
-    it("scaffold task has file-exists assertions", () => {
-        const result = compileAgentHarnessTask(scaffoldProjectTask);
-        assert.ok(result.tests[0].assert);
-        assert.ok(result.tests[0].assert.length >= 3);
-        // First two are file-exists (javascript), third is file-contains, fourth is command-succeeds
-        assert.equal(result.tests[0].assert[0].type, "javascript");
-    });
-    it("modify task has file-contains assertions", () => {
-        const result = compileAgentHarnessTask(modifyCodeTask);
-        assert.ok(result.tests[0].assert);
-        assert.ok(result.tests[0].assert.some((a) => a.type === "javascript" &&
-            a.value.includes("fileContains") &&
-            a.config != null));
-    });
-    it("refactor task has docker sandbox config", () => {
-        const result = compileAgentHarnessTask(multiFileRefactorTask);
-        assert.equal(result.sandboxConfig.type, "docker");
-        assert.equal(result.sandboxConfig.image, "node:22-slim");
-    });
-});

package/dist/pipeline/compiler/__tests__/assertion-mapper.test.d.ts DELETED Viewed

@@ -1,9 +0,0 @@
-/**
- * assertion-mapper.test.ts — Unit tests for the assertion type mapper.
- *
- * Tests mapping of AILF assertion types to Promptfoo assertion types,
- * mode compatibility checking, negation support, and templated assertions.
- *
- * Run: npx tsx --test src/pipeline/compiler/__tests__/assertion-mapper.test.ts
- */
-export {};

package/dist/pipeline/compiler/__tests__/assertion-mapper.test.js DELETED Viewed

@@ -1,145 +0,0 @@
-/**
- * assertion-mapper.test.ts — Unit tests for the assertion type mapper.
- *
- * Tests mapping of AILF assertion types to Promptfoo assertion types,
- * mode compatibility checking, negation support, and templated assertions.
- *
- * Run: npx tsx --test src/pipeline/compiler/__tests__/assertion-mapper.test.ts
- */
-import assert from "node:assert/strict";
-import { describe, it } from "node:test";
-import { isAssertionCompatibleWithMode, isValidAssertionType, mapAssertions, } from "../assertion-mapper.js";
-// ---------------------------------------------------------------------------
-// isValidAssertionType
-// ---------------------------------------------------------------------------
-describe("isValidAssertionType", () => {
-    it("recognizes known deterministic types", () => {
-        assert.equal(isValidAssertionType("contains"), true);
-        assert.equal(isValidAssertionType("equals"), true);
-        assert.equal(isValidAssertionType("regex"), true);
-        assert.equal(isValidAssertionType("is-json"), true);
-    });
-    it("recognizes LLM-graded types", () => {
-        assert.equal(isValidAssertionType("llm-rubric"), true);
-        assert.equal(isValidAssertionType("model-graded-closedqa"), true);
-        assert.equal(isValidAssertionType("model-graded-factuality"), true);
-        assert.equal(isValidAssertionType("g-eval"), true);
-        assert.equal(isValidAssertionType("similar"), true);
-    });
-    it("recognizes programmatic types", () => {
-        assert.equal(isValidAssertionType("javascript"), true);
-        assert.equal(isValidAssertionType("python"), true);
-    });
-    it("recognizes tool-use types", () => {
-        assert.equal(isValidAssertionType("tool-called"), true);
-        assert.equal(isValidAssertionType("tool-call-f1"), true);
-        assert.equal(isValidAssertionType("skill-used"), true);
-    });
-    it("recognizes negated types", () => {
-        assert.equal(isValidAssertionType("not-contains"), true);
-        assert.equal(isValidAssertionType("not-equals"), true);
-    });
-    it("rejects unknown types", () => {
-        assert.equal(isValidAssertionType("nonexistent"), false);
-    });
-});
-// ---------------------------------------------------------------------------
-// isAssertionCompatibleWithMode
-// ---------------------------------------------------------------------------
-describe("isAssertionCompatibleWithMode", () => {
-    it("allows deterministic types in any mode", () => {
-        assert.equal(isAssertionCompatibleWithMode("contains", "literacy"), true);
-        assert.equal(isAssertionCompatibleWithMode("contains", "agent-harness"), true);
-        assert.equal(isAssertionCompatibleWithMode("contains", "knowledge-probe"), true);
-    });
-    it("restricts tool-use types to appropriate modes", () => {
-        assert.equal(isAssertionCompatibleWithMode("tool-called", "agent-harness"), true);
-        assert.equal(isAssertionCompatibleWithMode("tool-called", "mcp-server"), true);
-        assert.equal(isAssertionCompatibleWithMode("tool-called", "literacy"), true);
-        assert.equal(isAssertionCompatibleWithMode("tool-called", "knowledge-probe"), false);
-    });
-    it("handles negated types", () => {
-        assert.equal(isAssertionCompatibleWithMode("not-contains", "literacy"), true);
-    });
-});
-// ---------------------------------------------------------------------------
-// mapAssertions
-// ---------------------------------------------------------------------------
-describe("mapAssertions", () => {
-    it("maps a simple contains assertion", () => {
-        const { mapped, warnings } = mapAssertions([
-            { type: "contains", value: "defineType" },
-        ]);
-        assert.equal(mapped.length, 1);
-        assert.equal(mapped[0].type, "contains");
-        assert.equal(mapped[0].value, "defineType");
-        assert.equal(warnings.length, 0);
-    });
-    it("maps an llm-rubric assertion with template and criteria", () => {
-        const { mapped } = mapAssertions([
-            {
-                type: "llm-rubric",
-                template: "task-completion",
-                criteria: ["Uses projection syntax", "Demonstrates spread operator"],
-            },
-        ]);
-        assert.equal(mapped.length, 1);
-        assert.equal(mapped[0].type, "llm-rubric");
-        assert.ok(mapped[0].value.includes("task-completion"), "Should include template name");
-    });
-    it("preserves weight on assertions", () => {
-        const { mapped } = mapAssertions([
-            { type: "contains", value: "test", weight: 0.5 },
-        ]);
-        assert.equal(mapped[0].weight, 0.5);
-    });
-    it("maps negated assertions", () => {
-        const { mapped } = mapAssertions([
-            { type: "not-contains", value: "deprecated" },
-        ]);
-        assert.equal(mapped.length, 1);
-        assert.equal(mapped[0].type, "not-contains");
-    });
-    it("skips incompatible tool-use assertions with warning", () => {
-        const { mapped, warnings } = mapAssertions([{ type: "tool-called", value: "WebSearch" }], { mode: "knowledge-probe" });
-        assert.equal(mapped.length, 0);
-        assert.equal(warnings.length, 1);
-        assert.ok(warnings[0].includes("not compatible"));
-    });
-    it("warns about unknown assertion types but passes through", () => {
-        const { mapped, warnings } = mapAssertions([
-            { type: "custom-check", value: "something" },
-        ]);
-        assert.equal(mapped.length, 1);
-        assert.equal(mapped[0].type, "custom-check");
-        assert.ok(warnings.some((w) => w.includes("Unknown")));
-    });
-    it("skips non-negatable assertion types", () => {
-        const { mapped, warnings } = mapAssertions([
-            { type: "not-javascript", value: "return true" },
-        ]);
-        assert.equal(mapped.length, 0);
-        assert.ok(warnings.some((w) => w.includes("negation")));
-    });
-    it("sets grader provider on LLM-graded assertions", () => {
-        const { mapped } = mapAssertions([
-            {
-                type: "llm-rubric",
-                template: "code-correctness",
-                criteria: ["Valid GROQ"],
-            },
-        ], { graderProvider: "openai:chat:gpt-5" });
-        assert.equal(mapped[0].provider, "openai:chat:gpt-5");
-    });
-    it("maps multiple assertions in order", () => {
-        const { mapped } = mapAssertions([
-            { type: "contains", value: "first" },
-            { type: "equals", value: "second" },
-            { type: "regex", value: "third.*pattern" },
-        ]);
-        assert.equal(mapped.length, 3);
-        assert.equal(mapped[0].type, "contains");
-        assert.equal(mapped[1].type, "equals");
-        assert.equal(mapped[2].type, "regex");
-    });
-});

package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.d.ts DELETED Viewed

@@ -1,10 +0,0 @@
-/**
- * knowledge-probe-handler.test.ts — Tests for knowledge probe mode compilation.
- *
- * Tests validation, provider assembly, prompt generation, assertion mapping
- * (including rejection of tool-use assertions), metadata generation, and
- * end-to-end compilation of example tasks.
- *
- * Run: npx tsx --test src/pipeline/compiler/__tests__/knowledge-probe-handler.test.ts
- */
-export {};