npm - @sanity/ailf - Versions diffs - 3.8.0 → 3.9.0 - Mend

@sanity/ailf 3.8.0 → 3.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (59)

package/dist/pipeline/compiler/promptfoo-compiler.js CHANGED Viewed

@@ -11,10 +11,20 @@
  *
  * @see docs/archive/exec-plans/architecture-overhaul/phase-2-config-compiler.md
  */
+import { dirname, resolve as resolvePath } from "node:path";
+import { fileURLToPath } from "node:url";
 import { mapAssertions } from "./assertion-mapper.js";
 import { resolveTaskFixtures } from "./fixture-resolver.js";
 import { LiteracyVariant } from "../normalize-mode.js";
 import { resolveVariables } from "./variable-resolver.js";
+/**
+ * Absolute filesystem path to the AILF mock Promptfoo provider. Resolved
+ * once at module load relative to this file. Promptfoo's `file://` provider
+ * loader requires an absolute path. See buildProviders for the env-var
+ * gate that swaps real providers for this mock.
+ */
+const __dirname = dirname(fileURLToPath(import.meta.url));
+const MOCK_PROVIDER_ABSPATH = resolvePath(__dirname, "..", "..", "promptfoo-providers", "mock-provider.cjs");
 // ---------------------------------------------------------------------------
 // Public API
 // ---------------------------------------------------------------------------
@@ -143,6 +153,19 @@ function buildProviders(models, mode) {
             },
         });
     }
+    // Replay swap — when AILF_REPLAY_LLMS=1 is set, rewrite every provider's
+    // `id` to the file-based AILF mock provider so the Promptfoo subprocess
+    // never makes a live LLM call. We preserve `label` and stash the
+    // original `id` in `config.originalId` so the mock provider can surface
+    // model identity in its output and reports remain interpretable.
+    // See W0110 (M5.1) and docs/design-docs/testing-strategy.md.
+    if (process.env.AILF_REPLAY_LLMS === "1") {
+        return providers.map((p) => ({
+            id: `file://${MOCK_PROVIDER_ABSPATH}`,
+            label: p.label,
+            config: { ...p.config, originalId: p.id },
+        }));
+    }
     return providers;
 }
 /**

package/dist/pipeline/mirror-repo-tasks.d.ts CHANGED Viewed

@@ -107,7 +107,7 @@ export declare function buildMirrorDocument(task: LiteracyTaskDefinition, opts:
     slugToDocId: Map<string, string>;
 }): {
     baseline?: {
-        rubric?: "full" | "abbreviated" | "none" | undefined;
+        rubric?: "abbreviated" | "full" | "none" | undefined;
         enabled?: boolean | undefined;
     } | undefined;
     _id: string;

package/dist/tasks/knowledge-probe/groq-projections.task.ts CHANGED Viewed

@@ -41,22 +41,40 @@ export default defineTask({
   assertions: [
     { type: "contains", value: "->" },
     { type: "contains", value: "select(" },
+    // Templated rubrics so the compiled assertions carry `metadata.dimension`
+    // and the scoring engine can populate per-dimension scores from the KP
+    // profile (factual-correctness 0.45 / completeness 0.35 / currency 0.20).
     {
       type: "llm-rubric",
-      value:
-        "The response should demonstrate accurate knowledge of GROQ " +
-        "projection syntax with working code examples. Check that the " +
-        "dereference operator, spread syntax, and select() are correctly " +
-        "explained with valid GROQ code.",
-      weight: 0.6,
+      template: "factual-correctness",
+      criteria: [
+        "The dereference operator `->` is correctly explained for following references",
+        "The spread operator `...` is shown in a valid projection example",
+        "`select()` is used with valid syntax for conditional projections",
+        'Computed field names (e.g., `"label": title`) are demonstrated correctly',
+        "Code examples use valid GROQ — no fabricated operators or deprecated syntax",
+      ],
     },
     {
       type: "llm-rubric",
-      value:
-        "Evaluate whether the response reflects current GROQ syntax " +
-        "(post-2023). Check for deprecated patterns or outdated " +
-        "recommendations.",
-      weight: 0.4,
+      template: "completeness",
+      criteria: [
+        "Basic object projection with `{}` is covered",
+        "Nested projections and the spread operator are both addressed",
+        "Computed/aliased field names are demonstrated",
+        "The dereference operator `->` is included with a worked example",
+        "Both inclusive (`[0..5]`) and exclusive (`[0...5]`) array slicing are explained",
+        "Conditional projections via `select()` are covered",
+      ],
+    },
+    {
+      type: "llm-rubric",
+      template: "currency",
+      criteria: [
+        "Examples reflect current GROQ syntax (post-2023) — no deprecated patterns",
+        "Recommendations don't reference removed or legacy query forms",
+        "Modern projection idioms are used (e.g., spread + override)",
+      ],
     },
   ],
 })

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@sanity/ailf",
-  "version": "3.8.0",
+  "version": "3.9.0",
   "private": false,
   "publishConfig": {
     "access": "public"
@@ -50,6 +50,7 @@
     "@anthropic-ai/claude-agent-sdk": "^0.2.105",
     "@types/js-yaml": "^4.0.9",
     "@types/node": "^22.13.1",
+    "nock": "^14.0.13",
     "tsx": "^4.19.2",
     "typescript": "^5.7.3",
     "@sanity/ailf-core": "0.1.0",
@@ -73,9 +74,11 @@
     "cli": "tsx src/cli.ts",
     "pipeline": "tsx src/cli.ts pipeline",
     "validate": "tsx src/cli.ts validate config",
-    "test": "tsx --test src/__tests__/*.test.ts",
+    "test": "tsx --test src/__tests__/*.test.ts src/adapters/**/__tests__/*.adapter.test.ts",
     "test:e2e": "AILF_E2E=1 tsx --test src/__tests__/e2e/*.e2e.test.ts",
-    "test:all": "AILF_E2E=1 tsx --test src/__tests__/*.test.ts src/pipeline/compiler/__tests__/*.test.ts src/__tests__/e2e/*.e2e.test.ts",
+    "test:e2e:adapters": "AILF_E2E=1 tsx --test src/adapters/**/__tests__/*.adapter.test.ts",
+    "test:e2e:api": "AILF_E2E_API=1 tsx --test src/__tests__/api-tier2-tenant-integration.test.ts src/__tests__/run-remote-tier2.test.ts",
+    "test:all": "AILF_E2E=1 tsx --test src/__tests__/*.test.ts src/pipeline/compiler/__tests__/*.test.ts src/__tests__/e2e/*.e2e.test.ts src/adapters/**/__tests__/*.adapter.test.ts",
     "pr-comment": "tsx src/cli.ts pr-comment",
     "coverage-audit": "tsx src/cli.ts report coverage",
     "readiness-report": "tsx src/cli.ts report readiness",

package/tasks/knowledge-probe/groq-projections.task.ts CHANGED Viewed

@@ -41,22 +41,40 @@ export default defineTask({
   assertions: [
     { type: "contains", value: "->" },
     { type: "contains", value: "select(" },
+    // Templated rubrics so the compiled assertions carry `metadata.dimension`
+    // and the scoring engine can populate per-dimension scores from the KP
+    // profile (factual-correctness 0.45 / completeness 0.35 / currency 0.20).
     {
       type: "llm-rubric",
-      value:
-        "The response should demonstrate accurate knowledge of GROQ " +
-        "projection syntax with working code examples. Check that the " +
-        "dereference operator, spread syntax, and select() are correctly " +
-        "explained with valid GROQ code.",
-      weight: 0.6,
+      template: "factual-correctness",
+      criteria: [
+        "The dereference operator `->` is correctly explained for following references",
+        "The spread operator `...` is shown in a valid projection example",
+        "`select()` is used with valid syntax for conditional projections",
+        'Computed field names (e.g., `"label": title`) are demonstrated correctly',
+        "Code examples use valid GROQ — no fabricated operators or deprecated syntax",
+      ],
     },
     {
       type: "llm-rubric",
-      value:
-        "Evaluate whether the response reflects current GROQ syntax " +
-        "(post-2023). Check for deprecated patterns or outdated " +
-        "recommendations.",
-      weight: 0.4,
+      template: "completeness",
+      criteria: [
+        "Basic object projection with `{}` is covered",
+        "Nested projections and the spread operator are both addressed",
+        "Computed/aliased field names are demonstrated",
+        "The dereference operator `->` is included with a worked example",
+        "Both inclusive (`[0..5]`) and exclusive (`[0...5]`) array slicing are explained",
+        "Conditional projections via `select()` are covered",
+      ],
+    },
+    {
+      type: "llm-rubric",
+      template: "currency",
+      criteria: [
+        "Examples reflect current GROQ syntax (post-2023) — no deprecated patterns",
+        "Recommendations don't reference removed or legacy query forms",
+        "Modern projection idioms are used (e.g., spread + override)",
+      ],
     },
   ],
 })

package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.d.ts DELETED Viewed

@@ -1,10 +0,0 @@
-/**
- * agent-harness-handler.test.ts — Tests for agent harness mode compilation.
- *
- * Tests validation, provider assembly, tool permission resolution,
- * assertion mapping, sandbox config, lifecycle extensions, and
- * end-to-end compilation of example tasks.
- *
- * Run: npx tsx --test src/pipeline/compiler/__tests__/agent-harness-handler.test.ts
- */
-export {};

package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js DELETED Viewed

@@ -1,366 +0,0 @@
-/**
- * agent-harness-handler.test.ts — Tests for agent harness mode compilation.
- *
- * Tests validation, provider assembly, tool permission resolution,
- * assertion mapping, sandbox config, lifecycle extensions, and
- * end-to-end compilation of example tasks.
- *
- * Run: npx tsx --test src/pipeline/compiler/__tests__/agent-harness-handler.test.ts
- */
-import assert from "node:assert/strict";
-import { describe, it } from "node:test";
-import { LiteracyVariant } from "../../normalize-mode.js";
-import { compileAgentHarnessTask, AGENT_HARNESS_PROMPT_TEMPLATES, handler as agentHandler, validateAgentHarnessTask, } from "../mode-handlers/agent-harness/index.js";
-import { allAgentHarnessExampleTasks, scaffoldProjectTask, modifyCodeTask, multiFileRefactorTask, } from "../mode-handlers/__fixtures__/agent-harness-example-tasks.js";
-// ---------------------------------------------------------------------------
-// Helpers
-// ---------------------------------------------------------------------------
-function makeTask(overrides) {
-    return {
-        mode: "agent-harness",
-        id: "test-agent-task",
-        title: "Test Agent Task",
-        description: "A test agent harness task",
-        area: "studio",
-        ...overrides,
-    };
-}
-// ---------------------------------------------------------------------------
-// handler.getPrompts() — prompt template ownership
-// ---------------------------------------------------------------------------
-describe("AgentHarnessHandler.getPrompts", () => {
-    it("returns prompt templates", () => {
-        const prompts = agentHandler.getPrompts();
-        assert.ok(prompts, "getPrompts() should return a record");
-        assert.ok(Object.keys(prompts).length > 0, "should return at least one template");
-    });
-    it("returns templates keyed by agent-specific IDs (not literacy names)", () => {
-        const prompts = agentHandler.getPrompts();
-        const keys = Object.keys(prompts);
-        // Must not use literacy template names
-        assert.ok(!keys.includes("with-docs"), "should not use literacy key 'with-docs'");
-        assert.ok(!keys.includes("without-docs"), "should not use literacy key 'without-docs'");
-        assert.ok(!keys.includes(LiteracyVariant.AGENTIC), "should not use literacy key 'agentic'");
-        // Must have agent-appropriate key(s)
-        assert.ok(keys.includes("agent-harness"), "should include 'agent-harness' template");
-    });
-    it("agent-harness template describes task for agent execution", () => {
-        const prompts = agentHandler.getPrompts();
-        const template = prompts["agent-harness"];
-        assert.ok(template, "agent-harness template should exist");
-        assert.ok(template.template.includes("{{task}}"), "should include {{task}} placeholder");
-        // Should reference agent / sandbox / tool execution context
-        assert.ok(/sandbox|file|tool|implement|code/i.test(template.template), "template should reference agent execution concepts");
-    });
-    it("template has correct PromptTemplate shape", () => {
-        const prompts = agentHandler.getPrompts();
-        const template = prompts["agent-harness"];
-        assert.equal(template.id, "agent-harness");
-        assert.ok(template.label, "should have a human-readable label");
-        assert.ok(template.template, "should have a template string");
-        assert.ok(Array.isArray(template.variables), "should declare variables");
-        assert.ok(template.variables.includes("task"), "variables should include 'task'");
-    });
-    it("exported AGENT_HARNESS_PROMPT_TEMPLATES matches handler.getPrompts()", () => {
-        const fromHandler = agentHandler.getPrompts();
-        assert.deepEqual(fromHandler, AGENT_HARNESS_PROMPT_TEMPLATES);
-    });
-});
-// ---------------------------------------------------------------------------
-// validateAgentHarnessTask
-// ---------------------------------------------------------------------------
-describe("validateAgentHarnessTask", () => {
-    it("passes for a valid minimal task", () => {
-        const errors = validateAgentHarnessTask(makeTask());
-        assert.equal(errors.length, 0);
-    });
-    it("errors on missing ID", () => {
-        const errors = validateAgentHarnessTask(makeTask({ id: "" }));
-        assert.ok(errors.some((e) => e.field === "id"));
-    });
-    it("errors on missing title", () => {
-        const errors = validateAgentHarnessTask(makeTask({ title: "" }));
-        assert.ok(errors.some((e) => e.field === "title"));
-    });
-});
-// ---------------------------------------------------------------------------
-// compileAgentHarnessTask — provider assembly
-// ---------------------------------------------------------------------------
-describe("compileAgentHarnessTask — providers", () => {
-    it("produces a Claude Agent SDK provider", () => {
-        const result = compileAgentHarnessTask(makeTask());
-        assert.ok(result.providers.length > 0);
-        assert.equal(result.providers[0].id, "anthropic:claude-agent-sdk");
-    });
-    it("sets default agent config", () => {
-        const result = compileAgentHarnessTask(makeTask());
-        const config = result.providers[0].config;
-        assert.ok(config.model, "should set a model");
-        assert.ok(config.max_turns, "should set max_turns");
-        assert.ok(config.max_budget_usd, "should set budget cap");
-        assert.equal(config.permission_mode, "bypassPermissions");
-    });
-    it("resolves coding tool preset into custom_allowed_tools", () => {
-        const result = compileAgentHarnessTask(makeTask({ tools: ["coding"] }));
-        const config = result.providers[0].config;
-        const tools = config.custom_allowed_tools;
-        assert.ok(tools.includes("Bash"));
-        assert.ok(tools.includes("Read"));
-        assert.ok(tools.includes("Write"));
-        assert.ok(tools.includes("Edit"));
-    });
-    it("resolves read-only tool preset", () => {
-        const result = compileAgentHarnessTask(makeTask({ tools: ["read-only"] }));
-        const config = result.providers[0].config;
-        const tools = config.custom_allowed_tools;
-        assert.ok(tools.includes("Read"));
-        assert.ok(tools.includes("Grep"));
-        assert.ok(!tools.includes("Write"), "read-only should not include Write");
-    });
-    it("mixes preset and explicit tools", () => {
-        const result = compileAgentHarnessTask(makeTask({ tools: ["read-only", "WebFetch"] }));
-        const config = result.providers[0].config;
-        const tools = config.custom_allowed_tools;
-        assert.ok(tools.includes("Read"));
-        assert.ok(tools.includes("WebFetch"));
-    });
-});
-// ---------------------------------------------------------------------------
-// compileAgentHarnessTask — test cases
-// ---------------------------------------------------------------------------
-describe("compileAgentHarnessTask — test cases", () => {
-    it("produces at least one test case", () => {
-        const result = compileAgentHarnessTask(makeTask());
-        assert.ok(result.tests.length > 0);
-    });
-    it("includes task description in vars", () => {
-        const result = compileAgentHarnessTask(makeTask({ description: "Do the thing" }));
-        assert.equal(result.tests[0].vars.task, "Do the thing");
-    });
-    it("prefers prompt.vars.task over description", () => {
-        const result = compileAgentHarnessTask(makeTask({
-            description: "Description",
-            prompt: { vars: { task: "Custom prompt" } },
-        }));
-        assert.equal(result.tests[0].vars.task, "Custom prompt");
-    });
-    it("creates multi-turn test case", () => {
-        const result = compileAgentHarnessTask(makeTask({
-            multiTurn: {
-                turns: [
-                    { role: "user", content: "Hello" },
-                    { role: "assistant", content: "Hi" },
-                ],
-            },
-        }));
-        assert.equal(result.tests.length, 2);
-        assert.ok(result.tests[1].description.includes("[multi-turn]"));
-    });
-    it("sets sandbox metadata in vars", () => {
-        const result = compileAgentHarnessTask(makeTask({ sandbox: { type: "docker" } }));
-        assert.equal(result.tests[0].vars.__sandboxType, "docker");
-    });
-});
-// ---------------------------------------------------------------------------
-// compileAgentHarnessTask — assertions
-// ---------------------------------------------------------------------------
-describe("compileAgentHarnessTask — assertions", () => {
-    const RUNTIME = "file://dist/agent-harness/assertions-runtime.js";
-    it("maps file-exists to file-based javascript assertion", () => {
-        const result = compileAgentHarnessTask(makeTask({
-            assertions: [{ type: "file-exists", value: "sanity.config.ts" }],
-        }));
-        const assertion = result.tests[0].assert?.[0];
-        assert.ok(assertion);
-        assert.equal(assertion.type, "javascript");
-        assert.equal(assertion.value, `${RUNTIME}:fileExists`);
-        assert.deepEqual(assertion.config, {
-            filePath: "sanity.config.ts",
-        });
-    });
-    it("maps file-contains to file-based javascript assertion", () => {
-        const result = compileAgentHarnessTask(makeTask({
-            assertions: [
-                {
-                    type: "file-contains",
-                    value: { path: "config.ts", content: "projectId" },
-                },
-            ],
-        }));
-        const assertion = result.tests[0].assert?.[0];
-        assert.ok(assertion);
-        assert.equal(assertion.type, "javascript");
-        assert.equal(assertion.value, `${RUNTIME}:fileContains`);
-        assert.deepEqual(assertion.config, {
-            filePath: "config.ts",
-            content: "projectId",
-        });
-    });
-    it("maps command-succeeds to file-based javascript assertion", () => {
-        const result = compileAgentHarnessTask(makeTask({
-            assertions: [{ type: "command-succeeds", value: "npx tsc --noEmit" }],
-        }));
-        const assertion = result.tests[0].assert?.[0];
-        assert.ok(assertion);
-        assert.equal(assertion.type, "javascript");
-        assert.equal(assertion.value, `${RUNTIME}:commandSucceeds`);
-        assert.deepEqual(assertion.config, {
-            command: "npx tsc --noEmit",
-        });
-    });
-    it("maps diff-matches to file-based javascript assertion", () => {
-        const result = compileAgentHarnessTask(makeTask({
-            assertions: [{ type: "diff-matches", value: "createClient" }],
-        }));
-        const assertion = result.tests[0].assert?.[0];
-        assert.ok(assertion);
-        assert.equal(assertion.type, "javascript");
-        assert.equal(assertion.value, `${RUNTIME}:diffMatches`);
-        assert.deepEqual(assertion.config, {
-            expected: "createClient",
-        });
-    });
-    it("passes through standard assertions", () => {
-        const result = compileAgentHarnessTask(makeTask({
-            assertions: [{ type: "contains", value: "sanity" }],
-        }));
-        assert.equal(result.tests[0].assert?.[0]?.type, "contains");
-    });
-    it("sets grader provider on llm-rubric", () => {
-        const result = compileAgentHarnessTask(makeTask({
-            assertions: [{ type: "llm-rubric", value: "Check quality" }],
-        }), { graderProvider: "openai:chat:gpt-5" });
-        assert.equal(result.tests[0].assert?.[0]?.provider, "openai:chat:gpt-5");
-    });
-    it("resolves templated llm-rubric with rubric text and dimension metadata", () => {
-        const rubricConfig = {
-            templates: {
-                "agent-output": {
-                    dimension: "agent-output",
-                    header: "Score the agent's final output from 0 to 100:",
-                    scale: ["0: Failed", "50: Partial", "100: Complete"],
-                    criteria_label: "Check for:",
-                },
-            },
-        };
-        const result = compileAgentHarnessTask(makeTask({
-            assertions: [
-                {
-                    type: "llm-rubric",
-                    template: "agent-output",
-                    criteria: ["File created", "Correct content"],
-                },
-            ],
-        }), { rubricConfig, graderProvider: "anthropic:messages:claude-opus-4-5" });
-        const assertion = result.tests[0].assert?.[0];
-        assert.ok(assertion, "should produce an assertion");
-        assert.equal(assertion.type, "llm-rubric");
-        // Rubric text should be fully rendered (not empty)
-        assert.ok(assertion.value.includes("Score the agent"), "should contain rendered rubric header");
-        assert.ok(assertion.value.includes("File created"), "should contain task-specific criteria");
-        // Dimension metadata should be attached
-        const metadata = assertion.metadata;
-        assert.ok(metadata, "should have metadata");
-        assert.equal(metadata.dimension, "agent-output");
-        assert.equal(metadata.maxScore, 100);
-        // Grader provider should be set
-        assert.equal(assertion.provider, "anthropic:messages:claude-opus-4-5");
-    });
-    it("warns when rubric template is unknown", () => {
-        const rubricConfig = { templates: {} };
-        const result = compileAgentHarnessTask(makeTask({
-            assertions: [
-                {
-                    type: "llm-rubric",
-                    template: "nonexistent-template",
-                    criteria: ["Something"],
-                },
-            ],
-        }), { rubricConfig });
-        // Unknown template produces a warning and no assertion
-        assert.ok(result.warnings.some((w) => w.includes("nonexistent-template")), "should warn about unknown template");
-        // The assertion should be null (filtered out)
-        assert.equal(result.tests[0].assert?.length ?? 0, 0, "should not produce an assertion for unknown template");
-    });
-    it("warns when rubricConfig is not provided for templated assertion", () => {
-        const result = compileAgentHarnessTask(makeTask({
-            assertions: [
-                {
-                    type: "llm-rubric",
-                    template: "agent-output",
-                    criteria: ["Something"],
-                },
-            ],
-        })
-        // No rubricConfig in options
-        );
-        assert.ok(result.warnings.some((w) => w.includes("No rubric config")), "should warn about missing rubric config");
-    });
-});
-// ---------------------------------------------------------------------------
-// compileAgentHarnessTask — lifecycle extensions
-// ---------------------------------------------------------------------------
-describe("compileAgentHarnessTask — lifecycle", () => {
-    it("produces beforeEach and afterEach extensions", () => {
-        const result = compileAgentHarnessTask(makeTask());
-        assert.equal(result.extensions.length, 2);
-        assert.equal(result.extensions[0].type, "beforeEach");
-        assert.equal(result.extensions[1].type, "afterEach");
-    });
-    it("beforeEach hook creates working directory", () => {
-        const result = compileAgentHarnessTask(makeTask());
-        assert.ok(result.extensions[0].code.includes("mkdirSync"));
-        assert.ok(result.extensions[0].code.includes("__workingDir"));
-    });
-    it("afterEach hook cleans up", () => {
-        const result = compileAgentHarnessTask(makeTask());
-        assert.ok(result.extensions[1].code.includes("rmSync"));
-    });
-    it("sandbox config captures task settings", () => {
-        const result = compileAgentHarnessTask(makeTask({
-            sandbox: {
-                type: "docker",
-                image: "node:22",
-                limits: { cpus: 2, networkAccess: false },
-            },
-            fixtures: ["file://schema.ts"],
-        }));
-        assert.equal(result.sandboxConfig.type, "docker");
-        assert.equal(result.sandboxConfig.image, "node:22");
-        assert.deepEqual(result.sandboxConfig.fixtures, ["schema.ts"]);
-        assert.equal(result.sandboxConfig.limits?.cpus, 2);
-        assert.equal(result.sandboxConfig.limits?.networkAccess, false);
-    });
-});
-// ---------------------------------------------------------------------------
-// Example task compilation (end-to-end)
-// ---------------------------------------------------------------------------
-describe("example agent harness tasks — end-to-end", () => {
-    it("compiles all example tasks without errors", () => {
-        for (const task of allAgentHarnessExampleTasks) {
-            const result = compileAgentHarnessTask(task);
-            assert.ok(result.providers.length > 0, `${task.id}: should produce providers`);
-            assert.ok(result.tests.length > 0, `${task.id}: should produce test cases`);
-            assert.ok(result.extensions.length > 0, `${task.id}: should produce lifecycle extensions`);
-        }
-    });
-    it("scaffold task has file-exists assertions", () => {
-        const result = compileAgentHarnessTask(scaffoldProjectTask);
-        assert.ok(result.tests[0].assert);
-        assert.ok(result.tests[0].assert.length >= 3);
-        // First two are file-exists (javascript), third is file-contains, fourth is command-succeeds
-        assert.equal(result.tests[0].assert[0].type, "javascript");
-    });
-    it("modify task has file-contains assertions", () => {
-        const result = compileAgentHarnessTask(modifyCodeTask);
-        assert.ok(result.tests[0].assert);
-        assert.ok(result.tests[0].assert.some((a) => a.type === "javascript" &&
-            a.value.includes("fileContains") &&
-            a.config != null));
-    });
-    it("refactor task has docker sandbox config", () => {
-        const result = compileAgentHarnessTask(multiFileRefactorTask);
-        assert.equal(result.sandboxConfig.type, "docker");
-        assert.equal(result.sandboxConfig.image, "node:22-slim");
-    });
-});

package/dist/pipeline/compiler/__tests__/assertion-mapper.test.d.ts DELETED Viewed

@@ -1,9 +0,0 @@
-/**
- * assertion-mapper.test.ts — Unit tests for the assertion type mapper.
- *
- * Tests mapping of AILF assertion types to Promptfoo assertion types,
- * mode compatibility checking, negation support, and templated assertions.
- *
- * Run: npx tsx --test src/pipeline/compiler/__tests__/assertion-mapper.test.ts
- */
-export {};