npm - @sanity/ailf - Versions diffs - 2.0.1 → 2.1.0 - Mend

@sanity/ailf 2.0.1 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (160) hide show

package/LICENSE +21 -0
package/dist/cli.js +0 -0
package/dist/orchestration/steps/run-eval-step.js +1 -1
package/dist/pipeline/checks.d.ts +8 -3
package/dist/pipeline/checks.js +23 -3
package/package.json +25 -25
package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.d.ts +0 -10
package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.js +0 -185
package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.d.ts +0 -6
package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.js +0 -42
package/dist/_vendor/ailf-tasks/cli.d.ts +0 -8
package/dist/_vendor/ailf-tasks/cli.js +0 -61
package/dist/_vendor/ailf-tasks/index.d.ts +0 -13
package/dist/_vendor/ailf-tasks/index.js +0 -16
package/dist/_vendor/ailf-tasks/parser.d.ts +0 -27
package/dist/_vendor/ailf-tasks/parser.js +0 -73
package/dist/_vendor/ailf-tasks/schemas.d.ts +0 -198
package/dist/_vendor/ailf-tasks/schemas.js +0 -180
package/dist/_vendor/ailf-tasks/validation.d.ts +0 -47
package/dist/_vendor/ailf-tasks/validation.js +0 -162
package/dist/adapters/task-sources/yaml-task-source.d.ts +0 -18
package/dist/adapters/task-sources/yaml-task-source.js +0 -139
package/dist/agent-observer/test-imports.d.ts +0 -7
package/dist/agent-observer/test-imports.js +0 -185
package/dist/commands/update-quality-scores.d.ts +0 -5
package/dist/commands/update-quality-scores.js +0 -20
package/dist/lib/agent-behavior-report.d.ts +0 -8
package/dist/lib/agent-behavior-report.js +0 -185
package/dist/lib/baseline.d.ts +0 -19
package/dist/lib/baseline.js +0 -153
package/dist/lib/calculate-scores.d.ts +0 -23
package/dist/lib/calculate-scores.js +0 -42
package/dist/lib/compare.d.ts +0 -18
package/dist/lib/compare.js +0 -170
package/dist/lib/coverage-audit.d.ts +0 -4
package/dist/lib/coverage-audit.js +0 -42
package/dist/lib/discovery-report.d.ts +0 -13
package/dist/lib/discovery-report.js +0 -57
package/dist/lib/fetch-docs.d.ts +0 -30
package/dist/lib/fetch-docs.js +0 -171
package/dist/lib/generate-configs.d.ts +0 -25
package/dist/lib/generate-configs.js +0 -42
package/dist/lib/grader-api.d.ts +0 -21
package/dist/lib/grader-api.js +0 -34
package/dist/lib/grader-compare.d.ts +0 -19
package/dist/lib/grader-compare.js +0 -91
package/dist/lib/grader-consistency.d.ts +0 -27
package/dist/lib/grader-consistency.js +0 -79
package/dist/lib/grader-sensitivity.d.ts +0 -19
package/dist/lib/grader-sensitivity.js +0 -75
package/dist/lib/grader-validate.d.ts +0 -19
package/dist/lib/grader-validate.js +0 -78
package/dist/lib/measure-retrieval.d.ts +0 -14
package/dist/lib/measure-retrieval.js +0 -71
package/dist/lib/pr-comment.d.ts +0 -16
package/dist/lib/pr-comment.js +0 -28
package/dist/lib/readiness-report.d.ts +0 -13
package/dist/lib/readiness-report.js +0 -108
package/dist/lib/webhook-server.d.ts +0 -11
package/dist/lib/webhook-server.js +0 -24
package/dist/lib/weekly-digest.d.ts +0 -24
package/dist/lib/weekly-digest.js +0 -148
package/dist/orchestration/env-bridge.d.ts +0 -21
package/dist/orchestration/env-bridge.js +0 -66
package/dist/orchestration/steps/fetch-docs-shell.d.ts +0 -17
package/dist/orchestration/steps/fetch-docs-shell.js +0 -30
package/dist/pipeline/compiler/__tests__/task-bridge.test.d.ts +0 -9
package/dist/pipeline/compiler/__tests__/task-bridge.test.js +0 -339
package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.d.ts +0 -70
package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.js +0 -485
package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.d.ts +0 -76
package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.js +0 -245
package/dist/pipeline/compiler/mode-handlers/literacy-handler.d.ts +0 -89
package/dist/pipeline/compiler/mode-handlers/literacy-handler.js +0 -379
package/dist/pipeline/compiler/mode-handlers/mcp-assertions.d.ts +0 -50
package/dist/pipeline/compiler/mode-handlers/mcp-assertions.js +0 -334
package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.d.ts +0 -69
package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.js +0 -307
package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.d.ts +0 -65
package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.js +0 -368
package/dist/pipeline/compiler/task-bridge.d.ts +0 -41
package/dist/pipeline/compiler/task-bridge.js +0 -92
package/dist/pipeline/expand-tasks.d.ts +0 -232
package/dist/pipeline/expand-tasks.js +0 -467
package/dist/pipeline/generate-configs.d.ts +0 -92
package/dist/pipeline/generate-configs.js +0 -445
package/dist/pipeline/steps/calculate-scores-step.d.ts +0 -11
package/dist/pipeline/steps/calculate-scores-step.js +0 -89
package/dist/pipeline/steps/compare-step.d.ts +0 -18
package/dist/pipeline/steps/compare-step.js +0 -90
package/dist/pipeline/steps/eval-step.d.ts +0 -53
package/dist/pipeline/steps/eval-step.js +0 -347
package/dist/pipeline/steps/fetch-docs-step.d.ts +0 -11
package/dist/pipeline/steps/fetch-docs-step.js +0 -84
package/dist/pipeline/steps/generate-configs-step.d.ts +0 -11
package/dist/pipeline/steps/generate-configs-step.js +0 -98
package/dist/pipeline/steps/grader-consistency-step.d.ts +0 -21
package/dist/pipeline/steps/grader-consistency-step.js +0 -74
package/dist/pipeline/steps/publish-report-step.d.ts +0 -57
package/dist/pipeline/steps/publish-report-step.js +0 -243
package/dist/pipeline/steps/report-step.d.ts +0 -13
package/dist/pipeline/steps/report-step.js +0 -56
package/dist/pipeline/steps/update-scores-step.d.ts +0 -11
package/dist/pipeline/steps/update-scores-step.js +0 -42
package/dist/scripts/agent-behavior-report.d.ts +0 -19
package/dist/scripts/agent-behavior-report.js +0 -315
package/dist/scripts/baseline.d.ts +0 -43
package/dist/scripts/baseline.js +0 -267
package/dist/scripts/calculate-scores.d.ts +0 -166
package/dist/scripts/calculate-scores.js +0 -1296
package/dist/scripts/compare.d.ts +0 -22
package/dist/scripts/compare.js +0 -334
package/dist/scripts/coverage-audit.d.ts +0 -44
package/dist/scripts/coverage-audit.js +0 -209
package/dist/scripts/debug-eval.d.ts +0 -19
package/dist/scripts/debug-eval.js +0 -73
package/dist/scripts/discovery-report.d.ts +0 -58
package/dist/scripts/discovery-report.js +0 -250
package/dist/scripts/fetch-docs.d.ts +0 -35
package/dist/scripts/fetch-docs.js +0 -472
package/dist/scripts/generate-configs.d.ts +0 -66
package/dist/scripts/generate-configs.js +0 -459
package/dist/scripts/grader-api.d.ts +0 -27
package/dist/scripts/grader-api.js +0 -206
package/dist/scripts/grader-compare.d.ts +0 -22
package/dist/scripts/grader-compare.js +0 -368
package/dist/scripts/grader-consistency.d.ts +0 -20
package/dist/scripts/grader-consistency.js +0 -313
package/dist/scripts/grader-sensitivity.d.ts +0 -22
package/dist/scripts/grader-sensitivity.js +0 -354
package/dist/scripts/grader-validate.d.ts +0 -19
package/dist/scripts/grader-validate.js +0 -267
package/dist/scripts/measure-retrieval.d.ts +0 -10
package/dist/scripts/measure-retrieval.js +0 -145
package/dist/scripts/migrate-tasks-to-content-lake.d.ts +0 -24
package/dist/scripts/migrate-tasks-to-content-lake.js +0 -328
package/dist/scripts/pipeline.d.ts +0 -76
package/dist/scripts/pipeline.js +0 -1031
package/dist/scripts/pr-comment.d.ts +0 -10
package/dist/scripts/pr-comment.js +0 -510
package/dist/scripts/readiness-report.d.ts +0 -88
package/dist/scripts/readiness-report.js +0 -342
package/dist/scripts/update-quality-scores.d.ts +0 -15
package/dist/scripts/update-quality-scores.js +0 -184
package/dist/scripts/validate-task-sources.d.ts +0 -21
package/dist/scripts/validate-task-sources.js +0 -210
package/dist/scripts/validate.d.ts +0 -13
package/dist/scripts/validate.js +0 -79
package/dist/scripts/webhook-server.d.ts +0 -26
package/dist/scripts/webhook-server.js +0 -147
package/dist/scripts/weekly-digest.d.ts +0 -24
package/dist/scripts/weekly-digest.js +0 -144
package/dist/sinks/format-slack.d.ts +0 -64
package/dist/sinks/format-slack.js +0 -306
package/dist/sinks/slack-sink.d.ts +0 -27
package/dist/sinks/slack-sink.js +0 -78
package/dist/sinks/webhook-sink.d.ts +0 -19
package/dist/sinks/webhook-sink.js +0 -50
package/tasks/.expanded.agentic.yaml +0 -280
package/tasks/.expanded.yaml +0 -565

package/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2025 Sanity.io
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

package/dist/cli.js CHANGED Viewed

File without changes

package/dist/orchestration/steps/run-eval-step.js CHANGED Viewed

@@ -29,7 +29,7 @@ export class RunEvalStep {
         const start = Date.now();
         const { rootDir, debug, concurrency, noCache } = ctx.config;
         // Precondition: config file exists
-        const configIssues = checkGeneratedConfigsExist(rootDir);
+        const configIssues = checkGeneratedConfigsExist(rootDir, this.mode);
         const configErrors = configIssues.filter((i) => i.severity === "error");
         if (configErrors.length > 0) {
             return {

package/dist/pipeline/checks.d.ts CHANGED Viewed

@@ -23,10 +23,15 @@ export declare function checkContextsExist(rootDir: string, areas: string[]): Va
  */
 export declare function checkEnvironment(rootDir: string): ValidationIssue[];
 /**
- * Check that the baseline `promptfooconfig.yaml` exists. Optionally check
- * for `promptfooconfig.observed.yaml` and `promptfooconfig.agentic.yaml`.
+ * Check that the generated promptfoo config for a given mode exists.
+ *
+ * When `mode` is provided, checks only for that mode's config file
+ * (e.g. `promptfooconfig.agent-harness.yaml` for mode `"agent-harness"`).
+ *
+ * When `mode` is omitted, falls back to the legacy literacy check:
+ * baseline `promptfooconfig.yaml` (required) plus optional observed/agentic.
  */
-export declare function checkGeneratedConfigsExist(rootDir: string): ValidationIssue[];
+export declare function checkGeneratedConfigsExist(rootDir: string, mode?: string): ValidationIssue[];
 /**
  * Check that the eval results JSON file exists, is valid JSON, and contains
  * a `results` array.

package/dist/pipeline/checks.js CHANGED Viewed

@@ -8,6 +8,7 @@
 import { config as loadEnv } from "dotenv";
 import { existsSync, readFileSync, statSync } from "fs";
 import { join, resolve } from "path";
+import { configFileForMode } from "./eval-constants.js";
 // ---------------------------------------------------------------------------
 // Precondition: contexts exist for each feature area
 // ---------------------------------------------------------------------------
@@ -109,11 +110,30 @@ export function checkEnvironment(rootDir) {
 // Postcondition: score summary is valid
 // ---------------------------------------------------------------------------
 /**
- * Check that the baseline `promptfooconfig.yaml` exists. Optionally check
- * for `promptfooconfig.observed.yaml` and `promptfooconfig.agentic.yaml`.
+ * Check that the generated promptfoo config for a given mode exists.
+ *
+ * When `mode` is provided, checks only for that mode's config file
+ * (e.g. `promptfooconfig.agent-harness.yaml` for mode `"agent-harness"`).
+ *
+ * When `mode` is omitted, falls back to the legacy literacy check:
+ * baseline `promptfooconfig.yaml` (required) plus optional observed/agentic.
  */
-export function checkGeneratedConfigsExist(rootDir) {
+export function checkGeneratedConfigsExist(rootDir, mode) {
     const issues = [];
+    if (mode) {
+        const configName = configFileForMode(mode);
+        const configPath = resolve(rootDir, configName);
+        if (!existsSync(configPath)) {
+            issues.push({
+                message: `Config '${configName}' not found for mode '${mode}'. Run the pipeline to generate it.`,
+                path: configPath,
+                severity: "error",
+                source: "checkGeneratedConfigsExist",
+            });
+        }
+        return issues;
+    }
+    // Legacy literacy check: baseline required, observed/agentic optional
     const baselinePath = resolve(rootDir, "promptfooconfig.yaml");
     if (!existsSync(baselinePath)) {
         issues.push({

package/package.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "name": "@sanity/ailf",
-  "version": "2.0.1",
+  "version": "2.1.0",
   "private": false,
   "publishConfig": {
-    "access": "restricted"
+    "access": "public"
   },
   "license": "MIT",
   "repository": {
@@ -31,6 +31,28 @@
     "canonical",
     "tasks"
   ],
+  "dependencies": {
+    "@google-cloud/bigquery": "^8.1.1",
+    "@inquirer/prompts": "^8.3.0",
+    "@modelcontextprotocol/sdk": "^1.29.0",
+    "@portabletext/markdown": "^1.0.0",
+    "@sanity/client": "^7.3.0",
+    "commander": "^14.0.3",
+    "dotenv": "^16.4.7",
+    "dotenv-cli": "^11.0.0",
+    "jiti": "^2.6.1",
+    "js-yaml": "^4.1.0",
+    "promptfoo": "^0.120.24",
+    "zod": "^4.3.6"
+  },
+  "devDependencies": {
+    "@types/js-yaml": "^4.0.9",
+    "@types/node": "^22.13.1",
+    "tsx": "^4.19.2",
+    "typescript": "^5.7.3",
+    "@sanity/ailf-core": "0.1.0",
+    "@sanity/ailf-shared": "0.1.0"
+  },
   "scripts": {
     "build": "tsc && tsx scripts/bundle-workspace-deps.ts",
     "generate-configs": "tsx src/cli.ts generate-configs",
@@ -58,27 +80,5 @@
     "discovery-report": "tsx src/cli.ts discovery-report",
     "webhook-server": "tsx src/cli.ts webhook-server",
     "weekly-digest": "tsx src/cli.ts weekly-digest"
-  },
-  "dependencies": {
-    "@google-cloud/bigquery": "^8.1.1",
-    "@inquirer/prompts": "^8.3.0",
-    "@modelcontextprotocol/sdk": "^1.29.0",
-    "@portabletext/markdown": "^1.0.0",
-    "@sanity/client": "^7.3.0",
-    "commander": "^14.0.3",
-    "dotenv": "^16.4.7",
-    "dotenv-cli": "^11.0.0",
-    "jiti": "^2.6.1",
-    "js-yaml": "^4.1.0",
-    "promptfoo": "^0.120.24",
-    "zod": "^4.3.6"
-  },
-  "devDependencies": {
-    "@sanity/ailf-core": "workspace:*",
-    "@sanity/ailf-shared": "workspace:*",
-    "@types/js-yaml": "^4.0.9",
-    "@types/node": "^22.13.1",
-    "tsx": "^4.19.2",
-    "typescript": "^5.7.3"
   }
-}
+}

package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.d.ts DELETED Viewed

@@ -1,10 +0,0 @@
-/**
- * comparison-formatters.test.ts
- *
- * Verifies that formatComparisonMarkdown() and formatComparisonTable()
- * dynamically derive column headers from the dimension keys present
- * in the report data, rather than hardcoding literacy-specific names.
- *
- * Run: npx tsx --test src/__tests__/comparison-formatters.test.ts
- */
-export {};

package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.js DELETED Viewed

@@ -1,185 +0,0 @@
-/**
- * comparison-formatters.test.ts
- *
- * Verifies that formatComparisonMarkdown() and formatComparisonTable()
- * dynamically derive column headers from the dimension keys present
- * in the report data, rather than hardcoding literacy-specific names.
- *
- * Run: npx tsx --test src/__tests__/comparison-formatters.test.ts
- */
-import assert from "node:assert/strict";
-import { describe, it } from "node:test";
-import { formatComparisonMarkdown, formatComparisonTable, } from "../services/comparison-formatters.js";
-// ---------------------------------------------------------------------------
-// Helpers
-// ---------------------------------------------------------------------------
-/** Minimal ScoreSummary stub — only fields the formatters actually read */
-function stubSummary(avgScore) {
-    return {
-        belowCritical: [],
-        lowestArea: "area-a",
-        lowestScore: 40,
-        overall: {
-            avgCeilingScore: 80,
-            avgScore,
-            avgDocLift: 10,
-            avgDocQualityGap: 20,
-            avgFloorScore: 30,
-            negativeDocLiftCount: 0,
-        },
-        scores: [],
-        timestamp: "2026-04-05T00:00:00.000Z",
-    };
-}
-function makeReport(overrides) {
-    return {
-        areas: [
-            {
-                area: "area-a",
-                baseline: 60,
-                experiment: 65,
-                delta: 5,
-                change: "improved",
-                dimensions: overrides.areaDimensions,
-                ceilingDelta: 0,
-                docLiftDelta: 2,
-                floorDelta: 0,
-            },
-        ],
-        baseline: stubSummary(60),
-        experiment: stubSummary(65),
-        deltas: {
-            overall: 5,
-            perArea: { "area-a": 5 },
-            perDimension: overrides.perDimension,
-            docLift: 2,
-        },
-        generatedAt: "2026-04-05T00:00:00.000Z",
-        improved: ["area-a"],
-        regressed: [],
-        unchanged: [],
-        notEvaluated: [],
-        mismatched: { onlyInBaseline: [], onlyInExperiment: [] },
-        noiseThreshold: 2,
-        noiseThresholdEmpirical: false,
-    };
-}
-// ---------------------------------------------------------------------------
-// Tests — literacy dimensions (backward compatibility)
-// ---------------------------------------------------------------------------
-describe("formatComparisonMarkdown", () => {
-    it("renders literacy dimension columns dynamically", () => {
-        const report = makeReport({
-            areaDimensions: {
-                "task-completion": { baseline: 60, experiment: 65, delta: 5 },
-                "code-correctness": { baseline: 50, experiment: 55, delta: 5 },
-                "doc-coverage": { baseline: 40, experiment: 42, delta: 2 },
-            },
-            perDimension: {
-                "task-completion": 5,
-                "code-correctness": 5,
-                "doc-coverage": 2,
-            },
-        });
-        const md = formatComparisonMarkdown(report);
-        // Column headers should be title-cased from kebab-case
-        assert.ok(md.includes("Task Completion"), "should have Task Completion column header");
-        assert.ok(md.includes("Code Correctness"), "should have Code Correctness column header");
-        assert.ok(md.includes("Doc Coverage"), "should have Doc Coverage column header");
-        // Per-dimension averages section should also show dynamic labels
-        assert.ok(md.includes("| Task Completion |"), "dimension averages should include Task Completion");
-        assert.ok(md.includes("| Code Correctness |"), "dimension averages should include Code Correctness");
-        assert.ok(md.includes("| Doc Coverage |"), "dimension averages should include Doc Coverage");
-    });
-    it("renders MCP dimension columns dynamically", () => {
-        const report = makeReport({
-            areaDimensions: {
-                "input-validation": { baseline: 50, experiment: 60, delta: 10 },
-                "output-correctness": { baseline: 70, experiment: 75, delta: 5 },
-                "error-handling": { baseline: 40, experiment: 45, delta: 5 },
-                security: { baseline: 80, experiment: 82, delta: 2 },
-            },
-            perDimension: {
-                "input-validation": 10,
-                "output-correctness": 5,
-                "error-handling": 5,
-                security: 2,
-            },
-        });
-        const md = formatComparisonMarkdown(report);
-        // 4 MCP columns instead of 3 literacy columns
-        assert.ok(md.includes("Input Validation"), "should have Input Validation column");
-        assert.ok(md.includes("Output Correctness"), "should have Output Correctness column");
-        assert.ok(md.includes("Error Handling"), "should have Error Handling column");
-        assert.ok(md.includes("Security"), "should have Security column");
-        // Per-dimension averages
-        assert.ok(md.includes("| Input Validation |"), "dimension averages should include Input Validation");
-        assert.ok(md.includes("| Security |"), "dimension averages should include Security");
-    });
-});
-describe("formatComparisonTable", () => {
-    it("renders literacy dimension columns dynamically", () => {
-        const report = makeReport({
-            areaDimensions: {
-                "task-completion": { baseline: 60, experiment: 65, delta: 5 },
-                "code-correctness": { baseline: 50, experiment: 55, delta: 5 },
-                "doc-coverage": { baseline: 40, experiment: 42, delta: 2 },
-            },
-            perDimension: {
-                "task-completion": 5,
-                "code-correctness": 5,
-                "doc-coverage": 2,
-            },
-        });
-        const table = formatComparisonTable(report);
-        // Dimension averages section
-        assert.ok(table.includes("Task Completion:"), "should show Task Completion in dimension averages");
-        assert.ok(table.includes("Code Correctness:"), "should show Code Correctness in dimension averages");
-        assert.ok(table.includes("Doc Coverage:"), "should show Doc Coverage in dimension averages");
-        // Per-area table header
-        assert.ok(table.includes("Task Completion"), "per-area table should have Task Completion header");
-        assert.ok(table.includes("Code Correctness"), "per-area table should have Code Correctness header");
-        assert.ok(table.includes("Doc Coverage"), "per-area table should have Doc Coverage header");
-    });
-    it("renders MCP dimension columns dynamically", () => {
-        const report = makeReport({
-            areaDimensions: {
-                "input-validation": { baseline: 50, experiment: 60, delta: 10 },
-                "output-correctness": { baseline: 70, experiment: 75, delta: 5 },
-                "error-handling": { baseline: 40, experiment: 45, delta: 5 },
-                security: { baseline: 80, experiment: 82, delta: 2 },
-            },
-            perDimension: {
-                "input-validation": 10,
-                "output-correctness": 5,
-                "error-handling": 5,
-                security: 2,
-            },
-        });
-        const table = formatComparisonTable(report);
-        // 4 MCP columns in the per-area table
-        assert.ok(table.includes("Input Validation"), "should have Input Validation");
-        assert.ok(table.includes("Output Correctness"), "should have Output Correctness");
-        assert.ok(table.includes("Error Handling"), "should have Error Handling");
-        assert.ok(table.includes("Security"), "should have Security");
-        // Should NOT have literacy dimension headers
-        assert.ok(!table.includes("Task Completion"), "should not contain Task Completion");
-        assert.ok(!table.includes("Doc Coverage"), "should not contain Doc Coverage");
-    });
-    it("includes delta values for each dimension in the per-area rows", () => {
-        const report = makeReport({
-            areaDimensions: {
-                "input-validation": { baseline: 50, experiment: 60, delta: 10 },
-                "output-correctness": { baseline: 70, experiment: 75, delta: 5 },
-            },
-            perDimension: {
-                "input-validation": 10,
-                "output-correctness": 5,
-            },
-        });
-        const table = formatComparisonTable(report);
-        // The per-area row should include the delta values (+10 and +5)
-        assert.ok(table.includes("+10"), "should show +10 delta for area-a");
-        assert.ok(table.includes("+5"), "should show +5 delta for area-a");
-    });
-});

package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.d.ts DELETED Viewed

@@ -1,6 +0,0 @@
-/**
- * noop-collector.test.ts — verifies the NoOpArtifactCollector is truly zero-cost.
- *
- * Run: npx tsx --test src/artifact-capture/__tests__/noop-collector.test.ts
- */
-export {};

package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.js DELETED Viewed

@@ -1,42 +0,0 @@
-/**
- * noop-collector.test.ts — verifies the NoOpArtifactCollector is truly zero-cost.
- *
- * Run: npx tsx --test src/artifact-capture/__tests__/noop-collector.test.ts
- */
-import assert from "node:assert/strict";
-import { describe, it } from "node:test";
-import { NoOpArtifactCollector } from "../noop-collector.js";
-describe("NoOpArtifactCollector", () => {
-    it("enabled returns false", () => {
-        const collector = new NoOpArtifactCollector();
-        assert.equal(collector.enabled, false);
-    });
-    it("extrasEnabled returns false", () => {
-        const collector = new NoOpArtifactCollector();
-        assert.equal(collector.extrasEnabled, false);
-    });
-    it("capture() is callable and returns void", () => {
-        const collector = new NoOpArtifactCollector();
-        const result = collector.capture("step", "type", { data: true });
-        assert.equal(result, undefined);
-    });
-    it("captureFile() is callable and returns void", () => {
-        const collector = new NoOpArtifactCollector();
-        const result = collector.captureFile("step", "type", "/some/path");
-        assert.equal(result, undefined);
-    });
-    it("flush() returns zero-count result", async () => {
-        const collector = new NoOpArtifactCollector();
-        const result = await collector.flush();
-        assert.equal(result.artifactCount, 0);
-        assert.equal(result.destination, "");
-        assert.equal(result.totalBytes, 0);
-        assert.equal(result.compressed, false);
-    });
-    it("flush() returns the same frozen object every time", async () => {
-        const collector = new NoOpArtifactCollector();
-        const a = await collector.flush();
-        const b = await collector.flush();
-        assert.equal(a, b);
-    });
-});

package/dist/_vendor/ailf-tasks/cli.d.ts DELETED Viewed

@@ -1,8 +0,0 @@
-/**
- * cli.ts — Minimal CLI for standalone task validation.
- *
- * Usage:
- *   npx @sanity/ailf-tasks validate .ailf/tasks/
- *   npx @sanity/ailf-tasks validate             # defaults to .ailf/tasks/
- */
-export declare function run(): void;

package/dist/_vendor/ailf-tasks/cli.js DELETED Viewed

@@ -1,61 +0,0 @@
-/**
- * cli.ts — Minimal CLI for standalone task validation.
- *
- * Usage:
- *   npx @sanity/ailf-tasks validate .ailf/tasks/
- *   npx @sanity/ailf-tasks validate             # defaults to .ailf/tasks/
- */
-import { loadTaskDir } from "./parser.js";
-import { formatValidationResult, validateRepoTasks } from "./validation.js";
-export function run() {
-    const args = process.argv.slice(2);
-    const command = args[0];
-    if (command === "validate") {
-        const dir = args[1] ?? ".ailf/tasks";
-        validateCommand(dir);
-    }
-    else if (command === "--help" ||
-        command === "-h" ||
-        command === undefined) {
-        printUsage();
-    }
-    else {
-        console.error(`Unknown command: ${command}`);
-        printUsage();
-        process.exit(1);
-    }
-}
-function validateCommand(dir) {
-    try {
-        const tasks = loadTaskDir(dir);
-        // Run semantic validation
-        const result = validateRepoTasks(tasks);
-        const formatted = formatValidationResult(result);
-        console.log(`✅ ${tasks.length} task(s) validated from ${dir}`);
-        for (const task of tasks) {
-            console.log(`   ${task.id} — ${task.description}`);
-        }
-        if (result.warnings.length > 0 || result.errors.length > 0) {
-            console.log("");
-            console.log(formatted);
-        }
-        if (!result.valid) {
-            process.exit(1);
-        }
-    }
-    catch (err) {
-        console.error(`❌ ${err instanceof Error ? err.message : String(err)}`);
-        process.exit(1);
-    }
-}
-function printUsage() {
-    console.log("Usage: ailf-tasks <command> [options]");
-    console.log("");
-    console.log("Commands:");
-    console.log("  validate [dir]  Validate task YAML files (default: .ailf/tasks/)");
-    console.log("");
-    console.log("Examples:");
-    console.log("  ailf-tasks validate");
-    console.log("  ailf-tasks validate .ailf/tasks/");
-    console.log("  ailf-tasks validate /path/to/tasks/");
-}

package/dist/_vendor/ailf-tasks/index.d.ts DELETED Viewed

@@ -1,13 +0,0 @@
-/**
- * @sanity/ailf-tasks — Task definition schemas and YAML parser.
- *
- * Lightweight package for parsing and validating .ailf/tasks/*.yaml files
- * without depending on the full AILF CLI or its heavyweight dependencies
- * (Promptfoo, LLM SDKs, Sanity client).
- *
- * Usage:
- *   import { parseTaskFile, loadTaskDir, RepoTaskSchema } from '@sanity/ailf-tasks'
- */
-export { CURATED_ASSERTION_TYPES, RepoTaskFileSchema, RepoTaskSchema, RUBRIC_TEMPLATE_NAMES, type CuratedAssertionType, type RepoTask, type RubricTemplateName, } from "./schemas.js";
-export { loadTaskDir, parseTaskFile } from "./parser.js";
-export { detectSnakeCaseFields, formatValidationResult, validateRepoTasks, type ValidationMessage, type ValidationResult, } from "./validation.js";

package/dist/_vendor/ailf-tasks/index.js DELETED Viewed

@@ -1,16 +0,0 @@
-/**
- * @sanity/ailf-tasks — Task definition schemas and YAML parser.
- *
- * Lightweight package for parsing and validating .ailf/tasks/*.yaml files
- * without depending on the full AILF CLI or its heavyweight dependencies
- * (Promptfoo, LLM SDKs, Sanity client).
- *
- * Usage:
- *   import { parseTaskFile, loadTaskDir, RepoTaskSchema } from '@sanity/ailf-tasks'
- */
-// Schemas and types
-export { CURATED_ASSERTION_TYPES, RepoTaskFileSchema, RepoTaskSchema, RUBRIC_TEMPLATE_NAMES, } from "./schemas.js";
-// Parsing
-export { loadTaskDir, parseTaskFile } from "./parser.js";
-// Validation
-export { detectSnakeCaseFields, formatValidationResult, validateRepoTasks, } from "./validation.js";

package/dist/_vendor/ailf-tasks/parser.d.ts DELETED Viewed

@@ -1,27 +0,0 @@
-/**
- * parser.ts — Standalone task file and directory parsing.
- *
- * High-level functions for loading and validating .ailf/tasks/ YAML
- * files without any dependency on the eval pipeline.
- *
- * Usage:
- *   import { parseTaskFile, loadTaskDir } from '@sanity/ailf-tasks'
- */
-import { type RepoTask } from "./schemas.js";
-/**
- * Parse a single task YAML string and return validated tasks.
- *
- * @param content - Raw YAML string content
- * @param filename - Source filename (for error messages)
- * @returns Validated array of RepoTask objects
- * @throws Error if YAML parsing or Zod validation fails
- */
-export declare function parseTaskFile(content: string, filename?: string): RepoTask[];
-/**
- * Load and parse all task YAML files from a directory.
- *
- * @param dirPath - Path to directory containing .yaml/.yml files
- * @returns All validated tasks, sorted by filename
- * @throws Error if directory not found, no YAML files, or validation fails
- */
-export declare function loadTaskDir(dirPath: string): RepoTask[];

package/dist/_vendor/ailf-tasks/parser.js DELETED Viewed

@@ -1,73 +0,0 @@
-/**
- * parser.ts — Standalone task file and directory parsing.
- *
- * High-level functions for loading and validating .ailf/tasks/ YAML
- * files without any dependency on the eval pipeline.
- *
- * Usage:
- *   import { parseTaskFile, loadTaskDir } from '@sanity/ailf-tasks'
- */
-import { existsSync, readdirSync, readFileSync } from "fs";
-import { resolve } from "path";
-import { load } from "js-yaml";
-import { RepoTaskFileSchema } from "./schemas.js";
-// ---------------------------------------------------------------------------
-// Public API
-// ---------------------------------------------------------------------------
-/**
- * Parse a single task YAML string and return validated tasks.
- *
- * @param content - Raw YAML string content
- * @param filename - Source filename (for error messages)
- * @returns Validated array of RepoTask objects
- * @throws Error if YAML parsing or Zod validation fails
- */
-export function parseTaskFile(content, filename = "<string>") {
-    const parsed = load(content);
-    if (!Array.isArray(parsed)) {
-        throw new Error(`${filename} did not parse to an array of tasks. ` +
-            "Task files must contain a YAML array of task definitions.");
-    }
-    const result = RepoTaskFileSchema.safeParse(parsed);
-    if (!result.success) {
-        const messages = result.error.issues
-            .map((i) => `  [${i.path.join(".")}]: ${i.message}`)
-            .join("\n");
-        throw new Error(`Invalid task file "${filename}":\n${messages}`);
-    }
-    return result.data;
-}
-/**
- * Load and parse all task YAML files from a directory.
- *
- * @param dirPath - Path to directory containing .yaml/.yml files
- * @returns All validated tasks, sorted by filename
- * @throws Error if directory not found, no YAML files, or validation fails
- */
-export function loadTaskDir(dirPath) {
-    if (!existsSync(dirPath)) {
-        throw new Error(`Tasks directory not found: ${dirPath}\n` +
-            "  Expected a directory containing .ailf/tasks/*.yaml files.");
-    }
-    const yamlFiles = readdirSync(dirPath)
-        .filter((f) => (f.endsWith(".yaml") || f.endsWith(".yml")) && !f.startsWith("."))
-        .sort();
-    if (yamlFiles.length === 0) {
-        throw new Error(`No YAML files found in ${dirPath}\n` +
-            "  Expected .ailf/tasks/*.yaml files with task definitions.");
-    }
-    const allTasks = [];
-    for (const file of yamlFiles) {
-        const filePath = resolve(dirPath, file);
-        const content = readFileSync(filePath, "utf-8");
-        try {
-            const tasks = parseTaskFile(content, file);
-            allTasks.push(...tasks);
-        }
-        catch (err) {
-            const msg = err instanceof Error ? err.message : String(err);
-            throw new Error(`Failed to load ${file}:\n${msg}`, { cause: err });
-        }
-    }
-    return allTasks;
-}