npm - @sanity/ailf - Versions diffs - 2.2.0 → 2.3.1 - Mend

@sanity/ailf 2.2.0 → 2.3.1

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (47)

package/config/rubrics.ts +3 -3
package/dist/_vendor/ailf-core/types/index.d.ts +25 -0
package/dist/adapters/task-sources/content-lake-task-source.js +15 -7
package/dist/commands/calculate-scores.js +7 -2
package/dist/commands/capture-list.d.ts +1 -1
package/dist/commands/capture-list.js +6 -3
package/dist/commands/compare.js +11 -7
package/dist/commands/explain-handler.js +22 -24
package/dist/commands/fetch-docs.js +4 -2
package/dist/commands/generate-configs.js +6 -2
package/dist/commands/pipeline-action.js +8 -24
package/dist/commands/pipeline.js +1 -1
package/dist/commands/pr-comment.js +6 -2
package/dist/commands/publish.d.ts +1 -0
package/dist/commands/publish.js +12 -8
package/dist/commands/remote-pipeline.js +1 -1
package/dist/commands/remote-results.d.ts +8 -8
package/dist/commands/remote-results.js +7 -7
package/dist/commands/shared/options.d.ts +8 -0
package/dist/commands/shared/options.js +10 -0
package/dist/commands/shared/resolve-output-dir.d.ts +27 -0
package/dist/commands/shared/resolve-output-dir.js +36 -0
package/dist/composition-root.js +1 -1
package/dist/config/rubrics.ts +3 -3
package/dist/orchestration/build-app-context.js +1 -1
package/dist/orchestration/steps/fetch-docs-step.js +23 -9
package/dist/orchestration/steps/gap-analysis-step.js +86 -75
package/dist/orchestration/steps/generate-configs-step.d.ts +15 -0
package/dist/orchestration/steps/generate-configs-step.js +56 -0
package/dist/orchestration/steps/run-eval-step.js +14 -0
package/dist/pipeline/calculate-scores.js +113 -2
package/dist/pipeline/compare.js +50 -19
package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js +64 -0
package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.d.ts +6 -0
package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.js +14 -0
package/dist/pipeline/compiler/mode-handlers/agent-harness/index.js +1 -0
package/dist/pipeline/compiler/mode-handlers/agent-harness/types.d.ts +3 -0
package/dist/pipeline/compiler/mode-handlers/literacy/assertions.js +1 -27
package/dist/pipeline/compiler/mode-handlers/literacy/types.d.ts +2 -9
package/dist/pipeline/compiler/rubric-resolution.d.ts +40 -0
package/dist/pipeline/compiler/rubric-resolution.js +52 -0
package/dist/pipeline/compiler/scoring-bridge.js +59 -7
package/dist/pipeline/provenance.js +7 -1
package/dist/pipeline/validate.d.ts +5 -4
package/dist/pipeline/validate.js +34 -113
package/dist/webhook/eval-request-handler.js +4 -0
package/package.json +1 -1

package/dist/pipeline/validate.d.ts CHANGED Viewed

@@ -51,10 +51,11 @@ export declare function validateReferenceSolutions(rootDir: string): ValidationI
  */
 export declare function validateRubricsYaml(rootDir: string): ValidationIssue[];
 /**
- * Check that tasks/*.yaml files exist, parse, and conform to the Zod schema.
- * Validates both the new single-definition format (with `id`) and the legacy
- * paired format. Uses `TaskFileSchema` from schemas.ts for structural
- * validation, plus cross-entry checks (duplicate IDs, docs path consistency).
+ * Check that task definition files exist.
+ *
+ * Tasks live as `*.task.ts` files in mode subdirectories (e.g.
+ * `tasks/literacy/groq.task.ts`). Legacy YAML task files are no longer
+ * used. Warns only if no task files are found at all.
  */
 export declare function validateTaskFiles(rootDir: string): ValidationIssue[];
 /**

package/dist/pipeline/validate.js CHANGED Viewed

@@ -9,10 +9,9 @@
  */
 import fs from "fs";
 import path from "path";
-import { load } from "js-yaml";
 import { tryLoadConfigFile } from "./compiler/config-loader.js";
 import { resolveMappings } from "./resolve-mappings.js";
-import { FeatureRegistrySchema, formatZodErrors, RubricConfigSchema, TaskFileSchema, ThresholdConfigSchema, } from "./schemas.js";
+import { FeatureRegistrySchema, formatZodErrors, RubricConfigSchema, ThresholdConfigSchema, } from "./schemas.js";
 // ---------------------------------------------------------------------------
 // Helpers
 // ---------------------------------------------------------------------------
@@ -248,10 +247,11 @@ export function validateRubricsYaml(rootDir) {
     return issues;
 }
 /**
- * Check that tasks/*.yaml files exist, parse, and conform to the Zod schema.
- * Validates both the new single-definition format (with `id`) and the legacy
- * paired format. Uses `TaskFileSchema` from schemas.ts for structural
- * validation, plus cross-entry checks (duplicate IDs, docs path consistency).
+ * Check that task definition files exist.
+ *
+ * Tasks live as `*.task.ts` files in mode subdirectories (e.g.
+ * `tasks/literacy/groq.task.ts`). Legacy YAML task files are no longer
+ * used. Warns only if no task files are found at all.
  */
 export function validateTaskFiles(rootDir) {
     const source = "validateTaskFiles";
@@ -261,70 +261,9 @@ export function validateTaskFiles(rootDir) {
         issues.push(warning(source, "tasks/ directory not found (using Content Lake tasks?)", tasksDir));
         return issues;
     }
-    const yamlFiles = fs
-        .readdirSync(tasksDir)
-        .filter((f) => (f.endsWith(".yaml") || f.endsWith(".yml")) && !f.startsWith("."));
-    if (yamlFiles.length === 0) {
-        issues.push(warning(source, "No task YAML files found in tasks/ (using Content Lake tasks?)", tasksDir));
-        return issues;
-    }
-    const allIds = new Map(); // id → source file
-    const templateKeys = loadTemplateKeys(rootDir);
-    for (const file of yamlFiles) {
-        const filePath = path.join(tasksDir, file);
-        // Step 1: Parse YAML
-        const result = parseYamlFile(filePath, source);
-        if (!result.ok) {
-            issues.push(result.issue);
-            continue;
-        }
-        const { data } = result;
-        if (!Array.isArray(data)) {
-            issues.push(error(source, `${file} did not parse to an array of tasks`, filePath));
-            continue;
-        }
-        // Step 2: Validate each entry with Zod schema
-        const zodResult = TaskFileSchema.safeParse(data);
-        if (!zodResult.success) {
-            const lines = formatZodErrors(zodResult.error);
-            for (const line of lines) {
-                issues.push(error(source, `${file}: ${line.trim()}`, filePath));
-            }
-            continue;
-        }
-        // Step 3: Cross-entry validation (duplicate IDs, docs path consistency)
-        for (const entry of zodResult.data) {
-            if ("id" in entry && typeof entry.id === "string") {
-                // Check for duplicate IDs across all files
-                if (allIds.has(entry.id)) {
-                    issues.push(error(source, `${file}: duplicate id '${entry.id}' (also in ${allIds.get(entry.id)})`, filePath));
-                }
-                else {
-                    allIds.set(entry.id, file);
-                }
-                // Check docs path matches task id
-                const vars = entry.vars;
-                if (vars.docs && typeof vars.docs === "string") {
-                    const expectedPath = `file://contexts/canonical/${entry.id}.md`;
-                    if (vars.docs !== expectedPath) {
-                        issues.push(warning(source, `${file}: id is '${entry.id}' but docs path is '${vars.docs}' (expected '${expectedPath}')`, filePath));
-                    }
-                }
-                // Check that llm-rubric template references exist in config/rubrics
-                const asserts = entry.assert;
-                if (Array.isArray(asserts) && templateKeys.size > 0) {
-                    for (const a of asserts) {
-                        const assertion = a;
-                        if (assertion.type === "llm-rubric" &&
-                            typeof assertion.template === "string") {
-                            if (!templateKeys.has(assertion.template)) {
-                                issues.push(error(source, `${file}: task '${entry.id}' references unknown rubric template '${assertion.template}' (available: ${[...templateKeys].join(", ")})`, filePath));
-                            }
-                        }
-                    }
-                }
-            }
-        }
+    const taskAreas = collectTaskAreas(tasksDir);
+    if (taskAreas.size === 0) {
+        issues.push(warning(source, "No task files found in tasks/ (using Content Lake tasks?)", tasksDir));
     }
     return issues;
 }
@@ -355,15 +294,10 @@ export function validateThresholdsYaml(rootDir) {
     // Cross-reference: warn if an area override references an area with no task file
     if (zodResult.data.areas) {
         const tasksDir = path.join(rootDir, "tasks");
-        if (fs.existsSync(tasksDir)) {
-            const taskFiles = new Set(fs
-                .readdirSync(tasksDir)
-                .filter((f) => /\.(yaml|yml|task\.ts|task\.js)$/.test(f))
-                .map((f) => f.replace(/\.(yaml|yml|task\.ts|task\.js)$/, "")));
-            for (const areaName of Object.keys(zodResult.data.areas)) {
-                if (!taskFiles.has(areaName)) {
-                    issues.push(warning(source, `config/thresholds: area override '${areaName}' has no matching tasks/${areaName}`, loaded.filePath));
-                }
+        const taskAreas = collectTaskAreas(tasksDir);
+        for (const areaName of Object.keys(zodResult.data.areas)) {
+            if (!taskAreas.has(areaName)) {
+                issues.push(warning(source, `config/thresholds: area override '${areaName}' has no matching task file`, loaded.filePath));
             }
         }
     }
@@ -378,44 +312,31 @@ function error(source, message, filePath) {
     };
 }
 /**
- * Load the set of valid rubric template keys from config/rubrics.
- * Returns an empty set if the file is missing or invalid.
+ * Collect task area names from all subdirectories of `tasksDir`.
+ *
+ * Task files live in mode subdirectories (e.g. `tasks/literacy/groq.task.ts`).
+ * Returns a set of basenames without the `.task.ts`/`.task.js` extension.
  */
-function loadTemplateKeys(rootDir) {
-    const loaded = tryLoadConfigFile("rubrics", rootDir);
-    if (!loaded)
+function collectTaskAreas(tasksDir) {
+    if (!fs.existsSync(tasksDir))
         return new Set();
-    try {
-        const templates = loaded.data?.templates;
-        if (templates && typeof templates === "object") {
-            return new Set(Object.keys(templates));
+    const areas = new Set();
+    const taskFilePattern = /\.task\.(ts|js)$/;
+    for (const entry of fs.readdirSync(tasksDir, { withFileTypes: true })) {
+        if (entry.isDirectory()) {
+            const subdir = path.join(tasksDir, entry.name);
+            for (const file of fs.readdirSync(subdir)) {
+                if (taskFilePattern.test(file)) {
+                    areas.add(file.replace(taskFilePattern, ""));
+                }
+            }
+        }
+        // Also check top-level task files for backwards compatibility
+        if (entry.isFile() && taskFilePattern.test(entry.name)) {
+            areas.add(entry.name.replace(taskFilePattern, ""));
         }
     }
-    catch {
-        // Ignore — structural errors are caught by validateRubricsYaml
-    }
-    return new Set();
-}
-/** Safely parse a YAML file, returning the parsed value or a validation issue. */
-function parseYamlFile(filePath, source) {
-    if (!fs.existsSync(filePath)) {
-        return {
-            issue: error(source, `File not found: ${filePath}`, filePath),
-            ok: false,
-        };
-    }
-    try {
-        const raw = fs.readFileSync(filePath, "utf-8");
-        const data = load(raw);
-        return { data, ok: true };
-    }
-    catch (err) {
-        const message = err instanceof Error ? err.message : "Unknown YAML parse error";
-        return {
-            issue: error(source, `Failed to parse YAML: ${message}`, filePath),
-            ok: false,
-        };
-    }
+    return areas;
 }
 // ---------------------------------------------------------------------------
 // Main entry point

package/dist/webhook/eval-request-handler.js CHANGED Viewed

@@ -173,6 +173,10 @@ async function dispatchGitHubEval(repo, payload, config) {
             projectId: payload.projectId,
             publish: true,
             source: "production",
+            // Studio-initiated evals always use Content Lake as the task source.
+            // Without this, the pipeline only loads filesystem .task.ts files and
+            // Studio-owned tasks are invisible.
+            taskMode: "content-lake",
             // Release-scoped fields
             ...(hasPerspective ? { perspective: payload.perspective } : {}),
             // Task-scoped fields

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@sanity/ailf",
-  "version": "2.2.0",
+  "version": "2.3.1",
   "private": false,
   "publishConfig": {
     "access": "public"