npm - @sanity/ailf - Versions diffs - 4.0.6 → 4.1.0 - Mend

@sanity/ailf 4.0.6 → 4.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (79)

package/bin/ailf.js +6 -1
package/dist/_vendor/ailf-core/schemas/external-providers.d.ts +136 -0
package/dist/_vendor/ailf-core/schemas/external-providers.js +136 -0
package/dist/_vendor/ailf-core/schemas/index.d.ts +2 -0
package/dist/_vendor/ailf-core/schemas/index.js +2 -0
package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +2 -3
package/dist/_vendor/ailf-core/schemas/report.d.ts +251 -0
package/dist/_vendor/ailf-core/schemas/report.js +235 -0
package/dist/_vendor/ailf-core/services/index.d.ts +1 -0
package/dist/_vendor/ailf-core/services/index.js +1 -0
package/dist/_vendor/ailf-core/services/report-to-markdown.d.ts +38 -0
package/dist/_vendor/ailf-core/services/report-to-markdown.js +696 -0
package/dist/_vendor/ailf-core/types/api-requests.d.ts +159 -0
package/dist/_vendor/ailf-core/types/api-requests.js +27 -0
package/dist/_vendor/ailf-core/types/index.d.ts +3 -0
package/dist/_vendor/ailf-core/types/pipeline-request.d.ts +112 -0
package/dist/_vendor/ailf-core/types/pipeline-request.js +18 -0
package/dist/_vendor/ailf-core/types/repo-config.d.ts +146 -0
package/dist/_vendor/ailf-core/types/repo-config.js +18 -0
package/dist/_vendor/ailf-shared/index.d.ts +7 -5
package/dist/_vendor/ailf-shared/index.js +7 -5
package/dist/adapters/api-client/types.d.ts +2 -5
package/dist/adapters/task-sources/content-lake-task-source.d.ts +58 -1
package/dist/adapters/task-sources/content-lake-task-source.js +1 -1
package/dist/adapters/task-sources/index.d.ts +1 -1
package/dist/adapters/task-sources/index.js +1 -1
package/dist/adapters/task-sources/repo-schemas.d.ts +3 -2
package/dist/adapters/task-sources/repo-schemas.js +3 -1
package/dist/adapters/task-sources/repo-task-source.d.ts +11 -1
package/dist/adapters/task-sources/repo-task-source.js +7 -4
package/dist/adapters/task-sources/repo-validation.d.ts +6 -6
package/dist/adapters/task-sources/repo-validation.js +1 -1
package/dist/agent-observer/agentic-provider.d.ts +1 -0
package/dist/agent-observer/agentic-provider.js +43 -36
package/dist/agent-observer/config-schemas.d.ts +61 -0
package/dist/agent-observer/config-schemas.js +65 -0
package/dist/agent-observer/provider.d.ts +1 -0
package/dist/agent-observer/provider.js +19 -17
package/dist/cli.js +4 -4
package/dist/commands/validate-tasks.js +2 -2
package/dist/composition-root.d.ts +7 -0
package/dist/composition-root.js +27 -12
package/dist/index.d.ts +1 -1
package/dist/index.js +1 -1
package/dist/job-store.js +2 -2
package/dist/lib/dotenv-resolution.d.ts +21 -0
package/dist/lib/dotenv-resolution.js +30 -0
package/dist/orchestration/steps/fetch-docs-step.js +10 -30
package/dist/orchestration/steps/generate-configs-step.d.ts +8 -15
package/dist/orchestration/steps/generate-configs-step.js +26 -118
package/dist/orchestration/steps/mirror-repo-tasks-step.js +26 -3
package/dist/orchestration/steps/run-eval-step.js +21 -3
package/dist/pipeline/agent-behavior-report.d.ts +2 -8
package/dist/pipeline/cache.d.ts +2 -2
package/dist/pipeline/checks.d.ts +10 -2
package/dist/pipeline/checks.js +14 -4
package/dist/pipeline/compiler/literacy-bridge.js +2 -2
package/dist/pipeline/compiler/mode-handlers/agent-harness/types.d.ts +2 -2
package/dist/pipeline/compiler/mode-handlers/index.d.ts +1 -1
package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.d.ts +2 -2
package/dist/pipeline/compiler/mode-handlers/literacy/index.d.ts +1 -1
package/dist/pipeline/compiler/mode-handlers/literacy/types.d.ts +3 -3
package/dist/pipeline/compiler/promptfoo-compiler.js +7 -11
package/dist/pipeline/compiler/provider-assembler.js +33 -3
package/dist/pipeline/compiler/rubric-resolution.d.ts +2 -2
package/dist/pipeline/mirror-repo-tasks.d.ts +13 -5
package/dist/pipeline/mirror-repo-tasks.js +16 -8
package/dist/pipeline/pr-comment.d.ts +22 -9
package/dist/pipeline/pr-comment.js +52 -472
package/dist/pipeline/resolve-mappings.d.ts +8 -3
package/dist/promptfoo-providers/mock-path.d.ts +12 -0
package/dist/promptfoo-providers/mock-path.js +15 -0
package/dist/report-store.d.ts +63 -1
package/dist/report-store.js +111 -31
package/dist/sanity/client.d.ts +58 -0
package/dist/sanity/client.js +106 -0
package/package.json +8 -7
package/dist/orchestration/load-pipeline-tasks.d.ts +0 -40
package/dist/orchestration/load-pipeline-tasks.js +0 -57

package/dist/adapters/task-sources/repo-schemas.d.ts CHANGED Viewed

@@ -16,6 +16,7 @@
  * @see docs/archive/exec-plans/tasks-as-content/phase-4-repo-based-tasks.md
  */
 import { z } from "zod";
+import type { AilfEvalWorkflow, RepoConfig } from "../../_vendor/ailf-core/index.d.ts";
 /**
  * The set of assertion types allowed in task files.
  *
@@ -1521,7 +1522,7 @@ export declare const RepoConfigSchema: z.ZodObject<{
         }, z.core.$strip>>;
     }, z.core.$strip>>;
 }, z.core.$strip>;
-export type RepoConfig = z.infer<typeof RepoConfigSchema>;
+export type { RepoConfig } from "../../_vendor/ailf-core/index.d.ts";
 /**
  * Parse and validate .ailf/config.yaml content. Returns typed config or throws.
  */
@@ -1551,7 +1552,7 @@ export declare const AilfEvalWorkflowSchema: z.ZodObject<{
         }, z.core.$loose>>;
     }, z.core.$loose>>;
 }, z.core.$loose>;
-export type AilfEvalWorkflow = z.infer<typeof AilfEvalWorkflowSchema>;
+export type { AilfEvalWorkflow } from "../../_vendor/ailf-core/index.d.ts";
 /**
  * Parse and validate a `.github/workflows/ailf-eval.yml` payload (already
  * loaded from YAML). Throws with a Zod-formatted message on failure.

package/dist/adapters/task-sources/repo-schemas.js CHANGED Viewed

@@ -334,7 +334,9 @@ export const ContentLakeAuthorableTaskSchema = LiteracyTaskSchema;
  * Schema for an array of canonical tasks — what a single .ailf/tasks/*.yaml
  * file contains. Each file must define at least one task.
  */
-export const CanonicalTaskFileSchema = z.array(CanonicalTaskSchema).min(1);
+export const CanonicalTaskFileSchema = z
+    .array(CanonicalTaskSchema)
+    .min(1);
 /**
  * Pre-process raw task entries before discriminated-union parsing: when
  * `mode` is missing, default it to `"literacy"`. Zod cannot default a

package/dist/adapters/task-sources/repo-task-source.d.ts CHANGED Viewed

@@ -19,8 +19,18 @@
  * @see packages/core/src/ports/task-source.ts — TaskSource port
  */
 import type { FilterOptions, GeneralizedTaskDefinition, TaskSource } from "../../_vendor/ailf-core/index.d.ts";
+export interface RepoTaskSourceOptions {
+    /**
+     * When true, treat a missing directory or empty task set as a valid
+     * empty result instead of throwing. Used by the composition root for
+     * the AILF-bundled `tasks/${mode}/` source, which is missing in some
+     * test rootDirs and modes that ship no defaults.
+     */
+    allowMissing?: boolean;
+}
 export declare class RepoTaskSource implements TaskSource {
     private readonly tasksDir;
-    constructor(tasksDir: string);
+    private readonly options;
+    constructor(tasksDir: string, options?: RepoTaskSourceOptions);
     loadTasks(filter?: FilterOptions): Promise<GeneralizedTaskDefinition[]>;
 }

package/dist/adapters/task-sources/repo-task-source.js CHANGED Viewed

@@ -26,16 +26,17 @@ import { detectLegacyFieldNames, parseCanonicalTaskFile, } from "./repo-schemas.
 import { discoverTsTaskFiles, loadTsTaskFile } from "./task-file-loader.js";
 /** Set of canonical mode names for O(1) lookup */
 const KNOWN_MODES = new Set(CANONICAL_EVAL_MODES);
-// ---------------------------------------------------------------------------
-// RepoTaskSource adapter
-// ---------------------------------------------------------------------------
 export class RepoTaskSource {
     tasksDir;
-    constructor(tasksDir) {
+    options;
+    constructor(tasksDir, options = {}) {
         this.tasksDir = tasksDir;
+        this.options = options;
     }
     async loadTasks(filter) {
         if (!existsSync(this.tasksDir)) {
+            if (this.options.allowMissing)
+                return [];
             throw new Error(`Repo tasks directory not found: ${this.tasksDir}\n` +
                 "  Provide a valid path via --repo-tasks-path");
         }
@@ -44,6 +45,8 @@ export class RepoTaskSource {
             .sort();
         const tsFiles = discoverTsTaskFiles(this.tasksDir);
         if (yamlFiles.length === 0 && tsFiles.length === 0) {
+            if (this.options.allowMissing)
+                return [];
             throw new Error(`No task files found in ${this.tasksDir}\n` +
                 "  Expected .ailf/tasks/*.yaml or .ailf/tasks/*.task.ts files");
         }

package/dist/adapters/task-sources/repo-validation.d.ts CHANGED Viewed

@@ -15,12 +15,12 @@
  * has been eliminated — all validation logic now lives here.
  */
 import { type CanonicalTask } from "./repo-schemas.js";
-export interface ValidationResult {
+export interface RepoValidationResult {
     valid: boolean;
-    errors: ValidationMessage[];
-    warnings: ValidationMessage[];
+    errors: RepoValidationMessage[];
+    warnings: RepoValidationMessage[];
 }
-export interface ValidationMessage {
+export interface RepoValidationMessage {
     taskId: string;
     field: string;
     message: string;
@@ -32,8 +32,8 @@ export interface ValidationMessage {
  * areas, unresolved slugs) and errors for issues that would cause pipeline
  * failures (completely missing required fields — though Zod catches most).
  */
-export declare function validateCanonicalTasks(tasks: CanonicalTask[]): ValidationResult;
+export declare function validateCanonicalTasks(tasks: CanonicalTask[]): RepoValidationResult;
 /**
  * Format validation results for console output.
  */
-export declare function formatValidationResult(result: ValidationResult): string;
+export declare function formatRepoValidationResult(result: RepoValidationResult): string;

package/dist/adapters/task-sources/repo-validation.js CHANGED Viewed

@@ -110,7 +110,7 @@ export function validateCanonicalTasks(tasks) {
 /**
  * Format validation results for console output.
  */
-export function formatValidationResult(result) {
+export function formatRepoValidationResult(result) {
     const lines = [];
     if (result.errors.length > 0) {
         lines.push("Errors:");

package/dist/agent-observer/agentic-provider.d.ts CHANGED Viewed

@@ -63,6 +63,7 @@ export default class AgenticProvider {
     private docBaseUrl;
     private docsUrlPattern;
     private llmsTxtUrl;
+    private parsedConfig;
     private priorityDomain;
     private recorder;
     private searchMode;

package/dist/agent-observer/agentic-provider.js CHANGED Viewed

@@ -31,6 +31,8 @@
  */
 import { config as loadDotenv } from "dotenv";
 import { randomUUID } from "crypto";
+import { AnthropicResponseSchema, FetchPageToolArgsSchema, GoogleSearchResponseSchema, ListDocsToolArgsSchema, OpenAIChatResponseSchema, WebSearchToolArgsSchema, } from "../_vendor/ailf-core/index.js";
+import { AgenticProviderConfigSchema, } from "./config-schemas.js";
 import { RequestRecorder } from "./proxy.js";
 import { calculateCost } from "./pricing.js";
 import { isAllowedOrigin } from "../sources.js";
@@ -120,32 +122,31 @@ export default class AgenticProvider {
     docBaseUrl;
     docsUrlPattern;
     llmsTxtUrl;
+    parsedConfig;
     priorityDomain;
     recorder;
     searchMode;
     constructor(options) {
         this.providerId = options.id ?? "agentic-observer";
         this.config = options.config ?? {};
-        this.agentMode = this.config.agentMode || "naive";
+        this.parsedConfig = AgenticProviderConfigSchema.parse(this.config);
+        this.agentMode = this.parsedConfig.agentMode ?? "naive";
         // Documentation source configuration — defaults to Sanity production
-        this.docBaseUrl = this.config.docBaseUrl || DEFAULT_DOC_BASE_URL;
-        this.llmsTxtUrl = this.config.llmsTxtUrl || DEFAULT_LLMS_TXT_URL;
+        this.docBaseUrl = this.parsedConfig.docBaseUrl ?? DEFAULT_DOC_BASE_URL;
+        this.llmsTxtUrl = this.parsedConfig.llmsTxtUrl ?? DEFAULT_LLMS_TXT_URL;
         this.docsUrlPattern = buildDocsUrlPattern(this.docBaseUrl);
         // Custom HTTP headers (e.g., Vercel bypass protection token)
-        this.customHeaders =
-            this.config.customHeaders || {};
+        this.customHeaders = this.parsedConfig.customHeaders ?? {};
         // Extract priority domain from docBaseUrl for search result ranking
         const baseUrlObj = new URL(this.docBaseUrl);
         this.priorityDomain =
-            this.config.priorityDomain ||
+            this.parsedConfig.priorityDomain ??
                 baseUrlObj.hostname.replace(/^www\./, "");
         // Optional origin sandboxing — restrict which URLs the agent can access
-        this.allowedOrigins = Array.isArray(this.config.allowedOrigins)
-            ? this.config.allowedOrigins.filter(Boolean)
-            : [];
+        this.allowedOrigins = (this.parsedConfig.allowedOrigins ?? []).filter(Boolean);
         // Search mode: controls web_search tool availability and filtering
-        this.searchMode = this.config.searchMode || "open";
-        this.recorder = new RequestRecorder(this.config.observerOptions || {});
+        this.searchMode = this.parsedConfig.searchMode ?? "open";
+        this.recorder = new RequestRecorder(this.parsedConfig.observerOptions ?? {});
     }
     /**
      * Main Promptfoo provider entry point. Runs the full agentic loop.
@@ -155,7 +156,7 @@ export default class AgenticProvider {
         const taskDescription = context?.vars?.task ||
             context?.prompt?.label ||
             "unknown-task";
-        const observe = this.config.observe !== false;
+        const observe = this.parsedConfig.observe !== false;
         if (observe) {
             this.recorder.start(sessionId, this.id(), taskDescription);
         }
@@ -188,7 +189,7 @@ export default class AgenticProvider {
         return this.recorder;
     }
     id() {
-        const model = this.config.model || this.providerId;
+        const model = this.parsedConfig.model ?? this.providerId;
         return `agentic:${this.agentMode}:${model}`;
     }
     // -------------------------------------------------------------------------
@@ -276,13 +277,13 @@ export default class AgenticProvider {
      * fallback heuristics for backward compatibility.
      */
     detectProvider() {
-        const explicit = this.config.provider;
+        const explicit = this.parsedConfig.provider;
         if (explicit === "anthropic")
             return "anthropic";
         if (explicit === "openai")
             return "openai";
         // Heuristic fallback: detect from model name
-        const model = this.config.model || "";
+        const model = this.parsedConfig.model ?? "";
         if (model.startsWith("claude"))
             return "anthropic";
         return "openai";
@@ -421,14 +422,20 @@ export default class AgenticProvider {
     }
     async executeTool(name, argsJson, fetchFn) {
         try {
-            const args = JSON.parse(argsJson);
+            const rawArgs = JSON.parse(argsJson);
             switch (name) {
-                case "fetch_page":
+                case "fetch_page": {
+                    const args = FetchPageToolArgsSchema.parse(rawArgs);
                     return await this.executeFetchPage(args.url, fetchFn);
-                case "list_docs":
+                }
+                case "list_docs": {
+                    const args = ListDocsToolArgsSchema.parse(rawArgs);
                     return await this.executeListDocs(args.site, fetchFn);
-                case "web_search":
+                }
+                case "web_search": {
+                    const args = WebSearchToolArgsSchema.parse(rawArgs);
                     return await this.executeWebSearch(args.query, fetchFn);
+                }
                 default:
                     return JSON.stringify({ error: `Unknown tool: ${name}` });
             }
@@ -454,7 +461,7 @@ export default class AgenticProvider {
                 q: query,
             });
             const response = await fetchFn(`https://www.googleapis.com/customsearch/v1?${params}`);
-            const data = (await response.json());
+            const data = GoogleSearchResponseSchema.parse(await response.json());
             if (data.items?.length) {
                 results = data.items.map((item) => ({
                     snippet: item.snippet,
@@ -617,11 +624,11 @@ export default class AgenticProvider {
     // Anthropic agentic loop
     // -------------------------------------------------------------------------
     async runAnthropicLoop(prompt) {
-        const model = this.config.model || "claude-sonnet-4-20250514";
-        const temperature = this.config.temperature ?? 0.2;
-        const maxTokens = this.config.max_tokens || 4096;
-        const maxToolRounds = this.config.maxToolRounds || 5;
-        const apiKey = this.config.apiKey || process.env.ANTHROPIC_API_KEY;
+        const model = this.parsedConfig.model ?? "claude-sonnet-4-20250514";
+        const temperature = this.parsedConfig.temperature ?? 0.2;
+        const maxTokens = this.parsedConfig.max_tokens ?? 4096;
+        const maxToolRounds = this.parsedConfig.maxToolRounds ?? 5;
+        const apiKey = this.parsedConfig.apiKey ?? process.env.ANTHROPIC_API_KEY;
         if (!apiKey) {
             return {
                 error: "ANTHROPIC_API_KEY not set. Configure it in env or provider config.",
@@ -691,7 +698,7 @@ export default class AgenticProvider {
                 },
                 method: "POST",
             });
-            const data = (await response.json());
+            const data = AnthropicResponseSchema.parse(await response.json());
             if (data.error) {
                 return {
                     error: data.error.message ??
@@ -799,22 +806,22 @@ export default class AgenticProvider {
     // OpenAI agentic loop
     // -------------------------------------------------------------------------
     async runOpenAILoop(prompt) {
-        const model = this.config.model || "gpt-4o";
-        const temperature = this.config.temperature ?? 0.2;
-        const maxToolRounds = this.config.maxToolRounds || 5;
-        const apiKey = this.config.apiKey || process.env.OPENAI_API_KEY;
+        const model = this.parsedConfig.model ?? "gpt-4o";
+        const temperature = this.parsedConfig.temperature ?? 0.2;
+        const maxToolRounds = this.parsedConfig.maxToolRounds ?? 5;
+        const apiKey = this.parsedConfig.apiKey ?? process.env.OPENAI_API_KEY;
         // Newer OpenAI models (gpt-5.x, o-series) use max_completion_tokens
         // instead of max_tokens, and reject custom temperature values. Detect
         // from config or model name. See W0131.
         const isReasoningModel = model.startsWith("gpt-5") ||
             model.startsWith("o3") ||
             model.startsWith("o4");
-        const useMaxCompletionTokens = this.config.max_output_tokens != null ||
-            this.config.max_completion_tokens != null ||
+        const useMaxCompletionTokens = this.parsedConfig.max_output_tokens != null ||
+            this.parsedConfig.max_completion_tokens != null ||
             isReasoningModel;
-        const maxTokensValue = this.config.max_output_tokens ??
-            this.config.max_completion_tokens ??
-            this.config.max_tokens ??
+        const maxTokensValue = this.parsedConfig.max_output_tokens ??
+            this.parsedConfig.max_completion_tokens ??
+            this.parsedConfig.max_tokens ??
             4096;
         const tokenLimitParam = useMaxCompletionTokens
             ? { max_completion_tokens: maxTokensValue }
@@ -860,7 +867,7 @@ export default class AgenticProvider {
                 },
                 method: "POST",
             });
-            const data = (await response.json());
+            const data = OpenAIChatResponseSchema.parse(await response.json());
             if (data.error) {
                 return {
                     error: data.error.message ?? "Unknown OpenAI error",

package/dist/agent-observer/config-schemas.d.ts ADDED Viewed

@@ -0,0 +1,61 @@
+/**
+ * config-schemas.ts
+ *
+ * Zod schemas for the promptfoo provider config blocks read by the
+ * agent-observer providers. Promptfoo passes config as
+ * `Record<string, unknown>`; parsing it once at the constructor turns
+ * those untyped reads into a typed struct and surfaces typos / wrong
+ * shapes as clear `ZodError`s instead of silent `undefined` reads.
+ *
+ * Lives in `eval` (not `core`) because these schemas are 1:1 with the
+ * providers' constructor surfaces and have no consumers outside this
+ * package. See docs/work-items/W0004.json.
+ */
+import { z } from "zod";
+export declare const AgenticProviderConfigSchema: z.ZodObject<{
+    agentMode: z.ZodOptional<z.ZodEnum<{
+        naive: "naive";
+        optimized: "optimized";
+    }>>;
+    allowedOrigins: z.ZodOptional<z.ZodArray<z.ZodString>>;
+    apiKey: z.ZodOptional<z.ZodString>;
+    customHeaders: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
+    docBaseUrl: z.ZodOptional<z.ZodString>;
+    llmsTxtUrl: z.ZodOptional<z.ZodString>;
+    max_completion_tokens: z.ZodOptional<z.ZodNumber>;
+    max_output_tokens: z.ZodOptional<z.ZodNumber>;
+    max_tokens: z.ZodOptional<z.ZodNumber>;
+    maxToolRounds: z.ZodOptional<z.ZodNumber>;
+    model: z.ZodOptional<z.ZodString>;
+    observe: z.ZodOptional<z.ZodBoolean>;
+    observerOptions: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
+    priorityDomain: z.ZodOptional<z.ZodString>;
+    provider: z.ZodOptional<z.ZodEnum<{
+        anthropic: "anthropic";
+        openai: "openai";
+    }>>;
+    searchMode: z.ZodOptional<z.ZodEnum<{
+        open: "open";
+        off: "off";
+        "origin-only": "origin-only";
+    }>>;
+    temperature: z.ZodOptional<z.ZodNumber>;
+}, z.core.$strip>;
+export type AgenticProviderConfig = z.infer<typeof AgenticProviderConfigSchema>;
+export declare const InstrumentedProviderConfigSchema: z.ZodObject<{
+    apiKey: z.ZodOptional<z.ZodString>;
+    max_output_tokens: z.ZodOptional<z.ZodNumber>;
+    max_tokens: z.ZodOptional<z.ZodNumber>;
+    model: z.ZodOptional<z.ZodString>;
+    modelName: z.ZodOptional<z.ZodString>;
+    observe: z.ZodOptional<z.ZodBoolean>;
+    observerOptions: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
+    reasoning: z.ZodOptional<z.ZodObject<{
+        effort: z.ZodOptional<z.ZodString>;
+        summary: z.ZodOptional<z.ZodString>;
+    }, z.core.$strip>>;
+    reasoning_effort: z.ZodOptional<z.ZodString>;
+    temperature: z.ZodOptional<z.ZodNumber>;
+    verbosity: z.ZodOptional<z.ZodString>;
+}, z.core.$strip>;
+export type InstrumentedProviderConfig = z.infer<typeof InstrumentedProviderConfigSchema>;

package/dist/agent-observer/config-schemas.js ADDED Viewed

@@ -0,0 +1,65 @@
+/**
+ * config-schemas.ts
+ *
+ * Zod schemas for the promptfoo provider config blocks read by the
+ * agent-observer providers. Promptfoo passes config as
+ * `Record<string, unknown>`; parsing it once at the constructor turns
+ * those untyped reads into a typed struct and surfaces typos / wrong
+ * shapes as clear `ZodError`s instead of silent `undefined` reads.
+ *
+ * Lives in `eval` (not `core`) because these schemas are 1:1 with the
+ * providers' constructor surfaces and have no consumers outside this
+ * package. See docs/work-items/W0004.json.
+ */
+import { z } from "zod";
+// ---------------------------------------------------------------------------
+// Shared sub-schemas
+// ---------------------------------------------------------------------------
+/**
+ * `RecorderOptions` is owned by `proxy.ts` and has its own resolution
+ * logic. We accept any object here and let RequestRecorder do the rest;
+ * no double-validation at this boundary.
+ */
+const ObserverOptionsSchema = z.record(z.string(), z.unknown());
+const ReasoningSchema = z.object({
+    effort: z.string().optional(),
+    summary: z.string().optional(),
+});
+// ---------------------------------------------------------------------------
+// AgenticProvider config
+// ---------------------------------------------------------------------------
+export const AgenticProviderConfigSchema = z.object({
+    agentMode: z.enum(["naive", "optimized"]).optional(),
+    allowedOrigins: z.array(z.string()).optional(),
+    apiKey: z.string().optional(),
+    customHeaders: z.record(z.string(), z.string()).optional(),
+    docBaseUrl: z.string().optional(),
+    llmsTxtUrl: z.string().optional(),
+    max_completion_tokens: z.number().optional(),
+    max_output_tokens: z.number().optional(),
+    max_tokens: z.number().optional(),
+    maxToolRounds: z.number().optional(),
+    model: z.string().optional(),
+    observe: z.boolean().optional(),
+    observerOptions: ObserverOptionsSchema.optional(),
+    priorityDomain: z.string().optional(),
+    provider: z.enum(["anthropic", "openai"]).optional(),
+    searchMode: z.enum(["off", "open", "origin-only"]).optional(),
+    temperature: z.number().optional(),
+});
+// ---------------------------------------------------------------------------
+// InstrumentedProvider config (provider.ts — wraps OpenAI directly)
+// ---------------------------------------------------------------------------
+export const InstrumentedProviderConfigSchema = z.object({
+    apiKey: z.string().optional(),
+    max_output_tokens: z.number().optional(),
+    max_tokens: z.number().optional(),
+    model: z.string().optional(),
+    modelName: z.string().optional(),
+    observe: z.boolean().optional(),
+    observerOptions: ObserverOptionsSchema.optional(),
+    reasoning: ReasoningSchema.optional(),
+    reasoning_effort: z.string().optional(),
+    temperature: z.number().optional(),
+    verbosity: z.string().optional(),
+});

package/dist/agent-observer/provider.d.ts CHANGED Viewed

@@ -56,6 +56,7 @@ interface ProviderResponse {
 export default class InstrumentedProvider {
     config: Record<string, unknown>;
     protected providerId: string;
+    private parsedConfig;
     private recorder;
     constructor(options: ProviderOptions);
     /**

package/dist/agent-observer/provider.js CHANGED Viewed

@@ -30,6 +30,8 @@
  */
 import { config as loadDotenv } from "dotenv";
 import { randomUUID } from "crypto";
+import { OpenAIChatResponseSchema, OpenAIResponsesResponseSchema, } from "../_vendor/ailf-core/index.js";
+import { InstrumentedProviderConfigSchema, } from "./config-schemas.js";
 import { RequestRecorder } from "./proxy.js";
 import { calculateCost } from "./pricing.js";
 loadDotenv({
@@ -42,11 +44,13 @@ loadDotenv({
 export default class InstrumentedProvider {
     config;
     providerId;
+    parsedConfig;
     recorder;
     constructor(options) {
         this.providerId = options.id ?? "instrumented-observer";
         this.config = options.config ?? {};
-        this.recorder = new RequestRecorder(this.config.observerOptions ?? {});
+        this.parsedConfig = InstrumentedProviderConfigSchema.parse(this.config);
+        this.recorder = new RequestRecorder(this.parsedConfig.observerOptions ?? {});
     }
     /**
      * Main Promptfoo provider entry point. Called for each test case.
@@ -56,7 +60,7 @@ export default class InstrumentedProvider {
         const taskDescription = context?.vars?.task ||
             context?.prompt?.label ||
             "unknown-task";
-        const observe = this.config.observe !== false;
+        const observe = this.parsedConfig.observe !== false;
         // Start observation
         if (observe) {
             this.recorder.start(sessionId, this.id(), taskDescription);
@@ -97,9 +101,9 @@ export default class InstrumentedProvider {
      * Detect whether the model should use the Responses API based on config.
      */
     isResponsesModel() {
-        const model = this.config.modelName || this.config.model || "";
-        return (this.config.reasoning != null ||
-            this.config.reasoning_effort != null ||
+        const model = this.parsedConfig.modelName ?? this.parsedConfig.model ?? "";
+        return (this.parsedConfig.reasoning != null ||
+            this.parsedConfig.reasoning_effort != null ||
             model.startsWith("gpt-5") ||
             model.startsWith("o1") ||
             model.startsWith("o3") ||
@@ -111,10 +115,8 @@ export default class InstrumentedProvider {
      * recorder's fetch wrapper so the API call is captured.
      */
     async callOpenAI(prompt) {
-        const model = this.config.modelName ||
-            this.config.model ||
-            "gpt-4o";
-        const apiKey = this.config.apiKey || process.env.OPENAI_API_KEY;
+        const model = this.parsedConfig.modelName ?? this.parsedConfig.model ?? "gpt-4o";
+        const apiKey = this.parsedConfig.apiKey ?? process.env.OPENAI_API_KEY;
         if (!apiKey) {
             return {
                 error: "OPENAI_API_KEY not set. Configure it in env or provider config.",
@@ -130,8 +132,8 @@ export default class InstrumentedProvider {
      * Calls OpenAI Chat Completions API (non-reasoning models).
      */
     async callOpenAIChatCompletions(prompt, model, apiKey) {
-        const temperature = this.config.temperature ?? 0;
-        const maxTokens = this.config.max_tokens || 4096;
+        const temperature = this.parsedConfig.temperature ?? 0;
+        const maxTokens = this.parsedConfig.max_tokens ?? 4096;
         const fetchFn = this.recorder.isRunning()
             ? this.recorder.fetch.bind(this.recorder)
             : globalThis.fetch;
@@ -149,7 +151,7 @@ export default class InstrumentedProvider {
             },
             method: "POST",
         });
-        const data = (await response.json());
+        const data = OpenAIChatResponseSchema.parse(await response.json());
         if (data.error) {
             return {
                 error: data.error.message ?? "Unknown OpenAI error",
@@ -175,11 +177,11 @@ export default class InstrumentedProvider {
      * Calls OpenAI Responses API (reasoning models like GPT-5.x, o-series).
      */
     async callOpenAIResponses(prompt, model, apiKey) {
-        const maxOutputTokens = this.config.max_output_tokens || 32_000;
-        const reasoning = this.config.reasoning;
-        const reasoningEffort = reasoning?.effort || this.config.reasoning_effort || "medium";
+        const maxOutputTokens = this.parsedConfig.max_output_tokens ?? 32_000;
+        const reasoning = this.parsedConfig.reasoning;
+        const reasoningEffort = reasoning?.effort ?? this.parsedConfig.reasoning_effort ?? "medium";
         const reasoningSummary = reasoning?.summary;
-        const verbosity = this.config.verbosity;
+        const verbosity = this.parsedConfig.verbosity;
         const fetchFn = this.recorder.isRunning()
             ? this.recorder.fetch.bind(this.recorder)
             : globalThis.fetch;
@@ -201,7 +203,7 @@ export default class InstrumentedProvider {
             },
             method: "POST",
         });
-        const data = (await response.json());
+        const data = OpenAIResponsesResponseSchema.parse(await response.json());
         if (data.error) {
             return {
                 error: data.error.message ?? "Unknown OpenAI error",

package/dist/cli.js CHANGED Viewed

@@ -37,6 +37,7 @@ import { existsSync } from "fs";
 import { dirname, resolve } from "path";
 import { fileURLToPath } from "url";
 import { buildCliProgram } from "./cli-program.js";
+import { findExplicitDotenvArg } from "./lib/dotenv-resolution.js";
 const __dirname = dirname(fileURLToPath(import.meta.url));
 const ROOT = resolve(__dirname, "..");
 // ---------------------------------------------------------------------------
@@ -52,10 +53,9 @@ const ROOT = resolve(__dirname, "..");
 // installed globally via npm (production).
 // ---------------------------------------------------------------------------
 function resolveEnvPath() {
-    const idx = process.argv.indexOf("--dotenv");
-    if (idx !== -1 && process.argv[idx + 1]) {
-        return resolve(process.argv[idx + 1]);
-    }
+    const explicit = findExplicitDotenvArg();
+    if (explicit)
+        return explicit;
     // Monorepo root .env (dev mode)
     const monorepoEnv = resolve(ROOT, "..", "..", ".env");
     if (existsSync(monorepoEnv))

package/dist/commands/validate-tasks.js CHANGED Viewed

@@ -18,7 +18,7 @@ import { resolve, relative, basename } from "path";
 import { Command } from "commander";
 import { load } from "js-yaml";
 import { detectLegacyFieldNames, parseCanonicalTaskFile, } from "../adapters/task-sources/repo-schemas.js";
-import { validateCanonicalTasks, formatValidationResult, } from "../adapters/task-sources/repo-validation.js";
+import { validateCanonicalTasks, formatRepoValidationResult, } from "../adapters/task-sources/repo-validation.js";
 import { discoverTsTaskFiles, loadTsTaskFile, } from "../adapters/task-sources/task-file-loader.js";
 export function createValidateTasksCommand() {
     return new Command("tasks")
@@ -103,7 +103,7 @@ export async function runValidateTasks(tasksPath, opts) {
     if (allTasks.length > 0) {
         console.log();
         const semanticResult = validateCanonicalTasks(allTasks);
-        const formatted = formatValidationResult(semanticResult);
+        const formatted = formatRepoValidationResult(semanticResult);
         console.log(formatted);
         if (!semanticResult.valid) {
             hasErrors = true;

package/dist/composition-root.d.ts CHANGED Viewed

@@ -16,6 +16,7 @@
  * @see docs/archive/exec-plans/ports-and-adapters/phase-7-composition-root.md
  */
 import { type AppContext, type ArtifactWriter, type ArtifactWriterProgressOptions, type AssertionRegistration, type Logger, type ResolvedConfig } from "./_vendor/ailf-core/index.d.ts";
+import { CompositeTaskSource, ContentLakeTaskSource, RepoTaskSource } from "./adapters/task-sources/index.js";
 /**
  * Create a fully wired AppContext from resolved configuration.
  *
@@ -42,6 +43,12 @@ export declare function createAppContext(config: ResolvedConfig): AppContext;
  * Exported for unit-test access; not part of the public package API.
  */
 export declare function createArtifactWriter(config: ResolvedConfig, logger: Logger, progress?: ArtifactWriterProgressOptions): ArtifactWriter;
+/**
+ * Build the `TaskSource` adapter wired by the composition root for a
+ * given `ResolvedConfig`. Exported for test access — composition-root
+ * wiring is a contract worth asserting directly.
+ */
+export declare function createTaskSource(config: ResolvedConfig): CompositeTaskSource | ContentLakeTaskSource | RepoTaskSource;
 /**
  * Generic Promptfoo assertion types available to all evaluation modes.
  *