npm - @sanity/ailf - Versions diffs - 4.0.7 → 4.1.0 - Mend

@sanity/ailf 4.0.7 → 4.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (71)

package/bin/ailf.js +6 -1
package/dist/_vendor/ailf-core/schemas/external-providers.d.ts +136 -0
package/dist/_vendor/ailf-core/schemas/external-providers.js +136 -0
package/dist/_vendor/ailf-core/schemas/index.d.ts +2 -0
package/dist/_vendor/ailf-core/schemas/index.js +2 -0
package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +2 -3
package/dist/_vendor/ailf-core/schemas/report.d.ts +251 -0
package/dist/_vendor/ailf-core/schemas/report.js +235 -0
package/dist/_vendor/ailf-core/services/index.d.ts +1 -0
package/dist/_vendor/ailf-core/services/index.js +1 -0
package/dist/_vendor/ailf-core/services/report-to-markdown.d.ts +38 -0
package/dist/_vendor/ailf-core/services/report-to-markdown.js +696 -0
package/dist/_vendor/ailf-core/types/api-requests.d.ts +159 -0
package/dist/_vendor/ailf-core/types/api-requests.js +27 -0
package/dist/_vendor/ailf-core/types/index.d.ts +3 -0
package/dist/_vendor/ailf-core/types/pipeline-request.d.ts +112 -0
package/dist/_vendor/ailf-core/types/pipeline-request.js +18 -0
package/dist/_vendor/ailf-core/types/repo-config.d.ts +146 -0
package/dist/_vendor/ailf-core/types/repo-config.js +18 -0
package/dist/_vendor/ailf-shared/index.d.ts +7 -5
package/dist/_vendor/ailf-shared/index.js +7 -5
package/dist/adapters/api-client/types.d.ts +2 -5
package/dist/adapters/task-sources/content-lake-task-source.d.ts +58 -1
package/dist/adapters/task-sources/content-lake-task-source.js +1 -1
package/dist/adapters/task-sources/index.d.ts +1 -1
package/dist/adapters/task-sources/index.js +1 -1
package/dist/adapters/task-sources/repo-schemas.d.ts +3 -2
package/dist/adapters/task-sources/repo-schemas.js +3 -1
package/dist/adapters/task-sources/repo-validation.d.ts +6 -6
package/dist/adapters/task-sources/repo-validation.js +1 -1
package/dist/agent-observer/agentic-provider.d.ts +1 -0
package/dist/agent-observer/agentic-provider.js +43 -36
package/dist/agent-observer/config-schemas.d.ts +61 -0
package/dist/agent-observer/config-schemas.js +65 -0
package/dist/agent-observer/provider.d.ts +1 -0
package/dist/agent-observer/provider.js +19 -17
package/dist/cli.js +4 -4
package/dist/commands/validate-tasks.js +2 -2
package/dist/composition-root.js +4 -2
package/dist/index.d.ts +1 -1
package/dist/index.js +1 -1
package/dist/job-store.js +2 -2
package/dist/lib/dotenv-resolution.d.ts +21 -0
package/dist/lib/dotenv-resolution.js +30 -0
package/dist/orchestration/steps/mirror-repo-tasks-step.js +14 -3
package/dist/orchestration/steps/run-eval-step.js +21 -3
package/dist/pipeline/agent-behavior-report.d.ts +2 -8
package/dist/pipeline/cache.d.ts +2 -2
package/dist/pipeline/checks.d.ts +10 -2
package/dist/pipeline/checks.js +14 -4
package/dist/pipeline/compiler/literacy-bridge.js +2 -2
package/dist/pipeline/compiler/mode-handlers/agent-harness/types.d.ts +2 -2
package/dist/pipeline/compiler/mode-handlers/index.d.ts +1 -1
package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.d.ts +2 -2
package/dist/pipeline/compiler/mode-handlers/literacy/index.d.ts +1 -1
package/dist/pipeline/compiler/mode-handlers/literacy/types.d.ts +3 -3
package/dist/pipeline/compiler/promptfoo-compiler.js +7 -11
package/dist/pipeline/compiler/provider-assembler.js +33 -3
package/dist/pipeline/compiler/rubric-resolution.d.ts +2 -2
package/dist/pipeline/mirror-repo-tasks.d.ts +13 -5
package/dist/pipeline/mirror-repo-tasks.js +16 -8
package/dist/pipeline/pr-comment.d.ts +22 -9
package/dist/pipeline/pr-comment.js +52 -472
package/dist/pipeline/resolve-mappings.d.ts +8 -3
package/dist/promptfoo-providers/mock-path.d.ts +12 -0
package/dist/promptfoo-providers/mock-path.js +15 -0
package/dist/report-store.d.ts +63 -1
package/dist/report-store.js +111 -31
package/dist/sanity/client.d.ts +58 -0
package/dist/sanity/client.js +106 -0
package/package.json +8 -7

package/dist/adapters/task-sources/repo-schemas.d.ts CHANGED Viewed

@@ -16,6 +16,7 @@
  * @see docs/archive/exec-plans/tasks-as-content/phase-4-repo-based-tasks.md
  */
 import { z } from "zod";
+import type { AilfEvalWorkflow, RepoConfig } from "../../_vendor/ailf-core/index.d.ts";
 /**
  * The set of assertion types allowed in task files.
  *
@@ -1521,7 +1522,7 @@ export declare const RepoConfigSchema: z.ZodObject<{
         }, z.core.$strip>>;
     }, z.core.$strip>>;
 }, z.core.$strip>;
-export type RepoConfig = z.infer<typeof RepoConfigSchema>;
+export type { RepoConfig } from "../../_vendor/ailf-core/index.d.ts";
 /**
  * Parse and validate .ailf/config.yaml content. Returns typed config or throws.
  */
@@ -1551,7 +1552,7 @@ export declare const AilfEvalWorkflowSchema: z.ZodObject<{
         }, z.core.$loose>>;
     }, z.core.$loose>>;
 }, z.core.$loose>;
-export type AilfEvalWorkflow = z.infer<typeof AilfEvalWorkflowSchema>;
+export type { AilfEvalWorkflow } from "../../_vendor/ailf-core/index.d.ts";
 /**
  * Parse and validate a `.github/workflows/ailf-eval.yml` payload (already
  * loaded from YAML). Throws with a Zod-formatted message on failure.

package/dist/adapters/task-sources/repo-schemas.js CHANGED Viewed

@@ -334,7 +334,9 @@ export const ContentLakeAuthorableTaskSchema = LiteracyTaskSchema;
  * Schema for an array of canonical tasks — what a single .ailf/tasks/*.yaml
  * file contains. Each file must define at least one task.
  */
-export const CanonicalTaskFileSchema = z.array(CanonicalTaskSchema).min(1);
+export const CanonicalTaskFileSchema = z
+    .array(CanonicalTaskSchema)
+    .min(1);
 /**
  * Pre-process raw task entries before discriminated-union parsing: when
  * `mode` is missing, default it to `"literacy"`. Zod cannot default a

package/dist/adapters/task-sources/repo-validation.d.ts CHANGED Viewed

@@ -15,12 +15,12 @@
  * has been eliminated — all validation logic now lives here.
  */
 import { type CanonicalTask } from "./repo-schemas.js";
-export interface ValidationResult {
+export interface RepoValidationResult {
     valid: boolean;
-    errors: ValidationMessage[];
-    warnings: ValidationMessage[];
+    errors: RepoValidationMessage[];
+    warnings: RepoValidationMessage[];
 }
-export interface ValidationMessage {
+export interface RepoValidationMessage {
     taskId: string;
     field: string;
     message: string;
@@ -32,8 +32,8 @@ export interface ValidationMessage {
  * areas, unresolved slugs) and errors for issues that would cause pipeline
  * failures (completely missing required fields — though Zod catches most).
  */
-export declare function validateCanonicalTasks(tasks: CanonicalTask[]): ValidationResult;
+export declare function validateCanonicalTasks(tasks: CanonicalTask[]): RepoValidationResult;
 /**
  * Format validation results for console output.
  */
-export declare function formatValidationResult(result: ValidationResult): string;
+export declare function formatRepoValidationResult(result: RepoValidationResult): string;

package/dist/adapters/task-sources/repo-validation.js CHANGED Viewed

@@ -110,7 +110,7 @@ export function validateCanonicalTasks(tasks) {
 /**
  * Format validation results for console output.
  */
-export function formatValidationResult(result) {
+export function formatRepoValidationResult(result) {
     const lines = [];
     if (result.errors.length > 0) {
         lines.push("Errors:");

package/dist/agent-observer/agentic-provider.d.ts CHANGED Viewed

@@ -63,6 +63,7 @@ export default class AgenticProvider {
     private docBaseUrl;
     private docsUrlPattern;
     private llmsTxtUrl;
+    private parsedConfig;
     private priorityDomain;
     private recorder;
     private searchMode;

package/dist/agent-observer/agentic-provider.js CHANGED Viewed

@@ -31,6 +31,8 @@
  */
 import { config as loadDotenv } from "dotenv";
 import { randomUUID } from "crypto";
+import { AnthropicResponseSchema, FetchPageToolArgsSchema, GoogleSearchResponseSchema, ListDocsToolArgsSchema, OpenAIChatResponseSchema, WebSearchToolArgsSchema, } from "../_vendor/ailf-core/index.js";
+import { AgenticProviderConfigSchema, } from "./config-schemas.js";
 import { RequestRecorder } from "./proxy.js";
 import { calculateCost } from "./pricing.js";
 import { isAllowedOrigin } from "../sources.js";
@@ -120,32 +122,31 @@ export default class AgenticProvider {
     docBaseUrl;
     docsUrlPattern;
     llmsTxtUrl;
+    parsedConfig;
     priorityDomain;
     recorder;
     searchMode;
     constructor(options) {
         this.providerId = options.id ?? "agentic-observer";
         this.config = options.config ?? {};
-        this.agentMode = this.config.agentMode || "naive";
+        this.parsedConfig = AgenticProviderConfigSchema.parse(this.config);
+        this.agentMode = this.parsedConfig.agentMode ?? "naive";
         // Documentation source configuration — defaults to Sanity production
-        this.docBaseUrl = this.config.docBaseUrl || DEFAULT_DOC_BASE_URL;
-        this.llmsTxtUrl = this.config.llmsTxtUrl || DEFAULT_LLMS_TXT_URL;
+        this.docBaseUrl = this.parsedConfig.docBaseUrl ?? DEFAULT_DOC_BASE_URL;
+        this.llmsTxtUrl = this.parsedConfig.llmsTxtUrl ?? DEFAULT_LLMS_TXT_URL;
         this.docsUrlPattern = buildDocsUrlPattern(this.docBaseUrl);
         // Custom HTTP headers (e.g., Vercel bypass protection token)
-        this.customHeaders =
-            this.config.customHeaders || {};
+        this.customHeaders = this.parsedConfig.customHeaders ?? {};
         // Extract priority domain from docBaseUrl for search result ranking
         const baseUrlObj = new URL(this.docBaseUrl);
         this.priorityDomain =
-            this.config.priorityDomain ||
+            this.parsedConfig.priorityDomain ??
                 baseUrlObj.hostname.replace(/^www\./, "");
         // Optional origin sandboxing — restrict which URLs the agent can access
-        this.allowedOrigins = Array.isArray(this.config.allowedOrigins)
-            ? this.config.allowedOrigins.filter(Boolean)
-            : [];
+        this.allowedOrigins = (this.parsedConfig.allowedOrigins ?? []).filter(Boolean);
         // Search mode: controls web_search tool availability and filtering
-        this.searchMode = this.config.searchMode || "open";
-        this.recorder = new RequestRecorder(this.config.observerOptions || {});
+        this.searchMode = this.parsedConfig.searchMode ?? "open";
+        this.recorder = new RequestRecorder(this.parsedConfig.observerOptions ?? {});
     }
     /**
      * Main Promptfoo provider entry point. Runs the full agentic loop.
@@ -155,7 +156,7 @@ export default class AgenticProvider {
         const taskDescription = context?.vars?.task ||
             context?.prompt?.label ||
             "unknown-task";
-        const observe = this.config.observe !== false;
+        const observe = this.parsedConfig.observe !== false;
         if (observe) {
             this.recorder.start(sessionId, this.id(), taskDescription);
         }
@@ -188,7 +189,7 @@ export default class AgenticProvider {
         return this.recorder;
     }
     id() {
-        const model = this.config.model || this.providerId;
+        const model = this.parsedConfig.model ?? this.providerId;
         return `agentic:${this.agentMode}:${model}`;
     }
     // -------------------------------------------------------------------------
@@ -276,13 +277,13 @@ export default class AgenticProvider {
      * fallback heuristics for backward compatibility.
      */
     detectProvider() {
-        const explicit = this.config.provider;
+        const explicit = this.parsedConfig.provider;
         if (explicit === "anthropic")
             return "anthropic";
         if (explicit === "openai")
             return "openai";
         // Heuristic fallback: detect from model name
-        const model = this.config.model || "";
+        const model = this.parsedConfig.model ?? "";
         if (model.startsWith("claude"))
             return "anthropic";
         return "openai";
@@ -421,14 +422,20 @@ export default class AgenticProvider {
     }
     async executeTool(name, argsJson, fetchFn) {
         try {
-            const args = JSON.parse(argsJson);
+            const rawArgs = JSON.parse(argsJson);
             switch (name) {
-                case "fetch_page":
+                case "fetch_page": {
+                    const args = FetchPageToolArgsSchema.parse(rawArgs);
                     return await this.executeFetchPage(args.url, fetchFn);
-                case "list_docs":
+                }
+                case "list_docs": {
+                    const args = ListDocsToolArgsSchema.parse(rawArgs);
                     return await this.executeListDocs(args.site, fetchFn);
-                case "web_search":
+                }
+                case "web_search": {
+                    const args = WebSearchToolArgsSchema.parse(rawArgs);
                     return await this.executeWebSearch(args.query, fetchFn);
+                }
                 default:
                     return JSON.stringify({ error: `Unknown tool: ${name}` });
             }
@@ -454,7 +461,7 @@ export default class AgenticProvider {
                 q: query,
             });
             const response = await fetchFn(`https://www.googleapis.com/customsearch/v1?${params}`);
-            const data = (await response.json());
+            const data = GoogleSearchResponseSchema.parse(await response.json());
             if (data.items?.length) {
                 results = data.items.map((item) => ({
                     snippet: item.snippet,
@@ -617,11 +624,11 @@ export default class AgenticProvider {
     // OpenAI agentic loop
     // -------------------------------------------------------------------------
     async runAnthropicLoop(prompt) {
-        const model = this.config.model || "claude-sonnet-4-20250514";
-        const temperature = this.config.temperature ?? 0.2;
-        const maxTokens = this.config.max_tokens || 4096;
-        const maxToolRounds = this.config.maxToolRounds || 5;
-        const apiKey = this.config.apiKey || process.env.ANTHROPIC_API_KEY;
+        const model = this.parsedConfig.model ?? "claude-sonnet-4-20250514";
+        const temperature = this.parsedConfig.temperature ?? 0.2;
+        const maxTokens = this.parsedConfig.max_tokens ?? 4096;
+        const maxToolRounds = this.parsedConfig.maxToolRounds ?? 5;
+        const apiKey = this.parsedConfig.apiKey ?? process.env.ANTHROPIC_API_KEY;
         if (!apiKey) {
             return {
                 error: "ANTHROPIC_API_KEY not set. Configure it in env or provider config.",
@@ -691,7 +698,7 @@ export default class AgenticProvider {
                 },
                 method: "POST",
             });
-            const data = (await response.json());
+            const data = AnthropicResponseSchema.parse(await response.json());
             if (data.error) {
                 return {
                     error: data.error.message ??
@@ -799,22 +806,22 @@ export default class AgenticProvider {
     // Anthropic agentic loop
     // -------------------------------------------------------------------------
     async runOpenAILoop(prompt) {
-        const model = this.config.model || "gpt-4o";
-        const temperature = this.config.temperature ?? 0.2;
-        const maxToolRounds = this.config.maxToolRounds || 5;
-        const apiKey = this.config.apiKey || process.env.OPENAI_API_KEY;
+        const model = this.parsedConfig.model ?? "gpt-4o";
+        const temperature = this.parsedConfig.temperature ?? 0.2;
+        const maxToolRounds = this.parsedConfig.maxToolRounds ?? 5;
+        const apiKey = this.parsedConfig.apiKey ?? process.env.OPENAI_API_KEY;
         // Newer OpenAI models (gpt-5.x, o-series) use max_completion_tokens
         // instead of max_tokens, and reject custom temperature values. Detect
         // from config or model name. See W0131.
         const isReasoningModel = model.startsWith("gpt-5") ||
             model.startsWith("o3") ||
             model.startsWith("o4");
-        const useMaxCompletionTokens = this.config.max_output_tokens != null ||
-            this.config.max_completion_tokens != null ||
+        const useMaxCompletionTokens = this.parsedConfig.max_output_tokens != null ||
+            this.parsedConfig.max_completion_tokens != null ||
             isReasoningModel;
-        const maxTokensValue = this.config.max_output_tokens ??
-            this.config.max_completion_tokens ??
-            this.config.max_tokens ??
+        const maxTokensValue = this.parsedConfig.max_output_tokens ??
+            this.parsedConfig.max_completion_tokens ??
+            this.parsedConfig.max_tokens ??
             4096;
         const tokenLimitParam = useMaxCompletionTokens
             ? { max_completion_tokens: maxTokensValue }
@@ -860,7 +867,7 @@ export default class AgenticProvider {
                 },
                 method: "POST",
             });
-            const data = (await response.json());
+            const data = OpenAIChatResponseSchema.parse(await response.json());
             if (data.error) {
                 return {
                     error: data.error.message ?? "Unknown OpenAI error",

package/dist/agent-observer/config-schemas.d.ts ADDED Viewed

@@ -0,0 +1,61 @@
+/**
+ * config-schemas.ts
+ *
+ * Zod schemas for the promptfoo provider config blocks read by the
+ * agent-observer providers. Promptfoo passes config as
+ * `Record<string, unknown>`; parsing it once at the constructor turns
+ * those untyped reads into a typed struct and surfaces typos / wrong
+ * shapes as clear `ZodError`s instead of silent `undefined` reads.
+ *
+ * Lives in `eval` (not `core`) because these schemas are 1:1 with the
+ * providers' constructor surfaces and have no consumers outside this
+ * package. See docs/work-items/W0004.json.
+ */
+import { z } from "zod";
+export declare const AgenticProviderConfigSchema: z.ZodObject<{
+    agentMode: z.ZodOptional<z.ZodEnum<{
+        naive: "naive";
+        optimized: "optimized";
+    }>>;
+    allowedOrigins: z.ZodOptional<z.ZodArray<z.ZodString>>;
+    apiKey: z.ZodOptional<z.ZodString>;
+    customHeaders: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
+    docBaseUrl: z.ZodOptional<z.ZodString>;
+    llmsTxtUrl: z.ZodOptional<z.ZodString>;
+    max_completion_tokens: z.ZodOptional<z.ZodNumber>;
+    max_output_tokens: z.ZodOptional<z.ZodNumber>;
+    max_tokens: z.ZodOptional<z.ZodNumber>;
+    maxToolRounds: z.ZodOptional<z.ZodNumber>;
+    model: z.ZodOptional<z.ZodString>;
+    observe: z.ZodOptional<z.ZodBoolean>;
+    observerOptions: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
+    priorityDomain: z.ZodOptional<z.ZodString>;
+    provider: z.ZodOptional<z.ZodEnum<{
+        anthropic: "anthropic";
+        openai: "openai";
+    }>>;
+    searchMode: z.ZodOptional<z.ZodEnum<{
+        open: "open";
+        off: "off";
+        "origin-only": "origin-only";
+    }>>;
+    temperature: z.ZodOptional<z.ZodNumber>;
+}, z.core.$strip>;
+export type AgenticProviderConfig = z.infer<typeof AgenticProviderConfigSchema>;
+export declare const InstrumentedProviderConfigSchema: z.ZodObject<{
+    apiKey: z.ZodOptional<z.ZodString>;
+    max_output_tokens: z.ZodOptional<z.ZodNumber>;
+    max_tokens: z.ZodOptional<z.ZodNumber>;
+    model: z.ZodOptional<z.ZodString>;
+    modelName: z.ZodOptional<z.ZodString>;
+    observe: z.ZodOptional<z.ZodBoolean>;
+    observerOptions: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
+    reasoning: z.ZodOptional<z.ZodObject<{
+        effort: z.ZodOptional<z.ZodString>;
+        summary: z.ZodOptional<z.ZodString>;
+    }, z.core.$strip>>;
+    reasoning_effort: z.ZodOptional<z.ZodString>;
+    temperature: z.ZodOptional<z.ZodNumber>;
+    verbosity: z.ZodOptional<z.ZodString>;
+}, z.core.$strip>;
+export type InstrumentedProviderConfig = z.infer<typeof InstrumentedProviderConfigSchema>;

package/dist/agent-observer/config-schemas.js ADDED Viewed

@@ -0,0 +1,65 @@
+/**
+ * config-schemas.ts
+ *
+ * Zod schemas for the promptfoo provider config blocks read by the
+ * agent-observer providers. Promptfoo passes config as
+ * `Record<string, unknown>`; parsing it once at the constructor turns
+ * those untyped reads into a typed struct and surfaces typos / wrong
+ * shapes as clear `ZodError`s instead of silent `undefined` reads.
+ *
+ * Lives in `eval` (not `core`) because these schemas are 1:1 with the
+ * providers' constructor surfaces and have no consumers outside this
+ * package. See docs/work-items/W0004.json.
+ */
+import { z } from "zod";
+// ---------------------------------------------------------------------------
+// Shared sub-schemas
+// ---------------------------------------------------------------------------
+/**
+ * `RecorderOptions` is owned by `proxy.ts` and has its own resolution
+ * logic. We accept any object here and let RequestRecorder do the rest;
+ * no double-validation at this boundary.
+ */
+const ObserverOptionsSchema = z.record(z.string(), z.unknown());
+const ReasoningSchema = z.object({
+    effort: z.string().optional(),
+    summary: z.string().optional(),
+});
+// ---------------------------------------------------------------------------
+// AgenticProvider config
+// ---------------------------------------------------------------------------
+export const AgenticProviderConfigSchema = z.object({
+    agentMode: z.enum(["naive", "optimized"]).optional(),
+    allowedOrigins: z.array(z.string()).optional(),
+    apiKey: z.string().optional(),
+    customHeaders: z.record(z.string(), z.string()).optional(),
+    docBaseUrl: z.string().optional(),
+    llmsTxtUrl: z.string().optional(),
+    max_completion_tokens: z.number().optional(),
+    max_output_tokens: z.number().optional(),
+    max_tokens: z.number().optional(),
+    maxToolRounds: z.number().optional(),
+    model: z.string().optional(),
+    observe: z.boolean().optional(),
+    observerOptions: ObserverOptionsSchema.optional(),
+    priorityDomain: z.string().optional(),
+    provider: z.enum(["anthropic", "openai"]).optional(),
+    searchMode: z.enum(["off", "open", "origin-only"]).optional(),
+    temperature: z.number().optional(),
+});
+// ---------------------------------------------------------------------------
+// InstrumentedProvider config (provider.ts — wraps OpenAI directly)
+// ---------------------------------------------------------------------------
+export const InstrumentedProviderConfigSchema = z.object({
+    apiKey: z.string().optional(),
+    max_output_tokens: z.number().optional(),
+    max_tokens: z.number().optional(),
+    model: z.string().optional(),
+    modelName: z.string().optional(),
+    observe: z.boolean().optional(),
+    observerOptions: ObserverOptionsSchema.optional(),
+    reasoning: ReasoningSchema.optional(),
+    reasoning_effort: z.string().optional(),
+    temperature: z.number().optional(),
+    verbosity: z.string().optional(),
+});

package/dist/agent-observer/provider.d.ts CHANGED Viewed

@@ -56,6 +56,7 @@ interface ProviderResponse {
 export default class InstrumentedProvider {
     config: Record<string, unknown>;
     protected providerId: string;
+    private parsedConfig;
     private recorder;
     constructor(options: ProviderOptions);
     /**

package/dist/agent-observer/provider.js CHANGED Viewed

@@ -30,6 +30,8 @@
  */
 import { config as loadDotenv } from "dotenv";
 import { randomUUID } from "crypto";
+import { OpenAIChatResponseSchema, OpenAIResponsesResponseSchema, } from "../_vendor/ailf-core/index.js";
+import { InstrumentedProviderConfigSchema, } from "./config-schemas.js";
 import { RequestRecorder } from "./proxy.js";
 import { calculateCost } from "./pricing.js";
 loadDotenv({
@@ -42,11 +44,13 @@ loadDotenv({
 export default class InstrumentedProvider {
     config;
     providerId;
+    parsedConfig;
     recorder;
     constructor(options) {
         this.providerId = options.id ?? "instrumented-observer";
         this.config = options.config ?? {};
-        this.recorder = new RequestRecorder(this.config.observerOptions ?? {});
+        this.parsedConfig = InstrumentedProviderConfigSchema.parse(this.config);
+        this.recorder = new RequestRecorder(this.parsedConfig.observerOptions ?? {});
     }
     /**
      * Main Promptfoo provider entry point. Called for each test case.
@@ -56,7 +60,7 @@ export default class InstrumentedProvider {
         const taskDescription = context?.vars?.task ||
             context?.prompt?.label ||
             "unknown-task";
-        const observe = this.config.observe !== false;
+        const observe = this.parsedConfig.observe !== false;
         // Start observation
         if (observe) {
             this.recorder.start(sessionId, this.id(), taskDescription);
@@ -97,9 +101,9 @@ export default class InstrumentedProvider {
      * Detect whether the model should use the Responses API based on config.
      */
     isResponsesModel() {
-        const model = this.config.modelName || this.config.model || "";
-        return (this.config.reasoning != null ||
-            this.config.reasoning_effort != null ||
+        const model = this.parsedConfig.modelName ?? this.parsedConfig.model ?? "";
+        return (this.parsedConfig.reasoning != null ||
+            this.parsedConfig.reasoning_effort != null ||
             model.startsWith("gpt-5") ||
             model.startsWith("o1") ||
             model.startsWith("o3") ||
@@ -111,10 +115,8 @@ export default class InstrumentedProvider {
      * recorder's fetch wrapper so the API call is captured.
      */
     async callOpenAI(prompt) {
-        const model = this.config.modelName ||
-            this.config.model ||
-            "gpt-4o";
-        const apiKey = this.config.apiKey || process.env.OPENAI_API_KEY;
+        const model = this.parsedConfig.modelName ?? this.parsedConfig.model ?? "gpt-4o";
+        const apiKey = this.parsedConfig.apiKey ?? process.env.OPENAI_API_KEY;
         if (!apiKey) {
             return {
                 error: "OPENAI_API_KEY not set. Configure it in env or provider config.",
@@ -130,8 +132,8 @@ export default class InstrumentedProvider {
      * Calls OpenAI Chat Completions API (non-reasoning models).
      */
     async callOpenAIChatCompletions(prompt, model, apiKey) {
-        const temperature = this.config.temperature ?? 0;
-        const maxTokens = this.config.max_tokens || 4096;
+        const temperature = this.parsedConfig.temperature ?? 0;
+        const maxTokens = this.parsedConfig.max_tokens ?? 4096;
         const fetchFn = this.recorder.isRunning()
             ? this.recorder.fetch.bind(this.recorder)
             : globalThis.fetch;
@@ -149,7 +151,7 @@ export default class InstrumentedProvider {
             },
             method: "POST",
         });
-        const data = (await response.json());
+        const data = OpenAIChatResponseSchema.parse(await response.json());
         if (data.error) {
             return {
                 error: data.error.message ?? "Unknown OpenAI error",
@@ -175,11 +177,11 @@ export default class InstrumentedProvider {
      * Calls OpenAI Responses API (reasoning models like GPT-5.x, o-series).
      */
     async callOpenAIResponses(prompt, model, apiKey) {
-        const maxOutputTokens = this.config.max_output_tokens || 32_000;
-        const reasoning = this.config.reasoning;
-        const reasoningEffort = reasoning?.effort || this.config.reasoning_effort || "medium";
+        const maxOutputTokens = this.parsedConfig.max_output_tokens ?? 32_000;
+        const reasoning = this.parsedConfig.reasoning;
+        const reasoningEffort = reasoning?.effort ?? this.parsedConfig.reasoning_effort ?? "medium";
         const reasoningSummary = reasoning?.summary;
-        const verbosity = this.config.verbosity;
+        const verbosity = this.parsedConfig.verbosity;
         const fetchFn = this.recorder.isRunning()
             ? this.recorder.fetch.bind(this.recorder)
             : globalThis.fetch;
@@ -201,7 +203,7 @@ export default class InstrumentedProvider {
             },
             method: "POST",
         });
-        const data = (await response.json());
+        const data = OpenAIResponsesResponseSchema.parse(await response.json());
         if (data.error) {
             return {
                 error: data.error.message ?? "Unknown OpenAI error",

package/dist/cli.js CHANGED Viewed

@@ -37,6 +37,7 @@ import { existsSync } from "fs";
 import { dirname, resolve } from "path";
 import { fileURLToPath } from "url";
 import { buildCliProgram } from "./cli-program.js";
+import { findExplicitDotenvArg } from "./lib/dotenv-resolution.js";
 const __dirname = dirname(fileURLToPath(import.meta.url));
 const ROOT = resolve(__dirname, "..");
 // ---------------------------------------------------------------------------
@@ -52,10 +53,9 @@ const ROOT = resolve(__dirname, "..");
 // installed globally via npm (production).
 // ---------------------------------------------------------------------------
 function resolveEnvPath() {
-    const idx = process.argv.indexOf("--dotenv");
-    if (idx !== -1 && process.argv[idx + 1]) {
-        return resolve(process.argv[idx + 1]);
-    }
+    const explicit = findExplicitDotenvArg();
+    if (explicit)
+        return explicit;
     // Monorepo root .env (dev mode)
     const monorepoEnv = resolve(ROOT, "..", "..", ".env");
     if (existsSync(monorepoEnv))

package/dist/commands/validate-tasks.js CHANGED Viewed

@@ -18,7 +18,7 @@ import { resolve, relative, basename } from "path";
 import { Command } from "commander";
 import { load } from "js-yaml";
 import { detectLegacyFieldNames, parseCanonicalTaskFile, } from "../adapters/task-sources/repo-schemas.js";
-import { validateCanonicalTasks, formatValidationResult, } from "../adapters/task-sources/repo-validation.js";
+import { validateCanonicalTasks, formatRepoValidationResult, } from "../adapters/task-sources/repo-validation.js";
 import { discoverTsTaskFiles, loadTsTaskFile, } from "../adapters/task-sources/task-file-loader.js";
 export function createValidateTasksCommand() {
     return new Command("tasks")
@@ -103,7 +103,7 @@ export async function runValidateTasks(tasksPath, opts) {
     if (allTasks.length > 0) {
         console.log();
         const semanticResult = validateCanonicalTasks(allTasks);
-        const formatted = formatValidationResult(semanticResult);
+        const formatted = formatRepoValidationResult(semanticResult);
         console.log(formatted);
         if (!semanticResult.valid) {
             hasErrors = true;

package/dist/composition-root.js CHANGED Viewed

@@ -35,7 +35,7 @@ import { CompositeTaskSource, ContentLakeTaskSource, RepoTaskSource, } from "./a
 import { resolveVendoredSubdir } from "./pipeline/compiler/config-loader.js";
 import { createAgentHarnessBase, createKnowledgeProbeBase, createLiteracyModeBase, createMcpServerModeBase, } from "./pipeline/compiler/mode-bases/index.js";
 import { createSanityLiteracyPreset } from "./pipeline/compiler/presets/index.js";
-import { getSanityClient } from "./sanity/client.js";
+import { getAilfSanityClient } from "./sanity/client.js";
 import { ReportStore } from "./report-store.js";
 import { loadSinks } from "./sinks/index.js";
 /**
@@ -316,8 +316,10 @@ export function createTaskSource(config) {
         return new RepoTaskSource(config.repoTasksPath);
     }
     // "content-lake" — Studio-authored ailf.task documents only.
+    // AILF documents live in the private dataset (D0043) — route through
+    // the AILF client factory so reads target `ailf-prod-private`.
     if (config.taskSourceType === "content-lake") {
-        return new ContentLakeTaskSource(getSanityClient({
+        return new ContentLakeTaskSource(getAilfSanityClient({
             token: process.env.AILF_REPORT_SANITY_API_TOKEN ??
                 process.env.SANITY_API_TOKEN ??
                 undefined,

package/dist/index.d.ts CHANGED Viewed

@@ -38,6 +38,6 @@ export type { PricingEntry, PromptEntry, SourceEntry } from "./_vendor/ailf-core
 export { env } from "./_vendor/ailf-core/index.d.ts";
 export type { AgentHarnessTaskDefinition, CustomTaskDefinition, GeneralizedAssertionDefinition, GeneralizedDocRef, GeneralizedTaskDefinition, GeneralizedTemplatedAssertion, GeneralizedValueAssertion, IdDocRef, KnowledgeProbeTaskDefinition, LiteracyTaskDefinition, MCPServerTaskDefinition, PathDocRef, PerspectiveDocRef, RubricRef, SlugDocRef, TaskCommonFields, TaskDifficulty, TaskOptions, TaskProviderConfig, TaskStatus, } from "./_vendor/ailf-core/index.d.ts";
 export { CanonicalTaskFileSchema, CanonicalTaskSchema, CURATED_ASSERTION_TYPES, detectLegacyFieldNames, parseCanonicalTaskFile, RUBRIC_TEMPLATE_NAMES, type CanonicalTask, type CuratedAssertionType, type RubricTemplateName, } from "./adapters/task-sources/repo-schemas.js";
-export { formatValidationResult, validateCanonicalTasks, type ValidationMessage, type ValidationResult, } from "./adapters/task-sources/repo-validation.js";
+export { formatRepoValidationResult, validateCanonicalTasks, type RepoValidationMessage, type RepoValidationResult, } from "./adapters/task-sources/repo-validation.js";
 export { InMemoryPluginRegistry } from "./_vendor/ailf-core/index.d.ts";
 export type { CompilationContext, ModeBase, ModeCompileResult, ModeHandler, PresetDefinition, } from "./_vendor/ailf-core/index.d.ts";

package/dist/index.js CHANGED Viewed

@@ -45,7 +45,7 @@ export { env } from "./_vendor/ailf-core/index.js";
 // Validation — for programmatic validation of task YAML
 // ---------------------------------------------------------------------------
 export { CanonicalTaskFileSchema, CanonicalTaskSchema, CURATED_ASSERTION_TYPES, detectLegacyFieldNames, parseCanonicalTaskFile, RUBRIC_TEMPLATE_NAMES, } from "./adapters/task-sources/repo-schemas.js";
-export { formatValidationResult, validateCanonicalTasks, } from "./adapters/task-sources/repo-validation.js";
+export { formatRepoValidationResult, validateCanonicalTasks, } from "./adapters/task-sources/repo-validation.js";
 // ---------------------------------------------------------------------------
 // Plugin extension points — for authoring custom presets, modes, and registries
 // ---------------------------------------------------------------------------

package/dist/job-store.js CHANGED Viewed

@@ -11,7 +11,7 @@
  * @see docs/design-docs/api-service-gateway.md
  * @see packages/studio/src/schema/job.ts — Sanity document schema
  */
-import { getSanityClient } from "./sanity/client.js";
+import { getAilfSanityClient } from "./sanity/client.js";
 // ---------------------------------------------------------------------------
 // Constants
 // ---------------------------------------------------------------------------
@@ -28,7 +28,7 @@ export class JobStore {
             this.client = options.client;
         }
         else {
-            this.client = getSanityClient({
+            this.client = getAilfSanityClient({
                 ...(options.dataset ? { dataset: options.dataset } : {}),
                 ...(options.projectId ? { projectId: options.projectId } : {}),
                 ...(options.token ? { token: options.token } : {}),

package/dist/lib/dotenv-resolution.d.ts ADDED Viewed

@@ -0,0 +1,21 @@
+/**
+ * Dotenv resolution helpers shared between the CLI bootstrap
+ * (`packages/eval/src/cli.ts`) and any code path that needs to honor the
+ * same `--dotenv <path>` override (today: `pipeline/checks.ts::checkEnvironment`,
+ * which re-loads the active env file as part of validation).
+ *
+ * Centralizing the argv parse means future changes — validating the path
+ * exists before returning, supporting `--dotenv=path` form, accepting an
+ * env-var fallback — happen in one place instead of drifting between
+ * call sites.
+ */
+/**
+ * Find an explicit `--dotenv <path>` argument and return its absolute,
+ * resolved path. Returns `undefined` when the flag is absent or has no
+ * following value.
+ *
+ * NOTE(review): the file header lists the `--dotenv=path` form as a
+ * possible future change, so presumably only the space-separated
+ * `--dotenv <path>` form is recognized today — confirm against the
+ * implementation.
+ *
+ * @param argv - Defaults to `process.argv`. Pass an explicit array in
+ *   tests or in non-CLI hosts that have already shifted off the script
+ *   prefix.
+ * @returns The absolute, resolved path that follows `--dotenv`, or
+ *   `undefined` when the flag is absent or has no following value.
+ */
+export declare function findExplicitDotenvArg(argv?: readonly string[]): string | undefined;