@sanity/ailf 4.0.7 → 4.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. package/bin/ailf.js +6 -1
  2. package/dist/_vendor/ailf-core/schemas/external-providers.d.ts +136 -0
  3. package/dist/_vendor/ailf-core/schemas/external-providers.js +136 -0
  4. package/dist/_vendor/ailf-core/schemas/index.d.ts +2 -0
  5. package/dist/_vendor/ailf-core/schemas/index.js +2 -0
  6. package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +2 -3
  7. package/dist/_vendor/ailf-core/schemas/report.d.ts +251 -0
  8. package/dist/_vendor/ailf-core/schemas/report.js +235 -0
  9. package/dist/_vendor/ailf-core/services/index.d.ts +1 -0
  10. package/dist/_vendor/ailf-core/services/index.js +1 -0
  11. package/dist/_vendor/ailf-core/services/report-to-markdown.d.ts +38 -0
  12. package/dist/_vendor/ailf-core/services/report-to-markdown.js +696 -0
  13. package/dist/_vendor/ailf-core/types/api-requests.d.ts +159 -0
  14. package/dist/_vendor/ailf-core/types/api-requests.js +27 -0
  15. package/dist/_vendor/ailf-core/types/index.d.ts +3 -0
  16. package/dist/_vendor/ailf-core/types/pipeline-request.d.ts +112 -0
  17. package/dist/_vendor/ailf-core/types/pipeline-request.js +18 -0
  18. package/dist/_vendor/ailf-core/types/repo-config.d.ts +146 -0
  19. package/dist/_vendor/ailf-core/types/repo-config.js +18 -0
  20. package/dist/_vendor/ailf-shared/index.d.ts +7 -5
  21. package/dist/_vendor/ailf-shared/index.js +7 -5
  22. package/dist/adapters/api-client/types.d.ts +2 -5
  23. package/dist/adapters/task-sources/content-lake-task-source.d.ts +58 -1
  24. package/dist/adapters/task-sources/content-lake-task-source.js +1 -1
  25. package/dist/adapters/task-sources/index.d.ts +1 -1
  26. package/dist/adapters/task-sources/index.js +1 -1
  27. package/dist/adapters/task-sources/repo-schemas.d.ts +3 -2
  28. package/dist/adapters/task-sources/repo-schemas.js +3 -1
  29. package/dist/adapters/task-sources/repo-validation.d.ts +6 -6
  30. package/dist/adapters/task-sources/repo-validation.js +1 -1
  31. package/dist/agent-observer/agentic-provider.d.ts +1 -0
  32. package/dist/agent-observer/agentic-provider.js +43 -36
  33. package/dist/agent-observer/config-schemas.d.ts +61 -0
  34. package/dist/agent-observer/config-schemas.js +65 -0
  35. package/dist/agent-observer/provider.d.ts +1 -0
  36. package/dist/agent-observer/provider.js +19 -17
  37. package/dist/cli.js +4 -4
  38. package/dist/commands/validate-tasks.js +2 -2
  39. package/dist/composition-root.js +4 -2
  40. package/dist/index.d.ts +1 -1
  41. package/dist/index.js +1 -1
  42. package/dist/job-store.js +2 -2
  43. package/dist/lib/dotenv-resolution.d.ts +21 -0
  44. package/dist/lib/dotenv-resolution.js +30 -0
  45. package/dist/orchestration/steps/mirror-repo-tasks-step.js +14 -3
  46. package/dist/orchestration/steps/run-eval-step.js +21 -3
  47. package/dist/pipeline/agent-behavior-report.d.ts +2 -8
  48. package/dist/pipeline/cache.d.ts +2 -2
  49. package/dist/pipeline/checks.d.ts +10 -2
  50. package/dist/pipeline/checks.js +14 -4
  51. package/dist/pipeline/compiler/literacy-bridge.js +2 -2
  52. package/dist/pipeline/compiler/mode-handlers/agent-harness/types.d.ts +2 -2
  53. package/dist/pipeline/compiler/mode-handlers/index.d.ts +1 -1
  54. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.d.ts +2 -2
  55. package/dist/pipeline/compiler/mode-handlers/literacy/index.d.ts +1 -1
  56. package/dist/pipeline/compiler/mode-handlers/literacy/types.d.ts +3 -3
  57. package/dist/pipeline/compiler/promptfoo-compiler.js +7 -11
  58. package/dist/pipeline/compiler/provider-assembler.js +33 -3
  59. package/dist/pipeline/compiler/rubric-resolution.d.ts +2 -2
  60. package/dist/pipeline/mirror-repo-tasks.d.ts +13 -5
  61. package/dist/pipeline/mirror-repo-tasks.js +16 -8
  62. package/dist/pipeline/pr-comment.d.ts +22 -9
  63. package/dist/pipeline/pr-comment.js +52 -472
  64. package/dist/pipeline/resolve-mappings.d.ts +8 -3
  65. package/dist/promptfoo-providers/mock-path.d.ts +12 -0
  66. package/dist/promptfoo-providers/mock-path.js +15 -0
  67. package/dist/report-store.d.ts +63 -1
  68. package/dist/report-store.js +111 -31
  69. package/dist/sanity/client.d.ts +58 -0
  70. package/dist/sanity/client.js +106 -0
  71. package/package.json +8 -7
@@ -16,6 +16,7 @@
16
16
  * @see docs/archive/exec-plans/tasks-as-content/phase-4-repo-based-tasks.md
17
17
  */
18
18
  import { z } from "zod";
19
+ import type { AilfEvalWorkflow, RepoConfig } from "../../_vendor/ailf-core/index.d.ts";
19
20
  /**
20
21
  * The set of assertion types allowed in task files.
21
22
  *
@@ -1521,7 +1522,7 @@ export declare const RepoConfigSchema: z.ZodObject<{
1521
1522
  }, z.core.$strip>>;
1522
1523
  }, z.core.$strip>>;
1523
1524
  }, z.core.$strip>;
1524
- export type RepoConfig = z.infer<typeof RepoConfigSchema>;
1525
+ export type { RepoConfig } from "../../_vendor/ailf-core/index.d.ts";
1525
1526
  /**
1526
1527
  * Parse and validate .ailf/config.yaml content. Returns typed config or throws.
1527
1528
  */
@@ -1551,7 +1552,7 @@ export declare const AilfEvalWorkflowSchema: z.ZodObject<{
1551
1552
  }, z.core.$loose>>;
1552
1553
  }, z.core.$loose>>;
1553
1554
  }, z.core.$loose>;
1554
- export type AilfEvalWorkflow = z.infer<typeof AilfEvalWorkflowSchema>;
1555
+ export type { AilfEvalWorkflow } from "../../_vendor/ailf-core/index.d.ts";
1555
1556
  /**
1556
1557
  * Parse and validate a `.github/workflows/ailf-eval.yml` payload (already
1557
1558
  * loaded from YAML). Throws with a Zod-formatted message on failure.
@@ -334,7 +334,9 @@ export const ContentLakeAuthorableTaskSchema = LiteracyTaskSchema;
334
334
  * Schema for an array of canonical tasks — what a single .ailf/tasks/*.yaml
335
335
  * file contains. Each file must define at least one task.
336
336
  */
337
- export const CanonicalTaskFileSchema = z.array(CanonicalTaskSchema).min(1);
337
+ export const CanonicalTaskFileSchema = z
338
+ .array(CanonicalTaskSchema)
339
+ .min(1);
338
340
  /**
339
341
  * Pre-process raw task entries before discriminated-union parsing: when
340
342
  * `mode` is missing, default it to `"literacy"`. Zod cannot default a
@@ -15,12 +15,12 @@
15
15
  * has been eliminated — all validation logic now lives here.
16
16
  */
17
17
  import { type CanonicalTask } from "./repo-schemas.js";
18
- export interface ValidationResult {
18
+ export interface RepoValidationResult {
19
19
  valid: boolean;
20
- errors: ValidationMessage[];
21
- warnings: ValidationMessage[];
20
+ errors: RepoValidationMessage[];
21
+ warnings: RepoValidationMessage[];
22
22
  }
23
- export interface ValidationMessage {
23
+ export interface RepoValidationMessage {
24
24
  taskId: string;
25
25
  field: string;
26
26
  message: string;
@@ -32,8 +32,8 @@ export interface ValidationMessage {
32
32
  * areas, unresolved slugs) and errors for issues that would cause pipeline
33
33
  * failures (completely missing required fields — though Zod catches most).
34
34
  */
35
- export declare function validateCanonicalTasks(tasks: CanonicalTask[]): ValidationResult;
35
+ export declare function validateCanonicalTasks(tasks: CanonicalTask[]): RepoValidationResult;
36
36
  /**
37
37
  * Format validation results for console output.
38
38
  */
39
- export declare function formatValidationResult(result: ValidationResult): string;
39
+ export declare function formatRepoValidationResult(result: RepoValidationResult): string;
@@ -110,7 +110,7 @@ export function validateCanonicalTasks(tasks) {
110
110
  /**
111
111
  * Format validation results for console output.
112
112
  */
113
- export function formatValidationResult(result) {
113
+ export function formatRepoValidationResult(result) {
114
114
  const lines = [];
115
115
  if (result.errors.length > 0) {
116
116
  lines.push("Errors:");
@@ -63,6 +63,7 @@ export default class AgenticProvider {
63
63
  private docBaseUrl;
64
64
  private docsUrlPattern;
65
65
  private llmsTxtUrl;
66
+ private parsedConfig;
66
67
  private priorityDomain;
67
68
  private recorder;
68
69
  private searchMode;
@@ -31,6 +31,8 @@
31
31
  */
32
32
  import { config as loadDotenv } from "dotenv";
33
33
  import { randomUUID } from "crypto";
34
+ import { AnthropicResponseSchema, FetchPageToolArgsSchema, GoogleSearchResponseSchema, ListDocsToolArgsSchema, OpenAIChatResponseSchema, WebSearchToolArgsSchema, } from "../_vendor/ailf-core/index.js";
35
+ import { AgenticProviderConfigSchema, } from "./config-schemas.js";
34
36
  import { RequestRecorder } from "./proxy.js";
35
37
  import { calculateCost } from "./pricing.js";
36
38
  import { isAllowedOrigin } from "../sources.js";
@@ -120,32 +122,31 @@ export default class AgenticProvider {
120
122
  docBaseUrl;
121
123
  docsUrlPattern;
122
124
  llmsTxtUrl;
125
+ parsedConfig;
123
126
  priorityDomain;
124
127
  recorder;
125
128
  searchMode;
126
129
  constructor(options) {
127
130
  this.providerId = options.id ?? "agentic-observer";
128
131
  this.config = options.config ?? {};
129
- this.agentMode = this.config.agentMode || "naive";
132
+ this.parsedConfig = AgenticProviderConfigSchema.parse(this.config);
133
+ this.agentMode = this.parsedConfig.agentMode ?? "naive";
130
134
  // Documentation source configuration — defaults to Sanity production
131
- this.docBaseUrl = this.config.docBaseUrl || DEFAULT_DOC_BASE_URL;
132
- this.llmsTxtUrl = this.config.llmsTxtUrl || DEFAULT_LLMS_TXT_URL;
135
+ this.docBaseUrl = this.parsedConfig.docBaseUrl ?? DEFAULT_DOC_BASE_URL;
136
+ this.llmsTxtUrl = this.parsedConfig.llmsTxtUrl ?? DEFAULT_LLMS_TXT_URL;
133
137
  this.docsUrlPattern = buildDocsUrlPattern(this.docBaseUrl);
134
138
  // Custom HTTP headers (e.g., Vercel bypass protection token)
135
- this.customHeaders =
136
- this.config.customHeaders || {};
139
+ this.customHeaders = this.parsedConfig.customHeaders ?? {};
137
140
  // Extract priority domain from docBaseUrl for search result ranking
138
141
  const baseUrlObj = new URL(this.docBaseUrl);
139
142
  this.priorityDomain =
140
- this.config.priorityDomain ||
143
+ this.parsedConfig.priorityDomain ??
141
144
  baseUrlObj.hostname.replace(/^www\./, "");
142
145
  // Optional origin sandboxing — restrict which URLs the agent can access
143
- this.allowedOrigins = Array.isArray(this.config.allowedOrigins)
144
- ? this.config.allowedOrigins.filter(Boolean)
145
- : [];
146
+ this.allowedOrigins = (this.parsedConfig.allowedOrigins ?? []).filter(Boolean);
146
147
  // Search mode: controls web_search tool availability and filtering
147
- this.searchMode = this.config.searchMode || "open";
148
- this.recorder = new RequestRecorder(this.config.observerOptions || {});
148
+ this.searchMode = this.parsedConfig.searchMode ?? "open";
149
+ this.recorder = new RequestRecorder(this.parsedConfig.observerOptions ?? {});
149
150
  }
150
151
  /**
151
152
  * Main Promptfoo provider entry point. Runs the full agentic loop.
@@ -155,7 +156,7 @@ export default class AgenticProvider {
155
156
  const taskDescription = context?.vars?.task ||
156
157
  context?.prompt?.label ||
157
158
  "unknown-task";
158
- const observe = this.config.observe !== false;
159
+ const observe = this.parsedConfig.observe !== false;
159
160
  if (observe) {
160
161
  this.recorder.start(sessionId, this.id(), taskDescription);
161
162
  }
@@ -188,7 +189,7 @@ export default class AgenticProvider {
188
189
  return this.recorder;
189
190
  }
190
191
  id() {
191
- const model = this.config.model || this.providerId;
192
+ const model = this.parsedConfig.model ?? this.providerId;
192
193
  return `agentic:${this.agentMode}:${model}`;
193
194
  }
194
195
  // -------------------------------------------------------------------------
@@ -276,13 +277,13 @@ export default class AgenticProvider {
276
277
  * fallback heuristics for backward compatibility.
277
278
  */
278
279
  detectProvider() {
279
- const explicit = this.config.provider;
280
+ const explicit = this.parsedConfig.provider;
280
281
  if (explicit === "anthropic")
281
282
  return "anthropic";
282
283
  if (explicit === "openai")
283
284
  return "openai";
284
285
  // Heuristic fallback: detect from model name
285
- const model = this.config.model || "";
286
+ const model = this.parsedConfig.model ?? "";
286
287
  if (model.startsWith("claude"))
287
288
  return "anthropic";
288
289
  return "openai";
@@ -421,14 +422,20 @@ export default class AgenticProvider {
421
422
  }
422
423
  async executeTool(name, argsJson, fetchFn) {
423
424
  try {
424
- const args = JSON.parse(argsJson);
425
+ const rawArgs = JSON.parse(argsJson);
425
426
  switch (name) {
426
- case "fetch_page":
427
+ case "fetch_page": {
428
+ const args = FetchPageToolArgsSchema.parse(rawArgs);
427
429
  return await this.executeFetchPage(args.url, fetchFn);
428
- case "list_docs":
430
+ }
431
+ case "list_docs": {
432
+ const args = ListDocsToolArgsSchema.parse(rawArgs);
429
433
  return await this.executeListDocs(args.site, fetchFn);
430
- case "web_search":
434
+ }
435
+ case "web_search": {
436
+ const args = WebSearchToolArgsSchema.parse(rawArgs);
431
437
  return await this.executeWebSearch(args.query, fetchFn);
438
+ }
432
439
  default:
433
440
  return JSON.stringify({ error: `Unknown tool: ${name}` });
434
441
  }
@@ -454,7 +461,7 @@ export default class AgenticProvider {
454
461
  q: query,
455
462
  });
456
463
  const response = await fetchFn(`https://www.googleapis.com/customsearch/v1?${params}`);
457
- const data = (await response.json());
464
+ const data = GoogleSearchResponseSchema.parse(await response.json());
458
465
  if (data.items?.length) {
459
466
  results = data.items.map((item) => ({
460
467
  snippet: item.snippet,
@@ -617,11 +624,11 @@ export default class AgenticProvider {
617
624
  // Anthropic agentic loop
618
625
  // -------------------------------------------------------------------------
619
626
  async runAnthropicLoop(prompt) {
620
- const model = this.config.model || "claude-sonnet-4-20250514";
621
- const temperature = this.config.temperature ?? 0.2;
622
- const maxTokens = this.config.max_tokens || 4096;
623
- const maxToolRounds = this.config.maxToolRounds || 5;
624
- const apiKey = this.config.apiKey || process.env.ANTHROPIC_API_KEY;
627
+ const model = this.parsedConfig.model ?? "claude-sonnet-4-20250514";
628
+ const temperature = this.parsedConfig.temperature ?? 0.2;
629
+ const maxTokens = this.parsedConfig.max_tokens ?? 4096;
630
+ const maxToolRounds = this.parsedConfig.maxToolRounds ?? 5;
631
+ const apiKey = this.parsedConfig.apiKey ?? process.env.ANTHROPIC_API_KEY;
625
632
  if (!apiKey) {
626
633
  return {
627
634
  error: "ANTHROPIC_API_KEY not set. Configure it in env or provider config.",
@@ -691,7 +698,7 @@ export default class AgenticProvider {
691
698
  },
692
699
  method: "POST",
693
700
  });
694
- const data = (await response.json());
701
+ const data = AnthropicResponseSchema.parse(await response.json());
695
702
  if (data.error) {
696
703
  return {
697
704
  error: data.error.message ??
@@ -799,22 +806,22 @@ export default class AgenticProvider {
799
806
  // OpenAI agentic loop
800
807
  // -------------------------------------------------------------------------
801
808
  async runOpenAILoop(prompt) {
802
- const model = this.config.model || "gpt-4o";
803
- const temperature = this.config.temperature ?? 0.2;
804
- const maxToolRounds = this.config.maxToolRounds || 5;
805
- const apiKey = this.config.apiKey || process.env.OPENAI_API_KEY;
809
+ const model = this.parsedConfig.model ?? "gpt-4o";
810
+ const temperature = this.parsedConfig.temperature ?? 0.2;
811
+ const maxToolRounds = this.parsedConfig.maxToolRounds ?? 5;
812
+ const apiKey = this.parsedConfig.apiKey ?? process.env.OPENAI_API_KEY;
806
813
  // Newer OpenAI models (gpt-5.x, o-series) use max_completion_tokens
807
814
  // instead of max_tokens, and reject custom temperature values. Detect
808
815
  // from config or model name. See W0131.
809
816
  const isReasoningModel = model.startsWith("gpt-5") ||
810
817
  model.startsWith("o3") ||
811
818
  model.startsWith("o4");
812
- const useMaxCompletionTokens = this.config.max_output_tokens != null ||
813
- this.config.max_completion_tokens != null ||
819
+ const useMaxCompletionTokens = this.parsedConfig.max_output_tokens != null ||
820
+ this.parsedConfig.max_completion_tokens != null ||
814
821
  isReasoningModel;
815
- const maxTokensValue = this.config.max_output_tokens ??
816
- this.config.max_completion_tokens ??
817
- this.config.max_tokens ??
822
+ const maxTokensValue = this.parsedConfig.max_output_tokens ??
823
+ this.parsedConfig.max_completion_tokens ??
824
+ this.parsedConfig.max_tokens ??
818
825
  4096;
819
826
  const tokenLimitParam = useMaxCompletionTokens
820
827
  ? { max_completion_tokens: maxTokensValue }
@@ -860,7 +867,7 @@ export default class AgenticProvider {
860
867
  },
861
868
  method: "POST",
862
869
  });
863
- const data = (await response.json());
870
+ const data = OpenAIChatResponseSchema.parse(await response.json());
864
871
  if (data.error) {
865
872
  return {
866
873
  error: data.error.message ?? "Unknown OpenAI error",
@@ -0,0 +1,61 @@
1
+ /**
2
+ * config-schemas.ts
3
+ *
4
+ * Zod schemas for the promptfoo provider config blocks read by the
5
+ * agent-observer providers. Promptfoo passes config as
6
+ * `Record<string, unknown>`; parsing it once at the constructor turns
7
+ * those untyped reads into a typed struct and surfaces typos / wrong
8
+ * shapes as clear `ZodError`s instead of silent `undefined` reads.
9
+ *
10
+ * Lives in `eval` (not `core`) because these schemas are 1:1 with the
11
+ * providers' constructor surfaces and have no consumers outside this
12
+ * package. See docs/work-items/W0004.json.
13
+ */
14
+ import { z } from "zod";
15
+ export declare const AgenticProviderConfigSchema: z.ZodObject<{
16
+ agentMode: z.ZodOptional<z.ZodEnum<{
17
+ naive: "naive";
18
+ optimized: "optimized";
19
+ }>>;
20
+ allowedOrigins: z.ZodOptional<z.ZodArray<z.ZodString>>;
21
+ apiKey: z.ZodOptional<z.ZodString>;
22
+ customHeaders: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
23
+ docBaseUrl: z.ZodOptional<z.ZodString>;
24
+ llmsTxtUrl: z.ZodOptional<z.ZodString>;
25
+ max_completion_tokens: z.ZodOptional<z.ZodNumber>;
26
+ max_output_tokens: z.ZodOptional<z.ZodNumber>;
27
+ max_tokens: z.ZodOptional<z.ZodNumber>;
28
+ maxToolRounds: z.ZodOptional<z.ZodNumber>;
29
+ model: z.ZodOptional<z.ZodString>;
30
+ observe: z.ZodOptional<z.ZodBoolean>;
31
+ observerOptions: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
32
+ priorityDomain: z.ZodOptional<z.ZodString>;
33
+ provider: z.ZodOptional<z.ZodEnum<{
34
+ anthropic: "anthropic";
35
+ openai: "openai";
36
+ }>>;
37
+ searchMode: z.ZodOptional<z.ZodEnum<{
38
+ open: "open";
39
+ off: "off";
40
+ "origin-only": "origin-only";
41
+ }>>;
42
+ temperature: z.ZodOptional<z.ZodNumber>;
43
+ }, z.core.$strip>;
44
+ export type AgenticProviderConfig = z.infer<typeof AgenticProviderConfigSchema>;
45
+ export declare const InstrumentedProviderConfigSchema: z.ZodObject<{
46
+ apiKey: z.ZodOptional<z.ZodString>;
47
+ max_output_tokens: z.ZodOptional<z.ZodNumber>;
48
+ max_tokens: z.ZodOptional<z.ZodNumber>;
49
+ model: z.ZodOptional<z.ZodString>;
50
+ modelName: z.ZodOptional<z.ZodString>;
51
+ observe: z.ZodOptional<z.ZodBoolean>;
52
+ observerOptions: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
53
+ reasoning: z.ZodOptional<z.ZodObject<{
54
+ effort: z.ZodOptional<z.ZodString>;
55
+ summary: z.ZodOptional<z.ZodString>;
56
+ }, z.core.$strip>>;
57
+ reasoning_effort: z.ZodOptional<z.ZodString>;
58
+ temperature: z.ZodOptional<z.ZodNumber>;
59
+ verbosity: z.ZodOptional<z.ZodString>;
60
+ }, z.core.$strip>;
61
+ export type InstrumentedProviderConfig = z.infer<typeof InstrumentedProviderConfigSchema>;
@@ -0,0 +1,65 @@
1
+ /**
2
+ * config-schemas.ts
3
+ *
4
+ * Zod schemas for the promptfoo provider config blocks read by the
5
+ * agent-observer providers. Promptfoo passes config as
6
+ * `Record<string, unknown>`; parsing it once at the constructor turns
7
+ * those untyped reads into a typed struct and surfaces typos / wrong
8
+ * shapes as clear `ZodError`s instead of silent `undefined` reads.
9
+ *
10
+ * Lives in `eval` (not `core`) because these schemas are 1:1 with the
11
+ * providers' constructor surfaces and have no consumers outside this
12
+ * package. See docs/work-items/W0004.json.
13
+ */
14
+ import { z } from "zod";
15
+ // ---------------------------------------------------------------------------
16
+ // Shared sub-schemas
17
+ // ---------------------------------------------------------------------------
18
+ /**
19
+ * `RecorderOptions` is owned by `proxy.ts` and has its own resolution
20
+ * logic. We accept any object here and let RequestRecorder do the rest;
21
+ * no double-validation at this boundary.
22
+ */
23
+ const ObserverOptionsSchema = z.record(z.string(), z.unknown());
24
+ const ReasoningSchema = z.object({
25
+ effort: z.string().optional(),
26
+ summary: z.string().optional(),
27
+ });
28
+ // ---------------------------------------------------------------------------
29
+ // AgenticProvider config
30
+ // ---------------------------------------------------------------------------
31
+ export const AgenticProviderConfigSchema = z.object({
32
+ agentMode: z.enum(["naive", "optimized"]).optional(),
33
+ allowedOrigins: z.array(z.string()).optional(),
34
+ apiKey: z.string().optional(),
35
+ customHeaders: z.record(z.string(), z.string()).optional(),
36
+ docBaseUrl: z.string().optional(),
37
+ llmsTxtUrl: z.string().optional(),
38
+ max_completion_tokens: z.number().optional(),
39
+ max_output_tokens: z.number().optional(),
40
+ max_tokens: z.number().optional(),
41
+ maxToolRounds: z.number().optional(),
42
+ model: z.string().optional(),
43
+ observe: z.boolean().optional(),
44
+ observerOptions: ObserverOptionsSchema.optional(),
45
+ priorityDomain: z.string().optional(),
46
+ provider: z.enum(["anthropic", "openai"]).optional(),
47
+ searchMode: z.enum(["off", "open", "origin-only"]).optional(),
48
+ temperature: z.number().optional(),
49
+ });
50
+ // ---------------------------------------------------------------------------
51
+ // InstrumentedProvider config (provider.ts — wraps OpenAI directly)
52
+ // ---------------------------------------------------------------------------
53
+ export const InstrumentedProviderConfigSchema = z.object({
54
+ apiKey: z.string().optional(),
55
+ max_output_tokens: z.number().optional(),
56
+ max_tokens: z.number().optional(),
57
+ model: z.string().optional(),
58
+ modelName: z.string().optional(),
59
+ observe: z.boolean().optional(),
60
+ observerOptions: ObserverOptionsSchema.optional(),
61
+ reasoning: ReasoningSchema.optional(),
62
+ reasoning_effort: z.string().optional(),
63
+ temperature: z.number().optional(),
64
+ verbosity: z.string().optional(),
65
+ });
@@ -56,6 +56,7 @@ interface ProviderResponse {
56
56
  export default class InstrumentedProvider {
57
57
  config: Record<string, unknown>;
58
58
  protected providerId: string;
59
+ private parsedConfig;
59
60
  private recorder;
60
61
  constructor(options: ProviderOptions);
61
62
  /**
@@ -30,6 +30,8 @@
30
30
  */
31
31
  import { config as loadDotenv } from "dotenv";
32
32
  import { randomUUID } from "crypto";
33
+ import { OpenAIChatResponseSchema, OpenAIResponsesResponseSchema, } from "../_vendor/ailf-core/index.js";
34
+ import { InstrumentedProviderConfigSchema, } from "./config-schemas.js";
33
35
  import { RequestRecorder } from "./proxy.js";
34
36
  import { calculateCost } from "./pricing.js";
35
37
  loadDotenv({
@@ -42,11 +44,13 @@ loadDotenv({
42
44
  export default class InstrumentedProvider {
43
45
  config;
44
46
  providerId;
47
+ parsedConfig;
45
48
  recorder;
46
49
  constructor(options) {
47
50
  this.providerId = options.id ?? "instrumented-observer";
48
51
  this.config = options.config ?? {};
49
- this.recorder = new RequestRecorder(this.config.observerOptions ?? {});
52
+ this.parsedConfig = InstrumentedProviderConfigSchema.parse(this.config);
53
+ this.recorder = new RequestRecorder(this.parsedConfig.observerOptions ?? {});
50
54
  }
51
55
  /**
52
56
  * Main Promptfoo provider entry point. Called for each test case.
@@ -56,7 +60,7 @@ export default class InstrumentedProvider {
56
60
  const taskDescription = context?.vars?.task ||
57
61
  context?.prompt?.label ||
58
62
  "unknown-task";
59
- const observe = this.config.observe !== false;
63
+ const observe = this.parsedConfig.observe !== false;
60
64
  // Start observation
61
65
  if (observe) {
62
66
  this.recorder.start(sessionId, this.id(), taskDescription);
@@ -97,9 +101,9 @@ export default class InstrumentedProvider {
97
101
  * Detect whether the model should use the Responses API based on config.
98
102
  */
99
103
  isResponsesModel() {
100
- const model = this.config.modelName || this.config.model || "";
101
- return (this.config.reasoning != null ||
102
- this.config.reasoning_effort != null ||
104
+ const model = this.parsedConfig.modelName ?? this.parsedConfig.model ?? "";
105
+ return (this.parsedConfig.reasoning != null ||
106
+ this.parsedConfig.reasoning_effort != null ||
103
107
  model.startsWith("gpt-5") ||
104
108
  model.startsWith("o1") ||
105
109
  model.startsWith("o3") ||
@@ -111,10 +115,8 @@ export default class InstrumentedProvider {
111
115
  * recorder's fetch wrapper so the API call is captured.
112
116
  */
113
117
  async callOpenAI(prompt) {
114
- const model = this.config.modelName ||
115
- this.config.model ||
116
- "gpt-4o";
117
- const apiKey = this.config.apiKey || process.env.OPENAI_API_KEY;
118
+ const model = this.parsedConfig.modelName ?? this.parsedConfig.model ?? "gpt-4o";
119
+ const apiKey = this.parsedConfig.apiKey ?? process.env.OPENAI_API_KEY;
118
120
  if (!apiKey) {
119
121
  return {
120
122
  error: "OPENAI_API_KEY not set. Configure it in env or provider config.",
@@ -130,8 +132,8 @@ export default class InstrumentedProvider {
130
132
  * Calls OpenAI Chat Completions API (non-reasoning models).
131
133
  */
132
134
  async callOpenAIChatCompletions(prompt, model, apiKey) {
133
- const temperature = this.config.temperature ?? 0;
134
- const maxTokens = this.config.max_tokens || 4096;
135
+ const temperature = this.parsedConfig.temperature ?? 0;
136
+ const maxTokens = this.parsedConfig.max_tokens ?? 4096;
135
137
  const fetchFn = this.recorder.isRunning()
136
138
  ? this.recorder.fetch.bind(this.recorder)
137
139
  : globalThis.fetch;
@@ -149,7 +151,7 @@ export default class InstrumentedProvider {
149
151
  },
150
152
  method: "POST",
151
153
  });
152
- const data = (await response.json());
154
+ const data = OpenAIChatResponseSchema.parse(await response.json());
153
155
  if (data.error) {
154
156
  return {
155
157
  error: data.error.message ?? "Unknown OpenAI error",
@@ -175,11 +177,11 @@ export default class InstrumentedProvider {
175
177
  * Calls OpenAI Responses API (reasoning models like GPT-5.x, o-series).
176
178
  */
177
179
  async callOpenAIResponses(prompt, model, apiKey) {
178
- const maxOutputTokens = this.config.max_output_tokens || 32_000;
179
- const reasoning = this.config.reasoning;
180
- const reasoningEffort = reasoning?.effort || this.config.reasoning_effort || "medium";
180
+ const maxOutputTokens = this.parsedConfig.max_output_tokens ?? 32_000;
181
+ const reasoning = this.parsedConfig.reasoning;
182
+ const reasoningEffort = reasoning?.effort ?? this.parsedConfig.reasoning_effort ?? "medium";
181
183
  const reasoningSummary = reasoning?.summary;
182
- const verbosity = this.config.verbosity;
184
+ const verbosity = this.parsedConfig.verbosity;
183
185
  const fetchFn = this.recorder.isRunning()
184
186
  ? this.recorder.fetch.bind(this.recorder)
185
187
  : globalThis.fetch;
@@ -201,7 +203,7 @@ export default class InstrumentedProvider {
201
203
  },
202
204
  method: "POST",
203
205
  });
204
- const data = (await response.json());
206
+ const data = OpenAIResponsesResponseSchema.parse(await response.json());
205
207
  if (data.error) {
206
208
  return {
207
209
  error: data.error.message ?? "Unknown OpenAI error",
package/dist/cli.js CHANGED
@@ -37,6 +37,7 @@ import { existsSync } from "fs";
37
37
  import { dirname, resolve } from "path";
38
38
  import { fileURLToPath } from "url";
39
39
  import { buildCliProgram } from "./cli-program.js";
40
+ import { findExplicitDotenvArg } from "./lib/dotenv-resolution.js";
40
41
  const __dirname = dirname(fileURLToPath(import.meta.url));
41
42
  const ROOT = resolve(__dirname, "..");
42
43
  // ---------------------------------------------------------------------------
@@ -52,10 +53,9 @@ const ROOT = resolve(__dirname, "..");
52
53
  // installed globally via npm (production).
53
54
  // ---------------------------------------------------------------------------
54
55
  function resolveEnvPath() {
55
- const idx = process.argv.indexOf("--dotenv");
56
- if (idx !== -1 && process.argv[idx + 1]) {
57
- return resolve(process.argv[idx + 1]);
58
- }
56
+ const explicit = findExplicitDotenvArg();
57
+ if (explicit)
58
+ return explicit;
59
59
  // Monorepo root .env (dev mode)
60
60
  const monorepoEnv = resolve(ROOT, "..", "..", ".env");
61
61
  if (existsSync(monorepoEnv))
@@ -18,7 +18,7 @@ import { resolve, relative, basename } from "path";
18
18
  import { Command } from "commander";
19
19
  import { load } from "js-yaml";
20
20
  import { detectLegacyFieldNames, parseCanonicalTaskFile, } from "../adapters/task-sources/repo-schemas.js";
21
- import { validateCanonicalTasks, formatValidationResult, } from "../adapters/task-sources/repo-validation.js";
21
+ import { validateCanonicalTasks, formatRepoValidationResult, } from "../adapters/task-sources/repo-validation.js";
22
22
  import { discoverTsTaskFiles, loadTsTaskFile, } from "../adapters/task-sources/task-file-loader.js";
23
23
  export function createValidateTasksCommand() {
24
24
  return new Command("tasks")
@@ -103,7 +103,7 @@ export async function runValidateTasks(tasksPath, opts) {
103
103
  if (allTasks.length > 0) {
104
104
  console.log();
105
105
  const semanticResult = validateCanonicalTasks(allTasks);
106
- const formatted = formatValidationResult(semanticResult);
106
+ const formatted = formatRepoValidationResult(semanticResult);
107
107
  console.log(formatted);
108
108
  if (!semanticResult.valid) {
109
109
  hasErrors = true;
@@ -35,7 +35,7 @@ import { CompositeTaskSource, ContentLakeTaskSource, RepoTaskSource, } from "./a
35
35
  import { resolveVendoredSubdir } from "./pipeline/compiler/config-loader.js";
36
36
  import { createAgentHarnessBase, createKnowledgeProbeBase, createLiteracyModeBase, createMcpServerModeBase, } from "./pipeline/compiler/mode-bases/index.js";
37
37
  import { createSanityLiteracyPreset } from "./pipeline/compiler/presets/index.js";
38
- import { getSanityClient } from "./sanity/client.js";
38
+ import { getAilfSanityClient } from "./sanity/client.js";
39
39
  import { ReportStore } from "./report-store.js";
40
40
  import { loadSinks } from "./sinks/index.js";
41
41
  /**
@@ -316,8 +316,10 @@ export function createTaskSource(config) {
316
316
  return new RepoTaskSource(config.repoTasksPath);
317
317
  }
318
318
  // "content-lake" — Studio-authored ailf.task documents only.
319
+ // AILF documents live in the private dataset (D0043) — route through
320
+ // the AILF client factory so reads target `ailf-prod-private`.
319
321
  if (config.taskSourceType === "content-lake") {
320
- return new ContentLakeTaskSource(getSanityClient({
322
+ return new ContentLakeTaskSource(getAilfSanityClient({
321
323
  token: process.env.AILF_REPORT_SANITY_API_TOKEN ??
322
324
  process.env.SANITY_API_TOKEN ??
323
325
  undefined,
package/dist/index.d.ts CHANGED
@@ -38,6 +38,6 @@ export type { PricingEntry, PromptEntry, SourceEntry } from "./_vendor/ailf-core
38
38
  export { env } from "./_vendor/ailf-core/index.d.ts";
39
39
  export type { AgentHarnessTaskDefinition, CustomTaskDefinition, GeneralizedAssertionDefinition, GeneralizedDocRef, GeneralizedTaskDefinition, GeneralizedTemplatedAssertion, GeneralizedValueAssertion, IdDocRef, KnowledgeProbeTaskDefinition, LiteracyTaskDefinition, MCPServerTaskDefinition, PathDocRef, PerspectiveDocRef, RubricRef, SlugDocRef, TaskCommonFields, TaskDifficulty, TaskOptions, TaskProviderConfig, TaskStatus, } from "./_vendor/ailf-core/index.d.ts";
40
40
  export { CanonicalTaskFileSchema, CanonicalTaskSchema, CURATED_ASSERTION_TYPES, detectLegacyFieldNames, parseCanonicalTaskFile, RUBRIC_TEMPLATE_NAMES, type CanonicalTask, type CuratedAssertionType, type RubricTemplateName, } from "./adapters/task-sources/repo-schemas.js";
41
- export { formatValidationResult, validateCanonicalTasks, type ValidationMessage, type ValidationResult, } from "./adapters/task-sources/repo-validation.js";
41
+ export { formatRepoValidationResult, validateCanonicalTasks, type RepoValidationMessage, type RepoValidationResult, } from "./adapters/task-sources/repo-validation.js";
42
42
  export { InMemoryPluginRegistry } from "./_vendor/ailf-core/index.d.ts";
43
43
  export type { CompilationContext, ModeBase, ModeCompileResult, ModeHandler, PresetDefinition, } from "./_vendor/ailf-core/index.d.ts";
package/dist/index.js CHANGED
@@ -45,7 +45,7 @@ export { env } from "./_vendor/ailf-core/index.js";
45
45
  // Validation — for programmatic validation of task YAML
46
46
  // ---------------------------------------------------------------------------
47
47
  export { CanonicalTaskFileSchema, CanonicalTaskSchema, CURATED_ASSERTION_TYPES, detectLegacyFieldNames, parseCanonicalTaskFile, RUBRIC_TEMPLATE_NAMES, } from "./adapters/task-sources/repo-schemas.js";
48
- export { formatValidationResult, validateCanonicalTasks, } from "./adapters/task-sources/repo-validation.js";
48
+ export { formatRepoValidationResult, validateCanonicalTasks, } from "./adapters/task-sources/repo-validation.js";
49
49
  // ---------------------------------------------------------------------------
50
50
  // Plugin extension points — for authoring custom presets, modes, and registries
51
51
  // ---------------------------------------------------------------------------
package/dist/job-store.js CHANGED
@@ -11,7 +11,7 @@
11
11
  * @see docs/design-docs/api-service-gateway.md
12
12
  * @see packages/studio/src/schema/job.ts — Sanity document schema
13
13
  */
14
- import { getSanityClient } from "./sanity/client.js";
14
+ import { getAilfSanityClient } from "./sanity/client.js";
15
15
  // ---------------------------------------------------------------------------
16
16
  // Constants
17
17
  // ---------------------------------------------------------------------------
@@ -28,7 +28,7 @@ export class JobStore {
28
28
  this.client = options.client;
29
29
  }
30
30
  else {
31
- this.client = getSanityClient({
31
+ this.client = getAilfSanityClient({
32
32
  ...(options.dataset ? { dataset: options.dataset } : {}),
33
33
  ...(options.projectId ? { projectId: options.projectId } : {}),
34
34
  ...(options.token ? { token: options.token } : {}),
@@ -0,0 +1,21 @@
1
+ /**
2
+ * Dotenv resolution helpers shared between the CLI bootstrap
3
+ * (`packages/eval/src/cli.ts`) and any code path that needs to honor the
4
+ * same `--dotenv <path>` override (today: `pipeline/checks.ts::checkEnvironment`,
5
+ * which re-loads the active env file as part of validation).
6
+ *
7
+ * Centralizing the argv parse means future changes — validating the path
8
+ * exists before returning, supporting `--dotenv=path` form, accepting an
9
+ * env-var fallback — happen in one place instead of drifting between
10
+ * call sites.
11
+ */
12
+ /**
13
+ * Find an explicit `--dotenv <path>` argument and return its absolute,
14
+ * resolved path. Returns `undefined` when the flag is absent or has no
15
+ * following value.
16
+ *
17
+ * @param argv - Defaults to `process.argv`. Pass an explicit array in
18
+ * tests or in non-CLI hosts that have already shifted off the script
19
+ * prefix.
20
+ */
21
+ export declare function findExplicitDotenvArg(argv?: readonly string[]): string | undefined;