@sanity/ailf 4.0.6 → 4.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/ailf.js +6 -1
- package/dist/_vendor/ailf-core/schemas/external-providers.d.ts +136 -0
- package/dist/_vendor/ailf-core/schemas/external-providers.js +136 -0
- package/dist/_vendor/ailf-core/schemas/index.d.ts +2 -0
- package/dist/_vendor/ailf-core/schemas/index.js +2 -0
- package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +2 -3
- package/dist/_vendor/ailf-core/schemas/report.d.ts +251 -0
- package/dist/_vendor/ailf-core/schemas/report.js +235 -0
- package/dist/_vendor/ailf-core/services/index.d.ts +1 -0
- package/dist/_vendor/ailf-core/services/index.js +1 -0
- package/dist/_vendor/ailf-core/services/report-to-markdown.d.ts +38 -0
- package/dist/_vendor/ailf-core/services/report-to-markdown.js +696 -0
- package/dist/_vendor/ailf-core/types/api-requests.d.ts +159 -0
- package/dist/_vendor/ailf-core/types/api-requests.js +27 -0
- package/dist/_vendor/ailf-core/types/index.d.ts +3 -0
- package/dist/_vendor/ailf-core/types/pipeline-request.d.ts +112 -0
- package/dist/_vendor/ailf-core/types/pipeline-request.js +18 -0
- package/dist/_vendor/ailf-core/types/repo-config.d.ts +146 -0
- package/dist/_vendor/ailf-core/types/repo-config.js +18 -0
- package/dist/_vendor/ailf-shared/index.d.ts +7 -5
- package/dist/_vendor/ailf-shared/index.js +7 -5
- package/dist/adapters/api-client/types.d.ts +2 -5
- package/dist/adapters/task-sources/content-lake-task-source.d.ts +58 -1
- package/dist/adapters/task-sources/content-lake-task-source.js +1 -1
- package/dist/adapters/task-sources/index.d.ts +1 -1
- package/dist/adapters/task-sources/index.js +1 -1
- package/dist/adapters/task-sources/repo-schemas.d.ts +3 -2
- package/dist/adapters/task-sources/repo-schemas.js +3 -1
- package/dist/adapters/task-sources/repo-task-source.d.ts +11 -1
- package/dist/adapters/task-sources/repo-task-source.js +7 -4
- package/dist/adapters/task-sources/repo-validation.d.ts +6 -6
- package/dist/adapters/task-sources/repo-validation.js +1 -1
- package/dist/agent-observer/agentic-provider.d.ts +1 -0
- package/dist/agent-observer/agentic-provider.js +43 -36
- package/dist/agent-observer/config-schemas.d.ts +61 -0
- package/dist/agent-observer/config-schemas.js +65 -0
- package/dist/agent-observer/provider.d.ts +1 -0
- package/dist/agent-observer/provider.js +19 -17
- package/dist/cli.js +4 -4
- package/dist/commands/validate-tasks.js +2 -2
- package/dist/composition-root.d.ts +7 -0
- package/dist/composition-root.js +27 -12
- package/dist/index.d.ts +1 -1
- package/dist/index.js +1 -1
- package/dist/job-store.js +2 -2
- package/dist/lib/dotenv-resolution.d.ts +21 -0
- package/dist/lib/dotenv-resolution.js +30 -0
- package/dist/orchestration/steps/fetch-docs-step.js +10 -30
- package/dist/orchestration/steps/generate-configs-step.d.ts +8 -15
- package/dist/orchestration/steps/generate-configs-step.js +26 -118
- package/dist/orchestration/steps/mirror-repo-tasks-step.js +26 -3
- package/dist/orchestration/steps/run-eval-step.js +21 -3
- package/dist/pipeline/agent-behavior-report.d.ts +2 -8
- package/dist/pipeline/cache.d.ts +2 -2
- package/dist/pipeline/checks.d.ts +10 -2
- package/dist/pipeline/checks.js +14 -4
- package/dist/pipeline/compiler/literacy-bridge.js +2 -2
- package/dist/pipeline/compiler/mode-handlers/agent-harness/types.d.ts +2 -2
- package/dist/pipeline/compiler/mode-handlers/index.d.ts +1 -1
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.d.ts +2 -2
- package/dist/pipeline/compiler/mode-handlers/literacy/index.d.ts +1 -1
- package/dist/pipeline/compiler/mode-handlers/literacy/types.d.ts +3 -3
- package/dist/pipeline/compiler/promptfoo-compiler.js +7 -11
- package/dist/pipeline/compiler/provider-assembler.js +33 -3
- package/dist/pipeline/compiler/rubric-resolution.d.ts +2 -2
- package/dist/pipeline/mirror-repo-tasks.d.ts +13 -5
- package/dist/pipeline/mirror-repo-tasks.js +16 -8
- package/dist/pipeline/pr-comment.d.ts +22 -9
- package/dist/pipeline/pr-comment.js +52 -472
- package/dist/pipeline/resolve-mappings.d.ts +8 -3
- package/dist/promptfoo-providers/mock-path.d.ts +12 -0
- package/dist/promptfoo-providers/mock-path.js +15 -0
- package/dist/report-store.d.ts +63 -1
- package/dist/report-store.js +111 -31
- package/dist/sanity/client.d.ts +58 -0
- package/dist/sanity/client.js +106 -0
- package/package.json +8 -7
- package/dist/orchestration/load-pipeline-tasks.d.ts +0 -40
- package/dist/orchestration/load-pipeline-tasks.js +0 -57
|
@@ -16,6 +16,7 @@
|
|
|
16
16
|
* @see docs/archive/exec-plans/tasks-as-content/phase-4-repo-based-tasks.md
|
|
17
17
|
*/
|
|
18
18
|
import { z } from "zod";
|
|
19
|
+
import type { AilfEvalWorkflow, RepoConfig } from "../../_vendor/ailf-core/index.d.ts";
|
|
19
20
|
/**
|
|
20
21
|
* The set of assertion types allowed in task files.
|
|
21
22
|
*
|
|
@@ -1521,7 +1522,7 @@ export declare const RepoConfigSchema: z.ZodObject<{
|
|
|
1521
1522
|
}, z.core.$strip>>;
|
|
1522
1523
|
}, z.core.$strip>>;
|
|
1523
1524
|
}, z.core.$strip>;
|
|
1524
|
-
export type RepoConfig
|
|
1525
|
+
export type { RepoConfig } from "../../_vendor/ailf-core/index.d.ts";
|
|
1525
1526
|
/**
|
|
1526
1527
|
* Parse and validate .ailf/config.yaml content. Returns typed config or throws.
|
|
1527
1528
|
*/
|
|
@@ -1551,7 +1552,7 @@ export declare const AilfEvalWorkflowSchema: z.ZodObject<{
|
|
|
1551
1552
|
}, z.core.$loose>>;
|
|
1552
1553
|
}, z.core.$loose>>;
|
|
1553
1554
|
}, z.core.$loose>;
|
|
1554
|
-
export type AilfEvalWorkflow
|
|
1555
|
+
export type { AilfEvalWorkflow } from "../../_vendor/ailf-core/index.d.ts";
|
|
1555
1556
|
/**
|
|
1556
1557
|
* Parse and validate a `.github/workflows/ailf-eval.yml` payload (already
|
|
1557
1558
|
* loaded from YAML). Throws with a Zod-formatted message on failure.
|
|
@@ -334,7 +334,9 @@ export const ContentLakeAuthorableTaskSchema = LiteracyTaskSchema;
|
|
|
334
334
|
* Schema for an array of canonical tasks — what a single .ailf/tasks/*.yaml
|
|
335
335
|
* file contains. Each file must define at least one task.
|
|
336
336
|
*/
|
|
337
|
-
export const CanonicalTaskFileSchema = z
|
|
337
|
+
export const CanonicalTaskFileSchema = z
|
|
338
|
+
.array(CanonicalTaskSchema)
|
|
339
|
+
.min(1);
|
|
338
340
|
/**
|
|
339
341
|
* Pre-process raw task entries before discriminated-union parsing: when
|
|
340
342
|
* `mode` is missing, default it to `"literacy"`. Zod cannot default a
|
|
@@ -19,8 +19,18 @@
|
|
|
19
19
|
* @see packages/core/src/ports/task-source.ts — TaskSource port
|
|
20
20
|
*/
|
|
21
21
|
import type { FilterOptions, GeneralizedTaskDefinition, TaskSource } from "../../_vendor/ailf-core/index.d.ts";
|
|
22
|
+
export interface RepoTaskSourceOptions {
|
|
23
|
+
/**
|
|
24
|
+
* When true, treat a missing directory or empty task set as a valid
|
|
25
|
+
* empty result instead of throwing. Used by the composition root for
|
|
26
|
+
* the AILF-bundled `tasks/${mode}/` source, which is missing in some
|
|
27
|
+
* test rootDirs and modes that ship no defaults.
|
|
28
|
+
*/
|
|
29
|
+
allowMissing?: boolean;
|
|
30
|
+
}
|
|
22
31
|
export declare class RepoTaskSource implements TaskSource {
|
|
23
32
|
private readonly tasksDir;
|
|
24
|
-
|
|
33
|
+
private readonly options;
|
|
34
|
+
constructor(tasksDir: string, options?: RepoTaskSourceOptions);
|
|
25
35
|
loadTasks(filter?: FilterOptions): Promise<GeneralizedTaskDefinition[]>;
|
|
26
36
|
}
|
|
@@ -26,16 +26,17 @@ import { detectLegacyFieldNames, parseCanonicalTaskFile, } from "./repo-schemas.
|
|
|
26
26
|
import { discoverTsTaskFiles, loadTsTaskFile } from "./task-file-loader.js";
|
|
27
27
|
/** Set of canonical mode names for O(1) lookup */
|
|
28
28
|
const KNOWN_MODES = new Set(CANONICAL_EVAL_MODES);
|
|
29
|
-
// ---------------------------------------------------------------------------
|
|
30
|
-
// RepoTaskSource adapter
|
|
31
|
-
// ---------------------------------------------------------------------------
|
|
32
29
|
export class RepoTaskSource {
|
|
33
30
|
tasksDir;
|
|
34
|
-
|
|
31
|
+
options;
|
|
32
|
+
constructor(tasksDir, options = {}) {
|
|
35
33
|
this.tasksDir = tasksDir;
|
|
34
|
+
this.options = options;
|
|
36
35
|
}
|
|
37
36
|
async loadTasks(filter) {
|
|
38
37
|
if (!existsSync(this.tasksDir)) {
|
|
38
|
+
if (this.options.allowMissing)
|
|
39
|
+
return [];
|
|
39
40
|
throw new Error(`Repo tasks directory not found: ${this.tasksDir}\n` +
|
|
40
41
|
" Provide a valid path via --repo-tasks-path");
|
|
41
42
|
}
|
|
@@ -44,6 +45,8 @@ export class RepoTaskSource {
|
|
|
44
45
|
.sort();
|
|
45
46
|
const tsFiles = discoverTsTaskFiles(this.tasksDir);
|
|
46
47
|
if (yamlFiles.length === 0 && tsFiles.length === 0) {
|
|
48
|
+
if (this.options.allowMissing)
|
|
49
|
+
return [];
|
|
47
50
|
throw new Error(`No task files found in ${this.tasksDir}\n` +
|
|
48
51
|
" Expected .ailf/tasks/*.yaml or .ailf/tasks/*.task.ts files");
|
|
49
52
|
}
|
|
@@ -15,12 +15,12 @@
|
|
|
15
15
|
* has been eliminated — all validation logic now lives here.
|
|
16
16
|
*/
|
|
17
17
|
import { type CanonicalTask } from "./repo-schemas.js";
|
|
18
|
-
export interface
|
|
18
|
+
export interface RepoValidationResult {
|
|
19
19
|
valid: boolean;
|
|
20
|
-
errors:
|
|
21
|
-
warnings:
|
|
20
|
+
errors: RepoValidationMessage[];
|
|
21
|
+
warnings: RepoValidationMessage[];
|
|
22
22
|
}
|
|
23
|
-
export interface
|
|
23
|
+
export interface RepoValidationMessage {
|
|
24
24
|
taskId: string;
|
|
25
25
|
field: string;
|
|
26
26
|
message: string;
|
|
@@ -32,8 +32,8 @@ export interface ValidationMessage {
|
|
|
32
32
|
* areas, unresolved slugs) and errors for issues that would cause pipeline
|
|
33
33
|
* failures (completely missing required fields — though Zod catches most).
|
|
34
34
|
*/
|
|
35
|
-
export declare function validateCanonicalTasks(tasks: CanonicalTask[]):
|
|
35
|
+
export declare function validateCanonicalTasks(tasks: CanonicalTask[]): RepoValidationResult;
|
|
36
36
|
/**
|
|
37
37
|
* Format validation results for console output.
|
|
38
38
|
*/
|
|
39
|
-
export declare function
|
|
39
|
+
export declare function formatRepoValidationResult(result: RepoValidationResult): string;
|
|
@@ -110,7 +110,7 @@ export function validateCanonicalTasks(tasks) {
|
|
|
110
110
|
/**
|
|
111
111
|
* Format validation results for console output.
|
|
112
112
|
*/
|
|
113
|
-
export function
|
|
113
|
+
export function formatRepoValidationResult(result) {
|
|
114
114
|
const lines = [];
|
|
115
115
|
if (result.errors.length > 0) {
|
|
116
116
|
lines.push("Errors:");
|
|
@@ -31,6 +31,8 @@
|
|
|
31
31
|
*/
|
|
32
32
|
import { config as loadDotenv } from "dotenv";
|
|
33
33
|
import { randomUUID } from "crypto";
|
|
34
|
+
import { AnthropicResponseSchema, FetchPageToolArgsSchema, GoogleSearchResponseSchema, ListDocsToolArgsSchema, OpenAIChatResponseSchema, WebSearchToolArgsSchema, } from "../_vendor/ailf-core/index.js";
|
|
35
|
+
import { AgenticProviderConfigSchema, } from "./config-schemas.js";
|
|
34
36
|
import { RequestRecorder } from "./proxy.js";
|
|
35
37
|
import { calculateCost } from "./pricing.js";
|
|
36
38
|
import { isAllowedOrigin } from "../sources.js";
|
|
@@ -120,32 +122,31 @@ export default class AgenticProvider {
|
|
|
120
122
|
docBaseUrl;
|
|
121
123
|
docsUrlPattern;
|
|
122
124
|
llmsTxtUrl;
|
|
125
|
+
parsedConfig;
|
|
123
126
|
priorityDomain;
|
|
124
127
|
recorder;
|
|
125
128
|
searchMode;
|
|
126
129
|
constructor(options) {
|
|
127
130
|
this.providerId = options.id ?? "agentic-observer";
|
|
128
131
|
this.config = options.config ?? {};
|
|
129
|
-
this.
|
|
132
|
+
this.parsedConfig = AgenticProviderConfigSchema.parse(this.config);
|
|
133
|
+
this.agentMode = this.parsedConfig.agentMode ?? "naive";
|
|
130
134
|
// Documentation source configuration — defaults to Sanity production
|
|
131
|
-
this.docBaseUrl = this.
|
|
132
|
-
this.llmsTxtUrl = this.
|
|
135
|
+
this.docBaseUrl = this.parsedConfig.docBaseUrl ?? DEFAULT_DOC_BASE_URL;
|
|
136
|
+
this.llmsTxtUrl = this.parsedConfig.llmsTxtUrl ?? DEFAULT_LLMS_TXT_URL;
|
|
133
137
|
this.docsUrlPattern = buildDocsUrlPattern(this.docBaseUrl);
|
|
134
138
|
// Custom HTTP headers (e.g., Vercel bypass protection token)
|
|
135
|
-
this.customHeaders =
|
|
136
|
-
this.config.customHeaders || {};
|
|
139
|
+
this.customHeaders = this.parsedConfig.customHeaders ?? {};
|
|
137
140
|
// Extract priority domain from docBaseUrl for search result ranking
|
|
138
141
|
const baseUrlObj = new URL(this.docBaseUrl);
|
|
139
142
|
this.priorityDomain =
|
|
140
|
-
this.
|
|
143
|
+
this.parsedConfig.priorityDomain ??
|
|
141
144
|
baseUrlObj.hostname.replace(/^www\./, "");
|
|
142
145
|
// Optional origin sandboxing — restrict which URLs the agent can access
|
|
143
|
-
this.allowedOrigins =
|
|
144
|
-
? this.config.allowedOrigins.filter(Boolean)
|
|
145
|
-
: [];
|
|
146
|
+
this.allowedOrigins = (this.parsedConfig.allowedOrigins ?? []).filter(Boolean);
|
|
146
147
|
// Search mode: controls web_search tool availability and filtering
|
|
147
|
-
this.searchMode = this.
|
|
148
|
-
this.recorder = new RequestRecorder(this.
|
|
148
|
+
this.searchMode = this.parsedConfig.searchMode ?? "open";
|
|
149
|
+
this.recorder = new RequestRecorder(this.parsedConfig.observerOptions ?? {});
|
|
149
150
|
}
|
|
150
151
|
/**
|
|
151
152
|
* Main Promptfoo provider entry point. Runs the full agentic loop.
|
|
@@ -155,7 +156,7 @@ export default class AgenticProvider {
|
|
|
155
156
|
const taskDescription = context?.vars?.task ||
|
|
156
157
|
context?.prompt?.label ||
|
|
157
158
|
"unknown-task";
|
|
158
|
-
const observe = this.
|
|
159
|
+
const observe = this.parsedConfig.observe !== false;
|
|
159
160
|
if (observe) {
|
|
160
161
|
this.recorder.start(sessionId, this.id(), taskDescription);
|
|
161
162
|
}
|
|
@@ -188,7 +189,7 @@ export default class AgenticProvider {
|
|
|
188
189
|
return this.recorder;
|
|
189
190
|
}
|
|
190
191
|
id() {
|
|
191
|
-
const model = this.
|
|
192
|
+
const model = this.parsedConfig.model ?? this.providerId;
|
|
192
193
|
return `agentic:${this.agentMode}:${model}`;
|
|
193
194
|
}
|
|
194
195
|
// -------------------------------------------------------------------------
|
|
@@ -276,13 +277,13 @@ export default class AgenticProvider {
|
|
|
276
277
|
* fallback heuristics for backward compatibility.
|
|
277
278
|
*/
|
|
278
279
|
detectProvider() {
|
|
279
|
-
const explicit = this.
|
|
280
|
+
const explicit = this.parsedConfig.provider;
|
|
280
281
|
if (explicit === "anthropic")
|
|
281
282
|
return "anthropic";
|
|
282
283
|
if (explicit === "openai")
|
|
283
284
|
return "openai";
|
|
284
285
|
// Heuristic fallback: detect from model name
|
|
285
|
-
const model = this.
|
|
286
|
+
const model = this.parsedConfig.model ?? "";
|
|
286
287
|
if (model.startsWith("claude"))
|
|
287
288
|
return "anthropic";
|
|
288
289
|
return "openai";
|
|
@@ -421,14 +422,20 @@ export default class AgenticProvider {
|
|
|
421
422
|
}
|
|
422
423
|
async executeTool(name, argsJson, fetchFn) {
|
|
423
424
|
try {
|
|
424
|
-
const
|
|
425
|
+
const rawArgs = JSON.parse(argsJson);
|
|
425
426
|
switch (name) {
|
|
426
|
-
case "fetch_page":
|
|
427
|
+
case "fetch_page": {
|
|
428
|
+
const args = FetchPageToolArgsSchema.parse(rawArgs);
|
|
427
429
|
return await this.executeFetchPage(args.url, fetchFn);
|
|
428
|
-
|
|
430
|
+
}
|
|
431
|
+
case "list_docs": {
|
|
432
|
+
const args = ListDocsToolArgsSchema.parse(rawArgs);
|
|
429
433
|
return await this.executeListDocs(args.site, fetchFn);
|
|
430
|
-
|
|
434
|
+
}
|
|
435
|
+
case "web_search": {
|
|
436
|
+
const args = WebSearchToolArgsSchema.parse(rawArgs);
|
|
431
437
|
return await this.executeWebSearch(args.query, fetchFn);
|
|
438
|
+
}
|
|
432
439
|
default:
|
|
433
440
|
return JSON.stringify({ error: `Unknown tool: ${name}` });
|
|
434
441
|
}
|
|
@@ -454,7 +461,7 @@ export default class AgenticProvider {
|
|
|
454
461
|
q: query,
|
|
455
462
|
});
|
|
456
463
|
const response = await fetchFn(`https://www.googleapis.com/customsearch/v1?${params}`);
|
|
457
|
-
const data = (await response.json());
|
|
464
|
+
const data = GoogleSearchResponseSchema.parse(await response.json());
|
|
458
465
|
if (data.items?.length) {
|
|
459
466
|
results = data.items.map((item) => ({
|
|
460
467
|
snippet: item.snippet,
|
|
@@ -617,11 +624,11 @@ export default class AgenticProvider {
|
|
|
617
624
|
// Anthropic agentic loop
|
|
618
625
|
// -------------------------------------------------------------------------
|
|
619
626
|
async runAnthropicLoop(prompt) {
|
|
620
|
-
const model = this.
|
|
621
|
-
const temperature = this.
|
|
622
|
-
const maxTokens = this.
|
|
623
|
-
const maxToolRounds = this.
|
|
624
|
-
const apiKey = this.
|
|
627
|
+
const model = this.parsedConfig.model ?? "claude-sonnet-4-20250514";
|
|
628
|
+
const temperature = this.parsedConfig.temperature ?? 0.2;
|
|
629
|
+
const maxTokens = this.parsedConfig.max_tokens ?? 4096;
|
|
630
|
+
const maxToolRounds = this.parsedConfig.maxToolRounds ?? 5;
|
|
631
|
+
const apiKey = this.parsedConfig.apiKey ?? process.env.ANTHROPIC_API_KEY;
|
|
625
632
|
if (!apiKey) {
|
|
626
633
|
return {
|
|
627
634
|
error: "ANTHROPIC_API_KEY not set. Configure it in env or provider config.",
|
|
@@ -691,7 +698,7 @@ export default class AgenticProvider {
|
|
|
691
698
|
},
|
|
692
699
|
method: "POST",
|
|
693
700
|
});
|
|
694
|
-
const data = (await response.json());
|
|
701
|
+
const data = AnthropicResponseSchema.parse(await response.json());
|
|
695
702
|
if (data.error) {
|
|
696
703
|
return {
|
|
697
704
|
error: data.error.message ??
|
|
@@ -799,22 +806,22 @@ export default class AgenticProvider {
|
|
|
799
806
|
// OpenAI agentic loop
|
|
800
807
|
// -------------------------------------------------------------------------
|
|
801
808
|
async runOpenAILoop(prompt) {
|
|
802
|
-
const model = this.
|
|
803
|
-
const temperature = this.
|
|
804
|
-
const maxToolRounds = this.
|
|
805
|
-
const apiKey = this.
|
|
809
|
+
const model = this.parsedConfig.model ?? "gpt-4o";
|
|
810
|
+
const temperature = this.parsedConfig.temperature ?? 0.2;
|
|
811
|
+
const maxToolRounds = this.parsedConfig.maxToolRounds ?? 5;
|
|
812
|
+
const apiKey = this.parsedConfig.apiKey ?? process.env.OPENAI_API_KEY;
|
|
806
813
|
// Newer OpenAI models (gpt-5.x, o-series) use max_completion_tokens
|
|
807
814
|
// instead of max_tokens, and reject custom temperature values. Detect
|
|
808
815
|
// from config or model name. See W0131.
|
|
809
816
|
const isReasoningModel = model.startsWith("gpt-5") ||
|
|
810
817
|
model.startsWith("o3") ||
|
|
811
818
|
model.startsWith("o4");
|
|
812
|
-
const useMaxCompletionTokens = this.
|
|
813
|
-
this.
|
|
819
|
+
const useMaxCompletionTokens = this.parsedConfig.max_output_tokens != null ||
|
|
820
|
+
this.parsedConfig.max_completion_tokens != null ||
|
|
814
821
|
isReasoningModel;
|
|
815
|
-
const maxTokensValue = this.
|
|
816
|
-
this.
|
|
817
|
-
this.
|
|
822
|
+
const maxTokensValue = this.parsedConfig.max_output_tokens ??
|
|
823
|
+
this.parsedConfig.max_completion_tokens ??
|
|
824
|
+
this.parsedConfig.max_tokens ??
|
|
818
825
|
4096;
|
|
819
826
|
const tokenLimitParam = useMaxCompletionTokens
|
|
820
827
|
? { max_completion_tokens: maxTokensValue }
|
|
@@ -860,7 +867,7 @@ export default class AgenticProvider {
|
|
|
860
867
|
},
|
|
861
868
|
method: "POST",
|
|
862
869
|
});
|
|
863
|
-
const data = (await response.json());
|
|
870
|
+
const data = OpenAIChatResponseSchema.parse(await response.json());
|
|
864
871
|
if (data.error) {
|
|
865
872
|
return {
|
|
866
873
|
error: data.error.message ?? "Unknown OpenAI error",
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* config-schemas.ts
|
|
3
|
+
*
|
|
4
|
+
* Zod schemas for the promptfoo provider config blocks read by the
|
|
5
|
+
* agent-observer providers. Promptfoo passes config as
|
|
6
|
+
* `Record<string, unknown>`; parsing it once at the constructor turns
|
|
7
|
+
* those untyped reads into a typed struct and surfaces typos / wrong
|
|
8
|
+
* shapes as clear `ZodError`s instead of silent `undefined` reads.
|
|
9
|
+
*
|
|
10
|
+
* Lives in `eval` (not `core`) because these schemas are 1:1 with the
|
|
11
|
+
* providers' constructor surfaces and have no consumers outside this
|
|
12
|
+
* package. See docs/work-items/W0004.json.
|
|
13
|
+
*/
|
|
14
|
+
import { z } from "zod";
|
|
15
|
+
export declare const AgenticProviderConfigSchema: z.ZodObject<{
|
|
16
|
+
agentMode: z.ZodOptional<z.ZodEnum<{
|
|
17
|
+
naive: "naive";
|
|
18
|
+
optimized: "optimized";
|
|
19
|
+
}>>;
|
|
20
|
+
allowedOrigins: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
21
|
+
apiKey: z.ZodOptional<z.ZodString>;
|
|
22
|
+
customHeaders: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
|
|
23
|
+
docBaseUrl: z.ZodOptional<z.ZodString>;
|
|
24
|
+
llmsTxtUrl: z.ZodOptional<z.ZodString>;
|
|
25
|
+
max_completion_tokens: z.ZodOptional<z.ZodNumber>;
|
|
26
|
+
max_output_tokens: z.ZodOptional<z.ZodNumber>;
|
|
27
|
+
max_tokens: z.ZodOptional<z.ZodNumber>;
|
|
28
|
+
maxToolRounds: z.ZodOptional<z.ZodNumber>;
|
|
29
|
+
model: z.ZodOptional<z.ZodString>;
|
|
30
|
+
observe: z.ZodOptional<z.ZodBoolean>;
|
|
31
|
+
observerOptions: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
|
|
32
|
+
priorityDomain: z.ZodOptional<z.ZodString>;
|
|
33
|
+
provider: z.ZodOptional<z.ZodEnum<{
|
|
34
|
+
anthropic: "anthropic";
|
|
35
|
+
openai: "openai";
|
|
36
|
+
}>>;
|
|
37
|
+
searchMode: z.ZodOptional<z.ZodEnum<{
|
|
38
|
+
open: "open";
|
|
39
|
+
off: "off";
|
|
40
|
+
"origin-only": "origin-only";
|
|
41
|
+
}>>;
|
|
42
|
+
temperature: z.ZodOptional<z.ZodNumber>;
|
|
43
|
+
}, z.core.$strip>;
|
|
44
|
+
export type AgenticProviderConfig = z.infer<typeof AgenticProviderConfigSchema>;
|
|
45
|
+
export declare const InstrumentedProviderConfigSchema: z.ZodObject<{
|
|
46
|
+
apiKey: z.ZodOptional<z.ZodString>;
|
|
47
|
+
max_output_tokens: z.ZodOptional<z.ZodNumber>;
|
|
48
|
+
max_tokens: z.ZodOptional<z.ZodNumber>;
|
|
49
|
+
model: z.ZodOptional<z.ZodString>;
|
|
50
|
+
modelName: z.ZodOptional<z.ZodString>;
|
|
51
|
+
observe: z.ZodOptional<z.ZodBoolean>;
|
|
52
|
+
observerOptions: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
|
|
53
|
+
reasoning: z.ZodOptional<z.ZodObject<{
|
|
54
|
+
effort: z.ZodOptional<z.ZodString>;
|
|
55
|
+
summary: z.ZodOptional<z.ZodString>;
|
|
56
|
+
}, z.core.$strip>>;
|
|
57
|
+
reasoning_effort: z.ZodOptional<z.ZodString>;
|
|
58
|
+
temperature: z.ZodOptional<z.ZodNumber>;
|
|
59
|
+
verbosity: z.ZodOptional<z.ZodString>;
|
|
60
|
+
}, z.core.$strip>;
|
|
61
|
+
export type InstrumentedProviderConfig = z.infer<typeof InstrumentedProviderConfigSchema>;
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* config-schemas.ts
|
|
3
|
+
*
|
|
4
|
+
* Zod schemas for the promptfoo provider config blocks read by the
|
|
5
|
+
* agent-observer providers. Promptfoo passes config as
|
|
6
|
+
* `Record<string, unknown>`; parsing it once at the constructor turns
|
|
7
|
+
* those untyped reads into a typed struct and surfaces typos / wrong
|
|
8
|
+
* shapes as clear `ZodError`s instead of silent `undefined` reads.
|
|
9
|
+
*
|
|
10
|
+
* Lives in `eval` (not `core`) because these schemas are 1:1 with the
|
|
11
|
+
* providers' constructor surfaces and have no consumers outside this
|
|
12
|
+
* package. See docs/work-items/W0004.json.
|
|
13
|
+
*/
|
|
14
|
+
import { z } from "zod";
|
|
15
|
+
// ---------------------------------------------------------------------------
|
|
16
|
+
// Shared sub-schemas
|
|
17
|
+
// ---------------------------------------------------------------------------
|
|
18
|
+
/**
|
|
19
|
+
* `RecorderOptions` is owned by `proxy.ts` and has its own resolution
|
|
20
|
+
* logic. We accept any object here and let RequestRecorder do the rest;
|
|
21
|
+
* no double-validation at this boundary.
|
|
22
|
+
*/
|
|
23
|
+
const ObserverOptionsSchema = z.record(z.string(), z.unknown());
|
|
24
|
+
const ReasoningSchema = z.object({
|
|
25
|
+
effort: z.string().optional(),
|
|
26
|
+
summary: z.string().optional(),
|
|
27
|
+
});
|
|
28
|
+
// ---------------------------------------------------------------------------
|
|
29
|
+
// AgenticProvider config
|
|
30
|
+
// ---------------------------------------------------------------------------
|
|
31
|
+
export const AgenticProviderConfigSchema = z.object({
|
|
32
|
+
agentMode: z.enum(["naive", "optimized"]).optional(),
|
|
33
|
+
allowedOrigins: z.array(z.string()).optional(),
|
|
34
|
+
apiKey: z.string().optional(),
|
|
35
|
+
customHeaders: z.record(z.string(), z.string()).optional(),
|
|
36
|
+
docBaseUrl: z.string().optional(),
|
|
37
|
+
llmsTxtUrl: z.string().optional(),
|
|
38
|
+
max_completion_tokens: z.number().optional(),
|
|
39
|
+
max_output_tokens: z.number().optional(),
|
|
40
|
+
max_tokens: z.number().optional(),
|
|
41
|
+
maxToolRounds: z.number().optional(),
|
|
42
|
+
model: z.string().optional(),
|
|
43
|
+
observe: z.boolean().optional(),
|
|
44
|
+
observerOptions: ObserverOptionsSchema.optional(),
|
|
45
|
+
priorityDomain: z.string().optional(),
|
|
46
|
+
provider: z.enum(["anthropic", "openai"]).optional(),
|
|
47
|
+
searchMode: z.enum(["off", "open", "origin-only"]).optional(),
|
|
48
|
+
temperature: z.number().optional(),
|
|
49
|
+
});
|
|
50
|
+
// ---------------------------------------------------------------------------
|
|
51
|
+
// InstrumentedProvider config (provider.ts — wraps OpenAI directly)
|
|
52
|
+
// ---------------------------------------------------------------------------
|
|
53
|
+
export const InstrumentedProviderConfigSchema = z.object({
|
|
54
|
+
apiKey: z.string().optional(),
|
|
55
|
+
max_output_tokens: z.number().optional(),
|
|
56
|
+
max_tokens: z.number().optional(),
|
|
57
|
+
model: z.string().optional(),
|
|
58
|
+
modelName: z.string().optional(),
|
|
59
|
+
observe: z.boolean().optional(),
|
|
60
|
+
observerOptions: ObserverOptionsSchema.optional(),
|
|
61
|
+
reasoning: ReasoningSchema.optional(),
|
|
62
|
+
reasoning_effort: z.string().optional(),
|
|
63
|
+
temperature: z.number().optional(),
|
|
64
|
+
verbosity: z.string().optional(),
|
|
65
|
+
});
|
|
@@ -30,6 +30,8 @@
|
|
|
30
30
|
*/
|
|
31
31
|
import { config as loadDotenv } from "dotenv";
|
|
32
32
|
import { randomUUID } from "crypto";
|
|
33
|
+
import { OpenAIChatResponseSchema, OpenAIResponsesResponseSchema, } from "../_vendor/ailf-core/index.js";
|
|
34
|
+
import { InstrumentedProviderConfigSchema, } from "./config-schemas.js";
|
|
33
35
|
import { RequestRecorder } from "./proxy.js";
|
|
34
36
|
import { calculateCost } from "./pricing.js";
|
|
35
37
|
loadDotenv({
|
|
@@ -42,11 +44,13 @@ loadDotenv({
|
|
|
42
44
|
export default class InstrumentedProvider {
|
|
43
45
|
config;
|
|
44
46
|
providerId;
|
|
47
|
+
parsedConfig;
|
|
45
48
|
recorder;
|
|
46
49
|
constructor(options) {
|
|
47
50
|
this.providerId = options.id ?? "instrumented-observer";
|
|
48
51
|
this.config = options.config ?? {};
|
|
49
|
-
this.
|
|
52
|
+
this.parsedConfig = InstrumentedProviderConfigSchema.parse(this.config);
|
|
53
|
+
this.recorder = new RequestRecorder(this.parsedConfig.observerOptions ?? {});
|
|
50
54
|
}
|
|
51
55
|
/**
|
|
52
56
|
* Main Promptfoo provider entry point. Called for each test case.
|
|
@@ -56,7 +60,7 @@ export default class InstrumentedProvider {
|
|
|
56
60
|
const taskDescription = context?.vars?.task ||
|
|
57
61
|
context?.prompt?.label ||
|
|
58
62
|
"unknown-task";
|
|
59
|
-
const observe = this.
|
|
63
|
+
const observe = this.parsedConfig.observe !== false;
|
|
60
64
|
// Start observation
|
|
61
65
|
if (observe) {
|
|
62
66
|
this.recorder.start(sessionId, this.id(), taskDescription);
|
|
@@ -97,9 +101,9 @@ export default class InstrumentedProvider {
|
|
|
97
101
|
* Detect whether the model should use the Responses API based on config.
|
|
98
102
|
*/
|
|
99
103
|
isResponsesModel() {
|
|
100
|
-
const model = this.
|
|
101
|
-
return (this.
|
|
102
|
-
this.
|
|
104
|
+
const model = this.parsedConfig.modelName ?? this.parsedConfig.model ?? "";
|
|
105
|
+
return (this.parsedConfig.reasoning != null ||
|
|
106
|
+
this.parsedConfig.reasoning_effort != null ||
|
|
103
107
|
model.startsWith("gpt-5") ||
|
|
104
108
|
model.startsWith("o1") ||
|
|
105
109
|
model.startsWith("o3") ||
|
|
@@ -111,10 +115,8 @@ export default class InstrumentedProvider {
|
|
|
111
115
|
* recorder's fetch wrapper so the API call is captured.
|
|
112
116
|
*/
|
|
113
117
|
async callOpenAI(prompt) {
|
|
114
|
-
const model = this.
|
|
115
|
-
|
|
116
|
-
"gpt-4o";
|
|
117
|
-
const apiKey = this.config.apiKey || process.env.OPENAI_API_KEY;
|
|
118
|
+
const model = this.parsedConfig.modelName ?? this.parsedConfig.model ?? "gpt-4o";
|
|
119
|
+
const apiKey = this.parsedConfig.apiKey ?? process.env.OPENAI_API_KEY;
|
|
118
120
|
if (!apiKey) {
|
|
119
121
|
return {
|
|
120
122
|
error: "OPENAI_API_KEY not set. Configure it in env or provider config.",
|
|
@@ -130,8 +132,8 @@ export default class InstrumentedProvider {
|
|
|
130
132
|
* Calls OpenAI Chat Completions API (non-reasoning models).
|
|
131
133
|
*/
|
|
132
134
|
async callOpenAIChatCompletions(prompt, model, apiKey) {
|
|
133
|
-
const temperature = this.
|
|
134
|
-
const maxTokens = this.
|
|
135
|
+
const temperature = this.parsedConfig.temperature ?? 0;
|
|
136
|
+
const maxTokens = this.parsedConfig.max_tokens ?? 4096;
|
|
135
137
|
const fetchFn = this.recorder.isRunning()
|
|
136
138
|
? this.recorder.fetch.bind(this.recorder)
|
|
137
139
|
: globalThis.fetch;
|
|
@@ -149,7 +151,7 @@ export default class InstrumentedProvider {
|
|
|
149
151
|
},
|
|
150
152
|
method: "POST",
|
|
151
153
|
});
|
|
152
|
-
const data = (await response.json());
|
|
154
|
+
const data = OpenAIChatResponseSchema.parse(await response.json());
|
|
153
155
|
if (data.error) {
|
|
154
156
|
return {
|
|
155
157
|
error: data.error.message ?? "Unknown OpenAI error",
|
|
@@ -175,11 +177,11 @@ export default class InstrumentedProvider {
|
|
|
175
177
|
* Calls OpenAI Responses API (reasoning models like GPT-5.x, o-series).
|
|
176
178
|
*/
|
|
177
179
|
async callOpenAIResponses(prompt, model, apiKey) {
|
|
178
|
-
const maxOutputTokens = this.
|
|
179
|
-
const reasoning = this.
|
|
180
|
-
const reasoningEffort = reasoning?.effort
|
|
180
|
+
const maxOutputTokens = this.parsedConfig.max_output_tokens ?? 32_000;
|
|
181
|
+
const reasoning = this.parsedConfig.reasoning;
|
|
182
|
+
const reasoningEffort = reasoning?.effort ?? this.parsedConfig.reasoning_effort ?? "medium";
|
|
181
183
|
const reasoningSummary = reasoning?.summary;
|
|
182
|
-
const verbosity = this.
|
|
184
|
+
const verbosity = this.parsedConfig.verbosity;
|
|
183
185
|
const fetchFn = this.recorder.isRunning()
|
|
184
186
|
? this.recorder.fetch.bind(this.recorder)
|
|
185
187
|
: globalThis.fetch;
|
|
@@ -201,7 +203,7 @@ export default class InstrumentedProvider {
|
|
|
201
203
|
},
|
|
202
204
|
method: "POST",
|
|
203
205
|
});
|
|
204
|
-
const data = (await response.json());
|
|
206
|
+
const data = OpenAIResponsesResponseSchema.parse(await response.json());
|
|
205
207
|
if (data.error) {
|
|
206
208
|
return {
|
|
207
209
|
error: data.error.message ?? "Unknown OpenAI error",
|
package/dist/cli.js
CHANGED
|
@@ -37,6 +37,7 @@ import { existsSync } from "fs";
|
|
|
37
37
|
import { dirname, resolve } from "path";
|
|
38
38
|
import { fileURLToPath } from "url";
|
|
39
39
|
import { buildCliProgram } from "./cli-program.js";
|
|
40
|
+
import { findExplicitDotenvArg } from "./lib/dotenv-resolution.js";
|
|
40
41
|
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
41
42
|
const ROOT = resolve(__dirname, "..");
|
|
42
43
|
// ---------------------------------------------------------------------------
|
|
@@ -52,10 +53,9 @@ const ROOT = resolve(__dirname, "..");
|
|
|
52
53
|
// installed globally via npm (production).
|
|
53
54
|
// ---------------------------------------------------------------------------
|
|
54
55
|
function resolveEnvPath() {
|
|
55
|
-
const
|
|
56
|
-
if (
|
|
57
|
-
return
|
|
58
|
-
}
|
|
56
|
+
const explicit = findExplicitDotenvArg();
|
|
57
|
+
if (explicit)
|
|
58
|
+
return explicit;
|
|
59
59
|
// Monorepo root .env (dev mode)
|
|
60
60
|
const monorepoEnv = resolve(ROOT, "..", "..", ".env");
|
|
61
61
|
if (existsSync(monorepoEnv))
|
|
@@ -18,7 +18,7 @@ import { resolve, relative, basename } from "path";
|
|
|
18
18
|
import { Command } from "commander";
|
|
19
19
|
import { load } from "js-yaml";
|
|
20
20
|
import { detectLegacyFieldNames, parseCanonicalTaskFile, } from "../adapters/task-sources/repo-schemas.js";
|
|
21
|
-
import { validateCanonicalTasks,
|
|
21
|
+
import { validateCanonicalTasks, formatRepoValidationResult, } from "../adapters/task-sources/repo-validation.js";
|
|
22
22
|
import { discoverTsTaskFiles, loadTsTaskFile, } from "../adapters/task-sources/task-file-loader.js";
|
|
23
23
|
export function createValidateTasksCommand() {
|
|
24
24
|
return new Command("tasks")
|
|
@@ -103,7 +103,7 @@ export async function runValidateTasks(tasksPath, opts) {
|
|
|
103
103
|
if (allTasks.length > 0) {
|
|
104
104
|
console.log();
|
|
105
105
|
const semanticResult = validateCanonicalTasks(allTasks);
|
|
106
|
-
const formatted =
|
|
106
|
+
const formatted = formatRepoValidationResult(semanticResult);
|
|
107
107
|
console.log(formatted);
|
|
108
108
|
if (!semanticResult.valid) {
|
|
109
109
|
hasErrors = true;
|
|
@@ -16,6 +16,7 @@
|
|
|
16
16
|
* @see docs/archive/exec-plans/ports-and-adapters/phase-7-composition-root.md
|
|
17
17
|
*/
|
|
18
18
|
import { type AppContext, type ArtifactWriter, type ArtifactWriterProgressOptions, type AssertionRegistration, type Logger, type ResolvedConfig } from "./_vendor/ailf-core/index.d.ts";
|
|
19
|
+
import { CompositeTaskSource, ContentLakeTaskSource, RepoTaskSource } from "./adapters/task-sources/index.js";
|
|
19
20
|
/**
|
|
20
21
|
* Create a fully wired AppContext from resolved configuration.
|
|
21
22
|
*
|
|
@@ -42,6 +43,12 @@ export declare function createAppContext(config: ResolvedConfig): AppContext;
|
|
|
42
43
|
* Exported for unit-test access; not part of the public package API.
|
|
43
44
|
*/
|
|
44
45
|
export declare function createArtifactWriter(config: ResolvedConfig, logger: Logger, progress?: ArtifactWriterProgressOptions): ArtifactWriter;
|
|
46
|
+
/**
|
|
47
|
+
* Build the `TaskSource` adapter wired by the composition root for a
|
|
48
|
+
* given `ResolvedConfig`. Exported for test access — composition-root
|
|
49
|
+
* wiring is a contract worth asserting directly.
|
|
50
|
+
*/
|
|
51
|
+
export declare function createTaskSource(config: ResolvedConfig): CompositeTaskSource | ContentLakeTaskSource | RepoTaskSource;
|
|
45
52
|
/**
|
|
46
53
|
* Generic Promptfoo assertion types available to all evaluation modes.
|
|
47
54
|
*
|