@sanity/ailf 3.2.0 → 3.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/_vendor/ailf-core/examples/index.d.ts +8 -8
- package/dist/_vendor/ailf-core/examples/index.js +8 -8
- package/dist/_vendor/ailf-shared/feature-flags.d.ts +59 -0
- package/dist/_vendor/ailf-shared/feature-flags.js +44 -0
- package/dist/_vendor/ailf-shared/index.d.ts +1 -0
- package/dist/_vendor/ailf-shared/index.js +1 -0
- package/dist/adapters/config-sources/ailf-resolver.d.ts +55 -0
- package/dist/adapters/config-sources/ailf-resolver.js +147 -0
- package/dist/adapters/config-sources/ts-config-loader.js +7 -0
- package/dist/adapters/task-sources/repo-schemas.d.ts +35 -5
- package/dist/adapters/task-sources/repo-schemas.js +25 -3
- package/dist/adapters/task-sources/task-file-loader.js +3 -0
- package/dist/commands/init.d.ts +1 -1
- package/dist/commands/init.js +19 -5
- package/dist/commands/pipeline-action.js +51 -6
- package/dist/commands/pipeline.js +1 -1
- package/dist/commands/validate-tasks.d.ts +14 -3
- package/dist/commands/validate-tasks.js +125 -81
- package/dist/index.d.ts +2 -0
- package/dist/index.js +4 -0
- package/dist/pipeline/compiler/config-loader.js +6 -1
- package/dist/pipeline/compiler/preset-loader.js +3 -0
- package/package.json +1 -1
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ailf-resolver.ts — locate `@sanity/ailf` for user TS files, with a bundled fallback.
|
|
3
|
+
*
|
|
4
|
+
* User `.ailf/*.ts` files import `defineTask` / `defineConfig` / `definePreset`
|
|
5
|
+
* from `@sanity/ailf`. In a fresh project with no local install, that bare
|
|
6
|
+
* specifier cannot resolve from the user's tree. To keep `ailf init` → `ailf
|
|
7
|
+
* pipeline` working out of the box we transparently fall back to the CLI's own
|
|
8
|
+
* copy of `@sanity/ailf` by registering a jiti module alias. A user-local
|
|
9
|
+
* install always wins — the fallback kicks in only when resolution fails.
|
|
10
|
+
*
|
|
11
|
+
* All jiti callsites across the eval package use `resolveAilfAlias()` to get
|
|
12
|
+
* a consistent resolution + warning story. Callers pass the returned map (or
|
|
13
|
+
* nothing) to `createJiti`.
|
|
14
|
+
*/
|
|
15
|
+
/**
|
|
16
|
+
* Probe whether the user has `@sanity/ailf` installed as a local dependency
|
|
17
|
+
* reachable from the given path. Walks up the directory tree looking for a
|
|
18
|
+
* `node_modules/@sanity/ailf/package.json`. Returns the package entry point
|
|
19
|
+
* path on success, null otherwise.
|
|
20
|
+
*
|
|
21
|
+
* We intentionally do NOT use Node's `require.resolve` self-reference path:
|
|
22
|
+
* tsx and some bundler setups make it unreliable, and a self-reference
|
|
23
|
+
* would only match when the caller *is* the `@sanity/ailf` package (the
|
|
24
|
+
* monorepo development case), which is semantically the same as having no
|
|
25
|
+
* install — the bundled fallback handles it.
|
|
26
|
+
*/
|
|
27
|
+
export declare function probeUserLocalAilf(fromPath: string): string | null;
|
|
28
|
+
/**
|
|
29
|
+
* Return the path to the CLI's own bundled copy of `@sanity/ailf`. Used as the
|
|
30
|
+
* fallback target when a user's project does not have it installed.
|
|
31
|
+
*
|
|
32
|
+
* We walk the filesystem rather than `require.resolve("@sanity/ailf")` because
|
|
33
|
+
* self-reference resolution is unreliable under tsx and some bundler setups.
|
|
34
|
+
* Returns null in exotic setups where no ancestor package.json matches.
|
|
35
|
+
*/
|
|
36
|
+
export declare function getBundledAilfPath(): string | null;
|
|
37
|
+
/**
|
|
38
|
+
* Emit a one-shot stderr advisory when the loader falls back to the bundled
|
|
39
|
+
* `@sanity/ailf`. The flag is module-scoped so a single pipeline run warns at
|
|
40
|
+
* most once, no matter how many TS files trigger the fallback.
|
|
41
|
+
*/
|
|
42
|
+
export declare function warnBundledFallbackOnce(): void;
|
|
43
|
+
/** Test-only: reset the warn-once flag between unit tests. */
|
|
44
|
+
export declare function resetBundledFallbackWarning(): void;
|
|
45
|
+
/**
|
|
46
|
+
* Decide whether jiti should alias `@sanity/ailf` → bundled-path for the given
|
|
47
|
+
* file. Returns the alias map or null.
|
|
48
|
+
*
|
|
49
|
+
* - User-local resolves → returns null (jiti's natural walk finds it).
|
|
50
|
+
* - User-local fails + bundled path available → returns alias map, fires
|
|
51
|
+
* one-shot warning, logs at verbose level.
|
|
52
|
+
* - User-local fails + no bundled path → returns null (nothing we can do;
|
|
53
|
+
* jiti will surface the original MODULE_NOT_FOUND).
|
|
54
|
+
*/
|
|
55
|
+
export declare function resolveAilfAlias(filePath: string): Record<string, string> | null;
|
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ailf-resolver.ts — locate `@sanity/ailf` for user TS files, with a bundled fallback.
|
|
3
|
+
*
|
|
4
|
+
* User `.ailf/*.ts` files import `defineTask` / `defineConfig` / `definePreset`
|
|
5
|
+
* from `@sanity/ailf`. In a fresh project with no local install, that bare
|
|
6
|
+
* specifier cannot resolve from the user's tree. To keep `ailf init` → `ailf
|
|
7
|
+
* pipeline` working out of the box we transparently fall back to the CLI's own
|
|
8
|
+
* copy of `@sanity/ailf` by registering a jiti module alias. A user-local
|
|
9
|
+
* install always wins — the fallback kicks in only when resolution fails.
|
|
10
|
+
*
|
|
11
|
+
* All jiti callsites across the eval package use `resolveAilfAlias()` to get
|
|
12
|
+
* a consistent resolution + warning story. Callers pass the returned map (or
|
|
13
|
+
* nothing) to `createJiti`.
|
|
14
|
+
*/
|
|
15
|
+
import { existsSync, readFileSync, statSync } from "node:fs";
|
|
16
|
+
import { dirname, resolve as pathResolve } from "node:path";
|
|
17
|
+
import { fileURLToPath } from "node:url";
|
|
18
|
+
/**
 * Locate the root directory of the enclosing `@sanity/ailf` package.
 *
 * Starting at the directory that contains this module, every ancestor is
 * inspected for a `package.json` whose `name` field is `@sanity/ailf`. The
 * search is anchored on the manifest rather than on a fixed relative path, so
 * it does not depend on how deep this module sits inside the package.
 *
 * @returns The directory of the first matching ancestor, or null when the
 *   walk reaches the filesystem root without a match (the root directory
 *   itself is not inspected).
 */
function findAilfPackageRoot() {
    // Read a manifest's `name`; unreadable or malformed JSON counts as "no name".
    const manifestName = (manifestPath) => {
        try {
            return JSON.parse(readFileSync(manifestPath, "utf-8")).name;
        }
        catch {
            return undefined;
        }
    };
    let current = dirname(fileURLToPath(import.meta.url));
    // `dirname(x) === x` only at the filesystem root, which ends the walk.
    for (let parent = dirname(current); parent !== current; parent = dirname(current)) {
        const manifestPath = pathResolve(current, "package.json");
        if (existsSync(manifestPath) && manifestName(manifestPath) === "@sanity/ailf") {
            return current;
        }
        current = parent;
    }
    return null;
}
|
|
42
|
+
/**
 * Probe whether `@sanity/ailf` is installed as a local dependency reachable
 * from `fromPath`.
 *
 * The starting directory is `fromPath` itself when it is an existing
 * directory, otherwise its parent (so a file path — even one that does not
 * exist yet — starts in its containing directory). From there each ancestor
 * is checked for `node_modules/@sanity/ailf/package.json`.
 *
 * `fromPath` may be relative; it is resolved against the current working
 * directory before walking. Without that step `dirname()` bottoms out at "."
 * and the walk would stop before probing the working directory's ancestors.
 *
 * Node's `require.resolve` is deliberately not used: self-reference
 * resolution is unreliable under tsx and some bundler setups.
 *
 * @param fromPath File or directory path to start the search from.
 * @returns Absolute path of the package entry point (`module`, then `main`,
 *   then `index.js`; the `exports` map is not consulted), or null when no
 *   install is found or the first manifest found cannot be read or parsed.
 */
export function probeUserLocalAilf(fromPath) {
    const start = pathResolve(fromPath);
    let dir;
    try {
        dir = existsSync(start) && statSync(start).isDirectory() ? start : dirname(start);
    }
    catch {
        // stat can still fail (permissions, races); treat the path as a file.
        dir = dirname(start);
    }
    // `dirname(x) === x` only at the filesystem root, which ends the walk.
    while (dir !== dirname(dir)) {
        const pkgJson = pathResolve(dir, "node_modules", "@sanity", "ailf", "package.json");
        if (existsSync(pkgJson)) {
            try {
                const pkg = JSON.parse(readFileSync(pkgJson, "utf-8"));
                const entry = pkg.module ?? pkg.main ?? "index.js";
                return pathResolve(dirname(pkgJson), entry);
            }
            catch {
                // The nearest install is broken; do not look past it.
                return null;
            }
        }
        dir = dirname(dir);
    }
    return null;
}
|
|
81
|
+
/**
 * Resolve the entry point of the CLI's own copy of `@sanity/ailf`, used as
 * the alias target when the user's project has no local install.
 *
 * The package root is found by walking the filesystem (see
 * `findAilfPackageRoot`) instead of `require.resolve("@sanity/ailf")`,
 * because self-reference resolution is unreliable under tsx and some bundler
 * setups.
 *
 * @returns `<root>/dist/index.js` when it exists, otherwise
 *   `<root>/src/index.ts` when it exists, otherwise null. Also null when no
 *   package root can be located.
 */
export function getBundledAilfPath() {
    const root = findAilfPackageRoot();
    if (!root) {
        return null;
    }
    // Priority order: compiled output first, then the source entry.
    const candidates = [
        pathResolve(root, "dist", "index.js"),
        pathResolve(root, "src", "index.ts"),
    ];
    return candidates.find((candidate) => existsSync(candidate)) ?? null;
}
|
|
103
|
+
let hasWarnedOnce = false;
|
|
104
|
+
/**
|
|
105
|
+
* Emit a one-shot stderr advisory when the loader falls back to the bundled
|
|
106
|
+
* `@sanity/ailf`. The flag is module-scoped so a single pipeline run warns at
|
|
107
|
+
* most once, no matter how many TS files trigger the fallback.
|
|
108
|
+
*/
|
|
109
|
+
export function warnBundledFallbackOnce() {
|
|
110
|
+
if (hasWarnedOnce)
|
|
111
|
+
return;
|
|
112
|
+
hasWarnedOnce = true;
|
|
113
|
+
process.stderr.write(" ⚠ @sanity/ailf is not installed in your project — using the CLI's bundled copy.\n" +
|
|
114
|
+
" Pin it locally for reproducibility: npm install -D @sanity/ailf\n");
|
|
115
|
+
}
|
|
116
|
+
/** Test-only: reset the warn-once flag between unit tests. */
|
|
117
|
+
export function resetBundledFallbackWarning() {
|
|
118
|
+
hasWarnedOnce = false;
|
|
119
|
+
}
|
|
120
|
+
/**
 * Decide whether jiti needs an `@sanity/ailf` → bundled-copy alias in order
 * to load the given file.
 *
 * Outcomes:
 * - A local install is reachable from `filePath` → null; no alias is needed.
 * - No local install, bundled entry found → `{ "@sanity/ailf": <entry> }`,
 *   and the one-shot stderr advisory is triggered.
 * - No local install, no bundled entry → null; nothing can be aliased.
 *
 * When the `AILF_LOG_LEVEL` environment variable equals `verbose`, the first
 * two outcomes also write a `[ts-loader]` trace line to stderr.
 *
 * @param filePath Path of the user file about to be loaded.
 * @returns An alias map suitable for jiti's `alias` option, or null.
 */
export function resolveAilfAlias(filePath) {
    const chatty = process.env.AILF_LOG_LEVEL === "verbose";
    const localEntry = probeUserLocalAilf(filePath);
    if (!localEntry) {
        const fallbackEntry = getBundledAilfPath();
        if (fallbackEntry) {
            if (chatty) {
                process.stderr.write(` [ts-loader] ${filePath} → @sanity/ailf not installed locally; using bundled copy at ${fallbackEntry}\n`);
            }
            warnBundledFallbackOnce();
            return { "@sanity/ailf": fallbackEntry };
        }
        return null;
    }
    if (chatty) {
        process.stderr.write(` [ts-loader] ${filePath} → @sanity/ailf resolved locally at ${localEntry}\n`);
    }
    return null;
}
|
|
@@ -15,6 +15,7 @@
|
|
|
15
15
|
import { existsSync } from "fs";
|
|
16
16
|
import { pathToFileURL } from "node:url";
|
|
17
17
|
import { createJiti } from "jiti";
|
|
18
|
+
import { resolveAilfAlias } from "./ailf-resolver.js";
|
|
18
19
|
// ---------------------------------------------------------------------------
|
|
19
20
|
// jiti instance factory — resolves imports relative to the loaded file
|
|
20
21
|
// ---------------------------------------------------------------------------
|
|
@@ -28,13 +29,19 @@ import { createJiti } from "jiti";
|
|
|
28
29
|
*
|
|
29
30
|
* We pass a `file://` URL (not a bare path) so jiti uses ESM resolution,
|
|
30
31
|
* which matches the `"import"` condition in package.json exports maps.
|
|
32
|
+
*
|
|
33
|
+
* When the user's project cannot resolve `@sanity/ailf` (fresh directory
|
|
34
|
+
* without a local install), we register an alias pointing at the CLI's own
|
|
35
|
+
* bundled copy so the load still succeeds. See `ailf-resolver.ts`.
|
|
31
36
|
*/
|
|
32
37
|
function createJitiForFile(filePath) {
    const baseOptions = {
        // Interop: accept both `export default` and `module.exports` modules.
        interopDefault: true,
        // NOTE(review): presumably enables jiti's module cache — confirm
        // against the jiti option reference for the installed version.
        requireCache: true,
    };
    // Only present when the user's project cannot resolve `@sanity/ailf` and
    // a bundled copy is available.
    const ailfAlias = resolveAilfAlias(filePath);
    const options = ailfAlias ? { ...baseOptions, alias: ailfAlias } : baseOptions;
    // A file:// URL (not a bare path) is passed as the resolution base.
    return createJiti(pathToFileURL(filePath).href, options);
}
|
|
40
47
|
/**
|
|
@@ -19,15 +19,17 @@ import { z } from "zod";
|
|
|
19
19
|
/**
|
|
20
20
|
* The set of assertion types allowed in task files.
|
|
21
21
|
*
|
|
22
|
-
*
|
|
23
|
-
*
|
|
22
|
+
* Combines a curated subset of Promptfoo assertion types (stable, well-
|
|
23
|
+
* documented, useful for external authors) with the agent-harness-specific
|
|
24
|
+
* types mapped by `mode-handlers/agent-harness/assertions.ts`.
|
|
24
25
|
*/
|
|
25
|
-
export declare const CURATED_ASSERTION_TYPES: readonly ["llm-rubric", "contains", "contains-any", "contains-all", "not-contains", "icontains", "icontains-any", "regex", "javascript", "similar", "cost", "latency"];
|
|
26
|
+
export declare const CURATED_ASSERTION_TYPES: readonly ["llm-rubric", "contains", "contains-any", "contains-all", "not-contains", "icontains", "icontains-any", "regex", "javascript", "similar", "cost", "latency", "file-exists", "file-contains", "command-succeeds", "diff-matches"];
|
|
26
27
|
export type CuratedAssertionType = (typeof CURATED_ASSERTION_TYPES)[number];
|
|
27
28
|
/**
|
|
28
|
-
* Valid rubric template names — must match keys in
|
|
29
|
+
* Valid rubric template names — must match template keys in
|
|
30
|
+
* `packages/eval/config/rubrics.ts`.
|
|
29
31
|
*/
|
|
30
|
-
export declare const RUBRIC_TEMPLATE_NAMES: readonly ["task-completion", "code-correctness", "doc-coverage"];
|
|
32
|
+
export declare const RUBRIC_TEMPLATE_NAMES: readonly ["task-completion", "code-correctness", "doc-coverage", "mcp-input-validation", "mcp-output-correctness", "mcp-error-handling", "mcp-security", "factual-correctness", "completeness", "currency", "process-quality", "agent-output", "agent-tool-usage"];
|
|
31
33
|
export type RubricTemplateName = (typeof RUBRIC_TEMPLATE_NAMES)[number];
|
|
32
34
|
/**
|
|
33
35
|
* Zod schema for a single task definition using canonical field names.
|
|
@@ -84,6 +86,16 @@ export declare const CanonicalTaskSchema: z.ZodObject<{
|
|
|
84
86
|
"task-completion": "task-completion";
|
|
85
87
|
"code-correctness": "code-correctness";
|
|
86
88
|
"doc-coverage": "doc-coverage";
|
|
89
|
+
"mcp-input-validation": "mcp-input-validation";
|
|
90
|
+
"mcp-output-correctness": "mcp-output-correctness";
|
|
91
|
+
"mcp-error-handling": "mcp-error-handling";
|
|
92
|
+
"mcp-security": "mcp-security";
|
|
93
|
+
"factual-correctness": "factual-correctness";
|
|
94
|
+
completeness: "completeness";
|
|
95
|
+
currency: "currency";
|
|
96
|
+
"process-quality": "process-quality";
|
|
97
|
+
"agent-output": "agent-output";
|
|
98
|
+
"agent-tool-usage": "agent-tool-usage";
|
|
87
99
|
}>;
|
|
88
100
|
criteria: z.ZodArray<z.ZodString>;
|
|
89
101
|
weight: z.ZodOptional<z.ZodNumber>;
|
|
@@ -101,6 +113,10 @@ export declare const CanonicalTaskSchema: z.ZodObject<{
|
|
|
101
113
|
similar: "similar";
|
|
102
114
|
cost: "cost";
|
|
103
115
|
latency: "latency";
|
|
116
|
+
"file-exists": "file-exists";
|
|
117
|
+
"file-contains": "file-contains";
|
|
118
|
+
"command-succeeds": "command-succeeds";
|
|
119
|
+
"diff-matches": "diff-matches";
|
|
104
120
|
}>;
|
|
105
121
|
value: z.ZodOptional<z.ZodUnknown>;
|
|
106
122
|
threshold: z.ZodOptional<z.ZodNumber>;
|
|
@@ -174,6 +190,16 @@ export declare const CanonicalTaskFileSchema: z.ZodArray<z.ZodObject<{
|
|
|
174
190
|
"task-completion": "task-completion";
|
|
175
191
|
"code-correctness": "code-correctness";
|
|
176
192
|
"doc-coverage": "doc-coverage";
|
|
193
|
+
"mcp-input-validation": "mcp-input-validation";
|
|
194
|
+
"mcp-output-correctness": "mcp-output-correctness";
|
|
195
|
+
"mcp-error-handling": "mcp-error-handling";
|
|
196
|
+
"mcp-security": "mcp-security";
|
|
197
|
+
"factual-correctness": "factual-correctness";
|
|
198
|
+
completeness: "completeness";
|
|
199
|
+
currency: "currency";
|
|
200
|
+
"process-quality": "process-quality";
|
|
201
|
+
"agent-output": "agent-output";
|
|
202
|
+
"agent-tool-usage": "agent-tool-usage";
|
|
177
203
|
}>;
|
|
178
204
|
criteria: z.ZodArray<z.ZodString>;
|
|
179
205
|
weight: z.ZodOptional<z.ZodNumber>;
|
|
@@ -191,6 +217,10 @@ export declare const CanonicalTaskFileSchema: z.ZodArray<z.ZodObject<{
|
|
|
191
217
|
similar: "similar";
|
|
192
218
|
cost: "cost";
|
|
193
219
|
latency: "latency";
|
|
220
|
+
"file-exists": "file-exists";
|
|
221
|
+
"file-contains": "file-contains";
|
|
222
|
+
"command-succeeds": "command-succeeds";
|
|
223
|
+
"diff-matches": "diff-matches";
|
|
194
224
|
}>;
|
|
195
225
|
value: z.ZodOptional<z.ZodUnknown>;
|
|
196
226
|
threshold: z.ZodOptional<z.ZodNumber>;
|
|
@@ -22,8 +22,9 @@ import { z } from "zod";
|
|
|
22
22
|
/**
|
|
23
23
|
* The set of assertion types allowed in task files.
|
|
24
24
|
*
|
|
25
|
-
*
|
|
26
|
-
*
|
|
25
|
+
* Combines a curated subset of Promptfoo assertion types (stable, well-
|
|
26
|
+
* documented, useful for external authors) with the agent-harness-specific
|
|
27
|
+
* types mapped by `mode-handlers/agent-harness/assertions.ts`.
|
|
27
28
|
*/
|
|
28
29
|
export const CURATED_ASSERTION_TYPES = [
|
|
29
30
|
"llm-rubric",
|
|
@@ -38,14 +39,35 @@ export const CURATED_ASSERTION_TYPES = [
|
|
|
38
39
|
"similar",
|
|
39
40
|
"cost",
|
|
40
41
|
"latency",
|
|
42
|
+
// Agent-harness assertions — verify sandbox state after the agent runs.
|
|
43
|
+
// See src/pipeline/compiler/mode-handlers/agent-harness/assertions.ts
|
|
44
|
+
"file-exists",
|
|
45
|
+
"file-contains",
|
|
46
|
+
"command-succeeds",
|
|
47
|
+
"diff-matches",
|
|
41
48
|
];
|
|
42
49
|
/**
|
|
43
|
-
* Valid rubric template names — must match keys in
|
|
50
|
+
* Valid rubric template names — must match template keys in
|
|
51
|
+
* `packages/eval/config/rubrics.ts`.
|
|
44
52
|
*/
|
|
45
53
|
export const RUBRIC_TEMPLATE_NAMES = [
|
|
54
|
+
// Core literacy dimensions
|
|
46
55
|
"task-completion",
|
|
47
56
|
"code-correctness",
|
|
48
57
|
"doc-coverage",
|
|
58
|
+
// MCP server dimensions
|
|
59
|
+
"mcp-input-validation",
|
|
60
|
+
"mcp-output-correctness",
|
|
61
|
+
"mcp-error-handling",
|
|
62
|
+
"mcp-security",
|
|
63
|
+
// Knowledge probe dimensions
|
|
64
|
+
"factual-correctness",
|
|
65
|
+
"completeness",
|
|
66
|
+
"currency",
|
|
67
|
+
// Agent harness dimensions
|
|
68
|
+
"process-quality",
|
|
69
|
+
"agent-output",
|
|
70
|
+
"agent-tool-usage",
|
|
49
71
|
];
|
|
50
72
|
// ---------------------------------------------------------------------------
|
|
51
73
|
// Doc ref schemas — polymorphic canonical doc references
|
|
@@ -25,6 +25,7 @@ import { existsSync, readdirSync } from "fs";
|
|
|
25
25
|
import { pathToFileURL } from "node:url";
|
|
26
26
|
import { resolve } from "path";
|
|
27
27
|
import { createJiti } from "jiti";
|
|
28
|
+
import { resolveAilfAlias } from "../config-sources/ailf-resolver.js";
|
|
28
29
|
import { loadTsConfig } from "../config-sources/ts-config-loader.js";
|
|
29
30
|
/**
|
|
30
31
|
* Discover TS/JS task files in a directory.
|
|
@@ -72,9 +73,11 @@ export async function loadTsTaskFile(filePath) {
|
|
|
72
73
|
* Needed by resolve-mappings.ts which is called from sync contexts.
|
|
73
74
|
*/
|
|
74
75
|
export function loadTsTaskFileSync(filePath) {
|
|
76
|
+
const alias = resolveAilfAlias(filePath);
|
|
75
77
|
const jiti = createJiti(pathToFileURL(filePath).href, {
|
|
76
78
|
interopDefault: true,
|
|
77
79
|
requireCache: true,
|
|
80
|
+
...(alias ? { alias } : {}),
|
|
78
81
|
});
|
|
79
82
|
const mod = jiti(filePath);
|
|
80
83
|
const value = mod && typeof mod === "object" && "default" in mod ? mod.default : mod;
|
package/dist/commands/init.d.ts
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
* task files. The generated files are ready-to-edit starting points —
|
|
6
6
|
* not live evaluation tasks.
|
|
7
7
|
*
|
|
8
|
-
* TypeScript output (default) uses define* helpers from @sanity/ailf
|
|
8
|
+
* TypeScript output (default) uses define* helpers from @sanity/ailf
|
|
9
9
|
* for full IDE autocomplete and type checking. YAML output serializes the
|
|
10
10
|
* parsed task data. JSON output is a plain serialization of the parsed data.
|
|
11
11
|
*
|
package/dist/commands/init.js
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
* task files. The generated files are ready-to-edit starting points —
|
|
6
6
|
* not live evaluation tasks.
|
|
7
7
|
*
|
|
8
|
-
* TypeScript output (default) uses define* helpers from @sanity/ailf
|
|
8
|
+
* TypeScript output (default) uses define* helpers from @sanity/ailf
|
|
9
9
|
* for full IDE autocomplete and type checking. YAML output serializes the
|
|
10
10
|
* parsed task data. JSON output is a plain serialization of the parsed data.
|
|
11
11
|
*
|
|
@@ -20,6 +20,7 @@ import { Command } from "commander";
|
|
|
20
20
|
import { existsSync, mkdirSync, writeFileSync } from "fs";
|
|
21
21
|
import { resolve, relative } from "path";
|
|
22
22
|
import { ailfConfigData, ailfConfigYaml, ailfConfigTs, taskYamlFiles, taskTsFiles, TASK_FILE_NAMES, TASK_EXAMPLES, allTaskData, workflowYaml, } from "../_vendor/ailf-core/index.js";
|
|
23
|
+
import { probeUserLocalAilf } from "../adapters/config-sources/ailf-resolver.js";
|
|
23
24
|
// ---------------------------------------------------------------------------
|
|
24
25
|
// Command factory
|
|
25
26
|
// ---------------------------------------------------------------------------
|
|
@@ -82,6 +83,13 @@ async function runInit(opts) {
|
|
|
82
83
|
console.log();
|
|
83
84
|
console.log(" 🚀 Initializing AI Literacy Framework");
|
|
84
85
|
console.log();
|
|
86
|
+
if (format === "ts" && !probeUserLocalAilf(targetDir)) {
|
|
87
|
+
console.log(" ℹ @sanity/ailf is not installed in this project yet.");
|
|
88
|
+
console.log(" For reproducibility and IDE autocomplete, install it after init:");
|
|
89
|
+
console.log(" npm install -D @sanity/ailf (or pnpm add -D, yarn add -D)");
|
|
90
|
+
console.log(" The pipeline will fall back to the CLI's bundled copy until you do.");
|
|
91
|
+
console.log();
|
|
92
|
+
}
|
|
85
93
|
// 1. Create directories
|
|
86
94
|
mkdirSync(tasksDir, { recursive: true });
|
|
87
95
|
console.log(` ✓ Created ${rel(targetDir, ailfDir)}/`);
|
|
@@ -252,13 +260,13 @@ async function runInit(opts) {
|
|
|
252
260
|
console.log(` 2. Validate locally: npx @sanity/ailf@latest validate-tasks .ailf/tasks/`);
|
|
253
261
|
console.log(" 3. Add a GitHub Actions secret");
|
|
254
262
|
console.log(" (Settings → Secrets and variables → Actions):");
|
|
255
|
-
console.log(" • AILF_API_KEY — your API key
|
|
263
|
+
console.log(" • AILF_API_KEY — your API key");
|
|
256
264
|
console.log(" 4. Push — the workflow at .github/workflows/ailf-eval.yml runs");
|
|
257
265
|
console.log(" automatically on PRs");
|
|
258
266
|
if (format === "ts") {
|
|
259
267
|
console.log();
|
|
260
268
|
console.log(` 💡 TypeScript tasks (${taskExt}) give you full IDE autocomplete`);
|
|
261
|
-
console.log(" via defineTask() from @sanity/ailf
|
|
269
|
+
console.log(" via defineTask() from @sanity/ailf.");
|
|
262
270
|
}
|
|
263
271
|
console.log();
|
|
264
272
|
console.log(" 🔑 Retrieve the API key from 1Password (Sanity employees):");
|
|
@@ -268,9 +276,15 @@ async function runInit(opts) {
|
|
|
268
276
|
console.log();
|
|
269
277
|
console.log(" Not a Sanity employee? Request an API key from the AILF team.");
|
|
270
278
|
console.log();
|
|
271
|
-
console.log(" 💡 Test
|
|
279
|
+
console.log(" 💡 Test a remote run (executes against the AILF API) before pushing:");
|
|
272
280
|
console.log(" AILF_API_KEY=... npx @sanity/ailf@latest pipeline --remote --debug");
|
|
273
281
|
console.log();
|
|
282
|
+
console.log(" 💡 Or test a remote run against your repo tasks:");
|
|
283
|
+
console.log(" AILF_API_KEY=... npx @sanity/ailf@latest pipeline --remote --task-source=repo --debug");
|
|
284
|
+
console.log();
|
|
285
|
+
console.log(" 💡 Or run locally against your repo tasks:");
|
|
286
|
+
console.log(" AILF_API_KEY=... npx @sanity/ailf@latest pipeline --mode=literacy --variant=full --task-source=repo --debug --explain -y");
|
|
287
|
+
console.log();
|
|
274
288
|
}
|
|
275
289
|
// ---------------------------------------------------------------------------
|
|
276
290
|
// Custom preset scaffold template
|
|
@@ -289,7 +303,7 @@ const CUSTOM_PRESET_TS = `/**
|
|
|
289
303
|
* @see https://github.com/sanity-labs/ai-literacy-framework/blob/main/docs/presets.md
|
|
290
304
|
*/
|
|
291
305
|
|
|
292
|
-
import { definePreset } from "
|
|
306
|
+
import { definePreset } from "@sanity/ailf"
|
|
293
307
|
|
|
294
308
|
export default definePreset({
|
|
295
309
|
name: "my-docs-evaluation",
|
|
@@ -191,12 +191,18 @@ export function computeResolvedOptions(opts) {
|
|
|
191
191
|
// Smart default: full runs auto-publish when store is configured
|
|
192
192
|
publishEnabled = reportStoreConfigured && !debugEnabled;
|
|
193
193
|
}
|
|
194
|
+
// Resolve task source + repo tasks path before anything that depends on
|
|
195
|
+
// them (report store overrides, output dir). When --task-source=repo is
|
|
196
|
+
// set without --repo-tasks-path, default to ./.ailf/tasks/ — the location
|
|
197
|
+
// created by `ailf init`.
|
|
198
|
+
const resolvedTaskSourceType = resolveTaskSourceType(opts.taskSource);
|
|
199
|
+
const resolvedRepoTasksPath = resolveRepoTasksPath(callerCwd, opts.repoTasksPath, resolvedTaskSourceType);
|
|
194
200
|
// Report store overrides — resolution order:
|
|
195
201
|
// 1. Explicit CLI flags (--report-dataset, --report-project)
|
|
196
202
|
// 2. Environment variables (AILF_REPORT_DATASET, AILF_REPORT_PROJECT_ID)
|
|
197
|
-
// 3. .ailf/config.yaml reportStore block (when
|
|
203
|
+
// 3. .ailf/config.yaml reportStore block (when repo tasks path is set)
|
|
198
204
|
// 4. Eval dataset override (so perspective evals publish to the same dataset)
|
|
199
|
-
const repoConfig = loadRepoConfigIfPresent(
|
|
205
|
+
const repoConfig = loadRepoConfigIfPresent(resolvedRepoTasksPath);
|
|
200
206
|
const reportDataset = opts.reportDataset ??
|
|
201
207
|
process.env.AILF_REPORT_DATASET ??
|
|
202
208
|
repoConfig?.reportStore?.dataset ??
|
|
@@ -211,10 +217,6 @@ export function computeResolvedOptions(opts) {
|
|
|
211
217
|
const apiUrl = opts.apiUrl ?? process.env.AILF_API_URL ?? "https://ailf-api.sanity.build";
|
|
212
218
|
const apiKey = process.env.AILF_API_KEY ?? undefined;
|
|
213
219
|
// Output directory: explicit --output-dir → $CWD/.ailf/results/latest/
|
|
214
|
-
const resolvedRepoTasksPath = opts.repoTasksPath
|
|
215
|
-
? resolve(callerCwd, opts.repoTasksPath)
|
|
216
|
-
: undefined;
|
|
217
|
-
const resolvedTaskSourceType = resolveTaskSourceType(opts.taskSource);
|
|
218
220
|
const outputDir = resolveOutputDir(opts.outputDir);
|
|
219
221
|
return {
|
|
220
222
|
allowedOriginArgs,
|
|
@@ -299,6 +301,39 @@ function resolveTaskSourceType(raw) {
|
|
|
299
301
|
console.error(`❌ Invalid --task-source "${raw}". Must be "repo" or "content-lake".`);
|
|
300
302
|
process.exit(1);
|
|
301
303
|
}
|
|
304
|
+
/**
 * Work out which directory repo-based task definitions should be read from.
 *
 * Resolution order:
 * 1. An explicit path wins and is resolved relative to `callerCwd`.
 * 2. With no explicit path and a task source type of `"repo"`, the default is
 *    `<callerCwd>/.ailf/tasks`.
 * 3. Otherwise there is no repo tasks path and undefined is returned.
 *
 * In cases 1 and 2 the directory must exist; if it does not, an explanatory
 * message is printed with `console.error` and the process exits with code 1.
 *
 * @param callerCwd Directory that relative paths are resolved against.
 * @param explicitPath Value of `--repo-tasks-path`, if any.
 * @param taskSourceType Resolved task source type.
 * @returns The absolute tasks directory, or undefined when none applies.
 */
function resolveRepoTasksPath(callerCwd, explicitPath, taskSourceType) {
    // Report a fatal configuration problem and terminate.
    const abort = (message) => {
        console.error(message);
        process.exit(1);
    };
    if (!explicitPath && taskSourceType !== "repo") {
        return undefined;
    }
    if (explicitPath) {
        const requested = resolve(callerCwd, explicitPath);
        if (!existsSync(requested)) {
            abort(`❌ Repo tasks directory not found: ${requested}\n` +
                " Provide a valid --repo-tasks-path, or run 'ailf init' to scaffold .ailf/tasks/.");
        }
        return requested;
    }
    const scaffolded = resolve(callerCwd, ".ailf", "tasks");
    if (!existsSync(scaffolded)) {
        abort(`❌ --task-source=repo was set but no tasks directory was found.\n` +
            ` Looked for: ${scaffolded}\n` +
            " Run 'ailf init' to scaffold .ailf/tasks/, or pass --repo-tasks-path <path>.");
    }
    return scaffolded;
}
|
|
302
337
|
// ---------------------------------------------------------------------------
|
|
303
338
|
// Pipeline entry point
|
|
304
339
|
// ---------------------------------------------------------------------------
|
|
@@ -330,6 +365,16 @@ export async function executePipeline(cliOpts) {
|
|
|
330
365
|
if (cliOpts.repoTasksPath) {
|
|
331
366
|
config.repoTasksPath = resolve(callerCwd, cliOpts.repoTasksPath);
|
|
332
367
|
}
|
|
368
|
+
else if (config.taskSourceType === "repo" && !config.repoTasksPath) {
|
|
369
|
+
// Default: when taskSource=repo but no path set, look in .ailf/tasks/
|
|
370
|
+
// (matches the `ailf init` scaffold location). Silent fallback here —
|
|
371
|
+
// composition root will surface a helpful error if the directory is
|
|
372
|
+
// missing.
|
|
373
|
+
const defaultPath = resolve(callerCwd, ".ailf", "tasks");
|
|
374
|
+
if (existsSync(defaultPath)) {
|
|
375
|
+
config.repoTasksPath = defaultPath;
|
|
376
|
+
}
|
|
377
|
+
}
|
|
333
378
|
if (cliOpts.output) {
|
|
334
379
|
config.outputPath = resolve(callerCwd, cliOpts.output);
|
|
335
380
|
}
|
|
@@ -51,7 +51,7 @@ export function createPipelineCommand() {
|
|
|
51
51
|
.option("--output-dir <path>", "Base directory for pipeline output artifacts (default: inferred from execution context)")
|
|
52
52
|
.option("--promptfoo-url <url>", "Promptfoo share URL for report")
|
|
53
53
|
.option("--task-source <type>", "Task definition source: content-lake (default — Sanity Content Lake), repo (repo tasks only, no Content Lake merge)", "content-lake")
|
|
54
|
-
.option("--repo-tasks-path <path>", "Path to repo-based task definitions
|
|
54
|
+
.option("--repo-tasks-path <path>", "Path to repo-based task definitions. Defaults to ./.ailf/tasks/ when --task-source=repo.")
|
|
55
55
|
.option("--remote", "Submit evaluation to the AILF API instead of running locally", false)
|
|
56
56
|
.option("--api-url <url>", "AILF API base URL (default: https://ailf-api.sanity.build)")
|
|
57
57
|
.option("--no-artifacts", "Disable all artifact writers (D0033). Overrides --artifacts-dir.")
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* validate-tasks command — standalone validation of task files.
|
|
3
3
|
*
|
|
4
|
-
* Validates .ailf/tasks/*.yaml files against the
|
|
5
|
-
* running the full pipeline. Useful for
|
|
6
|
-
* in external repos.
|
|
4
|
+
* Validates .ailf/tasks/*.yaml and .ailf/tasks/*.task.ts files against the
|
|
5
|
+
* CanonicalTaskSchema without running the full pipeline. Useful for
|
|
6
|
+
* pre-commit hooks and CI checks in external repos.
|
|
7
7
|
*
|
|
8
8
|
* Usage:
|
|
9
9
|
* ailf validate-tasks .ailf/tasks/
|
|
@@ -11,6 +11,17 @@
|
|
|
11
11
|
*
|
|
12
12
|
* @see packages/eval/src/adapters/task-sources/repo-schemas.ts
|
|
13
13
|
* @see packages/eval/src/adapters/task-sources/repo-validation.ts
|
|
14
|
+
* @see packages/eval/src/adapters/task-sources/task-file-loader.ts
|
|
14
15
|
*/
|
|
15
16
|
import { Command } from "commander";
|
|
17
|
+
export interface ValidateTasksOptions {
|
|
18
|
+
strict: boolean;
|
|
19
|
+
callerCwd?: string;
|
|
20
|
+
}
|
|
16
21
|
export declare function createValidateTasksCommand(): Command;
|
|
22
|
+
/**
|
|
23
|
+
* Execute the validate-tasks command logic. Returns the exit code (0 success,
|
|
24
|
+
* 1 failure) so callers can decide how to surface it — the CLI wrapper calls
|
|
25
|
+
* `process.exit`, tests can assert directly.
|
|
26
|
+
*/
|
|
27
|
+
export declare function runValidateTasks(tasksPath: string, opts: ValidateTasksOptions): Promise<number>;
|