@sanity/ailf 3.3.0 → 3.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/_vendor/ailf-core/artifact-registry.js +76 -0
- package/dist/_vendor/ailf-shared/feature-flags.d.ts +59 -0
- package/dist/_vendor/ailf-shared/feature-flags.js +44 -0
- package/dist/_vendor/ailf-shared/index.d.ts +1 -0
- package/dist/_vendor/ailf-shared/index.js +1 -0
- package/dist/adapters/config-sources/ailf-resolver.d.ts +55 -0
- package/dist/adapters/config-sources/ailf-resolver.js +147 -0
- package/dist/adapters/config-sources/ts-config-loader.js +7 -0
- package/dist/adapters/task-sources/repo-schemas.d.ts +35 -5
- package/dist/adapters/task-sources/repo-schemas.js +25 -3
- package/dist/adapters/task-sources/task-file-loader.js +3 -0
- package/dist/commands/init.js +16 -2
- package/dist/pipeline/compiler/config-loader.js +6 -1
- package/dist/pipeline/compiler/preset-loader.js +3 -0
- package/package.json +1 -1
|
@@ -320,6 +320,32 @@ const graderPromptPreviewSchema = z.object({
|
|
|
320
320
|
rubricName: z.string().max(60).optional(),
|
|
321
321
|
snippet: z.string().max(120),
|
|
322
322
|
});
|
|
323
|
+
/**
 * Preview shape for the run-scoped `pipelineContext` bulk artifact (W0063 /
 * D0033 M7). Lets the Studio Overview tab render a Pipeline Execution header
 * row (step count, wall-clock, failed-step badge, quality-gate badge, cache
 * hit count) without fetching the full context payload — `config` and per-
 * step detail only land when the panel is expanded.
 *
 * Bounds chosen so the worst-case preview fits comfortably under 384 bytes:
 *   - `failedSteps` is capped at 5 entries with each name ≤ 40 chars. Real
 *     step names ("fetch-docs", "calculate-scores", "gap-analysis") are 10–
 *     25 chars; 40 is a defensive ceiling. The array cap exists because
 *     `fitPreviewToCap` only shortens string fields — an unbounded array
 *     could push the preview over cap and force it to drop entirely.
 *     5 is a triage ceiling: the panel shows "showed 5 of N failed steps"
 *     when `failedSteps.length < stepCount - successCount`, and the full
 *     per-step list is available in the drilldown payload.
 *   - `belowCritical` and `cacheHits` are optional — absent on old runs,
 *     skipped pipelines, or runs without remote-cache telemetry.
 */
const pipelineContextPreviewSchema = z.object({
    // Non-negative integer count of pipeline steps.
    stepCount: z.number().int().nonnegative(),
    // Non-negative duration in milliseconds; fractional values are accepted.
    totalDurationMs: z.number().nonnegative(),
    // At most 5 failed-step names, each at most 40 characters (see above).
    failedSteps: z.array(z.string().max(40)).max(5),
    // Optional quality-gate flag; omitted when the run did not record one.
    belowCritical: z.boolean().optional(),
    // Optional non-negative integer cache-hit count.
    cacheHits: z.number().int().nonnegative().optional(),
});
|
|
323
349
|
// Aspirational: most payload shapes are still loose. Tightening per-type as
|
|
324
350
|
// consumers stabilize is explicitly a W0050/W0051 concern — W0049 fixes the
|
|
325
351
|
// structural shape around them without changing the payload contracts.
|
|
@@ -495,6 +521,56 @@ export const ARTIFACT_REGISTRY = {
|
|
|
495
521
|
entrySchema: unknownEntry,
|
|
496
522
|
mime: "application/json",
|
|
497
523
|
capBytes: 64_000,
|
|
524
|
+
manifestPreview: {
|
|
525
|
+
schema: pipelineContextPreviewSchema,
|
|
526
|
+
extract: (entry) => {
|
|
527
|
+
// Producer shape from `capturePipelineContext` in
|
|
528
|
+
// packages/eval/src/orchestration/pipeline-orchestrator.ts:
|
|
529
|
+
// { config, state: { belowCritical, remoteCacheHits, ... },
|
|
530
|
+
// steps: [{ name, status: "success"|"failed"|"skipped",
|
|
531
|
+
// durationMs? }] }
|
|
532
|
+
//
|
|
533
|
+
// `config` and everything else on `state` are drilldown-only and
|
|
534
|
+
// intentionally absent from the preview — they're what the panel
|
|
535
|
+
// fetches lazily when expanded.
|
|
536
|
+
const e = entry;
|
|
537
|
+
const stepsRaw = Array.isArray(e.steps) ? e.steps : [];
|
|
538
|
+
let totalDurationMs = 0;
|
|
539
|
+
const failedSteps = [];
|
|
540
|
+
let stepCount = 0;
|
|
541
|
+
for (const raw of stepsRaw) {
|
|
542
|
+
if (raw === null || typeof raw !== "object")
|
|
543
|
+
continue;
|
|
544
|
+
stepCount += 1;
|
|
545
|
+
const s = raw;
|
|
546
|
+
if (typeof s.durationMs === "number" &&
|
|
547
|
+
Number.isFinite(s.durationMs) &&
|
|
548
|
+
s.durationMs >= 0) {
|
|
549
|
+
totalDurationMs += s.durationMs;
|
|
550
|
+
}
|
|
551
|
+
if (s.status === "failed" &&
|
|
552
|
+
typeof s.name === "string" &&
|
|
553
|
+
failedSteps.length < 5) {
|
|
554
|
+
failedSteps.push(truncateString(s.name, 40));
|
|
555
|
+
}
|
|
556
|
+
}
|
|
557
|
+
const belowCritical = typeof e.state?.belowCritical === "boolean"
|
|
558
|
+
? e.state.belowCritical
|
|
559
|
+
: undefined;
|
|
560
|
+
const cacheHitsRaw = e.state?.remoteCacheHits;
|
|
561
|
+
const cacheHits = Array.isArray(cacheHitsRaw)
|
|
562
|
+
? cacheHitsRaw.length
|
|
563
|
+
: undefined;
|
|
564
|
+
return {
|
|
565
|
+
stepCount,
|
|
566
|
+
totalDurationMs,
|
|
567
|
+
failedSteps,
|
|
568
|
+
...(belowCritical === undefined ? {} : { belowCritical }),
|
|
569
|
+
...(cacheHits === undefined ? {} : { cacheHits }),
|
|
570
|
+
};
|
|
571
|
+
},
|
|
572
|
+
capBytes: 384,
|
|
573
|
+
},
|
|
498
574
|
}),
|
|
499
575
|
documentManifest: buildDescriptor({
|
|
500
576
|
type: "documentManifest",
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Feature flags — compile-time UI/feature visibility toggles.
|
|
3
|
+
*
|
|
4
|
+
* Single source of truth for "temporary" flags that hide in-flight features,
|
|
5
|
+
* gate partially-built panels, or carry a known rollback. Each entry carries
|
|
6
|
+
* the metadata needed to answer "why is this off and when can it go?" so
|
|
7
|
+
* flags don't rot into undiscoverable tombstones.
|
|
8
|
+
*
|
|
9
|
+
* This is intentionally NOT a runtime feature-flag system — no user
|
|
10
|
+
* segmentation, no A/B, no env-var overrides. Just a typed map of booleans
|
|
11
|
+
* with audit metadata. Flipping a flag is a code change.
|
|
12
|
+
*
|
|
13
|
+
* Adding a flag:
|
|
14
|
+
* 1. Add an entry below with every metadata field populated.
|
|
15
|
+
* 2. Import `FEATURE_FLAGS` at the call site and read `.enabled`.
|
|
16
|
+
* 3. When the re-enable condition is met, remove the entry and the gate.
|
|
17
|
+
*
|
|
18
|
+
* See docs/guides/feature-flags.md for the full lifecycle.
|
|
19
|
+
*/
|
|
20
|
+
/** Shape of a single feature-flag entry. All fields required. */
|
|
21
|
+
export interface FeatureFlag {
|
|
22
|
+
/** Whether the gated feature is visible / active. */
|
|
23
|
+
readonly enabled: boolean;
|
|
24
|
+
/** Why the flag exists. Answers "what problem did turning this off solve?" */
|
|
25
|
+
readonly rationale: string;
|
|
26
|
+
/** The condition under which this flag should be re-enabled or removed. */
|
|
27
|
+
readonly reEnableWhen: string;
|
|
28
|
+
/** ID of the work item that owns the flag's resolution, or null if none. */
|
|
29
|
+
readonly relatedWorkItem: `W${string}` | null;
|
|
30
|
+
/** ISO 8601 date (YYYY-MM-DD) the flag was introduced. Used for staleness audits. */
|
|
31
|
+
readonly addedAt: string;
|
|
32
|
+
}
|
|
33
|
+
/**
|
|
34
|
+
* Registry of all active feature flags across AILF packages.
|
|
35
|
+
*
|
|
36
|
+
* Consumers read values directly:
|
|
37
|
+
* if (FEATURE_FLAGS.showFailureModes.enabled) { ... }
|
|
38
|
+
*
|
|
39
|
+
* Adding a key here extends the `FeatureFlagKey` union automatically; typos
|
|
40
|
+
* at call sites fail at compile time.
|
|
41
|
+
*/
|
|
42
|
+
export declare const FEATURE_FLAGS: {
|
|
43
|
+
readonly showFailureModes: {
|
|
44
|
+
readonly enabled: false;
|
|
45
|
+
readonly rationale: "Current classification is too broad (majority \"Unclassified\") to be actionable in the diagnostics view.";
|
|
46
|
+
readonly reEnableWhen: "Failure taxonomy is refined so non-Unclassified buckets carry meaningful signal.";
|
|
47
|
+
readonly relatedWorkItem: "W0037-detect-model-output-failures";
|
|
48
|
+
readonly addedAt: "2026-04-22";
|
|
49
|
+
};
|
|
50
|
+
readonly showRegressedSinceLastRun: {
|
|
51
|
+
readonly enabled: false;
|
|
52
|
+
readonly rationale: "Bare list of regressed area names lacks explanatory context for why each regressed.";
|
|
53
|
+
readonly reEnableWhen: "Per-area regression attribution can be surfaced alongside the list.";
|
|
54
|
+
readonly relatedWorkItem: null;
|
|
55
|
+
readonly addedAt: "2026-04-22";
|
|
56
|
+
};
|
|
57
|
+
};
|
|
58
|
+
/** Union of all registered flag keys. Typos at call sites fail at compile time. */
|
|
59
|
+
export type FeatureFlagKey = keyof typeof FEATURE_FLAGS;
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Feature flags — compile-time UI/feature visibility toggles.
|
|
3
|
+
*
|
|
4
|
+
* Single source of truth for "temporary" flags that hide in-flight features,
|
|
5
|
+
* gate partially-built panels, or carry a known rollback. Each entry carries
|
|
6
|
+
* the metadata needed to answer "why is this off and when can it go?" so
|
|
7
|
+
* flags don't rot into undiscoverable tombstones.
|
|
8
|
+
*
|
|
9
|
+
* This is intentionally NOT a runtime feature-flag system — no user
|
|
10
|
+
* segmentation, no A/B, no env-var overrides. Just a typed map of booleans
|
|
11
|
+
* with audit metadata. Flipping a flag is a code change.
|
|
12
|
+
*
|
|
13
|
+
* Adding a flag:
|
|
14
|
+
* 1. Add an entry below with every metadata field populated.
|
|
15
|
+
* 2. Import `FEATURE_FLAGS` at the call site and read `.enabled`.
|
|
16
|
+
* 3. When the re-enable condition is met, remove the entry and the gate.
|
|
17
|
+
*
|
|
18
|
+
* See docs/guides/feature-flags.md for the full lifecycle.
|
|
19
|
+
*/
|
|
20
|
+
/**
 * Registry of all active feature flags across AILF packages.
 *
 * Consumers read values directly:
 *   if (FEATURE_FLAGS.showFailureModes.enabled) { ... }
 *
 * Adding a key here extends the `FeatureFlagKey` union automatically; typos
 * at call sites fail at compile time.
 *
 * The registry and every entry are frozen so that the "flipping a flag is a
 * code change" rule also holds at runtime: the type declarations mark every
 * field `readonly`, and freezing makes an accidental assignment throw in
 * strict-mode code (ES modules) instead of silently toggling a flag for
 * every other consumer.
 */
export const FEATURE_FLAGS = Object.freeze({
    showFailureModes: Object.freeze({
        enabled: false,
        rationale: 'Current classification is too broad (majority "Unclassified") to be actionable in the diagnostics view.',
        reEnableWhen: "Failure taxonomy is refined so non-Unclassified buckets carry meaningful signal.",
        relatedWorkItem: "W0037-detect-model-output-failures",
        addedAt: "2026-04-22",
    }),
    showRegressedSinceLastRun: Object.freeze({
        enabled: false,
        rationale: "Bare list of regressed area names lacks explanatory context for why each regressed.",
        reEnableWhen: "Per-area regression attribution can be surfaced alongside the list.",
        relatedWorkItem: null,
        addedAt: "2026-04-22",
    }),
});
|
|
@@ -10,6 +10,7 @@
|
|
|
10
10
|
* @sanity/ailf-studio. It is the leaf of the dependency graph.
|
|
11
11
|
*/
|
|
12
12
|
export * from "./document-ref.js";
|
|
13
|
+
export * from "./feature-flags.js";
|
|
13
14
|
export * from "./score-grades.js";
|
|
14
15
|
export * from "./noise-threshold.js";
|
|
15
16
|
export * from "./eval-modes.js";
|
|
@@ -10,6 +10,7 @@
|
|
|
10
10
|
* @sanity/ailf-studio. It is the leaf of the dependency graph.
|
|
11
11
|
*/
|
|
12
12
|
export * from "./document-ref.js";
|
|
13
|
+
export * from "./feature-flags.js";
|
|
13
14
|
export * from "./score-grades.js";
|
|
14
15
|
export * from "./noise-threshold.js";
|
|
15
16
|
export * from "./eval-modes.js";
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ailf-resolver.ts — locate `@sanity/ailf` for user TS files, with a bundled fallback.
|
|
3
|
+
*
|
|
4
|
+
* User `.ailf/*.ts` files import `defineTask` / `defineConfig` / `definePreset`
|
|
5
|
+
* from `@sanity/ailf`. In a fresh project with no local install, that bare
|
|
6
|
+
* specifier cannot resolve from the user's tree. To keep `ailf init` → `ailf
|
|
7
|
+
* pipeline` working out of the box we transparently fall back to the CLI's own
|
|
8
|
+
* copy of `@sanity/ailf` by registering a jiti module alias. A user-local
|
|
9
|
+
* install always wins — the fallback kicks in only when resolution fails.
|
|
10
|
+
*
|
|
11
|
+
* All jiti callsites across the eval package use `resolveAilfAlias()` to get
|
|
12
|
+
* a consistent resolution + warning story. Callers pass the returned map (or
|
|
13
|
+
* nothing) to `createJiti`.
|
|
14
|
+
*/
|
|
15
|
+
/**
|
|
16
|
+
* Probe whether the user has `@sanity/ailf` installed as a local dependency
|
|
17
|
+
* reachable from the given path. Walks up the directory tree looking for a
|
|
18
|
+
* `node_modules/@sanity/ailf/package.json`. Returns the package entry point
|
|
19
|
+
* path on success, null otherwise.
|
|
20
|
+
*
|
|
21
|
+
* We intentionally do NOT use Node's `require.resolve` self-reference path:
|
|
22
|
+
* tsx and some bundler setups make it unreliable, and a self-reference
|
|
23
|
+
* would only match when the caller *is* the `@sanity/ailf` package (the
|
|
24
|
+
* monorepo devving case), which is semantically the same as having no
|
|
25
|
+
* install — the bundled fallback handles it.
|
|
26
|
+
*/
|
|
27
|
+
export declare function probeUserLocalAilf(fromPath: string): string | null;
|
|
28
|
+
/**
|
|
29
|
+
* Return the path to the CLI's own bundled copy of `@sanity/ailf`. Used as the
|
|
30
|
+
* fallback target when a user's project does not have it installed.
|
|
31
|
+
*
|
|
32
|
+
* We walk the filesystem rather than `require.resolve("@sanity/ailf")` because
|
|
33
|
+
* self-reference resolution is unreliable under tsx and some bundler setups.
|
|
34
|
+
* Returns null in exotic setups where no ancestor package.json matches.
|
|
35
|
+
*/
|
|
36
|
+
export declare function getBundledAilfPath(): string | null;
|
|
37
|
+
/**
|
|
38
|
+
* Emit a one-shot stderr advisory when the loader falls back to the bundled
|
|
39
|
+
* `@sanity/ailf`. The flag is module-scoped so a single pipeline run warns at
|
|
40
|
+
* most once, no matter how many TS files trigger the fallback.
|
|
41
|
+
*/
|
|
42
|
+
export declare function warnBundledFallbackOnce(): void;
|
|
43
|
+
/** Test-only: reset the warn-once flag between unit tests. */
|
|
44
|
+
export declare function resetBundledFallbackWarning(): void;
|
|
45
|
+
/**
|
|
46
|
+
* Decide whether jiti should alias `@sanity/ailf` → bundled-path for the given
|
|
47
|
+
* file. Returns the alias map or null.
|
|
48
|
+
*
|
|
49
|
+
* - User-local resolves → returns null (jiti's natural walk finds it).
|
|
50
|
+
* - User-local fails + bundled path available → returns alias map, fires
|
|
51
|
+
* one-shot warning, logs at verbose level.
|
|
52
|
+
* - User-local fails + no bundled path → returns null (nothing we can do;
|
|
53
|
+
* jiti will surface the original MODULE_NOT_FOUND).
|
|
54
|
+
*/
|
|
55
|
+
export declare function resolveAilfAlias(filePath: string): Record<string, string> | null;
|
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ailf-resolver.ts — locate `@sanity/ailf` for user TS files, with a bundled fallback.
|
|
3
|
+
*
|
|
4
|
+
* User `.ailf/*.ts` files import `defineTask` / `defineConfig` / `definePreset`
|
|
5
|
+
* from `@sanity/ailf`. In a fresh project with no local install, that bare
|
|
6
|
+
* specifier cannot resolve from the user's tree. To keep `ailf init` → `ailf
|
|
7
|
+
* pipeline` working out of the box we transparently fall back to the CLI's own
|
|
8
|
+
* copy of `@sanity/ailf` by registering a jiti module alias. A user-local
|
|
9
|
+
* install always wins — the fallback kicks in only when resolution fails.
|
|
10
|
+
*
|
|
11
|
+
* All jiti callsites across the eval package use `resolveAilfAlias()` to get
|
|
12
|
+
* a consistent resolution + warning story. Callers pass the returned map (or
|
|
13
|
+
* nothing) to `createJiti`.
|
|
14
|
+
*/
|
|
15
|
+
import { existsSync, readFileSync, statSync } from "node:fs";
|
|
16
|
+
import { dirname, resolve as pathResolve } from "node:path";
|
|
17
|
+
import { fileURLToPath } from "node:url";
|
|
18
|
+
/**
 * Locate the root directory of the enclosing `@sanity/ailf` package by
 * climbing from this module's own directory. The first ancestor whose
 * `package.json` declares the name `@sanity/ailf` wins; because the search is
 * anchored on package.json it works the same from source and compiled
 * layouts. The filesystem root itself is not inspected.
 *
 * @returns {string | null} Absolute package root, or null when no ancestor
 *   matches.
 */
function findAilfPackageRoot() {
    let current = dirname(fileURLToPath(import.meta.url));
    for (;;) {
        const parent = dirname(current);
        if (parent === current) {
            // Reached the filesystem root without a match.
            return null;
        }
        const manifestPath = pathResolve(current, "package.json");
        if (existsSync(manifestPath)) {
            let packageName;
            try {
                packageName = JSON.parse(readFileSync(manifestPath, "utf-8")).name;
            }
            catch {
                // An unreadable or malformed package.json is treated as
                // "not ours"; keep climbing.
                packageName = undefined;
            }
            if (packageName === "@sanity/ailf") {
                return current;
            }
        }
        current = parent;
    }
}
|
|
42
|
+
/**
 * Probe whether the user has `@sanity/ailf` installed as a local dependency
 * reachable from the given path. Walks up the directory tree looking for a
 * `node_modules/@sanity/ailf/package.json`. Returns the package entry point
 * path on success, null otherwise.
 *
 * We intentionally do NOT use Node's `require.resolve` self-reference path:
 * tsx and some bundler setups make it unreliable, and a self-reference
 * would only match when the caller *is* the `@sanity/ailf` package (the
 * monorepo devving case), which is semantically the same as having no
 * install — the bundled fallback handles it.
 *
 * `fromPath` is resolved against the current working directory first, so a
 * relative path such as `.ailf/tasks/x.ts` walks through the working
 * directory and its ancestors instead of stopping at `"."`. The walk also
 * inspects the filesystem root before giving up.
 *
 * @param {string} fromPath File or directory to start from; may be relative
 *   and need not exist (its parent directory is used in that case).
 * @returns {string | null} Absolute path of the installed package's entry
 *   (`module`, then `main`, then `index.js`), or null when no install is
 *   found or the nearest install's package.json cannot be read or parsed.
 */
export function probeUserLocalAilf(fromPath) {
    const absolutePath = pathResolve(fromPath);
    let dir;
    try {
        // statSync throws for a missing path; fall back to the parent then.
        dir = statSync(absolutePath).isDirectory()
            ? absolutePath
            : dirname(absolutePath);
    }
    catch {
        dir = dirname(absolutePath);
    }
    for (;;) {
        const pkgJson = pathResolve(dir, "node_modules", "@sanity", "ailf", "package.json");
        if (existsSync(pkgJson)) {
            try {
                const pkg = JSON.parse(readFileSync(pkgJson, "utf-8"));
                const entry = pkg.module ?? pkg.main ?? "index.js";
                return pathResolve(dirname(pkgJson), entry);
            }
            catch {
                // The nearest install is unusable; report "not found" rather
                // than silently picking a more distant copy.
                return null;
            }
        }
        const parent = dirname(dir);
        if (parent === dir) {
            // `dir` was the filesystem root and has now been checked too.
            return null;
        }
        dir = parent;
    }
}
|
|
81
|
+
/**
 * Resolve the entry file of the CLI's own copy of `@sanity/ailf`, used as the
 * alias target when the user's project has no local install.
 *
 * The package root comes from a filesystem walk rather than
 * `require.resolve("@sanity/ailf")`, because self-reference resolution is
 * unreliable under tsx and some bundler setups.
 *
 * @returns {string | null} The first existing entry among `dist/index.js`
 *   (production layout) and `src/index.ts` (development layout, tsx on
 *   source); null when the package root cannot be found or neither exists.
 */
export function getBundledAilfPath() {
    const packageRoot = findAilfPackageRoot();
    if (!packageRoot) {
        return null;
    }
    // Ordered by preference: compiled output first, then raw source.
    const candidateEntries = [
        ["dist", "index.js"],
        ["src", "index.ts"],
    ];
    for (const segments of candidateEntries) {
        const candidate = pathResolve(packageRoot, ...segments);
        if (existsSync(candidate)) {
            return candidate;
        }
    }
    return null;
}
|
|
103
|
+
// Module-scoped latch: true once the advisory has been written.
let fallbackWarningShown = false;
/**
 * Write a one-time advisory to stderr when the loader falls back to the
 * bundled `@sanity/ailf`. Because the latch lives at module scope, repeated
 * calls after the first are no-ops until the latch is reset.
 */
export function warnBundledFallbackOnce() {
    if (!fallbackWarningShown) {
        fallbackWarningShown = true;
        const advisoryLines = [
            " ⚠ @sanity/ailf is not installed in your project — using the CLI's bundled copy.\n",
            " Pin it locally for reproducibility: npm install -D @sanity/ailf\n",
        ];
        // Emitted as a single write so the two lines cannot be interleaved.
        process.stderr.write(advisoryLines.join(""));
    }
}
/** Test-only: clear the warn-once latch so the next call warns again. */
export function resetBundledFallbackWarning() {
    fallbackWarningShown = false;
}
|
|
120
|
+
/**
 * Work out the jiti alias map needed to load the given user file.
 *
 * Outcomes:
 *   - A user-local `@sanity/ailf` is found → null; no alias is needed because
 *     jiti's own resolution will reach it.
 *   - No user-local install, bundled copy available → `{ "@sanity/ailf": path }`,
 *     after firing the one-shot fallback warning.
 *   - No user-local install and no bundled copy → null; jiti surfaces the
 *     original resolution error.
 *
 * When `AILF_LOG_LEVEL` is `verbose`, the decision is also traced to stderr.
 *
 * @param {string} filePath The user file about to be loaded.
 * @returns {Record<string, string> | null} Alias map for `createJiti`, or null.
 */
export function resolveAilfAlias(filePath) {
    const localEntry = probeUserLocalAilf(filePath);
    const isVerbose = process.env.AILF_LOG_LEVEL === "verbose";
    if (!localEntry) {
        const bundledEntry = getBundledAilfPath();
        if (bundledEntry) {
            if (isVerbose) {
                process.stderr.write(` [ts-loader] ${filePath} → @sanity/ailf not installed locally; using bundled copy at ${bundledEntry}\n`);
            }
            warnBundledFallbackOnce();
            return { "@sanity/ailf": bundledEntry };
        }
        // Nothing to alias to; let the loader report the failure itself.
        return null;
    }
    if (isVerbose) {
        process.stderr.write(` [ts-loader] ${filePath} → @sanity/ailf resolved locally at ${localEntry}\n`);
    }
    return null;
}
|
|
@@ -15,6 +15,7 @@
|
|
|
15
15
|
import { existsSync } from "fs";
|
|
16
16
|
import { pathToFileURL } from "node:url";
|
|
17
17
|
import { createJiti } from "jiti";
|
|
18
|
+
import { resolveAilfAlias } from "./ailf-resolver.js";
|
|
18
19
|
// ---------------------------------------------------------------------------
|
|
19
20
|
// jiti instance factory — resolves imports relative to the loaded file
|
|
20
21
|
// ---------------------------------------------------------------------------
|
|
@@ -28,13 +29,19 @@ import { createJiti } from "jiti";
|
|
|
28
29
|
*
|
|
29
30
|
* We pass a `file://` URL (not a bare path) so jiti uses ESM resolution,
|
|
30
31
|
* which matches the `"import"` condition in package.json exports maps.
|
|
32
|
+
*
|
|
33
|
+
* When the user's project cannot resolve `@sanity/ailf` (fresh directory
|
|
34
|
+
* without a local install), we register an alias pointing at the CLI's own
|
|
35
|
+
* bundled copy so the load still succeeds. See `ailf-resolver.ts`.
|
|
31
36
|
*/
|
|
32
37
|
function createJitiForFile(filePath) {
    // Resolve the alias first so any fallback warning is emitted before the
    // loader is constructed.
    const ailfAlias = resolveAilfAlias(filePath);
    const options = {
        // Interop: handle both `export default` and `module.exports`
        interopDefault: true,
        // Enables jiti's `requireCache` option.
        // NOTE(review): the earlier comment here described this as "don't
        // require file extensions in imports", which does not match the
        // option name — confirm the intended behaviour against jiti's docs.
        requireCache: true,
    };
    if (ailfAlias) {
        // Only set `alias` when a bundled fallback is actually needed.
        options.alias = ailfAlias;
    }
    return createJiti(pathToFileURL(filePath).href, options);
}
|
|
40
47
|
/**
|
|
@@ -19,15 +19,17 @@ import { z } from "zod";
|
|
|
19
19
|
/**
|
|
20
20
|
* The set of assertion types allowed in task files.
|
|
21
21
|
*
|
|
22
|
-
*
|
|
23
|
-
*
|
|
22
|
+
* Combines a curated subset of Promptfoo assertion types (stable, well-
|
|
23
|
+
* documented, useful for external authors) with the agent-harness-specific
|
|
24
|
+
* types mapped by `mode-handlers/agent-harness/assertions.ts`.
|
|
24
25
|
*/
|
|
25
|
-
export declare const CURATED_ASSERTION_TYPES: readonly ["llm-rubric", "contains", "contains-any", "contains-all", "not-contains", "icontains", "icontains-any", "regex", "javascript", "similar", "cost", "latency"];
|
|
26
|
+
export declare const CURATED_ASSERTION_TYPES: readonly ["llm-rubric", "contains", "contains-any", "contains-all", "not-contains", "icontains", "icontains-any", "regex", "javascript", "similar", "cost", "latency", "file-exists", "file-contains", "command-succeeds", "diff-matches"];
|
|
26
27
|
export type CuratedAssertionType = (typeof CURATED_ASSERTION_TYPES)[number];
|
|
27
28
|
/**
|
|
28
|
-
* Valid rubric template names — must match keys in
|
|
29
|
+
* Valid rubric template names — must match template keys in
|
|
30
|
+
* `packages/eval/config/rubrics.ts`.
|
|
29
31
|
*/
|
|
30
|
-
export declare const RUBRIC_TEMPLATE_NAMES: readonly ["task-completion", "code-correctness", "doc-coverage"];
|
|
32
|
+
export declare const RUBRIC_TEMPLATE_NAMES: readonly ["task-completion", "code-correctness", "doc-coverage", "mcp-input-validation", "mcp-output-correctness", "mcp-error-handling", "mcp-security", "factual-correctness", "completeness", "currency", "process-quality", "agent-output", "agent-tool-usage"];
|
|
31
33
|
export type RubricTemplateName = (typeof RUBRIC_TEMPLATE_NAMES)[number];
|
|
32
34
|
/**
|
|
33
35
|
* Zod schema for a single task definition using canonical field names.
|
|
@@ -84,6 +86,16 @@ export declare const CanonicalTaskSchema: z.ZodObject<{
|
|
|
84
86
|
"task-completion": "task-completion";
|
|
85
87
|
"code-correctness": "code-correctness";
|
|
86
88
|
"doc-coverage": "doc-coverage";
|
|
89
|
+
"mcp-input-validation": "mcp-input-validation";
|
|
90
|
+
"mcp-output-correctness": "mcp-output-correctness";
|
|
91
|
+
"mcp-error-handling": "mcp-error-handling";
|
|
92
|
+
"mcp-security": "mcp-security";
|
|
93
|
+
"factual-correctness": "factual-correctness";
|
|
94
|
+
completeness: "completeness";
|
|
95
|
+
currency: "currency";
|
|
96
|
+
"process-quality": "process-quality";
|
|
97
|
+
"agent-output": "agent-output";
|
|
98
|
+
"agent-tool-usage": "agent-tool-usage";
|
|
87
99
|
}>;
|
|
88
100
|
criteria: z.ZodArray<z.ZodString>;
|
|
89
101
|
weight: z.ZodOptional<z.ZodNumber>;
|
|
@@ -101,6 +113,10 @@ export declare const CanonicalTaskSchema: z.ZodObject<{
|
|
|
101
113
|
similar: "similar";
|
|
102
114
|
cost: "cost";
|
|
103
115
|
latency: "latency";
|
|
116
|
+
"file-exists": "file-exists";
|
|
117
|
+
"file-contains": "file-contains";
|
|
118
|
+
"command-succeeds": "command-succeeds";
|
|
119
|
+
"diff-matches": "diff-matches";
|
|
104
120
|
}>;
|
|
105
121
|
value: z.ZodOptional<z.ZodUnknown>;
|
|
106
122
|
threshold: z.ZodOptional<z.ZodNumber>;
|
|
@@ -174,6 +190,16 @@ export declare const CanonicalTaskFileSchema: z.ZodArray<z.ZodObject<{
|
|
|
174
190
|
"task-completion": "task-completion";
|
|
175
191
|
"code-correctness": "code-correctness";
|
|
176
192
|
"doc-coverage": "doc-coverage";
|
|
193
|
+
"mcp-input-validation": "mcp-input-validation";
|
|
194
|
+
"mcp-output-correctness": "mcp-output-correctness";
|
|
195
|
+
"mcp-error-handling": "mcp-error-handling";
|
|
196
|
+
"mcp-security": "mcp-security";
|
|
197
|
+
"factual-correctness": "factual-correctness";
|
|
198
|
+
completeness: "completeness";
|
|
199
|
+
currency: "currency";
|
|
200
|
+
"process-quality": "process-quality";
|
|
201
|
+
"agent-output": "agent-output";
|
|
202
|
+
"agent-tool-usage": "agent-tool-usage";
|
|
177
203
|
}>;
|
|
178
204
|
criteria: z.ZodArray<z.ZodString>;
|
|
179
205
|
weight: z.ZodOptional<z.ZodNumber>;
|
|
@@ -191,6 +217,10 @@ export declare const CanonicalTaskFileSchema: z.ZodArray<z.ZodObject<{
|
|
|
191
217
|
similar: "similar";
|
|
192
218
|
cost: "cost";
|
|
193
219
|
latency: "latency";
|
|
220
|
+
"file-exists": "file-exists";
|
|
221
|
+
"file-contains": "file-contains";
|
|
222
|
+
"command-succeeds": "command-succeeds";
|
|
223
|
+
"diff-matches": "diff-matches";
|
|
194
224
|
}>;
|
|
195
225
|
value: z.ZodOptional<z.ZodUnknown>;
|
|
196
226
|
threshold: z.ZodOptional<z.ZodNumber>;
|
|
@@ -22,8 +22,9 @@ import { z } from "zod";
|
|
|
22
22
|
/**
|
|
23
23
|
* The set of assertion types allowed in task files.
|
|
24
24
|
*
|
|
25
|
-
*
|
|
26
|
-
*
|
|
25
|
+
* Combines a curated subset of Promptfoo assertion types (stable, well-
|
|
26
|
+
* documented, useful for external authors) with the agent-harness-specific
|
|
27
|
+
* types mapped by `mode-handlers/agent-harness/assertions.ts`.
|
|
27
28
|
*/
|
|
28
29
|
export const CURATED_ASSERTION_TYPES = [
|
|
29
30
|
"llm-rubric",
|
|
@@ -38,14 +39,35 @@ export const CURATED_ASSERTION_TYPES = [
|
|
|
38
39
|
"similar",
|
|
39
40
|
"cost",
|
|
40
41
|
"latency",
|
|
42
|
+
// Agent-harness assertions — verify sandbox state after the agent runs.
|
|
43
|
+
// See src/pipeline/compiler/mode-handlers/agent-harness/assertions.ts
|
|
44
|
+
"file-exists",
|
|
45
|
+
"file-contains",
|
|
46
|
+
"command-succeeds",
|
|
47
|
+
"diff-matches",
|
|
41
48
|
];
|
|
42
49
|
/**
|
|
43
|
-
* Valid rubric template names — must match keys in
|
|
50
|
+
* Valid rubric template names — must match template keys in
|
|
51
|
+
* `packages/eval/config/rubrics.ts`.
|
|
44
52
|
*/
|
|
45
53
|
export const RUBRIC_TEMPLATE_NAMES = [
|
|
54
|
+
// Core literacy dimensions
|
|
46
55
|
"task-completion",
|
|
47
56
|
"code-correctness",
|
|
48
57
|
"doc-coverage",
|
|
58
|
+
// MCP server dimensions
|
|
59
|
+
"mcp-input-validation",
|
|
60
|
+
"mcp-output-correctness",
|
|
61
|
+
"mcp-error-handling",
|
|
62
|
+
"mcp-security",
|
|
63
|
+
// Knowledge probe dimensions
|
|
64
|
+
"factual-correctness",
|
|
65
|
+
"completeness",
|
|
66
|
+
"currency",
|
|
67
|
+
// Agent harness dimensions
|
|
68
|
+
"process-quality",
|
|
69
|
+
"agent-output",
|
|
70
|
+
"agent-tool-usage",
|
|
49
71
|
];
|
|
50
72
|
// ---------------------------------------------------------------------------
|
|
51
73
|
// Doc ref schemas — polymorphic canonical doc references
|
|
@@ -25,6 +25,7 @@ import { existsSync, readdirSync } from "fs";
|
|
|
25
25
|
import { pathToFileURL } from "node:url";
|
|
26
26
|
import { resolve } from "path";
|
|
27
27
|
import { createJiti } from "jiti";
|
|
28
|
+
import { resolveAilfAlias } from "../config-sources/ailf-resolver.js";
|
|
28
29
|
import { loadTsConfig } from "../config-sources/ts-config-loader.js";
|
|
29
30
|
/**
|
|
30
31
|
* Discover TS/JS task files in a directory.
|
|
@@ -72,9 +73,11 @@ export async function loadTsTaskFile(filePath) {
|
|
|
72
73
|
* Needed by resolve-mappings.ts which is called from sync contexts.
|
|
73
74
|
*/
|
|
74
75
|
export function loadTsTaskFileSync(filePath) {
|
|
76
|
+
const alias = resolveAilfAlias(filePath);
|
|
75
77
|
const jiti = createJiti(pathToFileURL(filePath).href, {
|
|
76
78
|
interopDefault: true,
|
|
77
79
|
requireCache: true,
|
|
80
|
+
...(alias ? { alias } : {}),
|
|
78
81
|
});
|
|
79
82
|
const mod = jiti(filePath);
|
|
80
83
|
const value = mod && typeof mod === "object" && "default" in mod ? mod.default : mod;
|
package/dist/commands/init.js
CHANGED
|
@@ -20,6 +20,7 @@ import { Command } from "commander";
|
|
|
20
20
|
import { existsSync, mkdirSync, writeFileSync } from "fs";
|
|
21
21
|
import { resolve, relative } from "path";
|
|
22
22
|
import { ailfConfigData, ailfConfigYaml, ailfConfigTs, taskYamlFiles, taskTsFiles, TASK_FILE_NAMES, TASK_EXAMPLES, allTaskData, workflowYaml, } from "../_vendor/ailf-core/index.js";
|
|
23
|
+
import { probeUserLocalAilf } from "../adapters/config-sources/ailf-resolver.js";
|
|
23
24
|
// ---------------------------------------------------------------------------
|
|
24
25
|
// Command factory
|
|
25
26
|
// ---------------------------------------------------------------------------
|
|
@@ -82,6 +83,13 @@ async function runInit(opts) {
|
|
|
82
83
|
console.log();
|
|
83
84
|
console.log(" 🚀 Initializing AI Literacy Framework");
|
|
84
85
|
console.log();
|
|
86
|
+
if (format === "ts" && !probeUserLocalAilf(targetDir)) {
|
|
87
|
+
console.log(" ℹ @sanity/ailf is not installed in this project yet.");
|
|
88
|
+
console.log(" For reproducibility and IDE autocomplete, install it after init:");
|
|
89
|
+
console.log(" npm install -D @sanity/ailf (or pnpm add -D, yarn add -D)");
|
|
90
|
+
console.log(" The pipeline will fall back to the CLI's bundled copy until you do.");
|
|
91
|
+
console.log();
|
|
92
|
+
}
|
|
85
93
|
// 1. Create directories
|
|
86
94
|
mkdirSync(tasksDir, { recursive: true });
|
|
87
95
|
console.log(` ✓ Created ${rel(targetDir, ailfDir)}/`);
|
|
@@ -252,7 +260,7 @@ async function runInit(opts) {
|
|
|
252
260
|
console.log(` 2. Validate locally: npx @sanity/ailf@latest validate-tasks .ailf/tasks/`);
|
|
253
261
|
console.log(" 3. Add a GitHub Actions secret");
|
|
254
262
|
console.log(" (Settings → Secrets and variables → Actions):");
|
|
255
|
-
console.log(" • AILF_API_KEY — your API key
|
|
263
|
+
console.log(" • AILF_API_KEY — your API key");
|
|
256
264
|
console.log(" 4. Push — the workflow at .github/workflows/ailf-eval.yml runs");
|
|
257
265
|
console.log(" automatically on PRs");
|
|
258
266
|
if (format === "ts") {
|
|
@@ -268,9 +276,15 @@ async function runInit(opts) {
|
|
|
268
276
|
console.log();
|
|
269
277
|
console.log(" Not a Sanity employee? Request an API key from the AILF team.");
|
|
270
278
|
console.log();
|
|
271
|
-
console.log(" 💡 Test
|
|
279
|
+
console.log(" 💡 Test a remote run (executes against the AILF API) before pushing:");
|
|
272
280
|
console.log(" AILF_API_KEY=... npx @sanity/ailf@latest pipeline --remote --debug");
|
|
273
281
|
console.log();
|
|
282
|
+
console.log(" 💡 Or test a remote run against your repo tasks:");
|
|
283
|
+
console.log(" AILF_API_KEY=... npx @sanity/ailf@latest pipeline --remote --task-source=repo --debug");
|
|
284
|
+
console.log();
|
|
285
|
+
console.log(" 💡 Or run locally against your repo tasks:");
|
|
286
|
+
console.log(" AILF_API_KEY=... npx @sanity/ailf@latest pipeline --mode=literacy --variant=full --task-source=repo --debug --explain -y");
|
|
287
|
+
console.log();
|
|
274
288
|
}
|
|
275
289
|
// ---------------------------------------------------------------------------
|
|
276
290
|
// Custom preset scaffold template
|
|
@@ -25,6 +25,7 @@ import { createRequire } from "module";
|
|
|
25
25
|
import { existsSync, readFileSync } from "fs";
|
|
26
26
|
import { load } from "js-yaml";
|
|
27
27
|
import { resolve } from "path";
|
|
28
|
+
import { resolveAilfAlias } from "../../adapters/config-sources/ailf-resolver.js";
|
|
28
29
|
/**
|
|
29
30
|
* Load a config file by name, searching for TS/JS/YAML/JSON variants.
|
|
30
31
|
*
|
|
@@ -134,7 +135,11 @@ function loadTsFile(filePath, format) {
|
|
|
134
135
|
// jiti supports sync loading. Use createRequire for ESM compatibility.
|
|
135
136
|
const esmRequire = createRequire(import.meta.url);
|
|
136
137
|
const { createJiti } = esmRequire("jiti");
|
|
137
|
-
const
|
|
138
|
+
const alias = resolveAilfAlias(filePath);
|
|
139
|
+
const jiti = createJiti(filePath, {
|
|
140
|
+
interopDefault: true,
|
|
141
|
+
...(alias ? { alias } : {}),
|
|
142
|
+
});
|
|
138
143
|
const mod = jiti(filePath);
|
|
139
144
|
const data = (mod?.default ?? mod);
|
|
140
145
|
return { data, filePath, format };
|
|
@@ -14,6 +14,7 @@ import { existsSync } from "fs";
|
|
|
14
14
|
import { resolve } from "path";
|
|
15
15
|
import { pathToFileURL } from "url";
|
|
16
16
|
import { createJiti } from "jiti";
|
|
17
|
+
import { resolveAilfAlias } from "../../adapters/config-sources/ailf-resolver.js";
|
|
17
18
|
/** Thrown for preset-specific load errors (distinguishes from third-party errors) */
|
|
18
19
|
class PresetLoadError extends Error {
|
|
19
20
|
constructor(message) {
|
|
@@ -53,9 +54,11 @@ function loadSinglePreset(ref, rootDir) {
|
|
|
53
54
|
}
|
|
54
55
|
}
|
|
55
56
|
try {
|
|
57
|
+
const alias = resolveAilfAlias(filePath);
|
|
56
58
|
const jiti = createJiti(pathToFileURL(rootDir).href, {
|
|
57
59
|
interopDefault: true,
|
|
58
60
|
requireCache: true,
|
|
61
|
+
...(alias ? { alias } : {}),
|
|
59
62
|
});
|
|
60
63
|
// jiti() is the synchronous loader
|
|
61
64
|
const mod = jiti(filePath);
|