@sanity/ailf 1.0.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +0 -1
- package/config/models.ts +15 -3
- package/dist/_vendor/ailf-core/config-helpers.d.ts +14 -17
- package/dist/_vendor/ailf-core/config-helpers.js +22 -2
- package/dist/_vendor/ailf-core/examples/index.d.ts +16 -0
- package/dist/_vendor/ailf-core/examples/index.js +25 -0
- package/dist/_vendor/ailf-core/index.d.ts +2 -2
- package/dist/_vendor/ailf-core/index.js +1 -1
- package/dist/_vendor/ailf-core/ports/context.d.ts +2 -0
- package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +1 -0
- package/dist/_vendor/ailf-core/schemas/eval-config.js +10 -0
- package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +1 -0
- package/dist/_vendor/ailf-core/schemas/pipeline-request.js +2 -0
- package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +0 -2
- package/dist/_vendor/ailf-core/schemas/pipeline.js +0 -1
- package/dist/_vendor/ailf-core/types/generalized-task.d.ts +13 -0
- package/dist/_vendor/ailf-core/types/index.d.ts +1 -3
- package/dist/_vendor/ailf-core/types/plugin-registry.d.ts +78 -23
- package/dist/_vendor/ailf-core/types/plugin-registry.js +73 -20
- package/dist/adapters/config-sources/file-config-adapter.js +1 -0
- package/dist/adapters/config-sources/ts-config-loader.js +21 -13
- package/dist/adapters/task-sources/content-lake-task-source.js +17 -20
- package/dist/adapters/task-sources/index.d.ts +2 -2
- package/dist/adapters/task-sources/index.js +2 -2
- package/dist/adapters/task-sources/repo-schemas.d.ts +218 -16
- package/dist/adapters/task-sources/repo-schemas.js +227 -19
- package/dist/adapters/task-sources/repo-task-source.d.ts +14 -10
- package/dist/adapters/task-sources/repo-task-source.js +81 -122
- package/dist/adapters/task-sources/repo-validation.d.ts +36 -5
- package/dist/adapters/task-sources/repo-validation.js +126 -5
- package/dist/adapters/task-sources/task-file-loader.d.ts +2 -2
- package/dist/adapters/task-sources/task-file-loader.js +2 -2
- package/dist/commands/coverage-audit.js +3 -1
- package/dist/commands/init.d.ts +6 -4
- package/dist/commands/init.js +302 -23
- package/dist/commands/validate-tasks.d.ts +2 -2
- package/dist/commands/validate-tasks.js +26 -15
- package/dist/composition-root.d.ts +13 -1
- package/dist/composition-root.js +73 -41
- package/dist/index.d.ts +41 -0
- package/dist/index.js +48 -0
- package/dist/orchestration/build-step-sequence.js +4 -2
- package/dist/orchestration/steps/fetch-docs-step.js +2 -3
- package/dist/orchestration/steps/generate-configs-step.js +28 -12
- package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js +1 -1
- package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.js +1 -1
- package/dist/pipeline/compiler/__tests__/literacy-handler.test.js +1 -1
- package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +105 -68
- package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.js +33 -100
- package/dist/pipeline/compiler/literacy-bridge.d.ts +1 -1
- package/dist/pipeline/compiler/literacy-bridge.js +1 -1
- package/dist/pipeline/compiler/mode-bases/agent-harness.d.ts +10 -0
- package/dist/pipeline/compiler/mode-bases/agent-harness.js +21 -0
- package/dist/pipeline/compiler/mode-bases/index.d.ts +4 -0
- package/dist/pipeline/compiler/mode-bases/index.js +4 -0
- package/dist/pipeline/compiler/mode-bases/knowledge-probe.d.ts +10 -0
- package/dist/pipeline/compiler/mode-bases/knowledge-probe.js +22 -0
- package/dist/pipeline/compiler/mode-bases/literacy.d.ts +12 -0
- package/dist/pipeline/compiler/mode-bases/literacy.js +78 -0
- package/dist/pipeline/compiler/mode-bases/mcp-server.d.ts +10 -0
- package/dist/pipeline/compiler/mode-bases/mcp-server.js +70 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.d.ts +43 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.js +187 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.js +138 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/index.d.ts +16 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/index.js +43 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.js +29 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.d.ts +12 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.js +82 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.d.ts +4 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.js +19 -0
- package/dist/pipeline/compiler/mode-handlers/{agent-harness-handler.d.ts → agent-harness/types.d.ts} +3 -24
- package/dist/pipeline/compiler/mode-handlers/agent-harness/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.js +16 -0
- package/dist/pipeline/compiler/mode-handlers/index.d.ts +4 -5
- package/dist/pipeline/compiler/mode-handlers/index.js +4 -6
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.d.ts +16 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.js +61 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.d.ts +18 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.js +112 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.d.ts +26 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.js +49 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.js +28 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.d.ts +44 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.js +24 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/assertions.d.ts +18 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/assertions.js +118 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/compiler.d.ts +14 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/compiler.js +105 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/index.d.ts +11 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/index.js +38 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/prompts.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/prompts.js +74 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/types.d.ts +41 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/validation.d.ts +12 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/validation.js +28 -0
- package/dist/pipeline/compiler/mode-handlers/{mcp-assertions.d.ts → mcp-server/assertions.d.ts} +2 -10
- package/dist/pipeline/compiler/mode-handlers/{mcp-assertions.js → mcp-server/assertions.js} +63 -6
- package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.js +100 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/index.d.ts +27 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/index.js +54 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.d.ts +8 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.js +28 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.d.ts +28 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.js +104 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/types.d.ts +37 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.js +43 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.d.ts +33 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.js +174 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.js +95 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.js +172 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.d.ts +14 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.js +16 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.d.ts +93 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.js +4 -0
- package/dist/pipeline/compiler/preset-loader.d.ts +22 -0
- package/dist/pipeline/compiler/preset-loader.js +99 -0
- package/dist/pipeline/compiler/presets/sanity-literacy.d.ts +6 -9
- package/dist/pipeline/compiler/presets/sanity-literacy.js +10 -156
- package/dist/pipeline/expand-tasks.d.ts +2 -2
- package/dist/pipeline/expand-tasks.js +2 -2
- package/dist/pipeline/generate-configs.js +1 -1
- package/dist/pipeline/map-request-to-config.js +1 -0
- package/dist/pipeline/mirror-repo-tasks.d.ts +7 -7
- package/dist/pipeline/mirror-repo-tasks.js +9 -9
- package/dist/pipeline/plan.js +1 -1
- package/package.json +11 -3
- package/dist/_vendor/ailf-tasks/cli.d.ts +0 -8
- package/dist/_vendor/ailf-tasks/cli.js +0 -61
- package/dist/_vendor/ailf-tasks/index.d.ts +0 -13
- package/dist/_vendor/ailf-tasks/index.js +0 -16
- package/dist/_vendor/ailf-tasks/parser.d.ts +0 -27
- package/dist/_vendor/ailf-tasks/parser.js +0 -73
- package/dist/_vendor/ailf-tasks/schemas.d.ts +0 -198
- package/dist/_vendor/ailf-tasks/schemas.js +0 -180
- package/dist/_vendor/ailf-tasks/validation.d.ts +0 -47
- package/dist/_vendor/ailf-tasks/validation.js +0 -162
- package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.js +0 -485
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.d.ts +0 -76
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.js +0 -245
- package/dist/pipeline/compiler/mode-handlers/literacy-handler.d.ts +0 -89
- package/dist/pipeline/compiler/mode-handlers/literacy-handler.js +0 -379
- package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.d.ts +0 -67
- package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.js +0 -309
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* validate-tasks command — standalone validation of
|
|
2
|
+
* validate-tasks command — standalone validation of task files.
|
|
3
3
|
*
|
|
4
|
-
* Validates .ailf/tasks/*.yaml files against the
|
|
4
|
+
* Validates .ailf/tasks/*.yaml files against the CanonicalTaskSchema without
|
|
5
5
|
* running the full pipeline. Useful for pre-commit hooks and CI checks
|
|
6
6
|
* in external repos.
|
|
7
7
|
*
|
|
@@ -16,11 +16,11 @@ import { existsSync, readdirSync, readFileSync } from "fs";
|
|
|
16
16
|
import { resolve, relative } from "path";
|
|
17
17
|
import { Command } from "commander";
|
|
18
18
|
import { load } from "js-yaml";
|
|
19
|
-
import {
|
|
20
|
-
import {
|
|
19
|
+
import { detectLegacyFieldNames, parseCanonicalTaskFile, } from "../adapters/task-sources/repo-schemas.js";
|
|
20
|
+
import { validateCanonicalTasks, formatValidationResult, } from "../adapters/task-sources/repo-validation.js";
|
|
21
21
|
export function createValidateTasksCommand() {
|
|
22
22
|
return new Command("validate-tasks")
|
|
23
|
-
.description("Validate
|
|
23
|
+
.description("Validate task YAML files (.ailf/tasks/) against the canonical schema")
|
|
24
24
|
.argument("[path]", "Path to tasks directory (default: .ailf/tasks/)", ".ailf/tasks")
|
|
25
25
|
.option("--strict", "Treat warnings as errors", false)
|
|
26
26
|
.action(async (tasksPath, opts) => {
|
|
@@ -29,12 +29,12 @@ export function createValidateTasksCommand() {
|
|
|
29
29
|
const callerCwd = process.env.AILF_CALLER_CWD ?? process.cwd();
|
|
30
30
|
const resolvedPath = resolve(callerCwd, tasksPath);
|
|
31
31
|
if (!existsSync(resolvedPath)) {
|
|
32
|
-
console.error(
|
|
32
|
+
console.error(`Directory not found: ${resolvedPath}`);
|
|
33
33
|
process.exit(1);
|
|
34
34
|
}
|
|
35
35
|
const yamlFiles = readdirSync(resolvedPath).filter((f) => (f.endsWith(".yaml") || f.endsWith(".yml")) && !f.startsWith("."));
|
|
36
36
|
if (yamlFiles.length === 0) {
|
|
37
|
-
console.error(
|
|
37
|
+
console.error(`No YAML files found in ${resolvedPath}`);
|
|
38
38
|
process.exit(1);
|
|
39
39
|
}
|
|
40
40
|
console.log(`\nValidating ${yamlFiles.length} task file(s) in ${relative(process.cwd(), resolvedPath)}/\n`);
|
|
@@ -50,25 +50,36 @@ export function createValidateTasksCommand() {
|
|
|
50
50
|
}
|
|
51
51
|
catch (err) {
|
|
52
52
|
const msg = err instanceof Error ? err.message : String(err);
|
|
53
|
-
console.error(`
|
|
53
|
+
console.error(` ${file}: YAML parse error`);
|
|
54
54
|
console.error(` ${msg}\n`);
|
|
55
55
|
hasErrors = true;
|
|
56
56
|
continue;
|
|
57
57
|
}
|
|
58
58
|
if (!Array.isArray(parsed)) {
|
|
59
|
-
console.error(`
|
|
59
|
+
console.error(` ${file}: Expected a YAML array of task definitions`);
|
|
60
|
+
hasErrors = true;
|
|
61
|
+
continue;
|
|
62
|
+
}
|
|
63
|
+
// Detect legacy field names before Zod validation
|
|
64
|
+
const legacyWarnings = detectLegacyFieldNames(parsed, file);
|
|
65
|
+
if (legacyWarnings.length > 0) {
|
|
66
|
+
console.error(` ${file}: Uses legacy field names`);
|
|
67
|
+
for (const w of legacyWarnings) {
|
|
68
|
+
console.error(` ${w}`);
|
|
69
|
+
}
|
|
70
|
+
console.error();
|
|
60
71
|
hasErrors = true;
|
|
61
72
|
continue;
|
|
62
73
|
}
|
|
63
74
|
try {
|
|
64
|
-
const tasks =
|
|
65
|
-
console.log(`
|
|
75
|
+
const tasks = parseCanonicalTaskFile(parsed, file);
|
|
76
|
+
console.log(` ${file}: ${tasks.length} task${tasks.length === 1 ? "" : "s"} valid`);
|
|
66
77
|
totalTasks += tasks.length;
|
|
67
78
|
allTasks.push(...tasks);
|
|
68
79
|
}
|
|
69
80
|
catch (err) {
|
|
70
81
|
const msg = err instanceof Error ? err.message : String(err);
|
|
71
|
-
console.error(`
|
|
82
|
+
console.error(` ${file}: Schema validation failed`);
|
|
72
83
|
console.error(`${msg
|
|
73
84
|
.split("\n")
|
|
74
85
|
.map((l) => ` ${l}`)
|
|
@@ -79,7 +90,7 @@ export function createValidateTasksCommand() {
|
|
|
79
90
|
// Run semantic validation on all parsed tasks
|
|
80
91
|
if (allTasks.length > 0) {
|
|
81
92
|
console.log(); // blank line
|
|
82
|
-
const semanticResult =
|
|
93
|
+
const semanticResult = validateCanonicalTasks(allTasks);
|
|
83
94
|
const formatted = formatValidationResult(semanticResult);
|
|
84
95
|
console.log(formatted);
|
|
85
96
|
if (!semanticResult.valid) {
|
|
@@ -87,10 +98,10 @@ export function createValidateTasksCommand() {
|
|
|
87
98
|
}
|
|
88
99
|
if (opts.strict && semanticResult.warnings.length > 0) {
|
|
89
100
|
hasErrors = true;
|
|
90
|
-
console.log("\n
|
|
101
|
+
console.log("\n --strict mode: warnings treated as errors");
|
|
91
102
|
}
|
|
92
103
|
}
|
|
93
|
-
console.log(`\n${hasErrors ? "
|
|
104
|
+
console.log(`\n${hasErrors ? "FAIL" : "OK"} ${totalTasks} task${totalTasks === 1 ? "" : "s"} across ${yamlFiles.length} file${yamlFiles.length === 1 ? "" : "s"}\n`);
|
|
94
105
|
process.exit(hasErrors ? 1 : 0);
|
|
95
106
|
});
|
|
96
107
|
}
|
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
* @see packages/core/src/ports/context.ts — AppContext interface
|
|
16
16
|
* @see docs/exec-plans/ports-and-adapters/phase-7-composition-root.md
|
|
17
17
|
*/
|
|
18
|
-
import { type AppContext, type ResolvedConfig } from "./_vendor/ailf-core/index.d.ts";
|
|
18
|
+
import { type AppContext, type AssertionRegistration, type ResolvedConfig } from "./_vendor/ailf-core/index.d.ts";
|
|
19
19
|
/**
|
|
20
20
|
* Create a fully wired AppContext from resolved configuration.
|
|
21
21
|
*
|
|
@@ -24,3 +24,15 @@ import { type AppContext, type ResolvedConfig } from "./_vendor/ailf-core/index.
|
|
|
24
24
|
* is a one-line change in this function.
|
|
25
25
|
*/
|
|
26
26
|
export declare function createAppContext(config: ResolvedConfig): AppContext;
|
|
27
|
+
/**
|
|
28
|
+
* Generic Promptfoo assertion types available to all evaluation modes.
|
|
29
|
+
*
|
|
30
|
+
* These are evaluation primitives (text matching, JSON validation, LLM grading)
|
|
31
|
+
* that aren't specific to any mode or domain. They're registered before any
|
|
32
|
+
* preset so every mode has access to them.
|
|
33
|
+
*
|
|
34
|
+
* `compatibleModes` is omitted — when undefined, the assertion is compatible
|
|
35
|
+
* with all modes. Mode-specific assertions can be registered by presets with
|
|
36
|
+
* explicit mode whitelists.
|
|
37
|
+
*/
|
|
38
|
+
export declare const FRAMEWORK_ASSERTIONS: AssertionRegistration[];
|
package/dist/composition-root.js
CHANGED
|
@@ -17,10 +17,12 @@
|
|
|
17
17
|
*/
|
|
18
18
|
import { InMemoryPluginRegistry, } from "./_vendor/ailf-core/index.js";
|
|
19
19
|
import { ContentLakeCacheAdapter } from "./adapters/cache/content-lake-cache.js";
|
|
20
|
+
import { loadExternalPresets } from "./pipeline/compiler/preset-loader.js";
|
|
20
21
|
import { FilesystemCache } from "./adapters/cache/filesystem-cache.js";
|
|
21
22
|
import { PromptfooEvalAdapter } from "./adapters/eval-runners/promptfoo-eval-adapter.js";
|
|
22
23
|
import { ConsoleLogger, JsonLogger, QuietLogger, } from "./adapters/loggers/index.js";
|
|
23
24
|
import { CompositeTaskSource, ContentLakeTaskSource, RepoTaskSource, YamlTaskSource, } from "./adapters/task-sources/index.js";
|
|
25
|
+
import { createAgentHarnessBase, createKnowledgeProbeBase, createLiteracyModeBase, createMcpServerModeBase, } from "./pipeline/compiler/mode-bases/index.js";
|
|
24
26
|
import { createSanityLiteracyPreset } from "./pipeline/compiler/presets/index.js";
|
|
25
27
|
import { getSanityClient } from "./sanity/client.js";
|
|
26
28
|
import { ReportStore } from "./report-store.js";
|
|
@@ -39,10 +41,12 @@ export function createAppContext(config) {
|
|
|
39
41
|
const cache = config.noCache ? undefined : createCache(config);
|
|
40
42
|
// Task source — selected by config.taskSourceType
|
|
41
43
|
const taskSource = createTaskSource(config);
|
|
42
|
-
// Plugin registry — mode
|
|
43
|
-
//
|
|
44
|
-
|
|
45
|
-
|
|
44
|
+
// Plugin registry — mode bases, assertions, presets, doc fetcher.
|
|
45
|
+
// External presets from config are loaded and registered after built-ins.
|
|
46
|
+
const externalPresets = config.presets && config.presets.length > 0
|
|
47
|
+
? loadExternalPresets(config.presets, config.rootDir)
|
|
48
|
+
: undefined;
|
|
49
|
+
const registry = createRegistry(config.rootDir, externalPresets);
|
|
46
50
|
// Doc fetcher — provided by the registered preset's factory
|
|
47
51
|
const docFetcherFactory = registry.getDocFetcherFactory();
|
|
48
52
|
const docFetcher = docFetcherFactory ? docFetcherFactory() : undefined;
|
|
@@ -121,58 +125,86 @@ function createTaskSource(config) {
|
|
|
121
125
|
return primary;
|
|
122
126
|
}
|
|
123
127
|
// ---------------------------------------------------------------------------
|
|
124
|
-
//
|
|
128
|
+
// Layer 0: Framework built-in assertions
|
|
125
129
|
// ---------------------------------------------------------------------------
|
|
126
|
-
|
|
130
|
+
/**
|
|
131
|
+
* Generic Promptfoo assertion types available to all evaluation modes.
|
|
132
|
+
*
|
|
133
|
+
* These are evaluation primitives (text matching, JSON validation, LLM grading)
|
|
134
|
+
* that aren't specific to any mode or domain. They're registered before any
|
|
135
|
+
* preset so every mode has access to them.
|
|
136
|
+
*
|
|
137
|
+
* `compatibleModes` is omitted — when undefined, the assertion is compatible
|
|
138
|
+
* with all modes. Mode-specific assertions can be registered by presets with
|
|
139
|
+
* explicit mode whitelists.
|
|
140
|
+
*/
|
|
141
|
+
export const FRAMEWORK_ASSERTIONS = [
|
|
127
142
|
{
|
|
128
|
-
|
|
129
|
-
label: "
|
|
130
|
-
|
|
131
|
-
rubricTemplateIds: [],
|
|
132
|
-
handlerModule: "./mode-handlers/knowledge-probe-handler.js",
|
|
143
|
+
type: "contains",
|
|
144
|
+
label: "Contains text",
|
|
145
|
+
handlerModule: "promptfoo:builtin",
|
|
133
146
|
},
|
|
134
147
|
{
|
|
135
|
-
|
|
136
|
-
label: "
|
|
137
|
-
|
|
138
|
-
rubricTemplateIds: [
|
|
139
|
-
"mcp-input-validation",
|
|
140
|
-
"mcp-output-correctness",
|
|
141
|
-
"mcp-error-handling",
|
|
142
|
-
],
|
|
143
|
-
handlerModule: "./mode-handlers/mcp-server-handler.js",
|
|
148
|
+
type: "contains-all",
|
|
149
|
+
label: "Contains all texts",
|
|
150
|
+
handlerModule: "promptfoo:builtin",
|
|
144
151
|
},
|
|
145
152
|
{
|
|
146
|
-
|
|
147
|
-
label: "
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
153
|
+
type: "contains-any",
|
|
154
|
+
label: "Contains any text",
|
|
155
|
+
handlerModule: "promptfoo:builtin",
|
|
156
|
+
},
|
|
157
|
+
{ type: "equals", label: "Exact match", handlerModule: "promptfoo:builtin" },
|
|
158
|
+
{ type: "regex", label: "Regex match", handlerModule: "promptfoo:builtin" },
|
|
159
|
+
{ type: "is-json", label: "Valid JSON", handlerModule: "promptfoo:builtin" },
|
|
160
|
+
{
|
|
161
|
+
type: "javascript",
|
|
162
|
+
label: "JavaScript assertion",
|
|
163
|
+
handlerModule: "promptfoo:builtin",
|
|
164
|
+
},
|
|
165
|
+
{
|
|
166
|
+
type: "llm-rubric",
|
|
167
|
+
label: "LLM-graded rubric",
|
|
168
|
+
handlerModule: "promptfoo:builtin",
|
|
169
|
+
},
|
|
170
|
+
{
|
|
171
|
+
type: "similar",
|
|
172
|
+
label: "Semantic similarity",
|
|
173
|
+
handlerModule: "promptfoo:builtin",
|
|
151
174
|
},
|
|
152
175
|
];
|
|
153
176
|
/**
|
|
154
177
|
* Build and populate the plugin registry.
|
|
155
178
|
*
|
|
156
|
-
*
|
|
157
|
-
* 1. A preset is a PresetDefinition — a bundle of modes, assertions, rubric
|
|
158
|
-
* templates, prompt templates, scoring profiles, a doc fetcher factory,
|
|
159
|
-
* source definitions, and feature definitions.
|
|
160
|
-
* 2. registerPreset() iterates the preset's fields and delegates each one to
|
|
161
|
-
* the appropriate register method (registerMode, registerRubricTemplate, …).
|
|
162
|
-
* 3. After registration the rest of createAppContext() can pull capabilities
|
|
163
|
-
* from the registry (e.g. getDocFetcherFactory()) without knowing which
|
|
164
|
-
* preset provided them.
|
|
179
|
+
* Registration follows the five-layer model:
|
|
165
180
|
*
|
|
166
|
-
*
|
|
167
|
-
*
|
|
181
|
+
* Layer 0: Framework built-in assertions (generic Promptfoo builtins)
|
|
182
|
+
* Layer 0.5: Mode bases (shared evaluation methodology per mode)
|
|
183
|
+
* Layer 1: Domain presets (domain-specific config targeting a mode base)
|
|
184
|
+
*
|
|
185
|
+
* Mode bases define HOW you evaluate (rubrics, scoring, prompts).
|
|
186
|
+
* Domain presets define WHAT you evaluate (sources, features, doc fetcher)
|
|
187
|
+
* and target a mode base by ID. When a preset is registered, it inherits
|
|
188
|
+
* its mode base's defaults and can optionally override them.
|
|
168
189
|
*/
|
|
169
|
-
function createRegistry(rootDir) {
|
|
190
|
+
function createRegistry(rootDir, externalPresets) {
|
|
170
191
|
const registry = new InMemoryPluginRegistry();
|
|
171
|
-
//
|
|
192
|
+
// Layer 0: Framework built-in assertions (available to all modes)
|
|
193
|
+
for (const assertion of FRAMEWORK_ASSERTIONS) {
|
|
194
|
+
registry.registerAssertion(assertion);
|
|
195
|
+
}
|
|
196
|
+
// Layer 0.5: Mode bases (evaluation methodology)
|
|
197
|
+
registry.registerModeBase(createLiteracyModeBase());
|
|
198
|
+
registry.registerModeBase(createMcpServerModeBase());
|
|
199
|
+
registry.registerModeBase(createKnowledgeProbeBase());
|
|
200
|
+
registry.registerModeBase(createAgentHarnessBase());
|
|
201
|
+
// Layer 1: Built-in domain presets
|
|
172
202
|
registry.registerPreset(createSanityLiteracyPreset({ rootDir }));
|
|
173
|
-
//
|
|
174
|
-
|
|
175
|
-
|
|
203
|
+
// Layer 1+: External domain presets (from config.presets)
|
|
204
|
+
if (externalPresets) {
|
|
205
|
+
for (const preset of externalPresets) {
|
|
206
|
+
registry.registerPreset(preset);
|
|
207
|
+
}
|
|
176
208
|
}
|
|
177
209
|
return registry;
|
|
178
210
|
}
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @sanity/ailf — Public API for the AI Literacy Framework.
|
|
3
|
+
*
|
|
4
|
+
* This module is the entry point for external consumers who import from
|
|
5
|
+
* `@sanity/ailf`. It re-exports the authoring API needed to write task
|
|
6
|
+
* definitions, configuration files, and validate task YAML.
|
|
7
|
+
*
|
|
8
|
+
* ## Task authoring
|
|
9
|
+
*
|
|
10
|
+
* ```typescript
|
|
11
|
+
* import { defineTask } from "@sanity/ailf"
|
|
12
|
+
*
|
|
13
|
+
* export default defineTask({
|
|
14
|
+
* id: "groq-projection-basics",
|
|
15
|
+
* mode: "literacy",
|
|
16
|
+
* title: "GROQ Projection Basics",
|
|
17
|
+
* area: "groq",
|
|
18
|
+
* prompt: { text: "Write GROQ queries..." },
|
|
19
|
+
* assertions: [
|
|
20
|
+
* { type: "llm-rubric", template: "task-completion", criteria: ["..."] },
|
|
21
|
+
* ],
|
|
22
|
+
* })
|
|
23
|
+
* ```
|
|
24
|
+
*
|
|
25
|
+
* ## Configuration authoring
|
|
26
|
+
*
|
|
27
|
+
* ```typescript
|
|
28
|
+
* import { defineConfig, env } from "@sanity/ailf"
|
|
29
|
+
*
|
|
30
|
+
* export default defineConfig({
|
|
31
|
+
* projectId: env("SANITY_PROJECT_ID"),
|
|
32
|
+
* dataset: env("SANITY_DATASET"),
|
|
33
|
+
* })
|
|
34
|
+
* ```
|
|
35
|
+
*/
|
|
36
|
+
export { defineConfig, defineFeatures, defineModels, definePricingTable, definePreset, definePrompts, defineRubrics, defineSchedules, defineSinks, defineSources, defineTask, defineThresholds, } from "./_vendor/ailf-core/index.d.ts";
|
|
37
|
+
export type { PricingEntry, PromptEntry, SourceEntry } from "./_vendor/ailf-core/index.d.ts";
|
|
38
|
+
export { env } from "./_vendor/ailf-core/index.d.ts";
|
|
39
|
+
export type { AgentHarnessTaskDefinition, CustomTaskDefinition, GeneralizedAssertionDefinition, GeneralizedDocRef, GeneralizedTaskDefinition, GeneralizedTemplatedAssertion, GeneralizedValueAssertion, IdDocRef, KnowledgeProbeTaskDefinition, LiteracyTaskDefinition, MCPServerTaskDefinition, PathDocRef, PerspectiveDocRef, RubricRef, SlugDocRef, TaskCommonFields, TaskDifficulty, TaskOptions, TaskProviderConfig, TaskStatus, } from "./_vendor/ailf-core/index.d.ts";
|
|
40
|
+
export { CanonicalTaskFileSchema, CanonicalTaskSchema, CURATED_ASSERTION_TYPES, detectLegacyFieldNames, parseCanonicalTaskFile, RUBRIC_TEMPLATE_NAMES, type CanonicalTask, type CuratedAssertionType, type RubricTemplateName, } from "./adapters/task-sources/repo-schemas.js";
|
|
41
|
+
export { formatValidationResult, validateCanonicalTasks, type ValidationMessage, type ValidationResult, } from "./adapters/task-sources/repo-validation.js";
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @sanity/ailf — Public API for the AI Literacy Framework.
|
|
3
|
+
*
|
|
4
|
+
* This module is the entry point for external consumers who import from
|
|
5
|
+
* `@sanity/ailf`. It re-exports the authoring API needed to write task
|
|
6
|
+
* definitions, configuration files, and validate task YAML.
|
|
7
|
+
*
|
|
8
|
+
* ## Task authoring
|
|
9
|
+
*
|
|
10
|
+
* ```typescript
|
|
11
|
+
* import { defineTask } from "@sanity/ailf"
|
|
12
|
+
*
|
|
13
|
+
* export default defineTask({
|
|
14
|
+
* id: "groq-projection-basics",
|
|
15
|
+
* mode: "literacy",
|
|
16
|
+
* title: "GROQ Projection Basics",
|
|
17
|
+
* area: "groq",
|
|
18
|
+
* prompt: { text: "Write GROQ queries..." },
|
|
19
|
+
* assertions: [
|
|
20
|
+
* { type: "llm-rubric", template: "task-completion", criteria: ["..."] },
|
|
21
|
+
* ],
|
|
22
|
+
* })
|
|
23
|
+
* ```
|
|
24
|
+
*
|
|
25
|
+
* ## Configuration authoring
|
|
26
|
+
*
|
|
27
|
+
* ```typescript
|
|
28
|
+
* import { defineConfig, env } from "@sanity/ailf"
|
|
29
|
+
*
|
|
30
|
+
* export default defineConfig({
|
|
31
|
+
* projectId: env("SANITY_PROJECT_ID"),
|
|
32
|
+
* dataset: env("SANITY_DATASET"),
|
|
33
|
+
* })
|
|
34
|
+
* ```
|
|
35
|
+
*/
|
|
36
|
+
// ---------------------------------------------------------------------------
|
|
37
|
+
// Configuration helpers (define* identity functions for typed authoring)
|
|
38
|
+
// ---------------------------------------------------------------------------
|
|
39
|
+
export { defineConfig, defineFeatures, defineModels, definePricingTable, definePreset, definePrompts, defineRubrics, defineSchedules, defineSinks, defineSources, defineTask, defineThresholds, } from "./_vendor/ailf-core/index.js";
|
|
40
|
+
// ---------------------------------------------------------------------------
|
|
41
|
+
// Environment helper
|
|
42
|
+
// ---------------------------------------------------------------------------
|
|
43
|
+
export { env } from "./_vendor/ailf-core/index.js";
|
|
44
|
+
// ---------------------------------------------------------------------------
|
|
45
|
+
// Validation — for programmatic validation of task YAML
|
|
46
|
+
// ---------------------------------------------------------------------------
|
|
47
|
+
export { CanonicalTaskFileSchema, CanonicalTaskSchema, CURATED_ASSERTION_TYPES, detectLegacyFieldNames, parseCanonicalTaskFile, RUBRIC_TEMPLATE_NAMES, } from "./adapters/task-sources/repo-schemas.js";
|
|
48
|
+
export { formatValidationResult, validateCanonicalTasks, } from "./adapters/task-sources/repo-validation.js";
|
|
@@ -35,8 +35,10 @@ export function buildStepSequence(ctx, pipelineStart = Date.now()) {
|
|
|
35
35
|
if (config.repoTasksPath) {
|
|
36
36
|
steps.push(new MirrorRepoTasksStep());
|
|
37
37
|
}
|
|
38
|
-
// Step 1: Fetch documentation (
|
|
39
|
-
|
|
38
|
+
// Step 1: Fetch documentation (literacy mode only — other modes don't use canonical docs)
|
|
39
|
+
if (config.mode === "literacy") {
|
|
40
|
+
steps.push(new FetchDocsStep());
|
|
41
|
+
}
|
|
40
42
|
// Step 2: Generate Promptfoo configs
|
|
41
43
|
steps.push(new GenerateConfigsStep());
|
|
42
44
|
// Step 3: Run evaluation (steps handle --skip-eval internally)
|
|
@@ -34,9 +34,8 @@ export class FetchDocsStep {
|
|
|
34
34
|
const tasksWithDocs = literacyTasks.filter((t) => (t.context?.docs?.length ?? 0) > 0);
|
|
35
35
|
if (tasksWithDocs.length === 0) {
|
|
36
36
|
return {
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
status: "failed",
|
|
37
|
+
status: "skipped",
|
|
38
|
+
reason: "No literacy tasks with canonical_docs — nothing to fetch",
|
|
40
39
|
};
|
|
41
40
|
}
|
|
42
41
|
// Resolve source once with typed overrides
|
|
@@ -144,13 +144,18 @@ export class GenerateConfigsStep {
|
|
|
144
144
|
// ---------------------------------------------------------------------------
|
|
145
145
|
async compileSingleMode(ctx, handler, tasks, mode, models, start) {
|
|
146
146
|
ctx.logger.info(`Compiling ${tasks.length} ${mode} task(s) via registry handler...`);
|
|
147
|
+
// Filter models to those that declare this mode in their modes array
|
|
148
|
+
const modeModels = models.models
|
|
149
|
+
.filter((m) => !m.modes || m.modes.includes(mode))
|
|
150
|
+
.map((m) => ({
|
|
151
|
+
id: m.id,
|
|
152
|
+
label: m.label,
|
|
153
|
+
config: m.config,
|
|
154
|
+
}));
|
|
147
155
|
const merged = this.compileAll(handler, tasks, {
|
|
148
156
|
rootDir: ctx.config.rootDir,
|
|
149
157
|
graderProvider: models.grader.id,
|
|
150
|
-
models:
|
|
151
|
-
id: m.id,
|
|
152
|
-
label: m.label,
|
|
153
|
-
})),
|
|
158
|
+
models: modeModels,
|
|
154
159
|
});
|
|
155
160
|
for (const w of merged.warnings) {
|
|
156
161
|
ctx.logger.warn(` ⚠ ${w}`);
|
|
@@ -175,16 +180,27 @@ export class GenerateConfigsStep {
|
|
|
175
180
|
async loadTasks(ctx, mode, state) {
|
|
176
181
|
const { resolve } = await import("path");
|
|
177
182
|
const { discoverTsTaskFiles, loadTsTaskFile } = await import("../../adapters/task-sources/task-file-loader.js");
|
|
183
|
+
// Discover task files from the mode-specific directory and --repo-tasks-path
|
|
178
184
|
const tasksDir = resolve(ctx.config.rootDir, "tasks", mode);
|
|
179
|
-
const
|
|
185
|
+
const dirs = [tasksDir];
|
|
186
|
+
// Also search --repo-tasks-path (e.g., .ailf/tasks/) for repo-based tasks
|
|
187
|
+
if (ctx.config.repoTasksPath) {
|
|
188
|
+
const repoDir = resolve(ctx.config.repoTasksPath);
|
|
189
|
+
if (!dirs.includes(repoDir)) {
|
|
190
|
+
dirs.push(repoDir);
|
|
191
|
+
}
|
|
192
|
+
}
|
|
180
193
|
const tasks = [];
|
|
181
|
-
for (const
|
|
182
|
-
const
|
|
183
|
-
for (const
|
|
184
|
-
const
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
tasks
|
|
194
|
+
for (const dir of dirs) {
|
|
195
|
+
const files = discoverTsTaskFiles(dir);
|
|
196
|
+
for (const file of files) {
|
|
197
|
+
const raw = await loadTsTaskFile(file);
|
|
198
|
+
for (const t of raw.tasks) {
|
|
199
|
+
const task = t;
|
|
200
|
+
// Filter to matching mode (skip tasks from other modes in same dir)
|
|
201
|
+
if (!("mode" in task) || task.mode === mode) {
|
|
202
|
+
tasks.push(task);
|
|
203
|
+
}
|
|
188
204
|
}
|
|
189
205
|
}
|
|
190
206
|
}
|
|
@@ -10,7 +10,7 @@
|
|
|
10
10
|
import assert from "node:assert/strict";
|
|
11
11
|
import { describe, it } from "node:test";
|
|
12
12
|
import { LiteracyVariant } from "../../normalize-mode.js";
|
|
13
|
-
import { compileAgentHarnessTask, AGENT_HARNESS_PROMPT_TEMPLATES, handler as agentHandler, validateAgentHarnessTask, } from "../mode-handlers/agent-harness
|
|
13
|
+
import { compileAgentHarnessTask, AGENT_HARNESS_PROMPT_TEMPLATES, handler as agentHandler, validateAgentHarnessTask, } from "../mode-handlers/agent-harness/index.js";
|
|
14
14
|
import { allAgentHarnessExampleTasks, scaffoldProjectTask, modifyCodeTask, multiFileRefactorTask, } from "../mode-handlers/__fixtures__/agent-harness-example-tasks.js";
|
|
15
15
|
// ---------------------------------------------------------------------------
|
|
16
16
|
// Helpers
|
|
@@ -10,7 +10,7 @@
|
|
|
10
10
|
import assert from "node:assert/strict";
|
|
11
11
|
import { describe, it } from "node:test";
|
|
12
12
|
import { LiteracyVariant } from "../../normalize-mode.js";
|
|
13
|
-
import { compileKnowledgeProbeTask, handler as probeHandler, KNOWLEDGE_PROBE_PROMPT_TEMPLATES, validateKnowledgeProbeTask, } from "../mode-handlers/knowledge-probe
|
|
13
|
+
import { compileKnowledgeProbeTask, handler as probeHandler, KNOWLEDGE_PROBE_PROMPT_TEMPLATES, validateKnowledgeProbeTask, } from "../mode-handlers/knowledge-probe/index.js";
|
|
14
14
|
import { allKnowledgeProbeExampleTasks, groqProjectionTask, defineTypeApiTask, ecosystemComparisonTask, } from "../mode-handlers/__fixtures__/knowledge-probe-example-tasks.js";
|
|
15
15
|
// ---------------------------------------------------------------------------
|
|
16
16
|
// Helpers
|
|
@@ -10,7 +10,7 @@
|
|
|
10
10
|
import assert from "node:assert/strict";
|
|
11
11
|
import { describe, it } from "node:test";
|
|
12
12
|
import { LiteracyVariant } from "../../normalize-mode.js";
|
|
13
|
-
import { compileLiteracyTask, validateLiteracyTask, } from "../mode-handlers/literacy
|
|
13
|
+
import { compileLiteracyTask, validateLiteracyTask, } from "../mode-handlers/literacy/index.js";
|
|
14
14
|
import { compileLiteracyTasks, compareCompilerOutputs, } from "../literacy-bridge.js";
|
|
15
15
|
// ---------------------------------------------------------------------------
|
|
16
16
|
// Helpers
|