@sanity/ailf 1.0.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (156)
  1. package/README.md +0 -1
  2. package/config/models.ts +15 -3
  3. package/dist/_vendor/ailf-core/config-helpers.d.ts +14 -17
  4. package/dist/_vendor/ailf-core/config-helpers.js +22 -2
  5. package/dist/_vendor/ailf-core/examples/index.d.ts +16 -0
  6. package/dist/_vendor/ailf-core/examples/index.js +25 -0
  7. package/dist/_vendor/ailf-core/index.d.ts +2 -2
  8. package/dist/_vendor/ailf-core/index.js +1 -1
  9. package/dist/_vendor/ailf-core/ports/context.d.ts +2 -0
  10. package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +1 -0
  11. package/dist/_vendor/ailf-core/schemas/eval-config.js +10 -0
  12. package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +1 -0
  13. package/dist/_vendor/ailf-core/schemas/pipeline-request.js +2 -0
  14. package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +0 -2
  15. package/dist/_vendor/ailf-core/schemas/pipeline.js +0 -1
  16. package/dist/_vendor/ailf-core/types/generalized-task.d.ts +13 -0
  17. package/dist/_vendor/ailf-core/types/index.d.ts +1 -3
  18. package/dist/_vendor/ailf-core/types/plugin-registry.d.ts +78 -23
  19. package/dist/_vendor/ailf-core/types/plugin-registry.js +73 -20
  20. package/dist/adapters/config-sources/file-config-adapter.js +1 -0
  21. package/dist/adapters/config-sources/ts-config-loader.js +21 -13
  22. package/dist/adapters/task-sources/content-lake-task-source.js +17 -20
  23. package/dist/adapters/task-sources/index.d.ts +2 -2
  24. package/dist/adapters/task-sources/index.js +2 -2
  25. package/dist/adapters/task-sources/repo-schemas.d.ts +218 -16
  26. package/dist/adapters/task-sources/repo-schemas.js +227 -19
  27. package/dist/adapters/task-sources/repo-task-source.d.ts +14 -10
  28. package/dist/adapters/task-sources/repo-task-source.js +81 -122
  29. package/dist/adapters/task-sources/repo-validation.d.ts +36 -5
  30. package/dist/adapters/task-sources/repo-validation.js +126 -5
  31. package/dist/adapters/task-sources/task-file-loader.d.ts +2 -2
  32. package/dist/adapters/task-sources/task-file-loader.js +2 -2
  33. package/dist/commands/coverage-audit.js +3 -1
  34. package/dist/commands/init.d.ts +6 -4
  35. package/dist/commands/init.js +302 -23
  36. package/dist/commands/validate-tasks.d.ts +2 -2
  37. package/dist/commands/validate-tasks.js +26 -15
  38. package/dist/composition-root.d.ts +13 -1
  39. package/dist/composition-root.js +73 -41
  40. package/dist/index.d.ts +41 -0
  41. package/dist/index.js +48 -0
  42. package/dist/orchestration/build-step-sequence.js +4 -2
  43. package/dist/orchestration/steps/fetch-docs-step.js +2 -3
  44. package/dist/orchestration/steps/generate-configs-step.js +28 -12
  45. package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js +1 -1
  46. package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.js +1 -1
  47. package/dist/pipeline/compiler/__tests__/literacy-handler.test.js +1 -1
  48. package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +105 -68
  49. package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.js +33 -100
  50. package/dist/pipeline/compiler/literacy-bridge.d.ts +1 -1
  51. package/dist/pipeline/compiler/literacy-bridge.js +1 -1
  52. package/dist/pipeline/compiler/mode-bases/agent-harness.d.ts +10 -0
  53. package/dist/pipeline/compiler/mode-bases/agent-harness.js +21 -0
  54. package/dist/pipeline/compiler/mode-bases/index.d.ts +4 -0
  55. package/dist/pipeline/compiler/mode-bases/index.js +4 -0
  56. package/dist/pipeline/compiler/mode-bases/knowledge-probe.d.ts +10 -0
  57. package/dist/pipeline/compiler/mode-bases/knowledge-probe.js +22 -0
  58. package/dist/pipeline/compiler/mode-bases/literacy.d.ts +12 -0
  59. package/dist/pipeline/compiler/mode-bases/literacy.js +78 -0
  60. package/dist/pipeline/compiler/mode-bases/mcp-server.d.ts +10 -0
  61. package/dist/pipeline/compiler/mode-bases/mcp-server.js +70 -0
  62. package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.d.ts +43 -0
  63. package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.js +187 -0
  64. package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.d.ts +19 -0
  65. package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.js +138 -0
  66. package/dist/pipeline/compiler/mode-handlers/agent-harness/index.d.ts +16 -0
  67. package/dist/pipeline/compiler/mode-handlers/agent-harness/index.js +43 -0
  68. package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.d.ts +9 -0
  69. package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.js +29 -0
  70. package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.d.ts +12 -0
  71. package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.js +82 -0
  72. package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.d.ts +4 -0
  73. package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.js +19 -0
  74. package/dist/pipeline/compiler/mode-handlers/{agent-harness-handler.d.ts → agent-harness/types.d.ts} +3 -24
  75. package/dist/pipeline/compiler/mode-handlers/agent-harness/types.js +4 -0
  76. package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.d.ts +9 -0
  77. package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.js +16 -0
  78. package/dist/pipeline/compiler/mode-handlers/index.d.ts +4 -5
  79. package/dist/pipeline/compiler/mode-handlers/index.js +4 -6
  80. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.d.ts +16 -0
  81. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.js +61 -0
  82. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.d.ts +18 -0
  83. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.js +112 -0
  84. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.d.ts +26 -0
  85. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.js +49 -0
  86. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.d.ts +9 -0
  87. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.js +28 -0
  88. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.d.ts +44 -0
  89. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.js +4 -0
  90. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.d.ts +9 -0
  91. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.js +24 -0
  92. package/dist/pipeline/compiler/mode-handlers/literacy/assertions.d.ts +18 -0
  93. package/dist/pipeline/compiler/mode-handlers/literacy/assertions.js +118 -0
  94. package/dist/pipeline/compiler/mode-handlers/literacy/compiler.d.ts +14 -0
  95. package/dist/pipeline/compiler/mode-handlers/literacy/compiler.js +105 -0
  96. package/dist/pipeline/compiler/mode-handlers/literacy/index.d.ts +11 -0
  97. package/dist/pipeline/compiler/mode-handlers/literacy/index.js +38 -0
  98. package/dist/pipeline/compiler/mode-handlers/literacy/prompts.d.ts +9 -0
  99. package/dist/pipeline/compiler/mode-handlers/literacy/prompts.js +74 -0
  100. package/dist/pipeline/compiler/mode-handlers/literacy/types.d.ts +41 -0
  101. package/dist/pipeline/compiler/mode-handlers/literacy/types.js +4 -0
  102. package/dist/pipeline/compiler/mode-handlers/literacy/validation.d.ts +12 -0
  103. package/dist/pipeline/compiler/mode-handlers/literacy/validation.js +28 -0
  104. package/dist/pipeline/compiler/mode-handlers/{mcp-assertions.d.ts → mcp-server/assertions.d.ts} +2 -10
  105. package/dist/pipeline/compiler/mode-handlers/{mcp-assertions.js → mcp-server/assertions.js} +63 -6
  106. package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.d.ts +19 -0
  107. package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.js +100 -0
  108. package/dist/pipeline/compiler/mode-handlers/mcp-server/index.d.ts +27 -0
  109. package/dist/pipeline/compiler/mode-handlers/mcp-server/index.js +54 -0
  110. package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.d.ts +8 -0
  111. package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.js +28 -0
  112. package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.d.ts +28 -0
  113. package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.js +104 -0
  114. package/dist/pipeline/compiler/mode-handlers/mcp-server/types.d.ts +37 -0
  115. package/dist/pipeline/compiler/mode-handlers/mcp-server/types.js +4 -0
  116. package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.d.ts +9 -0
  117. package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.js +43 -0
  118. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.d.ts +33 -0
  119. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.js +174 -0
  120. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.d.ts +19 -0
  121. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.js +95 -0
  122. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.d.ts +19 -0
  123. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.js +172 -0
  124. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.d.ts +14 -0
  125. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.js +16 -0
  126. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.d.ts +93 -0
  127. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.js +4 -0
  128. package/dist/pipeline/compiler/preset-loader.d.ts +22 -0
  129. package/dist/pipeline/compiler/preset-loader.js +99 -0
  130. package/dist/pipeline/compiler/presets/sanity-literacy.d.ts +6 -9
  131. package/dist/pipeline/compiler/presets/sanity-literacy.js +10 -156
  132. package/dist/pipeline/expand-tasks.d.ts +2 -2
  133. package/dist/pipeline/expand-tasks.js +2 -2
  134. package/dist/pipeline/generate-configs.js +1 -1
  135. package/dist/pipeline/map-request-to-config.js +1 -0
  136. package/dist/pipeline/mirror-repo-tasks.d.ts +7 -7
  137. package/dist/pipeline/mirror-repo-tasks.js +9 -9
  138. package/dist/pipeline/plan.js +1 -1
  139. package/package.json +11 -3
  140. package/dist/_vendor/ailf-tasks/cli.d.ts +0 -8
  141. package/dist/_vendor/ailf-tasks/cli.js +0 -61
  142. package/dist/_vendor/ailf-tasks/index.d.ts +0 -13
  143. package/dist/_vendor/ailf-tasks/index.js +0 -16
  144. package/dist/_vendor/ailf-tasks/parser.d.ts +0 -27
  145. package/dist/_vendor/ailf-tasks/parser.js +0 -73
  146. package/dist/_vendor/ailf-tasks/schemas.d.ts +0 -198
  147. package/dist/_vendor/ailf-tasks/schemas.js +0 -180
  148. package/dist/_vendor/ailf-tasks/validation.d.ts +0 -47
  149. package/dist/_vendor/ailf-tasks/validation.js +0 -162
  150. package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.js +0 -485
  151. package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.d.ts +0 -76
  152. package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.js +0 -245
  153. package/dist/pipeline/compiler/mode-handlers/literacy-handler.d.ts +0 -89
  154. package/dist/pipeline/compiler/mode-handlers/literacy-handler.js +0 -379
  155. package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.d.ts +0 -67
  156. package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.js +0 -309
@@ -1,7 +1,7 @@
1
1
  /**
2
- * validate-tasks command — standalone validation of repo-based task YAML files.
2
+ * validate-tasks command — standalone validation of task files.
3
3
  *
4
- * Validates .ailf/tasks/*.yaml files against the RepoTaskSchema without
4
+ * Validates .ailf/tasks/*.yaml files against the CanonicalTaskSchema without
5
5
  * running the full pipeline. Useful for pre-commit hooks and CI checks
6
6
  * in external repos.
7
7
  *
@@ -16,11 +16,11 @@ import { existsSync, readdirSync, readFileSync } from "fs";
16
16
  import { resolve, relative } from "path";
17
17
  import { Command } from "commander";
18
18
  import { load } from "js-yaml";
19
- import { parseRepoTaskFile } from "../adapters/task-sources/repo-schemas.js";
20
- import { validateRepoTasks, formatValidationResult, } from "../adapters/task-sources/repo-validation.js";
19
+ import { detectLegacyFieldNames, parseCanonicalTaskFile, } from "../adapters/task-sources/repo-schemas.js";
20
+ import { validateCanonicalTasks, formatValidationResult, } from "../adapters/task-sources/repo-validation.js";
21
21
  export function createValidateTasksCommand() {
22
22
  return new Command("validate-tasks")
23
- .description("Validate repo-based task YAML files (.ailf/tasks/) against the schema")
23
+ .description("Validate task YAML files (.ailf/tasks/) against the canonical schema")
24
24
  .argument("[path]", "Path to tasks directory (default: .ailf/tasks/)", ".ailf/tasks")
25
25
  .option("--strict", "Treat warnings as errors", false)
26
26
  .action(async (tasksPath, opts) => {
@@ -29,12 +29,12 @@ export function createValidateTasksCommand() {
29
29
  const callerCwd = process.env.AILF_CALLER_CWD ?? process.cwd();
30
30
  const resolvedPath = resolve(callerCwd, tasksPath);
31
31
  if (!existsSync(resolvedPath)) {
32
- console.error(`❌ Directory not found: ${resolvedPath}`);
32
+ console.error(`Directory not found: ${resolvedPath}`);
33
33
  process.exit(1);
34
34
  }
35
35
  const yamlFiles = readdirSync(resolvedPath).filter((f) => (f.endsWith(".yaml") || f.endsWith(".yml")) && !f.startsWith("."));
36
36
  if (yamlFiles.length === 0) {
37
- console.error(`❌ No YAML files found in ${resolvedPath}`);
37
+ console.error(`No YAML files found in ${resolvedPath}`);
38
38
  process.exit(1);
39
39
  }
40
40
  console.log(`\nValidating ${yamlFiles.length} task file(s) in ${relative(process.cwd(), resolvedPath)}/\n`);
@@ -50,25 +50,36 @@ export function createValidateTasksCommand() {
50
50
  }
51
51
  catch (err) {
52
52
  const msg = err instanceof Error ? err.message : String(err);
53
- console.error(` ${file}: YAML parse error`);
53
+ console.error(` ${file}: YAML parse error`);
54
54
  console.error(` ${msg}\n`);
55
55
  hasErrors = true;
56
56
  continue;
57
57
  }
58
58
  if (!Array.isArray(parsed)) {
59
- console.error(` ${file}: Expected a YAML array of task definitions`);
59
+ console.error(` ${file}: Expected a YAML array of task definitions`);
60
+ hasErrors = true;
61
+ continue;
62
+ }
63
+ // Detect legacy field names before Zod validation
64
+ const legacyWarnings = detectLegacyFieldNames(parsed, file);
65
+ if (legacyWarnings.length > 0) {
66
+ console.error(` ${file}: Uses legacy field names`);
67
+ for (const w of legacyWarnings) {
68
+ console.error(` ${w}`);
69
+ }
70
+ console.error();
60
71
  hasErrors = true;
61
72
  continue;
62
73
  }
63
74
  try {
64
- const tasks = parseRepoTaskFile(parsed, file);
65
- console.log(` ${file}: ${tasks.length} task${tasks.length === 1 ? "" : "s"} valid`);
75
+ const tasks = parseCanonicalTaskFile(parsed, file);
76
+ console.log(` ${file}: ${tasks.length} task${tasks.length === 1 ? "" : "s"} valid`);
66
77
  totalTasks += tasks.length;
67
78
  allTasks.push(...tasks);
68
79
  }
69
80
  catch (err) {
70
81
  const msg = err instanceof Error ? err.message : String(err);
71
- console.error(` ${file}: Schema validation failed`);
82
+ console.error(` ${file}: Schema validation failed`);
72
83
  console.error(`${msg
73
84
  .split("\n")
74
85
  .map((l) => ` ${l}`)
@@ -79,7 +90,7 @@ export function createValidateTasksCommand() {
79
90
  // Run semantic validation on all parsed tasks
80
91
  if (allTasks.length > 0) {
81
92
  console.log(); // blank line
82
- const semanticResult = validateRepoTasks(allTasks);
93
+ const semanticResult = validateCanonicalTasks(allTasks);
83
94
  const formatted = formatValidationResult(semanticResult);
84
95
  console.log(formatted);
85
96
  if (!semanticResult.valid) {
@@ -87,10 +98,10 @@ export function createValidateTasksCommand() {
87
98
  }
88
99
  if (opts.strict && semanticResult.warnings.length > 0) {
89
100
  hasErrors = true;
90
- console.log("\n ⚠️ --strict mode: warnings treated as errors");
101
+ console.log("\n --strict mode: warnings treated as errors");
91
102
  }
92
103
  }
93
- console.log(`\n${hasErrors ? "" : ""} ${totalTasks} task${totalTasks === 1 ? "" : "s"} across ${yamlFiles.length} file${yamlFiles.length === 1 ? "" : "s"}\n`);
104
+ console.log(`\n${hasErrors ? "FAIL" : "OK"} ${totalTasks} task${totalTasks === 1 ? "" : "s"} across ${yamlFiles.length} file${yamlFiles.length === 1 ? "" : "s"}\n`);
94
105
  process.exit(hasErrors ? 1 : 0);
95
106
  });
96
107
  }
@@ -15,7 +15,7 @@
15
15
  * @see packages/core/src/ports/context.ts — AppContext interface
16
16
  * @see docs/exec-plans/ports-and-adapters/phase-7-composition-root.md
17
17
  */
18
- import { type AppContext, type ResolvedConfig } from "./_vendor/ailf-core/index.d.ts";
18
+ import { type AppContext, type AssertionRegistration, type ResolvedConfig } from "./_vendor/ailf-core/index.d.ts";
19
19
  /**
20
20
  * Create a fully wired AppContext from resolved configuration.
21
21
  *
@@ -24,3 +24,15 @@ import { type AppContext, type ResolvedConfig } from "./_vendor/ailf-core/index.
24
24
  * is a one-line change in this function.
25
25
  */
26
26
  export declare function createAppContext(config: ResolvedConfig): AppContext;
27
+ /**
28
+ * Generic Promptfoo assertion types available to all evaluation modes.
29
+ *
30
+ * These are evaluation primitives (text matching, JSON validation, LLM grading)
31
+ * that aren't specific to any mode or domain. They're registered before any
32
+ * preset so every mode has access to them.
33
+ *
34
+ * `compatibleModes` is omitted — when undefined, the assertion is compatible
35
+ * with all modes. Mode-specific assertions can be registered by presets with
36
+ * explicit mode whitelists.
37
+ */
38
+ export declare const FRAMEWORK_ASSERTIONS: AssertionRegistration[];
@@ -17,10 +17,12 @@
17
17
  */
18
18
  import { InMemoryPluginRegistry, } from "./_vendor/ailf-core/index.js";
19
19
  import { ContentLakeCacheAdapter } from "./adapters/cache/content-lake-cache.js";
20
+ import { loadExternalPresets } from "./pipeline/compiler/preset-loader.js";
20
21
  import { FilesystemCache } from "./adapters/cache/filesystem-cache.js";
21
22
  import { PromptfooEvalAdapter } from "./adapters/eval-runners/promptfoo-eval-adapter.js";
22
23
  import { ConsoleLogger, JsonLogger, QuietLogger, } from "./adapters/loggers/index.js";
23
24
  import { CompositeTaskSource, ContentLakeTaskSource, RepoTaskSource, YamlTaskSource, } from "./adapters/task-sources/index.js";
25
+ import { createAgentHarnessBase, createKnowledgeProbeBase, createLiteracyModeBase, createMcpServerModeBase, } from "./pipeline/compiler/mode-bases/index.js";
24
26
  import { createSanityLiteracyPreset } from "./pipeline/compiler/presets/index.js";
25
27
  import { getSanityClient } from "./sanity/client.js";
26
28
  import { ReportStore } from "./report-store.js";
@@ -39,10 +41,12 @@ export function createAppContext(config) {
39
41
  const cache = config.noCache ? undefined : createCache(config);
40
42
  // Task source — selected by config.taskSourceType
41
43
  const taskSource = createTaskSource(config);
42
- // Plugin registry — mode handlers, assertions, rubric templates, doc fetcher.
43
- // The Sanity preset is registered here with config.rootDir so its doc fetcher
44
- // factory resolves paths relative to the eval package root (not cwd).
45
- const registry = createRegistry(config.rootDir);
44
+ // Plugin registry — mode bases, assertions, presets, doc fetcher.
45
+ // External presets from config are loaded and registered after built-ins.
46
+ const externalPresets = config.presets && config.presets.length > 0
47
+ ? loadExternalPresets(config.presets, config.rootDir)
48
+ : undefined;
49
+ const registry = createRegistry(config.rootDir, externalPresets);
46
50
  // Doc fetcher — provided by the registered preset's factory
47
51
  const docFetcherFactory = registry.getDocFetcherFactory();
48
52
  const docFetcher = docFetcherFactory ? docFetcherFactory() : undefined;
@@ -121,58 +125,86 @@ function createTaskSource(config) {
121
125
  return primary;
122
126
  }
123
127
  // ---------------------------------------------------------------------------
124
- // Built-in mode registrations for non-literacy modes
128
+ // Layer 0: Framework built-in assertions
125
129
  // ---------------------------------------------------------------------------
126
- const BUILT_IN_MODES = [
130
+ /**
131
+ * Generic Promptfoo assertion types available to all evaluation modes.
132
+ *
133
+ * These are evaluation primitives (text matching, JSON validation, LLM grading)
134
+ * that aren't specific to any mode or domain. They're registered before any
135
+ * preset so every mode has access to them.
136
+ *
137
+ * `compatibleModes` is omitted — when undefined, the assertion is compatible
138
+ * with all modes. Mode-specific assertions can be registered by presets with
139
+ * explicit mode whitelists.
140
+ */
141
+ export const FRAMEWORK_ASSERTIONS = [
127
142
  {
128
- id: "knowledge-probe",
129
- label: "Knowledge Probe",
130
- validProviderPatterns: ["^openai:", "^anthropic:", "^file://"],
131
- rubricTemplateIds: [],
132
- handlerModule: "./mode-handlers/knowledge-probe-handler.js",
143
+ type: "contains",
144
+ label: "Contains text",
145
+ handlerModule: "promptfoo:builtin",
133
146
  },
134
147
  {
135
- id: "mcp-server",
136
- label: "MCP Server Testing",
137
- validProviderPatterns: ["^mcp:", "^file://"],
138
- rubricTemplateIds: [
139
- "mcp-input-validation",
140
- "mcp-output-correctness",
141
- "mcp-error-handling",
142
- ],
143
- handlerModule: "./mode-handlers/mcp-server-handler.js",
148
+ type: "contains-all",
149
+ label: "Contains all texts",
150
+ handlerModule: "promptfoo:builtin",
144
151
  },
145
152
  {
146
- id: "agent-harness",
147
- label: "Agent Harness",
148
- validProviderPatterns: ["^openai:", "^anthropic:", "^file://"],
149
- rubricTemplateIds: [],
150
- handlerModule: "./mode-handlers/agent-harness-handler.js",
153
+ type: "contains-any",
154
+ label: "Contains any text",
155
+ handlerModule: "promptfoo:builtin",
156
+ },
157
+ { type: "equals", label: "Exact match", handlerModule: "promptfoo:builtin" },
158
+ { type: "regex", label: "Regex match", handlerModule: "promptfoo:builtin" },
159
+ { type: "is-json", label: "Valid JSON", handlerModule: "promptfoo:builtin" },
160
+ {
161
+ type: "javascript",
162
+ label: "JavaScript assertion",
163
+ handlerModule: "promptfoo:builtin",
164
+ },
165
+ {
166
+ type: "llm-rubric",
167
+ label: "LLM-graded rubric",
168
+ handlerModule: "promptfoo:builtin",
169
+ },
170
+ {
171
+ type: "similar",
172
+ label: "Semantic similarity",
173
+ handlerModule: "promptfoo:builtin",
151
174
  },
152
175
  ];
153
176
  /**
154
177
  * Build and populate the plugin registry.
155
178
  *
156
- * Preset registration flow:
157
- * 1. A preset is a PresetDefinition — a bundle of modes, assertions, rubric
158
- * templates, prompt templates, scoring profiles, a doc fetcher factory,
159
- * source definitions, and feature definitions.
160
- * 2. registerPreset() iterates the preset's fields and delegates each one to
161
- * the appropriate register method (registerMode, registerRubricTemplate, …).
162
- * 3. After registration the rest of createAppContext() can pull capabilities
163
- * from the registry (e.g. getDocFetcherFactory()) without knowing which
164
- * preset provided them.
179
+ * Registration follows the five-layer model:
165
180
  *
166
- * To add a new preset: create a PresetDefinition, then call
167
- * registry.registerPreset() here before the built-in mode registrations.
181
+ * Layer 0: Framework built-in assertions (generic Promptfoo builtins)
182
+ * Layer 0.5: Mode bases (shared evaluation methodology per mode)
183
+ * Layer 1: Domain presets (domain-specific config targeting a mode base)
184
+ *
185
+ * Mode bases define HOW you evaluate (rubrics, scoring, prompts).
186
+ * Domain presets define WHAT you evaluate (sources, features, doc fetcher)
187
+ * and target a mode base by ID. When a preset is registered, it inherits
188
+ * its mode base's defaults and can optionally override them.
168
189
  */
169
- function createRegistry(rootDir) {
190
+ function createRegistry(rootDir, externalPresets) {
170
191
  const registry = new InMemoryPluginRegistry();
171
- // Register the sanity-literacy preset — the Sanity-specific evaluation bundle.
192
+ // Layer 0: Framework built-in assertions (available to all modes)
193
+ for (const assertion of FRAMEWORK_ASSERTIONS) {
194
+ registry.registerAssertion(assertion);
195
+ }
196
+ // Layer 0.5: Mode bases (evaluation methodology)
197
+ registry.registerModeBase(createLiteracyModeBase());
198
+ registry.registerModeBase(createMcpServerModeBase());
199
+ registry.registerModeBase(createKnowledgeProbeBase());
200
+ registry.registerModeBase(createAgentHarnessBase());
201
+ // Layer 1: Built-in domain presets
172
202
  registry.registerPreset(createSanityLiteracyPreset({ rootDir }));
173
- // Register other built-in modes (not part of any preset yet)
174
- for (const mode of BUILT_IN_MODES) {
175
- registry.registerMode(mode);
203
+ // Layer 1+: External domain presets (from config.presets)
204
+ if (externalPresets) {
205
+ for (const preset of externalPresets) {
206
+ registry.registerPreset(preset);
207
+ }
176
208
  }
177
209
  return registry;
178
210
  }
@@ -0,0 +1,41 @@
1
+ /**
2
+ * @sanity/ailf — Public API for the AI Literacy Framework.
3
+ *
4
+ * This module is the entry point for external consumers who import from
5
+ * `@sanity/ailf`. It re-exports the authoring API needed to write task
6
+ * definitions, configuration files, and validate task YAML.
7
+ *
8
+ * ## Task authoring
9
+ *
10
+ * ```typescript
11
+ * import { defineTask } from "@sanity/ailf"
12
+ *
13
+ * export default defineTask({
14
+ * id: "groq-projection-basics",
15
+ * mode: "literacy",
16
+ * title: "GROQ Projection Basics",
17
+ * area: "groq",
18
+ * prompt: { text: "Write GROQ queries..." },
19
+ * assertions: [
20
+ * { type: "llm-rubric", template: "task-completion", criteria: ["..."] },
21
+ * ],
22
+ * })
23
+ * ```
24
+ *
25
+ * ## Configuration authoring
26
+ *
27
+ * ```typescript
28
+ * import { defineConfig, env } from "@sanity/ailf"
29
+ *
30
+ * export default defineConfig({
31
+ * projectId: env("SANITY_PROJECT_ID"),
32
+ * dataset: env("SANITY_DATASET"),
33
+ * })
34
+ * ```
35
+ */
36
+ export { defineConfig, defineFeatures, defineModels, definePricingTable, definePreset, definePrompts, defineRubrics, defineSchedules, defineSinks, defineSources, defineTask, defineThresholds, } from "./_vendor/ailf-core/index.d.ts";
37
+ export type { PricingEntry, PromptEntry, SourceEntry } from "./_vendor/ailf-core/index.d.ts";
38
+ export { env } from "./_vendor/ailf-core/index.d.ts";
39
+ export type { AgentHarnessTaskDefinition, CustomTaskDefinition, GeneralizedAssertionDefinition, GeneralizedDocRef, GeneralizedTaskDefinition, GeneralizedTemplatedAssertion, GeneralizedValueAssertion, IdDocRef, KnowledgeProbeTaskDefinition, LiteracyTaskDefinition, MCPServerTaskDefinition, PathDocRef, PerspectiveDocRef, RubricRef, SlugDocRef, TaskCommonFields, TaskDifficulty, TaskOptions, TaskProviderConfig, TaskStatus, } from "./_vendor/ailf-core/index.d.ts";
40
+ export { CanonicalTaskFileSchema, CanonicalTaskSchema, CURATED_ASSERTION_TYPES, detectLegacyFieldNames, parseCanonicalTaskFile, RUBRIC_TEMPLATE_NAMES, type CanonicalTask, type CuratedAssertionType, type RubricTemplateName, } from "./adapters/task-sources/repo-schemas.js";
41
+ export { formatValidationResult, validateCanonicalTasks, type ValidationMessage, type ValidationResult, } from "./adapters/task-sources/repo-validation.js";
package/dist/index.js ADDED
@@ -0,0 +1,48 @@
1
+ /**
2
+ * @sanity/ailf — Public API for the AI Literacy Framework.
3
+ *
4
+ * This module is the entry point for external consumers who import from
5
+ * `@sanity/ailf`. It re-exports the authoring API needed to write task
6
+ * definitions, configuration files, and validate task YAML.
7
+ *
8
+ * ## Task authoring
9
+ *
10
+ * ```typescript
11
+ * import { defineTask } from "@sanity/ailf"
12
+ *
13
+ * export default defineTask({
14
+ * id: "groq-projection-basics",
15
+ * mode: "literacy",
16
+ * title: "GROQ Projection Basics",
17
+ * area: "groq",
18
+ * prompt: { text: "Write GROQ queries..." },
19
+ * assertions: [
20
+ * { type: "llm-rubric", template: "task-completion", criteria: ["..."] },
21
+ * ],
22
+ * })
23
+ * ```
24
+ *
25
+ * ## Configuration authoring
26
+ *
27
+ * ```typescript
28
+ * import { defineConfig, env } from "@sanity/ailf"
29
+ *
30
+ * export default defineConfig({
31
+ * projectId: env("SANITY_PROJECT_ID"),
32
+ * dataset: env("SANITY_DATASET"),
33
+ * })
34
+ * ```
35
+ */
36
+ // ---------------------------------------------------------------------------
37
+ // Configuration helpers (define* identity functions for typed authoring)
38
+ // ---------------------------------------------------------------------------
39
+ export { defineConfig, defineFeatures, defineModels, definePricingTable, definePreset, definePrompts, defineRubrics, defineSchedules, defineSinks, defineSources, defineTask, defineThresholds, } from "./_vendor/ailf-core/index.js";
40
+ // ---------------------------------------------------------------------------
41
+ // Environment helper
42
+ // ---------------------------------------------------------------------------
43
+ export { env } from "./_vendor/ailf-core/index.js";
44
+ // ---------------------------------------------------------------------------
45
+ // Validation — for programmatic validation of task YAML
46
+ // ---------------------------------------------------------------------------
47
+ export { CanonicalTaskFileSchema, CanonicalTaskSchema, CURATED_ASSERTION_TYPES, detectLegacyFieldNames, parseCanonicalTaskFile, RUBRIC_TEMPLATE_NAMES, } from "./adapters/task-sources/repo-schemas.js";
48
+ export { formatValidationResult, validateCanonicalTasks, } from "./adapters/task-sources/repo-validation.js";
@@ -35,8 +35,10 @@ export function buildStepSequence(ctx, pipelineStart = Date.now()) {
35
35
  if (config.repoTasksPath) {
36
36
  steps.push(new MirrorRepoTasksStep());
37
37
  }
38
- // Step 1: Fetch documentation (step handles --skip-fetch internally)
39
- steps.push(new FetchDocsStep());
38
+ // Step 1: Fetch documentation (literacy mode only — other modes don't use canonical docs)
39
+ if (config.mode === "literacy") {
40
+ steps.push(new FetchDocsStep());
41
+ }
40
42
  // Step 2: Generate Promptfoo configs
41
43
  steps.push(new GenerateConfigsStep());
42
44
  // Step 3: Run evaluation (steps handle --skip-eval internally)
@@ -34,9 +34,8 @@ export class FetchDocsStep {
34
34
  const tasksWithDocs = literacyTasks.filter((t) => (t.context?.docs?.length ?? 0) > 0);
35
35
  if (tasksWithDocs.length === 0) {
36
36
  return {
37
- durationMs: Date.now() - start,
38
- error: "No tasks with canonical_docs found. Add canonical_docs to your task definitions.",
39
- status: "failed",
37
+ status: "skipped",
38
+ reason: "No literacy tasks with canonical_docs nothing to fetch",
40
39
  };
41
40
  }
42
41
  // Resolve source once with typed overrides
@@ -144,13 +144,18 @@ export class GenerateConfigsStep {
144
144
  // ---------------------------------------------------------------------------
145
145
  async compileSingleMode(ctx, handler, tasks, mode, models, start) {
146
146
  ctx.logger.info(`Compiling ${tasks.length} ${mode} task(s) via registry handler...`);
147
+ // Filter models to those that declare this mode in their modes array
148
+ const modeModels = models.models
149
+ .filter((m) => !m.modes || m.modes.includes(mode))
150
+ .map((m) => ({
151
+ id: m.id,
152
+ label: m.label,
153
+ config: m.config,
154
+ }));
147
155
  const merged = this.compileAll(handler, tasks, {
148
156
  rootDir: ctx.config.rootDir,
149
157
  graderProvider: models.grader.id,
150
- models: models.models.map((m) => ({
151
- id: m.id,
152
- label: m.label,
153
- })),
158
+ models: modeModels,
154
159
  });
155
160
  for (const w of merged.warnings) {
156
161
  ctx.logger.warn(` ⚠ ${w}`);
@@ -175,16 +180,27 @@ export class GenerateConfigsStep {
175
180
  async loadTasks(ctx, mode, state) {
176
181
  const { resolve } = await import("path");
177
182
  const { discoverTsTaskFiles, loadTsTaskFile } = await import("../../adapters/task-sources/task-file-loader.js");
183
+ // Discover task files from the mode-specific directory and --repo-tasks-path
178
184
  const tasksDir = resolve(ctx.config.rootDir, "tasks", mode);
179
- const files = discoverTsTaskFiles(tasksDir);
185
+ const dirs = [tasksDir];
186
+ // Also search --repo-tasks-path (e.g., .ailf/tasks/) for repo-based tasks
187
+ if (ctx.config.repoTasksPath) {
188
+ const repoDir = resolve(ctx.config.repoTasksPath);
189
+ if (!dirs.includes(repoDir)) {
190
+ dirs.push(repoDir);
191
+ }
192
+ }
180
193
  const tasks = [];
181
- for (const file of files) {
182
- const raw = await loadTsTaskFile(file);
183
- for (const t of raw.tasks) {
184
- const task = t;
185
- // Filter to matching mode (skip tasks from other modes in same dir)
186
- if (!("mode" in task) || task.mode === mode) {
187
- tasks.push(task);
194
+ for (const dir of dirs) {
195
+ const files = discoverTsTaskFiles(dir);
196
+ for (const file of files) {
197
+ const raw = await loadTsTaskFile(file);
198
+ for (const t of raw.tasks) {
199
+ const task = t;
200
+ // Filter to matching mode (skip tasks from other modes in same dir)
201
+ if (!("mode" in task) || task.mode === mode) {
202
+ tasks.push(task);
203
+ }
188
204
  }
189
205
  }
190
206
  }
@@ -10,7 +10,7 @@
10
10
  import assert from "node:assert/strict";
11
11
  import { describe, it } from "node:test";
12
12
  import { LiteracyVariant } from "../../normalize-mode.js";
13
- import { compileAgentHarnessTask, AGENT_HARNESS_PROMPT_TEMPLATES, handler as agentHandler, validateAgentHarnessTask, } from "../mode-handlers/agent-harness-handler.js";
13
+ import { compileAgentHarnessTask, AGENT_HARNESS_PROMPT_TEMPLATES, handler as agentHandler, validateAgentHarnessTask, } from "../mode-handlers/agent-harness/index.js";
14
14
  import { allAgentHarnessExampleTasks, scaffoldProjectTask, modifyCodeTask, multiFileRefactorTask, } from "../mode-handlers/__fixtures__/agent-harness-example-tasks.js";
15
15
  // ---------------------------------------------------------------------------
16
16
  // Helpers
@@ -10,7 +10,7 @@
10
10
  import assert from "node:assert/strict";
11
11
  import { describe, it } from "node:test";
12
12
  import { LiteracyVariant } from "../../normalize-mode.js";
13
- import { compileKnowledgeProbeTask, handler as probeHandler, KNOWLEDGE_PROBE_PROMPT_TEMPLATES, validateKnowledgeProbeTask, } from "../mode-handlers/knowledge-probe-handler.js";
13
+ import { compileKnowledgeProbeTask, handler as probeHandler, KNOWLEDGE_PROBE_PROMPT_TEMPLATES, validateKnowledgeProbeTask, } from "../mode-handlers/knowledge-probe/index.js";
14
14
  import { allKnowledgeProbeExampleTasks, groqProjectionTask, defineTypeApiTask, ecosystemComparisonTask, } from "../mode-handlers/__fixtures__/knowledge-probe-example-tasks.js";
15
15
  // ---------------------------------------------------------------------------
16
16
  // Helpers
@@ -10,7 +10,7 @@
10
10
  import assert from "node:assert/strict";
11
11
  import { describe, it } from "node:test";
12
12
  import { LiteracyVariant } from "../../normalize-mode.js";
13
- import { compileLiteracyTask, validateLiteracyTask, } from "../mode-handlers/literacy-handler.js";
13
+ import { compileLiteracyTask, validateLiteracyTask, } from "../mode-handlers/literacy/index.js";
14
14
  import { compileLiteracyTasks, compareCompilerOutputs, } from "../literacy-bridge.js";
15
15
  // ---------------------------------------------------------------------------
16
16
  // Helpers