@sanity/ailf 1.0.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (156)
  1. package/README.md +0 -1
  2. package/config/models.ts +15 -3
  3. package/dist/_vendor/ailf-core/config-helpers.d.ts +14 -17
  4. package/dist/_vendor/ailf-core/config-helpers.js +22 -2
  5. package/dist/_vendor/ailf-core/examples/index.d.ts +16 -0
  6. package/dist/_vendor/ailf-core/examples/index.js +25 -0
  7. package/dist/_vendor/ailf-core/index.d.ts +2 -2
  8. package/dist/_vendor/ailf-core/index.js +1 -1
  9. package/dist/_vendor/ailf-core/ports/context.d.ts +2 -0
  10. package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +1 -0
  11. package/dist/_vendor/ailf-core/schemas/eval-config.js +10 -0
  12. package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +1 -0
  13. package/dist/_vendor/ailf-core/schemas/pipeline-request.js +2 -0
  14. package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +0 -2
  15. package/dist/_vendor/ailf-core/schemas/pipeline.js +0 -1
  16. package/dist/_vendor/ailf-core/types/generalized-task.d.ts +13 -0
  17. package/dist/_vendor/ailf-core/types/index.d.ts +1 -3
  18. package/dist/_vendor/ailf-core/types/plugin-registry.d.ts +78 -23
  19. package/dist/_vendor/ailf-core/types/plugin-registry.js +73 -20
  20. package/dist/adapters/config-sources/file-config-adapter.js +1 -0
  21. package/dist/adapters/config-sources/ts-config-loader.js +21 -13
  22. package/dist/adapters/task-sources/content-lake-task-source.js +17 -20
  23. package/dist/adapters/task-sources/index.d.ts +2 -2
  24. package/dist/adapters/task-sources/index.js +2 -2
  25. package/dist/adapters/task-sources/repo-schemas.d.ts +218 -16
  26. package/dist/adapters/task-sources/repo-schemas.js +227 -19
  27. package/dist/adapters/task-sources/repo-task-source.d.ts +14 -10
  28. package/dist/adapters/task-sources/repo-task-source.js +81 -122
  29. package/dist/adapters/task-sources/repo-validation.d.ts +36 -5
  30. package/dist/adapters/task-sources/repo-validation.js +126 -5
  31. package/dist/adapters/task-sources/task-file-loader.d.ts +2 -2
  32. package/dist/adapters/task-sources/task-file-loader.js +2 -2
  33. package/dist/commands/coverage-audit.js +3 -1
  34. package/dist/commands/init.d.ts +6 -4
  35. package/dist/commands/init.js +302 -23
  36. package/dist/commands/validate-tasks.d.ts +2 -2
  37. package/dist/commands/validate-tasks.js +26 -15
  38. package/dist/composition-root.d.ts +13 -1
  39. package/dist/composition-root.js +73 -41
  40. package/dist/index.d.ts +41 -0
  41. package/dist/index.js +48 -0
  42. package/dist/orchestration/build-step-sequence.js +4 -2
  43. package/dist/orchestration/steps/fetch-docs-step.js +2 -3
  44. package/dist/orchestration/steps/generate-configs-step.js +28 -12
  45. package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js +1 -1
  46. package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.js +1 -1
  47. package/dist/pipeline/compiler/__tests__/literacy-handler.test.js +1 -1
  48. package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +105 -68
  49. package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.js +33 -100
  50. package/dist/pipeline/compiler/literacy-bridge.d.ts +1 -1
  51. package/dist/pipeline/compiler/literacy-bridge.js +1 -1
  52. package/dist/pipeline/compiler/mode-bases/agent-harness.d.ts +10 -0
  53. package/dist/pipeline/compiler/mode-bases/agent-harness.js +21 -0
  54. package/dist/pipeline/compiler/mode-bases/index.d.ts +4 -0
  55. package/dist/pipeline/compiler/mode-bases/index.js +4 -0
  56. package/dist/pipeline/compiler/mode-bases/knowledge-probe.d.ts +10 -0
  57. package/dist/pipeline/compiler/mode-bases/knowledge-probe.js +22 -0
  58. package/dist/pipeline/compiler/mode-bases/literacy.d.ts +12 -0
  59. package/dist/pipeline/compiler/mode-bases/literacy.js +78 -0
  60. package/dist/pipeline/compiler/mode-bases/mcp-server.d.ts +10 -0
  61. package/dist/pipeline/compiler/mode-bases/mcp-server.js +70 -0
  62. package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.d.ts +43 -0
  63. package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.js +187 -0
  64. package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.d.ts +19 -0
  65. package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.js +138 -0
  66. package/dist/pipeline/compiler/mode-handlers/agent-harness/index.d.ts +16 -0
  67. package/dist/pipeline/compiler/mode-handlers/agent-harness/index.js +43 -0
  68. package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.d.ts +9 -0
  69. package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.js +29 -0
  70. package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.d.ts +12 -0
  71. package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.js +82 -0
  72. package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.d.ts +4 -0
  73. package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.js +19 -0
  74. package/dist/pipeline/compiler/mode-handlers/{agent-harness-handler.d.ts → agent-harness/types.d.ts} +3 -24
  75. package/dist/pipeline/compiler/mode-handlers/agent-harness/types.js +4 -0
  76. package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.d.ts +9 -0
  77. package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.js +16 -0
  78. package/dist/pipeline/compiler/mode-handlers/index.d.ts +4 -5
  79. package/dist/pipeline/compiler/mode-handlers/index.js +4 -6
  80. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.d.ts +16 -0
  81. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.js +61 -0
  82. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.d.ts +18 -0
  83. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.js +112 -0
  84. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.d.ts +26 -0
  85. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.js +49 -0
  86. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.d.ts +9 -0
  87. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.js +28 -0
  88. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.d.ts +44 -0
  89. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.js +4 -0
  90. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.d.ts +9 -0
  91. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.js +24 -0
  92. package/dist/pipeline/compiler/mode-handlers/literacy/assertions.d.ts +18 -0
  93. package/dist/pipeline/compiler/mode-handlers/literacy/assertions.js +118 -0
  94. package/dist/pipeline/compiler/mode-handlers/literacy/compiler.d.ts +14 -0
  95. package/dist/pipeline/compiler/mode-handlers/literacy/compiler.js +105 -0
  96. package/dist/pipeline/compiler/mode-handlers/literacy/index.d.ts +11 -0
  97. package/dist/pipeline/compiler/mode-handlers/literacy/index.js +38 -0
  98. package/dist/pipeline/compiler/mode-handlers/literacy/prompts.d.ts +9 -0
  99. package/dist/pipeline/compiler/mode-handlers/literacy/prompts.js +74 -0
  100. package/dist/pipeline/compiler/mode-handlers/literacy/types.d.ts +41 -0
  101. package/dist/pipeline/compiler/mode-handlers/literacy/types.js +4 -0
  102. package/dist/pipeline/compiler/mode-handlers/literacy/validation.d.ts +12 -0
  103. package/dist/pipeline/compiler/mode-handlers/literacy/validation.js +28 -0
  104. package/dist/pipeline/compiler/mode-handlers/{mcp-assertions.d.ts → mcp-server/assertions.d.ts} +2 -10
  105. package/dist/pipeline/compiler/mode-handlers/{mcp-assertions.js → mcp-server/assertions.js} +63 -6
  106. package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.d.ts +19 -0
  107. package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.js +100 -0
  108. package/dist/pipeline/compiler/mode-handlers/mcp-server/index.d.ts +27 -0
  109. package/dist/pipeline/compiler/mode-handlers/mcp-server/index.js +54 -0
  110. package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.d.ts +8 -0
  111. package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.js +28 -0
  112. package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.d.ts +28 -0
  113. package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.js +104 -0
  114. package/dist/pipeline/compiler/mode-handlers/mcp-server/types.d.ts +37 -0
  115. package/dist/pipeline/compiler/mode-handlers/mcp-server/types.js +4 -0
  116. package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.d.ts +9 -0
  117. package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.js +43 -0
  118. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.d.ts +33 -0
  119. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.js +174 -0
  120. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.d.ts +19 -0
  121. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.js +95 -0
  122. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.d.ts +19 -0
  123. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.js +172 -0
  124. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.d.ts +14 -0
  125. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.js +16 -0
  126. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.d.ts +93 -0
  127. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.js +4 -0
  128. package/dist/pipeline/compiler/preset-loader.d.ts +22 -0
  129. package/dist/pipeline/compiler/preset-loader.js +99 -0
  130. package/dist/pipeline/compiler/presets/sanity-literacy.d.ts +6 -9
  131. package/dist/pipeline/compiler/presets/sanity-literacy.js +10 -156
  132. package/dist/pipeline/expand-tasks.d.ts +2 -2
  133. package/dist/pipeline/expand-tasks.js +2 -2
  134. package/dist/pipeline/generate-configs.js +1 -1
  135. package/dist/pipeline/map-request-to-config.js +1 -0
  136. package/dist/pipeline/mirror-repo-tasks.d.ts +7 -7
  137. package/dist/pipeline/mirror-repo-tasks.js +9 -9
  138. package/dist/pipeline/plan.js +1 -1
  139. package/package.json +11 -3
  140. package/dist/_vendor/ailf-tasks/cli.d.ts +0 -8
  141. package/dist/_vendor/ailf-tasks/cli.js +0 -61
  142. package/dist/_vendor/ailf-tasks/index.d.ts +0 -13
  143. package/dist/_vendor/ailf-tasks/index.js +0 -16
  144. package/dist/_vendor/ailf-tasks/parser.d.ts +0 -27
  145. package/dist/_vendor/ailf-tasks/parser.js +0 -73
  146. package/dist/_vendor/ailf-tasks/schemas.d.ts +0 -198
  147. package/dist/_vendor/ailf-tasks/schemas.js +0 -180
  148. package/dist/_vendor/ailf-tasks/validation.d.ts +0 -47
  149. package/dist/_vendor/ailf-tasks/validation.js +0 -162
  150. package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.js +0 -485
  151. package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.d.ts +0 -76
  152. package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.js +0 -245
  153. package/dist/pipeline/compiler/mode-handlers/literacy-handler.d.ts +0 -89
  154. package/dist/pipeline/compiler/mode-handlers/literacy-handler.js +0 -379
  155. package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.d.ts +0 -67
  156. package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.js +0 -309
@@ -1,8 +1,129 @@
1
1
  /**
2
- * repo-validation.ts — Re-exports semantic validation from @sanity/ailf-tasks.
2
+ * repo-validation.ts — Semantic validation for task definitions.
3
3
  *
4
- * The validation logic is the single source of truth in @sanity/ailf-tasks.
5
- * This file re-exports so existing eval-package importers don't need
6
- * to change their import paths.
4
+ * Checks that go beyond Zod schema parsing:
5
+ * - Assertion types are in the curated set
6
+ * - Rubric template names resolve to known templates
7
+ * - Doc ref slugs look reasonable (slugs, not URLs)
8
+ * - Tasks have at least one LLM rubric assertion (recommended)
9
+ * - Tasks have a prompt text (recommended)
10
+ *
11
+ * Most checks produce warnings, not errors — the pipeline can still run
12
+ * with imperfect tasks. Duplicate task IDs are reported as errors; other structural failures are caught by Zod.
13
+ *
14
+ * Previously this file re-exported from @sanity/ailf-tasks. That package
15
+ * has been eliminated — all validation logic now lives here.
16
+ */
17
+ import { CURATED_ASSERTION_TYPES, RUBRIC_TEMPLATE_NAMES, } from "./repo-schemas.js";
18
+ // ---------------------------------------------------------------------------
19
+ // Public API
20
+ // ---------------------------------------------------------------------------
21
+ /**
22
+ * Run semantic validation on an array of parsed canonical tasks.
23
+ *
24
+ * Returns errors for duplicate task IDs and warnings for issues that don't
25
+ * block execution: unknown assertion types or rubric templates, slugs that
26
+ * look like URLs or paths, no llm-rubric assertion, or missing prompt text.
27
+ */
28
+ export function validateCanonicalTasks(tasks) {
29
+ const errors = [];
30
+ const warnings = [];
31
+ // Check for duplicate IDs
32
+ const seenIds = new Set();
33
+ for (const task of tasks) {
34
+ if (seenIds.has(task.id)) {
35
+ errors.push({
36
+ taskId: task.id,
37
+ field: "id",
38
+ message: `Duplicate task ID "${task.id}"`,
39
+ });
40
+ }
41
+ seenIds.add(task.id);
42
+ }
43
+ for (const task of tasks) {
44
+ const assertions = task.assertions ?? [];
45
+ // Check assertion types
46
+ for (let i = 0; i < assertions.length; i++) {
47
+ const assertion = assertions[i];
48
+ if (!CURATED_ASSERTION_TYPES.includes(assertion.type)) {
49
+ warnings.push({
50
+ taskId: task.id,
51
+ field: `assertions[${i}].type`,
52
+ message: `Unknown assertion type "${assertion.type}". ` +
53
+ `Valid types: ${CURATED_ASSERTION_TYPES.join(", ")}`,
54
+ });
55
+ }
56
+ // Check rubric template for llm-rubric assertions
57
+ if (assertion.type === "llm-rubric" && "template" in assertion) {
58
+ const template = assertion.template;
59
+ if (!RUBRIC_TEMPLATE_NAMES.includes(template)) {
60
+ warnings.push({
61
+ taskId: task.id,
62
+ field: `assertions[${i}].template`,
63
+ message: `Unknown rubric template "${template}". ` +
64
+ `Valid templates: ${RUBRIC_TEMPLATE_NAMES.join(", ")}`,
65
+ });
66
+ }
67
+ }
68
+ }
69
+ // Check canonical doc refs look reasonable
70
+ const docs = task.context?.docs ?? [];
71
+ for (let i = 0; i < docs.length; i++) {
72
+ const doc = docs[i];
73
+ // Slug refs: warn if they look like URLs or paths
74
+ if ("slug" in doc && !("id" in doc) && typeof doc.slug === "string") {
75
+ if (doc.slug.includes("/") || doc.slug.includes("http")) {
76
+ warnings.push({
77
+ taskId: task.id,
78
+ field: `context.docs[${i}].slug`,
79
+ message: `Slug "${doc.slug}" looks like a URL or path — use 'path' type for paths or 'slug' for document slugs (e.g., "groq-introduction")`,
80
+ });
81
+ }
82
+ }
83
+ }
84
+ // Check task has at least one llm-rubric assertion (recommended but not required)
85
+ const hasLlmRubric = assertions.some((a) => a.type === "llm-rubric");
86
+ if (!hasLlmRubric) {
87
+ warnings.push({
88
+ taskId: task.id,
89
+ field: "assertions",
90
+ message: "No llm-rubric assertion found. Tasks should have at least one scored rubric for meaningful evaluation.",
91
+ });
92
+ }
93
+ // Check prompt text exists
94
+ if (!task.prompt?.text) {
95
+ warnings.push({
96
+ taskId: task.id,
97
+ field: "prompt.text",
98
+ message: "No task prompt found in prompt.text. The LLM will receive an empty implementation request.",
99
+ });
100
+ }
101
+ }
102
+ return {
103
+ valid: errors.length === 0,
104
+ errors,
105
+ warnings,
106
+ };
107
+ }
108
+ /**
109
+ * Format validation results for console output.
7
110
  */
8
- export { detectSnakeCaseFields, formatValidationResult, validateRepoTasks, } from "../../_vendor/ailf-tasks/index.js";
111
+ export function formatValidationResult(result) {
112
+ const lines = [];
113
+ if (result.errors.length > 0) {
114
+ lines.push("Errors:");
115
+ for (const e of result.errors) {
116
+ lines.push(` [${e.taskId}] ${e.field}: ${e.message}`);
117
+ }
118
+ }
119
+ if (result.warnings.length > 0) {
120
+ lines.push("Warnings:");
121
+ for (const w of result.warnings) {
122
+ lines.push(` [${w.taskId}] ${w.field}: ${w.message}`);
123
+ }
124
+ }
125
+ if (result.valid && result.warnings.length === 0) {
126
+ lines.push("All tasks pass validation");
127
+ }
128
+ return lines.join("\n");
129
+ }
@@ -3,8 +3,8 @@
3
3
  *
4
4
  * Supplements the existing YAML-based task loading by supporting
5
5
  * `*.task.ts` and `*.task.js` files in task directories. Files are
6
- * loaded via jiti and validated through the RepoTaskSchema from
7
- * @sanity/ailf-tasks.
6
+ * loaded via jiti and expected to export GeneralizedTaskDefinition
7
+ * objects authored with `defineTask()`.
8
8
  *
9
9
  * TS task files export a single task or an array of tasks:
10
10
  *
@@ -3,8 +3,8 @@
3
3
  *
4
4
  * Supplements the existing YAML-based task loading by supporting
5
5
  * `*.task.ts` and `*.task.js` files in task directories. Files are
6
- * loaded via jiti and validated through the RepoTaskSchema from
7
- * @sanity/ailf-tasks.
6
+ * loaded via jiti and expected to export GeneralizedTaskDefinition
7
+ * objects authored with `defineTask()`.
8
8
  *
9
9
  * TS task files export a single task or an array of tasks:
10
10
  *
@@ -8,6 +8,7 @@ import { Command } from "commander";
8
8
  import { dirname, resolve } from "path";
9
9
  import { fileURLToPath } from "url";
10
10
  import { countReferencedDocs, formatCoverageConsole, formatCoverageMarkdown, runCoverageAudit, } from "../pipeline/coverage-audit.js";
11
+ import { createLiteracyModeBase } from "../pipeline/compiler/mode-bases/index.js";
11
12
  import { createSanityLiteracyPreset } from "../pipeline/compiler/presets/index.js";
12
13
  const __dirname = dirname(fileURLToPath(import.meta.url));
13
14
  const ROOT = resolve(__dirname, "..", "..");
@@ -17,9 +18,10 @@ export function createCoverageAuditCommand() {
17
18
  .option("--format <fmt>", "Output format: table, md, markdown")
18
19
  .option("--json", "Output raw JSON", false)
19
20
  .action(async (opts) => {
20
- // Build a registry with preset features so coverage audit works
21
+ // Build a registry with mode base + preset so coverage audit works
21
22
  // even when config/features.ts is empty (preset is source of truth).
22
23
  const registry = new InMemoryPluginRegistry();
24
+ registry.registerModeBase(createLiteracyModeBase());
23
25
  registry.registerPreset(createSanityLiteracyPreset({ rootDir: ROOT }));
24
26
  const report = runCoverageAudit(ROOT, { registry });
25
27
  if (!report) {
@@ -5,12 +5,14 @@
5
5
  * task files. The generated files are ready-to-edit starting points —
6
6
  * not live evaluation tasks.
7
7
  *
8
- * YAML output (default) preserves the inline comments from the source
9
- * YAML files in packages/core/examples/. JSON output is a plain
10
- * serialization of the parsed data no comments.
8
+ * TypeScript output (default) uses define* helpers from @sanity/ailf-core
9
+ * for full IDE autocomplete and type checking. YAML output preserves
10
+ * inline comments from the source files. JSON output is a plain
11
+ * serialization of the parsed data.
11
12
  *
12
13
  * Usage:
13
- * ailf init # YAML output (default)
14
+ * ailf init # TypeScript output (default)
15
+ * ailf init --output-format yaml # YAML output
14
16
  * ailf init --output-format json # JSON output
15
17
  * ailf init --force # overwrite existing files
16
18
  * ailf init --path ./my-dir # target a specific directory
@@ -5,12 +5,14 @@
5
5
  * task files. The generated files are ready-to-edit starting points —
6
6
  * not live evaluation tasks.
7
7
  *
8
- * YAML output (default) preserves the inline comments from the source
9
- * YAML files in packages/core/examples/. JSON output is a plain
10
- * serialization of the parsed data no comments.
8
+ * TypeScript output (default) uses define* helpers from @sanity/ailf-core
9
+ * for full IDE autocomplete and type checking. YAML output preserves
10
+ * inline comments from the source files. JSON output is a plain
11
+ * serialization of the parsed data.
11
12
  *
12
13
  * Usage:
13
- * ailf init # YAML output (default)
14
+ * ailf init # TypeScript output (default)
15
+ * ailf init --output-format yaml # YAML output
14
16
  * ailf init --output-format json # JSON output
15
17
  * ailf init --force # overwrite existing files
16
18
  * ailf init --path ./my-dir # target a specific directory
@@ -18,16 +20,17 @@
18
20
  import { Command } from "commander";
19
21
  import { existsSync, mkdirSync, writeFileSync } from "fs";
20
22
  import { resolve, relative } from "path";
21
- import { ailfConfigData, ailfConfigYaml, taskYamlFiles, TASK_FILE_NAMES, allTaskData, workflowYaml, } from "../_vendor/ailf-core/index.js";
23
+ import { ailfConfigData, ailfConfigYaml, ailfConfigTs, taskYamlFiles, taskTsFiles, TASK_FILE_NAMES, TASK_TS_FILE_NAMES, allTaskData, workflowYaml, } from "../_vendor/ailf-core/index.js";
22
24
  // ---------------------------------------------------------------------------
23
25
  // Command factory
24
26
  // ---------------------------------------------------------------------------
25
27
  export function createInitCommand() {
26
28
  return new Command("init")
27
29
  .description("Initialize a directory for AI Literacy Framework evaluation")
28
- .option("--output-format <fmt>", 'Output format for generated files: "yaml" (default) or "json"', "yaml")
30
+ .option("--output-format <fmt>", 'Output format for generated files: "ts" (default), "yaml", or "json"', "ts")
29
31
  .option("--force", "Overwrite existing files", false)
30
32
  .option("--path <dir>", "Target directory (default: current directory)", ".")
33
+ .option("--mode <mode>", "Scaffold for a specific mode: literacy, mcp-server, custom (default: all modes)")
31
34
  .action(async (opts) => {
32
35
  await runInit(opts);
33
36
  });
@@ -55,8 +58,13 @@ function rel(from, to) {
55
58
  // Init logic
56
59
  // ---------------------------------------------------------------------------
57
60
  async function runInit(opts) {
58
- const format = opts.outputFormat === "json" ? "json" : "yaml";
59
- const ext = format === "json" ? ".json" : ".yaml";
61
+ const validFormats = new Set(["ts", "yaml", "json"]);
62
+ if (!validFormats.has(opts.outputFormat)) {
63
+ console.error(` ✗ Invalid output format "${opts.outputFormat}". Valid options: ts, yaml, json`);
64
+ process.exitCode = 1;
65
+ return;
66
+ }
67
+ const format = opts.outputFormat;
60
68
  const force = opts.force;
61
69
  // Resolve target from the caller's actual working directory
62
70
  const callerCwd = process.env.AILF_CALLER_CWD ?? process.cwd();
@@ -72,24 +80,103 @@ async function runInit(opts) {
72
80
  console.log(` ✓ Created ${rel(targetDir, tasksDir)}/`);
73
81
  const written = [];
74
82
  const skipped = [];
75
- // 2. Write .ailf/config.yaml (or .json)
76
- // YAML: raw string passthrough (preserves comments)
77
- // JSON: serialize the parsed data
78
- const configPath = resolve(ailfDir, `config${ext}`);
79
- const configContent = format === "yaml"
80
- ? ailfConfigYaml
81
- : JSON.stringify(ailfConfigData, null, 2) + "\n";
82
- if (writeIfNew(configPath, configContent, force)) {
83
- written.push(rel(targetDir, configPath));
83
+ // 2. Write project config
84
+ if (format === "ts") {
85
+ // TypeScript: ailf.config.ts with defineConfig helper
86
+ const configPath = resolve(ailfDir, "ailf.config.ts");
87
+ if (writeIfNew(configPath, ailfConfigTs, force)) {
88
+ written.push(rel(targetDir, configPath));
89
+ }
90
+ else {
91
+ skipped.push(rel(targetDir, configPath));
92
+ }
93
+ }
94
+ else if (format === "yaml") {
95
+ // YAML: raw string passthrough (preserves comments)
96
+ const configPath = resolve(ailfDir, "config.yaml");
97
+ if (writeIfNew(configPath, ailfConfigYaml, force)) {
98
+ written.push(rel(targetDir, configPath));
99
+ }
100
+ else {
101
+ skipped.push(rel(targetDir, configPath));
102
+ }
84
103
  }
85
104
  else {
86
- skipped.push(rel(targetDir, configPath));
105
+ // JSON: serialize the parsed data
106
+ const configPath = resolve(ailfDir, "config.json");
107
+ const content = JSON.stringify(ailfConfigData, null, 2) + "\n";
108
+ if (writeIfNew(configPath, content, force)) {
109
+ written.push(rel(targetDir, configPath));
110
+ }
111
+ else {
112
+ skipped.push(rel(targetDir, configPath));
113
+ }
87
114
  }
88
115
  // 3. Write example tasks to .ailf/tasks/
89
- // YAML: raw string passthrough (preserves comments)
90
- // JSON: serialize individual task data
91
- if (format === "yaml") {
92
- // Each task is its own commented YAML file — write as-is
116
+ const modeFilter = opts.mode;
117
+ const isCustomMode = modeFilter === "custom";
118
+ if (format === "ts") {
119
+ // TypeScript: *.task.ts files with defineTask helper
120
+ // Default (no --mode): write literacy examples + draft MCP/probe examples
121
+ // --mode literacy: only literacy examples
122
+ // --mode mcp-server: only MCP examples (active, not draft)
123
+ // --mode custom: only a custom example task
124
+ if (!modeFilter || modeFilter === "literacy") {
125
+ for (const stem of TASK_TS_FILE_NAMES) {
126
+ const taskPath = resolve(tasksDir, `${stem}.task.ts`);
127
+ const content = taskTsFiles[stem];
128
+ if (writeIfNew(taskPath, content, force)) {
129
+ written.push(rel(targetDir, taskPath));
130
+ }
131
+ else {
132
+ skipped.push(rel(targetDir, taskPath));
133
+ }
134
+ }
135
+ }
136
+ // Draft examples for other modes (default init only)
137
+ if (!modeFilter) {
138
+ const mcpPath = resolve(tasksDir, "example-mcp-tool-usage.task.ts");
139
+ if (writeIfNew(mcpPath, MCP_DRAFT_TASK_TS, force)) {
140
+ written.push(rel(targetDir, mcpPath));
141
+ }
142
+ else {
143
+ skipped.push(rel(targetDir, mcpPath));
144
+ }
145
+ const probePath = resolve(tasksDir, "example-knowledge-probe.task.ts");
146
+ if (writeIfNew(probePath, PROBE_DRAFT_TASK_TS, force)) {
147
+ written.push(rel(targetDir, probePath));
148
+ }
149
+ else {
150
+ skipped.push(rel(targetDir, probePath));
151
+ }
152
+ }
153
+ // MCP-only init
154
+ if (modeFilter === "mcp-server") {
155
+ const mcpContent = MCP_DRAFT_TASK_TS.replace('status: "draft",', '// status: "active", // Activated — this task runs in evaluations');
156
+ const mcpPath = resolve(tasksDir, "example-mcp-tool-usage.task.ts");
157
+ if (writeIfNew(mcpPath, mcpContent, force)) {
158
+ written.push(rel(targetDir, mcpPath));
159
+ }
160
+ else {
161
+ skipped.push(rel(targetDir, mcpPath));
162
+ }
163
+ }
164
+ // Custom preset scaffold
165
+ if (isCustomMode) {
166
+ const customTaskPath = resolve(tasksDir, "example-custom.task.ts");
167
+ // Reuse the GROQ literacy task as a starting point
168
+ if (taskTsFiles[TASK_TS_FILE_NAMES[0]]) {
169
+ if (writeIfNew(customTaskPath, taskTsFiles[TASK_TS_FILE_NAMES[0]], force)) {
170
+ written.push(rel(targetDir, customTaskPath));
171
+ }
172
+ else {
173
+ skipped.push(rel(targetDir, customTaskPath));
174
+ }
175
+ }
176
+ }
177
+ }
178
+ else if (format === "yaml") {
179
+ // YAML: raw string passthrough (preserves comments)
93
180
  for (const stem of TASK_FILE_NAMES) {
94
181
  const taskPath = resolve(tasksDir, `${stem}.yaml`);
95
182
  const content = taskYamlFiles[stem];
@@ -118,6 +205,16 @@ async function runInit(opts) {
118
205
  }
119
206
  }
120
207
  }
208
+ // 3b. Write custom preset scaffold (--mode custom only)
209
+ if (isCustomMode && format === "ts") {
210
+ const presetPath = resolve(ailfDir, "preset.ts");
211
+ if (writeIfNew(presetPath, CUSTOM_PRESET_TS, force)) {
212
+ written.push(rel(targetDir, presetPath));
213
+ }
214
+ else {
215
+ skipped.push(rel(targetDir, presetPath));
216
+ }
217
+ }
121
218
  // 4. Write .gitignore in .ailf/ (keep results out of version control)
122
219
  const gitignorePath = resolve(ailfDir, ".gitignore");
123
220
  const gitignoreContent = `# AILF generated files\nresults/\ncontexts/\n`;
@@ -150,18 +247,25 @@ async function runInit(opts) {
150
247
  console.log(` ⊘ Skipped ${f} (already exists, use --force to overwrite)`);
151
248
  }
152
249
  }
250
+ const taskExt = format === "ts" ? ".task.ts" : format === "yaml" ? ".yaml" : ".json";
153
251
  console.log();
154
252
  console.log(" Next steps:");
155
253
  console.log();
156
254
  console.log(` 1. Edit the example tasks in ${rel(targetDir, tasksDir)}/ — update`);
157
255
  console.log(" slugs and prompts for your documentation");
158
- console.log(" 2. Validate locally: npx @sanity/ailf@latest validate-tasks .ailf/tasks/");
256
+ console.log(` 2. Validate locally: npx @sanity/ailf@latest validate-tasks .ailf/tasks/`);
159
257
  console.log(" 3. Add two GitHub Actions secrets");
160
258
  console.log(" (Settings → Secrets and variables → Actions):");
161
259
  console.log(" • AILF_API_KEY — your API key (starts with ailf_live_sk_)");
162
260
  console.log(" • NPM_TOKEN — npm token with read access to @sanity scope");
163
261
  console.log(" 4. Push — the workflow at .github/workflows/ailf-eval.yml runs");
164
262
  console.log(" automatically on PRs");
263
+ if (format === "ts") {
264
+ console.log();
265
+ console.log(` 💡 TypeScript tasks (${taskExt}) give you full IDE autocomplete`);
266
+ console.log(" via defineTask() from @sanity/ailf-core. YAML and JSON are");
267
+ console.log(" also supported — re-run with --output-format yaml if preferred.");
268
+ }
165
269
  console.log();
166
270
  console.log(" 🔑 Retrieve secrets from 1Password (Sanity employees):");
167
271
  console.log();
@@ -177,3 +281,178 @@ async function runInit(opts) {
177
281
  console.log(" AILF_API_KEY=... npx @sanity/ailf@latest pipeline --remote --debug");
178
282
  console.log();
179
283
  }
284
+ // ---------------------------------------------------------------------------
285
+ // Draft example templates for non-literacy modes
286
+ // ---------------------------------------------------------------------------
287
+ const MCP_DRAFT_TASK_TS = `/**
288
+ * Example Task: MCP Server tool-use evaluation (DRAFT).
289
+ *
290
+ * Tests whether an LLM can correctly discover and invoke Sanity MCP server
291
+ * tools. Connects to the hosted Sanity MCP server at https://mcp.sanity.io.
292
+ *
293
+ * Prerequisites:
294
+ * - A Sanity API token with read access (for token-based auth)
295
+ * - Or: OAuth authentication will be prompted on first connect
296
+ *
297
+ * Authentication options:
298
+ * 1. Token-based: set SANITY_API_TOKEN env var
299
+ * 2. OAuth: the server prompts for login on first connect
300
+ *
301
+ * Setup: npx sanity@latest mcp configure
302
+ * Docs: https://www.sanity.io/docs/ai/mcp-server
303
+ *
304
+ * This task is a DRAFT — it won't run unless activated or explicitly targeted.
305
+ * To activate: change status to "active" or remove the status field.
306
+ */
307
+
308
+ import { defineTask } from "../_vendor/ailf-core/index.js"
309
+
310
+ export default defineTask({
311
+ mode: "mcp-server",
312
+ id: "example-mcp-tool-usage",
313
+ title: "MCP tool discovery and invocation",
314
+ description: "Example — tests Sanity MCP server tool-use (draft)",
315
+ area: "mcp",
316
+
317
+ // ── Server configuration ────────────────────────────────────
318
+ // The Sanity MCP server is hosted remotely at https://mcp.sanity.io.
319
+ // Authentication via API token header or OAuth.
320
+ //
321
+ // For token auth, set SANITY_API_TOKEN in your environment.
322
+ serverConfig: {
323
+ transport: "streamable-http",
324
+ url: "https://mcp.sanity.io",
325
+ env: {
326
+ SANITY_API_TOKEN: process.env.SANITY_API_TOKEN ?? "",
327
+ },
328
+ },
329
+
330
+ prompt: {
331
+ text: \`Use the available MCP tools to query all documents of type "article"
332
+ in the Sanity dataset. Return the title and slug for each document.
333
+ Limit results to 5 documents.\`,
334
+ },
335
+
336
+ assertions: [
337
+ {
338
+ type: "llm-rubric",
339
+ template: "mcp-input-validation",
340
+ criteria: [
341
+ "Correctly identifies the query_documents tool",
342
+ "Passes a valid GROQ query to filter by document type",
343
+ "Requests only the needed fields (title, slug)",
344
+ ],
345
+ },
346
+ ],
347
+
348
+ status: "draft",
349
+ })
350
+ `;
351
+ const PROBE_DRAFT_TASK_TS = `/**
352
+ * Example Task: Knowledge probe baseline (DRAFT).
353
+ *
354
+ * Tests what the model knows about a topic without providing documentation.
355
+ * Used to establish a baseline for comparison with literacy evaluations.
356
+ * This task is a DRAFT — it won't run unless activated or explicitly targeted.
357
+ *
358
+ * To activate: change status to "active" or remove the status field.
359
+ */
360
+
361
+ import { defineTask } from "../_vendor/ailf-core/index.js"
362
+
363
+ export default defineTask({
364
+ mode: "knowledge-probe",
365
+ id: "example-knowledge-probe",
366
+ title: "Model knowledge of GROQ syntax",
367
+ description: "Example — probes baseline model knowledge (draft)",
368
+ area: "groq",
369
+
370
+ prompt: {
371
+ text: \`Explain the GROQ query language used by Sanity. Cover:
372
+ 1. Basic query syntax and projections
373
+ 2. How to filter and sort results
374
+ 3. Common patterns for fetching related documents
375
+ Provide working code examples.\`,
376
+ },
377
+
378
+ assertions: [
379
+ {
380
+ type: "llm-rubric",
381
+ template: "task-completion",
382
+ criteria: [
383
+ "Demonstrates understanding of GROQ query syntax",
384
+ "Shows filtering and projection patterns",
385
+ "Code examples use valid GROQ syntax",
386
+ ],
387
+ },
388
+ ],
389
+
390
+ status: "draft",
391
+ })
392
+ `;
393
+ const CUSTOM_PRESET_TS = `/**
394
+ * Custom preset — your domain-specific evaluation configuration.
395
+ *
396
+ * This preset targets the "literacy" mode base and inherits its evaluation
397
+ * methodology (rubrics, scoring profiles, prompt templates). You only need
398
+ * to provide domain-specific configuration: where your docs live, what
399
+ * features to track, and how to fetch documentation.
400
+ *
401
+ * To use a different mode (e.g., "mcp-server"), change the mode field.
402
+ * Available built-in modes: literacy, mcp-server, knowledge-probe, agent-harness.
403
+ *
404
+ * @see https://github.com/sanity-labs/ai-literacy-framework/blob/main/docs/PRESETS.md
405
+ */
406
+
407
+ import { definePreset } from "../_vendor/ailf-core/index.js"
408
+
409
+ export default definePreset({
410
+ name: "my-docs-evaluation",
411
+ manifest: {
412
+ name: "my-docs-evaluation",
413
+ version: "1.0.0",
414
+ description: "Documentation literacy evaluation for my project.",
415
+ pluginApiVersion: 1,
416
+ },
417
+
418
+ // Target the literacy mode base — inherits rubrics, scoring, prompts.
419
+ // Change to "mcp-server" to evaluate MCP tool usage instead.
420
+ mode: "literacy",
421
+
422
+ // Source definitions — where your documentation lives.
423
+ sourceDefs: [
424
+ {
425
+ name: "production",
426
+ baseUrl: "https://docs.example.com",
427
+ // projectId: "your-sanity-project-id",
428
+ // dataset: "production",
429
+ },
430
+ ],
431
+
432
+ // Feature registry — what product features you're tracking coverage for.
433
+ featureDefs: {
434
+ features: [
435
+ {
436
+ id: "getting-started",
437
+ name: "Getting Started Guide",
438
+ sections: ["guides"],
439
+ status: "covered",
440
+ area: "guides",
441
+ priority: "critical",
442
+ },
443
+ {
444
+ id: "api-reference",
445
+ name: "API Reference",
446
+ sections: ["reference"],
447
+ status: "uncovered",
448
+ priority: "high",
449
+ },
450
+ ],
451
+ },
452
+
453
+ // Optional: override mode base rubrics, scoring, or prompts here.
454
+ // rubricTemplates: [{ ... }],
455
+ // scoringProfiles: { ... },
456
+ // promptTemplates: { ... },
457
+ })
458
+ `;
@@ -1,7 +1,7 @@
1
1
  /**
2
- * validate-tasks command — standalone validation of repo-based task YAML files.
2
+ * validate-tasks command — standalone validation of task files.
3
3
  *
4
- * Validates .ailf/tasks/*.yaml files against the RepoTaskSchema without
4
+ * Validates task files in .ailf/tasks/ against the CanonicalTaskSchema without
5
5
  * running the full pipeline. Useful for pre-commit hooks and CI checks
6
6
  * in external repos.
7
7
  *