@sanity/ailf 1.0.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (156)
  1. package/README.md +0 -1
  2. package/config/models.ts +15 -3
  3. package/dist/_vendor/ailf-core/config-helpers.d.ts +14 -17
  4. package/dist/_vendor/ailf-core/config-helpers.js +22 -2
  5. package/dist/_vendor/ailf-core/examples/index.d.ts +16 -0
  6. package/dist/_vendor/ailf-core/examples/index.js +25 -0
  7. package/dist/_vendor/ailf-core/index.d.ts +2 -2
  8. package/dist/_vendor/ailf-core/index.js +1 -1
  9. package/dist/_vendor/ailf-core/ports/context.d.ts +2 -0
  10. package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +1 -0
  11. package/dist/_vendor/ailf-core/schemas/eval-config.js +10 -0
  12. package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +1 -0
  13. package/dist/_vendor/ailf-core/schemas/pipeline-request.js +2 -0
  14. package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +0 -2
  15. package/dist/_vendor/ailf-core/schemas/pipeline.js +0 -1
  16. package/dist/_vendor/ailf-core/types/generalized-task.d.ts +13 -0
  17. package/dist/_vendor/ailf-core/types/index.d.ts +1 -3
  18. package/dist/_vendor/ailf-core/types/plugin-registry.d.ts +78 -23
  19. package/dist/_vendor/ailf-core/types/plugin-registry.js +73 -20
  20. package/dist/adapters/config-sources/file-config-adapter.js +1 -0
  21. package/dist/adapters/config-sources/ts-config-loader.js +21 -13
  22. package/dist/adapters/task-sources/content-lake-task-source.js +17 -20
  23. package/dist/adapters/task-sources/index.d.ts +2 -2
  24. package/dist/adapters/task-sources/index.js +2 -2
  25. package/dist/adapters/task-sources/repo-schemas.d.ts +218 -16
  26. package/dist/adapters/task-sources/repo-schemas.js +227 -19
  27. package/dist/adapters/task-sources/repo-task-source.d.ts +14 -10
  28. package/dist/adapters/task-sources/repo-task-source.js +81 -122
  29. package/dist/adapters/task-sources/repo-validation.d.ts +36 -5
  30. package/dist/adapters/task-sources/repo-validation.js +126 -5
  31. package/dist/adapters/task-sources/task-file-loader.d.ts +2 -2
  32. package/dist/adapters/task-sources/task-file-loader.js +2 -2
  33. package/dist/commands/coverage-audit.js +3 -1
  34. package/dist/commands/init.d.ts +6 -4
  35. package/dist/commands/init.js +302 -23
  36. package/dist/commands/validate-tasks.d.ts +2 -2
  37. package/dist/commands/validate-tasks.js +26 -15
  38. package/dist/composition-root.d.ts +13 -1
  39. package/dist/composition-root.js +73 -41
  40. package/dist/index.d.ts +41 -0
  41. package/dist/index.js +48 -0
  42. package/dist/orchestration/build-step-sequence.js +4 -2
  43. package/dist/orchestration/steps/fetch-docs-step.js +2 -3
  44. package/dist/orchestration/steps/generate-configs-step.js +28 -12
  45. package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js +1 -1
  46. package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.js +1 -1
  47. package/dist/pipeline/compiler/__tests__/literacy-handler.test.js +1 -1
  48. package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +105 -68
  49. package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.js +33 -100
  50. package/dist/pipeline/compiler/literacy-bridge.d.ts +1 -1
  51. package/dist/pipeline/compiler/literacy-bridge.js +1 -1
  52. package/dist/pipeline/compiler/mode-bases/agent-harness.d.ts +10 -0
  53. package/dist/pipeline/compiler/mode-bases/agent-harness.js +21 -0
  54. package/dist/pipeline/compiler/mode-bases/index.d.ts +4 -0
  55. package/dist/pipeline/compiler/mode-bases/index.js +4 -0
  56. package/dist/pipeline/compiler/mode-bases/knowledge-probe.d.ts +10 -0
  57. package/dist/pipeline/compiler/mode-bases/knowledge-probe.js +22 -0
  58. package/dist/pipeline/compiler/mode-bases/literacy.d.ts +12 -0
  59. package/dist/pipeline/compiler/mode-bases/literacy.js +78 -0
  60. package/dist/pipeline/compiler/mode-bases/mcp-server.d.ts +10 -0
  61. package/dist/pipeline/compiler/mode-bases/mcp-server.js +70 -0
  62. package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.d.ts +43 -0
  63. package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.js +187 -0
  64. package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.d.ts +19 -0
  65. package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.js +138 -0
  66. package/dist/pipeline/compiler/mode-handlers/agent-harness/index.d.ts +16 -0
  67. package/dist/pipeline/compiler/mode-handlers/agent-harness/index.js +43 -0
  68. package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.d.ts +9 -0
  69. package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.js +29 -0
  70. package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.d.ts +12 -0
  71. package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.js +82 -0
  72. package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.d.ts +4 -0
  73. package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.js +19 -0
  74. package/dist/pipeline/compiler/mode-handlers/{agent-harness-handler.d.ts → agent-harness/types.d.ts} +3 -24
  75. package/dist/pipeline/compiler/mode-handlers/agent-harness/types.js +4 -0
  76. package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.d.ts +9 -0
  77. package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.js +16 -0
  78. package/dist/pipeline/compiler/mode-handlers/index.d.ts +4 -5
  79. package/dist/pipeline/compiler/mode-handlers/index.js +4 -6
  80. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.d.ts +16 -0
  81. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.js +61 -0
  82. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.d.ts +18 -0
  83. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.js +112 -0
  84. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.d.ts +26 -0
  85. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.js +49 -0
  86. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.d.ts +9 -0
  87. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.js +28 -0
  88. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.d.ts +44 -0
  89. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.js +4 -0
  90. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.d.ts +9 -0
  91. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.js +24 -0
  92. package/dist/pipeline/compiler/mode-handlers/literacy/assertions.d.ts +18 -0
  93. package/dist/pipeline/compiler/mode-handlers/literacy/assertions.js +118 -0
  94. package/dist/pipeline/compiler/mode-handlers/literacy/compiler.d.ts +14 -0
  95. package/dist/pipeline/compiler/mode-handlers/literacy/compiler.js +105 -0
  96. package/dist/pipeline/compiler/mode-handlers/literacy/index.d.ts +11 -0
  97. package/dist/pipeline/compiler/mode-handlers/literacy/index.js +38 -0
  98. package/dist/pipeline/compiler/mode-handlers/literacy/prompts.d.ts +9 -0
  99. package/dist/pipeline/compiler/mode-handlers/literacy/prompts.js +74 -0
  100. package/dist/pipeline/compiler/mode-handlers/literacy/types.d.ts +41 -0
  101. package/dist/pipeline/compiler/mode-handlers/literacy/types.js +4 -0
  102. package/dist/pipeline/compiler/mode-handlers/literacy/validation.d.ts +12 -0
  103. package/dist/pipeline/compiler/mode-handlers/literacy/validation.js +28 -0
  104. package/dist/pipeline/compiler/mode-handlers/{mcp-assertions.d.ts → mcp-server/assertions.d.ts} +2 -10
  105. package/dist/pipeline/compiler/mode-handlers/{mcp-assertions.js → mcp-server/assertions.js} +63 -6
  106. package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.d.ts +19 -0
  107. package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.js +100 -0
  108. package/dist/pipeline/compiler/mode-handlers/mcp-server/index.d.ts +27 -0
  109. package/dist/pipeline/compiler/mode-handlers/mcp-server/index.js +54 -0
  110. package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.d.ts +8 -0
  111. package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.js +28 -0
  112. package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.d.ts +28 -0
  113. package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.js +104 -0
  114. package/dist/pipeline/compiler/mode-handlers/mcp-server/types.d.ts +37 -0
  115. package/dist/pipeline/compiler/mode-handlers/mcp-server/types.js +4 -0
  116. package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.d.ts +9 -0
  117. package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.js +43 -0
  118. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.d.ts +33 -0
  119. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.js +174 -0
  120. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.d.ts +19 -0
  121. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.js +95 -0
  122. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.d.ts +19 -0
  123. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.js +172 -0
  124. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.d.ts +14 -0
  125. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.js +16 -0
  126. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.d.ts +93 -0
  127. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.js +4 -0
  128. package/dist/pipeline/compiler/preset-loader.d.ts +22 -0
  129. package/dist/pipeline/compiler/preset-loader.js +99 -0
  130. package/dist/pipeline/compiler/presets/sanity-literacy.d.ts +6 -9
  131. package/dist/pipeline/compiler/presets/sanity-literacy.js +10 -156
  132. package/dist/pipeline/expand-tasks.d.ts +2 -2
  133. package/dist/pipeline/expand-tasks.js +2 -2
  134. package/dist/pipeline/generate-configs.js +1 -1
  135. package/dist/pipeline/map-request-to-config.js +1 -0
  136. package/dist/pipeline/mirror-repo-tasks.d.ts +7 -7
  137. package/dist/pipeline/mirror-repo-tasks.js +9 -9
  138. package/dist/pipeline/plan.js +1 -1
  139. package/package.json +11 -3
  140. package/dist/_vendor/ailf-tasks/cli.d.ts +0 -8
  141. package/dist/_vendor/ailf-tasks/cli.js +0 -61
  142. package/dist/_vendor/ailf-tasks/index.d.ts +0 -13
  143. package/dist/_vendor/ailf-tasks/index.js +0 -16
  144. package/dist/_vendor/ailf-tasks/parser.d.ts +0 -27
  145. package/dist/_vendor/ailf-tasks/parser.js +0 -73
  146. package/dist/_vendor/ailf-tasks/schemas.d.ts +0 -198
  147. package/dist/_vendor/ailf-tasks/schemas.js +0 -180
  148. package/dist/_vendor/ailf-tasks/validation.d.ts +0 -47
  149. package/dist/_vendor/ailf-tasks/validation.js +0 -162
  150. package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.js +0 -485
  151. package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.d.ts +0 -76
  152. package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.js +0 -245
  153. package/dist/pipeline/compiler/mode-handlers/literacy-handler.d.ts +0 -89
  154. package/dist/pipeline/compiler/mode-handlers/literacy-handler.js +0 -379
  155. package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.d.ts +0 -67
  156. package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.js +0 -309
@@ -1,24 +1,20 @@
1
1
  /**
2
- * sanity-literacy preset — the built-in documentation literacy evaluation.
2
+ * sanity-literacy preset — Sanity-specific domain configuration for literacy evaluation.
3
3
  *
4
- * Packages ALL Sanity-specific configuration into a single preset:
5
- * - Literacy mode handler registration
6
- * - Prompt templates (with-docs, without-docs, agentic)
7
- * - Rubric templates (task-completion, code-correctness, doc-coverage)
8
- * - Scoring profiles (default, output-only)
4
+ * This is a domain preset that targets the `literacy` mode base. It provides
5
+ * Sanity-specific configuration:
9
6
  * - Sanity doc source definitions (production, branch, local)
10
7
  * - Product feature registry for coverage auditing
11
8
  * - DocFetcher factory (SanityDocFetcher)
12
- * - Standard assertions and fixture resolvers
9
+ * - Sanity fixture resolver (sanity:// scheme)
13
10
  *
14
- * Use `createSanityLiteracyPreset()` to get a fully configured preset
15
- * with a docFetcher factory bound to a specific rootDir.
11
+ * Evaluation methodology (rubrics, scoring, prompts) is inherited from the
12
+ * `literacy` mode base — see mode-bases/literacy.ts.
16
13
  *
17
14
  * @see docs/exec-plans/architecture-overhaul/phase-8-scoring-storage-presets.md
18
15
  */
19
16
  import { env } from "../../../_vendor/ailf-core/index.js";
20
17
  import { SanityDocFetcher } from "../../../adapters/doc-fetchers/index.js";
21
- import { LITERACY_PROMPT_TEMPLATES } from "../mode-handlers/literacy-handler.js";
22
18
  // ---------------------------------------------------------------------------
23
19
  // Factory
24
20
  // ---------------------------------------------------------------------------
@@ -41,130 +37,10 @@ export function createSanityLiteracyPreset(options) {
41
37
  "features correctly.",
42
38
  pluginApiVersion: 1,
43
39
  },
44
- // ── Mode handler ─────────────────────────────────────────
45
- modes: [
46
- {
47
- id: "literacy",
48
- label: "Documentation Literacy",
49
- validProviderPatterns: ["^openai:", "^anthropic:", "^file://"],
50
- rubricTemplateIds: [
51
- "task-completion",
52
- "code-correctness",
53
- "doc-coverage",
54
- ],
55
- handlerModule: "./mode-handlers/literacy-handler.js",
56
- },
57
- ],
58
- // ── Assertions ───────────────────────────────────────────
59
- assertions: [
60
- {
61
- type: "contains",
62
- label: "Contains text",
63
- compatibleModes: ["literacy", "knowledge-probe", "mcp-server"],
64
- handlerModule: "promptfoo:builtin",
65
- },
66
- {
67
- type: "contains-all",
68
- label: "Contains all texts",
69
- compatibleModes: ["literacy", "knowledge-probe", "mcp-server"],
70
- handlerModule: "promptfoo:builtin",
71
- },
72
- {
73
- type: "contains-any",
74
- label: "Contains any text",
75
- compatibleModes: ["literacy", "knowledge-probe", "mcp-server"],
76
- handlerModule: "promptfoo:builtin",
77
- },
78
- {
79
- type: "equals",
80
- label: "Exact match",
81
- compatibleModes: ["literacy"],
82
- handlerModule: "promptfoo:builtin",
83
- },
84
- {
85
- type: "regex",
86
- label: "Regex match",
87
- compatibleModes: ["literacy", "knowledge-probe"],
88
- handlerModule: "promptfoo:builtin",
89
- },
90
- {
91
- type: "is-json",
92
- label: "Valid JSON",
93
- compatibleModes: ["literacy", "mcp-server"],
94
- handlerModule: "promptfoo:builtin",
95
- },
96
- {
97
- type: "javascript",
98
- label: "JavaScript assertion",
99
- compatibleModes: [
100
- "literacy",
101
- "mcp-server",
102
- "agent-harness",
103
- "knowledge-probe",
104
- "custom",
105
- ],
106
- handlerModule: "promptfoo:builtin",
107
- },
108
- {
109
- type: "llm-rubric",
110
- label: "LLM-graded rubric",
111
- compatibleModes: [
112
- "literacy",
113
- "mcp-server",
114
- "agent-harness",
115
- "knowledge-probe",
116
- "custom",
117
- ],
118
- handlerModule: "promptfoo:builtin",
119
- },
120
- {
121
- type: "similar",
122
- label: "Semantic similarity",
123
- compatibleModes: ["literacy", "knowledge-probe"],
124
- handlerModule: "promptfoo:builtin",
125
- },
126
- ],
127
- // ── Rubric templates ─────────────────────────────────────
128
- rubricTemplates: [
129
- {
130
- id: "task-completion",
131
- dimension: "task-completion",
132
- header: "Score task completion from 0 to 100:",
133
- scale: [
134
- "0: Couldn't attempt — missing critical information",
135
- "20: Attempted but fundamentally wrong approach",
136
- "50: Partial implementation — major functional gaps",
137
- "80: Mostly complete — minor issues or missing edge cases",
138
- "100: Fully functional code — works as expected",
139
- ],
140
- criteriaLabel: "Must demonstrate:",
141
- },
142
- {
143
- id: "code-correctness",
144
- dimension: "code-correctness",
145
- header: "Score code correctness from 0 to 100:",
146
- scale: [
147
- "0: Broken code, syntax errors, or deprecated APIs",
148
- "30: Works but uses anti-patterns or inefficient approaches",
149
- "50: Works but not idiomatic",
150
- "80: Follows most best practices",
151
- "100: Follows all best practices, idiomatic implementation",
152
- ],
153
- criteriaLabel: "Check for:",
154
- },
155
- {
156
- id: "doc-coverage",
157
- dimension: "doc-coverage",
158
- header: "Score documentation coverage from 0 to 100:",
159
- scale: [
160
- "0: Had to hallucinate/guess most implementation details",
161
- "30: Significant gaps — filled with assumptions",
162
- "50: Some gaps — inferred from partial information",
163
- "80: Minor gaps — almost everything was documented",
164
- "100: Complete coverage — all necessary info was in docs",
165
- ],
166
- },
167
- ],
40
+ // ── Mode ──────────────────────────────────────────────────
41
+ // Targets the literacy mode base. Evaluation methodology (rubrics,
42
+ // scoring, prompts) is inherited from mode-bases/literacy.ts.
43
+ mode: "literacy",
168
44
  // ── Fixture resolvers ────────────────────────────────────
169
45
  fixtureResolvers: [
170
46
  {
@@ -172,22 +48,6 @@ export function createSanityLiteracyPreset(options) {
172
48
  handlerModule: "./fixture-resolver.js",
173
49
  },
174
50
  ],
175
- // ── Prompt templates (from literacy handler) ─────────────
176
- promptTemplates: LITERACY_PROMPT_TEMPLATES,
177
- // ── Scoring profiles ─────────────────────────────────────
178
- // Literacy-relevant profiles only; mode-specific profiles for
179
- // mcp-server, knowledge-probe, etc. belong in their own presets.
180
- scoringProfiles: {
181
- default: {
182
- "task-completion": 0.5,
183
- "code-correctness": 0.25,
184
- "doc-coverage": 0.25,
185
- },
186
- "output-only": {
187
- "task-completion": 0.6,
188
- "code-correctness": 0.4,
189
- },
190
- },
191
51
  // ── Doc fetcher factory ──────────────────────────────────
192
52
  // Closure captures rootDir so the registry can instantiate
193
53
  // the fetcher without knowing about Sanity internals.
@@ -227,7 +87,6 @@ export function createSanityLiteracyPreset(options) {
227
87
  status: "covered",
228
88
  area: "groq",
229
89
  priority: "critical",
230
- taskCount: 3,
231
90
  },
232
91
  {
233
92
  id: "visual-editing",
@@ -236,7 +95,6 @@ export function createSanityLiteracyPreset(options) {
236
95
  status: "covered",
237
96
  area: "visual-editing",
238
97
  priority: "critical",
239
- taskCount: 1,
240
98
  },
241
99
  {
242
100
  id: "nextjs-live",
@@ -245,7 +103,6 @@ export function createSanityLiteracyPreset(options) {
245
103
  status: "covered",
246
104
  area: "nextjs-live",
247
105
  priority: "high",
248
- taskCount: 2,
249
106
  },
250
107
  {
251
108
  id: "functions",
@@ -254,7 +111,6 @@ export function createSanityLiteracyPreset(options) {
254
111
  status: "covered",
255
112
  area: "functions",
256
113
  priority: "high",
257
- taskCount: 2,
258
114
  },
259
115
  {
260
116
  id: "studio-setup",
@@ -263,7 +119,6 @@ export function createSanityLiteracyPreset(options) {
263
119
  status: "covered",
264
120
  area: "studio-setup",
265
121
  priority: "high",
266
- taskCount: 1,
267
122
  },
268
123
  {
269
124
  id: "frameworks",
@@ -272,7 +127,6 @@ export function createSanityLiteracyPreset(options) {
272
127
  status: "covered",
273
128
  area: "frameworks",
274
129
  priority: "high",
275
- taskCount: 2,
276
130
  },
277
131
  // Uncovered (no evaluation tasks yet)
278
132
  {
@@ -4,9 +4,9 @@
4
4
  * @deprecated This is part of the LEGACY compilation path. New code should
5
5
  * use the literacy handler in the compiler pipeline instead:
6
6
  *
7
- * import { compileLiteracyTask } from "./compiler/mode-handlers/literacy-handler.js"
7
+ * import { compileLiteracyTask } from "./compiler/mode-handlers/literacy/index.js"
8
8
  *
9
- * @see packages/eval/src/pipeline/compiler/mode-handlers/literacy-handler.ts
9
+ * @see packages/eval/src/pipeline/compiler/mode-handlers/literacy/index.ts
10
10
  *
11
11
  * ---
12
12
  *
@@ -4,9 +4,9 @@
4
4
  * @deprecated This is part of the LEGACY compilation path. New code should
5
5
  * use the literacy handler in the compiler pipeline instead:
6
6
  *
7
- * import { compileLiteracyTask } from "./compiler/mode-handlers/literacy-handler.js"
7
+ * import { compileLiteracyTask } from "./compiler/mode-handlers/literacy/index.js"
8
8
  *
9
- * @see packages/eval/src/pipeline/compiler/mode-handlers/literacy-handler.ts
9
+ * @see packages/eval/src/pipeline/compiler/mode-handlers/literacy/index.ts
10
10
  *
11
11
  * ---
12
12
  *
@@ -39,7 +39,7 @@ import { resolve } from "path";
39
39
  import { dump } from "js-yaml";
40
40
  import { ConsoleLogger } from "../adapters/loggers/index.js";
41
41
  import { loadConfigFile } from "./compiler/config-loader.js";
42
- import { LITERACY_PROMPT_TEMPLATES } from "./compiler/mode-handlers/literacy-handler.js";
42
+ import { LITERACY_PROMPT_TEMPLATES } from "./compiler/mode-handlers/literacy/index.js";
43
43
  import { expandTaskDefinitions, loadAndExpandTasks } from "./expand-tasks.js";
44
44
  import { validateModelsYaml } from "./validate.js";
45
45
  import { LiteracyVariant } from "./normalize-mode.js";
@@ -68,6 +68,7 @@ export function mapRequestToConfig(request, rootDir) {
68
68
  jobId: request.jobId,
69
69
  remote: false,
70
70
  apiUrl: "https://ailf-api.sanity.build",
71
+ presets: request.presets,
71
72
  };
72
73
  }
73
74
  function mapDebug(debug) {
@@ -58,7 +58,7 @@ export interface MirrorResult {
58
58
  skipped: number;
59
59
  /** Feature areas auto-created */
60
60
  areasCreated: string[];
61
- /** Canonical doc slugs that failed to resolve */
61
+ /** Context doc slugs that failed to resolve */
62
62
  unresolvedSlugs: string[];
63
63
  /** Errors (non-fatal — mirror continues) */
64
64
  errors: string[];
@@ -70,7 +70,7 @@ export interface MirrorResult {
70
70
  * 1. Compute deterministic document ID
71
71
  * 2. Compute content hash of the task definition
72
72
  * 3. Check if mirror document exists with same hash → skip if unchanged
73
- * 4. Resolve canonical doc slugs → Sanity references
73
+ * 4. Resolve context doc slugs → Sanity references
74
74
  * 5. Auto-create feature areas if needed
75
75
  * 6. Upsert the ailf.task document with origin block
76
76
  */
@@ -114,8 +114,8 @@ export declare function buildMirrorDocument(task: LiteracyTaskDefinition, opts:
114
114
  _type: string;
115
115
  ownership: string;
116
116
  status: import("@sanity/ailf-core").TaskStatus;
117
- assert: Record<string, unknown>[];
118
- canonicalDocs: ({
117
+ assertions: Record<string, unknown>[];
118
+ contextDocs: ({
119
119
  _key: string;
120
120
  reason: string;
121
121
  } | {
@@ -138,9 +138,9 @@ export declare function buildMirrorDocument(task: LiteracyTaskDefinition, opts:
138
138
  _key: string;
139
139
  reason: string;
140
140
  })[];
141
- description: string;
141
+ title: string;
142
142
  docCoverage: boolean;
143
- featureArea: {
143
+ area: {
144
144
  _ref: string;
145
145
  _type: string;
146
146
  };
@@ -161,5 +161,5 @@ export declare function buildMirrorDocument(task: LiteracyTaskDefinition, opts:
161
161
  author: GitAuthor;
162
162
  lastEditor: GitAuthor;
163
163
  };
164
- taskPrompt: string;
164
+ promptText: string;
165
165
  };
@@ -26,7 +26,7 @@ import { ConsoleLogger } from "../adapters/loggers/index.js";
26
26
  * 1. Compute deterministic document ID
27
27
  * 2. Compute content hash of the task definition
28
28
  * 3. Check if mirror document exists with same hash → skip if unchanged
29
- * 4. Resolve canonical doc slugs → Sanity references
29
+ * 4. Resolve context doc slugs → Sanity references
30
30
  * 5. Auto-create feature areas if needed
31
31
  * 6. Upsert the ailf.task document with origin block
32
32
  */
@@ -43,7 +43,7 @@ export async function mirrorRepoTasks(options) {
43
43
  };
44
44
  if (tasks.length === 0)
45
45
  return result;
46
- // Batch-resolve all canonical doc slugs (slug refs only — other ref types
46
+ // Batch-resolve all context doc slugs (slug refs only — other ref types
47
47
  // are stored without a resolved article reference for now)
48
48
  const allSlugs = [
49
49
  ...new Set(tasks.flatMap((t) => (t.context?.docs ?? []).filter(isSlugRef).map((d) => d.slug))),
@@ -353,10 +353,10 @@ async function fetchExistingDocState(client, docIds) {
353
353
  /** @internal Exported for testing — not part of the public API. */
354
354
  export function buildMirrorDocument(task, opts) {
355
355
  const { contentHash, docId, existingAuthor, git, slugToDocId } = opts;
356
- // Build canonical docs with resolved references and correct refType.
356
+ // Build context docs with resolved references and correct refType.
357
357
  // Each ref type gets the appropriate resolution fields set on the
358
358
  // mirror document so Studio can display them correctly.
359
- const canonicalDocs = (task.context?.docs ?? []).map((ref, i) => {
359
+ const contextDocs = (task.context?.docs ?? []).map((ref, i) => {
360
360
  const base = { _key: `cd${i}`, reason: ref.reason ?? "" };
361
361
  if (isSlugRef(ref)) {
362
362
  const resolvedId = slugToDocId.get(ref.slug);
@@ -428,11 +428,11 @@ export function buildMirrorDocument(task, opts) {
428
428
  _type: "ailf.task",
429
429
  ownership: "repo",
430
430
  status: task.status ?? "active",
431
- assert: assertArray,
432
- canonicalDocs,
433
- description: task.title,
431
+ assertions: assertArray,
432
+ contextDocs,
433
+ title: task.title,
434
434
  docCoverage: task.docCoverage ?? false,
435
- featureArea: {
435
+ area: {
436
436
  _ref: `ailf.featureArea.${area}`,
437
437
  _type: "reference",
438
438
  },
@@ -452,7 +452,7 @@ export function buildMirrorDocument(task, opts) {
452
452
  author: existingAuthor ?? git.author,
453
453
  lastEditor: git.author,
454
454
  },
455
- taskPrompt: task.prompt?.text ?? "",
455
+ promptText: task.prompt?.text ?? "",
456
456
  ...(task.baseline
457
457
  ? {
458
458
  baseline: {
@@ -145,7 +145,7 @@ export async function buildPipelinePlan(opts, rootDir) {
145
145
  const rawTasks = await loadAllTsTaskFiles(modeTasksDir);
146
146
  if (rawTasks.length > 0) {
147
147
  // Dynamic import of the handler module
148
- const handlerModulePath = `./compiler/mode-handlers/${opts.mode}-handler.js`;
148
+ const handlerModulePath = `./compiler/mode-handlers/${opts.mode}/index.js`;
149
149
  const mod = await import(handlerModulePath);
150
150
  const handler = mod.handler;
151
151
  for (const rawFile of rawTasks) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@sanity/ailf",
3
- "version": "1.0.0",
3
+ "version": "2.0.0",
4
4
  "private": false,
5
5
  "publishConfig": {
6
6
  "access": "restricted"
@@ -13,6 +13,14 @@
13
13
  },
14
14
  "description": "AI Literacy Framework - Evaluation tool for Sanity documentation",
15
15
  "type": "module",
16
+ "exports": {
17
+ ".": {
18
+ "types": "./dist/index.d.ts",
19
+ "import": "./dist/index.js"
20
+ }
21
+ },
22
+ "main": "dist/index.js",
23
+ "types": "dist/index.d.ts",
16
24
  "bin": {
17
25
  "ailf": "./bin/ailf.js"
18
26
  },
@@ -26,6 +34,7 @@
26
34
  "dependencies": {
27
35
  "@google-cloud/bigquery": "^8.1.1",
28
36
  "@inquirer/prompts": "^8.3.0",
37
+ "@modelcontextprotocol/sdk": "^1.29.0",
29
38
  "@portabletext/markdown": "^1.0.0",
30
39
  "@sanity/client": "^7.3.0",
31
40
  "commander": "^14.0.3",
@@ -41,9 +50,8 @@
41
50
  "@types/node": "^22.13.1",
42
51
  "tsx": "^4.19.2",
43
52
  "typescript": "^5.7.3",
44
- "@sanity/ailf-core": "0.1.0",
45
53
  "@sanity/ailf-shared": "0.1.0",
46
- "@sanity/ailf-tasks": "0.1.4"
54
+ "@sanity/ailf-core": "0.1.0"
47
55
  },
48
56
  "scripts": {
49
57
  "build": "tsc && tsx scripts/bundle-workspace-deps.ts",
@@ -1,8 +0,0 @@
1
- /**
2
- * cli.ts — Minimal CLI for standalone task validation.
3
- *
4
- * Usage:
5
- * npx @sanity/ailf-tasks validate .ailf/tasks/
6
- * npx @sanity/ailf-tasks validate # defaults to .ailf/tasks/
7
- */
8
- export declare function run(): void;
@@ -1,61 +0,0 @@
1
- /**
2
- * cli.ts — Minimal CLI for standalone task validation.
3
- *
4
- * Usage:
5
- * npx @sanity/ailf-tasks validate .ailf/tasks/
6
- * npx @sanity/ailf-tasks validate # defaults to .ailf/tasks/
7
- */
8
- import { loadTaskDir } from "./parser.js";
9
- import { formatValidationResult, validateRepoTasks } from "./validation.js";
10
- export function run() {
11
- const args = process.argv.slice(2);
12
- const command = args[0];
13
- if (command === "validate") {
14
- const dir = args[1] ?? ".ailf/tasks";
15
- validateCommand(dir);
16
- }
17
- else if (command === "--help" ||
18
- command === "-h" ||
19
- command === undefined) {
20
- printUsage();
21
- }
22
- else {
23
- console.error(`Unknown command: ${command}`);
24
- printUsage();
25
- process.exit(1);
26
- }
27
- }
28
- function validateCommand(dir) {
29
- try {
30
- const tasks = loadTaskDir(dir);
31
- // Run semantic validation
32
- const result = validateRepoTasks(tasks);
33
- const formatted = formatValidationResult(result);
34
- console.log(`✅ ${tasks.length} task(s) validated from ${dir}`);
35
- for (const task of tasks) {
36
- console.log(` ${task.id} — ${task.description}`);
37
- }
38
- if (result.warnings.length > 0 || result.errors.length > 0) {
39
- console.log("");
40
- console.log(formatted);
41
- }
42
- if (!result.valid) {
43
- process.exit(1);
44
- }
45
- }
46
- catch (err) {
47
- console.error(`❌ ${err instanceof Error ? err.message : String(err)}`);
48
- process.exit(1);
49
- }
50
- }
51
- function printUsage() {
52
- console.log("Usage: ailf-tasks <command> [options]");
53
- console.log("");
54
- console.log("Commands:");
55
- console.log(" validate [dir] Validate task YAML files (default: .ailf/tasks/)");
56
- console.log("");
57
- console.log("Examples:");
58
- console.log(" ailf-tasks validate");
59
- console.log(" ailf-tasks validate .ailf/tasks/");
60
- console.log(" ailf-tasks validate /path/to/tasks/");
61
- }
@@ -1,13 +0,0 @@
1
- /**
2
- * @sanity/ailf-tasks — Task definition schemas and YAML parser.
3
- *
4
- * Lightweight package for parsing and validating .ailf/tasks/*.yaml files
5
- * without depending on the full AILF CLI or its heavyweight dependencies
6
- * (Promptfoo, LLM SDKs, Sanity client).
7
- *
8
- * Usage:
9
- * import { parseTaskFile, loadTaskDir, RepoTaskSchema } from '@sanity/ailf-tasks'
10
- */
11
- export { CURATED_ASSERTION_TYPES, RepoTaskFileSchema, RepoTaskSchema, RUBRIC_TEMPLATE_NAMES, type CuratedAssertionType, type RepoTask, type RubricTemplateName, } from "./schemas.js";
12
- export { loadTaskDir, parseTaskFile } from "./parser.js";
13
- export { detectSnakeCaseFields, formatValidationResult, validateRepoTasks, type ValidationMessage, type ValidationResult, } from "./validation.js";
@@ -1,16 +0,0 @@
1
- /**
2
- * @sanity/ailf-tasks — Task definition schemas and YAML parser.
3
- *
4
- * Lightweight package for parsing and validating .ailf/tasks/*.yaml files
5
- * without depending on the full AILF CLI or its heavyweight dependencies
6
- * (Promptfoo, LLM SDKs, Sanity client).
7
- *
8
- * Usage:
9
- * import { parseTaskFile, loadTaskDir, RepoTaskSchema } from '@sanity/ailf-tasks'
10
- */
11
- // Schemas and types
12
- export { CURATED_ASSERTION_TYPES, RepoTaskFileSchema, RepoTaskSchema, RUBRIC_TEMPLATE_NAMES, } from "./schemas.js";
13
- // Parsing
14
- export { loadTaskDir, parseTaskFile } from "./parser.js";
15
- // Validation
16
- export { detectSnakeCaseFields, formatValidationResult, validateRepoTasks, } from "./validation.js";
@@ -1,27 +0,0 @@
1
- /**
2
- * parser.ts — Standalone task file and directory parsing.
3
- *
4
- * High-level functions for loading and validating .ailf/tasks/ YAML
5
- * files without any dependency on the eval pipeline.
6
- *
7
- * Usage:
8
- * import { parseTaskFile, loadTaskDir } from '@sanity/ailf-tasks'
9
- */
10
- import { type RepoTask } from "./schemas.js";
11
- /**
12
- * Parse a single task YAML string and return validated tasks.
13
- *
14
- * @param content - Raw YAML string content
15
- * @param filename - Source filename (for error messages)
16
- * @returns Validated array of RepoTask objects
17
- * @throws Error if YAML parsing or Zod validation fails
18
- */
19
- export declare function parseTaskFile(content: string, filename?: string): RepoTask[];
20
- /**
21
- * Load and parse all task YAML files from a directory.
22
- *
23
- * @param dirPath - Path to directory containing .yaml/.yml files
24
- * @returns All validated tasks, sorted by filename
25
- * @throws Error if directory not found, no YAML files, or validation fails
26
- */
27
- export declare function loadTaskDir(dirPath: string): RepoTask[];
@@ -1,73 +0,0 @@
1
- /**
2
- * parser.ts — Standalone task file and directory parsing.
3
- *
4
- * High-level functions for loading and validating .ailf/tasks/ YAML
5
- * files without any dependency on the eval pipeline.
6
- *
7
- * Usage:
8
- * import { parseTaskFile, loadTaskDir } from '@sanity/ailf-tasks'
9
- */
10
- import { existsSync, readdirSync, readFileSync } from "fs";
11
- import { resolve } from "path";
12
- import { load } from "js-yaml";
13
- import { RepoTaskFileSchema } from "./schemas.js";
14
- // ---------------------------------------------------------------------------
15
- // Public API
16
- // ---------------------------------------------------------------------------
17
- /**
18
- * Parse a single task YAML string and return validated tasks.
19
- *
20
- * @param content - Raw YAML string content
21
- * @param filename - Source filename (for error messages)
22
- * @returns Validated array of RepoTask objects
23
- * @throws Error if YAML parsing or Zod validation fails
24
- */
25
- export function parseTaskFile(content, filename = "<string>") {
26
- const parsed = load(content);
27
- if (!Array.isArray(parsed)) {
28
- throw new Error(`${filename} did not parse to an array of tasks. ` +
29
- "Task files must contain a YAML array of task definitions.");
30
- }
31
- const result = RepoTaskFileSchema.safeParse(parsed);
32
- if (!result.success) {
33
- const messages = result.error.issues
34
- .map((i) => ` [${i.path.join(".")}]: ${i.message}`)
35
- .join("\n");
36
- throw new Error(`Invalid task file "${filename}":\n${messages}`);
37
- }
38
- return result.data;
39
- }
40
- /**
41
- * Load and parse all task YAML files from a directory.
42
- *
43
- * @param dirPath - Path to directory containing .yaml/.yml files
44
- * @returns All validated tasks, sorted by filename
45
- * @throws Error if directory not found, no YAML files, or validation fails
46
- */
47
- export function loadTaskDir(dirPath) {
48
- if (!existsSync(dirPath)) {
49
- throw new Error(`Tasks directory not found: ${dirPath}\n` +
50
- " Expected a directory containing .ailf/tasks/*.yaml files.");
51
- }
52
- const yamlFiles = readdirSync(dirPath)
53
- .filter((f) => (f.endsWith(".yaml") || f.endsWith(".yml")) && !f.startsWith("."))
54
- .sort();
55
- if (yamlFiles.length === 0) {
56
- throw new Error(`No YAML files found in ${dirPath}\n` +
57
- " Expected .ailf/tasks/*.yaml files with task definitions.");
58
- }
59
- const allTasks = [];
60
- for (const file of yamlFiles) {
61
- const filePath = resolve(dirPath, file);
62
- const content = readFileSync(filePath, "utf-8");
63
- try {
64
- const tasks = parseTaskFile(content, file);
65
- allTasks.push(...tasks);
66
- }
67
- catch (err) {
68
- const msg = err instanceof Error ? err.message : String(err);
69
- throw new Error(`Failed to load ${file}:\n${msg}`, { cause: err });
70
- }
71
- }
72
- return allTasks;
73
- }