@sanity/ailf 1.0.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +0 -1
- package/config/models.ts +15 -3
- package/dist/_vendor/ailf-core/config-helpers.d.ts +14 -17
- package/dist/_vendor/ailf-core/config-helpers.js +22 -2
- package/dist/_vendor/ailf-core/examples/index.d.ts +16 -0
- package/dist/_vendor/ailf-core/examples/index.js +25 -0
- package/dist/_vendor/ailf-core/index.d.ts +2 -2
- package/dist/_vendor/ailf-core/index.js +1 -1
- package/dist/_vendor/ailf-core/ports/context.d.ts +2 -0
- package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +1 -0
- package/dist/_vendor/ailf-core/schemas/eval-config.js +10 -0
- package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +1 -0
- package/dist/_vendor/ailf-core/schemas/pipeline-request.js +2 -0
- package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +0 -2
- package/dist/_vendor/ailf-core/schemas/pipeline.js +0 -1
- package/dist/_vendor/ailf-core/types/generalized-task.d.ts +13 -0
- package/dist/_vendor/ailf-core/types/index.d.ts +1 -3
- package/dist/_vendor/ailf-core/types/plugin-registry.d.ts +78 -23
- package/dist/_vendor/ailf-core/types/plugin-registry.js +73 -20
- package/dist/adapters/config-sources/file-config-adapter.js +1 -0
- package/dist/adapters/config-sources/ts-config-loader.js +21 -13
- package/dist/adapters/task-sources/content-lake-task-source.js +17 -20
- package/dist/adapters/task-sources/index.d.ts +2 -2
- package/dist/adapters/task-sources/index.js +2 -2
- package/dist/adapters/task-sources/repo-schemas.d.ts +218 -16
- package/dist/adapters/task-sources/repo-schemas.js +227 -19
- package/dist/adapters/task-sources/repo-task-source.d.ts +14 -10
- package/dist/adapters/task-sources/repo-task-source.js +81 -122
- package/dist/adapters/task-sources/repo-validation.d.ts +36 -5
- package/dist/adapters/task-sources/repo-validation.js +126 -5
- package/dist/adapters/task-sources/task-file-loader.d.ts +2 -2
- package/dist/adapters/task-sources/task-file-loader.js +2 -2
- package/dist/commands/coverage-audit.js +3 -1
- package/dist/commands/init.d.ts +6 -4
- package/dist/commands/init.js +302 -23
- package/dist/commands/validate-tasks.d.ts +2 -2
- package/dist/commands/validate-tasks.js +26 -15
- package/dist/composition-root.d.ts +13 -1
- package/dist/composition-root.js +73 -41
- package/dist/index.d.ts +41 -0
- package/dist/index.js +48 -0
- package/dist/orchestration/build-step-sequence.js +4 -2
- package/dist/orchestration/steps/fetch-docs-step.js +2 -3
- package/dist/orchestration/steps/generate-configs-step.js +28 -12
- package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js +1 -1
- package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.js +1 -1
- package/dist/pipeline/compiler/__tests__/literacy-handler.test.js +1 -1
- package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +105 -68
- package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.js +33 -100
- package/dist/pipeline/compiler/literacy-bridge.d.ts +1 -1
- package/dist/pipeline/compiler/literacy-bridge.js +1 -1
- package/dist/pipeline/compiler/mode-bases/agent-harness.d.ts +10 -0
- package/dist/pipeline/compiler/mode-bases/agent-harness.js +21 -0
- package/dist/pipeline/compiler/mode-bases/index.d.ts +4 -0
- package/dist/pipeline/compiler/mode-bases/index.js +4 -0
- package/dist/pipeline/compiler/mode-bases/knowledge-probe.d.ts +10 -0
- package/dist/pipeline/compiler/mode-bases/knowledge-probe.js +22 -0
- package/dist/pipeline/compiler/mode-bases/literacy.d.ts +12 -0
- package/dist/pipeline/compiler/mode-bases/literacy.js +78 -0
- package/dist/pipeline/compiler/mode-bases/mcp-server.d.ts +10 -0
- package/dist/pipeline/compiler/mode-bases/mcp-server.js +70 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.d.ts +43 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.js +187 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.js +138 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/index.d.ts +16 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/index.js +43 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.js +29 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.d.ts +12 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.js +82 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.d.ts +4 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.js +19 -0
- package/dist/pipeline/compiler/mode-handlers/{agent-harness-handler.d.ts → agent-harness/types.d.ts} +3 -24
- package/dist/pipeline/compiler/mode-handlers/agent-harness/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.js +16 -0
- package/dist/pipeline/compiler/mode-handlers/index.d.ts +4 -5
- package/dist/pipeline/compiler/mode-handlers/index.js +4 -6
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.d.ts +16 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.js +61 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.d.ts +18 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.js +112 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.d.ts +26 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.js +49 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.js +28 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.d.ts +44 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.js +24 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/assertions.d.ts +18 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/assertions.js +118 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/compiler.d.ts +14 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/compiler.js +105 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/index.d.ts +11 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/index.js +38 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/prompts.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/prompts.js +74 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/types.d.ts +41 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/validation.d.ts +12 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/validation.js +28 -0
- package/dist/pipeline/compiler/mode-handlers/{mcp-assertions.d.ts → mcp-server/assertions.d.ts} +2 -10
- package/dist/pipeline/compiler/mode-handlers/{mcp-assertions.js → mcp-server/assertions.js} +63 -6
- package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.js +100 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/index.d.ts +27 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/index.js +54 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.d.ts +8 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.js +28 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.d.ts +28 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.js +104 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/types.d.ts +37 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.js +43 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.d.ts +33 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.js +174 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.js +95 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.js +172 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.d.ts +14 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.js +16 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.d.ts +93 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.js +4 -0
- package/dist/pipeline/compiler/preset-loader.d.ts +22 -0
- package/dist/pipeline/compiler/preset-loader.js +99 -0
- package/dist/pipeline/compiler/presets/sanity-literacy.d.ts +6 -9
- package/dist/pipeline/compiler/presets/sanity-literacy.js +10 -156
- package/dist/pipeline/expand-tasks.d.ts +2 -2
- package/dist/pipeline/expand-tasks.js +2 -2
- package/dist/pipeline/generate-configs.js +1 -1
- package/dist/pipeline/map-request-to-config.js +1 -0
- package/dist/pipeline/mirror-repo-tasks.d.ts +7 -7
- package/dist/pipeline/mirror-repo-tasks.js +9 -9
- package/dist/pipeline/plan.js +1 -1
- package/package.json +11 -3
- package/dist/_vendor/ailf-tasks/cli.d.ts +0 -8
- package/dist/_vendor/ailf-tasks/cli.js +0 -61
- package/dist/_vendor/ailf-tasks/index.d.ts +0 -13
- package/dist/_vendor/ailf-tasks/index.js +0 -16
- package/dist/_vendor/ailf-tasks/parser.d.ts +0 -27
- package/dist/_vendor/ailf-tasks/parser.js +0 -73
- package/dist/_vendor/ailf-tasks/schemas.d.ts +0 -198
- package/dist/_vendor/ailf-tasks/schemas.js +0 -180
- package/dist/_vendor/ailf-tasks/validation.d.ts +0 -47
- package/dist/_vendor/ailf-tasks/validation.js +0 -162
- package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.js +0 -485
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.d.ts +0 -76
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.js +0 -245
- package/dist/pipeline/compiler/mode-handlers/literacy-handler.d.ts +0 -89
- package/dist/pipeline/compiler/mode-handlers/literacy-handler.js +0 -379
- package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.d.ts +0 -67
- package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.js +0 -309
|
@@ -1,24 +1,20 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* sanity-literacy preset —
|
|
2
|
+
* sanity-literacy preset — Sanity-specific domain configuration for literacy evaluation.
|
|
3
3
|
*
|
|
4
|
-
*
|
|
5
|
-
* -
|
|
6
|
-
* - Prompt templates (with-docs, without-docs, agentic)
|
|
7
|
-
* - Rubric templates (task-completion, code-correctness, doc-coverage)
|
|
8
|
-
* - Scoring profiles (default, output-only)
|
|
4
|
+
* This is a domain preset that targets the `literacy` mode base. It provides
|
|
5
|
+
* Sanity-specific configuration:
|
|
9
6
|
* - Sanity doc source definitions (production, branch, local)
|
|
10
7
|
* - Product feature registry for coverage auditing
|
|
11
8
|
* - DocFetcher factory (SanityDocFetcher)
|
|
12
|
-
* -
|
|
9
|
+
* - Sanity fixture resolver (sanity:// scheme)
|
|
13
10
|
*
|
|
14
|
-
*
|
|
15
|
-
*
|
|
11
|
+
* Evaluation methodology (rubrics, scoring, prompts) is inherited from the
|
|
12
|
+
* `literacy` mode base — see mode-bases/literacy.ts.
|
|
16
13
|
*
|
|
17
14
|
* @see docs/exec-plans/architecture-overhaul/phase-8-scoring-storage-presets.md
|
|
18
15
|
*/
|
|
19
16
|
import { env } from "../../../_vendor/ailf-core/index.js";
|
|
20
17
|
import { SanityDocFetcher } from "../../../adapters/doc-fetchers/index.js";
|
|
21
|
-
import { LITERACY_PROMPT_TEMPLATES } from "../mode-handlers/literacy-handler.js";
|
|
22
18
|
// ---------------------------------------------------------------------------
|
|
23
19
|
// Factory
|
|
24
20
|
// ---------------------------------------------------------------------------
|
|
@@ -41,130 +37,10 @@ export function createSanityLiteracyPreset(options) {
|
|
|
41
37
|
"features correctly.",
|
|
42
38
|
pluginApiVersion: 1,
|
|
43
39
|
},
|
|
44
|
-
// ── Mode
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
label: "Documentation Literacy",
|
|
49
|
-
validProviderPatterns: ["^openai:", "^anthropic:", "^file://"],
|
|
50
|
-
rubricTemplateIds: [
|
|
51
|
-
"task-completion",
|
|
52
|
-
"code-correctness",
|
|
53
|
-
"doc-coverage",
|
|
54
|
-
],
|
|
55
|
-
handlerModule: "./mode-handlers/literacy-handler.js",
|
|
56
|
-
},
|
|
57
|
-
],
|
|
58
|
-
// ── Assertions ───────────────────────────────────────────
|
|
59
|
-
assertions: [
|
|
60
|
-
{
|
|
61
|
-
type: "contains",
|
|
62
|
-
label: "Contains text",
|
|
63
|
-
compatibleModes: ["literacy", "knowledge-probe", "mcp-server"],
|
|
64
|
-
handlerModule: "promptfoo:builtin",
|
|
65
|
-
},
|
|
66
|
-
{
|
|
67
|
-
type: "contains-all",
|
|
68
|
-
label: "Contains all texts",
|
|
69
|
-
compatibleModes: ["literacy", "knowledge-probe", "mcp-server"],
|
|
70
|
-
handlerModule: "promptfoo:builtin",
|
|
71
|
-
},
|
|
72
|
-
{
|
|
73
|
-
type: "contains-any",
|
|
74
|
-
label: "Contains any text",
|
|
75
|
-
compatibleModes: ["literacy", "knowledge-probe", "mcp-server"],
|
|
76
|
-
handlerModule: "promptfoo:builtin",
|
|
77
|
-
},
|
|
78
|
-
{
|
|
79
|
-
type: "equals",
|
|
80
|
-
label: "Exact match",
|
|
81
|
-
compatibleModes: ["literacy"],
|
|
82
|
-
handlerModule: "promptfoo:builtin",
|
|
83
|
-
},
|
|
84
|
-
{
|
|
85
|
-
type: "regex",
|
|
86
|
-
label: "Regex match",
|
|
87
|
-
compatibleModes: ["literacy", "knowledge-probe"],
|
|
88
|
-
handlerModule: "promptfoo:builtin",
|
|
89
|
-
},
|
|
90
|
-
{
|
|
91
|
-
type: "is-json",
|
|
92
|
-
label: "Valid JSON",
|
|
93
|
-
compatibleModes: ["literacy", "mcp-server"],
|
|
94
|
-
handlerModule: "promptfoo:builtin",
|
|
95
|
-
},
|
|
96
|
-
{
|
|
97
|
-
type: "javascript",
|
|
98
|
-
label: "JavaScript assertion",
|
|
99
|
-
compatibleModes: [
|
|
100
|
-
"literacy",
|
|
101
|
-
"mcp-server",
|
|
102
|
-
"agent-harness",
|
|
103
|
-
"knowledge-probe",
|
|
104
|
-
"custom",
|
|
105
|
-
],
|
|
106
|
-
handlerModule: "promptfoo:builtin",
|
|
107
|
-
},
|
|
108
|
-
{
|
|
109
|
-
type: "llm-rubric",
|
|
110
|
-
label: "LLM-graded rubric",
|
|
111
|
-
compatibleModes: [
|
|
112
|
-
"literacy",
|
|
113
|
-
"mcp-server",
|
|
114
|
-
"agent-harness",
|
|
115
|
-
"knowledge-probe",
|
|
116
|
-
"custom",
|
|
117
|
-
],
|
|
118
|
-
handlerModule: "promptfoo:builtin",
|
|
119
|
-
},
|
|
120
|
-
{
|
|
121
|
-
type: "similar",
|
|
122
|
-
label: "Semantic similarity",
|
|
123
|
-
compatibleModes: ["literacy", "knowledge-probe"],
|
|
124
|
-
handlerModule: "promptfoo:builtin",
|
|
125
|
-
},
|
|
126
|
-
],
|
|
127
|
-
// ── Rubric templates ─────────────────────────────────────
|
|
128
|
-
rubricTemplates: [
|
|
129
|
-
{
|
|
130
|
-
id: "task-completion",
|
|
131
|
-
dimension: "task-completion",
|
|
132
|
-
header: "Score task completion from 0 to 100:",
|
|
133
|
-
scale: [
|
|
134
|
-
"0: Couldn't attempt — missing critical information",
|
|
135
|
-
"20: Attempted but fundamentally wrong approach",
|
|
136
|
-
"50: Partial implementation — major functional gaps",
|
|
137
|
-
"80: Mostly complete — minor issues or missing edge cases",
|
|
138
|
-
"100: Fully functional code — works as expected",
|
|
139
|
-
],
|
|
140
|
-
criteriaLabel: "Must demonstrate:",
|
|
141
|
-
},
|
|
142
|
-
{
|
|
143
|
-
id: "code-correctness",
|
|
144
|
-
dimension: "code-correctness",
|
|
145
|
-
header: "Score code correctness from 0 to 100:",
|
|
146
|
-
scale: [
|
|
147
|
-
"0: Broken code, syntax errors, or deprecated APIs",
|
|
148
|
-
"30: Works but uses anti-patterns or inefficient approaches",
|
|
149
|
-
"50: Works but not idiomatic",
|
|
150
|
-
"80: Follows most best practices",
|
|
151
|
-
"100: Follows all best practices, idiomatic implementation",
|
|
152
|
-
],
|
|
153
|
-
criteriaLabel: "Check for:",
|
|
154
|
-
},
|
|
155
|
-
{
|
|
156
|
-
id: "doc-coverage",
|
|
157
|
-
dimension: "doc-coverage",
|
|
158
|
-
header: "Score documentation coverage from 0 to 100:",
|
|
159
|
-
scale: [
|
|
160
|
-
"0: Had to hallucinate/guess most implementation details",
|
|
161
|
-
"30: Significant gaps — filled with assumptions",
|
|
162
|
-
"50: Some gaps — inferred from partial information",
|
|
163
|
-
"80: Minor gaps — almost everything was documented",
|
|
164
|
-
"100: Complete coverage — all necessary info was in docs",
|
|
165
|
-
],
|
|
166
|
-
},
|
|
167
|
-
],
|
|
40
|
+
// ── Mode ──────────────────────────────────────────────────
|
|
41
|
+
// Targets the literacy mode base. Evaluation methodology (rubrics,
|
|
42
|
+
// scoring, prompts) is inherited from mode-bases/literacy.ts.
|
|
43
|
+
mode: "literacy",
|
|
168
44
|
// ── Fixture resolvers ────────────────────────────────────
|
|
169
45
|
fixtureResolvers: [
|
|
170
46
|
{
|
|
@@ -172,22 +48,6 @@ export function createSanityLiteracyPreset(options) {
|
|
|
172
48
|
handlerModule: "./fixture-resolver.js",
|
|
173
49
|
},
|
|
174
50
|
],
|
|
175
|
-
// ── Prompt templates (from literacy handler) ─────────────
|
|
176
|
-
promptTemplates: LITERACY_PROMPT_TEMPLATES,
|
|
177
|
-
// ── Scoring profiles ─────────────────────────────────────
|
|
178
|
-
// Literacy-relevant profiles only; mode-specific profiles for
|
|
179
|
-
// mcp-server, knowledge-probe, etc. belong in their own presets.
|
|
180
|
-
scoringProfiles: {
|
|
181
|
-
default: {
|
|
182
|
-
"task-completion": 0.5,
|
|
183
|
-
"code-correctness": 0.25,
|
|
184
|
-
"doc-coverage": 0.25,
|
|
185
|
-
},
|
|
186
|
-
"output-only": {
|
|
187
|
-
"task-completion": 0.6,
|
|
188
|
-
"code-correctness": 0.4,
|
|
189
|
-
},
|
|
190
|
-
},
|
|
191
51
|
// ── Doc fetcher factory ──────────────────────────────────
|
|
192
52
|
// Closure captures rootDir so the registry can instantiate
|
|
193
53
|
// the fetcher without knowing about Sanity internals.
|
|
@@ -227,7 +87,6 @@ export function createSanityLiteracyPreset(options) {
|
|
|
227
87
|
status: "covered",
|
|
228
88
|
area: "groq",
|
|
229
89
|
priority: "critical",
|
|
230
|
-
taskCount: 3,
|
|
231
90
|
},
|
|
232
91
|
{
|
|
233
92
|
id: "visual-editing",
|
|
@@ -236,7 +95,6 @@ export function createSanityLiteracyPreset(options) {
|
|
|
236
95
|
status: "covered",
|
|
237
96
|
area: "visual-editing",
|
|
238
97
|
priority: "critical",
|
|
239
|
-
taskCount: 1,
|
|
240
98
|
},
|
|
241
99
|
{
|
|
242
100
|
id: "nextjs-live",
|
|
@@ -245,7 +103,6 @@ export function createSanityLiteracyPreset(options) {
|
|
|
245
103
|
status: "covered",
|
|
246
104
|
area: "nextjs-live",
|
|
247
105
|
priority: "high",
|
|
248
|
-
taskCount: 2,
|
|
249
106
|
},
|
|
250
107
|
{
|
|
251
108
|
id: "functions",
|
|
@@ -254,7 +111,6 @@ export function createSanityLiteracyPreset(options) {
|
|
|
254
111
|
status: "covered",
|
|
255
112
|
area: "functions",
|
|
256
113
|
priority: "high",
|
|
257
|
-
taskCount: 2,
|
|
258
114
|
},
|
|
259
115
|
{
|
|
260
116
|
id: "studio-setup",
|
|
@@ -263,7 +119,6 @@ export function createSanityLiteracyPreset(options) {
|
|
|
263
119
|
status: "covered",
|
|
264
120
|
area: "studio-setup",
|
|
265
121
|
priority: "high",
|
|
266
|
-
taskCount: 1,
|
|
267
122
|
},
|
|
268
123
|
{
|
|
269
124
|
id: "frameworks",
|
|
@@ -272,7 +127,6 @@ export function createSanityLiteracyPreset(options) {
|
|
|
272
127
|
status: "covered",
|
|
273
128
|
area: "frameworks",
|
|
274
129
|
priority: "high",
|
|
275
|
-
taskCount: 2,
|
|
276
130
|
},
|
|
277
131
|
// Uncovered (no evaluation tasks yet)
|
|
278
132
|
{
|
|
@@ -4,9 +4,9 @@
|
|
|
4
4
|
* @deprecated This is part of the LEGACY compilation path. New code should
|
|
5
5
|
* use the literacy handler in the compiler pipeline instead:
|
|
6
6
|
*
|
|
7
|
-
* import { compileLiteracyTask } from "./compiler/mode-handlers/literacy
|
|
7
|
+
* import { compileLiteracyTask } from "./compiler/mode-handlers/literacy/index.js"
|
|
8
8
|
*
|
|
9
|
-
* @see packages/eval/src/pipeline/compiler/mode-handlers/literacy
|
|
9
|
+
* @see packages/eval/src/pipeline/compiler/mode-handlers/literacy/index.ts
|
|
10
10
|
*
|
|
11
11
|
* ---
|
|
12
12
|
*
|
|
@@ -4,9 +4,9 @@
|
|
|
4
4
|
* @deprecated This is part of the LEGACY compilation path. New code should
|
|
5
5
|
* use the literacy handler in the compiler pipeline instead:
|
|
6
6
|
*
|
|
7
|
-
* import { compileLiteracyTask } from "./compiler/mode-handlers/literacy
|
|
7
|
+
* import { compileLiteracyTask } from "./compiler/mode-handlers/literacy/index.js"
|
|
8
8
|
*
|
|
9
|
-
* @see packages/eval/src/pipeline/compiler/mode-handlers/literacy
|
|
9
|
+
* @see packages/eval/src/pipeline/compiler/mode-handlers/literacy/index.ts
|
|
10
10
|
*
|
|
11
11
|
* ---
|
|
12
12
|
*
|
|
@@ -39,7 +39,7 @@ import { resolve } from "path";
|
|
|
39
39
|
import { dump } from "js-yaml";
|
|
40
40
|
import { ConsoleLogger } from "../adapters/loggers/index.js";
|
|
41
41
|
import { loadConfigFile } from "./compiler/config-loader.js";
|
|
42
|
-
import { LITERACY_PROMPT_TEMPLATES } from "./compiler/mode-handlers/literacy
|
|
42
|
+
import { LITERACY_PROMPT_TEMPLATES } from "./compiler/mode-handlers/literacy/index.js";
|
|
43
43
|
import { expandTaskDefinitions, loadAndExpandTasks } from "./expand-tasks.js";
|
|
44
44
|
import { validateModelsYaml } from "./validate.js";
|
|
45
45
|
import { LiteracyVariant } from "./normalize-mode.js";
|
|
@@ -58,7 +58,7 @@ export interface MirrorResult {
|
|
|
58
58
|
skipped: number;
|
|
59
59
|
/** Feature areas auto-created */
|
|
60
60
|
areasCreated: string[];
|
|
61
|
-
/**
|
|
61
|
+
/** Context doc slugs that failed to resolve */
|
|
62
62
|
unresolvedSlugs: string[];
|
|
63
63
|
/** Errors (non-fatal — mirror continues) */
|
|
64
64
|
errors: string[];
|
|
@@ -70,7 +70,7 @@ export interface MirrorResult {
|
|
|
70
70
|
* 1. Compute deterministic document ID
|
|
71
71
|
* 2. Compute content hash of the task definition
|
|
72
72
|
* 3. Check if mirror document exists with same hash → skip if unchanged
|
|
73
|
-
* 4. Resolve
|
|
73
|
+
* 4. Resolve context doc slugs → Sanity references
|
|
74
74
|
* 5. Auto-create feature areas if needed
|
|
75
75
|
* 6. Upsert the ailf.task document with origin block
|
|
76
76
|
*/
|
|
@@ -114,8 +114,8 @@ export declare function buildMirrorDocument(task: LiteracyTaskDefinition, opts:
|
|
|
114
114
|
_type: string;
|
|
115
115
|
ownership: string;
|
|
116
116
|
status: import("@sanity/ailf-core").TaskStatus;
|
|
117
|
-
|
|
118
|
-
|
|
117
|
+
assertions: Record<string, unknown>[];
|
|
118
|
+
contextDocs: ({
|
|
119
119
|
_key: string;
|
|
120
120
|
reason: string;
|
|
121
121
|
} | {
|
|
@@ -138,9 +138,9 @@ export declare function buildMirrorDocument(task: LiteracyTaskDefinition, opts:
|
|
|
138
138
|
_key: string;
|
|
139
139
|
reason: string;
|
|
140
140
|
})[];
|
|
141
|
-
|
|
141
|
+
title: string;
|
|
142
142
|
docCoverage: boolean;
|
|
143
|
-
|
|
143
|
+
area: {
|
|
144
144
|
_ref: string;
|
|
145
145
|
_type: string;
|
|
146
146
|
};
|
|
@@ -161,5 +161,5 @@ export declare function buildMirrorDocument(task: LiteracyTaskDefinition, opts:
|
|
|
161
161
|
author: GitAuthor;
|
|
162
162
|
lastEditor: GitAuthor;
|
|
163
163
|
};
|
|
164
|
-
|
|
164
|
+
promptText: string;
|
|
165
165
|
};
|
|
@@ -26,7 +26,7 @@ import { ConsoleLogger } from "../adapters/loggers/index.js";
|
|
|
26
26
|
* 1. Compute deterministic document ID
|
|
27
27
|
* 2. Compute content hash of the task definition
|
|
28
28
|
* 3. Check if mirror document exists with same hash → skip if unchanged
|
|
29
|
-
* 4. Resolve
|
|
29
|
+
* 4. Resolve context doc slugs → Sanity references
|
|
30
30
|
* 5. Auto-create feature areas if needed
|
|
31
31
|
* 6. Upsert the ailf.task document with origin block
|
|
32
32
|
*/
|
|
@@ -43,7 +43,7 @@ export async function mirrorRepoTasks(options) {
|
|
|
43
43
|
};
|
|
44
44
|
if (tasks.length === 0)
|
|
45
45
|
return result;
|
|
46
|
-
// Batch-resolve all
|
|
46
|
+
// Batch-resolve all context doc slugs (slug refs only — other ref types
|
|
47
47
|
// are stored without a resolved article reference for now)
|
|
48
48
|
const allSlugs = [
|
|
49
49
|
...new Set(tasks.flatMap((t) => (t.context?.docs ?? []).filter(isSlugRef).map((d) => d.slug))),
|
|
@@ -353,10 +353,10 @@ async function fetchExistingDocState(client, docIds) {
|
|
|
353
353
|
/** @internal Exported for testing — not part of the public API. */
|
|
354
354
|
export function buildMirrorDocument(task, opts) {
|
|
355
355
|
const { contentHash, docId, existingAuthor, git, slugToDocId } = opts;
|
|
356
|
-
// Build
|
|
356
|
+
// Build context docs with resolved references and correct refType.
|
|
357
357
|
// Each ref type gets the appropriate resolution fields set on the
|
|
358
358
|
// mirror document so Studio can display them correctly.
|
|
359
|
-
const
|
|
359
|
+
const contextDocs = (task.context?.docs ?? []).map((ref, i) => {
|
|
360
360
|
const base = { _key: `cd${i}`, reason: ref.reason ?? "" };
|
|
361
361
|
if (isSlugRef(ref)) {
|
|
362
362
|
const resolvedId = slugToDocId.get(ref.slug);
|
|
@@ -428,11 +428,11 @@ export function buildMirrorDocument(task, opts) {
|
|
|
428
428
|
_type: "ailf.task",
|
|
429
429
|
ownership: "repo",
|
|
430
430
|
status: task.status ?? "active",
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
431
|
+
assertions: assertArray,
|
|
432
|
+
contextDocs,
|
|
433
|
+
title: task.title,
|
|
434
434
|
docCoverage: task.docCoverage ?? false,
|
|
435
|
-
|
|
435
|
+
area: {
|
|
436
436
|
_ref: `ailf.featureArea.${area}`,
|
|
437
437
|
_type: "reference",
|
|
438
438
|
},
|
|
@@ -452,7 +452,7 @@ export function buildMirrorDocument(task, opts) {
|
|
|
452
452
|
author: existingAuthor ?? git.author,
|
|
453
453
|
lastEditor: git.author,
|
|
454
454
|
},
|
|
455
|
-
|
|
455
|
+
promptText: task.prompt?.text ?? "",
|
|
456
456
|
...(task.baseline
|
|
457
457
|
? {
|
|
458
458
|
baseline: {
|
package/dist/pipeline/plan.js
CHANGED
|
@@ -145,7 +145,7 @@ export async function buildPipelinePlan(opts, rootDir) {
|
|
|
145
145
|
const rawTasks = await loadAllTsTaskFiles(modeTasksDir);
|
|
146
146
|
if (rawTasks.length > 0) {
|
|
147
147
|
// Dynamic import of the handler module
|
|
148
|
-
const handlerModulePath = `./compiler/mode-handlers/${opts.mode}
|
|
148
|
+
const handlerModulePath = `./compiler/mode-handlers/${opts.mode}/index.js`;
|
|
149
149
|
const mod = await import(handlerModulePath);
|
|
150
150
|
const handler = mod.handler;
|
|
151
151
|
for (const rawFile of rawTasks) {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@sanity/ailf",
|
|
3
|
-
"version": "
|
|
3
|
+
"version": "2.0.0",
|
|
4
4
|
"private": false,
|
|
5
5
|
"publishConfig": {
|
|
6
6
|
"access": "restricted"
|
|
@@ -13,6 +13,14 @@
|
|
|
13
13
|
},
|
|
14
14
|
"description": "AI Literacy Framework - Evaluation tool for Sanity documentation",
|
|
15
15
|
"type": "module",
|
|
16
|
+
"exports": {
|
|
17
|
+
".": {
|
|
18
|
+
"types": "./dist/index.d.ts",
|
|
19
|
+
"import": "./dist/index.js"
|
|
20
|
+
}
|
|
21
|
+
},
|
|
22
|
+
"main": "dist/index.js",
|
|
23
|
+
"types": "dist/index.d.ts",
|
|
16
24
|
"bin": {
|
|
17
25
|
"ailf": "./bin/ailf.js"
|
|
18
26
|
},
|
|
@@ -26,6 +34,7 @@
|
|
|
26
34
|
"dependencies": {
|
|
27
35
|
"@google-cloud/bigquery": "^8.1.1",
|
|
28
36
|
"@inquirer/prompts": "^8.3.0",
|
|
37
|
+
"@modelcontextprotocol/sdk": "^1.29.0",
|
|
29
38
|
"@portabletext/markdown": "^1.0.0",
|
|
30
39
|
"@sanity/client": "^7.3.0",
|
|
31
40
|
"commander": "^14.0.3",
|
|
@@ -41,9 +50,8 @@
|
|
|
41
50
|
"@types/node": "^22.13.1",
|
|
42
51
|
"tsx": "^4.19.2",
|
|
43
52
|
"typescript": "^5.7.3",
|
|
44
|
-
"@sanity/ailf-core": "0.1.0",
|
|
45
53
|
"@sanity/ailf-shared": "0.1.0",
|
|
46
|
-
"@sanity/ailf-
|
|
54
|
+
"@sanity/ailf-core": "0.1.0"
|
|
47
55
|
},
|
|
48
56
|
"scripts": {
|
|
49
57
|
"build": "tsc && tsx scripts/bundle-workspace-deps.ts",
|
|
@@ -1,61 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* cli.ts — Minimal CLI for standalone task validation.
|
|
3
|
-
*
|
|
4
|
-
* Usage:
|
|
5
|
-
* npx @sanity/ailf-tasks validate .ailf/tasks/
|
|
6
|
-
* npx @sanity/ailf-tasks validate # defaults to .ailf/tasks/
|
|
7
|
-
*/
|
|
8
|
-
import { loadTaskDir } from "./parser.js";
|
|
9
|
-
import { formatValidationResult, validateRepoTasks } from "./validation.js";
|
|
10
|
-
export function run() {
|
|
11
|
-
const args = process.argv.slice(2);
|
|
12
|
-
const command = args[0];
|
|
13
|
-
if (command === "validate") {
|
|
14
|
-
const dir = args[1] ?? ".ailf/tasks";
|
|
15
|
-
validateCommand(dir);
|
|
16
|
-
}
|
|
17
|
-
else if (command === "--help" ||
|
|
18
|
-
command === "-h" ||
|
|
19
|
-
command === undefined) {
|
|
20
|
-
printUsage();
|
|
21
|
-
}
|
|
22
|
-
else {
|
|
23
|
-
console.error(`Unknown command: ${command}`);
|
|
24
|
-
printUsage();
|
|
25
|
-
process.exit(1);
|
|
26
|
-
}
|
|
27
|
-
}
|
|
28
|
-
function validateCommand(dir) {
|
|
29
|
-
try {
|
|
30
|
-
const tasks = loadTaskDir(dir);
|
|
31
|
-
// Run semantic validation
|
|
32
|
-
const result = validateRepoTasks(tasks);
|
|
33
|
-
const formatted = formatValidationResult(result);
|
|
34
|
-
console.log(`✅ ${tasks.length} task(s) validated from ${dir}`);
|
|
35
|
-
for (const task of tasks) {
|
|
36
|
-
console.log(` ${task.id} — ${task.description}`);
|
|
37
|
-
}
|
|
38
|
-
if (result.warnings.length > 0 || result.errors.length > 0) {
|
|
39
|
-
console.log("");
|
|
40
|
-
console.log(formatted);
|
|
41
|
-
}
|
|
42
|
-
if (!result.valid) {
|
|
43
|
-
process.exit(1);
|
|
44
|
-
}
|
|
45
|
-
}
|
|
46
|
-
catch (err) {
|
|
47
|
-
console.error(`❌ ${err instanceof Error ? err.message : String(err)}`);
|
|
48
|
-
process.exit(1);
|
|
49
|
-
}
|
|
50
|
-
}
|
|
51
|
-
function printUsage() {
|
|
52
|
-
console.log("Usage: ailf-tasks <command> [options]");
|
|
53
|
-
console.log("");
|
|
54
|
-
console.log("Commands:");
|
|
55
|
-
console.log(" validate [dir] Validate task YAML files (default: .ailf/tasks/)");
|
|
56
|
-
console.log("");
|
|
57
|
-
console.log("Examples:");
|
|
58
|
-
console.log(" ailf-tasks validate");
|
|
59
|
-
console.log(" ailf-tasks validate .ailf/tasks/");
|
|
60
|
-
console.log(" ailf-tasks validate /path/to/tasks/");
|
|
61
|
-
}
|
|
@@ -1,13 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* @sanity/ailf-tasks — Task definition schemas and YAML parser.
|
|
3
|
-
*
|
|
4
|
-
* Lightweight package for parsing and validating .ailf/tasks/*.yaml files
|
|
5
|
-
* without depending on the full AILF CLI or its heavyweight dependencies
|
|
6
|
-
* (Promptfoo, LLM SDKs, Sanity client).
|
|
7
|
-
*
|
|
8
|
-
* Usage:
|
|
9
|
-
* import { parseTaskFile, loadTaskDir, RepoTaskSchema } from '@sanity/ailf-tasks'
|
|
10
|
-
*/
|
|
11
|
-
export { CURATED_ASSERTION_TYPES, RepoTaskFileSchema, RepoTaskSchema, RUBRIC_TEMPLATE_NAMES, type CuratedAssertionType, type RepoTask, type RubricTemplateName, } from "./schemas.js";
|
|
12
|
-
export { loadTaskDir, parseTaskFile } from "./parser.js";
|
|
13
|
-
export { detectSnakeCaseFields, formatValidationResult, validateRepoTasks, type ValidationMessage, type ValidationResult, } from "./validation.js";
|
|
@@ -1,16 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* @sanity/ailf-tasks — Task definition schemas and YAML parser.
|
|
3
|
-
*
|
|
4
|
-
* Lightweight package for parsing and validating .ailf/tasks/*.yaml files
|
|
5
|
-
* without depending on the full AILF CLI or its heavyweight dependencies
|
|
6
|
-
* (Promptfoo, LLM SDKs, Sanity client).
|
|
7
|
-
*
|
|
8
|
-
* Usage:
|
|
9
|
-
* import { parseTaskFile, loadTaskDir, RepoTaskSchema } from '@sanity/ailf-tasks'
|
|
10
|
-
*/
|
|
11
|
-
// Schemas and types
|
|
12
|
-
export { CURATED_ASSERTION_TYPES, RepoTaskFileSchema, RepoTaskSchema, RUBRIC_TEMPLATE_NAMES, } from "./schemas.js";
|
|
13
|
-
// Parsing
|
|
14
|
-
export { loadTaskDir, parseTaskFile } from "./parser.js";
|
|
15
|
-
// Validation
|
|
16
|
-
export { detectSnakeCaseFields, formatValidationResult, validateRepoTasks, } from "./validation.js";
|
|
@@ -1,27 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* parser.ts — Standalone task file and directory parsing.
|
|
3
|
-
*
|
|
4
|
-
* High-level functions for loading and validating .ailf/tasks/ YAML
|
|
5
|
-
* files without any dependency on the eval pipeline.
|
|
6
|
-
*
|
|
7
|
-
* Usage:
|
|
8
|
-
* import { parseTaskFile, loadTaskDir } from '@sanity/ailf-tasks'
|
|
9
|
-
*/
|
|
10
|
-
import { type RepoTask } from "./schemas.js";
|
|
11
|
-
/**
|
|
12
|
-
* Parse a single task YAML string and return validated tasks.
|
|
13
|
-
*
|
|
14
|
-
* @param content - Raw YAML string content
|
|
15
|
-
* @param filename - Source filename (for error messages)
|
|
16
|
-
* @returns Validated array of RepoTask objects
|
|
17
|
-
* @throws Error if YAML parsing or Zod validation fails
|
|
18
|
-
*/
|
|
19
|
-
export declare function parseTaskFile(content: string, filename?: string): RepoTask[];
|
|
20
|
-
/**
|
|
21
|
-
* Load and parse all task YAML files from a directory.
|
|
22
|
-
*
|
|
23
|
-
* @param dirPath - Path to directory containing .yaml/.yml files
|
|
24
|
-
* @returns All validated tasks, sorted by filename
|
|
25
|
-
* @throws Error if directory not found, no YAML files, or validation fails
|
|
26
|
-
*/
|
|
27
|
-
export declare function loadTaskDir(dirPath: string): RepoTask[];
|
|
@@ -1,73 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* parser.ts — Standalone task file and directory parsing.
|
|
3
|
-
*
|
|
4
|
-
* High-level functions for loading and validating .ailf/tasks/ YAML
|
|
5
|
-
* files without any dependency on the eval pipeline.
|
|
6
|
-
*
|
|
7
|
-
* Usage:
|
|
8
|
-
* import { parseTaskFile, loadTaskDir } from '@sanity/ailf-tasks'
|
|
9
|
-
*/
|
|
10
|
-
import { existsSync, readdirSync, readFileSync } from "fs";
|
|
11
|
-
import { resolve } from "path";
|
|
12
|
-
import { load } from "js-yaml";
|
|
13
|
-
import { RepoTaskFileSchema } from "./schemas.js";
|
|
14
|
-
// ---------------------------------------------------------------------------
|
|
15
|
-
// Public API
|
|
16
|
-
// ---------------------------------------------------------------------------
|
|
17
|
-
/**
 * Decode the text of one task YAML document and validate it.
 *
 * The text is decoded with js-yaml's `load`. The decoded value must be an
 * array; it is then checked with `RepoTaskFileSchema.safeParse`.
 *
 * @param content - Raw YAML text
 * @param filename - Name used in error messages (defaults to "<string>")
 * @returns The `data` of a successful `RepoTaskFileSchema.safeParse` call
 * @throws Error if the decoded value is not an array, or if schema
 *   validation reports issues (one line per issue, prefixed by its path).
 *   Anything thrown by `load` itself propagates unchanged.
 */
export function parseTaskFile(content, filename = "<string>") {
    const document = load(content);
    if (Array.isArray(document)) {
        const outcome = RepoTaskFileSchema.safeParse(document);
        if (outcome.success) {
            return outcome.data;
        }
        // Collect one "[path]: message" line per reported issue.
        const issueLines = [];
        for (const issue of outcome.error.issues) {
            issueLines.push(` [${issue.path.join(".")}]: ${issue.message}`);
        }
        const details = issueLines.join("\n");
        throw new Error(`Invalid task file "${filename}":\n${details}`);
    }
    throw new Error(`${filename} did not parse to an array of tasks. ` +
        "Task files must contain a YAML array of task definitions.");
}
|
|
40
|
-
/**
 * Load and parse all task YAML files from a directory.
 *
 * Entries of `dirPath` whose names end in `.yaml` or `.yml` and do not
 * start with `.` are processed in default `Array.prototype.sort()` order.
 * Each file is read as UTF-8 and passed to `parseTaskFile`; the resulting
 * tasks are concatenated into one array.
 *
 * @param dirPath - Path to directory containing .yaml/.yml files
 * @returns All validated tasks, in sorted-filename order
 * @throws Error if the path does not exist, if no matching YAML entries are
 *   found, or if reading or parsing any entry fails. Read and parse failures
 *   are wrapped as "Failed to load <file>" with the original error attached
 *   as `cause`.
 */
export function loadTaskDir(dirPath) {
    if (!existsSync(dirPath)) {
        throw new Error(`Tasks directory not found: ${dirPath}\n` +
            " Expected a directory containing .ailf/tasks/*.yaml files.");
    }
    const yamlFiles = readdirSync(dirPath)
        .filter((f) => (f.endsWith(".yaml") || f.endsWith(".yml")) && !f.startsWith("."))
        .sort();
    if (yamlFiles.length === 0) {
        throw new Error(`No YAML files found in ${dirPath}\n` +
            " Expected .ailf/tasks/*.yaml files with task definitions.");
    }
    const allTasks = [];
    for (const file of yamlFiles) {
        try {
            // Read inside the try block so that I/O failures (e.g. a
            // subdirectory named "*.yaml", or a permission error) are reported
            // with the offending file name, exactly like parse failures.
            const content = readFileSync(resolve(dirPath, file), "utf-8");
            const tasks = parseTaskFile(content, file);
            allTasks.push(...tasks);
        }
        catch (err) {
            const msg = err instanceof Error ? err.message : String(err);
            throw new Error(`Failed to load ${file}:\n${msg}`, { cause: err });
        }
    }
    return allTasks;
}
|