@sanity/ailf 1.0.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +0 -1
- package/config/models.ts +15 -3
- package/dist/_vendor/ailf-core/config-helpers.d.ts +14 -17
- package/dist/_vendor/ailf-core/config-helpers.js +22 -2
- package/dist/_vendor/ailf-core/examples/index.d.ts +16 -0
- package/dist/_vendor/ailf-core/examples/index.js +25 -0
- package/dist/_vendor/ailf-core/index.d.ts +2 -2
- package/dist/_vendor/ailf-core/index.js +1 -1
- package/dist/_vendor/ailf-core/ports/context.d.ts +2 -0
- package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +1 -0
- package/dist/_vendor/ailf-core/schemas/eval-config.js +10 -0
- package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +1 -0
- package/dist/_vendor/ailf-core/schemas/pipeline-request.js +2 -0
- package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +0 -2
- package/dist/_vendor/ailf-core/schemas/pipeline.js +0 -1
- package/dist/_vendor/ailf-core/types/generalized-task.d.ts +13 -0
- package/dist/_vendor/ailf-core/types/index.d.ts +1 -3
- package/dist/_vendor/ailf-core/types/plugin-registry.d.ts +78 -23
- package/dist/_vendor/ailf-core/types/plugin-registry.js +73 -20
- package/dist/adapters/config-sources/file-config-adapter.js +1 -0
- package/dist/adapters/config-sources/ts-config-loader.js +21 -13
- package/dist/adapters/task-sources/content-lake-task-source.js +17 -20
- package/dist/adapters/task-sources/index.d.ts +2 -2
- package/dist/adapters/task-sources/index.js +2 -2
- package/dist/adapters/task-sources/repo-schemas.d.ts +218 -16
- package/dist/adapters/task-sources/repo-schemas.js +227 -19
- package/dist/adapters/task-sources/repo-task-source.d.ts +14 -10
- package/dist/adapters/task-sources/repo-task-source.js +81 -122
- package/dist/adapters/task-sources/repo-validation.d.ts +36 -5
- package/dist/adapters/task-sources/repo-validation.js +126 -5
- package/dist/adapters/task-sources/task-file-loader.d.ts +2 -2
- package/dist/adapters/task-sources/task-file-loader.js +2 -2
- package/dist/commands/coverage-audit.js +3 -1
- package/dist/commands/init.d.ts +6 -4
- package/dist/commands/init.js +302 -23
- package/dist/commands/validate-tasks.d.ts +2 -2
- package/dist/commands/validate-tasks.js +26 -15
- package/dist/composition-root.d.ts +13 -1
- package/dist/composition-root.js +73 -41
- package/dist/index.d.ts +41 -0
- package/dist/index.js +48 -0
- package/dist/orchestration/build-step-sequence.js +4 -2
- package/dist/orchestration/steps/fetch-docs-step.js +2 -3
- package/dist/orchestration/steps/generate-configs-step.js +28 -12
- package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js +1 -1
- package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.js +1 -1
- package/dist/pipeline/compiler/__tests__/literacy-handler.test.js +1 -1
- package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +105 -68
- package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.js +33 -100
- package/dist/pipeline/compiler/literacy-bridge.d.ts +1 -1
- package/dist/pipeline/compiler/literacy-bridge.js +1 -1
- package/dist/pipeline/compiler/mode-bases/agent-harness.d.ts +10 -0
- package/dist/pipeline/compiler/mode-bases/agent-harness.js +21 -0
- package/dist/pipeline/compiler/mode-bases/index.d.ts +4 -0
- package/dist/pipeline/compiler/mode-bases/index.js +4 -0
- package/dist/pipeline/compiler/mode-bases/knowledge-probe.d.ts +10 -0
- package/dist/pipeline/compiler/mode-bases/knowledge-probe.js +22 -0
- package/dist/pipeline/compiler/mode-bases/literacy.d.ts +12 -0
- package/dist/pipeline/compiler/mode-bases/literacy.js +78 -0
- package/dist/pipeline/compiler/mode-bases/mcp-server.d.ts +10 -0
- package/dist/pipeline/compiler/mode-bases/mcp-server.js +70 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.d.ts +43 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.js +187 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.js +138 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/index.d.ts +16 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/index.js +43 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.js +29 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.d.ts +12 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.js +82 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.d.ts +4 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.js +19 -0
- package/dist/pipeline/compiler/mode-handlers/{agent-harness-handler.d.ts → agent-harness/types.d.ts} +3 -24
- package/dist/pipeline/compiler/mode-handlers/agent-harness/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.js +16 -0
- package/dist/pipeline/compiler/mode-handlers/index.d.ts +4 -5
- package/dist/pipeline/compiler/mode-handlers/index.js +4 -6
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.d.ts +16 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.js +61 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.d.ts +18 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.js +112 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.d.ts +26 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.js +49 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.js +28 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.d.ts +44 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.js +24 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/assertions.d.ts +18 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/assertions.js +118 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/compiler.d.ts +14 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/compiler.js +105 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/index.d.ts +11 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/index.js +38 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/prompts.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/prompts.js +74 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/types.d.ts +41 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/validation.d.ts +12 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/validation.js +28 -0
- package/dist/pipeline/compiler/mode-handlers/{mcp-assertions.d.ts → mcp-server/assertions.d.ts} +2 -10
- package/dist/pipeline/compiler/mode-handlers/{mcp-assertions.js → mcp-server/assertions.js} +63 -6
- package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.js +100 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/index.d.ts +27 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/index.js +54 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.d.ts +8 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.js +28 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.d.ts +28 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.js +104 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/types.d.ts +37 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.js +43 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.d.ts +33 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.js +174 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.js +95 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.js +172 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.d.ts +14 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.js +16 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.d.ts +93 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.js +4 -0
- package/dist/pipeline/compiler/preset-loader.d.ts +22 -0
- package/dist/pipeline/compiler/preset-loader.js +99 -0
- package/dist/pipeline/compiler/presets/sanity-literacy.d.ts +6 -9
- package/dist/pipeline/compiler/presets/sanity-literacy.js +10 -156
- package/dist/pipeline/expand-tasks.d.ts +2 -2
- package/dist/pipeline/expand-tasks.js +2 -2
- package/dist/pipeline/generate-configs.js +1 -1
- package/dist/pipeline/map-request-to-config.js +1 -0
- package/dist/pipeline/mirror-repo-tasks.d.ts +7 -7
- package/dist/pipeline/mirror-repo-tasks.js +9 -9
- package/dist/pipeline/plan.js +1 -1
- package/package.json +11 -3
- package/dist/_vendor/ailf-tasks/cli.d.ts +0 -8
- package/dist/_vendor/ailf-tasks/cli.js +0 -61
- package/dist/_vendor/ailf-tasks/index.d.ts +0 -13
- package/dist/_vendor/ailf-tasks/index.js +0 -16
- package/dist/_vendor/ailf-tasks/parser.d.ts +0 -27
- package/dist/_vendor/ailf-tasks/parser.js +0 -73
- package/dist/_vendor/ailf-tasks/schemas.d.ts +0 -198
- package/dist/_vendor/ailf-tasks/schemas.js +0 -180
- package/dist/_vendor/ailf-tasks/validation.d.ts +0 -47
- package/dist/_vendor/ailf-tasks/validation.js +0 -162
- package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.js +0 -485
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.d.ts +0 -76
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.js +0 -245
- package/dist/pipeline/compiler/mode-handlers/literacy-handler.d.ts +0 -89
- package/dist/pipeline/compiler/mode-handlers/literacy-handler.js +0 -379
- package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.d.ts +0 -67
- package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.js +0 -309
|
@@ -1,8 +1,129 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* repo-validation.ts —
|
|
2
|
+
* repo-validation.ts — Semantic validation for task definitions.
|
|
3
3
|
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
6
|
-
*
|
|
4
|
+
* Checks that go beyond Zod schema parsing:
|
|
5
|
+
* - Assertion types are in the curated set
|
|
6
|
+
* - Rubric template names resolve to known templates
|
|
7
|
+
* - Doc ref slugs look reasonable (slugs, not URLs)
|
|
8
|
+
* - Tasks have at least one LLM rubric assertion (recommended)
|
|
9
|
+
* - Tasks have a prompt text (recommended)
|
|
10
|
+
*
|
|
11
|
+
* These produce warnings, not errors — the pipeline can still run
|
|
12
|
+
* with imperfect tasks. Only structural failures (caught by Zod) block.
|
|
13
|
+
*
|
|
14
|
+
* Previously this file re-exported from @sanity/ailf-tasks. That package
|
|
15
|
+
* has been eliminated — all validation logic now lives here.
|
|
16
|
+
*/
|
|
17
|
+
import { CURATED_ASSERTION_TYPES, RUBRIC_TEMPLATE_NAMES, } from "./repo-schemas.js";
|
|
18
|
+
// ---------------------------------------------------------------------------
|
|
19
|
+
// Public API
|
|
20
|
+
// ---------------------------------------------------------------------------
|
|
21
|
+
/**
|
|
22
|
+
* Run semantic validation on an array of parsed canonical tasks.
|
|
23
|
+
*
|
|
24
|
+
* Returns warnings for issues that don't block execution (unknown feature
|
|
25
|
+
* areas, unresolved slugs) and errors for issues that would cause pipeline
|
|
26
|
+
* failures (completely missing required fields — though Zod catches most).
|
|
27
|
+
*/
|
|
28
|
+
export function validateCanonicalTasks(tasks) {
|
|
29
|
+
const errors = [];
|
|
30
|
+
const warnings = [];
|
|
31
|
+
// Check for duplicate IDs
|
|
32
|
+
const seenIds = new Set();
|
|
33
|
+
for (const task of tasks) {
|
|
34
|
+
if (seenIds.has(task.id)) {
|
|
35
|
+
errors.push({
|
|
36
|
+
taskId: task.id,
|
|
37
|
+
field: "id",
|
|
38
|
+
message: `Duplicate task ID "${task.id}"`,
|
|
39
|
+
});
|
|
40
|
+
}
|
|
41
|
+
seenIds.add(task.id);
|
|
42
|
+
}
|
|
43
|
+
for (const task of tasks) {
|
|
44
|
+
const assertions = task.assertions ?? [];
|
|
45
|
+
// Check assertion types
|
|
46
|
+
for (let i = 0; i < assertions.length; i++) {
|
|
47
|
+
const assertion = assertions[i];
|
|
48
|
+
if (!CURATED_ASSERTION_TYPES.includes(assertion.type)) {
|
|
49
|
+
warnings.push({
|
|
50
|
+
taskId: task.id,
|
|
51
|
+
field: `assertions[${i}].type`,
|
|
52
|
+
message: `Unknown assertion type "${assertion.type}". ` +
|
|
53
|
+
`Valid types: ${CURATED_ASSERTION_TYPES.join(", ")}`,
|
|
54
|
+
});
|
|
55
|
+
}
|
|
56
|
+
// Check rubric template for llm-rubric assertions
|
|
57
|
+
if (assertion.type === "llm-rubric" && "template" in assertion) {
|
|
58
|
+
const template = assertion.template;
|
|
59
|
+
if (!RUBRIC_TEMPLATE_NAMES.includes(template)) {
|
|
60
|
+
warnings.push({
|
|
61
|
+
taskId: task.id,
|
|
62
|
+
field: `assertions[${i}].template`,
|
|
63
|
+
message: `Unknown rubric template "${template}". ` +
|
|
64
|
+
`Valid templates: ${RUBRIC_TEMPLATE_NAMES.join(", ")}`,
|
|
65
|
+
});
|
|
66
|
+
}
|
|
67
|
+
}
|
|
68
|
+
}
|
|
69
|
+
// Check canonical doc refs look reasonable
|
|
70
|
+
const docs = task.context?.docs ?? [];
|
|
71
|
+
for (let i = 0; i < docs.length; i++) {
|
|
72
|
+
const doc = docs[i];
|
|
73
|
+
// Slug refs: warn if they look like URLs or paths
|
|
74
|
+
if ("slug" in doc && !("id" in doc) && typeof doc.slug === "string") {
|
|
75
|
+
if (doc.slug.includes("/") || doc.slug.includes("http")) {
|
|
76
|
+
warnings.push({
|
|
77
|
+
taskId: task.id,
|
|
78
|
+
field: `context.docs[${i}].slug`,
|
|
79
|
+
message: `Slug "${doc.slug}" looks like a URL or path — use 'path' type for paths or 'slug' for document slugs (e.g., "groq-introduction")`,
|
|
80
|
+
});
|
|
81
|
+
}
|
|
82
|
+
}
|
|
83
|
+
}
|
|
84
|
+
// Check task has at least one llm-rubric assertion (recommended but not required)
|
|
85
|
+
const hasLlmRubric = assertions.some((a) => a.type === "llm-rubric");
|
|
86
|
+
if (!hasLlmRubric) {
|
|
87
|
+
warnings.push({
|
|
88
|
+
taskId: task.id,
|
|
89
|
+
field: "assertions",
|
|
90
|
+
message: "No llm-rubric assertion found. Tasks should have at least one scored rubric for meaningful evaluation.",
|
|
91
|
+
});
|
|
92
|
+
}
|
|
93
|
+
// Check prompt text exists
|
|
94
|
+
if (!task.prompt?.text) {
|
|
95
|
+
warnings.push({
|
|
96
|
+
taskId: task.id,
|
|
97
|
+
field: "prompt.text",
|
|
98
|
+
message: "No task prompt found in prompt.text. The LLM will receive an empty implementation request.",
|
|
99
|
+
});
|
|
100
|
+
}
|
|
101
|
+
}
|
|
102
|
+
return {
|
|
103
|
+
valid: errors.length === 0,
|
|
104
|
+
errors,
|
|
105
|
+
warnings,
|
|
106
|
+
};
|
|
107
|
+
}
|
|
108
|
+
/**
|
|
109
|
+
* Format validation results for console output.
|
|
7
110
|
*/
|
|
8
|
-
export
|
|
111
|
+
export function formatValidationResult(result) {
|
|
112
|
+
const lines = [];
|
|
113
|
+
if (result.errors.length > 0) {
|
|
114
|
+
lines.push("Errors:");
|
|
115
|
+
for (const e of result.errors) {
|
|
116
|
+
lines.push(` [${e.taskId}] ${e.field}: ${e.message}`);
|
|
117
|
+
}
|
|
118
|
+
}
|
|
119
|
+
if (result.warnings.length > 0) {
|
|
120
|
+
lines.push("Warnings:");
|
|
121
|
+
for (const w of result.warnings) {
|
|
122
|
+
lines.push(` [${w.taskId}] ${w.field}: ${w.message}`);
|
|
123
|
+
}
|
|
124
|
+
}
|
|
125
|
+
if (result.valid && result.warnings.length === 0) {
|
|
126
|
+
lines.push("All tasks pass validation");
|
|
127
|
+
}
|
|
128
|
+
return lines.join("\n");
|
|
129
|
+
}
|
|
@@ -3,8 +3,8 @@
|
|
|
3
3
|
*
|
|
4
4
|
* Supplements the existing YAML-based task loading by supporting
|
|
5
5
|
* `*.task.ts` and `*.task.js` files in task directories. Files are
|
|
6
|
-
* loaded via jiti and
|
|
7
|
-
*
|
|
6
|
+
* loaded via jiti and expected to export GeneralizedTaskDefinition
|
|
7
|
+
* objects authored with `defineTask()`.
|
|
8
8
|
*
|
|
9
9
|
* TS task files export a single task or an array of tasks:
|
|
10
10
|
*
|
|
@@ -3,8 +3,8 @@
|
|
|
3
3
|
*
|
|
4
4
|
* Supplements the existing YAML-based task loading by supporting
|
|
5
5
|
* `*.task.ts` and `*.task.js` files in task directories. Files are
|
|
6
|
-
* loaded via jiti and
|
|
7
|
-
*
|
|
6
|
+
* loaded via jiti and expected to export GeneralizedTaskDefinition
|
|
7
|
+
* objects authored with `defineTask()`.
|
|
8
8
|
*
|
|
9
9
|
* TS task files export a single task or an array of tasks:
|
|
10
10
|
*
|
|
@@ -8,6 +8,7 @@ import { Command } from "commander";
|
|
|
8
8
|
import { dirname, resolve } from "path";
|
|
9
9
|
import { fileURLToPath } from "url";
|
|
10
10
|
import { countReferencedDocs, formatCoverageConsole, formatCoverageMarkdown, runCoverageAudit, } from "../pipeline/coverage-audit.js";
|
|
11
|
+
import { createLiteracyModeBase } from "../pipeline/compiler/mode-bases/index.js";
|
|
11
12
|
import { createSanityLiteracyPreset } from "../pipeline/compiler/presets/index.js";
|
|
12
13
|
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
13
14
|
const ROOT = resolve(__dirname, "..", "..");
|
|
@@ -17,9 +18,10 @@ export function createCoverageAuditCommand() {
|
|
|
17
18
|
.option("--format <fmt>", "Output format: table, md, markdown")
|
|
18
19
|
.option("--json", "Output raw JSON", false)
|
|
19
20
|
.action(async (opts) => {
|
|
20
|
-
// Build a registry with preset
|
|
21
|
+
// Build a registry with mode base + preset so coverage audit works
|
|
21
22
|
// even when config/features.ts is empty (preset is source of truth).
|
|
22
23
|
const registry = new InMemoryPluginRegistry();
|
|
24
|
+
registry.registerModeBase(createLiteracyModeBase());
|
|
23
25
|
registry.registerPreset(createSanityLiteracyPreset({ rootDir: ROOT }));
|
|
24
26
|
const report = runCoverageAudit(ROOT, { registry });
|
|
25
27
|
if (!report) {
|
package/dist/commands/init.d.ts
CHANGED
|
@@ -5,12 +5,14 @@
|
|
|
5
5
|
* task files. The generated files are ready-to-edit starting points —
|
|
6
6
|
* not live evaluation tasks.
|
|
7
7
|
*
|
|
8
|
-
*
|
|
9
|
-
*
|
|
10
|
-
*
|
|
8
|
+
* TypeScript output (default) uses define* helpers from @sanity/ailf-core
|
|
9
|
+
* for full IDE autocomplete and type checking. YAML output preserves
|
|
10
|
+
* inline comments from the source files. JSON output is a plain
|
|
11
|
+
* serialization of the parsed data.
|
|
11
12
|
*
|
|
12
13
|
* Usage:
|
|
13
|
-
* ailf init #
|
|
14
|
+
* ailf init # TypeScript output (default)
|
|
15
|
+
* ailf init --output-format yaml # YAML output
|
|
14
16
|
* ailf init --output-format json # JSON output
|
|
15
17
|
* ailf init --force # overwrite existing files
|
|
16
18
|
* ailf init --path ./my-dir # target a specific directory
|
package/dist/commands/init.js
CHANGED
|
@@ -5,12 +5,14 @@
|
|
|
5
5
|
* task files. The generated files are ready-to-edit starting points —
|
|
6
6
|
* not live evaluation tasks.
|
|
7
7
|
*
|
|
8
|
-
*
|
|
9
|
-
*
|
|
10
|
-
*
|
|
8
|
+
* TypeScript output (default) uses define* helpers from @sanity/ailf-core
|
|
9
|
+
* for full IDE autocomplete and type checking. YAML output preserves
|
|
10
|
+
* inline comments from the source files. JSON output is a plain
|
|
11
|
+
* serialization of the parsed data.
|
|
11
12
|
*
|
|
12
13
|
* Usage:
|
|
13
|
-
* ailf init #
|
|
14
|
+
* ailf init # TypeScript output (default)
|
|
15
|
+
* ailf init --output-format yaml # YAML output
|
|
14
16
|
* ailf init --output-format json # JSON output
|
|
15
17
|
* ailf init --force # overwrite existing files
|
|
16
18
|
* ailf init --path ./my-dir # target a specific directory
|
|
@@ -18,16 +20,17 @@
|
|
|
18
20
|
import { Command } from "commander";
|
|
19
21
|
import { existsSync, mkdirSync, writeFileSync } from "fs";
|
|
20
22
|
import { resolve, relative } from "path";
|
|
21
|
-
import { ailfConfigData, ailfConfigYaml, taskYamlFiles, TASK_FILE_NAMES, allTaskData, workflowYaml, } from "../_vendor/ailf-core/index.js";
|
|
23
|
+
import { ailfConfigData, ailfConfigYaml, ailfConfigTs, taskYamlFiles, taskTsFiles, TASK_FILE_NAMES, TASK_TS_FILE_NAMES, allTaskData, workflowYaml, } from "../_vendor/ailf-core/index.js";
|
|
22
24
|
// ---------------------------------------------------------------------------
|
|
23
25
|
// Command factory
|
|
24
26
|
// ---------------------------------------------------------------------------
|
|
25
27
|
export function createInitCommand() {
|
|
26
28
|
return new Command("init")
|
|
27
29
|
.description("Initialize a directory for AI Literacy Framework evaluation")
|
|
28
|
-
.option("--output-format <fmt>", 'Output format for generated files: "
|
|
30
|
+
.option("--output-format <fmt>", 'Output format for generated files: "ts" (default), "yaml", or "json"', "ts")
|
|
29
31
|
.option("--force", "Overwrite existing files", false)
|
|
30
32
|
.option("--path <dir>", "Target directory (default: current directory)", ".")
|
|
33
|
+
.option("--mode <mode>", "Scaffold for a specific mode: literacy, mcp-server, custom (default: all modes)")
|
|
31
34
|
.action(async (opts) => {
|
|
32
35
|
await runInit(opts);
|
|
33
36
|
});
|
|
@@ -55,8 +58,13 @@ function rel(from, to) {
|
|
|
55
58
|
// Init logic
|
|
56
59
|
// ---------------------------------------------------------------------------
|
|
57
60
|
async function runInit(opts) {
|
|
58
|
-
const
|
|
59
|
-
|
|
61
|
+
const validFormats = new Set(["ts", "yaml", "json"]);
|
|
62
|
+
if (!validFormats.has(opts.outputFormat)) {
|
|
63
|
+
console.error(` ✗ Invalid output format "${opts.outputFormat}". Valid options: ts, yaml, json`);
|
|
64
|
+
process.exitCode = 1;
|
|
65
|
+
return;
|
|
66
|
+
}
|
|
67
|
+
const format = opts.outputFormat;
|
|
60
68
|
const force = opts.force;
|
|
61
69
|
// Resolve target from the caller's actual working directory
|
|
62
70
|
const callerCwd = process.env.AILF_CALLER_CWD ?? process.cwd();
|
|
@@ -72,24 +80,103 @@ async function runInit(opts) {
|
|
|
72
80
|
console.log(` ✓ Created ${rel(targetDir, tasksDir)}/`);
|
|
73
81
|
const written = [];
|
|
74
82
|
const skipped = [];
|
|
75
|
-
// 2. Write
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
83
|
+
// 2. Write project config
|
|
84
|
+
if (format === "ts") {
|
|
85
|
+
// TypeScript: ailf.config.ts with defineConfig helper
|
|
86
|
+
const configPath = resolve(ailfDir, "ailf.config.ts");
|
|
87
|
+
if (writeIfNew(configPath, ailfConfigTs, force)) {
|
|
88
|
+
written.push(rel(targetDir, configPath));
|
|
89
|
+
}
|
|
90
|
+
else {
|
|
91
|
+
skipped.push(rel(targetDir, configPath));
|
|
92
|
+
}
|
|
93
|
+
}
|
|
94
|
+
else if (format === "yaml") {
|
|
95
|
+
// YAML: raw string passthrough (preserves comments)
|
|
96
|
+
const configPath = resolve(ailfDir, "config.yaml");
|
|
97
|
+
if (writeIfNew(configPath, ailfConfigYaml, force)) {
|
|
98
|
+
written.push(rel(targetDir, configPath));
|
|
99
|
+
}
|
|
100
|
+
else {
|
|
101
|
+
skipped.push(rel(targetDir, configPath));
|
|
102
|
+
}
|
|
84
103
|
}
|
|
85
104
|
else {
|
|
86
|
-
|
|
105
|
+
// JSON: serialize the parsed data
|
|
106
|
+
const configPath = resolve(ailfDir, "config.json");
|
|
107
|
+
const content = JSON.stringify(ailfConfigData, null, 2) + "\n";
|
|
108
|
+
if (writeIfNew(configPath, content, force)) {
|
|
109
|
+
written.push(rel(targetDir, configPath));
|
|
110
|
+
}
|
|
111
|
+
else {
|
|
112
|
+
skipped.push(rel(targetDir, configPath));
|
|
113
|
+
}
|
|
87
114
|
}
|
|
88
115
|
// 3. Write example tasks to .ailf/tasks/
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
if (format === "
|
|
92
|
-
//
|
|
116
|
+
const modeFilter = opts.mode;
|
|
117
|
+
const isCustomMode = modeFilter === "custom";
|
|
118
|
+
if (format === "ts") {
|
|
119
|
+
// TypeScript: *.task.ts files with defineTask helper
|
|
120
|
+
// Default (no --mode): write literacy examples + draft MCP/probe examples
|
|
121
|
+
// --mode literacy: only literacy examples
|
|
122
|
+
// --mode mcp-server: only MCP examples (active, not draft)
|
|
123
|
+
// --mode custom: only a custom example task
|
|
124
|
+
if (!modeFilter || modeFilter === "literacy") {
|
|
125
|
+
for (const stem of TASK_TS_FILE_NAMES) {
|
|
126
|
+
const taskPath = resolve(tasksDir, `${stem}.task.ts`);
|
|
127
|
+
const content = taskTsFiles[stem];
|
|
128
|
+
if (writeIfNew(taskPath, content, force)) {
|
|
129
|
+
written.push(rel(targetDir, taskPath));
|
|
130
|
+
}
|
|
131
|
+
else {
|
|
132
|
+
skipped.push(rel(targetDir, taskPath));
|
|
133
|
+
}
|
|
134
|
+
}
|
|
135
|
+
}
|
|
136
|
+
// Draft examples for other modes (default init only)
|
|
137
|
+
if (!modeFilter) {
|
|
138
|
+
const mcpPath = resolve(tasksDir, "example-mcp-tool-usage.task.ts");
|
|
139
|
+
if (writeIfNew(mcpPath, MCP_DRAFT_TASK_TS, force)) {
|
|
140
|
+
written.push(rel(targetDir, mcpPath));
|
|
141
|
+
}
|
|
142
|
+
else {
|
|
143
|
+
skipped.push(rel(targetDir, mcpPath));
|
|
144
|
+
}
|
|
145
|
+
const probePath = resolve(tasksDir, "example-knowledge-probe.task.ts");
|
|
146
|
+
if (writeIfNew(probePath, PROBE_DRAFT_TASK_TS, force)) {
|
|
147
|
+
written.push(rel(targetDir, probePath));
|
|
148
|
+
}
|
|
149
|
+
else {
|
|
150
|
+
skipped.push(rel(targetDir, probePath));
|
|
151
|
+
}
|
|
152
|
+
}
|
|
153
|
+
// MCP-only init
|
|
154
|
+
if (modeFilter === "mcp-server") {
|
|
155
|
+
const mcpContent = MCP_DRAFT_TASK_TS.replace('status: "draft",', '// status: "active", // Activated — this task runs in evaluations');
|
|
156
|
+
const mcpPath = resolve(tasksDir, "example-mcp-tool-usage.task.ts");
|
|
157
|
+
if (writeIfNew(mcpPath, mcpContent, force)) {
|
|
158
|
+
written.push(rel(targetDir, mcpPath));
|
|
159
|
+
}
|
|
160
|
+
else {
|
|
161
|
+
skipped.push(rel(targetDir, mcpPath));
|
|
162
|
+
}
|
|
163
|
+
}
|
|
164
|
+
// Custom preset scaffold
|
|
165
|
+
if (isCustomMode) {
|
|
166
|
+
const customTaskPath = resolve(tasksDir, "example-custom.task.ts");
|
|
167
|
+
// Reuse the GROQ literacy task as a starting point
|
|
168
|
+
if (taskTsFiles[TASK_TS_FILE_NAMES[0]]) {
|
|
169
|
+
if (writeIfNew(customTaskPath, taskTsFiles[TASK_TS_FILE_NAMES[0]], force)) {
|
|
170
|
+
written.push(rel(targetDir, customTaskPath));
|
|
171
|
+
}
|
|
172
|
+
else {
|
|
173
|
+
skipped.push(rel(targetDir, customTaskPath));
|
|
174
|
+
}
|
|
175
|
+
}
|
|
176
|
+
}
|
|
177
|
+
}
|
|
178
|
+
else if (format === "yaml") {
|
|
179
|
+
// YAML: raw string passthrough (preserves comments)
|
|
93
180
|
for (const stem of TASK_FILE_NAMES) {
|
|
94
181
|
const taskPath = resolve(tasksDir, `${stem}.yaml`);
|
|
95
182
|
const content = taskYamlFiles[stem];
|
|
@@ -118,6 +205,16 @@ async function runInit(opts) {
|
|
|
118
205
|
}
|
|
119
206
|
}
|
|
120
207
|
}
|
|
208
|
+
// 3b. Write custom preset scaffold (--mode custom only)
|
|
209
|
+
if (isCustomMode && format === "ts") {
|
|
210
|
+
const presetPath = resolve(ailfDir, "preset.ts");
|
|
211
|
+
if (writeIfNew(presetPath, CUSTOM_PRESET_TS, force)) {
|
|
212
|
+
written.push(rel(targetDir, presetPath));
|
|
213
|
+
}
|
|
214
|
+
else {
|
|
215
|
+
skipped.push(rel(targetDir, presetPath));
|
|
216
|
+
}
|
|
217
|
+
}
|
|
121
218
|
// 4. Write .gitignore in .ailf/ (keep results out of version control)
|
|
122
219
|
const gitignorePath = resolve(ailfDir, ".gitignore");
|
|
123
220
|
const gitignoreContent = `# AILF generated files\nresults/\ncontexts/\n`;
|
|
@@ -150,18 +247,25 @@ async function runInit(opts) {
|
|
|
150
247
|
console.log(` ⊘ Skipped ${f} (already exists, use --force to overwrite)`);
|
|
151
248
|
}
|
|
152
249
|
}
|
|
250
|
+
const taskExt = format === "ts" ? ".task.ts" : format === "yaml" ? ".yaml" : ".json";
|
|
153
251
|
console.log();
|
|
154
252
|
console.log(" Next steps:");
|
|
155
253
|
console.log();
|
|
156
254
|
console.log(` 1. Edit the example tasks in ${rel(targetDir, tasksDir)}/ — update`);
|
|
157
255
|
console.log(" slugs and prompts for your documentation");
|
|
158
|
-
console.log(
|
|
256
|
+
console.log(` 2. Validate locally: npx @sanity/ailf@latest validate-tasks .ailf/tasks/`);
|
|
159
257
|
console.log(" 3. Add two GitHub Actions secrets");
|
|
160
258
|
console.log(" (Settings → Secrets and variables → Actions):");
|
|
161
259
|
console.log(" • AILF_API_KEY — your API key (starts with ailf_live_sk_)");
|
|
162
260
|
console.log(" • NPM_TOKEN — npm token with read access to @sanity scope");
|
|
163
261
|
console.log(" 4. Push — the workflow at .github/workflows/ailf-eval.yml runs");
|
|
164
262
|
console.log(" automatically on PRs");
|
|
263
|
+
if (format === "ts") {
|
|
264
|
+
console.log();
|
|
265
|
+
console.log(` 💡 TypeScript tasks (${taskExt}) give you full IDE autocomplete`);
|
|
266
|
+
console.log(" via defineTask() from @sanity/ailf-core. YAML and JSON are");
|
|
267
|
+
console.log(" also supported — re-run with --output-format yaml if preferred.");
|
|
268
|
+
}
|
|
165
269
|
console.log();
|
|
166
270
|
console.log(" 🔑 Retrieve secrets from 1Password (Sanity employees):");
|
|
167
271
|
console.log();
|
|
@@ -177,3 +281,178 @@ async function runInit(opts) {
|
|
|
177
281
|
console.log(" AILF_API_KEY=... npx @sanity/ailf@latest pipeline --remote --debug");
|
|
178
282
|
console.log();
|
|
179
283
|
}
|
|
284
|
+
// ---------------------------------------------------------------------------
|
|
285
|
+
// Draft example templates for non-literacy modes
|
|
286
|
+
// ---------------------------------------------------------------------------
|
|
287
|
+
/**
 * Source text of an example task file for the "mcp-server" mode.
 *
 * The embedded task is declared with status "draft", points its serverConfig
 * at https://mcp.sanity.io using the "streamable-http" transport, and carries
 * one "llm-rubric" assertion that uses the "mcp-input-validation" template.
 *
 * Nothing in this literal is interpolated: `process.env.SANITY_API_TOKEN` is
 * part of the emitted text, not evaluated where this constant is defined.
 * The inner prompt literal uses escaped backticks so the outer literal is not
 * terminated early — keep those escapes when editing.
 *
 * NOTE(review): the embedded import specifier "../_vendor/ailf-core/index.js"
 * looks like a vendoring rewrite of a package specifier — confirm it resolves
 * from the location where this template text ends up being written.
 */
const MCP_DRAFT_TASK_TS = `/**
|
|
288
|
+
* Example Task: MCP Server tool-use evaluation (DRAFT).
|
|
289
|
+
*
|
|
290
|
+
* Tests whether an LLM can correctly discover and invoke Sanity MCP server
|
|
291
|
+
* tools. Connects to the hosted Sanity MCP server at https://mcp.sanity.io.
|
|
292
|
+
*
|
|
293
|
+
* Prerequisites:
|
|
294
|
+
* - A Sanity API token with read access (for token-based auth)
|
|
295
|
+
* - Or: OAuth authentication will be prompted on first connect
|
|
296
|
+
*
|
|
297
|
+
* Authentication options:
|
|
298
|
+
* 1. Token-based: set SANITY_API_TOKEN env var
|
|
299
|
+
* 2. OAuth: the server prompts for login on first connect
|
|
300
|
+
*
|
|
301
|
+
* Setup: npx sanity@latest mcp configure
|
|
302
|
+
* Docs: https://www.sanity.io/docs/ai/mcp-server
|
|
303
|
+
*
|
|
304
|
+
* This task is a DRAFT — it won't run unless activated or explicitly targeted.
|
|
305
|
+
* To activate: change status to "active" or remove the status field.
|
|
306
|
+
*/
|
|
307
|
+
|
|
308
|
+
import { defineTask } from "../_vendor/ailf-core/index.js"
|
|
309
|
+
|
|
310
|
+
export default defineTask({
|
|
311
|
+
mode: "mcp-server",
|
|
312
|
+
id: "example-mcp-tool-usage",
|
|
313
|
+
title: "MCP tool discovery and invocation",
|
|
314
|
+
description: "Example — tests Sanity MCP server tool-use (draft)",
|
|
315
|
+
area: "mcp",
|
|
316
|
+
|
|
317
|
+
// ── Server configuration ────────────────────────────────────
|
|
318
|
+
// The Sanity MCP server is hosted remotely at https://mcp.sanity.io.
|
|
319
|
+
// Authentication via API token header or OAuth.
|
|
320
|
+
//
|
|
321
|
+
// For token auth, set SANITY_API_TOKEN in your environment.
|
|
322
|
+
serverConfig: {
|
|
323
|
+
transport: "streamable-http",
|
|
324
|
+
url: "https://mcp.sanity.io",
|
|
325
|
+
env: {
|
|
326
|
+
SANITY_API_TOKEN: process.env.SANITY_API_TOKEN ?? "",
|
|
327
|
+
},
|
|
328
|
+
},
|
|
329
|
+
|
|
330
|
+
prompt: {
|
|
331
|
+
text: \`Use the available MCP tools to query all documents of type "article"
|
|
332
|
+
in the Sanity dataset. Return the title and slug for each document.
|
|
333
|
+
Limit results to 5 documents.\`,
|
|
334
|
+
},
|
|
335
|
+
|
|
336
|
+
assertions: [
|
|
337
|
+
{
|
|
338
|
+
type: "llm-rubric",
|
|
339
|
+
template: "mcp-input-validation",
|
|
340
|
+
criteria: [
|
|
341
|
+
"Correctly identifies the query_documents tool",
|
|
342
|
+
"Passes a valid GROQ query to filter by document type",
|
|
343
|
+
"Requests only the needed fields (title, slug)",
|
|
344
|
+
],
|
|
345
|
+
},
|
|
346
|
+
],
|
|
347
|
+
|
|
348
|
+
status: "draft",
|
|
349
|
+
})
|
|
350
|
+
`;
|
|
351
|
+
/**
 * Source text of an example task file for the "knowledge-probe" mode.
 *
 * The embedded task is declared with status "draft", asks the model to
 * explain GROQ, and carries one "llm-rubric" assertion that uses the
 * "task-completion" template.
 *
 * The inner prompt literal uses escaped backticks so the outer literal is not
 * terminated early — keep those escapes when editing.
 *
 * NOTE(review): the embedded import specifier "../_vendor/ailf-core/index.js"
 * looks like a vendoring rewrite of a package specifier — confirm it resolves
 * from the location where this template text ends up being written.
 */
const PROBE_DRAFT_TASK_TS = `/**
|
|
352
|
+
* Example Task: Knowledge probe baseline (DRAFT).
|
|
353
|
+
*
|
|
354
|
+
* Tests what the model knows about a topic without providing documentation.
|
|
355
|
+
* Used to establish a baseline for comparison with literacy evaluations.
|
|
356
|
+
* This task is a DRAFT — it won't run unless activated or explicitly targeted.
|
|
357
|
+
*
|
|
358
|
+
* To activate: change status to "active" or remove the status field.
|
|
359
|
+
*/
|
|
360
|
+
|
|
361
|
+
import { defineTask } from "../_vendor/ailf-core/index.js"
|
|
362
|
+
|
|
363
|
+
export default defineTask({
|
|
364
|
+
mode: "knowledge-probe",
|
|
365
|
+
id: "example-knowledge-probe",
|
|
366
|
+
title: "Model knowledge of GROQ syntax",
|
|
367
|
+
description: "Example — probes baseline model knowledge (draft)",
|
|
368
|
+
area: "groq",
|
|
369
|
+
|
|
370
|
+
prompt: {
|
|
371
|
+
text: \`Explain the GROQ query language used by Sanity. Cover:
|
|
372
|
+
1. Basic query syntax and projections
|
|
373
|
+
2. How to filter and sort results
|
|
374
|
+
3. Common patterns for fetching related documents
|
|
375
|
+
Provide working code examples.\`,
|
|
376
|
+
},
|
|
377
|
+
|
|
378
|
+
assertions: [
|
|
379
|
+
{
|
|
380
|
+
type: "llm-rubric",
|
|
381
|
+
template: "task-completion",
|
|
382
|
+
criteria: [
|
|
383
|
+
"Demonstrates understanding of GROQ query syntax",
|
|
384
|
+
"Shows filtering and projection patterns",
|
|
385
|
+
"Code examples use valid GROQ syntax",
|
|
386
|
+
],
|
|
387
|
+
},
|
|
388
|
+
],
|
|
389
|
+
|
|
390
|
+
status: "draft",
|
|
391
|
+
})
|
|
392
|
+
`;
|
|
393
|
+
/**
 * Source text of a starter preset file built around definePreset().
 *
 * The embedded preset sets mode "literacy", declares a single source named
 * "production" (baseUrl https://docs.example.com, with projectId/dataset left
 * commented out) and two sample features. Rubric, scoring and prompt
 * overrides appear only as commented-out placeholders.
 *
 * NOTE(review): the embedded import specifier "../_vendor/ailf-core/index.js"
 * looks like a vendoring rewrite of a package specifier — confirm it resolves
 * from the location where this template text ends up being written.
 * NOTE(review): the "api-reference" sample feature has no `area` while
 * "getting-started" sets one — confirm `area` is optional for features.
 */
const CUSTOM_PRESET_TS = `/**
|
|
394
|
+
* Custom preset — your domain-specific evaluation configuration.
|
|
395
|
+
*
|
|
396
|
+
* This preset targets the "literacy" mode base and inherits its evaluation
|
|
397
|
+
* methodology (rubrics, scoring profiles, prompt templates). You only need
|
|
398
|
+
* to provide domain-specific configuration: where your docs live, what
|
|
399
|
+
* features to track, and how to fetch documentation.
|
|
400
|
+
*
|
|
401
|
+
* To use a different mode (e.g., "mcp-server"), change the mode field.
|
|
402
|
+
* Available built-in modes: literacy, mcp-server, knowledge-probe, agent-harness.
|
|
403
|
+
*
|
|
404
|
+
* @see https://github.com/sanity-labs/ai-literacy-framework/blob/main/docs/PRESETS.md
|
|
405
|
+
*/
|
|
406
|
+
|
|
407
|
+
import { definePreset } from "../_vendor/ailf-core/index.js"
|
|
408
|
+
|
|
409
|
+
export default definePreset({
|
|
410
|
+
name: "my-docs-evaluation",
|
|
411
|
+
manifest: {
|
|
412
|
+
name: "my-docs-evaluation",
|
|
413
|
+
version: "1.0.0",
|
|
414
|
+
description: "Documentation literacy evaluation for my project.",
|
|
415
|
+
pluginApiVersion: 1,
|
|
416
|
+
},
|
|
417
|
+
|
|
418
|
+
// Target the literacy mode base — inherits rubrics, scoring, prompts.
|
|
419
|
+
// Change to "mcp-server" to evaluate MCP tool usage instead.
|
|
420
|
+
mode: "literacy",
|
|
421
|
+
|
|
422
|
+
// Source definitions — where your documentation lives.
|
|
423
|
+
sourceDefs: [
|
|
424
|
+
{
|
|
425
|
+
name: "production",
|
|
426
|
+
baseUrl: "https://docs.example.com",
|
|
427
|
+
// projectId: "your-sanity-project-id",
|
|
428
|
+
// dataset: "production",
|
|
429
|
+
},
|
|
430
|
+
],
|
|
431
|
+
|
|
432
|
+
// Feature registry — what product features you're tracking coverage for.
|
|
433
|
+
featureDefs: {
|
|
434
|
+
features: [
|
|
435
|
+
{
|
|
436
|
+
id: "getting-started",
|
|
437
|
+
name: "Getting Started Guide",
|
|
438
|
+
sections: ["guides"],
|
|
439
|
+
status: "covered",
|
|
440
|
+
area: "guides",
|
|
441
|
+
priority: "critical",
|
|
442
|
+
},
|
|
443
|
+
{
|
|
444
|
+
id: "api-reference",
|
|
445
|
+
name: "API Reference",
|
|
446
|
+
sections: ["reference"],
|
|
447
|
+
status: "uncovered",
|
|
448
|
+
priority: "high",
|
|
449
|
+
},
|
|
450
|
+
],
|
|
451
|
+
},
|
|
452
|
+
|
|
453
|
+
// Optional: override mode base rubrics, scoring, or prompts here.
|
|
454
|
+
// rubricTemplates: [{ ... }],
|
|
455
|
+
// scoringProfiles: { ... },
|
|
456
|
+
// promptTemplates: { ... },
|
|
457
|
+
})
|
|
458
|
+
`;
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* validate-tasks command — standalone validation of
|
|
2
|
+
* validate-tasks command — standalone validation of task files.
|
|
3
3
|
*
|
|
4
|
-
* Validates .ailf/tasks/*.yaml files against the
|
|
4
|
+
* Validates .ailf/tasks/*.yaml files against the CanonicalTaskSchema without
|
|
5
5
|
* running the full pipeline. Useful for pre-commit hooks and CI checks
|
|
6
6
|
* in external repos.
|
|
7
7
|
*
|