@sanity/ailf 1.0.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +0 -1
- package/config/models.ts +15 -3
- package/dist/_vendor/ailf-core/config-helpers.d.ts +14 -17
- package/dist/_vendor/ailf-core/config-helpers.js +22 -2
- package/dist/_vendor/ailf-core/examples/index.d.ts +16 -0
- package/dist/_vendor/ailf-core/examples/index.js +25 -0
- package/dist/_vendor/ailf-core/index.d.ts +2 -2
- package/dist/_vendor/ailf-core/index.js +1 -1
- package/dist/_vendor/ailf-core/ports/context.d.ts +2 -0
- package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +1 -0
- package/dist/_vendor/ailf-core/schemas/eval-config.js +10 -0
- package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +1 -0
- package/dist/_vendor/ailf-core/schemas/pipeline-request.js +2 -0
- package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +0 -2
- package/dist/_vendor/ailf-core/schemas/pipeline.js +0 -1
- package/dist/_vendor/ailf-core/types/generalized-task.d.ts +13 -0
- package/dist/_vendor/ailf-core/types/index.d.ts +1 -3
- package/dist/_vendor/ailf-core/types/plugin-registry.d.ts +78 -23
- package/dist/_vendor/ailf-core/types/plugin-registry.js +73 -20
- package/dist/adapters/config-sources/file-config-adapter.js +1 -0
- package/dist/adapters/config-sources/ts-config-loader.js +21 -13
- package/dist/adapters/task-sources/content-lake-task-source.js +17 -20
- package/dist/adapters/task-sources/index.d.ts +2 -2
- package/dist/adapters/task-sources/index.js +2 -2
- package/dist/adapters/task-sources/repo-schemas.d.ts +218 -16
- package/dist/adapters/task-sources/repo-schemas.js +227 -19
- package/dist/adapters/task-sources/repo-task-source.d.ts +14 -10
- package/dist/adapters/task-sources/repo-task-source.js +81 -122
- package/dist/adapters/task-sources/repo-validation.d.ts +36 -5
- package/dist/adapters/task-sources/repo-validation.js +126 -5
- package/dist/adapters/task-sources/task-file-loader.d.ts +2 -2
- package/dist/adapters/task-sources/task-file-loader.js +2 -2
- package/dist/commands/coverage-audit.js +3 -1
- package/dist/commands/init.d.ts +6 -4
- package/dist/commands/init.js +302 -23
- package/dist/commands/validate-tasks.d.ts +2 -2
- package/dist/commands/validate-tasks.js +26 -15
- package/dist/composition-root.d.ts +13 -1
- package/dist/composition-root.js +73 -41
- package/dist/index.d.ts +41 -0
- package/dist/index.js +48 -0
- package/dist/orchestration/build-step-sequence.js +4 -2
- package/dist/orchestration/steps/fetch-docs-step.js +2 -3
- package/dist/orchestration/steps/generate-configs-step.js +28 -12
- package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js +1 -1
- package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.js +1 -1
- package/dist/pipeline/compiler/__tests__/literacy-handler.test.js +1 -1
- package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +105 -68
- package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.js +33 -100
- package/dist/pipeline/compiler/literacy-bridge.d.ts +1 -1
- package/dist/pipeline/compiler/literacy-bridge.js +1 -1
- package/dist/pipeline/compiler/mode-bases/agent-harness.d.ts +10 -0
- package/dist/pipeline/compiler/mode-bases/agent-harness.js +21 -0
- package/dist/pipeline/compiler/mode-bases/index.d.ts +4 -0
- package/dist/pipeline/compiler/mode-bases/index.js +4 -0
- package/dist/pipeline/compiler/mode-bases/knowledge-probe.d.ts +10 -0
- package/dist/pipeline/compiler/mode-bases/knowledge-probe.js +22 -0
- package/dist/pipeline/compiler/mode-bases/literacy.d.ts +12 -0
- package/dist/pipeline/compiler/mode-bases/literacy.js +78 -0
- package/dist/pipeline/compiler/mode-bases/mcp-server.d.ts +10 -0
- package/dist/pipeline/compiler/mode-bases/mcp-server.js +70 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.d.ts +43 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.js +187 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.js +138 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/index.d.ts +16 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/index.js +43 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.js +29 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.d.ts +12 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.js +82 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.d.ts +4 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.js +19 -0
- package/dist/pipeline/compiler/mode-handlers/{agent-harness-handler.d.ts → agent-harness/types.d.ts} +3 -24
- package/dist/pipeline/compiler/mode-handlers/agent-harness/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.js +16 -0
- package/dist/pipeline/compiler/mode-handlers/index.d.ts +4 -5
- package/dist/pipeline/compiler/mode-handlers/index.js +4 -6
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.d.ts +16 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.js +61 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.d.ts +18 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.js +112 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.d.ts +26 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.js +49 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.js +28 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.d.ts +44 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.js +24 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/assertions.d.ts +18 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/assertions.js +118 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/compiler.d.ts +14 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/compiler.js +105 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/index.d.ts +11 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/index.js +38 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/prompts.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/prompts.js +74 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/types.d.ts +41 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/validation.d.ts +12 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/validation.js +28 -0
- package/dist/pipeline/compiler/mode-handlers/{mcp-assertions.d.ts → mcp-server/assertions.d.ts} +2 -10
- package/dist/pipeline/compiler/mode-handlers/{mcp-assertions.js → mcp-server/assertions.js} +63 -6
- package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.js +100 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/index.d.ts +27 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/index.js +54 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.d.ts +8 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.js +28 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.d.ts +28 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.js +104 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/types.d.ts +37 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.js +43 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.d.ts +33 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.js +174 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.js +95 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.js +172 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.d.ts +14 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.js +16 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.d.ts +93 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.js +4 -0
- package/dist/pipeline/compiler/preset-loader.d.ts +22 -0
- package/dist/pipeline/compiler/preset-loader.js +99 -0
- package/dist/pipeline/compiler/presets/sanity-literacy.d.ts +6 -9
- package/dist/pipeline/compiler/presets/sanity-literacy.js +10 -156
- package/dist/pipeline/expand-tasks.d.ts +2 -2
- package/dist/pipeline/expand-tasks.js +2 -2
- package/dist/pipeline/generate-configs.js +1 -1
- package/dist/pipeline/map-request-to-config.js +1 -0
- package/dist/pipeline/mirror-repo-tasks.d.ts +7 -7
- package/dist/pipeline/mirror-repo-tasks.js +9 -9
- package/dist/pipeline/plan.js +1 -1
- package/package.json +11 -3
- package/dist/_vendor/ailf-tasks/cli.d.ts +0 -8
- package/dist/_vendor/ailf-tasks/cli.js +0 -61
- package/dist/_vendor/ailf-tasks/index.d.ts +0 -13
- package/dist/_vendor/ailf-tasks/index.js +0 -16
- package/dist/_vendor/ailf-tasks/parser.d.ts +0 -27
- package/dist/_vendor/ailf-tasks/parser.js +0 -73
- package/dist/_vendor/ailf-tasks/schemas.d.ts +0 -198
- package/dist/_vendor/ailf-tasks/schemas.js +0 -180
- package/dist/_vendor/ailf-tasks/validation.d.ts +0 -47
- package/dist/_vendor/ailf-tasks/validation.js +0 -162
- package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.js +0 -485
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.d.ts +0 -76
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.js +0 -245
- package/dist/pipeline/compiler/mode-handlers/literacy-handler.d.ts +0 -89
- package/dist/pipeline/compiler/mode-handlers/literacy-handler.js +0 -379
- package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.d.ts +0 -67
- package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.js +0 -309
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Plugin registry — typed extension points for AILF evaluation capabilities.
|
|
3
3
|
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
6
|
-
*
|
|
7
|
-
*
|
|
8
|
-
*
|
|
9
|
-
*
|
|
4
|
+
* Three-tier architecture:
|
|
5
|
+
* - **Mode bases** define evaluation methodology (rubrics, scoring, prompts)
|
|
6
|
+
* - **Domain presets** target a mode base and add domain config (sources,
|
|
7
|
+
* features, doc fetcher)
|
|
8
|
+
* - **Framework assertions** are generic evaluation primitives available to
|
|
9
|
+
* all modes
|
|
10
10
|
*
|
|
11
11
|
* @see docs/design-docs/architecture-overhaul/extensibility-plugins.md
|
|
12
12
|
*/
|
|
@@ -19,6 +19,7 @@ export class InMemoryPluginRegistry {
|
|
|
19
19
|
rubricTemplates_ = new Map();
|
|
20
20
|
fixtureResolvers_ = new Map();
|
|
21
21
|
reportSinks_ = new Map();
|
|
22
|
+
modeBases_ = new Map();
|
|
22
23
|
presets_ = new Map();
|
|
23
24
|
promptTemplates_ = {};
|
|
24
25
|
scoringProfiles_ = {};
|
|
@@ -26,19 +27,56 @@ export class InMemoryPluginRegistry {
|
|
|
26
27
|
sourceDefs_ = [];
|
|
27
28
|
featureDefs_;
|
|
28
29
|
registerPreset(preset) {
|
|
30
|
+
// Skip archived presets entirely
|
|
31
|
+
if (preset.status === "archived")
|
|
32
|
+
return;
|
|
33
|
+
// Store draft/paused presets in the map (for later activation via
|
|
34
|
+
// --preset flag) but skip all side-effect registrations. This prevents
|
|
35
|
+
// a draft preset from silently overwriting the doc fetcher, merging
|
|
36
|
+
// scoring profiles, etc.
|
|
29
37
|
this.presets_.set(preset.name, preset);
|
|
30
|
-
if (preset.
|
|
31
|
-
|
|
32
|
-
|
|
38
|
+
if (preset.status === "draft" || preset.status === "paused")
|
|
39
|
+
return;
|
|
40
|
+
// Resolve mode base defaults
|
|
41
|
+
const base = this.modeBases_.get(preset.mode);
|
|
42
|
+
if (!base) {
|
|
43
|
+
throw new Error(`Preset "${preset.name}" targets mode "${preset.mode}" ` +
|
|
44
|
+
`but no mode base is registered for it. ` +
|
|
45
|
+
`Available mode bases: ${[...this.modeBases_.keys()].join(", ") || "(none)"}`);
|
|
46
|
+
}
|
|
47
|
+
// Mode is already registered by registerModeBase() — no need to re-register.
|
|
48
|
+
// Merge rubric templates: base defaults + preset overrides (by ID)
|
|
49
|
+
const baseRubrics = new Map((base.rubricTemplates ?? []).map((r) => [r.id, r]));
|
|
50
|
+
for (const r of preset.rubricTemplates ?? []) {
|
|
51
|
+
baseRubrics.set(r.id, r);
|
|
52
|
+
}
|
|
53
|
+
for (const r of baseRubrics.values()) {
|
|
54
|
+
this.registerRubricTemplate(r);
|
|
55
|
+
}
|
|
56
|
+
// Merge scoring profiles: base defaults + preset overrides (by name)
|
|
57
|
+
const profiles = {
|
|
58
|
+
...base.scoringProfiles,
|
|
59
|
+
...preset.scoringProfiles,
|
|
60
|
+
};
|
|
61
|
+
if (Object.keys(profiles).length > 0) {
|
|
62
|
+
this.registerScoringProfiles(profiles);
|
|
33
63
|
}
|
|
64
|
+
// Merge prompt templates: base defaults + preset overrides (by name)
|
|
65
|
+
const prompts = {
|
|
66
|
+
...base.promptTemplates,
|
|
67
|
+
...preset.promptTemplates,
|
|
68
|
+
};
|
|
69
|
+
if (Object.keys(prompts).length > 0) {
|
|
70
|
+
this.registerPromptTemplates(prompts);
|
|
71
|
+
}
|
|
72
|
+
// Merge assertions: base + preset (preset overrides by type)
|
|
73
|
+
for (const a of base.assertions ?? [])
|
|
74
|
+
this.registerAssertion(a);
|
|
34
75
|
if (preset.assertions) {
|
|
35
76
|
for (const a of preset.assertions)
|
|
36
77
|
this.registerAssertion(a);
|
|
37
78
|
}
|
|
38
|
-
|
|
39
|
-
for (const t of preset.rubricTemplates)
|
|
40
|
-
this.registerRubricTemplate(t);
|
|
41
|
-
}
|
|
79
|
+
// Register domain-specific fields
|
|
42
80
|
if (preset.fixtureResolvers) {
|
|
43
81
|
for (const r of preset.fixtureResolvers)
|
|
44
82
|
this.registerFixtureResolver(r);
|
|
@@ -47,12 +85,6 @@ export class InMemoryPluginRegistry {
|
|
|
47
85
|
for (const s of preset.reportSinks)
|
|
48
86
|
this.registerReportSink(s);
|
|
49
87
|
}
|
|
50
|
-
if (preset.promptTemplates) {
|
|
51
|
-
this.registerPromptTemplates(preset.promptTemplates);
|
|
52
|
-
}
|
|
53
|
-
if (preset.scoringProfiles) {
|
|
54
|
-
this.registerScoringProfiles(preset.scoringProfiles);
|
|
55
|
-
}
|
|
56
88
|
if (preset.docFetcher) {
|
|
57
89
|
this.registerDocFetcherFactory(preset.docFetcher);
|
|
58
90
|
}
|
|
@@ -124,9 +156,30 @@ export class InMemoryPluginRegistry {
|
|
|
124
156
|
return this.sourceDefs_;
|
|
125
157
|
}
|
|
126
158
|
registerFeatureDefs(features) {
|
|
127
|
-
this.featureDefs_
|
|
159
|
+
if (!this.featureDefs_) {
|
|
160
|
+
this.featureDefs_ = features;
|
|
161
|
+
return;
|
|
162
|
+
}
|
|
163
|
+
// Merge by feature ID: new features override existing on ID collision,
|
|
164
|
+
// existing features not in new set are preserved.
|
|
165
|
+
const merged = new Map(this.featureDefs_.features.map((f) => [f.id, f]));
|
|
166
|
+
for (const f of features.features) {
|
|
167
|
+
merged.set(f.id, f);
|
|
168
|
+
}
|
|
169
|
+
this.featureDefs_ = { features: [...merged.values()] };
|
|
128
170
|
}
|
|
129
171
|
getFeatureDefs() {
|
|
130
172
|
return this.featureDefs_;
|
|
131
173
|
}
|
|
174
|
+
registerModeBase(base) {
|
|
175
|
+
this.modeBases_.set(base.mode.id, base);
|
|
176
|
+
// Also register the mode itself so getMode() works
|
|
177
|
+
this.registerMode(base.mode);
|
|
178
|
+
}
|
|
179
|
+
getModeBase(modeId) {
|
|
180
|
+
return this.modeBases_.get(modeId);
|
|
181
|
+
}
|
|
182
|
+
getModeBases() {
|
|
183
|
+
return [...this.modeBases_.values()];
|
|
184
|
+
}
|
|
132
185
|
}
|
|
@@ -13,21 +13,29 @@
|
|
|
13
13
|
* @see docs/design-docs/architecture-overhaul/typescript-configuration.md
|
|
14
14
|
*/
|
|
15
15
|
import { existsSync } from "fs";
|
|
16
|
+
import { pathToFileURL } from "node:url";
|
|
16
17
|
import { createJiti } from "jiti";
|
|
17
18
|
// ---------------------------------------------------------------------------
|
|
18
|
-
//
|
|
19
|
+
// jiti instance factory — resolves imports relative to the loaded file
|
|
19
20
|
// ---------------------------------------------------------------------------
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
21
|
+
/**
|
|
22
|
+
* Create a jiti instance that resolves bare-specifier imports relative to
|
|
23
|
+
* the given file path, not relative to this loader module.
|
|
24
|
+
*
|
|
25
|
+
* This is critical for pnpm workspaces: a task file at `.ailf/tasks/foo.task.ts`
|
|
26
|
+
* importing `@sanity/ailf` must resolve through the dependency graph
|
|
27
|
+
* visible from the task file's directory, not from deep inside packages/eval/.
|
|
28
|
+
*
|
|
29
|
+
* We pass a `file://` URL (not a bare path) so jiti uses ESM resolution,
|
|
30
|
+
* which matches the `"import"` condition in package.json exports maps.
|
|
31
|
+
*/
|
|
32
|
+
function createJitiForFile(filePath) {
|
|
33
|
+
return createJiti(pathToFileURL(filePath).href, {
|
|
34
|
+
// Interop: handle both `export default` and `module.exports`
|
|
35
|
+
interopDefault: true,
|
|
36
|
+
// Cache loaded modules so repeated imports are not re-evaluated
|
|
37
|
+
requireCache: true,
|
|
38
|
+
});
|
|
31
39
|
}
|
|
32
40
|
/**
|
|
33
41
|
* Load a TypeScript or JavaScript config file and return its default export.
|
|
@@ -43,7 +51,7 @@ export async function loadTsConfig(filePath) {
|
|
|
43
51
|
return { ok: false, error: `File not found: ${filePath}`, path: filePath };
|
|
44
52
|
}
|
|
45
53
|
try {
|
|
46
|
-
const jiti =
|
|
54
|
+
const jiti = createJitiForFile(filePath);
|
|
47
55
|
const mod = await jiti.import(filePath);
|
|
48
56
|
const value = extractDefault(mod);
|
|
49
57
|
if (value === undefined || value === null) {
|
|
@@ -20,8 +20,8 @@
|
|
|
20
20
|
* mapping to LiteracyTaskDefinition.
|
|
21
21
|
*
|
|
22
22
|
* Key projections:
|
|
23
|
-
* -
|
|
24
|
-
* -
|
|
23
|
+
* - area reference → dereferenced areaId string
|
|
24
|
+
* - contextDocs[] → dereferenced article slugs with reason
|
|
25
25
|
* - referenceSolution → title (for identification, not full content)
|
|
26
26
|
*
|
|
27
27
|
* Filter parameters:
|
|
@@ -30,7 +30,7 @@
|
|
|
30
30
|
*/
|
|
31
31
|
const TASKS_QUERY = /* groq */ `
|
|
32
32
|
*[_type == "ailf.task"
|
|
33
|
-
&& (!defined($areas) ||
|
|
33
|
+
&& (!defined($areas) || area->areaId.current in $areas)
|
|
34
34
|
&& (!defined($taskIds) || id.current in $taskIds)
|
|
35
35
|
&& (
|
|
36
36
|
// Status-based filtering (unified — replaces execution.enabled)
|
|
@@ -41,13 +41,13 @@ const TASKS_QUERY = /* groq */ `
|
|
|
41
41
|
|| (defined($taskIds) && status != "archived")
|
|
42
42
|
)
|
|
43
43
|
&& (!defined($tags) || count((tags)[@ in $tags]) > 0)
|
|
44
|
-
] | order(
|
|
44
|
+
] | order(area->areaId.current asc, id.current asc) {
|
|
45
45
|
"taskId": id.current,
|
|
46
|
-
|
|
47
|
-
"
|
|
48
|
-
|
|
46
|
+
title,
|
|
47
|
+
"areaId": area->areaId.current,
|
|
48
|
+
promptText,
|
|
49
49
|
docCoverage,
|
|
50
|
-
"
|
|
50
|
+
"contextDocs": contextDocs[] {
|
|
51
51
|
refType,
|
|
52
52
|
"slug": doc->slug.current,
|
|
53
53
|
"docRefId": doc->_id,
|
|
@@ -57,7 +57,7 @@ const TASKS_QUERY = /* groq */ `
|
|
|
57
57
|
perspective,
|
|
58
58
|
reason
|
|
59
59
|
},
|
|
60
|
-
|
|
60
|
+
assertions,
|
|
61
61
|
rawAssert,
|
|
62
62
|
baseline,
|
|
63
63
|
tags,
|
|
@@ -112,22 +112,19 @@ function buildGroqParams(filter) {
|
|
|
112
112
|
* Map a Content Lake ailf.task document directly to a LiteracyTaskDefinition.
|
|
113
113
|
*
|
|
114
114
|
* Returns null if the document is missing required fields (taskId,
|
|
115
|
-
*
|
|
115
|
+
* title, areaId, promptText). These are required by the
|
|
116
116
|
* Studio schema, but defensive coding handles edge cases (drafts,
|
|
117
117
|
* partially-created documents, etc.).
|
|
118
118
|
*/
|
|
119
119
|
function mapToLiteracyTask(raw) {
|
|
120
120
|
// Required fields — skip malformed documents
|
|
121
|
-
if (!raw.taskId ||
|
|
122
|
-
!raw.description ||
|
|
123
|
-
!raw.featureAreaId ||
|
|
124
|
-
!raw.taskPrompt) {
|
|
121
|
+
if (!raw.taskId || !raw.title || !raw.areaId || !raw.promptText) {
|
|
125
122
|
return null;
|
|
126
123
|
}
|
|
127
|
-
const docs = (raw.
|
|
124
|
+
const docs = (raw.contextDocs ?? [])
|
|
128
125
|
.map(mapCanonicalDocRef)
|
|
129
126
|
.filter((d) => d !== null);
|
|
130
|
-
const assertions = mapAssertions(raw.
|
|
127
|
+
const assertions = mapAssertions(raw.assertions ?? []);
|
|
131
128
|
// Append raw pass-through assertions (escape hatch for arbitrary Promptfoo
|
|
132
129
|
// assertion types that aren't in the curated list). These bypass template
|
|
133
130
|
// resolution and flow directly into the expanded Promptfoo test case as
|
|
@@ -158,9 +155,9 @@ function mapToLiteracyTask(raw) {
|
|
|
158
155
|
return {
|
|
159
156
|
mode: "literacy",
|
|
160
157
|
id: raw.taskId,
|
|
161
|
-
title: raw.
|
|
162
|
-
area: raw.
|
|
163
|
-
prompt: { text: raw.
|
|
158
|
+
title: raw.title,
|
|
159
|
+
area: raw.areaId,
|
|
160
|
+
prompt: { text: raw.promptText },
|
|
164
161
|
context: { docs },
|
|
165
162
|
assertions: allAssertions,
|
|
166
163
|
docCoverage: raw.docCoverage ?? false,
|
|
@@ -172,7 +169,7 @@ function mapToLiteracyTask(raw) {
|
|
|
172
169
|
};
|
|
173
170
|
}
|
|
174
171
|
/**
|
|
175
|
-
* Map a Content Lake
|
|
172
|
+
* Map a Content Lake context doc entry to the polymorphic CanonicalDocRef.
|
|
176
173
|
*
|
|
177
174
|
* Uses `refType` to determine which value field to read. Falls back to
|
|
178
175
|
* slug-based resolution for backward compatibility (documents created
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
export { CompositeTaskSource } from "./composite-task-source.js";
|
|
2
2
|
export { ContentLakeTaskSource } from "./content-lake-task-source.js";
|
|
3
|
-
export {
|
|
3
|
+
export { CanonicalTaskFileSchema, CanonicalTaskSchema, CURATED_ASSERTION_TYPES, detectLegacyFieldNames, parseCanonicalTaskFile, parseRepoConfig, RepoConfigSchema, RUBRIC_TEMPLATE_NAMES, type CanonicalTask, type CuratedAssertionType, type RepoConfig, type RubricTemplateName, } from "./repo-schemas.js";
|
|
4
4
|
export { RepoTaskSource } from "./repo-task-source.js";
|
|
5
5
|
export { detectTriggerContext, resolveTrigger, type ResolvedTrigger, type TriggerContext, } from "./repo-trigger.js";
|
|
6
|
-
export { formatValidationResult,
|
|
6
|
+
export { formatValidationResult, validateCanonicalTasks, type ValidationMessage, type ValidationResult, } from "./repo-validation.js";
|
|
7
7
|
export { discoverTsTaskFiles, loadAllTsTaskFiles, loadTsTaskFile, } from "./task-file-loader.js";
|
|
8
8
|
export { YamlTaskSource } from "./yaml-task-source.js";
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
export { CompositeTaskSource } from "./composite-task-source.js";
|
|
2
2
|
export { ContentLakeTaskSource } from "./content-lake-task-source.js";
|
|
3
|
-
export {
|
|
3
|
+
export { CanonicalTaskFileSchema, CanonicalTaskSchema, CURATED_ASSERTION_TYPES, detectLegacyFieldNames, parseCanonicalTaskFile, parseRepoConfig, RepoConfigSchema, RUBRIC_TEMPLATE_NAMES, } from "./repo-schemas.js";
|
|
4
4
|
export { RepoTaskSource } from "./repo-task-source.js";
|
|
5
5
|
export { detectTriggerContext, resolveTrigger, } from "./repo-trigger.js";
|
|
6
|
-
export { formatValidationResult,
|
|
6
|
+
export { formatValidationResult, validateCanonicalTasks, } from "./repo-validation.js";
|
|
7
7
|
export { discoverTsTaskFiles, loadAllTsTaskFiles, loadTsTaskFile, } from "./task-file-loader.js";
|
|
8
8
|
export { YamlTaskSource } from "./yaml-task-source.js";
|
|
@@ -1,29 +1,231 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* repo-schemas.ts —
|
|
2
|
+
* repo-schemas.ts — Canonical Zod schemas for task and config validation.
|
|
3
3
|
*
|
|
4
|
-
* Task schemas
|
|
5
|
-
*
|
|
6
|
-
*
|
|
4
|
+
* Task schemas validate .ailf/tasks/*.yaml and .task.ts files against the
|
|
5
|
+
* canonical GeneralizedTaskDefinition shape. Field names match the internal
|
|
6
|
+
* domain model: `area` (not featureArea), `assertions` (not assert),
|
|
7
|
+
* `context.docs` (not canonicalDocs), `prompt.text` (not vars.task).
|
|
7
8
|
*
|
|
8
|
-
*
|
|
9
|
-
*
|
|
10
|
-
* external tools that only validate task YAML.
|
|
9
|
+
* Previously this file re-exported from @sanity/ailf-tasks. That package
|
|
10
|
+
* has been eliminated — all schema logic now lives here.
|
|
11
11
|
*
|
|
12
|
-
*
|
|
12
|
+
* Config schemas (RepoConfigSchema, trigger config) are eval-pipeline-
|
|
13
|
+
* specific and remain here unchanged.
|
|
14
|
+
*
|
|
15
|
+
* @see packages/core/src/types/generalized-task.ts — canonical TypeScript types
|
|
13
16
|
* @see docs/exec-plans/tasks-as-content/phase-4-repo-based-tasks.md
|
|
14
17
|
*/
|
|
15
|
-
import { RepoTaskFileSchema as _Schema } from "../../_vendor/ailf-tasks/index.d.ts";
|
|
16
18
|
import { z } from "zod";
|
|
17
|
-
export { CURATED_ASSERTION_TYPES, RepoTaskFileSchema, RepoTaskSchema, RUBRIC_TEMPLATE_NAMES, type CuratedAssertionType, type RepoTask, type RubricTemplateName, } from "../../_vendor/ailf-tasks/index.d.ts";
|
|
18
|
-
export { loadTaskDir, parseTaskFile } from "../../_vendor/ailf-tasks/index.d.ts";
|
|
19
19
|
/**
|
|
20
|
-
*
|
|
21
|
-
*
|
|
20
|
+
* The set of assertion types allowed in task files.
|
|
21
|
+
*
|
|
22
|
+
* This is a curated subset of Promptfoo assertion types — we expose only the
|
|
23
|
+
* types that are stable, well-documented, and useful for external authors.
|
|
24
|
+
*/
|
|
25
|
+
export declare const CURATED_ASSERTION_TYPES: readonly ["llm-rubric", "contains", "contains-any", "contains-all", "not-contains", "icontains", "icontains-any", "regex", "javascript", "similar", "cost", "latency"];
|
|
26
|
+
export type CuratedAssertionType = (typeof CURATED_ASSERTION_TYPES)[number];
|
|
27
|
+
/**
|
|
28
|
+
* Valid rubric template names — must match keys in config/rubrics.yaml.
|
|
29
|
+
*/
|
|
30
|
+
export declare const RUBRIC_TEMPLATE_NAMES: readonly ["task-completion", "code-correctness", "doc-coverage"];
|
|
31
|
+
export type RubricTemplateName = (typeof RUBRIC_TEMPLATE_NAMES)[number];
|
|
32
|
+
/**
|
|
33
|
+
* Zod schema for a single task definition using canonical field names.
|
|
34
|
+
*
|
|
35
|
+
* Uses .passthrough() to allow mode-specific fields (serverConfig, sandbox,
|
|
36
|
+
* handler, etc.) without listing every possible field. Mode-specific
|
|
37
|
+
* validation is deferred to the pipeline's mode handlers.
|
|
38
|
+
*/
|
|
39
|
+
export declare const CanonicalTaskSchema: z.ZodObject<{
|
|
40
|
+
id: z.ZodString;
|
|
41
|
+
mode: z.ZodDefault<z.ZodString>;
|
|
42
|
+
title: z.ZodString;
|
|
43
|
+
description: z.ZodOptional<z.ZodString>;
|
|
44
|
+
area: z.ZodOptional<z.ZodString>;
|
|
45
|
+
difficulty: z.ZodOptional<z.ZodEnum<{
|
|
46
|
+
basic: "basic";
|
|
47
|
+
intermediate: "intermediate";
|
|
48
|
+
advanced: "advanced";
|
|
49
|
+
}>>;
|
|
50
|
+
status: z.ZodDefault<z.ZodOptional<z.ZodEnum<{
|
|
51
|
+
active: "active";
|
|
52
|
+
draft: "draft";
|
|
53
|
+
paused: "paused";
|
|
54
|
+
archived: "archived";
|
|
55
|
+
}>>>;
|
|
56
|
+
tags: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
57
|
+
prompt: z.ZodOptional<z.ZodObject<{
|
|
58
|
+
template: z.ZodOptional<z.ZodString>;
|
|
59
|
+
text: z.ZodOptional<z.ZodString>;
|
|
60
|
+
systemMessage: z.ZodOptional<z.ZodString>;
|
|
61
|
+
vars: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
|
|
62
|
+
}, z.core.$strip>>;
|
|
63
|
+
context: z.ZodOptional<z.ZodObject<{
|
|
64
|
+
docs: z.ZodOptional<z.ZodArray<z.ZodUnion<readonly [z.ZodObject<{
|
|
65
|
+
id: z.ZodString;
|
|
66
|
+
reason: z.ZodDefault<z.ZodOptional<z.ZodString>>;
|
|
67
|
+
slug: z.ZodOptional<z.ZodString>;
|
|
68
|
+
path: z.ZodOptional<z.ZodString>;
|
|
69
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
70
|
+
slug: z.ZodString;
|
|
71
|
+
reason: z.ZodDefault<z.ZodOptional<z.ZodString>>;
|
|
72
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
73
|
+
path: z.ZodString;
|
|
74
|
+
reason: z.ZodDefault<z.ZodOptional<z.ZodString>>;
|
|
75
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
76
|
+
perspective: z.ZodString;
|
|
77
|
+
reason: z.ZodDefault<z.ZodOptional<z.ZodString>>;
|
|
78
|
+
}, z.core.$strip>]>>>;
|
|
79
|
+
fixtures: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
80
|
+
}, z.core.$strip>>;
|
|
81
|
+
assertions: z.ZodOptional<z.ZodArray<z.ZodUnion<readonly [z.ZodObject<{
|
|
82
|
+
type: z.ZodLiteral<"llm-rubric">;
|
|
83
|
+
template: z.ZodEnum<{
|
|
84
|
+
"task-completion": "task-completion";
|
|
85
|
+
"code-correctness": "code-correctness";
|
|
86
|
+
"doc-coverage": "doc-coverage";
|
|
87
|
+
}>;
|
|
88
|
+
criteria: z.ZodArray<z.ZodString>;
|
|
89
|
+
weight: z.ZodOptional<z.ZodNumber>;
|
|
90
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
91
|
+
type: z.ZodEnum<{
|
|
92
|
+
"llm-rubric": "llm-rubric";
|
|
93
|
+
contains: "contains";
|
|
94
|
+
"contains-any": "contains-any";
|
|
95
|
+
"contains-all": "contains-all";
|
|
96
|
+
"not-contains": "not-contains";
|
|
97
|
+
icontains: "icontains";
|
|
98
|
+
"icontains-any": "icontains-any";
|
|
99
|
+
regex: "regex";
|
|
100
|
+
javascript: "javascript";
|
|
101
|
+
similar: "similar";
|
|
102
|
+
cost: "cost";
|
|
103
|
+
latency: "latency";
|
|
104
|
+
}>;
|
|
105
|
+
value: z.ZodOptional<z.ZodUnknown>;
|
|
106
|
+
threshold: z.ZodOptional<z.ZodNumber>;
|
|
107
|
+
weight: z.ZodOptional<z.ZodNumber>;
|
|
108
|
+
}, z.core.$loose>]>>>;
|
|
109
|
+
referenceSolution: z.ZodOptional<z.ZodString>;
|
|
110
|
+
docCoverage: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
|
|
111
|
+
baseline: z.ZodOptional<z.ZodObject<{
|
|
112
|
+
enabled: z.ZodOptional<z.ZodBoolean>;
|
|
113
|
+
rubric: z.ZodOptional<z.ZodEnum<{
|
|
114
|
+
full: "full";
|
|
115
|
+
abbreviated: "abbreviated";
|
|
116
|
+
none: "none";
|
|
117
|
+
}>>;
|
|
118
|
+
}, z.core.$strip>>;
|
|
119
|
+
rubric: z.ZodOptional<z.ZodUnknown>;
|
|
120
|
+
providers: z.ZodOptional<z.ZodArray<z.ZodUnknown>>;
|
|
121
|
+
options: z.ZodOptional<z.ZodUnknown>;
|
|
122
|
+
metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
|
|
123
|
+
}, z.core.$loose>;
|
|
124
|
+
export type CanonicalTask = z.infer<typeof CanonicalTaskSchema>;
|
|
125
|
+
/**
|
|
126
|
+
* Schema for an array of canonical tasks — what a single .ailf/tasks/*.yaml
|
|
127
|
+
* file contains. Each file must define at least one task.
|
|
128
|
+
*/
|
|
129
|
+
export declare const CanonicalTaskFileSchema: z.ZodArray<z.ZodObject<{
|
|
130
|
+
id: z.ZodString;
|
|
131
|
+
mode: z.ZodDefault<z.ZodString>;
|
|
132
|
+
title: z.ZodString;
|
|
133
|
+
description: z.ZodOptional<z.ZodString>;
|
|
134
|
+
area: z.ZodOptional<z.ZodString>;
|
|
135
|
+
difficulty: z.ZodOptional<z.ZodEnum<{
|
|
136
|
+
basic: "basic";
|
|
137
|
+
intermediate: "intermediate";
|
|
138
|
+
advanced: "advanced";
|
|
139
|
+
}>>;
|
|
140
|
+
status: z.ZodDefault<z.ZodOptional<z.ZodEnum<{
|
|
141
|
+
active: "active";
|
|
142
|
+
draft: "draft";
|
|
143
|
+
paused: "paused";
|
|
144
|
+
archived: "archived";
|
|
145
|
+
}>>>;
|
|
146
|
+
tags: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
147
|
+
prompt: z.ZodOptional<z.ZodObject<{
|
|
148
|
+
template: z.ZodOptional<z.ZodString>;
|
|
149
|
+
text: z.ZodOptional<z.ZodString>;
|
|
150
|
+
systemMessage: z.ZodOptional<z.ZodString>;
|
|
151
|
+
vars: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
|
|
152
|
+
}, z.core.$strip>>;
|
|
153
|
+
context: z.ZodOptional<z.ZodObject<{
|
|
154
|
+
docs: z.ZodOptional<z.ZodArray<z.ZodUnion<readonly [z.ZodObject<{
|
|
155
|
+
id: z.ZodString;
|
|
156
|
+
reason: z.ZodDefault<z.ZodOptional<z.ZodString>>;
|
|
157
|
+
slug: z.ZodOptional<z.ZodString>;
|
|
158
|
+
path: z.ZodOptional<z.ZodString>;
|
|
159
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
160
|
+
slug: z.ZodString;
|
|
161
|
+
reason: z.ZodDefault<z.ZodOptional<z.ZodString>>;
|
|
162
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
163
|
+
path: z.ZodString;
|
|
164
|
+
reason: z.ZodDefault<z.ZodOptional<z.ZodString>>;
|
|
165
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
166
|
+
perspective: z.ZodString;
|
|
167
|
+
reason: z.ZodDefault<z.ZodOptional<z.ZodString>>;
|
|
168
|
+
}, z.core.$strip>]>>>;
|
|
169
|
+
fixtures: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
170
|
+
}, z.core.$strip>>;
|
|
171
|
+
assertions: z.ZodOptional<z.ZodArray<z.ZodUnion<readonly [z.ZodObject<{
|
|
172
|
+
type: z.ZodLiteral<"llm-rubric">;
|
|
173
|
+
template: z.ZodEnum<{
|
|
174
|
+
"task-completion": "task-completion";
|
|
175
|
+
"code-correctness": "code-correctness";
|
|
176
|
+
"doc-coverage": "doc-coverage";
|
|
177
|
+
}>;
|
|
178
|
+
criteria: z.ZodArray<z.ZodString>;
|
|
179
|
+
weight: z.ZodOptional<z.ZodNumber>;
|
|
180
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
181
|
+
type: z.ZodEnum<{
|
|
182
|
+
"llm-rubric": "llm-rubric";
|
|
183
|
+
contains: "contains";
|
|
184
|
+
"contains-any": "contains-any";
|
|
185
|
+
"contains-all": "contains-all";
|
|
186
|
+
"not-contains": "not-contains";
|
|
187
|
+
icontains: "icontains";
|
|
188
|
+
"icontains-any": "icontains-any";
|
|
189
|
+
regex: "regex";
|
|
190
|
+
javascript: "javascript";
|
|
191
|
+
similar: "similar";
|
|
192
|
+
cost: "cost";
|
|
193
|
+
latency: "latency";
|
|
194
|
+
}>;
|
|
195
|
+
value: z.ZodOptional<z.ZodUnknown>;
|
|
196
|
+
threshold: z.ZodOptional<z.ZodNumber>;
|
|
197
|
+
weight: z.ZodOptional<z.ZodNumber>;
|
|
198
|
+
}, z.core.$loose>]>>>;
|
|
199
|
+
referenceSolution: z.ZodOptional<z.ZodString>;
|
|
200
|
+
docCoverage: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
|
|
201
|
+
baseline: z.ZodOptional<z.ZodObject<{
|
|
202
|
+
enabled: z.ZodOptional<z.ZodBoolean>;
|
|
203
|
+
rubric: z.ZodOptional<z.ZodEnum<{
|
|
204
|
+
full: "full";
|
|
205
|
+
abbreviated: "abbreviated";
|
|
206
|
+
none: "none";
|
|
207
|
+
}>>;
|
|
208
|
+
}, z.core.$strip>>;
|
|
209
|
+
rubric: z.ZodOptional<z.ZodUnknown>;
|
|
210
|
+
providers: z.ZodOptional<z.ZodArray<z.ZodUnknown>>;
|
|
211
|
+
options: z.ZodOptional<z.ZodUnknown>;
|
|
212
|
+
metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
|
|
213
|
+
}, z.core.$loose>>;
|
|
214
|
+
/**
|
|
215
|
+
* Parse and validate a task file's content against the canonical schema.
|
|
216
|
+
* Returns typed tasks or throws with a user-friendly Zod error message.
|
|
217
|
+
*
|
|
218
|
+
* Accepts pre-parsed YAML data (unknown), not a raw string.
|
|
219
|
+
*/
|
|
220
|
+
export declare function parseCanonicalTaskFile(raw: unknown, filename: string): CanonicalTask[];
|
|
221
|
+
/**
|
|
222
|
+
* Detect legacy field names in raw task data and return helpful messages.
|
|
22
223
|
*
|
|
23
|
-
*
|
|
24
|
-
*
|
|
224
|
+
* Runs BEFORE Zod parsing to catch the most common migration mistake —
|
|
225
|
+
* using old field names from @sanity/ailf-tasks instead of the canonical
|
|
226
|
+
* GeneralizedTaskDefinition shape.
|
|
25
227
|
*/
|
|
26
|
-
export declare function
|
|
228
|
+
export declare function detectLegacyFieldNames(raw: unknown, filename: string): string[];
|
|
27
229
|
/**
|
|
28
230
|
* Zod schema for .ailf/config.yaml — controls documentation source,
|
|
29
231
|
* report destination, and trigger behavior for evaluations from an
|