@sanity/ailf 3.8.0 → 3.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32) hide show
  1. package/dist/adapters/config-sources/file-config-adapter.js +4 -5
  2. package/dist/adapters/task-sources/repo-schemas.d.ts +3 -3
  3. package/dist/cli-program.d.ts +39 -0
  4. package/dist/cli-program.js +137 -0
  5. package/dist/cli.d.ts +6 -0
  6. package/dist/cli.js +12 -122
  7. package/dist/pipeline/mirror-repo-tasks.d.ts +1 -1
  8. package/package.json +5 -3
  9. package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.d.ts +0 -10
  10. package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js +0 -366
  11. package/dist/pipeline/compiler/__tests__/assertion-mapper.test.d.ts +0 -9
  12. package/dist/pipeline/compiler/__tests__/assertion-mapper.test.js +0 -145
  13. package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.d.ts +0 -10
  14. package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.js +0 -314
  15. package/dist/pipeline/compiler/__tests__/literacy-handler.test.d.ts +0 -10
  16. package/dist/pipeline/compiler/__tests__/literacy-handler.test.js +0 -486
  17. package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.d.ts +0 -10
  18. package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +0 -425
  19. package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.d.ts +0 -9
  20. package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.js +0 -332
  21. package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.d.ts +0 -12
  22. package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.js +0 -210
  23. package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.d.ts +0 -7
  24. package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.js +0 -404
  25. package/dist/pipeline/compiler/__tests__/scoring-bridge.test.d.ts +0 -10
  26. package/dist/pipeline/compiler/__tests__/scoring-bridge.test.js +0 -184
  27. package/dist/pipeline/compiler/__tests__/task-graph-builder.test.d.ts +0 -8
  28. package/dist/pipeline/compiler/__tests__/task-graph-builder.test.js +0 -301
  29. package/dist/pipeline/compiler/__tests__/telemetry.test.d.ts +0 -9
  30. package/dist/pipeline/compiler/__tests__/telemetry.test.js +0 -503
  31. package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.d.ts +0 -10
  32. package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.js +0 -509
@@ -1,486 +0,0 @@
1
- /**
2
- * literacy-handler.test.ts — Tests for literacy mode compilation.
3
- *
4
- * Tests validation, gold/baseline entry generation, rubric template
5
- * resolution, doc-coverage auto-generation, prompt assignment, baseline
6
- * filtering, and the literacy bridge for LiteracyTaskDefinition.
7
- *
8
- * Run: npx tsx --test src/pipeline/compiler/__tests__/literacy-handler.test.ts
9
- */
10
- import assert from "node:assert/strict";
11
- import { describe, it } from "node:test";
12
- import { LiteracyVariant } from "../../normalize-mode.js";
13
- import { compileLiteracyTask, validateLiteracyTask, } from "../mode-handlers/literacy/index.js";
14
- import { compileLiteracyTasks, compareCompilerOutputs, } from "../literacy-bridge.js";
15
- // ---------------------------------------------------------------------------
16
- // Helpers
17
- // ---------------------------------------------------------------------------
18
- const RUBRIC_CONFIG = {
19
- templates: {
20
- "task-completion": {
21
- dimension: "task-completion",
22
- header: "Score task completion from 0 to 100:",
23
- scale: [
24
- "0: Not attempted",
25
- "50: Partially complete",
26
- "100: Fully complete",
27
- ],
28
- criteria_label: "Check for:",
29
- },
30
- "code-correctness": {
31
- dimension: "code-correctness",
32
- header: "Score code correctness from 0 to 100:",
33
- scale: ["0: Does not compile", "50: Compiles but bugs", "100: Correct"],
34
- criteria_label: "Check for:",
35
- },
36
- "doc-coverage": {
37
- dimension: "doc-coverage",
38
- header: "Score documentation coverage from 0 to 100:",
39
- scale: [
40
- "0: No docs used",
41
- "50: Some docs used",
42
- "100: All relevant docs used",
43
- ],
44
- },
45
- },
46
- };
47
- function makeTask(overrides) {
48
- return {
49
- mode: "literacy",
50
- id: "test-literacy-task",
51
- title: "Test literacy task",
52
- area: "groq",
53
- prompt: { text: "Write a GROQ query to fetch all blog posts" },
54
- context: {
55
- docs: [{ slug: "groq-overview", reason: "Main GROQ reference" }],
56
- },
57
- referenceSolution: "canonical/groq-blog.ts",
58
- docCoverage: true,
59
- assertions: [
60
- {
61
- type: "llm-rubric",
62
- template: "task-completion",
63
- criteria: ["Uses _type filter", "Has projection"],
64
- },
65
- {
66
- type: "contains",
67
- value: "_type",
68
- },
69
- ],
70
- baseline: { enabled: true, rubric: "full" },
71
- ...overrides,
72
- };
73
- }
74
- // ---------------------------------------------------------------------------
75
- // validateLiteracyTask
76
- // ---------------------------------------------------------------------------
77
- describe("validateLiteracyTask", () => {
78
- it("passes for a valid task", () => {
79
- const errors = validateLiteracyTask(makeTask());
80
- assert.equal(errors.length, 0);
81
- });
82
- it("errors on missing ID", () => {
83
- const errors = validateLiteracyTask(makeTask({ id: "" }));
84
- assert.ok(errors.some((e) => e.field === "id"));
85
- });
86
- it("errors on missing title", () => {
87
- const errors = validateLiteracyTask(makeTask({ title: "" }));
88
- assert.ok(errors.some((e) => e.field === "title"));
89
- });
90
- it("errors on missing prompt text", () => {
91
- const errors = validateLiteracyTask(makeTask({ prompt: { text: "" } }));
92
- assert.ok(errors.some((e) => e.field === "prompt"));
93
- });
94
- });
95
- // ---------------------------------------------------------------------------
96
- // compileLiteracyTask — gold + baseline entries
97
- // ---------------------------------------------------------------------------
98
- describe("compileLiteracyTask — entries", () => {
99
- it("produces gold + baseline entries in baseline mode", () => {
100
- const result = compileLiteracyTask(makeTask(), {
101
- rubricConfig: RUBRIC_CONFIG,
102
- });
103
- assert.equal(result.tests.length, 2);
104
- assert.ok(result.tests[0].description.includes("(gold)"));
105
- assert.ok(result.tests[1].description.includes("(baseline)"));
106
- });
107
- it("produces only gold entry in agentic mode", () => {
108
- const result = compileLiteracyTask(makeTask(), {
109
- evalMode: LiteracyVariant.AGENTIC,
110
- rubricConfig: RUBRIC_CONFIG,
111
- });
112
- assert.equal(result.tests.length, 1);
113
- assert.ok(result.tests[0].description.includes("(gold)"));
114
- });
115
- it("skips baseline when baseline.enabled is false", () => {
116
- const result = compileLiteracyTask(makeTask({ baseline: { enabled: false } }), { rubricConfig: RUBRIC_CONFIG });
117
- assert.equal(result.tests.length, 1);
118
- });
119
- it("gold entry has docs variable", () => {
120
- const result = compileLiteracyTask(makeTask(), {
121
- rubricConfig: RUBRIC_CONFIG,
122
- });
123
- assert.ok(result.tests[0].vars.docs.includes("file://"));
124
- });
125
- it("baseline entry has empty docs", () => {
126
- const result = compileLiteracyTask(makeTask(), {
127
- rubricConfig: RUBRIC_CONFIG,
128
- });
129
- assert.equal(result.tests[1].vars.docs, "");
130
- });
131
- it("gold entry has with-docs prompt filter", () => {
132
- const result = compileLiteracyTask(makeTask(), {
133
- rubricConfig: RUBRIC_CONFIG,
134
- });
135
- assert.deepEqual(result.tests[0].prompts, ["with-docs"]);
136
- });
137
- it("baseline entry has without-docs prompt filter", () => {
138
- const result = compileLiteracyTask(makeTask(), {
139
- rubricConfig: RUBRIC_CONFIG,
140
- });
141
- assert.deepEqual(result.tests[1].prompts, ["without-docs"]);
142
- });
143
- it("sets empty docs when task has no canonical docs", () => {
144
- const result = compileLiteracyTask(makeTask({ context: { docs: [] } }), {
145
- rubricConfig: RUBRIC_CONFIG,
146
- });
147
- assert.equal(result.tests[0].vars.docs, "");
148
- });
149
- });
150
- // ---------------------------------------------------------------------------
151
- // compileLiteracyTask — prompts
152
- // ---------------------------------------------------------------------------
153
- describe("compileLiteracyTask — prompts", () => {
154
- it("generates with-docs and without-docs prompts in baseline mode", () => {
155
- const result = compileLiteracyTask(makeTask(), {
156
- rubricConfig: RUBRIC_CONFIG,
157
- });
158
- assert.equal(result.prompts.length, 2);
159
- assert.equal(result.prompts[0].id, "with-docs");
160
- assert.equal(result.prompts[1].id, "without-docs");
161
- });
162
- it("generates single agentic prompt in agentic mode", () => {
163
- const result = compileLiteracyTask(makeTask(), {
164
- evalMode: LiteracyVariant.AGENTIC,
165
- rubricConfig: RUBRIC_CONFIG,
166
- });
167
- assert.equal(result.prompts.length, 1);
168
- assert.equal(result.prompts[0].id, LiteracyVariant.AGENTIC);
169
- });
170
- });
171
- // ---------------------------------------------------------------------------
172
- // compileLiteracyTask — assertion resolution
173
- // ---------------------------------------------------------------------------
174
- describe("compileLiteracyTask — assertions", () => {
175
- it("resolves templated assertions from rubric config", () => {
176
- const result = compileLiteracyTask(makeTask(), {
177
- rubricConfig: RUBRIC_CONFIG,
178
- });
179
- const goldAsserts = result.tests[0].assert;
180
- const rubricAssert = goldAsserts.find((a) => a.type === "llm-rubric");
181
- assert.ok(rubricAssert);
182
- assert.ok(rubricAssert.value.includes("Score task completion"));
183
- assert.ok(rubricAssert.value.includes("Uses _type filter"));
184
- });
185
- it("includes dimension metadata on templated assertions", () => {
186
- const result = compileLiteracyTask(makeTask(), {
187
- rubricConfig: RUBRIC_CONFIG,
188
- });
189
- const goldAsserts = result.tests[0].assert;
190
- const rubricAssert = goldAsserts.find((a) => a.type === "llm-rubric");
191
- assert.ok(rubricAssert);
192
- const meta = rubricAssert.metadata;
193
- assert.equal(meta?.dimension, "task-completion");
194
- assert.equal(meta?.maxScore, 100);
195
- });
196
- it("passes through value assertions", () => {
197
- const result = compileLiteracyTask(makeTask(), {
198
- rubricConfig: RUBRIC_CONFIG,
199
- });
200
- const goldAsserts = result.tests[0].assert;
201
- const containsAssert = goldAsserts.find((a) => a.type === "contains");
202
- assert.ok(containsAssert);
203
- assert.equal(containsAssert.value, "_type");
204
- });
205
- it("auto-generates doc-coverage assertion when docCoverage is true", () => {
206
- const result = compileLiteracyTask(makeTask(), {
207
- rubricConfig: RUBRIC_CONFIG,
208
- });
209
- const goldAsserts = result.tests[0].assert;
210
- const docCovAsserts = goldAsserts.filter((a) => a.type === "llm-rubric" &&
211
- a.metadata &&
212
- a.metadata.dimension === "doc-coverage");
213
- assert.equal(docCovAsserts.length, 1);
214
- });
215
- it("does not auto-generate doc-coverage when docCoverage is false", () => {
216
- const result = compileLiteracyTask(makeTask({ docCoverage: false }), {
217
- rubricConfig: RUBRIC_CONFIG,
218
- });
219
- const goldAsserts = result.tests[0].assert;
220
- const docCovAsserts = goldAsserts.filter((a) => a.type === "llm-rubric" &&
221
- a.metadata &&
222
- a.metadata.dimension === "doc-coverage");
223
- assert.equal(docCovAsserts.length, 0);
224
- });
225
- it("sets grader provider on llm-rubric assertions", () => {
226
- const result = compileLiteracyTask(makeTask(), {
227
- rubricConfig: RUBRIC_CONFIG,
228
- graderProvider: "openai:chat:gpt-5",
229
- });
230
- const rubrics = result.tests[0].assert.filter((a) => a.type === "llm-rubric");
231
- assert.ok(rubrics.every((r) => r.provider === "openai:chat:gpt-5"));
232
- });
233
- });
234
- // ---------------------------------------------------------------------------
235
- // compileLiteracyTask — baseline assertion filtering
236
- // ---------------------------------------------------------------------------
237
- describe("compileLiteracyTask — baseline assertions", () => {
238
- it("carries all assertions when baseline rubric is full", () => {
239
- const result = compileLiteracyTask(makeTask({ baseline: { enabled: true, rubric: "full" } }), { rubricConfig: RUBRIC_CONFIG });
240
- const goldCount = result.tests[0].assert.length;
241
- const baselineCount = result.tests[1].assert.length;
242
- assert.equal(goldCount, baselineCount);
243
- });
244
- it("abbreviates baseline assertions when rubric is abbreviated", () => {
245
- const result = compileLiteracyTask(makeTask({ baseline: { enabled: true, rubric: "abbreviated" } }), { rubricConfig: RUBRIC_CONFIG });
246
- const baselineAsserts = result.tests[1].assert;
247
- // Abbreviated: only one summary llm-rubric
248
- assert.equal(baselineAsserts.length, 1);
249
- assert.equal(baselineAsserts[0].type, "llm-rubric");
250
- assert.ok(baselineAsserts[0].value.includes("Score task completion"));
251
- });
252
- it("has no assertions when baseline rubric is none", () => {
253
- const result = compileLiteracyTask(makeTask({ baseline: { enabled: true, rubric: "none" } }), { rubricConfig: RUBRIC_CONFIG });
254
- assert.equal(result.tests[1].assert, undefined);
255
- });
256
- });
257
- // ---------------------------------------------------------------------------
258
- // compileLiteracyTasks (bridge)
259
- // ---------------------------------------------------------------------------
260
- describe("compileLiteracyTasks — bridge", () => {
261
- it("compiles multiple tasks", () => {
262
- const tasks = [
263
- makeTask({ id: "task-1", title: "First task" }),
264
- makeTask({ id: "task-2", title: "Second task" }),
265
- ];
266
- const result = compileLiteracyTasks(tasks, {
267
- rootDir: "/tmp/fake-root",
268
- });
269
- assert.equal(result.tasks.length, 2);
270
- assert.ok(result.totalTests >= 4); // 2 tasks × (gold + baseline)
271
- });
272
- });
273
- // ---------------------------------------------------------------------------
274
- // compareCompilerOutputs (parallel comparison gate — task 7b)
275
- // ---------------------------------------------------------------------------
276
- describe("compareCompilerOutputs — comparison gate", () => {
277
- it("passes when outputs match", () => {
278
- const bridge = compileLiteracyTasks([makeTask({ id: "task-1", title: "Test" })], { rootDir: "/tmp/fake-root" });
279
- // Simulate legacy entries that match
280
- const legacyEntries = [
281
- {
282
- description: "task-1 Test (gold)",
283
- vars: { task: "Write a GROQ query", docs: "file://..." },
284
- assert: bridge.tasks[0].result.tests[0].assert?.map((a) => ({
285
- type: a.type,
286
- value: a.value,
287
- })),
288
- },
289
- {
290
- description: "task-1 Test (baseline)",
291
- vars: { task: "Write a GROQ query", docs: "" },
292
- assert: bridge.tasks[0].result.tests[1].assert?.map((a) => ({
293
- type: a.type,
294
- value: a.value,
295
- })),
296
- },
297
- ];
298
- const comparison = compareCompilerOutputs(legacyEntries, bridge);
299
- assert.ok(typeof comparison.passed === "boolean");
300
- assert.ok(comparison.summary.length > 0);
301
- });
302
- it("detects test count mismatch", () => {
303
- const bridge = compileLiteracyTasks([makeTask({ id: "task-1", title: "Test" })], { rootDir: "/tmp/fake-root" });
304
- // Provide only gold entry — missing baseline creates a count mismatch
305
- const legacyEntries = [
306
- {
307
- description: "task-1 Test (gold)",
308
- vars: { task: "Write a GROQ query", docs: "file://..." },
309
- assert: bridge.tasks[0].result.tests[0].assert?.map((a) => ({
310
- type: a.type,
311
- value: a.value,
312
- })),
313
- },
314
- ];
315
- const comparison = compareCompilerOutputs(legacyEntries, bridge);
316
- assert.equal(comparison.passed, false, "expected mismatch to be detected");
317
- assert.ok(comparison.discrepancies.length > 0, "expected discrepancies");
318
- assert.ok(comparison.discrepancies.some((d) => d.field === "testCount"), "expected testCount discrepancy");
319
- });
320
- it("detects assertion count mismatch on gold entries", () => {
321
- const bridge = compileLiteracyTasks([makeTask({ id: "task-1", title: "Test" })], { rootDir: "/tmp/fake-root" });
322
- // The bridge gold test has assertions from the task definition.
323
- // Provide a legacy gold entry with a different number of assertions
324
- // to trigger the assertion count check.
325
- const bridgeGoldAssertCount = bridge.tasks[0].result.tests[0].assert?.length ?? 0;
326
- assert.ok(bridgeGoldAssertCount > 0, "bridge should have at least one assertion");
327
- const legacyEntries = [
328
- {
329
- description: "task-1 Test (gold)",
330
- vars: { task: "Write a GROQ query", docs: "file://..." },
331
- // Deliberately provide wrong number of assertions
332
- assert: [],
333
- },
334
- {
335
- description: "task-1 Test (baseline)",
336
- vars: { task: "Write a GROQ query", docs: "" },
337
- },
338
- ];
339
- const comparison = compareCompilerOutputs(legacyEntries, bridge);
340
- assert.equal(comparison.passed, false, "expected mismatch to be detected");
341
- assert.ok(comparison.discrepancies.some((d) => d.field === "assertionCount"), "expected assertionCount discrepancy");
342
- });
343
- });
344
- // ---------------------------------------------------------------------------
345
- // Compilation output stability (AC: identical configs before and after)
346
- // ---------------------------------------------------------------------------
347
- describe("compileLiteracyTasks — output stability", () => {
348
- it("produces identical output across two compilations of the same input", () => {
349
- const tasks = [
350
- makeTask({ id: "task-1", title: "First task" }),
351
- makeTask({ id: "task-2", title: "Second task" }),
352
- ];
353
- const opts = { rootDir: "/tmp/fake-root" };
354
- const run1 = compileLiteracyTasks(tasks, opts);
355
- const run2 = compileLiteracyTasks(tasks, opts);
356
- // Same number of tasks and total tests
357
- assert.equal(run1.tasks.length, run2.tasks.length);
358
- assert.equal(run1.totalTests, run2.totalTests);
359
- // Per-task: same test count, same descriptions, same assertion structure
360
- for (let i = 0; i < run1.tasks.length; i++) {
361
- const r1 = run1.tasks[i].result;
362
- const r2 = run2.tasks[i].result;
363
- assert.equal(r1.tests.length, r2.tests.length, `test count mismatch for task ${i}`);
364
- for (let j = 0; j < r1.tests.length; j++) {
365
- assert.equal(r1.tests[j].description, r2.tests[j].description, `description mismatch at task ${i} test ${j}`);
366
- assert.equal(r1.tests[j].assert?.length ?? 0, r2.tests[j].assert?.length ?? 0, `assertion count mismatch at task ${i} test ${j}`);
367
- assert.deepEqual(r1.tests[j].vars, r2.tests[j].vars, `vars mismatch at task ${i} test ${j}`);
368
- // Verify assertion types and values match exactly
369
- if (r1.tests[j].assert) {
370
- for (let k = 0; k < r1.tests[j].assert.length; k++) {
371
- assert.equal(r1.tests[j].assert[k].type, r2.tests[j].assert[k].type, `assertion type mismatch at task ${i} test ${j} assert ${k}`);
372
- assert.equal(r1.tests[j].assert[k].value, r2.tests[j].assert[k].value, `assertion value mismatch at task ${i} test ${j} assert ${k}`);
373
- }
374
- }
375
- }
376
- }
377
- });
378
- it("produces identical output in baseline and agentic variants", () => {
379
- const task = makeTask({ id: "stable-task", title: "Stable task" });
380
- const baselineResult = compileLiteracyTask(task, {
381
- rubricConfig: RUBRIC_CONFIG,
382
- evalMode: LiteracyVariant.STANDARD,
383
- });
384
- const baselineResult2 = compileLiteracyTask(task, {
385
- rubricConfig: RUBRIC_CONFIG,
386
- evalMode: LiteracyVariant.STANDARD,
387
- });
388
- // Baseline produces gold + baseline entries
389
- assert.equal(baselineResult.tests.length, baselineResult2.tests.length);
390
- assert.deepEqual(baselineResult.tests.map((t) => t.description), baselineResult2.tests.map((t) => t.description));
391
- const agenticResult = compileLiteracyTask(task, {
392
- rubricConfig: RUBRIC_CONFIG,
393
- evalMode: LiteracyVariant.AGENTIC,
394
- });
395
- const agenticResult2 = compileLiteracyTask(task, {
396
- rubricConfig: RUBRIC_CONFIG,
397
- evalMode: LiteracyVariant.AGENTIC,
398
- });
399
- // Agentic produces only gold entry
400
- assert.equal(agenticResult.tests.length, agenticResult2.tests.length);
401
- assert.deepEqual(agenticResult.tests.map((t) => t.description), agenticResult2.tests.map((t) => t.description));
402
- });
403
- });
404
- // ---------------------------------------------------------------------------
405
- // compileLiteracyTasks — TaskGraphBuilder integration (task 7c)
406
- // ---------------------------------------------------------------------------
407
- describe("compileLiteracyTasks — TaskGraphBuilder wiring", () => {
408
- it("filters out archived tasks via graph builder", () => {
409
- const tasks = [
410
- makeTask({ id: "active-task", title: "Active" }),
411
- makeTask({
412
- id: "archived-task",
413
- title: "Archived",
414
- status: "archived",
415
- }),
416
- ];
417
- const result = compileLiteracyTasks(tasks, {
418
- rootDir: "/tmp/fake-root",
419
- });
420
- assert.equal(result.tasks.length, 1);
421
- assert.equal(result.tasks[0].taskId, "active-task");
422
- assert.ok(result.warnings.some((w) => w.includes("filtered out")));
423
- });
424
- it("warns about duplicate task IDs via graph builder", () => {
425
- const tasks = [
426
- makeTask({ id: "dup-task", title: "First" }),
427
- makeTask({ id: "dup-task", title: "Second" }),
428
- ];
429
- const result = compileLiteracyTasks(tasks, {
430
- rootDir: "/tmp/fake-root",
431
- });
432
- assert.ok(result.warnings.some((w) => w.includes("Duplicate")));
433
- });
434
- it("throws on cyclic dependencies", () => {
435
- const tasks = [
436
- makeTask({
437
- id: "task-a",
438
- title: "A",
439
- prompt: {
440
- text: "Write a GROQ query to fetch all blog posts",
441
- vars: { dependsOn: ["task-b"] },
442
- },
443
- }),
444
- makeTask({
445
- id: "task-b",
446
- title: "B",
447
- prompt: {
448
- text: "Write a GROQ query to fetch all blog posts",
449
- vars: { dependsOn: ["task-a"] },
450
- },
451
- }),
452
- ];
453
- assert.throws(() => compileLiteracyTasks(tasks, { rootDir: "/tmp/fake-root" }), /cycle/i);
454
- });
455
- it("respects topological ordering from dependencies", () => {
456
- const tasks = [
457
- makeTask({
458
- id: "step-2",
459
- title: "Step 2",
460
- prompt: {
461
- text: "Write a GROQ query to fetch all blog posts",
462
- vars: { dependsOn: ["step-1"] },
463
- },
464
- }),
465
- makeTask({ id: "step-1", title: "Step 1" }),
466
- ];
467
- const result = compileLiteracyTasks(tasks, {
468
- rootDir: "/tmp/fake-root",
469
- });
470
- // step-1 should come before step-2 despite being second in input
471
- assert.equal(result.tasks.length, 2);
472
- assert.equal(result.tasks[0].taskId, "step-1");
473
- assert.equal(result.tasks[1].taskId, "step-2");
474
- });
475
- it("returns empty result when all tasks are filtered out", () => {
476
- const tasks = [
477
- makeTask({ id: "archived-1", status: "archived" }),
478
- makeTask({ id: "archived-2", status: "archived" }),
479
- ];
480
- const result = compileLiteracyTasks(tasks, {
481
- rootDir: "/tmp/fake-root",
482
- });
483
- assert.equal(result.tasks.length, 0);
484
- assert.equal(result.totalTests, 0);
485
- });
486
- });
@@ -1,10 +0,0 @@
1
- /**
2
- * mcp-server-handler.test.ts — Tests for MCP server mode compilation.
3
- *
4
- * Tests the full MCP compilation pipeline: task validation, provider
5
- * assembly, assertion mapping, test case generation, and end-to-end
6
- * compilation of example tasks.
7
- *
8
- * Run: npx tsx --test src/pipeline/compiler/__tests__/mcp-server-handler.test.ts
9
- */
10
- export {};