@sanity/ailf 3.7.0 → 3.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104) hide show
  1. package/config/airbyte/ai_literacy_framework.connector.yaml +1 -1
  2. package/config/thresholds.ts +3 -3
  3. package/dist/_vendor/ailf-core/examples/index.d.ts +2 -2
  4. package/dist/_vendor/ailf-core/examples/index.js +2 -2
  5. package/dist/_vendor/ailf-core/ports/context.d.ts +0 -4
  6. package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +38 -12
  7. package/dist/_vendor/ailf-core/schemas/eval-config.js +102 -22
  8. package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +4 -6
  9. package/dist/_vendor/ailf-core/schemas/pipeline-request.js +1 -3
  10. package/dist/_vendor/ailf-core/schemas/schedules.d.ts +2 -2
  11. package/dist/_vendor/ailf-shared/run-classification.d.ts +2 -2
  12. package/dist/_vendor/ailf-shared/run-classification.js +1 -1
  13. package/dist/_vendor/ailf-shared/run-context.d.ts +1 -1
  14. package/dist/adapters/api-client/build-request.d.ts +0 -2
  15. package/dist/adapters/api-client/build-request.js +2 -6
  16. package/dist/adapters/config-sources/cli-config-adapter.d.ts +1 -1
  17. package/dist/adapters/config-sources/file-config-adapter.d.ts +1 -1
  18. package/dist/adapters/config-sources/file-config-adapter.js +42 -17
  19. package/dist/adapters/task-sources/repo-schemas.d.ts +41 -3
  20. package/dist/adapters/task-sources/repo-schemas.js +127 -0
  21. package/dist/cli-program.d.ts +39 -0
  22. package/dist/cli-program.js +137 -0
  23. package/dist/cli.d.ts +8 -2
  24. package/dist/cli.js +128 -142
  25. package/dist/commands/agent-report.js +1 -1
  26. package/dist/commands/calculate-scores.js +0 -2
  27. package/dist/commands/check-staleness.js +1 -1
  28. package/dist/commands/chronic-failures.js +4 -4
  29. package/dist/commands/coverage-audit.js +6 -7
  30. package/dist/commands/discovery-report.js +16 -4
  31. package/dist/commands/eval.d.ts +1 -1
  32. package/dist/commands/eval.js +1 -1
  33. package/dist/commands/explain-handler.d.ts +1 -1
  34. package/dist/commands/explain-handler.js +13 -44
  35. package/dist/commands/fetch-docs.js +0 -2
  36. package/dist/commands/generate-configs.js +0 -2
  37. package/dist/commands/grader/index.js +3 -3
  38. package/dist/commands/init.d.ts +2 -2
  39. package/dist/commands/init.js +10 -9
  40. package/dist/commands/interactive.d.ts +1 -1
  41. package/dist/commands/interactive.js +8 -8
  42. package/dist/commands/pipeline-action.d.ts +1 -3
  43. package/dist/commands/pipeline-action.js +174 -140
  44. package/dist/commands/pr-comment.js +1 -3
  45. package/dist/commands/publish.d.ts +1 -1
  46. package/dist/commands/publish.js +2 -4
  47. package/dist/commands/readiness-report.js +17 -8
  48. package/dist/commands/remote-pipeline.d.ts +1 -1
  49. package/dist/commands/remote-pipeline.js +1 -3
  50. package/dist/commands/run.d.ts +64 -0
  51. package/dist/commands/{pipeline.js → run.js} +19 -30
  52. package/dist/commands/shared/help.js +4 -4
  53. package/dist/commands/shared/options.d.ts +29 -3
  54. package/dist/commands/shared/options.js +37 -13
  55. package/dist/commands/validate-tasks.js +1 -1
  56. package/dist/commands/validate.d.ts +1 -1
  57. package/dist/commands/validate.js +2 -2
  58. package/dist/commands/weekly-digest.js +3 -3
  59. package/dist/config/thresholds.ts +3 -3
  60. package/dist/orchestration/build-app-context.js +0 -2
  61. package/dist/orchestration/build-step-sequence.js +1 -11
  62. package/dist/orchestration/steps/fetch-docs-step.js +1 -1
  63. package/dist/orchestration/steps/index.d.ts +0 -2
  64. package/dist/orchestration/steps/index.js +0 -2
  65. package/dist/orchestration/steps/run-eval-step.js +1 -1
  66. package/dist/pipeline/cache.d.ts +1 -1
  67. package/dist/pipeline/map-request-to-config.js +0 -2
  68. package/dist/pipeline/mirror-repo-tasks.d.ts +1 -1
  69. package/dist/pipeline/plan.d.ts +2 -4
  70. package/dist/pipeline/plan.js +4 -32
  71. package/dist/pipeline/run-context.d.ts +1 -1
  72. package/dist/pipeline/run-context.js +4 -4
  73. package/dist/pipeline/validate.d.ts +1 -1
  74. package/dist/pipeline/validate.js +1 -1
  75. package/package.json +11 -9
  76. package/dist/commands/pipeline.d.ts +0 -77
  77. package/dist/orchestration/steps/discovery-report-step.d.ts +0 -13
  78. package/dist/orchestration/steps/discovery-report-step.js +0 -62
  79. package/dist/orchestration/steps/readiness-step.d.ts +0 -13
  80. package/dist/orchestration/steps/readiness-step.js +0 -98
  81. package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.d.ts +0 -10
  82. package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js +0 -366
  83. package/dist/pipeline/compiler/__tests__/assertion-mapper.test.d.ts +0 -9
  84. package/dist/pipeline/compiler/__tests__/assertion-mapper.test.js +0 -145
  85. package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.d.ts +0 -10
  86. package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.js +0 -314
  87. package/dist/pipeline/compiler/__tests__/literacy-handler.test.d.ts +0 -10
  88. package/dist/pipeline/compiler/__tests__/literacy-handler.test.js +0 -486
  89. package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.d.ts +0 -10
  90. package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +0 -425
  91. package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.d.ts +0 -9
  92. package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.js +0 -332
  93. package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.d.ts +0 -12
  94. package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.js +0 -210
  95. package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.d.ts +0 -7
  96. package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.js +0 -404
  97. package/dist/pipeline/compiler/__tests__/scoring-bridge.test.d.ts +0 -10
  98. package/dist/pipeline/compiler/__tests__/scoring-bridge.test.js +0 -184
  99. package/dist/pipeline/compiler/__tests__/task-graph-builder.test.d.ts +0 -8
  100. package/dist/pipeline/compiler/__tests__/task-graph-builder.test.js +0 -301
  101. package/dist/pipeline/compiler/__tests__/telemetry.test.d.ts +0 -9
  102. package/dist/pipeline/compiler/__tests__/telemetry.test.js +0 -503
  103. package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.d.ts +0 -10
  104. package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.js +0 -509
@@ -1,314 +0,0 @@
1
- /**
2
- * knowledge-probe-handler.test.ts — Tests for knowledge probe mode compilation.
3
- *
4
- * Tests validation, provider assembly, prompt generation, assertion mapping
5
- * (including rejection of tool-use assertions), metadata generation, and
6
- * end-to-end compilation of example tasks.
7
- *
8
- * Run: npx tsx --test src/pipeline/compiler/__tests__/knowledge-probe-handler.test.ts
9
- */
10
- import assert from "node:assert/strict";
11
- import { describe, it } from "node:test";
12
- import { LiteracyVariant } from "../../normalize-mode.js";
13
- import { compileKnowledgeProbeTask, handler as probeHandler, KNOWLEDGE_PROBE_PROMPT_TEMPLATES, validateKnowledgeProbeTask, } from "../mode-handlers/knowledge-probe/index.js";
14
- import { allKnowledgeProbeExampleTasks, groqProjectionTask, defineTypeApiTask, ecosystemComparisonTask, } from "../mode-handlers/__fixtures__/knowledge-probe-example-tasks.js";
15
- // ---------------------------------------------------------------------------
16
- // Helpers
17
- // ---------------------------------------------------------------------------
18
- function makeTask(overrides) {
19
- return {
20
- mode: "knowledge-probe",
21
- id: "test-probe",
22
- title: "Test Knowledge Probe",
23
- description: "A test knowledge probe",
24
- area: "groq",
25
- ...overrides,
26
- };
27
- }
28
- // ---------------------------------------------------------------------------
29
- // handler.getPrompts() — prompt template ownership
30
- // ---------------------------------------------------------------------------
31
- describe("KnowledgeProbeHandler.getPrompts", () => {
32
- it("returns prompt templates", () => {
33
- const prompts = probeHandler.getPrompts();
34
- assert.ok(prompts, "getPrompts() should return a record");
35
- assert.ok(Object.keys(prompts).length > 0, "should return at least one template");
36
- });
37
- it("returns templates keyed by probe-specific IDs (not literacy names)", () => {
38
- const prompts = probeHandler.getPrompts();
39
- const keys = Object.keys(prompts);
40
- // Must not use literacy template names
41
- assert.ok(!keys.includes("with-docs"), "should not use literacy key 'with-docs'");
42
- assert.ok(!keys.includes("without-docs"), "should not use literacy key 'without-docs'");
43
- assert.ok(!keys.includes(LiteracyVariant.AGENTIC), "should not use literacy key 'agentic'");
44
- // Must have probe-appropriate key(s)
45
- assert.ok(keys.includes("knowledge-probe"), "should include 'knowledge-probe' template");
46
- });
47
- it("knowledge-probe template asks factual questions without context", () => {
48
- const prompts = probeHandler.getPrompts();
49
- const template = prompts["knowledge-probe"];
50
- assert.ok(template, "knowledge-probe template should exist");
51
- assert.ok(template.template.includes("{{task}}"), "should include {{task}} placeholder");
52
- // Should NOT reference documentation context
53
- assert.ok(!template.template.includes("{{docs}}"), "should NOT include {{docs}} — probes test raw model knowledge");
54
- });
55
- it("template has correct PromptTemplate shape", () => {
56
- const prompts = probeHandler.getPrompts();
57
- const template = prompts["knowledge-probe"];
58
- assert.equal(template.id, "knowledge-probe");
59
- assert.ok(template.label, "should have a human-readable label");
60
- assert.ok(template.template, "should have a template string");
61
- assert.ok(Array.isArray(template.variables), "should declare variables");
62
- assert.ok(template.variables.includes("task"), "variables should include 'task'");
63
- });
64
- it("exported KNOWLEDGE_PROBE_PROMPT_TEMPLATES matches handler.getPrompts()", () => {
65
- const fromHandler = probeHandler.getPrompts();
66
- assert.deepEqual(fromHandler, KNOWLEDGE_PROBE_PROMPT_TEMPLATES);
67
- });
68
- });
69
- // ---------------------------------------------------------------------------
70
- // validateKnowledgeProbeTask
71
- // ---------------------------------------------------------------------------
72
- describe("validateKnowledgeProbeTask", () => {
73
- it("passes for a valid minimal task", () => {
74
- const errors = validateKnowledgeProbeTask(makeTask());
75
- assert.equal(errors.length, 0);
76
- });
77
- it("errors on missing ID", () => {
78
- const errors = validateKnowledgeProbeTask(makeTask({ id: "" }));
79
- assert.ok(errors.some((e) => e.field === "id"));
80
- });
81
- it("errors on missing title", () => {
82
- const errors = validateKnowledgeProbeTask(makeTask({ title: "" }));
83
- assert.ok(errors.some((e) => e.field === "title"));
84
- });
85
- it("errors when no prompt or description is provided", () => {
86
- const errors = validateKnowledgeProbeTask(makeTask({ description: undefined }));
87
- assert.ok(errors.some((e) => e.field === "prompt"));
88
- });
89
- it("passes with prompt.text instead of description", () => {
90
- const errors = validateKnowledgeProbeTask(makeTask({
91
- description: undefined,
92
- prompt: { text: "Explain GROQ" },
93
- }));
94
- assert.equal(errors.length, 0);
95
- });
96
- it("passes with prompt.vars.task instead of description", () => {
97
- const errors = validateKnowledgeProbeTask(makeTask({
98
- description: undefined,
99
- prompt: { vars: { task: "Explain GROQ" } },
100
- }));
101
- assert.equal(errors.length, 0);
102
- });
103
- });
104
- // ---------------------------------------------------------------------------
105
- // compileKnowledgeProbeTask — basic compilation
106
- // ---------------------------------------------------------------------------
107
- describe("compileKnowledgeProbeTask — basic", () => {
108
- it("produces prompts and test cases", () => {
109
- const result = compileKnowledgeProbeTask(makeTask());
110
- assert.ok(result.prompts.length > 0, "Should produce prompts");
111
- assert.ok(result.tests.length > 0, "Should produce test cases");
112
- });
113
- it("produces exactly one test case (no baseline variant)", () => {
114
- const result = compileKnowledgeProbeTask(makeTask());
115
- // Knowledge probes have no baseline — there's no "without docs" variant
116
- // because there are no docs in the first place
117
- assert.equal(result.tests.length, 1);
118
- });
119
- it("does NOT include docs in test case vars", () => {
120
- const result = compileKnowledgeProbeTask(makeTask());
121
- assert.equal(result.tests[0].vars.docs, undefined);
122
- });
123
- it("includes task in vars from description", () => {
124
- const result = compileKnowledgeProbeTask(makeTask({ description: "Explain GROQ projections" }));
125
- assert.equal(result.tests[0].vars.task, "Explain GROQ projections");
126
- });
127
- it("prefers prompt.vars.task over description", () => {
128
- const result = compileKnowledgeProbeTask(makeTask({
129
- description: "Description",
130
- prompt: { vars: { task: "Custom prompt text" } },
131
- }));
132
- assert.equal(result.tests[0].vars.task, "Custom prompt text");
133
- });
134
- it("includes mode metadata in vars", () => {
135
- const result = compileKnowledgeProbeTask(makeTask());
136
- assert.equal(result.tests[0].vars.__mode, "knowledge-probe");
137
- });
138
- it("includes probe strategy in vars", () => {
139
- const result = compileKnowledgeProbeTask(makeTask({ probeStrategy: "depth-first" }));
140
- assert.equal(result.tests[0].vars.__probeStrategy, "depth-first");
141
- });
142
- it("defaults probe strategy to breadth-first", () => {
143
- const result = compileKnowledgeProbeTask(makeTask());
144
- assert.equal(result.tests[0].vars.__probeStrategy, "breadth-first");
145
- });
146
- });
147
- // ---------------------------------------------------------------------------
148
- // compileKnowledgeProbeTask — prompts
149
- // ---------------------------------------------------------------------------
150
- describe("compileKnowledgeProbeTask — prompts", () => {
151
- it("uses a single no-docs prompt", () => {
152
- const result = compileKnowledgeProbeTask(makeTask());
153
- assert.equal(result.prompts.length, 1);
154
- assert.equal(result.prompts[0].id, "knowledge-probe");
155
- });
156
- it("uses description as prompt text", () => {
157
- const result = compileKnowledgeProbeTask(makeTask({ description: "Explain GROQ" }));
158
- assert.equal(result.prompts[0].raw, "Explain GROQ");
159
- });
160
- it("prefers prompt.text over description", () => {
161
- const result = compileKnowledgeProbeTask(makeTask({
162
- description: "Desc",
163
- prompt: { text: "Custom prompt" },
164
- }));
165
- assert.equal(result.prompts[0].raw, "Custom prompt");
166
- });
167
- it("includes system message when provided", () => {
168
- const result = compileKnowledgeProbeTask(makeTask({
169
- prompt: {
170
- text: "Explain GROQ",
171
- systemMessage: "You are a Sanity expert.",
172
- },
173
- }));
174
- assert.ok(result.prompts[0].raw.includes("You are a Sanity expert."));
175
- assert.ok(result.prompts[0].raw.includes("Explain GROQ"));
176
- });
177
- });
178
- // ---------------------------------------------------------------------------
179
- // compileKnowledgeProbeTask — providers
180
- // ---------------------------------------------------------------------------
181
- describe("compileKnowledgeProbeTask — providers", () => {
182
- it("builds providers from model list", () => {
183
- const result = compileKnowledgeProbeTask(makeTask(), {
184
- models: [
185
- { id: "openai:chat:gpt-4o", label: "GPT-4o" },
186
- { id: "anthropic:messages:claude-sonnet-4-6", label: "Claude" },
187
- ],
188
- });
189
- assert.equal(result.providers.length, 2);
190
- assert.equal(result.providers[0].id, "openai:chat:gpt-4o");
191
- assert.equal(result.providers[1].id, "anthropic:messages:claude-sonnet-4-6");
192
- });
193
- it("returns empty providers when no models specified", () => {
194
- const result = compileKnowledgeProbeTask(makeTask());
195
- assert.equal(result.providers.length, 0);
196
- });
197
- });
198
- // ---------------------------------------------------------------------------
199
- // compileKnowledgeProbeTask — assertions
200
- // ---------------------------------------------------------------------------
201
- describe("compileKnowledgeProbeTask — assertions", () => {
202
- it("maps standard assertions", () => {
203
- const result = compileKnowledgeProbeTask(makeTask({
204
- assertions: [
205
- { type: "contains", value: "GROQ" },
206
- { type: "regex", value: "select\\(" },
207
- ],
208
- }));
209
- assert.equal(result.tests[0].assert?.length, 2);
210
- assert.equal(result.tests[0].assert[0].type, "contains");
211
- assert.equal(result.tests[0].assert[1].type, "regex");
212
- });
213
- it("maps LLM-graded assertions with grader provider", () => {
214
- const result = compileKnowledgeProbeTask(makeTask({
215
- assertions: [{ type: "llm-rubric", value: "Check accuracy" }],
216
- }), { graderProvider: "openai:chat:gpt-5" });
217
- assert.equal(result.tests[0].assert[0].type, "llm-rubric");
218
- assert.equal(result.tests[0].assert[0].provider, "openai:chat:gpt-5");
219
- });
220
- it("rejects tool-use assertions with warning", () => {
221
- const result = compileKnowledgeProbeTask(makeTask({
222
- assertions: [
223
- { type: "tool-called", value: "getDocument" },
224
- { type: "tool-input-matches", value: {} },
225
- { type: "tool-output-matches", value: {} },
226
- { type: "skill-used", value: "search" },
227
- { type: "tool-call-f1", value: 0.8 },
228
- ],
229
- }));
230
- // All tool-use assertions should be skipped
231
- assert.equal(result.tests[0].assert?.length ?? 0, 0);
232
- // Should have 5 warnings
233
- assert.equal(result.warnings.length, 5);
234
- assert.ok(result.warnings.every((w) => w.includes("not applicable")));
235
- });
236
- it("preserves assertion weights", () => {
237
- const result = compileKnowledgeProbeTask(makeTask({
238
- assertions: [{ type: "contains", value: "GROQ", weight: 0.3 }],
239
- }));
240
- assert.equal(result.tests[0].assert[0].weight, 0.3);
241
- });
242
- });
243
- // ---------------------------------------------------------------------------
244
- // compileKnowledgeProbeTask — metadata
245
- // ---------------------------------------------------------------------------
246
- describe("compileKnowledgeProbeTask — metadata", () => {
247
- it("includes correct mode metadata", () => {
248
- const result = compileKnowledgeProbeTask(makeTask());
249
- assert.equal(result.metadata.mode, "knowledge-probe");
250
- assert.equal(result.metadata.noDocContext, true);
251
- assert.equal(result.metadata.retrievalMetrics, false);
252
- });
253
- it("includes probe strategy in metadata", () => {
254
- const result = compileKnowledgeProbeTask(makeTask({ probeStrategy: "coverage-guided" }));
255
- assert.equal(result.metadata.probeStrategy, "coverage-guided");
256
- });
257
- it("defaults probe strategy to breadth-first", () => {
258
- const result = compileKnowledgeProbeTask(makeTask());
259
- assert.equal(result.metadata.probeStrategy, "breadth-first");
260
- });
261
- });
262
- // ---------------------------------------------------------------------------
263
- // Example task compilation (end-to-end)
264
- // ---------------------------------------------------------------------------
265
- describe("example knowledge probe tasks — end-to-end", () => {
266
- it("compiles all example tasks without errors", () => {
267
- for (const task of allKnowledgeProbeExampleTasks) {
268
- const result = compileKnowledgeProbeTask(task, {
269
- models: [
270
- { id: "openai:chat:gpt-4o", label: "GPT-4o" },
271
- { id: "anthropic:messages:claude-sonnet-4-6", label: "Claude" },
272
- ],
273
- });
274
- assert.ok(result.tests.length > 0, `${task.id}: should produce test cases`);
275
- assert.ok(result.prompts.length > 0, `${task.id}: should produce prompts`);
276
- assert.ok(result.providers.length > 0, `${task.id}: should have providers`);
277
- }
278
- });
279
- it("GROQ probe has correct assertion types", () => {
280
- const result = compileKnowledgeProbeTask(groqProjectionTask);
281
- assert.ok(result.tests[0].assert);
282
- // 2 contains + 2 llm-rubric
283
- assert.equal(result.tests[0].assert.length, 4);
284
- const types = result.tests[0].assert.map((a) => a.type);
285
- assert.ok(types.includes("contains"));
286
- assert.ok(types.includes("llm-rubric"));
287
- });
288
- it("defineType probe tests API currency", () => {
289
- const result = compileKnowledgeProbeTask(defineTypeApiTask);
290
- assert.ok(result.tests[0].assert);
291
- // 2 contains + 2 llm-rubric
292
- assert.equal(result.tests[0].assert.length, 4);
293
- });
294
- it("ecosystem comparison has contains-any assertions", () => {
295
- const result = compileKnowledgeProbeTask(ecosystemComparisonTask);
296
- assert.ok(result.tests[0].assert);
297
- const containsAny = result.tests[0].assert.filter((a) => a.type === "contains-any");
298
- assert.equal(containsAny.length, 2); // GROQ + GraphQL
299
- });
300
- it("no example task includes docs in vars", () => {
301
- for (const task of allKnowledgeProbeExampleTasks) {
302
- const result = compileKnowledgeProbeTask(task);
303
- assert.equal(result.tests[0].vars.docs, undefined, `${task.id}: should not include docs`);
304
- }
305
- });
306
- it("all example tasks have knowledge-probe metadata", () => {
307
- for (const task of allKnowledgeProbeExampleTasks) {
308
- const result = compileKnowledgeProbeTask(task);
309
- assert.equal(result.metadata.mode, "knowledge-probe");
310
- assert.equal(result.metadata.noDocContext, true);
311
- assert.equal(result.metadata.retrievalMetrics, false);
312
- }
313
- });
314
- });
@@ -1,10 +0,0 @@
1
- /**
2
- * literacy-handler.test.ts — Tests for literacy mode compilation.
3
- *
4
- * Tests validation, gold/baseline entry generation, rubric template
5
- * resolution, doc-coverage auto-generation, prompt assignment, baseline
6
- * filtering, and the literacy bridge for LiteracyTaskDefinition.
7
- *
8
- * Run: npx tsx --test src/pipeline/compiler/__tests__/literacy-handler.test.ts
9
- */
10
- export {};