@sanity/ailf 0.4.1 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config/features.ts +23 -0
- package/config/models.ts +83 -0
- package/config/prompts.ts +16 -0
- package/config/rubrics.ts +225 -0
- package/config/schedules.ts +47 -0
- package/config/sinks.ts +37 -0
- package/config/sources.ts +21 -0
- package/config/thresholds.ts +61 -0
- package/dist/_vendor/ailf-core/config-helpers.d.ts +174 -0
- package/dist/_vendor/ailf-core/config-helpers.js +150 -0
- package/dist/_vendor/ailf-core/env-helper.d.ts +35 -0
- package/dist/_vendor/ailf-core/env-helper.js +45 -0
- package/dist/_vendor/ailf-core/examples/index.d.ts +10 -10
- package/dist/_vendor/ailf-core/examples/index.js +10 -10
- package/dist/_vendor/ailf-core/index.d.ts +3 -0
- package/dist/_vendor/ailf-core/index.js +5 -0
- package/dist/_vendor/ailf-core/ports/context.d.ts +15 -2
- package/dist/_vendor/ailf-core/ports/doc-fetcher.d.ts +2 -2
- package/dist/_vendor/ailf-core/ports/index.d.ts +2 -1
- package/dist/_vendor/ailf-core/ports/mode-handler.d.ts +129 -0
- package/dist/_vendor/ailf-core/ports/mode-handler.js +19 -0
- package/dist/_vendor/ailf-core/ports/task-source.d.ts +16 -122
- package/dist/_vendor/ailf-core/ports/task-source.js +7 -7
- package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +7 -2
- package/dist/_vendor/ailf-core/schemas/eval-config.js +7 -2
- package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +8 -3
- package/dist/_vendor/ailf-core/schemas/pipeline-request.js +6 -1
- package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +32 -31
- package/dist/_vendor/ailf-core/schemas/pipeline.js +52 -12
- package/dist/_vendor/ailf-core/schemas/schedules.d.ts +14 -4
- package/dist/_vendor/ailf-core/schemas/schedules.js +6 -2
- package/dist/_vendor/ailf-core/schemas/sinks.d.ts +1 -1
- package/dist/_vendor/ailf-core/services/comparison-formatters.js +57 -19
- package/dist/_vendor/ailf-core/services/index.d.ts +2 -1
- package/dist/_vendor/ailf-core/services/index.js +2 -1
- package/dist/_vendor/ailf-core/services/scoring-engine.d.ts +153 -0
- package/dist/_vendor/ailf-core/services/scoring-engine.js +237 -0
- package/dist/_vendor/ailf-core/services/scoring.d.ts +15 -2
- package/dist/_vendor/ailf-core/services/scoring.js +25 -15
- package/dist/_vendor/ailf-core/types/branded-ids.d.ts +137 -0
- package/dist/_vendor/ailf-core/types/branded-ids.js +136 -0
- package/dist/_vendor/ailf-core/types/eval-mode-config.d.ts +150 -0
- package/dist/_vendor/ailf-core/types/eval-mode-config.js +24 -0
- package/dist/_vendor/ailf-core/types/generalized-task.d.ts +319 -0
- package/dist/_vendor/ailf-core/types/generalized-task.js +13 -0
- package/dist/_vendor/ailf-core/types/index.d.ts +45 -81
- package/dist/_vendor/ailf-core/types/index.js +8 -1
- package/dist/_vendor/ailf-core/types/plugin-registry.d.ts +202 -0
- package/dist/_vendor/ailf-core/types/plugin-registry.js +132 -0
- package/dist/_vendor/ailf-core/types/storage-schema.d.ts +199 -0
- package/dist/_vendor/ailf-core/types/storage-schema.js +39 -0
- package/dist/_vendor/ailf-core/types/task-graph.d.ts +86 -0
- package/dist/_vendor/ailf-core/types/task-graph.js +20 -0
- package/dist/_vendor/ailf-core/types/trace.d.ts +118 -0
- package/dist/_vendor/ailf-core/types/trace.js +18 -0
- package/dist/_vendor/ailf-core/types/variable-envelope.d.ts +80 -0
- package/dist/_vendor/ailf-core/types/variable-envelope.js +16 -0
- package/dist/_vendor/ailf-shared/dimension-names.d.ts +5 -18
- package/dist/_vendor/ailf-shared/dimension-names.js +6 -24
- package/dist/_vendor/ailf-shared/eval-modes.d.ts +38 -6
- package/dist/_vendor/ailf-shared/eval-modes.js +26 -2
- package/dist/_vendor/ailf-shared/index.d.ts +0 -1
- package/dist/_vendor/ailf-shared/index.js +0 -1
- package/dist/adapters/api-client/build-request.js +14 -13
- package/dist/adapters/config-sources/file-config-adapter.d.ts +20 -11
- package/dist/adapters/config-sources/file-config-adapter.js +38 -12
- package/dist/adapters/config-sources/index.d.ts +2 -0
- package/dist/adapters/config-sources/index.js +1 -0
- package/dist/adapters/config-sources/ts-config-loader.d.ts +59 -0
- package/dist/adapters/config-sources/ts-config-loader.js +133 -0
- package/dist/adapters/doc-fetchers/sanity-doc-fetcher.d.ts +3 -2
- package/dist/adapters/doc-fetchers/sanity-doc-fetcher.js +7 -2
- package/dist/adapters/task-sources/composite-task-source.d.ts +3 -3
- package/dist/adapters/task-sources/composite-task-source.js +1 -1
- package/dist/adapters/task-sources/content-lake-task-source.d.ts +7 -6
- package/dist/adapters/task-sources/content-lake-task-source.js +22 -23
- package/dist/adapters/task-sources/index.d.ts +1 -0
- package/dist/adapters/task-sources/index.js +1 -0
- package/dist/adapters/task-sources/repo-task-source.d.ts +4 -4
- package/dist/adapters/task-sources/repo-task-source.js +69 -16
- package/dist/adapters/task-sources/task-file-loader.d.ts +64 -0
- package/dist/adapters/task-sources/task-file-loader.js +83 -0
- package/dist/adapters/task-sources/yaml-task-source.d.ts +6 -6
- package/dist/adapters/task-sources/yaml-task-source.js +19 -16
- package/dist/cli.js +0 -2
- package/dist/commands/baseline.js +4 -1
- package/dist/commands/calculate-scores.js +1 -1
- package/dist/commands/coverage-audit.js +7 -1
- package/dist/commands/explain-handler.js +25 -23
- package/dist/commands/fetch-docs.js +3 -2
- package/dist/commands/generate-configs.js +1 -1
- package/dist/commands/interactive.js +11 -7
- package/dist/commands/pipeline-action.d.ts +2 -0
- package/dist/commands/pipeline-action.js +16 -6
- package/dist/commands/pipeline.d.ts +1 -0
- package/dist/commands/pipeline.js +4 -2
- package/dist/commands/pr-comment.js +1 -1
- package/dist/commands/publish.js +2 -2
- package/dist/commands/readiness-report.js +13 -6
- package/dist/composition-root.d.ts +1 -1
- package/dist/composition-root.js +67 -4
- package/dist/orchestration/build-app-context.js +1 -0
- package/dist/orchestration/build-step-sequence.js +24 -6
- package/dist/orchestration/steps/calculate-scores-step.js +24 -11
- package/dist/orchestration/steps/fetch-docs-step.js +6 -4
- package/dist/orchestration/steps/gap-analysis-step.js +8 -7
- package/dist/orchestration/steps/generate-configs-step.d.ts +16 -3
- package/dist/orchestration/steps/generate-configs-step.js +245 -51
- package/dist/orchestration/steps/grader-consistency-step.js +7 -4
- package/dist/orchestration/steps/mirror-repo-tasks-step.js +1 -1
- package/dist/orchestration/steps/readiness-step.js +5 -6
- package/dist/orchestration/steps/run-eval-step.d.ts +1 -2
- package/dist/orchestration/steps/run-eval-step.js +8 -7
- package/dist/pipeline/cache.d.ts +1 -1
- package/dist/pipeline/cache.js +36 -8
- package/dist/pipeline/calculate-scores.d.ts +5 -7
- package/dist/pipeline/calculate-scores.js +74 -153
- package/dist/pipeline/checks.js +2 -2
- package/dist/pipeline/compare.js +8 -8
- package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.d.ts +10 -0
- package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js +288 -0
- package/dist/pipeline/compiler/__tests__/assertion-mapper.test.d.ts +9 -0
- package/dist/pipeline/compiler/__tests__/assertion-mapper.test.js +145 -0
- package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.d.ts +10 -0
- package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.js +314 -0
- package/dist/pipeline/compiler/__tests__/literacy-handler.test.d.ts +10 -0
- package/dist/pipeline/compiler/__tests__/literacy-handler.test.js +486 -0
- package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.d.ts +10 -0
- package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +355 -0
- package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.d.ts +9 -0
- package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.js +333 -0
- package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.d.ts +12 -0
- package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.js +210 -0
- package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.d.ts +7 -0
- package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.js +471 -0
- package/dist/pipeline/compiler/__tests__/scoring-bridge.test.d.ts +10 -0
- package/dist/pipeline/compiler/__tests__/scoring-bridge.test.js +184 -0
- package/dist/pipeline/compiler/__tests__/task-graph-builder.test.d.ts +8 -0
- package/dist/pipeline/compiler/__tests__/task-graph-builder.test.js +301 -0
- package/dist/pipeline/compiler/__tests__/telemetry.test.d.ts +9 -0
- package/dist/pipeline/compiler/__tests__/telemetry.test.js +503 -0
- package/dist/pipeline/compiler/assertion-mapper.d.ts +58 -0
- package/dist/pipeline/compiler/assertion-mapper.js +175 -0
- package/dist/pipeline/compiler/compiler-to-yaml.d.ts +51 -0
- package/dist/pipeline/compiler/compiler-to-yaml.js +222 -0
- package/dist/pipeline/compiler/config-loader.d.ts +56 -0
- package/dist/pipeline/compiler/config-loader.js +111 -0
- package/dist/pipeline/compiler/fixture-resolver.d.ts +41 -0
- package/dist/pipeline/compiler/fixture-resolver.js +113 -0
- package/dist/pipeline/compiler/hash.d.ts +11 -0
- package/dist/pipeline/compiler/hash.js +18 -0
- package/dist/pipeline/compiler/ignore-fields.d.ts +53 -0
- package/dist/pipeline/compiler/ignore-fields.js +113 -0
- package/dist/pipeline/compiler/index.d.ts +29 -0
- package/dist/pipeline/compiler/index.js +45 -0
- package/dist/pipeline/compiler/literacy-bridge.d.ts +102 -0
- package/dist/pipeline/compiler/literacy-bridge.js +172 -0
- package/dist/pipeline/compiler/mode-handlers/__fixtures__/agent-harness-example-tasks.d.ts +14 -0
- package/dist/pipeline/compiler/mode-handlers/__fixtures__/agent-harness-example-tasks.js +152 -0
- package/dist/pipeline/compiler/mode-handlers/__fixtures__/knowledge-probe-example-tasks.d.ts +32 -0
- package/dist/pipeline/compiler/mode-handlers/__fixtures__/knowledge-probe-example-tasks.js +176 -0
- package/dist/pipeline/compiler/mode-handlers/__fixtures__/mcp-example-tasks.d.ts +49 -0
- package/dist/pipeline/compiler/mode-handlers/__fixtures__/mcp-example-tasks.js +259 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.d.ts +70 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.js +485 -0
- package/dist/pipeline/compiler/mode-handlers/index.d.ts +16 -0
- package/dist/pipeline/compiler/mode-handlers/index.js +21 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.d.ts +76 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.js +245 -0
- package/dist/pipeline/compiler/mode-handlers/literacy-handler.d.ts +89 -0
- package/dist/pipeline/compiler/mode-handlers/literacy-handler.js +379 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-assertions.d.ts +50 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-assertions.js +277 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.d.ts +67 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.js +309 -0
- package/dist/pipeline/compiler/presets/index.d.ts +9 -0
- package/dist/pipeline/compiler/presets/index.js +8 -0
- package/dist/pipeline/compiler/presets/sanity-literacy.d.ts +45 -0
- package/dist/pipeline/compiler/presets/sanity-literacy.js +354 -0
- package/dist/pipeline/compiler/promptfoo-compiler.d.ts +96 -0
- package/dist/pipeline/compiler/promptfoo-compiler.js +230 -0
- package/dist/pipeline/compiler/provider-assembler.d.ts +39 -0
- package/dist/pipeline/compiler/provider-assembler.js +137 -0
- package/dist/pipeline/compiler/sandbox/docker-sandbox.d.ts +21 -0
- package/dist/pipeline/compiler/sandbox/docker-sandbox.js +136 -0
- package/dist/pipeline/compiler/sandbox/fixture-provisioner.d.ts +69 -0
- package/dist/pipeline/compiler/sandbox/fixture-provisioner.js +189 -0
- package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.d.ts +20 -0
- package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.js +114 -0
- package/dist/pipeline/compiler/sandbox/index.d.ts +10 -0
- package/dist/pipeline/compiler/sandbox/index.js +11 -0
- package/dist/pipeline/compiler/sandbox/sandbox-selector.d.ts +35 -0
- package/dist/pipeline/compiler/sandbox/sandbox-selector.js +86 -0
- package/dist/pipeline/compiler/sandbox/sandbox-strategy.d.ts +81 -0
- package/dist/pipeline/compiler/sandbox/sandbox-strategy.js +15 -0
- package/dist/pipeline/compiler/sandbox/tempdir-sandbox.d.ts +20 -0
- package/dist/pipeline/compiler/sandbox/tempdir-sandbox.js +74 -0
- package/dist/pipeline/compiler/scoring-bridge.d.ts +49 -0
- package/dist/pipeline/compiler/scoring-bridge.js +114 -0
- package/dist/pipeline/compiler/task-graph-builder.d.ts +54 -0
- package/dist/pipeline/compiler/task-graph-builder.js +291 -0
- package/dist/pipeline/compiler/telemetry/cost-tracker.d.ts +90 -0
- package/dist/pipeline/compiler/telemetry/cost-tracker.js +146 -0
- package/dist/pipeline/compiler/telemetry/index.d.ts +14 -0
- package/dist/pipeline/compiler/telemetry/index.js +19 -0
- package/dist/pipeline/compiler/telemetry/redactor.d.ts +58 -0
- package/dist/pipeline/compiler/telemetry/redactor.js +222 -0
- package/dist/pipeline/compiler/telemetry/tool-classifier.d.ts +32 -0
- package/dist/pipeline/compiler/telemetry/tool-classifier.js +120 -0
- package/dist/pipeline/compiler/telemetry/trace-collector.d.ts +75 -0
- package/dist/pipeline/compiler/telemetry/trace-collector.js +297 -0
- package/dist/pipeline/compiler/telemetry/trace-store.d.ts +78 -0
- package/dist/pipeline/compiler/telemetry/trace-store.js +85 -0
- package/dist/pipeline/compiler/variable-resolver.d.ts +46 -0
- package/dist/pipeline/compiler/variable-resolver.js +115 -0
- package/dist/pipeline/coverage-audit.d.ts +15 -5
- package/dist/pipeline/coverage-audit.js +41 -22
- package/dist/pipeline/eval-constants.d.ts +16 -6
- package/dist/pipeline/eval-constants.js +25 -4
- package/dist/pipeline/eval-fingerprint.d.ts +2 -2
- package/dist/pipeline/eval-fingerprint.js +8 -9
- package/dist/pipeline/expand-tasks.d.ts +23 -14
- package/dist/pipeline/expand-tasks.js +37 -31
- package/dist/pipeline/gap-analysis.d.ts +1 -1
- package/dist/pipeline/gap-analysis.js +2 -2
- package/dist/pipeline/generate-configs.d.ts +22 -4
- package/dist/pipeline/generate-configs.js +53 -24
- package/dist/pipeline/grader-api.d.ts +3 -3
- package/dist/pipeline/grader-api.js +5 -12
- package/dist/pipeline/grader-compare-runner.js +20 -27
- package/dist/pipeline/grader-comparison.d.ts +4 -8
- package/dist/pipeline/grader-comparison.js +11 -17
- package/dist/pipeline/grader-consistency-runner.d.ts +2 -3
- package/dist/pipeline/grader-consistency-runner.js +18 -21
- package/dist/pipeline/grader-consistency.d.ts +6 -10
- package/dist/pipeline/grader-consistency.js +13 -32
- package/dist/pipeline/grader-sensitivity-runner.js +7 -5
- package/dist/pipeline/grader-sensitivity.d.ts +2 -6
- package/dist/pipeline/grader-sensitivity.js +10 -10
- package/dist/pipeline/grader-validate-runner.js +7 -5
- package/dist/pipeline/grader-validation.d.ts +2 -6
- package/dist/pipeline/grader-validation.js +14 -22
- package/dist/pipeline/map-request-to-config.js +6 -1
- package/dist/pipeline/mirror-repo-tasks.d.ts +6 -6
- package/dist/pipeline/mirror-repo-tasks.js +16 -15
- package/dist/pipeline/normalize-mode.d.ts +49 -0
- package/dist/pipeline/normalize-mode.js +64 -0
- package/dist/pipeline/plan.d.ts +5 -2
- package/dist/pipeline/plan.js +134 -78
- package/dist/pipeline/pr-comment.js +2 -0
- package/dist/pipeline/profile-resolution.d.ts +47 -0
- package/dist/pipeline/profile-resolution.js +91 -0
- package/dist/pipeline/provenance.d.ts +2 -2
- package/dist/pipeline/provenance.js +12 -17
- package/dist/pipeline/release-report.js +4 -4
- package/dist/pipeline/repo-threshold-evaluator.d.ts +1 -1
- package/dist/pipeline/repo-threshold-evaluator.js +1 -1
- package/dist/pipeline/rubric-loader.d.ts +20 -0
- package/dist/pipeline/rubric-loader.js +37 -0
- package/dist/pipeline/validate.d.ts +4 -4
- package/dist/pipeline/validate.js +64 -53
- package/dist/schedules/loader.js +18 -8
- package/dist/scripts/migrate-task-mode.d.ts +24 -0
- package/dist/scripts/migrate-task-mode.js +85 -0
- package/dist/scripts/migrate-tasks-to-content-lake.js +11 -10
- package/dist/scripts/validate-task-sources.d.ts +1 -1
- package/dist/scripts/validate-task-sources.js +15 -15
- package/dist/sinks/loader.js +5 -7
- package/dist/sources.d.ts +7 -7
- package/dist/sources.js +22 -24
- package/dist/webhook/dispatch.js +2 -1
- package/package.json +6 -3
- package/tasks/knowledge-probe/define-type-api.task.ts +55 -0
- package/tasks/knowledge-probe/groq-projections.task.ts +59 -0
- package/tasks/literacy/frameworks.task.ts +128 -0
- package/tasks/literacy/functions.task.ts +69 -0
- package/tasks/literacy/groq.task.ts +258 -0
- package/tasks/literacy/nextjs-live.task.ts +75 -0
- package/tasks/literacy/studio-setup.task.ts +131 -0
- package/tasks/literacy/visual-editing.task.ts +146 -0
- package/config/features.yaml +0 -116
- package/config/models.yaml +0 -116
- package/config/prompts.yaml +0 -75
- package/config/rubrics.yaml +0 -62
- package/config/schedules.yaml +0 -43
- package/config/sinks.yaml +0 -54
- package/config/sources.yaml +0 -51
- package/config/thresholds.yaml +0 -49
- package/dist/agent-observer/test-imports.d.ts +0 -7
- package/dist/agent-observer/test-imports.js +0 -185
|
@@ -0,0 +1,288 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* agent-harness-handler.test.ts — Tests for agent harness mode compilation.
|
|
3
|
+
*
|
|
4
|
+
* Tests validation, provider assembly, tool permission resolution,
|
|
5
|
+
* assertion mapping, sandbox config, lifecycle extensions, and
|
|
6
|
+
* end-to-end compilation of example tasks.
|
|
7
|
+
*
|
|
8
|
+
* Run: npx tsx --test src/pipeline/compiler/__tests__/agent-harness-handler.test.ts
|
|
9
|
+
*/
|
|
10
|
+
import assert from "node:assert/strict";
|
|
11
|
+
import { describe, it } from "node:test";
|
|
12
|
+
import { LiteracyVariant } from "../../normalize-mode.js";
|
|
13
|
+
import { compileAgentHarnessTask, AGENT_HARNESS_PROMPT_TEMPLATES, handler as agentHandler, validateAgentHarnessTask, } from "../mode-handlers/agent-harness-handler.js";
|
|
14
|
+
import { allAgentHarnessExampleTasks, scaffoldProjectTask, modifyCodeTask, multiFileRefactorTask, } from "../mode-handlers/__fixtures__/agent-harness-example-tasks.js";
|
|
15
|
+
// ---------------------------------------------------------------------------
|
|
16
|
+
// Helpers
|
|
17
|
+
// ---------------------------------------------------------------------------
|
|
18
|
+
function makeTask(overrides) {
|
|
19
|
+
return {
|
|
20
|
+
mode: "agent-harness",
|
|
21
|
+
id: "test-agent-task",
|
|
22
|
+
title: "Test Agent Task",
|
|
23
|
+
description: "A test agent harness task",
|
|
24
|
+
area: "studio",
|
|
25
|
+
...overrides,
|
|
26
|
+
};
|
|
27
|
+
}
|
|
28
|
+
// ---------------------------------------------------------------------------
|
|
29
|
+
// handler.getPrompts() — prompt template ownership
|
|
30
|
+
// ---------------------------------------------------------------------------
|
|
31
|
+
describe("AgentHarnessHandler.getPrompts", () => {
|
|
32
|
+
it("returns prompt templates", () => {
|
|
33
|
+
const prompts = agentHandler.getPrompts();
|
|
34
|
+
assert.ok(prompts, "getPrompts() should return a record");
|
|
35
|
+
assert.ok(Object.keys(prompts).length > 0, "should return at least one template");
|
|
36
|
+
});
|
|
37
|
+
it("returns templates keyed by agent-specific IDs (not literacy names)", () => {
|
|
38
|
+
const prompts = agentHandler.getPrompts();
|
|
39
|
+
const keys = Object.keys(prompts);
|
|
40
|
+
// Must not use literacy template names
|
|
41
|
+
assert.ok(!keys.includes("with-docs"), "should not use literacy key 'with-docs'");
|
|
42
|
+
assert.ok(!keys.includes("without-docs"), "should not use literacy key 'without-docs'");
|
|
43
|
+
assert.ok(!keys.includes(LiteracyVariant.AGENTIC), "should not use literacy key 'agentic'");
|
|
44
|
+
// Must have agent-appropriate key(s)
|
|
45
|
+
assert.ok(keys.includes("agent-harness"), "should include 'agent-harness' template");
|
|
46
|
+
});
|
|
47
|
+
it("agent-harness template describes task for agent execution", () => {
|
|
48
|
+
const prompts = agentHandler.getPrompts();
|
|
49
|
+
const template = prompts["agent-harness"];
|
|
50
|
+
assert.ok(template, "agent-harness template should exist");
|
|
51
|
+
assert.ok(template.template.includes("{{task}}"), "should include {{task}} placeholder");
|
|
52
|
+
// Should reference agent / sandbox / tool execution context
|
|
53
|
+
assert.ok(/sandbox|file|tool|implement|code/i.test(template.template), "template should reference agent execution concepts");
|
|
54
|
+
});
|
|
55
|
+
it("template has correct PromptTemplate shape", () => {
|
|
56
|
+
const prompts = agentHandler.getPrompts();
|
|
57
|
+
const template = prompts["agent-harness"];
|
|
58
|
+
assert.equal(template.id, "agent-harness");
|
|
59
|
+
assert.ok(template.label, "should have a human-readable label");
|
|
60
|
+
assert.ok(template.template, "should have a template string");
|
|
61
|
+
assert.ok(Array.isArray(template.variables), "should declare variables");
|
|
62
|
+
assert.ok(template.variables.includes("task"), "variables should include 'task'");
|
|
63
|
+
});
|
|
64
|
+
it("exported AGENT_HARNESS_PROMPT_TEMPLATES matches handler.getPrompts()", () => {
|
|
65
|
+
const fromHandler = agentHandler.getPrompts();
|
|
66
|
+
assert.deepEqual(fromHandler, AGENT_HARNESS_PROMPT_TEMPLATES);
|
|
67
|
+
});
|
|
68
|
+
});
|
|
69
|
+
// ---------------------------------------------------------------------------
|
|
70
|
+
// validateAgentHarnessTask
|
|
71
|
+
// ---------------------------------------------------------------------------
|
|
72
|
+
describe("validateAgentHarnessTask", () => {
|
|
73
|
+
it("passes for a valid minimal task", () => {
|
|
74
|
+
const errors = validateAgentHarnessTask(makeTask());
|
|
75
|
+
assert.equal(errors.length, 0);
|
|
76
|
+
});
|
|
77
|
+
it("errors on missing ID", () => {
|
|
78
|
+
const errors = validateAgentHarnessTask(makeTask({ id: "" }));
|
|
79
|
+
assert.ok(errors.some((e) => e.field === "id"));
|
|
80
|
+
});
|
|
81
|
+
it("errors on missing title", () => {
|
|
82
|
+
const errors = validateAgentHarnessTask(makeTask({ title: "" }));
|
|
83
|
+
assert.ok(errors.some((e) => e.field === "title"));
|
|
84
|
+
});
|
|
85
|
+
});
|
|
86
|
+
// ---------------------------------------------------------------------------
|
|
87
|
+
// compileAgentHarnessTask — provider assembly
|
|
88
|
+
// ---------------------------------------------------------------------------
|
|
89
|
+
describe("compileAgentHarnessTask — providers", () => {
|
|
90
|
+
it("produces a provider", () => {
|
|
91
|
+
const result = compileAgentHarnessTask(makeTask());
|
|
92
|
+
assert.ok(result.providers.length > 0);
|
|
93
|
+
assert.ok(result.providers[0].id.startsWith("agent:"));
|
|
94
|
+
});
|
|
95
|
+
it("resolves coding tool preset", () => {
|
|
96
|
+
const result = compileAgentHarnessTask(makeTask({ tools: ["coding"] }));
|
|
97
|
+
const config = result.providers[0].config;
|
|
98
|
+
const tools = config.allowedTools;
|
|
99
|
+
assert.ok(tools.includes("Bash"));
|
|
100
|
+
assert.ok(tools.includes("Read"));
|
|
101
|
+
assert.ok(tools.includes("Write"));
|
|
102
|
+
assert.ok(tools.includes("Edit"));
|
|
103
|
+
});
|
|
104
|
+
it("resolves read-only tool preset", () => {
|
|
105
|
+
const result = compileAgentHarnessTask(makeTask({ tools: ["read-only"] }));
|
|
106
|
+
const config = result.providers[0].config;
|
|
107
|
+
const tools = config.allowedTools;
|
|
108
|
+
assert.ok(tools.includes("Read"));
|
|
109
|
+
assert.ok(tools.includes("Grep"));
|
|
110
|
+
assert.ok(!tools.includes("Write"), "read-only should not include Write");
|
|
111
|
+
});
|
|
112
|
+
it("mixes preset and explicit tools", () => {
|
|
113
|
+
const result = compileAgentHarnessTask(makeTask({ tools: ["read-only", "WebFetch"] }));
|
|
114
|
+
const config = result.providers[0].config;
|
|
115
|
+
const tools = config.allowedTools;
|
|
116
|
+
assert.ok(tools.includes("Read"));
|
|
117
|
+
assert.ok(tools.includes("WebFetch"));
|
|
118
|
+
});
|
|
119
|
+
it("includes sandbox config in provider", () => {
|
|
120
|
+
const result = compileAgentHarnessTask(makeTask({
|
|
121
|
+
sandbox: { type: "docker", image: "node:22-slim" },
|
|
122
|
+
}));
|
|
123
|
+
const config = result.providers[0].config;
|
|
124
|
+
const sandbox = config.sandbox;
|
|
125
|
+
assert.equal(sandbox.type, "docker");
|
|
126
|
+
assert.equal(sandbox.image, "node:22-slim");
|
|
127
|
+
});
|
|
128
|
+
});
|
|
129
|
+
// ---------------------------------------------------------------------------
|
|
130
|
+
// compileAgentHarnessTask — test cases
|
|
131
|
+
// ---------------------------------------------------------------------------
|
|
132
|
+
describe("compileAgentHarnessTask — test cases", () => {
|
|
133
|
+
it("produces at least one test case", () => {
|
|
134
|
+
const result = compileAgentHarnessTask(makeTask());
|
|
135
|
+
assert.ok(result.tests.length > 0);
|
|
136
|
+
});
|
|
137
|
+
it("includes task description in vars", () => {
|
|
138
|
+
const result = compileAgentHarnessTask(makeTask({ description: "Do the thing" }));
|
|
139
|
+
assert.equal(result.tests[0].vars.task, "Do the thing");
|
|
140
|
+
});
|
|
141
|
+
it("prefers prompt.vars.task over description", () => {
|
|
142
|
+
const result = compileAgentHarnessTask(makeTask({
|
|
143
|
+
description: "Description",
|
|
144
|
+
prompt: { vars: { task: "Custom prompt" } },
|
|
145
|
+
}));
|
|
146
|
+
assert.equal(result.tests[0].vars.task, "Custom prompt");
|
|
147
|
+
});
|
|
148
|
+
it("creates multi-turn test case", () => {
|
|
149
|
+
const result = compileAgentHarnessTask(makeTask({
|
|
150
|
+
multiTurn: {
|
|
151
|
+
turns: [
|
|
152
|
+
{ role: "user", content: "Hello" },
|
|
153
|
+
{ role: "assistant", content: "Hi" },
|
|
154
|
+
],
|
|
155
|
+
},
|
|
156
|
+
}));
|
|
157
|
+
assert.equal(result.tests.length, 2);
|
|
158
|
+
assert.ok(result.tests[1].description.includes("[multi-turn]"));
|
|
159
|
+
});
|
|
160
|
+
it("sets sandbox metadata in vars", () => {
|
|
161
|
+
const result = compileAgentHarnessTask(makeTask({ sandbox: { type: "docker" } }));
|
|
162
|
+
assert.equal(result.tests[0].vars.__sandboxType, "docker");
|
|
163
|
+
});
|
|
164
|
+
});
|
|
165
|
+
// ---------------------------------------------------------------------------
|
|
166
|
+
// compileAgentHarnessTask — assertions
|
|
167
|
+
// ---------------------------------------------------------------------------
|
|
168
|
+
describe("compileAgentHarnessTask — assertions", () => {
|
|
169
|
+
it("maps file-exists to javascript assertion", () => {
|
|
170
|
+
const result = compileAgentHarnessTask(makeTask({
|
|
171
|
+
assertions: [{ type: "file-exists", value: "sanity.config.ts" }],
|
|
172
|
+
}));
|
|
173
|
+
const assertion = result.tests[0].assert?.[0];
|
|
174
|
+
assert.ok(assertion);
|
|
175
|
+
assert.equal(assertion.type, "javascript");
|
|
176
|
+
assert.ok(assertion.value.includes("sanity.config.ts"));
|
|
177
|
+
});
|
|
178
|
+
it("maps file-contains to javascript assertion", () => {
|
|
179
|
+
const result = compileAgentHarnessTask(makeTask({
|
|
180
|
+
assertions: [
|
|
181
|
+
{
|
|
182
|
+
type: "file-contains",
|
|
183
|
+
value: { path: "config.ts", content: "projectId" },
|
|
184
|
+
},
|
|
185
|
+
],
|
|
186
|
+
}));
|
|
187
|
+
const assertion = result.tests[0].assert?.[0];
|
|
188
|
+
assert.ok(assertion);
|
|
189
|
+
assert.equal(assertion.type, "javascript");
|
|
190
|
+
assert.ok(assertion.value.includes("projectId"));
|
|
191
|
+
});
|
|
192
|
+
it("maps command-succeeds to javascript assertion", () => {
|
|
193
|
+
const result = compileAgentHarnessTask(makeTask({
|
|
194
|
+
assertions: [{ type: "command-succeeds", value: "npx tsc --noEmit" }],
|
|
195
|
+
}));
|
|
196
|
+
const assertion = result.tests[0].assert?.[0];
|
|
197
|
+
assert.ok(assertion);
|
|
198
|
+
assert.equal(assertion.type, "javascript");
|
|
199
|
+
assert.ok(assertion.value.includes("tsc"));
|
|
200
|
+
});
|
|
201
|
+
it("maps diff-matches to javascript assertion", () => {
|
|
202
|
+
const result = compileAgentHarnessTask(makeTask({
|
|
203
|
+
assertions: [{ type: "diff-matches", value: "createClient" }],
|
|
204
|
+
}));
|
|
205
|
+
const assertion = result.tests[0].assert?.[0];
|
|
206
|
+
assert.ok(assertion);
|
|
207
|
+
assert.equal(assertion.type, "javascript");
|
|
208
|
+
assert.ok(assertion.value.includes("git diff"));
|
|
209
|
+
});
|
|
210
|
+
it("passes through standard assertions", () => {
|
|
211
|
+
const result = compileAgentHarnessTask(makeTask({
|
|
212
|
+
assertions: [{ type: "contains", value: "sanity" }],
|
|
213
|
+
}));
|
|
214
|
+
assert.equal(result.tests[0].assert?.[0]?.type, "contains");
|
|
215
|
+
});
|
|
216
|
+
it("sets grader provider on llm-rubric", () => {
|
|
217
|
+
const result = compileAgentHarnessTask(makeTask({
|
|
218
|
+
assertions: [{ type: "llm-rubric", value: "Check quality" }],
|
|
219
|
+
}), { graderProvider: "openai:chat:gpt-5" });
|
|
220
|
+
assert.equal(result.tests[0].assert?.[0]?.provider, "openai:chat:gpt-5");
|
|
221
|
+
});
|
|
222
|
+
});
|
|
223
|
+
// ---------------------------------------------------------------------------
|
|
224
|
+
// compileAgentHarnessTask — lifecycle extensions
|
|
225
|
+
// ---------------------------------------------------------------------------
|
|
226
|
+
describe("compileAgentHarnessTask — lifecycle", () => {
|
|
227
|
+
it("produces beforeEach and afterEach extensions", () => {
|
|
228
|
+
const result = compileAgentHarnessTask(makeTask());
|
|
229
|
+
assert.equal(result.extensions.length, 2);
|
|
230
|
+
assert.equal(result.extensions[0].type, "beforeEach");
|
|
231
|
+
assert.equal(result.extensions[1].type, "afterEach");
|
|
232
|
+
});
|
|
233
|
+
it("beforeEach hook creates working directory", () => {
|
|
234
|
+
const result = compileAgentHarnessTask(makeTask());
|
|
235
|
+
assert.ok(result.extensions[0].code.includes("mkdirSync"));
|
|
236
|
+
assert.ok(result.extensions[0].code.includes("__workingDir"));
|
|
237
|
+
});
|
|
238
|
+
it("afterEach hook cleans up", () => {
|
|
239
|
+
const result = compileAgentHarnessTask(makeTask());
|
|
240
|
+
assert.ok(result.extensions[1].code.includes("rmSync"));
|
|
241
|
+
});
|
|
242
|
+
it("sandbox config captures task settings", () => {
|
|
243
|
+
const result = compileAgentHarnessTask(makeTask({
|
|
244
|
+
sandbox: {
|
|
245
|
+
type: "docker",
|
|
246
|
+
image: "node:22",
|
|
247
|
+
limits: { cpus: 2, networkAccess: false },
|
|
248
|
+
},
|
|
249
|
+
fixtures: ["file://schema.ts"],
|
|
250
|
+
}));
|
|
251
|
+
assert.equal(result.sandboxConfig.type, "docker");
|
|
252
|
+
assert.equal(result.sandboxConfig.image, "node:22");
|
|
253
|
+
assert.deepEqual(result.sandboxConfig.fixtures, ["file://schema.ts"]);
|
|
254
|
+
assert.equal(result.sandboxConfig.limits?.cpus, 2);
|
|
255
|
+
assert.equal(result.sandboxConfig.limits?.networkAccess, false);
|
|
256
|
+
});
|
|
257
|
+
});
|
|
258
|
+
// ---------------------------------------------------------------------------
|
|
259
|
+
// Example task compilation (end-to-end)
|
|
260
|
+
// ---------------------------------------------------------------------------
|
|
261
|
+
// Compiles the bundled example tasks and spot-checks the shape of each
// compiled result (providers, test cases, extensions, sandbox config).
describe("example agent harness tasks — end-to-end", () => {
    it("compiles all example tasks without errors", () => {
        for (const exampleTask of allAgentHarnessExampleTasks) {
            const { providers, tests, extensions } = compileAgentHarnessTask(exampleTask);
            assert.ok(providers.length > 0, `${exampleTask.id}: should produce providers`);
            assert.ok(tests.length > 0, `${exampleTask.id}: should produce test cases`);
            assert.ok(extensions.length > 0, `${exampleTask.id}: should produce lifecycle extensions`);
        }
    });

    it("scaffold task has file-exists assertions", () => {
        const firstCase = compileAgentHarnessTask(scaffoldProjectTask).tests[0];
        assert.ok(firstCase.assert);
        assert.ok(firstCase.assert.length >= 3);
        // Expected order: two file-exists checks (compiled to javascript),
        // then file-contains, then command-succeeds. Only the first is pinned here.
        assert.equal(firstCase.assert[0].type, "javascript");
    });

    it("modify task has file-contains assertions", () => {
        const firstCase = compileAgentHarnessTask(modifyCodeTask).tests[0];
        assert.ok(firstCase.assert);
        // A matching check is a javascript assertion whose source mentions the hook name.
        const mentionsHook = (check) => check.type === "javascript" &&
            check.value.includes("useDocumentOperation");
        assert.ok(firstCase.assert.some(mentionsHook));
    });

    it("refactor task has docker sandbox config", () => {
        const { sandboxConfig } = compileAgentHarnessTask(multiFileRefactorTask);
        assert.equal(sandboxConfig.type, "docker");
        assert.equal(sandboxConfig.image, "node:22-slim");
    });
});
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* assertion-mapper.test.ts — Unit tests for the assertion type mapper.
|
|
3
|
+
*
|
|
4
|
+
* Tests mapping of AILF assertion types to Promptfoo assertion types,
|
|
5
|
+
* mode compatibility checking, negation support, and templated assertions.
|
|
6
|
+
*
|
|
7
|
+
* Run: npx tsx --test src/pipeline/compiler/__tests__/assertion-mapper.test.ts
|
|
8
|
+
*/
|
|
9
|
+
export {};
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* assertion-mapper.test.ts — Unit tests for the assertion type mapper.
|
|
3
|
+
*
|
|
4
|
+
* Tests mapping of AILF assertion types to Promptfoo assertion types,
|
|
5
|
+
* mode compatibility checking, negation support, and templated assertions.
|
|
6
|
+
*
|
|
7
|
+
* Run: npx tsx --test src/pipeline/compiler/__tests__/assertion-mapper.test.ts
|
|
8
|
+
*/
|
|
9
|
+
import assert from "node:assert/strict";
|
|
10
|
+
import { describe, it } from "node:test";
|
|
11
|
+
import { isAssertionCompatibleWithMode, isValidAssertionType, mapAssertions, } from "../assertion-mapper.js";
|
|
12
|
+
// ---------------------------------------------------------------------------
|
|
13
|
+
// isValidAssertionType
|
|
14
|
+
// ---------------------------------------------------------------------------
|
|
15
|
+
// Checks that isValidAssertionType accepts each listed assertion type name
// and rejects a name that is not an assertion type.
describe("isValidAssertionType", () => {
    // Asserts, in order, that every name in `typeNames` yields `expected`.
    const expectValidity = (typeNames, expected) => {
        for (const typeName of typeNames) {
            assert.equal(isValidAssertionType(typeName), expected);
        }
    };

    it("recognizes known deterministic types", () => {
        expectValidity(["contains", "equals", "regex", "is-json"], true);
    });

    it("recognizes LLM-graded types", () => {
        expectValidity([
            "llm-rubric",
            "model-graded-closedqa",
            "model-graded-factuality",
            "g-eval",
            "similar",
        ], true);
    });

    it("recognizes programmatic types", () => {
        expectValidity(["javascript", "python"], true);
    });

    it("recognizes tool-use types", () => {
        expectValidity(["tool-called", "tool-call-f1", "skill-used"], true);
    });

    it("recognizes negated types", () => {
        expectValidity(["not-contains", "not-equals"], true);
    });

    it("rejects unknown types", () => {
        expectValidity(["nonexistent"], false);
    });
});
|
|
46
|
+
// ---------------------------------------------------------------------------
|
|
47
|
+
// isAssertionCompatibleWithMode
|
|
48
|
+
// ---------------------------------------------------------------------------
|
|
49
|
+
// Checks the (assertion type, mode) compatibility answers returned by
// isAssertionCompatibleWithMode for a fixed set of pairs.
describe("isAssertionCompatibleWithMode", () => {
    // Each row is [assertionType, mode, expectedResult]; rows are checked in order.
    const expectCompatibility = (rows) => {
        for (const [assertionType, mode, expected] of rows) {
            assert.equal(isAssertionCompatibleWithMode(assertionType, mode), expected);
        }
    };

    it("allows deterministic types in any mode", () => {
        expectCompatibility([
            ["contains", "literacy", true],
            ["contains", "agent-harness", true],
            ["contains", "knowledge-probe", true],
        ]);
    });

    it("restricts tool-use types to appropriate modes", () => {
        expectCompatibility([
            ["tool-called", "agent-harness", true],
            ["tool-called", "mcp-server", true],
            ["tool-called", "literacy", true],
            ["tool-called", "knowledge-probe", false],
        ]);
    });

    it("handles negated types", () => {
        expectCompatibility([["not-contains", "literacy", true]]);
    });
});
|
|
65
|
+
// ---------------------------------------------------------------------------
|
|
66
|
+
// mapAssertions
|
|
67
|
+
// ---------------------------------------------------------------------------
|
|
68
|
+
// Exercises mapAssertions: what ends up in `mapped`, what is reported in
// `warnings`, and how the optional `mode` / `graderProvider` options are applied.
describe("mapAssertions", () => {
    it("maps a simple contains assertion", () => {
        const input = [{ type: "contains", value: "defineType" }];
        const outcome = mapAssertions(input);
        assert.equal(outcome.mapped.length, 1);
        const only = outcome.mapped[0];
        assert.equal(only.type, "contains");
        assert.equal(only.value, "defineType");
        assert.equal(outcome.warnings.length, 0);
    });

    it("maps an llm-rubric assertion with template and criteria", () => {
        const rubric = {
            type: "llm-rubric",
            template: "task-completion",
            criteria: ["Uses projection syntax", "Demonstrates spread operator"],
        };
        const { mapped } = mapAssertions([rubric]);
        assert.equal(mapped.length, 1);
        const only = mapped[0];
        assert.equal(only.type, "llm-rubric");
        assert.ok(only.value.includes("task-completion"), "Should include template name");
    });

    it("preserves weight on assertions", () => {
        const weighted = { type: "contains", value: "test", weight: 0.5 };
        const { mapped } = mapAssertions([weighted]);
        assert.equal(mapped[0].weight, 0.5);
    });

    it("maps negated assertions", () => {
        const negated = { type: "not-contains", value: "deprecated" };
        const { mapped } = mapAssertions([negated]);
        assert.equal(mapped.length, 1);
        assert.equal(mapped[0].type, "not-contains");
    });

    it("skips incompatible tool-use assertions with warning", () => {
        const toolCheck = { type: "tool-called", value: "WebSearch" };
        const outcome = mapAssertions([toolCheck], { mode: "knowledge-probe" });
        assert.equal(outcome.mapped.length, 0);
        assert.equal(outcome.warnings.length, 1);
        assert.ok(outcome.warnings[0].includes("not compatible"));
    });

    it("warns about unknown assertion types but passes through", () => {
        const unknown = { type: "custom-check", value: "something" };
        const outcome = mapAssertions([unknown]);
        assert.equal(outcome.mapped.length, 1);
        assert.equal(outcome.mapped[0].type, "custom-check");
        const flagsUnknown = (warning) => warning.includes("Unknown");
        assert.ok(outcome.warnings.some(flagsUnknown));
    });

    it("skips non-negatable assertion types", () => {
        const badNegation = { type: "not-javascript", value: "return true" };
        const outcome = mapAssertions([badNegation]);
        assert.equal(outcome.mapped.length, 0);
        const flagsNegation = (warning) => warning.includes("negation");
        assert.ok(outcome.warnings.some(flagsNegation));
    });

    it("sets grader provider on LLM-graded assertions", () => {
        const rubric = {
            type: "llm-rubric",
            template: "code-correctness",
            criteria: ["Valid GROQ"],
        };
        const options = { graderProvider: "openai:chat:gpt-5" };
        const { mapped } = mapAssertions([rubric], options);
        assert.equal(mapped[0].provider, "openai:chat:gpt-5");
    });

    it("maps multiple assertions in order", () => {
        const inputs = [
            { type: "contains", value: "first" },
            { type: "equals", value: "second" },
            { type: "regex", value: "third.*pattern" },
        ];
        const { mapped } = mapAssertions(inputs);
        assert.equal(mapped.length, 3);
        // Output order must follow input order.
        const expectedTypes = ["contains", "equals", "regex"];
        for (let position = 0; position < expectedTypes.length; position += 1) {
            assert.equal(mapped[position].type, expectedTypes[position]);
        }
    });
});
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* knowledge-probe-handler.test.ts — Tests for knowledge probe mode compilation.
|
|
3
|
+
*
|
|
4
|
+
* Tests validation, provider assembly, prompt generation, assertion mapping
|
|
5
|
+
* (including rejection of tool-use assertions), metadata generation, and
|
|
6
|
+
* end-to-end compilation of example tasks.
|
|
7
|
+
*
|
|
8
|
+
* Run: npx tsx --test src/pipeline/compiler/__tests__/knowledge-probe-handler.test.ts
|
|
9
|
+
*/
|
|
10
|
+
export {};
|