@sanity/ailf 0.5.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config/features.ts +23 -0
- package/config/models.ts +83 -0
- package/config/prompts.ts +16 -0
- package/config/rubrics.ts +225 -0
- package/config/schedules.ts +47 -0
- package/config/sinks.ts +37 -0
- package/config/sources.ts +21 -0
- package/config/thresholds.ts +61 -0
- package/dist/_vendor/ailf-core/config-helpers.d.ts +174 -0
- package/dist/_vendor/ailf-core/config-helpers.js +150 -0
- package/dist/_vendor/ailf-core/env-helper.d.ts +35 -0
- package/dist/_vendor/ailf-core/env-helper.js +45 -0
- package/dist/_vendor/ailf-core/index.d.ts +3 -0
- package/dist/_vendor/ailf-core/index.js +5 -0
- package/dist/_vendor/ailf-core/ports/context.d.ts +15 -2
- package/dist/_vendor/ailf-core/ports/doc-fetcher.d.ts +2 -2
- package/dist/_vendor/ailf-core/ports/index.d.ts +2 -1
- package/dist/_vendor/ailf-core/ports/mode-handler.d.ts +129 -0
- package/dist/_vendor/ailf-core/ports/mode-handler.js +19 -0
- package/dist/_vendor/ailf-core/ports/task-source.d.ts +16 -122
- package/dist/_vendor/ailf-core/ports/task-source.js +7 -7
- package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +7 -2
- package/dist/_vendor/ailf-core/schemas/eval-config.js +7 -2
- package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +8 -3
- package/dist/_vendor/ailf-core/schemas/pipeline-request.js +6 -1
- package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +14 -29
- package/dist/_vendor/ailf-core/schemas/pipeline.js +17 -8
- package/dist/_vendor/ailf-core/schemas/schedules.d.ts +14 -4
- package/dist/_vendor/ailf-core/schemas/schedules.js +6 -2
- package/dist/_vendor/ailf-core/schemas/sinks.d.ts +1 -1
- package/dist/_vendor/ailf-core/services/comparison-formatters.js +57 -19
- package/dist/_vendor/ailf-core/services/index.d.ts +2 -1
- package/dist/_vendor/ailf-core/services/index.js +2 -1
- package/dist/_vendor/ailf-core/services/scoring-engine.d.ts +153 -0
- package/dist/_vendor/ailf-core/services/scoring-engine.js +237 -0
- package/dist/_vendor/ailf-core/services/scoring.d.ts +15 -2
- package/dist/_vendor/ailf-core/services/scoring.js +25 -15
- package/dist/_vendor/ailf-core/types/branded-ids.d.ts +137 -0
- package/dist/_vendor/ailf-core/types/branded-ids.js +136 -0
- package/dist/_vendor/ailf-core/types/eval-mode-config.d.ts +150 -0
- package/dist/_vendor/ailf-core/types/eval-mode-config.js +24 -0
- package/dist/_vendor/ailf-core/types/generalized-task.d.ts +319 -0
- package/dist/_vendor/ailf-core/types/generalized-task.js +13 -0
- package/dist/_vendor/ailf-core/types/index.d.ts +45 -81
- package/dist/_vendor/ailf-core/types/index.js +8 -1
- package/dist/_vendor/ailf-core/types/plugin-registry.d.ts +202 -0
- package/dist/_vendor/ailf-core/types/plugin-registry.js +132 -0
- package/dist/_vendor/ailf-core/types/storage-schema.d.ts +199 -0
- package/dist/_vendor/ailf-core/types/storage-schema.js +39 -0
- package/dist/_vendor/ailf-core/types/task-graph.d.ts +86 -0
- package/dist/_vendor/ailf-core/types/task-graph.js +20 -0
- package/dist/_vendor/ailf-core/types/trace.d.ts +118 -0
- package/dist/_vendor/ailf-core/types/trace.js +18 -0
- package/dist/_vendor/ailf-core/types/variable-envelope.d.ts +80 -0
- package/dist/_vendor/ailf-core/types/variable-envelope.js +16 -0
- package/dist/_vendor/ailf-shared/dimension-names.d.ts +5 -18
- package/dist/_vendor/ailf-shared/dimension-names.js +6 -24
- package/dist/_vendor/ailf-shared/eval-modes.d.ts +38 -6
- package/dist/_vendor/ailf-shared/eval-modes.js +26 -2
- package/dist/_vendor/ailf-shared/index.d.ts +0 -1
- package/dist/_vendor/ailf-shared/index.js +0 -1
- package/dist/adapters/api-client/build-request.js +14 -13
- package/dist/adapters/config-sources/file-config-adapter.d.ts +20 -11
- package/dist/adapters/config-sources/file-config-adapter.js +38 -12
- package/dist/adapters/config-sources/index.d.ts +2 -0
- package/dist/adapters/config-sources/index.js +1 -0
- package/dist/adapters/config-sources/ts-config-loader.d.ts +59 -0
- package/dist/adapters/config-sources/ts-config-loader.js +133 -0
- package/dist/adapters/doc-fetchers/sanity-doc-fetcher.d.ts +3 -2
- package/dist/adapters/doc-fetchers/sanity-doc-fetcher.js +7 -2
- package/dist/adapters/task-sources/composite-task-source.d.ts +3 -3
- package/dist/adapters/task-sources/composite-task-source.js +1 -1
- package/dist/adapters/task-sources/content-lake-task-source.d.ts +7 -6
- package/dist/adapters/task-sources/content-lake-task-source.js +22 -23
- package/dist/adapters/task-sources/index.d.ts +1 -0
- package/dist/adapters/task-sources/index.js +1 -0
- package/dist/adapters/task-sources/repo-task-source.d.ts +4 -4
- package/dist/adapters/task-sources/repo-task-source.js +69 -16
- package/dist/adapters/task-sources/task-file-loader.d.ts +64 -0
- package/dist/adapters/task-sources/task-file-loader.js +83 -0
- package/dist/adapters/task-sources/yaml-task-source.d.ts +6 -6
- package/dist/adapters/task-sources/yaml-task-source.js +19 -16
- package/dist/cli.js +0 -2
- package/dist/commands/baseline.js +4 -1
- package/dist/commands/calculate-scores.js +1 -1
- package/dist/commands/coverage-audit.js +7 -1
- package/dist/commands/explain-handler.js +25 -23
- package/dist/commands/fetch-docs.js +3 -2
- package/dist/commands/generate-configs.js +1 -1
- package/dist/commands/interactive.js +11 -7
- package/dist/commands/pipeline-action.d.ts +2 -0
- package/dist/commands/pipeline-action.js +16 -6
- package/dist/commands/pipeline.d.ts +1 -0
- package/dist/commands/pipeline.js +4 -2
- package/dist/commands/pr-comment.js +1 -1
- package/dist/commands/publish.js +2 -2
- package/dist/commands/readiness-report.js +13 -6
- package/dist/composition-root.d.ts +1 -1
- package/dist/composition-root.js +67 -4
- package/dist/orchestration/build-app-context.js +1 -0
- package/dist/orchestration/build-step-sequence.js +24 -6
- package/dist/orchestration/steps/calculate-scores-step.js +24 -11
- package/dist/orchestration/steps/fetch-docs-step.js +6 -4
- package/dist/orchestration/steps/gap-analysis-step.js +8 -7
- package/dist/orchestration/steps/generate-configs-step.d.ts +16 -3
- package/dist/orchestration/steps/generate-configs-step.js +245 -51
- package/dist/orchestration/steps/grader-consistency-step.js +7 -4
- package/dist/orchestration/steps/mirror-repo-tasks-step.js +1 -1
- package/dist/orchestration/steps/readiness-step.js +5 -6
- package/dist/orchestration/steps/run-eval-step.d.ts +1 -2
- package/dist/orchestration/steps/run-eval-step.js +8 -7
- package/dist/pipeline/cache.d.ts +1 -1
- package/dist/pipeline/cache.js +36 -8
- package/dist/pipeline/calculate-scores.d.ts +2 -4
- package/dist/pipeline/calculate-scores.js +43 -113
- package/dist/pipeline/checks.js +2 -2
- package/dist/pipeline/compare.js +8 -8
- package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.d.ts +10 -0
- package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js +288 -0
- package/dist/pipeline/compiler/__tests__/assertion-mapper.test.d.ts +9 -0
- package/dist/pipeline/compiler/__tests__/assertion-mapper.test.js +145 -0
- package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.d.ts +10 -0
- package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.js +314 -0
- package/dist/pipeline/compiler/__tests__/literacy-handler.test.d.ts +10 -0
- package/dist/pipeline/compiler/__tests__/literacy-handler.test.js +486 -0
- package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.d.ts +10 -0
- package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +355 -0
- package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.d.ts +9 -0
- package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.js +333 -0
- package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.d.ts +12 -0
- package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.js +210 -0
- package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.d.ts +7 -0
- package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.js +471 -0
- package/dist/pipeline/compiler/__tests__/scoring-bridge.test.d.ts +10 -0
- package/dist/pipeline/compiler/__tests__/scoring-bridge.test.js +184 -0
- package/dist/pipeline/compiler/__tests__/task-graph-builder.test.d.ts +8 -0
- package/dist/pipeline/compiler/__tests__/task-graph-builder.test.js +301 -0
- package/dist/pipeline/compiler/__tests__/telemetry.test.d.ts +9 -0
- package/dist/pipeline/compiler/__tests__/telemetry.test.js +503 -0
- package/dist/pipeline/compiler/assertion-mapper.d.ts +58 -0
- package/dist/pipeline/compiler/assertion-mapper.js +175 -0
- package/dist/pipeline/compiler/compiler-to-yaml.d.ts +51 -0
- package/dist/pipeline/compiler/compiler-to-yaml.js +222 -0
- package/dist/pipeline/compiler/config-loader.d.ts +56 -0
- package/dist/pipeline/compiler/config-loader.js +111 -0
- package/dist/pipeline/compiler/fixture-resolver.d.ts +41 -0
- package/dist/pipeline/compiler/fixture-resolver.js +113 -0
- package/dist/pipeline/compiler/hash.d.ts +11 -0
- package/dist/pipeline/compiler/hash.js +18 -0
- package/dist/pipeline/compiler/ignore-fields.d.ts +53 -0
- package/dist/pipeline/compiler/ignore-fields.js +113 -0
- package/dist/pipeline/compiler/index.d.ts +29 -0
- package/dist/pipeline/compiler/index.js +45 -0
- package/dist/pipeline/compiler/literacy-bridge.d.ts +102 -0
- package/dist/pipeline/compiler/literacy-bridge.js +172 -0
- package/dist/pipeline/compiler/mode-handlers/__fixtures__/agent-harness-example-tasks.d.ts +14 -0
- package/dist/pipeline/compiler/mode-handlers/__fixtures__/agent-harness-example-tasks.js +152 -0
- package/dist/pipeline/compiler/mode-handlers/__fixtures__/knowledge-probe-example-tasks.d.ts +32 -0
- package/dist/pipeline/compiler/mode-handlers/__fixtures__/knowledge-probe-example-tasks.js +176 -0
- package/dist/pipeline/compiler/mode-handlers/__fixtures__/mcp-example-tasks.d.ts +49 -0
- package/dist/pipeline/compiler/mode-handlers/__fixtures__/mcp-example-tasks.js +259 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.d.ts +70 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.js +485 -0
- package/dist/pipeline/compiler/mode-handlers/index.d.ts +16 -0
- package/dist/pipeline/compiler/mode-handlers/index.js +21 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.d.ts +76 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.js +245 -0
- package/dist/pipeline/compiler/mode-handlers/literacy-handler.d.ts +89 -0
- package/dist/pipeline/compiler/mode-handlers/literacy-handler.js +379 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-assertions.d.ts +50 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-assertions.js +277 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.d.ts +67 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.js +309 -0
- package/dist/pipeline/compiler/presets/index.d.ts +9 -0
- package/dist/pipeline/compiler/presets/index.js +8 -0
- package/dist/pipeline/compiler/presets/sanity-literacy.d.ts +45 -0
- package/dist/pipeline/compiler/presets/sanity-literacy.js +354 -0
- package/dist/pipeline/compiler/promptfoo-compiler.d.ts +96 -0
- package/dist/pipeline/compiler/promptfoo-compiler.js +230 -0
- package/dist/pipeline/compiler/provider-assembler.d.ts +39 -0
- package/dist/pipeline/compiler/provider-assembler.js +137 -0
- package/dist/pipeline/compiler/sandbox/docker-sandbox.d.ts +21 -0
- package/dist/pipeline/compiler/sandbox/docker-sandbox.js +136 -0
- package/dist/pipeline/compiler/sandbox/fixture-provisioner.d.ts +69 -0
- package/dist/pipeline/compiler/sandbox/fixture-provisioner.js +189 -0
- package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.d.ts +20 -0
- package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.js +114 -0
- package/dist/pipeline/compiler/sandbox/index.d.ts +10 -0
- package/dist/pipeline/compiler/sandbox/index.js +11 -0
- package/dist/pipeline/compiler/sandbox/sandbox-selector.d.ts +35 -0
- package/dist/pipeline/compiler/sandbox/sandbox-selector.js +86 -0
- package/dist/pipeline/compiler/sandbox/sandbox-strategy.d.ts +81 -0
- package/dist/pipeline/compiler/sandbox/sandbox-strategy.js +15 -0
- package/dist/pipeline/compiler/sandbox/tempdir-sandbox.d.ts +20 -0
- package/dist/pipeline/compiler/sandbox/tempdir-sandbox.js +74 -0
- package/dist/pipeline/compiler/scoring-bridge.d.ts +49 -0
- package/dist/pipeline/compiler/scoring-bridge.js +114 -0
- package/dist/pipeline/compiler/task-graph-builder.d.ts +54 -0
- package/dist/pipeline/compiler/task-graph-builder.js +291 -0
- package/dist/pipeline/compiler/telemetry/cost-tracker.d.ts +90 -0
- package/dist/pipeline/compiler/telemetry/cost-tracker.js +146 -0
- package/dist/pipeline/compiler/telemetry/index.d.ts +14 -0
- package/dist/pipeline/compiler/telemetry/index.js +19 -0
- package/dist/pipeline/compiler/telemetry/redactor.d.ts +58 -0
- package/dist/pipeline/compiler/telemetry/redactor.js +222 -0
- package/dist/pipeline/compiler/telemetry/tool-classifier.d.ts +32 -0
- package/dist/pipeline/compiler/telemetry/tool-classifier.js +120 -0
- package/dist/pipeline/compiler/telemetry/trace-collector.d.ts +75 -0
- package/dist/pipeline/compiler/telemetry/trace-collector.js +297 -0
- package/dist/pipeline/compiler/telemetry/trace-store.d.ts +78 -0
- package/dist/pipeline/compiler/telemetry/trace-store.js +85 -0
- package/dist/pipeline/compiler/variable-resolver.d.ts +46 -0
- package/dist/pipeline/compiler/variable-resolver.js +115 -0
- package/dist/pipeline/coverage-audit.d.ts +15 -5
- package/dist/pipeline/coverage-audit.js +41 -22
- package/dist/pipeline/eval-constants.d.ts +16 -6
- package/dist/pipeline/eval-constants.js +25 -4
- package/dist/pipeline/eval-fingerprint.d.ts +2 -2
- package/dist/pipeline/eval-fingerprint.js +8 -9
- package/dist/pipeline/expand-tasks.d.ts +19 -10
- package/dist/pipeline/expand-tasks.js +34 -28
- package/dist/pipeline/gap-analysis.d.ts +1 -1
- package/dist/pipeline/gap-analysis.js +2 -2
- package/dist/pipeline/generate-configs.d.ts +22 -4
- package/dist/pipeline/generate-configs.js +53 -24
- package/dist/pipeline/grader-api.d.ts +3 -3
- package/dist/pipeline/grader-api.js +5 -12
- package/dist/pipeline/grader-compare-runner.js +20 -27
- package/dist/pipeline/grader-comparison.d.ts +4 -8
- package/dist/pipeline/grader-comparison.js +11 -17
- package/dist/pipeline/grader-consistency-runner.d.ts +2 -3
- package/dist/pipeline/grader-consistency-runner.js +16 -20
- package/dist/pipeline/grader-consistency.d.ts +6 -10
- package/dist/pipeline/grader-consistency.js +13 -32
- package/dist/pipeline/grader-sensitivity-runner.js +7 -5
- package/dist/pipeline/grader-sensitivity.d.ts +2 -6
- package/dist/pipeline/grader-sensitivity.js +10 -10
- package/dist/pipeline/grader-validate-runner.js +7 -5
- package/dist/pipeline/grader-validation.d.ts +2 -6
- package/dist/pipeline/grader-validation.js +14 -22
- package/dist/pipeline/map-request-to-config.js +6 -1
- package/dist/pipeline/mirror-repo-tasks.d.ts +6 -6
- package/dist/pipeline/mirror-repo-tasks.js +16 -15
- package/dist/pipeline/normalize-mode.d.ts +49 -0
- package/dist/pipeline/normalize-mode.js +64 -0
- package/dist/pipeline/plan.d.ts +5 -2
- package/dist/pipeline/plan.js +134 -78
- package/dist/pipeline/pr-comment.js +2 -0
- package/dist/pipeline/profile-resolution.d.ts +22 -14
- package/dist/pipeline/profile-resolution.js +41 -19
- package/dist/pipeline/provenance.d.ts +2 -2
- package/dist/pipeline/provenance.js +12 -17
- package/dist/pipeline/release-report.js +4 -4
- package/dist/pipeline/repo-threshold-evaluator.d.ts +1 -1
- package/dist/pipeline/repo-threshold-evaluator.js +1 -1
- package/dist/pipeline/rubric-loader.d.ts +20 -0
- package/dist/pipeline/rubric-loader.js +37 -0
- package/dist/pipeline/validate.d.ts +4 -4
- package/dist/pipeline/validate.js +64 -53
- package/dist/schedules/loader.js +18 -8
- package/dist/scripts/migrate-task-mode.d.ts +24 -0
- package/dist/scripts/migrate-task-mode.js +85 -0
- package/dist/scripts/migrate-tasks-to-content-lake.js +11 -10
- package/dist/scripts/validate-task-sources.d.ts +1 -1
- package/dist/scripts/validate-task-sources.js +15 -15
- package/dist/sinks/loader.js +5 -7
- package/dist/sources.d.ts +7 -7
- package/dist/sources.js +22 -24
- package/dist/webhook/dispatch.js +2 -1
- package/package.json +6 -3
- package/tasks/knowledge-probe/define-type-api.task.ts +55 -0
- package/tasks/knowledge-probe/groq-projections.task.ts +59 -0
- package/tasks/literacy/frameworks.task.ts +128 -0
- package/tasks/literacy/functions.task.ts +69 -0
- package/tasks/literacy/groq.task.ts +258 -0
- package/tasks/literacy/nextjs-live.task.ts +75 -0
- package/tasks/literacy/studio-setup.task.ts +131 -0
- package/tasks/literacy/visual-editing.task.ts +146 -0
- package/config/features.yaml +0 -116
- package/config/models.yaml +0 -116
- package/config/prompts.yaml +0 -75
- package/config/rubrics.yaml +0 -81
- package/config/schedules.yaml +0 -43
- package/config/sinks.yaml +0 -54
- package/config/sources.yaml +0 -51
- package/config/thresholds.yaml +0 -49
- package/dist/agent-observer/test-imports.d.ts +0 -7
- package/dist/agent-observer/test-imports.js +0 -185
|
@@ -0,0 +1,230 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* PromptfooCompiler — compiles a TaskGraph into Promptfoo YAML configuration.
|
|
3
|
+
*
|
|
4
|
+
* The compiler is the core of the new architecture. It takes a validated
|
|
5
|
+
* TaskGraph and produces a Promptfoo config that can be executed via
|
|
6
|
+
* `promptfoo eval`.
|
|
7
|
+
*
|
|
8
|
+
* Compilation pipeline:
|
|
9
|
+
* TaskGraph → resolve fixtures → resolve variables → map assertions
|
|
10
|
+
* → assemble prompts → assemble providers → emit YAML
|
|
11
|
+
*
|
|
12
|
+
* This module exists alongside `generate-configs.ts` — it does NOT replace
|
|
13
|
+
* the existing codegen path. Phase 7 will swap callers over to the compiler.
|
|
14
|
+
*
|
|
15
|
+
* @see docs/exec-plans/architecture-overhaul/phase-2-config-compiler.md
|
|
16
|
+
*/
|
|
17
|
+
import { mapAssertions } from "./assertion-mapper.js";
|
|
18
|
+
import { resolveTaskFixtures } from "./fixture-resolver.js";
|
|
19
|
+
import { LiteracyVariant } from "../normalize-mode.js";
|
|
20
|
+
import { resolveVariables } from "./variable-resolver.js";
|
|
21
|
+
// ---------------------------------------------------------------------------
|
|
22
|
+
// Public API
|
|
23
|
+
// ---------------------------------------------------------------------------
|
|
24
|
+
/**
 * Turn a TaskGraph into a Promptfoo configuration object.
 *
 * Nodes are visited in ascending `priority` order (the priority is assumed
 * to encode the graph's topological order — confirm against the graph
 * builder). Every node is expanded into its test cases, after which the
 * provider list and the prompt list are assembled and everything is
 * combined into a single config.
 *
 * @param graph - Task graph; `graph.nodes.values()` is iterated.
 * @param options - Compiler options. `mode`, `models`, `outputPath` and
 *   `graderProvider` are read here; the whole object is also forwarded to
 *   `compileNode` and `resolvePrompts`.
 * @returns `{ config, taskCount, testCaseCount, warnings }` where
 *   `warnings` collects every warning pushed during node compilation.
 */
export function compileToPromptfoo(graph, options) {
    const warnings = [];
    // Copy before sorting so the graph's own node collection is left untouched.
    const orderedNodes = Array.from(graph.nodes.values());
    orderedNodes.sort((left, right) => left.priority - right.priority);
    // Each node yields an array of test cases; flatten them in node order.
    const tests = orderedNodes.flatMap((node) => compileNode(node, graph, options, warnings));
    // Providers come from the model registry.
    const providers = buildProviders(options.models, options.mode);
    // Prompts: handler-owned → explicit override → built-in defaults.
    const prompts = resolvePrompts(options);
    // Optional sections are only emitted when the corresponding option is set.
    const outputSection = options.outputPath
        ? { outputPath: options.outputPath }
        : {};
    const graderSection = options.graderProvider
        ? { defaultTest: { options: { provider: options.graderProvider } } }
        : {};
    const config = {
        description: `AILF evaluation — ${options.mode} mode (${tests.length} test cases)`,
        prompts,
        providers,
        tests,
        ...outputSection,
        ...graderSection,
    };
    return {
        config,
        taskCount: orderedNodes.length,
        testCaseCount: tests.length,
        warnings,
    };
}
|
|
68
|
+
// ---------------------------------------------------------------------------
|
|
69
|
+
// Node compilation
|
|
70
|
+
// ---------------------------------------------------------------------------
|
|
71
|
+
/**
 * Compile a single graph node into its Promptfoo test cases.
 *
 * Always produces one "gold" test case. Literacy nodes additionally get a
 * `[baseline]` variant whose `docs` variable is blanked and which is pinned
 * to the "without-docs" prompt.
 *
 * @param node - Graph node; reads `mode`, `taskId`, `resolvedPrompt` and
 *   `resolvedVariables`.
 * @param graph - The owning graph (not read by this function).
 * @param options - Compiler options; reads `mode`, `rootDir` and
 *   `graderProvider`.
 * @param warnings - Shared warning list; fixture, variable and assertion
 *   warnings are appended to it in that order.
 * @returns The test cases for this node (one or two entries).
 */
function compileNode(node, graph, options, warnings) {
    // The node's own mode wins over the global option, so graphs that mix
    // modes are routed per node.
    const nodeMode = node.mode ?? options.mode ?? "literacy";
    // The fixture resolver takes a task-definition-shaped object; build a
    // minimal stub from what the node carries.
    const taskStub = {
        mode: nodeMode,
        id: node.taskId,
        title: node.taskId,
        prompt: { text: node.resolvedPrompt },
    };
    const fixtureResult = resolveTaskFixtures(taskStub, node.resolvedVariables, { rootDir: options.rootDir });
    warnings.push(...fixtureResult.warnings);
    // Resolve dynamic variables on top of the fixture-updated values.
    const varResult = resolveVariables(fixtureResult.updatedVars);
    warnings.push(...varResult.warnings);
    const resolvedValues = varResult.envelope.values;
    // Raw assertion data travels in the reserved `__assertions` variable.
    // NOTE(review): assertions are mapped with the global options.mode rather
    // than nodeMode — confirm this is intended for mixed-mode graphs.
    const rawAssertions = resolvedValues.__assertions ?? [];
    const assertionResult = mapAssertions(rawAssertions, { mode: options.mode, graderProvider: options.graderProvider });
    const assertions = assertionResult.mapped;
    warnings.push(...assertionResult.warnings);
    // Keys prefixed with "__" are internal and never reach the test case.
    const vars = Object.fromEntries(Object.entries(resolvedValues).filter(([key]) => !key.startsWith("__")));
    // The `assert` key is omitted entirely when there is nothing to assert.
    const assertSection = assertions.length > 0 ? { assert: assertions } : {};
    const goldTest = {
        description: node.taskId,
        vars,
        ...assertSection,
    };
    const isLiteracyNode = nodeMode === LiteracyVariant.STANDARD || nodeMode === "literacy";
    if (!isLiteracyNode) {
        return [goldTest];
    }
    // Baseline variant: same task and assertions, but no documentation.
    const baselineTest = {
        description: `${node.taskId} [baseline]`,
        vars: { ...vars, docs: "" },
        prompts: ["without-docs"],
        ...assertSection,
    };
    return [goldTest, baselineTest];
}
|
|
122
|
+
// ---------------------------------------------------------------------------
|
|
123
|
+
// Provider assembly
|
|
124
|
+
// ---------------------------------------------------------------------------
|
|
125
|
+
/**
 * Assemble the Promptfoo providers list from the model registry.
 *
 * A model is skipped when it declares a non-empty `modes` list that does not
 * match `mode`, or when it names an `env` variable that is unset or empty in
 * `process.env`.
 *
 * @param models - Model registry; reads `models.models` and `models.defaults`.
 * @param mode - Evaluation mode used for the mode-compatibility check.
 * @returns Provider entries `{ id, label, config }`, where `config` is the
 *   registry defaults overlaid with the model's own config.
 */
function buildProviders(models, mode) {
    const isEligible = (model) => {
        // Mode gate: only consulted when the model restricts its modes.
        const restrictsModes = Boolean(model.modes) && model.modes.length > 0;
        if (restrictsModes && !modelMatchesMode(model, mode)) {
            return false;
        }
        // Env gate: the named variable (typically an API key) must be set.
        const envGateClosed = Boolean(model.env) && !process.env[model.env];
        return !envGateClosed;
    };
    const toProvider = (model) => ({
        id: model.id,
        label: model.label,
        config: {
            ...models.defaults,
            ...model.config,
        },
    });
    return models.models.filter(isEligible).map(toProvider);
}
|
|
151
|
+
/**
 * Decide whether a model entry may be used for the given evaluation mode.
 *
 * - A model without a `modes` list (or with an empty one) matches any mode.
 * - For the "literacy" mode the model must list `LiteracyVariant.STANDARD`.
 * - Every other mode accepts every model.
 *
 * This check does not distinguish between literacy variants.
 *
 * @param model - Model registry entry; reads `model.modes`.
 * @param mode - Evaluation mode to test against.
 * @returns `true` when the model should be kept.
 */
function modelMatchesMode(model, mode) {
    const declaredModes = model.modes;
    const isUnrestricted = !declaredModes || declaredModes.length === 0;
    if (isUnrestricted || mode !== "literacy") {
        return true;
    }
    return declaredModes.includes(LiteracyVariant.STANDARD);
}
|
|
169
|
+
// ---------------------------------------------------------------------------
|
|
170
|
+
// Prompt resolution
|
|
171
|
+
// ---------------------------------------------------------------------------
|
|
172
|
+
/**
 * Resolve prompts with a three-level fallback chain:
 * 1. handler.getPrompts() — mode-handler-owned templates
 * 2. options.prompts — explicit caller-provided templates
 * 3. buildDefaultPrompts() — built-in defaults per mode
 *
 * A level is skipped when it yields nothing: a missing handler, a handler
 * without `getPrompts`, an empty prompt map, or an empty explicit prompt
 * array all fall through to the next level, so the result is never an
 * empty prompt list supplied by accident.
 *
 * @param options - Compiler options; reads `handler`, `prompts` and `mode`.
 * @returns The prompt entries to place in the Promptfoo config.
 */
function resolvePrompts(options) {
    // 1. Handler-owned prompts (both the handler and the method are optional).
    const handlerPrompts = options.handler?.getPrompts?.();
    if (handlerPrompts && Object.keys(handlerPrompts).length > 0) {
        return Object.values(handlerPrompts).map(promptTemplateToPromptfoo);
    }
    // 2. Explicit override. An empty array is truthy, so check its length
    //    explicitly; non-array truthy values are passed through unchanged.
    const explicitPrompts = options.prompts;
    const hasExplicitPrompts = Array.isArray(explicitPrompts)
        ? explicitPrompts.length > 0
        : Boolean(explicitPrompts);
    if (hasExplicitPrompts) {
        return explicitPrompts;
    }
    // 3. Built-in defaults for the mode.
    return buildDefaultPrompts(options.mode);
}
|
|
190
|
+
/**
 * Map a prompt template onto the shape used in the Promptfoo config.
 *
 * `id` and `label` are copied as-is; `template` is exposed as `raw`. Any
 * other properties on the input are dropped.
 *
 * @param pt - Prompt template with `id`, `label` and `template`.
 * @returns `{ id, label, raw }`
 */
function promptTemplateToPromptfoo(pt) {
    const { id, label, template } = pt;
    return { id, label, raw: template };
}
|
|
196
|
+
// ---------------------------------------------------------------------------
|
|
197
|
+
// Default prompts
|
|
198
|
+
// ---------------------------------------------------------------------------
|
|
199
|
+
/**
 * Produce the built-in prompt entries for a mode.
 *
 * "literacy" gets a pair of prompts — one that appends the documentation
 * context and one that sends the task alone. Any other mode gets a single
 * prompt that sends the task alone. A fresh array is returned on each call.
 *
 * @param mode - Evaluation mode.
 * @returns Prompt entries `{ id, label, raw }`.
 */
function buildDefaultPrompts(mode) {
    if (mode === "literacy") {
        const withDocs = {
            id: "with-docs",
            label: "With documentation context",
            raw: "{{task}}\n\nDocumentation context:\n{{docs}}",
        };
        const withoutDocs = {
            id: "without-docs",
            label: "Without documentation context",
            raw: "{{task}}",
        };
        return [withDocs, withoutDocs];
    }
    const fallback = {
        id: "default",
        label: "Default prompt",
        raw: "{{task}}",
    };
    return [fallback];
}
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* provider-assembler.ts — Build per-literacy-variant provider arrays from models config.
|
|
3
|
+
*
|
|
4
|
+
* Replicates the provider-building logic from the legacy generate-configs.ts
|
|
5
|
+
* so the new compiler produces identical provider configurations.
|
|
6
|
+
*
|
|
7
|
+
* Separated into its own module so GenerateConfigsStep can import it
|
|
8
|
+
* without pulling in the full legacy generate-configs machinery.
|
|
9
|
+
*/
|
|
10
|
+
import { type ModelsConfig } from "../../_vendor/ailf-core/index.d.ts";
|
|
11
|
+
import type { ResolvedSourceConfig } from "../../sources.js";
|
|
12
|
+
/**
 * Provider arrays grouped by literacy variant.
 *
 * The keys are literacy variant names, not EvalMode values. Each variant
 * carries its own list of provider entries; entries are kept loosely typed
 * because their config differs from variant to variant.
 */
export interface LiteracyVariantProviders {
    /** Providers for the baseline variant. */
    baseline: Array<Record<string, unknown>>;
    /** Providers for the agentic variant. */
    agentic: Array<Record<string, unknown>>;
    /** Providers for the observed variant. */
    observed: Array<Record<string, unknown>>;
}
/** @deprecated Use LiteracyVariantProviders — kept for backward compatibility */
export type AssembledProviders = LiteracyVariantProviders;
|
|
27
|
+
/**
 * What `loadModelsAndProviders` hands back: the loaded models config
 * together with the provider arrays assembled from it.
 */
export interface ModelsAndProviders {
    /** The models config the providers were built from. */
    models: ModelsConfig;
    /** Provider arrays keyed by literacy variant. */
    providers: LiteracyVariantProviders;
}
|
|
32
|
+
/**
 * Load the models config and assemble the provider arrays for each
 * literacy variant (baseline, agentic, observed).
 *
 * @param rootDir - Directory the models config is loaded from.
 * @param source - Optional resolved source config, used when building the
 *   agentic providers.
 * @param searchMode - Optional search mode, used when building the agentic
 *   providers.
 * @param allowedOrigins - Optional list of allowed origins, forwarded to the
 *   agentic provider builder.
 * @returns The loaded models config plus the per-variant provider arrays.
 */
export declare function loadModelsAndProviders(
    rootDir: string,
    source?: ResolvedSourceConfig,
    searchMode?: string,
    allowedOrigins?: string[],
): ModelsAndProviders;
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* provider-assembler.ts — Build per-literacy-variant provider arrays from models config.
|
|
3
|
+
*
|
|
4
|
+
* Replicates the provider-building logic from the legacy generate-configs.ts
|
|
5
|
+
* so the new compiler produces identical provider configurations.
|
|
6
|
+
*
|
|
7
|
+
* Separated into its own module so GenerateConfigsStep can import it
|
|
8
|
+
* without pulling in the full legacy generate-configs machinery.
|
|
9
|
+
*/
|
|
10
|
+
import { extractModelName, extractProvider, mergeConfig, modelMatchesMode, } from "../../_vendor/ailf-core/index.js";
|
|
11
|
+
import { LiteracyVariant } from "../normalize-mode.js";
|
|
12
|
+
import { loadConfigFile } from "./config-loader.js";
|
|
13
|
+
// ---------------------------------------------------------------------------
|
|
14
|
+
// Public API
|
|
15
|
+
// ---------------------------------------------------------------------------
|
|
16
|
+
/**
 * Load the models config and build one provider array per literacy variant.
 *
 * The variants are built in the order baseline → agentic → observed and
 * returned under those keys alongside the loaded models config.
 *
 * @param rootDir - Passed to `loadModelsYaml` to locate the models config.
 * @param source - Optional source config, forwarded to the agentic builder.
 * @param searchMode - Optional search mode, forwarded to the agentic builder.
 * @param allowedOrigins - Optional allowed origins, forwarded to the agentic
 *   builder.
 * @returns `{ models, providers: { baseline, agentic, observed } }`
 */
export function loadModelsAndProviders(rootDir, source, searchMode, allowedOrigins) {
    const models = loadModelsYaml(rootDir);
    const baseline = buildBaselineProviders(models);
    const agentic = buildAgenticProviders(models, source, searchMode, allowedOrigins);
    const observed = buildObservedProviders(models);
    return {
        models,
        providers: { baseline, agentic, observed },
    };
}
|
|
34
|
+
// ---------------------------------------------------------------------------
|
|
35
|
+
// Baseline providers
|
|
36
|
+
// ---------------------------------------------------------------------------
|
|
37
|
+
/**
 * Build the provider entries for the baseline literacy variant.
 *
 * Keeps the models that `modelMatchesMode` accepts for
 * `LiteracyVariant.STANDARD` and pairs each one's id and label with the
 * result of merging the registry defaults and the model's own config.
 *
 * @param models - Models config; reads `models.models` and `models.defaults`.
 * @returns Provider entries `{ config, id, label }`.
 */
function buildBaselineProviders(models) {
    const baselineModels = models.models.filter((candidate) => modelMatchesMode(candidate, LiteracyVariant.STANDARD));
    const providers = [];
    for (const model of baselineModels) {
        providers.push({
            config: mergeConfig(models.defaults, model.config),
            id: model.id,
            label: model.label,
        });
    }
    return providers;
}
|
|
46
|
+
// ---------------------------------------------------------------------------
|
|
47
|
+
// Observed providers
|
|
48
|
+
// ---------------------------------------------------------------------------
|
|
49
|
+
/**
 * Build the observed-variant provider entries.
 *
 * Every model accepted by `modelMatchesMode` for `LiteracyVariant.OBSERVED`
 * is routed through the agent-observer provider script. The merged model
 * config is extended with the bare model name, `observe: true` and the
 * observer options from the shared defaults (stored under `recordOptions`).
 *
 * @param models - Loaded models config (`defaults` plus a `models` array).
 * @returns One provider entry per matching model, in config order.
 */
function buildObservedProviders(models) {
    return models.models.reduce((entries, candidate) => {
        if (modelMatchesMode(candidate, LiteracyVariant.OBSERVED)) {
            const modelName = extractModelName(candidate.id);
            const merged = mergeConfig(models.defaults, candidate.config);
            entries.push({
                config: {
                    ...merged,
                    modelName,
                    observe: true,
                    recordOptions: models.defaults.observerOptions ?? {},
                },
                id: "file://dist/agent-observer/provider.js",
                label: `${candidate.label} (Observed)`,
            });
        }
        return entries;
    }, []);
}
|
|
66
|
+
// ---------------------------------------------------------------------------
|
|
67
|
+
// Agentic providers
|
|
68
|
+
// ---------------------------------------------------------------------------
|
|
69
|
+
/**
 * Build the agentic-variant provider entries.
 *
 * Two groups are produced and concatenated in this order:
 *   1. models matching the "agentic-naive" mode     → agentMode "naive"
 *   2. models matching the "agentic-optimized" mode → agentMode "optimized"
 *
 * Every entry points at the agentic provider script and carries the merged
 * model config, the source-derived settings, `observe: true` and the
 * observer options from the shared defaults.
 *
 * @param models - Loaded models config (`defaults` plus a `models` array).
 * @param source - Optional documentation source; when absent no
 *   source-derived keys are added to the provider config.
 * @param searchMode - Optional search mode; defaults to "open".
 * @param _allowedOrigins - Unused here; origins are read from
 *   `source.allowedOrigins`. Kept so the call signature stays stable.
 * @returns Naive entries followed by optimized entries.
 */
function buildAgenticProviders(models, source, searchMode, _allowedOrigins) {
    const resolvedSearchMode = searchMode ?? "open";
    // Optional keys are only emitted when they carry a value. Key order is
    // kept stable because the resulting objects are serialised downstream.
    // NOTE(review): a non-"open" searchMode is dropped when `source` is
    // missing — confirm that this is intended.
    const sourceConfig = source
        ? {
            ...(source.allowedOrigins?.length
                ? { allowedOrigins: source.allowedOrigins }
                : {}),
            docBaseUrl: source.baseUrl,
            ...(source.headers && Object.keys(source.headers).length > 0
                ? { customHeaders: source.headers }
                : {}),
            llmsTxtUrl: source.llmsTxt,
            ...(source.priorityDomain
                ? { priorityDomain: source.priorityDomain }
                : {}),
            ...(resolvedSearchMode !== "open"
                ? { searchMode: resolvedSearchMode }
                : {}),
        }
        : {};
    // Shared builder for both agent modes; only the mode key, the agentMode
    // value and the label suffix differ between the two groups.
    const buildGroup = (modeKey, agentMode, labelSuffix) => models.models
        .filter((m) => modelMatchesMode(m, modeKey))
        .map((model) => {
            const modelName = extractModelName(model.id);
            const provider = extractProvider(model.id);
            return {
                config: {
                    ...mergeConfig(models.defaults, model.config, {
                        agentMode,
                        maxToolRounds: models.defaults.maxToolRounds ?? 5,
                        model: modelName,
                        provider,
                    }),
                    // Source settings override merged model config on key clashes.
                    ...sourceConfig,
                    observe: true,
                    observerOptions: models.defaults.observerOptions ?? {},
                },
                id: "file://dist/agent-observer/agentic-provider.js",
                label: `${model.label} (${labelSuffix})`,
            };
        });
    return [
        ...buildGroup("agentic-naive", "naive", "Naive Agent"),
        ...buildGroup("agentic-optimized", "optimized", "Optimized Agent"),
    ];
}
|
|
132
|
+
// ---------------------------------------------------------------------------
|
|
133
|
+
// Helpers
|
|
134
|
+
// ---------------------------------------------------------------------------
|
|
135
|
+
/**
 * Load the "models" config file and return its `data` payload.
 *
 * @param rootDir - Directory handed to `loadConfigFile`.
 * @returns The `data` property of the loaded "models" config.
 */
function loadModelsYaml(rootDir) {
    const loaded = loadConfigFile("models", rootDir);
    return loaded.data;
}
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* DockerSandboxStrategy — full isolation via Docker containers.
|
|
3
|
+
*
|
|
4
|
+
* Provides deterministic filesystem, network control, and resource limits.
|
|
5
|
+
* Falls back to TempDirSandboxStrategy when Docker is unavailable.
|
|
6
|
+
*
|
|
7
|
+
* Docker interaction uses the `docker` CLI via `execFileSync` (array form,
|
|
8
|
+
* no shell) to prevent shell injection from task-supplied values like
|
|
9
|
+
* image names or task IDs.
|
|
10
|
+
*
|
|
11
|
+
* @see docs/exec-plans/architecture-overhaul/phase-4-agent-harness.md
|
|
12
|
+
*/
|
|
13
|
+
import type { SandboxArtifacts, SandboxInfo, SandboxProvisionOptions, SandboxStrategy } from "./sandbox-strategy.js";
|
|
14
|
+
/** Sandbox strategy backed by Docker containers. */
export declare class DockerSandboxStrategy implements SandboxStrategy {
    /** Human-readable strategy name. */
    readonly name = "Docker Container";
    /** Strategy discriminator. */
    readonly type: "docker";
    /** Resolves to whether this strategy can be used. */
    isAvailable(): Promise<boolean>;
    /** Provision a sandbox for the given options. */
    provision(options: SandboxProvisionOptions): Promise<SandboxInfo>;
    /** Collect the artifacts produced inside the given sandbox. */
    collectArtifacts(sandbox: SandboxInfo): Promise<SandboxArtifacts>;
    /** Dispose of the given sandbox. */
    teardown(sandbox: SandboxInfo): Promise<void>;
}
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* DockerSandboxStrategy — full isolation via Docker containers.
|
|
3
|
+
*
|
|
4
|
+
* Provides deterministic filesystem, network control, and resource limits.
|
|
5
|
+
* Falls back to TempDirSandboxStrategy when Docker is unavailable.
|
|
6
|
+
*
|
|
7
|
+
* Docker interaction uses the `docker` CLI via `execFileSync` (array form,
|
|
8
|
+
* no shell) to prevent shell injection from task-supplied values like
|
|
9
|
+
* image names or task IDs.
|
|
10
|
+
*
|
|
11
|
+
* @see docs/exec-plans/architecture-overhaul/phase-4-agent-harness.md
|
|
12
|
+
*/
|
|
13
|
+
import { execFileSync } from "child_process";
import { randomUUID } from "crypto";
import { mkdirSync, rmSync } from "fs";
import { tmpdir } from "os";
import { resolve } from "path";
|
|
18
|
+
const DEFAULT_IMAGE = "node:22-slim";
|
|
19
|
+
const DEFAULT_WORKDIR = "/workspace";
|
|
20
|
+
/** Only allow official base images to prevent pulling from untrusted registries. */
|
|
21
|
+
const ALLOWED_IMAGE_PATTERN = /^(node|python|ubuntu|alpine|debian|rust|golang|mcr\.microsoft\.com\/[a-z]+)(:[a-zA-Z0-9._-]+)?$/;
|
|
22
|
+
function validateDockerImage(image) {
|
|
23
|
+
if (!ALLOWED_IMAGE_PATTERN.test(image)) {
|
|
24
|
+
throw new Error(`Docker image "${image}" is not in the allowlist. ` +
|
|
25
|
+
`Only official base images (node, python, ubuntu, alpine, debian, rust, golang) are permitted.`);
|
|
26
|
+
}
|
|
27
|
+
}
|
|
28
|
+
/**
 * Sandbox strategy that runs each sandbox in its own Docker container.
 *
 * All Docker interaction goes through `execFileSync("docker", [...])` with
 * an argument array, so no shell is involved.
 */
export class DockerSandboxStrategy {
    name = "Docker Container";
    type = "docker";
    /**
     * Check whether Docker can be used.
     *
     * @returns `true` when `docker info` succeeds within 5 seconds, `false`
     *   on any failure.
     */
    async isAvailable() {
        try {
            execFileSync("docker", ["info"], { stdio: "ignore", timeout: 5000 });
            return true;
        }
        catch {
            return false;
        }
    }
    /**
     * Create and start a hardened container with a host staging directory
     * bind-mounted at the container working directory.
     *
     * @param options - Provision options; `image` and `limits` (`cpus`,
     *   `memoryBytes`, `networkAccess`) are read here.
     * @returns Sandbox info whose `workingDir` is the host staging directory.
     * @throws Error when the image is not allowlisted, or when creating or
     *   starting the container fails (the original error is attached as `cause`).
     */
    async provision(options) {
        const image = options.image ?? DEFAULT_IMAGE;
        validateDockerImage(image);
        const id = `ailf-${randomUUID().slice(0, 12)}`;
        // Create a local staging directory for fixture injection
        const stagingDir = resolve(tmpdir(), `${id}-staging`);
        mkdirSync(stagingDir, { recursive: true });
        // Build docker create command as array (no shell, prevents injection)
        const args = ["create", "--name", id, "--workdir", DEFAULT_WORKDIR];
        // Security hardening — defense-in-depth against container escape
        args.push("--cap-drop", "ALL");
        args.push("--security-opt", "no-new-privileges");
        args.push("--read-only");
        args.push("--tmpfs", "/tmp:rw,noexec,nosuid,size=100m");
        // Resource limits
        if (options.limits) {
            if (options.limits.cpus) {
                args.push("--cpus", String(options.limits.cpus));
            }
            if (options.limits.memoryBytes) {
                args.push("--memory", String(options.limits.memoryBytes));
            }
            if (options.limits.networkAccess === false) {
                args.push("--network", "none");
            }
        }
        // Bind mount staging directory
        args.push("-v", `${stagingDir}:${DEFAULT_WORKDIR}`);
        args.push(image);
        args.push("sleep", "infinity"); // Keep container alive
        try {
            const containerId = execFileSync("docker", args, {
                encoding: "utf-8",
                timeout: 30_000,
            }).trim();
            // Start the container
            execFileSync("docker", ["start", id], {
                stdio: "ignore",
                timeout: 10_000,
            });
            return {
                id,
                workingDir: stagingDir,
                strategy: "docker",
                containerId: containerId || id,
                createdAt: new Date().toISOString(),
            };
        }
        catch (err) {
            // No SandboxInfo is handed back on failure, so the caller cannot
            // tear anything down: remove a possibly-created container and the
            // staging directory here. Both are best-effort so the original
            // error is what gets reported.
            try {
                execFileSync("docker", ["rm", "-f", id], {
                    stdio: "ignore",
                    timeout: 10_000,
                });
            }
            catch {
                // Container may never have been created
            }
            try {
                rmSync(stagingDir, { recursive: true, force: true });
            }
            catch {
                // Best-effort cleanup
            }
            const msg = err instanceof Error ? err.message : String(err);
            throw new Error(`Failed to provision Docker sandbox "${id}": ${msg}`, {
                cause: err,
            });
        }
    }
    /**
     * Collect the files changed under the container working directory, as
     * reported by `docker diff`.
     *
     * Failures of `docker diff` are swallowed: the result then has an empty
     * `modifiedFiles` list and no `diff`.
     *
     * NOTE(review): the workspace is a bind mount, and `docker diff` may not
     * report changes made inside bind mounts — confirm this yields the
     * expected files.
     *
     * @param sandbox - Sandbox info; `id` and `createdAt` are read.
     * @returns Workdir-relative modified paths, the raw diff (when non-empty)
     *   and the elapsed time since `createdAt` in milliseconds.
     */
    async collectArtifacts(sandbox) {
        const modifiedFiles = [];
        // Strict "<workdir>/" prefix: the workdir entry itself and sibling
        // paths such as "/workspace2/x" are not files inside the workdir.
        const workdirPrefix = `${DEFAULT_WORKDIR}/`;
        // docker diff output: C /workspace/file.ts (C=changed, A=added, D=deleted)
        const diffLine = /^[ACD]\s+(.+)$/;
        try {
            // Get list of modified files via docker diff (array form)
            const diff = execFileSync("docker", ["diff", sandbox.id], {
                encoding: "utf-8",
                timeout: 10_000,
            }).trim();
            if (diff) {
                for (const line of diff.split("\n")) {
                    const match = diffLine.exec(line.trim());
                    if (match && match[1].startsWith(workdirPrefix)) {
                        modifiedFiles.push(match[1].slice(workdirPrefix.length));
                    }
                }
            }
            return {
                modifiedFiles,
                diff: diff || undefined,
                durationMs: Date.now() - new Date(sandbox.createdAt).getTime(),
            };
        }
        catch {
            return {
                modifiedFiles,
                durationMs: Date.now() - new Date(sandbox.createdAt).getTime(),
            };
        }
    }
    /**
     * Force-remove the sandbox container. Errors are ignored.
     *
     * @param sandbox - Sandbox info; `id` is used as the container name.
     */
    async teardown(sandbox) {
        try {
            execFileSync("docker", ["rm", "-f", sandbox.id], {
                stdio: "ignore",
                timeout: 10_000,
            });
        }
        catch {
            // Best-effort cleanup
        }
    }
}
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Fixture provisioner — five-stage pipeline for preparing sandbox state.
|
|
3
|
+
*
|
|
4
|
+
* Pipeline stages:
|
|
5
|
+
* Resolve → Fetch → Cache → Transform → Inject
|
|
6
|
+
*
|
|
7
|
+
* Handles three URI schemes for v1:
|
|
8
|
+
* - file:// — local filesystem path (relative to task)
|
|
9
|
+
* - template:// — built-in project templates
|
|
10
|
+
* - sanity:// — Content Lake document by ID or query
|
|
11
|
+
*
|
|
12
|
+
* @see docs/design-docs/architecture-overhaul/fixtures-artifacts.md
|
|
13
|
+
* @see docs/exec-plans/architecture-overhaul/phase-4-agent-harness.md
|
|
14
|
+
*/
|
|
15
|
+
import type { SandboxInfo } from "./sandbox-strategy.js";
|
|
16
|
+
/** Reference to a fixture, as declared in a task definition. */
export interface FixtureRef {
    /** URI identifying the fixture source. */
    uri: string;
    /** Where the fixture is injected. */
    inject: "provider_config" | "system_prompt" | "vars" | "working_dir";
    /** Key name (for `vars` injection) or relative path (for `working_dir` injection). */
    key?: string;
    /** Content transform applied before the fixture is injected. */
    transform?: FixtureTransform;
}
|
|
27
|
+
/** Names of the content transforms a fixture reference may request. */
export type FixtureTransform =
    | "extract-text"
    | "none"
    | "strip-html"
    | "truncate";
|
|
29
|
+
/** A fixture that has been resolved and fetched and is ready to inject. */
export interface ProvisionedFixture {
    /** URI the fixture was requested with. */
    uri: string;
    /** Resolved fixture content. */
    content: string;
    /** SHA-256 hash of `content`. */
    contentHash: string;
    /** Where the fixture is injected. */
    inject: FixtureRef["inject"];
    /** Key name or relative path, mirroring the fixture reference. */
    key?: string;
}
|
|
42
|
+
/** Outcome of a fixture provisioning run. */
export interface ProvisioningResult {
    /** Fixtures that were provisioned successfully. */
    fixtures: ProvisionedFixture[];
    /** Variable overrides contributed by fixtures injected into `vars`. */
    vars: Record<string, unknown>;
    /** Non-fatal issues encountered during the run. */
    warnings: string[];
    /** Fixture manifest, kept for reproducibility. */
    manifest: Record<string, string>;
}
|
|
53
|
+
/** Configuration accepted by the fixture provisioning pipeline. */
export interface ProvisioningOptions {
    /** Root directory against which relative paths are resolved. */
    rootDir: string;
    /** Sandbox that receives fixtures injected into `working_dir`. */
    sandbox?: SandboxInfo;
    /** Directory used for content-addressable caching. */
    cacheDir?: string;
}
|
|
62
|
+
/**
 * Execute the five-stage fixture provisioning pipeline
 * (Resolve → Fetch → Cache → Transform → Inject).
 *
 * @param refs - Fixture references taken from the task definition.
 * @param options - Provisioning configuration.
 * @returns A promise for the provisioned fixtures and their injection metadata.
 */
export declare function provisionFixtures(
    refs: FixtureRef[],
    options: ProvisioningOptions,
): Promise<ProvisioningResult>;
|