@sanity/ailf 0.5.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config/features.ts +23 -0
- package/config/models.ts +83 -0
- package/config/prompts.ts +16 -0
- package/config/rubrics.ts +225 -0
- package/config/schedules.ts +47 -0
- package/config/sinks.ts +37 -0
- package/config/sources.ts +21 -0
- package/config/thresholds.ts +61 -0
- package/dist/_vendor/ailf-core/config-helpers.d.ts +174 -0
- package/dist/_vendor/ailf-core/config-helpers.js +150 -0
- package/dist/_vendor/ailf-core/env-helper.d.ts +35 -0
- package/dist/_vendor/ailf-core/env-helper.js +45 -0
- package/dist/_vendor/ailf-core/index.d.ts +3 -0
- package/dist/_vendor/ailf-core/index.js +5 -0
- package/dist/_vendor/ailf-core/ports/context.d.ts +15 -2
- package/dist/_vendor/ailf-core/ports/doc-fetcher.d.ts +2 -2
- package/dist/_vendor/ailf-core/ports/index.d.ts +2 -1
- package/dist/_vendor/ailf-core/ports/mode-handler.d.ts +129 -0
- package/dist/_vendor/ailf-core/ports/mode-handler.js +19 -0
- package/dist/_vendor/ailf-core/ports/task-source.d.ts +16 -122
- package/dist/_vendor/ailf-core/ports/task-source.js +7 -7
- package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +7 -2
- package/dist/_vendor/ailf-core/schemas/eval-config.js +7 -2
- package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +8 -3
- package/dist/_vendor/ailf-core/schemas/pipeline-request.js +6 -1
- package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +14 -29
- package/dist/_vendor/ailf-core/schemas/pipeline.js +17 -8
- package/dist/_vendor/ailf-core/schemas/schedules.d.ts +14 -4
- package/dist/_vendor/ailf-core/schemas/schedules.js +6 -2
- package/dist/_vendor/ailf-core/schemas/sinks.d.ts +1 -1
- package/dist/_vendor/ailf-core/services/comparison-formatters.js +57 -19
- package/dist/_vendor/ailf-core/services/index.d.ts +2 -1
- package/dist/_vendor/ailf-core/services/index.js +2 -1
- package/dist/_vendor/ailf-core/services/scoring-engine.d.ts +153 -0
- package/dist/_vendor/ailf-core/services/scoring-engine.js +237 -0
- package/dist/_vendor/ailf-core/services/scoring.d.ts +15 -2
- package/dist/_vendor/ailf-core/services/scoring.js +25 -15
- package/dist/_vendor/ailf-core/types/branded-ids.d.ts +137 -0
- package/dist/_vendor/ailf-core/types/branded-ids.js +136 -0
- package/dist/_vendor/ailf-core/types/eval-mode-config.d.ts +150 -0
- package/dist/_vendor/ailf-core/types/eval-mode-config.js +24 -0
- package/dist/_vendor/ailf-core/types/generalized-task.d.ts +319 -0
- package/dist/_vendor/ailf-core/types/generalized-task.js +13 -0
- package/dist/_vendor/ailf-core/types/index.d.ts +45 -81
- package/dist/_vendor/ailf-core/types/index.js +8 -1
- package/dist/_vendor/ailf-core/types/plugin-registry.d.ts +202 -0
- package/dist/_vendor/ailf-core/types/plugin-registry.js +132 -0
- package/dist/_vendor/ailf-core/types/storage-schema.d.ts +199 -0
- package/dist/_vendor/ailf-core/types/storage-schema.js +39 -0
- package/dist/_vendor/ailf-core/types/task-graph.d.ts +86 -0
- package/dist/_vendor/ailf-core/types/task-graph.js +20 -0
- package/dist/_vendor/ailf-core/types/trace.d.ts +118 -0
- package/dist/_vendor/ailf-core/types/trace.js +18 -0
- package/dist/_vendor/ailf-core/types/variable-envelope.d.ts +80 -0
- package/dist/_vendor/ailf-core/types/variable-envelope.js +16 -0
- package/dist/_vendor/ailf-shared/dimension-names.d.ts +5 -18
- package/dist/_vendor/ailf-shared/dimension-names.js +6 -24
- package/dist/_vendor/ailf-shared/eval-modes.d.ts +38 -6
- package/dist/_vendor/ailf-shared/eval-modes.js +26 -2
- package/dist/_vendor/ailf-shared/index.d.ts +0 -1
- package/dist/_vendor/ailf-shared/index.js +0 -1
- package/dist/adapters/api-client/build-request.js +14 -13
- package/dist/adapters/config-sources/file-config-adapter.d.ts +20 -11
- package/dist/adapters/config-sources/file-config-adapter.js +38 -12
- package/dist/adapters/config-sources/index.d.ts +2 -0
- package/dist/adapters/config-sources/index.js +1 -0
- package/dist/adapters/config-sources/ts-config-loader.d.ts +59 -0
- package/dist/adapters/config-sources/ts-config-loader.js +133 -0
- package/dist/adapters/doc-fetchers/sanity-doc-fetcher.d.ts +3 -2
- package/dist/adapters/doc-fetchers/sanity-doc-fetcher.js +7 -2
- package/dist/adapters/task-sources/composite-task-source.d.ts +3 -3
- package/dist/adapters/task-sources/composite-task-source.js +1 -1
- package/dist/adapters/task-sources/content-lake-task-source.d.ts +7 -6
- package/dist/adapters/task-sources/content-lake-task-source.js +22 -23
- package/dist/adapters/task-sources/index.d.ts +1 -0
- package/dist/adapters/task-sources/index.js +1 -0
- package/dist/adapters/task-sources/repo-task-source.d.ts +4 -4
- package/dist/adapters/task-sources/repo-task-source.js +69 -16
- package/dist/adapters/task-sources/task-file-loader.d.ts +64 -0
- package/dist/adapters/task-sources/task-file-loader.js +83 -0
- package/dist/adapters/task-sources/yaml-task-source.d.ts +6 -6
- package/dist/adapters/task-sources/yaml-task-source.js +19 -16
- package/dist/cli.js +0 -2
- package/dist/commands/baseline.js +4 -1
- package/dist/commands/calculate-scores.js +1 -1
- package/dist/commands/coverage-audit.js +7 -1
- package/dist/commands/explain-handler.js +25 -23
- package/dist/commands/fetch-docs.js +3 -2
- package/dist/commands/generate-configs.js +1 -1
- package/dist/commands/interactive.js +11 -7
- package/dist/commands/pipeline-action.d.ts +2 -0
- package/dist/commands/pipeline-action.js +16 -6
- package/dist/commands/pipeline.d.ts +1 -0
- package/dist/commands/pipeline.js +4 -2
- package/dist/commands/pr-comment.js +1 -1
- package/dist/commands/publish.js +2 -2
- package/dist/commands/readiness-report.js +13 -6
- package/dist/composition-root.d.ts +1 -1
- package/dist/composition-root.js +67 -4
- package/dist/orchestration/build-app-context.js +1 -0
- package/dist/orchestration/build-step-sequence.js +24 -6
- package/dist/orchestration/steps/calculate-scores-step.js +24 -11
- package/dist/orchestration/steps/fetch-docs-step.js +6 -4
- package/dist/orchestration/steps/gap-analysis-step.js +8 -7
- package/dist/orchestration/steps/generate-configs-step.d.ts +16 -3
- package/dist/orchestration/steps/generate-configs-step.js +245 -51
- package/dist/orchestration/steps/grader-consistency-step.js +7 -4
- package/dist/orchestration/steps/mirror-repo-tasks-step.js +1 -1
- package/dist/orchestration/steps/readiness-step.js +5 -6
- package/dist/orchestration/steps/run-eval-step.d.ts +1 -2
- package/dist/orchestration/steps/run-eval-step.js +8 -7
- package/dist/pipeline/cache.d.ts +1 -1
- package/dist/pipeline/cache.js +36 -8
- package/dist/pipeline/calculate-scores.d.ts +2 -4
- package/dist/pipeline/calculate-scores.js +43 -113
- package/dist/pipeline/checks.js +2 -2
- package/dist/pipeline/compare.js +8 -8
- package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.d.ts +10 -0
- package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js +288 -0
- package/dist/pipeline/compiler/__tests__/assertion-mapper.test.d.ts +9 -0
- package/dist/pipeline/compiler/__tests__/assertion-mapper.test.js +145 -0
- package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.d.ts +10 -0
- package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.js +314 -0
- package/dist/pipeline/compiler/__tests__/literacy-handler.test.d.ts +10 -0
- package/dist/pipeline/compiler/__tests__/literacy-handler.test.js +486 -0
- package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.d.ts +10 -0
- package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +355 -0
- package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.d.ts +9 -0
- package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.js +333 -0
- package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.d.ts +12 -0
- package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.js +210 -0
- package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.d.ts +7 -0
- package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.js +471 -0
- package/dist/pipeline/compiler/__tests__/scoring-bridge.test.d.ts +10 -0
- package/dist/pipeline/compiler/__tests__/scoring-bridge.test.js +184 -0
- package/dist/pipeline/compiler/__tests__/task-graph-builder.test.d.ts +8 -0
- package/dist/pipeline/compiler/__tests__/task-graph-builder.test.js +301 -0
- package/dist/pipeline/compiler/__tests__/telemetry.test.d.ts +9 -0
- package/dist/pipeline/compiler/__tests__/telemetry.test.js +503 -0
- package/dist/pipeline/compiler/assertion-mapper.d.ts +58 -0
- package/dist/pipeline/compiler/assertion-mapper.js +175 -0
- package/dist/pipeline/compiler/compiler-to-yaml.d.ts +51 -0
- package/dist/pipeline/compiler/compiler-to-yaml.js +222 -0
- package/dist/pipeline/compiler/config-loader.d.ts +56 -0
- package/dist/pipeline/compiler/config-loader.js +111 -0
- package/dist/pipeline/compiler/fixture-resolver.d.ts +41 -0
- package/dist/pipeline/compiler/fixture-resolver.js +113 -0
- package/dist/pipeline/compiler/hash.d.ts +11 -0
- package/dist/pipeline/compiler/hash.js +18 -0
- package/dist/pipeline/compiler/ignore-fields.d.ts +53 -0
- package/dist/pipeline/compiler/ignore-fields.js +113 -0
- package/dist/pipeline/compiler/index.d.ts +29 -0
- package/dist/pipeline/compiler/index.js +45 -0
- package/dist/pipeline/compiler/literacy-bridge.d.ts +102 -0
- package/dist/pipeline/compiler/literacy-bridge.js +172 -0
- package/dist/pipeline/compiler/mode-handlers/__fixtures__/agent-harness-example-tasks.d.ts +14 -0
- package/dist/pipeline/compiler/mode-handlers/__fixtures__/agent-harness-example-tasks.js +152 -0
- package/dist/pipeline/compiler/mode-handlers/__fixtures__/knowledge-probe-example-tasks.d.ts +32 -0
- package/dist/pipeline/compiler/mode-handlers/__fixtures__/knowledge-probe-example-tasks.js +176 -0
- package/dist/pipeline/compiler/mode-handlers/__fixtures__/mcp-example-tasks.d.ts +49 -0
- package/dist/pipeline/compiler/mode-handlers/__fixtures__/mcp-example-tasks.js +259 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.d.ts +70 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.js +485 -0
- package/dist/pipeline/compiler/mode-handlers/index.d.ts +16 -0
- package/dist/pipeline/compiler/mode-handlers/index.js +21 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.d.ts +76 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.js +245 -0
- package/dist/pipeline/compiler/mode-handlers/literacy-handler.d.ts +89 -0
- package/dist/pipeline/compiler/mode-handlers/literacy-handler.js +379 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-assertions.d.ts +50 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-assertions.js +277 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.d.ts +67 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.js +309 -0
- package/dist/pipeline/compiler/presets/index.d.ts +9 -0
- package/dist/pipeline/compiler/presets/index.js +8 -0
- package/dist/pipeline/compiler/presets/sanity-literacy.d.ts +45 -0
- package/dist/pipeline/compiler/presets/sanity-literacy.js +354 -0
- package/dist/pipeline/compiler/promptfoo-compiler.d.ts +96 -0
- package/dist/pipeline/compiler/promptfoo-compiler.js +230 -0
- package/dist/pipeline/compiler/provider-assembler.d.ts +39 -0
- package/dist/pipeline/compiler/provider-assembler.js +137 -0
- package/dist/pipeline/compiler/sandbox/docker-sandbox.d.ts +21 -0
- package/dist/pipeline/compiler/sandbox/docker-sandbox.js +136 -0
- package/dist/pipeline/compiler/sandbox/fixture-provisioner.d.ts +69 -0
- package/dist/pipeline/compiler/sandbox/fixture-provisioner.js +189 -0
- package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.d.ts +20 -0
- package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.js +114 -0
- package/dist/pipeline/compiler/sandbox/index.d.ts +10 -0
- package/dist/pipeline/compiler/sandbox/index.js +11 -0
- package/dist/pipeline/compiler/sandbox/sandbox-selector.d.ts +35 -0
- package/dist/pipeline/compiler/sandbox/sandbox-selector.js +86 -0
- package/dist/pipeline/compiler/sandbox/sandbox-strategy.d.ts +81 -0
- package/dist/pipeline/compiler/sandbox/sandbox-strategy.js +15 -0
- package/dist/pipeline/compiler/sandbox/tempdir-sandbox.d.ts +20 -0
- package/dist/pipeline/compiler/sandbox/tempdir-sandbox.js +74 -0
- package/dist/pipeline/compiler/scoring-bridge.d.ts +49 -0
- package/dist/pipeline/compiler/scoring-bridge.js +114 -0
- package/dist/pipeline/compiler/task-graph-builder.d.ts +54 -0
- package/dist/pipeline/compiler/task-graph-builder.js +291 -0
- package/dist/pipeline/compiler/telemetry/cost-tracker.d.ts +90 -0
- package/dist/pipeline/compiler/telemetry/cost-tracker.js +146 -0
- package/dist/pipeline/compiler/telemetry/index.d.ts +14 -0
- package/dist/pipeline/compiler/telemetry/index.js +19 -0
- package/dist/pipeline/compiler/telemetry/redactor.d.ts +58 -0
- package/dist/pipeline/compiler/telemetry/redactor.js +222 -0
- package/dist/pipeline/compiler/telemetry/tool-classifier.d.ts +32 -0
- package/dist/pipeline/compiler/telemetry/tool-classifier.js +120 -0
- package/dist/pipeline/compiler/telemetry/trace-collector.d.ts +75 -0
- package/dist/pipeline/compiler/telemetry/trace-collector.js +297 -0
- package/dist/pipeline/compiler/telemetry/trace-store.d.ts +78 -0
- package/dist/pipeline/compiler/telemetry/trace-store.js +85 -0
- package/dist/pipeline/compiler/variable-resolver.d.ts +46 -0
- package/dist/pipeline/compiler/variable-resolver.js +115 -0
- package/dist/pipeline/coverage-audit.d.ts +15 -5
- package/dist/pipeline/coverage-audit.js +41 -22
- package/dist/pipeline/eval-constants.d.ts +16 -6
- package/dist/pipeline/eval-constants.js +25 -4
- package/dist/pipeline/eval-fingerprint.d.ts +2 -2
- package/dist/pipeline/eval-fingerprint.js +8 -9
- package/dist/pipeline/expand-tasks.d.ts +19 -10
- package/dist/pipeline/expand-tasks.js +34 -28
- package/dist/pipeline/gap-analysis.d.ts +1 -1
- package/dist/pipeline/gap-analysis.js +2 -2
- package/dist/pipeline/generate-configs.d.ts +22 -4
- package/dist/pipeline/generate-configs.js +53 -24
- package/dist/pipeline/grader-api.d.ts +3 -3
- package/dist/pipeline/grader-api.js +5 -12
- package/dist/pipeline/grader-compare-runner.js +20 -27
- package/dist/pipeline/grader-comparison.d.ts +4 -8
- package/dist/pipeline/grader-comparison.js +11 -17
- package/dist/pipeline/grader-consistency-runner.d.ts +2 -3
- package/dist/pipeline/grader-consistency-runner.js +16 -20
- package/dist/pipeline/grader-consistency.d.ts +6 -10
- package/dist/pipeline/grader-consistency.js +13 -32
- package/dist/pipeline/grader-sensitivity-runner.js +7 -5
- package/dist/pipeline/grader-sensitivity.d.ts +2 -6
- package/dist/pipeline/grader-sensitivity.js +10 -10
- package/dist/pipeline/grader-validate-runner.js +7 -5
- package/dist/pipeline/grader-validation.d.ts +2 -6
- package/dist/pipeline/grader-validation.js +14 -22
- package/dist/pipeline/map-request-to-config.js +6 -1
- package/dist/pipeline/mirror-repo-tasks.d.ts +6 -6
- package/dist/pipeline/mirror-repo-tasks.js +16 -15
- package/dist/pipeline/normalize-mode.d.ts +49 -0
- package/dist/pipeline/normalize-mode.js +64 -0
- package/dist/pipeline/plan.d.ts +5 -2
- package/dist/pipeline/plan.js +134 -78
- package/dist/pipeline/pr-comment.js +2 -0
- package/dist/pipeline/profile-resolution.d.ts +22 -14
- package/dist/pipeline/profile-resolution.js +41 -19
- package/dist/pipeline/provenance.d.ts +2 -2
- package/dist/pipeline/provenance.js +12 -17
- package/dist/pipeline/release-report.js +4 -4
- package/dist/pipeline/repo-threshold-evaluator.d.ts +1 -1
- package/dist/pipeline/repo-threshold-evaluator.js +1 -1
- package/dist/pipeline/rubric-loader.d.ts +20 -0
- package/dist/pipeline/rubric-loader.js +37 -0
- package/dist/pipeline/validate.d.ts +4 -4
- package/dist/pipeline/validate.js +64 -53
- package/dist/schedules/loader.js +18 -8
- package/dist/scripts/migrate-task-mode.d.ts +24 -0
- package/dist/scripts/migrate-task-mode.js +85 -0
- package/dist/scripts/migrate-tasks-to-content-lake.js +11 -10
- package/dist/scripts/validate-task-sources.d.ts +1 -1
- package/dist/scripts/validate-task-sources.js +15 -15
- package/dist/sinks/loader.js +5 -7
- package/dist/sources.d.ts +7 -7
- package/dist/sources.js +22 -24
- package/dist/webhook/dispatch.js +2 -1
- package/package.json +6 -3
- package/tasks/knowledge-probe/define-type-api.task.ts +55 -0
- package/tasks/knowledge-probe/groq-projections.task.ts +59 -0
- package/tasks/literacy/frameworks.task.ts +128 -0
- package/tasks/literacy/functions.task.ts +69 -0
- package/tasks/literacy/groq.task.ts +258 -0
- package/tasks/literacy/nextjs-live.task.ts +75 -0
- package/tasks/literacy/studio-setup.task.ts +131 -0
- package/tasks/literacy/visual-editing.task.ts +146 -0
- package/config/features.yaml +0 -116
- package/config/models.yaml +0 -116
- package/config/prompts.yaml +0 -75
- package/config/rubrics.yaml +0 -81
- package/config/schedules.yaml +0 -43
- package/config/sinks.yaml +0 -54
- package/config/sources.yaml +0 -51
- package/config/thresholds.yaml +0 -49
- package/dist/agent-observer/test-imports.d.ts +0 -7
- package/dist/agent-observer/test-imports.js +0 -185
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Fixture provisioner — five-stage pipeline for preparing sandbox state.
|
|
3
|
+
*
|
|
4
|
+
* Pipeline stages:
|
|
5
|
+
* Resolve → Fetch → Cache → Transform → Inject
|
|
6
|
+
*
|
|
7
|
+
* Handles three URI schemes for v1:
|
|
8
|
+
* - file:// — local filesystem path (relative to task)
|
|
9
|
+
* - template:// — built-in project templates
|
|
10
|
+
* - sanity:// — Content Lake document by ID or query
|
|
11
|
+
*
|
|
12
|
+
* @see docs/design-docs/architecture-overhaul/fixtures-artifacts.md
|
|
13
|
+
* @see docs/exec-plans/architecture-overhaul/phase-4-agent-harness.md
|
|
14
|
+
*/
|
|
15
|
+
import { existsSync, mkdirSync, readFileSync, writeFileSync } from "fs";
|
|
16
|
+
import { createHash } from "crypto";
|
|
17
|
+
import { basename, dirname, resolve } from "path";
|
|
18
|
+
// ---------------------------------------------------------------------------
|
|
19
|
+
// Public API
|
|
20
|
+
// ---------------------------------------------------------------------------
|
|
21
|
+
/**
 * Run the five-stage fixture provisioning pipeline
 * (Resolve → Fetch → Cache → Transform → Inject) for every fixture reference.
 *
 * A failure while processing one fixture never aborts the run: the error is
 * recorded in `warnings` and processing continues with the next reference.
 *
 * @param refs - Fixture references from the task definition. Each entry is
 *   read for `uri` and, optionally, `transform`, `inject` and `key`.
 * @param options - Provisioning configuration. `rootDir` anchors file
 *   fixtures, `cacheDir` (optional) enables the content-addressable cache and
 *   `sandbox.workingDir` (optional) receives `working_dir` injections.
 * @returns Provisioned fixtures and injection metadata:
 *   `fixtures` (one entry per successfully fetched reference), `vars`
 *   (key → transformed content for `vars` injections), `warnings`
 *   (human-readable problems) and `manifest` (uri → SHA-256 of the raw,
 *   untransformed content).
 */
export async function provisionFixtures(refs, options) {
    const fixtures = [];
    const vars = {};
    const warnings = [];
    const manifest = {};
    for (const ref of refs) {
        try {
            // Stage 1: Resolve — parse URI, determine backend
            const resolved = resolveFixtureURI(ref.uri, options.rootDir);
            // Stage 2: Fetch — read content from source.
            // A null result means the fetcher already recorded a warning.
            const content = await fetchFixtureContent(resolved, warnings);
            if (content === null) {
                continue;
            }
            // Stage 3: Cache — store in content-addressable cache.
            // The hash is taken before transformation so the manifest
            // identifies the source content, not the derived text.
            const contentHash = hashContent(content);
            if (options.cacheDir) {
                cacheFixture(options.cacheDir, contentHash, content);
            }
            manifest[ref.uri] = contentHash;
            // Stage 4: Transform — apply preprocessing
            const transformed = applyTransform(content, ref.transform);
            // Stage 5: Inject — place into target
            fixtures.push({
                uri: ref.uri,
                content: transformed,
                contentHash,
                inject: ref.inject,
                key: ref.key,
            });
            switch (ref.inject) {
                case "vars":
                    if (ref.key) {
                        vars[ref.key] = transformed;
                    }
                    else {
                        // Previously this fixture vanished from `vars` without a trace.
                        warnings.push(`Fixture "${ref.uri}": inject target "vars" requires a key — ` +
                            "fixture was not added to vars");
                    }
                    break;
                case "working_dir":
                    // Without a sandbox there is nowhere to write; the fixture is
                    // still returned in `fixtures` for the caller to place.
                    if (options.sandbox) {
                        injectIntoWorkingDir(options.sandbox.workingDir, ref.key ?? basename(ref.uri), transformed);
                    }
                    break;
                case "system_prompt":
                case "provider_config":
                    // These are handled by the caller when assembling the Promptfoo config
                    break;
            }
        }
        catch (err) {
            const msg = err instanceof Error ? err.message : String(err);
            warnings.push(`Fixture "${ref.uri}": ${msg}`);
        }
    }
    return { fixtures, vars, warnings, manifest };
}
|
|
83
|
+
/**
 * Throw unless `absolutePath` is `rootDir` itself or lies somewhere beneath it.
 *
 * Containment is decided by walking up the parent chain of the normalised
 * target with `resolve(…, "..")` instead of comparing against a string prefix
 * that ends in a hard-coded "/". The prefix form rejected every path on
 * platforms whose `resolve` emits a different separator, rejected everything
 * when `rootDir` was the filesystem root ("//" prefix), and accepted
 * un-normalised input such as "<root>/../elsewhere".
 *
 * The check is purely lexical: symbolic links are not resolved.
 *
 * @param absolutePath - Candidate path (normalised with `resolve` before checking)
 * @param rootDir - Directory the path must stay within
 * @throws {Error} "Path traversal detected…" when the path escapes `rootDir`
 */
function assertPathContained(absolutePath, rootDir) {
    const base = resolve(rootDir);
    let current = resolve(absolutePath);
    for (;;) {
        if (current === base) {
            return;
        }
        const parent = resolve(current, "..");
        if (parent === current) {
            // Reached the filesystem root without meeting `base`.
            break;
        }
        current = parent;
    }
    throw new Error(`Path traversal detected: "${absolutePath}" resolves outside rootDir "${rootDir}"`);
}
|
|
90
|
+
/**
 * Stage 1: Resolve — classify a fixture URI and work out where it points.
 *
 * `template://` and `sanity://` URIs are returned with the prefix removed and
 * no filesystem path. Everything else — `file://` URIs and bare paths — is
 * resolved against `rootDir` and must stay inside it.
 *
 * @param uri - Fixture URI or bare path from the task definition
 * @param rootDir - Directory that file fixtures are resolved against
 * @returns `{ scheme, path }`, plus `absolutePath` for the "file" scheme
 * @throws {Error} When a file fixture resolves outside `rootDir`
 */
function resolveFixtureURI(uri, rootDir) {
    const nonFilePrefixes = [
        ["template", "template://"],
        ["sanity", "sanity://"],
    ];
    for (const [scheme, prefix] of nonFilePrefixes) {
        if (uri.startsWith(prefix)) {
            return { scheme, path: uri.slice(prefix.length) };
        }
    }
    // file:// URIs and bare paths share one code path once the prefix is gone.
    const filePrefix = "file://";
    const path = uri.startsWith(filePrefix) ? uri.slice(filePrefix.length) : uri;
    const absolutePath = resolve(rootDir, path);
    assertPathContained(absolutePath, rootDir);
    return { scheme: "file", path, absolutePath };
}
|
|
108
|
+
// ---------------------------------------------------------------------------
|
|
109
|
+
// Stage 2: Fetch
|
|
110
|
+
// ---------------------------------------------------------------------------
|
|
111
|
+
/**
 * Stage 2: Fetch — obtain the raw text for a resolved fixture.
 *
 * - "file": reads the file as UTF-8; a missing file yields a warning and `null`.
 * - "template": no template registry exists yet, so a placeholder comment is
 *   returned and a warning recorded.
 * - "sanity": cannot be fetched here, so a deferred-marker comment is returned
 *   and a warning recorded.
 * - anything else: a warning and `null`.
 *
 * @param resolved - Output of the resolve stage (`scheme`, `path`, optional `absolutePath`)
 * @param warnings - Array that receives human-readable warnings (mutated)
 * @returns The fixture text, or `null` when nothing could be fetched
 */
async function fetchFixtureContent(resolved, warnings) {
    const { scheme, path, absolutePath } = resolved;
    if (scheme === "file") {
        const fileIsPresent = Boolean(absolutePath) && existsSync(absolutePath);
        if (fileIsPresent) {
            return readFileSync(absolutePath, "utf-8");
        }
        warnings.push(`Fixture file not found: ${absolutePath ?? path}`);
        return null;
    }
    if (scheme === "template") {
        // Template fixtures are resolved from built-in templates.
        // For v1, return a placeholder — template registry is a future enhancement.
        warnings.push(`Template fixture "${path}" — template registry not yet implemented, ` +
            "returning placeholder content");
        return `<!-- Template: ${path} -->\n`;
    }
    if (scheme === "sanity") {
        // Sanity fixtures require the DocFetcher port (injected at eval time).
        // At compile time, return a deferred marker.
        warnings.push(`Sanity fixture "${path}" — deferred to eval time ` +
            "(requires DocFetcher port)");
        return `<!-- Sanity document: ${path} -->\n`;
    }
    warnings.push(`Unknown fixture scheme: ${scheme}`);
    return null;
}
|
|
139
|
+
// ---------------------------------------------------------------------------
|
|
140
|
+
// Stage 3: Cache
|
|
141
|
+
// ---------------------------------------------------------------------------
|
|
142
|
+
/**
 * Compute the content address of a fixture.
 *
 * @param content - Raw fixture content
 * @returns Lower-case hexadecimal SHA-256 digest of `content`
 */
function hashContent(content) {
    const sha256 = createHash("sha256");
    sha256.update(content);
    return sha256.digest("hex");
}
|
|
145
|
+
/**
 * Stage 3: Cache — store fixture content under its hash.
 *
 * The entry is written to `<cacheDir>/<hash>`. An entry that already exists
 * is left untouched, so repeated calls with the same hash never rewrite it.
 *
 * @param cacheDir - Cache directory (created on demand)
 * @param hash - Content hash used as the file name
 * @param content - Content to store
 */
function cacheFixture(cacheDir, hash, content) {
    const entryPath = resolve(cacheDir, hash);
    if (existsSync(entryPath)) {
        return;
    }
    mkdirSync(dirname(entryPath), { recursive: true });
    writeFileSync(entryPath, content);
}
|
|
152
|
+
// ---------------------------------------------------------------------------
|
|
153
|
+
// Stage 4: Transform
|
|
154
|
+
// ---------------------------------------------------------------------------
|
|
155
|
+
/**
 * Stage 4: Transform — apply an optional preprocessing step to fixture text.
 *
 * Supported transforms:
 * - "strip-html": delete anything that looks like a tag, then trim
 * - "extract-text": replace tags with spaces, collapse whitespace, then trim
 * - "truncate": cap the text at 10,000 UTF-16 code units and append a marker
 *
 * A missing transform, "none", or an unrecognised name returns the content
 * unchanged.
 *
 * @param content - Fixture text
 * @param transform - Transform name from the fixture reference (optional)
 * @returns The transformed text
 */
function applyTransform(content, transform) {
    const truncateLimit = 10_000;
    switch (transform) {
        case "strip-html":
            return content.replace(/<[^>]*>/g, "").trim();
        case "extract-text":
            // Remove all markup, normalize whitespace
            return content
                .replace(/<[^>]*>/g, " ")
                .replace(/\s+/g, " ")
                .trim();
        case "truncate": {
            if (content.length <= truncateLimit) {
                return content;
            }
            // Never cut between the two halves of a surrogate pair: a lone high
            // surrogate is not valid text and turns into U+FFFD once encoded.
            const lastKept = content.charCodeAt(truncateLimit - 1);
            const firstDropped = content.charCodeAt(truncateLimit);
            const splitsPair = lastKept >= 0xd800 && lastKept <= 0xdbff &&
                firstDropped >= 0xdc00 && firstDropped <= 0xdfff;
            const cut = splitsPair ? truncateLimit - 1 : truncateLimit;
            return content.slice(0, cut) + "\n... (truncated)";
        }
        default:
            // Covers undefined/empty, "none" and unknown transform names.
            return content;
    }
}
|
|
176
|
+
// ---------------------------------------------------------------------------
|
|
177
|
+
// Stage 5: Inject
|
|
178
|
+
// ---------------------------------------------------------------------------
|
|
179
|
+
/**
 * Stage 5: Inject — write fixture content to a path inside the sandbox.
 *
 * The target is resolved against `workingDir` and must not escape it (e.g.
 * "../../etc/cron.d/evil"). Containment is checked by walking up the parent
 * chain of the resolved target with `resolve(…, "..")`, so the result does not
 * depend on a hard-coded "/" separator and still works when the sandbox is a
 * filesystem root. Missing parent directories are created.
 *
 * @param workingDir - Sandbox working directory
 * @param targetPath - Destination path relative to `workingDir`
 * @param content - Content to write
 * @throws {Error} "Path traversal detected…" when the target escapes the sandbox
 */
function injectIntoWorkingDir(workingDir, targetPath, content) {
    const sandboxRoot = resolve(workingDir);
    const fullPath = resolve(workingDir, targetPath);
    let contained = false;
    let current = fullPath;
    for (;;) {
        if (current === sandboxRoot) {
            contained = true;
            break;
        }
        const parent = resolve(current, "..");
        if (parent === current) {
            // Reached the filesystem root without meeting the sandbox root.
            break;
        }
        current = parent;
    }
    if (!contained) {
        throw new Error(`Path traversal detected: "${targetPath}" resolves outside sandbox "${workingDir}"`);
    }
    mkdirSync(dirname(fullPath), { recursive: true });
    writeFileSync(fullPath, content);
}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* GitWorktreeSandboxStrategy — sandbox using `git worktree` for repo-based tasks.
|
|
3
|
+
*
|
|
4
|
+
* Creates a git worktree at a specific ref, providing a deterministic
|
|
5
|
+
* starting state for tasks that modify a git repository.
|
|
6
|
+
*
|
|
7
|
+
* All git CLI calls use `execFileSync` (array form, no shell) to prevent
|
|
8
|
+
* injection from task-supplied values like git refs or repo paths.
|
|
9
|
+
*
|
|
10
|
+
* @see docs/exec-plans/architecture-overhaul/phase-4-agent-harness.md
|
|
11
|
+
*/
|
|
12
|
+
import type { SandboxArtifacts, SandboxInfo, SandboxProvisionOptions, SandboxStrategy } from "./sandbox-strategy.js";
|
|
13
|
+
/**
 * Sandbox strategy that gives a task its own `git worktree` checkout of an
 * existing repository.
 */
export declare class GitWorktreeSandboxStrategy implements SandboxStrategy {
|
|
14
|
+
/** Human-readable strategy name. */
readonly name = "Git Worktree";
|
|
15
|
+
/** Strategy identifier; also reported as `strategy` on the provisioned sandbox. */
readonly type: "git-worktree";
|
|
16
|
+
/** Resolves to `true` when `git --version` can be executed, `false` otherwise. */
isAvailable(): Promise<boolean>;
|
|
17
|
+
/**
 * Create a worktree of `options.repoPath` at `options.gitRef` (default "HEAD")
 * in a fresh directory under the OS temp dir. Rejects when `repoPath` is
 * missing, the ref fails validation, or git fails.
 */
provision(options: SandboxProvisionOptions): Promise<SandboxInfo>;
|
|
18
|
+
/**
 * Best-effort collection of `git diff` output, the paths reported by
 * `git status --porcelain`, and the time elapsed since the sandbox was created.
 */
collectArtifacts(sandbox: SandboxInfo): Promise<SandboxArtifacts>;
|
|
19
|
+
/**
 * Remove the worktree with `git worktree remove --force`; if that fails, fall
 * back to deleting the directory, but only when it is located under the OS temp dir.
 */
teardown(sandbox: SandboxInfo): Promise<void>;
|
|
20
|
+
}
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* GitWorktreeSandboxStrategy — sandbox using `git worktree` for repo-based tasks.
|
|
3
|
+
*
|
|
4
|
+
* Creates a git worktree at a specific ref, providing a deterministic
|
|
5
|
+
* starting state for tasks that modify a git repository.
|
|
6
|
+
*
|
|
7
|
+
* All git CLI calls use `execFileSync` (array form, no shell) to prevent
|
|
8
|
+
* injection from task-supplied values like git refs or repo paths.
|
|
9
|
+
*
|
|
10
|
+
* @see docs/exec-plans/architecture-overhaul/phase-4-agent-harness.md
|
|
11
|
+
*/
|
|
12
|
+
import { randomUUID } from "crypto";
|
|
13
|
+
import { execFileSync } from "child_process";
|
|
14
|
+
import { existsSync, rmSync } from "fs";
|
|
15
|
+
import { tmpdir } from "os";
|
|
16
|
+
import { resolve } from "path";
|
|
17
|
+
/**
 * Validate a task-supplied git ref before it is handed to the git CLI.
 *
 * Three rules are enforced:
 * 1. Only alphanumerics and `- _ / . ~ ^` are allowed (no spaces, no shell
 *    metacharacters).
 * 2. The ref must not start with "-". `execFileSync` prevents shell injection
 *    but not option injection: a value such as "--force" would otherwise be
 *    parsed by git as a command-line option rather than a ref.
 * 3. The ref must not contain "..".
 *
 * @param ref - Git ref (branch, tag, commit-ish) to validate
 * @throws {Error} "Invalid git ref…" when any rule is violated
 */
function validateGitRef(ref) {
    // Git refs: alphanumeric, -, _, /, ., ~, ^ — no spaces, no shell metacharacters
    if (!/^[a-zA-Z0-9._\-/~^]+$/.test(ref)) {
        throw new Error(`Invalid git ref: "${ref}" — must contain only alphanumeric, -, _, /, ., ~, ^ characters`);
    }
    // Disallow option injection: git would treat a leading "-" as a flag
    if (ref.startsWith("-")) {
        throw new Error(`Invalid git ref: "${ref}" — must not start with "-" (would be parsed as a command-line option)`);
    }
    // Disallow path traversal via ".." segments
    if (ref.includes("..")) {
        throw new Error(`Invalid git ref: "${ref}" — must not contain ".." (path traversal)`);
    }
}
|
|
28
|
+
/**
 * Extract the path column from `git status --porcelain` output.
 *
 * Each line is "XY <path>": two status columns, one separator, then the path.
 * The raw output must NOT be trimmed as a whole first — an unstaged change is
 * reported as " M <path>", and stripping that leading space shifts the first
 * line by one column so the first character of its path is lost.
 * Rename entries are returned as git prints them ("old -> new").
 *
 * @param statusOutput - Untrimmed stdout of `git status --porcelain`
 * @returns Paths in the order git reported them
 */
function parsePorcelainPaths(statusOutput) {
    const paths = [];
    for (const line of statusOutput.split(/\r?\n/)) {
        const file = line.slice(3).trim();
        if (file) {
            paths.push(file);
        }
    }
    return paths;
}
/**
 * Report whether `target` is a strict descendant of `base`.
 *
 * Walks up the parent chain instead of comparing string prefixes, so a sibling
 * such as "<base>-evil" is not mistaken for a child, and `base` itself is not
 * considered to be inside `base`. Both arguments must already be resolved.
 *
 * @param base - Resolved directory path
 * @param target - Resolved path to test
 * @returns `true` when `target` lies strictly beneath `base`
 */
function isStrictlyInside(base, target) {
    let current = target;
    for (;;) {
        const parent = resolve(current, "..");
        if (parent === current) {
            // Reached the filesystem root without meeting `base`.
            return false;
        }
        if (parent === base) {
            return true;
        }
        current = parent;
    }
}
/**
 * Sandbox strategy that gives a task its own `git worktree` checkout of an
 * existing repository. All git calls use `execFileSync` in array form (no shell).
 */
export class GitWorktreeSandboxStrategy {
    name = "Git Worktree";
    type = "git-worktree";
    /**
     * @returns `true` when `git --version` can be executed, `false` otherwise
     */
    async isAvailable() {
        try {
            execFileSync("git", ["--version"], { stdio: "ignore", timeout: 5000 });
            return true;
        }
        catch {
            return false;
        }
    }
    /**
     * Create a worktree of `options.repoPath` at `options.gitRef` (default
     * "HEAD") in a fresh directory under the OS temp dir.
     *
     * @param options - Must carry `repoPath`; `gitRef` is optional
     * @returns Sandbox descriptor (`id`, `workingDir`, `strategy`, `gitRef`, `createdAt`)
     * @throws {Error} When `repoPath` is missing, the ref is invalid, or git
     *   fails (the original error is attached as `cause`)
     */
    async provision(options) {
        const repoPath = options.repoPath;
        if (!repoPath) {
            throw new Error("GitWorktreeSandboxStrategy requires repoPath — " +
                "the path to the git repository to create a worktree from");
        }
        const ref = options.gitRef ?? "HEAD";
        validateGitRef(ref);
        const id = `ailf-worktree-${randomUUID().slice(0, 8)}`;
        const workingDir = resolve(tmpdir(), id);
        try {
            // Array form — no shell, prevents injection via repoPath/workingDir/ref
            execFileSync("git", ["-C", repoPath, "worktree", "add", workingDir, ref], {
                encoding: "utf-8",
                timeout: 30_000,
            });
            return {
                id,
                workingDir,
                strategy: "git-worktree",
                gitRef: ref,
                createdAt: new Date().toISOString(),
            };
        }
        catch (err) {
            const msg = err instanceof Error ? err.message : String(err);
            throw new Error(`Failed to create git worktree at "${ref}": ${msg}`, {
                cause: err,
            });
        }
    }
    /**
     * Collect what the task changed inside the worktree. Collection is
     * best-effort: git failures are ignored and whatever was gathered so far is
     * returned.
     *
     * @param sandbox - Descriptor returned by `provision`
     * @returns `modifiedFiles` (from `git status --porcelain`), `diff` (from
     *   `git diff`, `undefined` when empty) and `durationMs` since `createdAt`
     */
    async collectArtifacts(sandbox) {
        const modifiedFiles = [];
        let diff;
        if (existsSync(sandbox.workingDir)) {
            try {
                diff = execFileSync("git", ["-C", sandbox.workingDir, "diff"], {
                    encoding: "utf-8",
                    timeout: 10_000,
                }).trim();
                // Deliberately untrimmed — see parsePorcelainPaths.
                const statusOutput = execFileSync("git", ["-C", sandbox.workingDir, "status", "--porcelain"], { encoding: "utf-8", timeout: 10_000 });
                modifiedFiles.push(...parsePorcelainPaths(statusOutput));
            }
            catch {
                // Best-effort artifact collection
            }
        }
        return {
            modifiedFiles,
            diff: diff || undefined,
            durationMs: Date.now() - new Date(sandbox.createdAt).getTime(),
        };
    }
    /**
     * Remove the worktree. If `git worktree remove` fails, the directory is
     * deleted manually — but only when it lies strictly beneath the OS temp
     * dir, never the temp dir itself or a sibling whose name merely starts
     * with the temp dir's path.
     *
     * @param sandbox - Descriptor returned by `provision`
     */
    async teardown(sandbox) {
        if (!existsSync(sandbox.workingDir)) {
            return;
        }
        try {
            // NOTE(review): this runs in the process's current directory, not in
            // the repository the worktree was created from — confirm callers
            // always run from inside that repository, otherwise the fallback
            // below is what actually cleans up.
            execFileSync("git", ["worktree", "remove", sandbox.workingDir, "--force"], { stdio: "ignore", timeout: 10_000 });
        }
        catch {
            // If worktree remove fails, fall back to manual cleanup.
            // Guard: only delete under tmpdir to prevent accidental deletion.
            const tmp = resolve(tmpdir());
            const target = resolve(sandbox.workingDir);
            if (isStrictlyInside(tmp, target)) {
                rmSync(sandbox.workingDir, { recursive: true, force: true });
            }
        }
    }
}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Sandbox infrastructure — isolated execution environments for agent harness mode.
|
|
3
|
+
*
|
|
4
|
+
* @see docs/exec-plans/architecture-overhaul/phase-4-agent-harness.md
|
|
5
|
+
*/
|
|
6
|
+
export type { SandboxArtifacts, SandboxInfo, SandboxProvisionOptions, SandboxStrategy, SandboxType, } from "./sandbox-strategy.js";
|
|
7
|
+
export { DockerSandboxStrategy } from "./docker-sandbox.js";
|
|
8
|
+
export { GitWorktreeSandboxStrategy } from "./git-worktree-sandbox.js";
|
|
9
|
+
export { TempDirSandboxStrategy } from "./tempdir-sandbox.js";
|
|
10
|
+
export { createSandboxStrategy, selectSandboxStrategy, type SandboxSelectionResult, } from "./sandbox-selector.js";
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Sandbox infrastructure — isolated execution environments for agent harness mode.
|
|
3
|
+
*
|
|
4
|
+
* @see docs/exec-plans/architecture-overhaul/phase-4-agent-harness.md
|
|
5
|
+
*/
|
|
6
|
+
// Implementations
|
|
7
|
+
export { DockerSandboxStrategy } from "./docker-sandbox.js";
|
|
8
|
+
export { GitWorktreeSandboxStrategy } from "./git-worktree-sandbox.js";
|
|
9
|
+
export { TempDirSandboxStrategy } from "./tempdir-sandbox.js";
|
|
10
|
+
// Selector
|
|
11
|
+
export { createSandboxStrategy, selectSandboxStrategy, } from "./sandbox-selector.js";
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Sandbox selector — chooses the best available sandbox strategy.
|
|
3
|
+
*
|
|
4
|
+
* Selection logic:
|
|
5
|
+
* 1. If task config specifies a strategy, use it
|
|
6
|
+
* 2. If Docker is available, prefer Docker (better isolation)
|
|
7
|
+
* 3. Fall back to TempDir (always available)
|
|
8
|
+
*
|
|
9
|
+
* CI environments (detected via the CI or GITHUB_ACTIONS env var) prefer Docker when the requested strategy is not available.
|
|
10
|
+
*
|
|
11
|
+
* @see docs/exec-plans/architecture-overhaul/phase-4-agent-harness.md
|
|
12
|
+
*/
|
|
13
|
+
import type { SandboxStrategy, SandboxType } from "./sandbox-strategy.js";
|
|
14
|
+
/**
 * Outcome of a sandbox selection: the chosen strategy plus metadata about
 * whether a fallback from the preferred strategy took place.
 */
export interface SandboxSelectionResult {
    /** Strategy instance that was chosen. */
    strategy: SandboxStrategy;
    /** True when the chosen strategy is a fallback from the preferred one. */
    isFallback: boolean;
    /** Human-readable warning, set when a fallback was used. */
    warning?: string;
}
|
|
23
|
+
/**
 * Pick the best sandbox strategy that is available.
 *
 * @param preferred - Sandbox type the task config would like to use (optional)
 * @param log - Optional callback that receives diagnostic messages
 * @returns Promise of the chosen strategy together with fallback metadata
 */
export declare function selectSandboxStrategy(
    preferred?: SandboxType,
    log?: (msg: string) => void,
): Promise<SandboxSelectionResult>;
|
|
31
|
+
/**
 * Build the sandbox strategy for the given type.
 * Availability is NOT checked here; the caller should verify it first.
 */
export declare function createSandboxStrategy(
    type: SandboxType,
): SandboxStrategy;
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Sandbox selector — chooses the best available sandbox strategy.
|
|
3
|
+
*
|
|
4
|
+
* Selection logic:
|
|
5
|
+
* 1. If task config specifies a strategy, use it
|
|
6
|
+
* 2. If Docker is available, prefer Docker (better isolation)
|
|
7
|
+
* 3. Fall back to TempDir (always available)
|
|
8
|
+
*
|
|
9
|
+
* CI environments (detected via the CI or GITHUB_ACTIONS env var) prefer Docker when the requested strategy is not available.
|
|
10
|
+
*
|
|
11
|
+
* @see docs/exec-plans/architecture-overhaul/phase-4-agent-harness.md
|
|
12
|
+
*/
|
|
13
|
+
import { DockerSandboxStrategy } from "./docker-sandbox.js";
|
|
14
|
+
import { GitWorktreeSandboxStrategy } from "./git-worktree-sandbox.js";
|
|
15
|
+
import { TempDirSandboxStrategy } from "./tempdir-sandbox.js";
|
|
16
|
+
// ---------------------------------------------------------------------------
|
|
17
|
+
// Strategy registry
|
|
18
|
+
// ---------------------------------------------------------------------------
|
|
19
|
+
// Maps each sandbox type to a factory; every call builds a fresh instance.
const strategies = {
    docker() {
        return new DockerSandboxStrategy();
    },
    "git-worktree"() {
        return new GitWorktreeSandboxStrategy();
    },
    // "none" is served by the temp-directory strategy as well.
    none() {
        return new TempDirSandboxStrategy();
    },
    tempdir() {
        return new TempDirSandboxStrategy();
    },
};
|
|
25
|
+
/**
 * Select the best available sandbox strategy.
 *
 * Order of attempts:
 *  1. The preferred strategy, when one is given (other than "none") and its
 *     `isAvailable()` resolves true.
 *  2. Docker, when available. It is probed at most once per call and is not
 *     re-probed if it was the preferred strategy and already found unavailable.
 *  3. TempDir as the universal fallback.
 *
 * `isFallback` is only true when a preference was supplied and a different
 * strategy was chosen. A `warning` is attached whenever a (truthy) preference
 * could not be honoured, except when "none" ends up on TempDir.
 * An unknown `preferred` value is logged and treated as unavailable.
 *
 * @param preferred - Preferred sandbox type from task config
 * @param log - Optional log function for diagnostics
 * @returns The selected strategy with fallback metadata
 */
export async function selectSandboxStrategy(preferred, log) {
    const emit = log ?? (() => { });
    // If a specific strategy is requested, try it first
    if (preferred && preferred !== "none") {
        // hasOwn: ignore inherited keys such as "constructor"
        if (Object.hasOwn(strategies, preferred)) {
            const strategy = strategies[preferred]();
            if (await strategy.isAvailable()) {
                emit(`Using ${strategy.name} sandbox strategy (requested)`);
                return { strategy, isFallback: false };
            }
            emit(`${strategy.name} is not available, falling back...`);
        }
        else {
            emit(`Unknown sandbox strategy "${preferred}", falling back...`);
        }
    }
    // Docker is the first fallback, in CI and locally alike. Skip the probe
    // when Docker itself was the preference: it was just found unavailable.
    if (preferred !== "docker") {
        const docker = new DockerSandboxStrategy();
        if (await docker.isAvailable()) {
            const isCI = Boolean(process.env.CI || process.env.GITHUB_ACTIONS);
            emit(isCI
                ? "Using Docker sandbox strategy (CI environment)"
                : "Using Docker sandbox strategy (auto-detected)");
            return {
                strategy: docker,
                // No preference means nothing was fallen back from.
                isFallback: preferred !== undefined,
                ...(preferred
                    ? {
                        warning: `Preferred sandbox "${preferred}" unavailable, using Docker instead`,
                    }
                    : {}),
            };
        }
    }
    // Universal fallback
    const tempdir = new TempDirSandboxStrategy();
    emit("Using TempDir sandbox strategy (fallback)");
    return {
        strategy: tempdir,
        isFallback: preferred !== undefined && preferred !== "tempdir",
        ...(preferred && preferred !== "tempdir" && preferred !== "none"
            ? {
                warning: `Preferred sandbox "${preferred}" unavailable, using temp directory instead`,
            }
            : {}),
    };
}
|
|
80
|
+
/**
 * Create a specific sandbox strategy by type.
 * Does NOT check availability — caller should verify first.
 *
 * @param type - Key of the strategy registry
 * @returns A new strategy instance for `type`
 * @throws {TypeError} If `type` is not a registered sandbox type
 */
export function createSandboxStrategy(type) {
    // hasOwn: inherited keys such as "constructor" must not count as types;
    // calling them would silently return a non-strategy object.
    if (!Object.hasOwn(strategies, type)) {
        throw new TypeError(`Unknown sandbox type "${type}" (expected one of: ${Object.keys(strategies).join(", ")})`);
    }
    return strategies[type]();
}
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* SandboxStrategy — port interface for isolated agent execution environments.
|
|
3
|
+
*
|
|
4
|
+
* Three implementations, selected by task config with automatic fallback:
|
|
5
|
+
*
|
|
6
|
+
* - DockerSandboxStrategy — full isolation via Docker containers
|
|
7
|
+
* - TempDirSandboxStrategy — lightweight fallback using OS temp directories
|
|
8
|
+
* - GitWorktreeSandboxStrategy — uses `git worktree` for repo-based tasks
|
|
9
|
+
*
|
|
10
|
+
* Selection: task config specifies preferred strategy; runtime falls back
|
|
11
|
+
* Docker → TempDir if Docker is unavailable. CI environments prefer Docker.
|
|
12
|
+
*
|
|
13
|
+
* @see docs/exec-plans/architecture-overhaul/phase-4-agent-harness.md
|
|
14
|
+
*/
|
|
15
|
+
/** Describes a sandbox that has been provisioned. */
export interface SandboxInfo {
    /** Identifier that is unique to this sandbox. */
    id: string;
    /** Absolute path of the directory the sandbox works in. */
    workingDir: string;
    /** Type of the strategy that created the sandbox. */
    strategy: SandboxType;
    /** Docker container ID; set when `strategy` is "docker". */
    containerId?: string;
    /** Git worktree ref; set when `strategy` is "git-worktree". */
    gitRef?: string;
    /** Provisioning timestamp (the strategies in this package write an ISO string). */
    createdAt: string;
}
|
|
30
|
+
/** Identifiers of the sandbox strategies that can be requested. */
export type SandboxType =
    | "docker"
    | "git-worktree"
    | "none"
    | "tempdir";
|
|
32
|
+
/** Options passed to a strategy when provisioning a sandbox. */
export interface SandboxProvisionOptions {
    /** Which sandbox type to use. */
    type: SandboxType;
    /** ID of the task; used when naming the sandbox. */
    taskId: string;
    /** Docker image (docker strategy only). */
    image?: string;
    /** Optional resource limits for the sandbox. */
    limits?: {
        cpus?: number;
        memoryBytes?: number;
        diskBytes?: number;
        networkAccess?: boolean;
    };
    /** Git ref (git-worktree strategy only). */
    gitRef?: string;
    /** Path of the git repository (git-worktree strategy only). */
    repoPath?: string;
}
|
|
52
|
+
/** What is gathered from a sandbox once execution has finished. */
export interface SandboxArtifacts {
    /** Relative paths of the files modified during execution. */
    modifiedFiles: string[];
    /** Output of git diff, where applicable. */
    diff?: string;
    /** Captured stdout/stderr of the execution. */
    output?: string;
    /** Total execution time, in milliseconds. */
    durationMs: number;
}
|
|
63
|
+
/**
 * Port interface that manages the lifecycle of a sandbox.
 *
 * An implementation covers every stage: provision → use → collect → teardown.
 */
export interface SandboxStrategy {
    /** Name of the strategy, meant for humans (used in log messages). */
    readonly name: string;
    /** Identifier of the strategy type. */
    readonly type: SandboxType;
    /** Resolves true when the strategy can be used in the current environment. */
    isAvailable(): Promise<boolean>;
    /** Set up a new sandbox environment and describe it. */
    provision(options: SandboxProvisionOptions): Promise<SandboxInfo>;
    /** Gather artifacts from the sandbox after execution. */
    collectArtifacts(sandbox: SandboxInfo): Promise<SandboxArtifacts>;
    /** Dispose of the sandbox (remove files, stop containers). */
    teardown(sandbox: SandboxInfo): Promise<void>;
}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* SandboxStrategy — port interface for isolated agent execution environments.
|
|
3
|
+
*
|
|
4
|
+
* Three implementations, selected by task config with automatic fallback:
|
|
5
|
+
*
|
|
6
|
+
* - DockerSandboxStrategy — full isolation via Docker containers
|
|
7
|
+
* - TempDirSandboxStrategy — lightweight fallback using OS temp directories
|
|
8
|
+
* - GitWorktreeSandboxStrategy — uses `git worktree` for repo-based tasks
|
|
9
|
+
*
|
|
10
|
+
* Selection: task config specifies preferred strategy; runtime falls back
|
|
11
|
+
* Docker → TempDir if Docker is unavailable. CI environments prefer Docker.
|
|
12
|
+
*
|
|
13
|
+
* @see docs/exec-plans/architecture-overhaul/phase-4-agent-harness.md
|
|
14
|
+
*/
|
|
15
|
+
export {};
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* TempDirSandboxStrategy — lightweight sandbox using OS temp directories.
|
|
3
|
+
*
|
|
4
|
+
* Creates a temporary directory for each test case, provides it as
|
|
5
|
+
* the working directory, and cleans up after execution. No isolation
|
|
6
|
+
* guarantees — the agent has full access to the host filesystem.
|
|
7
|
+
*
|
|
8
|
+
* This is the universal fallback when Docker is unavailable.
|
|
9
|
+
*
|
|
10
|
+
* @see docs/exec-plans/architecture-overhaul/phase-4-agent-harness.md
|
|
11
|
+
*/
|
|
12
|
+
import type { SandboxArtifacts, SandboxInfo, SandboxProvisionOptions, SandboxStrategy } from "./sandbox-strategy.js";
|
|
13
|
+
/** Sandbox strategy that works in a directory under the OS temp directory. */
export declare class TempDirSandboxStrategy implements SandboxStrategy {
    /** Human-readable strategy name. */
    readonly name = "Temporary Directory";
    /** Strategy type identifier. */
    readonly type: "tempdir";
    /** Availability check for this strategy. */
    isAvailable(): Promise<boolean>;
    /** Create the sandbox directory and return its description. */
    provision(options: SandboxProvisionOptions): Promise<SandboxInfo>;
    /** List the files found in the sandbox directory. */
    collectArtifacts(sandbox: SandboxInfo): Promise<SandboxArtifacts>;
    /** Delete the sandbox directory. */
    teardown(sandbox: SandboxInfo): Promise<void>;
}
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* TempDirSandboxStrategy — lightweight sandbox using OS temp directories.
|
|
3
|
+
*
|
|
4
|
+
* Creates a temporary directory for each test case, provides it as
|
|
5
|
+
* the working directory, and cleans up after execution. No isolation
|
|
6
|
+
* guarantees — the agent has full access to the host filesystem.
|
|
7
|
+
*
|
|
8
|
+
* This is the universal fallback when Docker is unavailable.
|
|
9
|
+
*
|
|
10
|
+
* @see docs/exec-plans/architecture-overhaul/phase-4-agent-harness.md
|
|
11
|
+
*/
|
|
12
|
+
import { randomUUID } from "crypto";
|
|
13
|
+
import { existsSync, mkdirSync, readdirSync, rmSync } from "fs";
|
|
14
|
+
import { tmpdir } from "os";
|
|
15
|
+
import { resolve } from "path";
|
|
16
|
+
/**
 * Sandbox strategy backed by a fresh directory under `os.tmpdir()`.
 *
 * Lifecycle: `provision()` creates the directory, `collectArtifacts()` lists
 * the files found in it, `teardown()` deletes it.
 */
export class TempDirSandboxStrategy {
    name = "Temporary Directory";
    type = "tempdir";
    /**
     * @returns Always `true` — every OS has a temp directory.
     */
    async isAvailable() {
        return true;
    }
    /**
     * Create a uniquely named directory under the OS temp directory.
     *
     * @param options - Provision options; only `taskId` is read.
     * @returns Sandbox info with `id`, `workingDir`, `strategy` and `createdAt`.
     */
    async provision(options) {
        // Include sanitized taskId for debugging (strip non-alphanumeric for safety).
        // Fall back to "anon" when the id is missing OR sanitizes to nothing,
        // and coerce to string so a non-string id cannot throw.
        const safeTaskId = String(options.taskId ?? "")
            .replace(/[^a-zA-Z0-9_-]/g, "")
            .slice(0, 20) || "anon";
        const id = `ailf-sandbox-${safeTaskId}-${randomUUID().slice(0, 8)}`;
        const workingDir = resolve(tmpdir(), id);
        mkdirSync(workingDir, { recursive: true });
        return {
            id,
            workingDir,
            strategy: "tempdir",
            createdAt: new Date().toISOString(),
        };
    }
    /**
     * List every file currently in the sandbox directory (relative paths).
     *
     * @param sandbox - Sandbox info; `workingDir` and `createdAt` are read.
     * @returns `{ modifiedFiles, durationMs }`, where `durationMs` is the time
     *   elapsed since `sandbox.createdAt`.
     */
    async collectArtifacts(sandbox) {
        const modifiedFiles = [];
        if (existsSync(sandbox.workingDir)) {
            collectFilesRecursive(sandbox.workingDir, "", modifiedFiles);
        }
        return {
            modifiedFiles,
            durationMs: Date.now() - new Date(sandbox.createdAt).getTime(),
        };
    }
    /**
     * Delete the sandbox directory.
     *
     * Nothing is deleted unless the directory exists and lies strictly inside
     * the OS temp directory.
     *
     * @param sandbox - Sandbox info; only `workingDir` is read.
     */
    async teardown(sandbox) {
        const workDir = resolve(sandbox.workingDir);
        const tmpRoot = resolve(tmpdir());
        // Guard: only delete directories strictly *inside* os.tmpdir() to
        // prevent accidental deletion if workingDir is corrupted. A bare
        // startsWith(tmpRoot) would also match siblings like "/tmp-other"
        // and the temp root itself.
        // resolve() joins with the platform separator, so dropping the final
        // placeholder character leaves "<tmpRoot><separator>".
        const tmpPrefix = resolve(tmpRoot, "_").slice(0, -1);
        const isInsideTmp = workDir !== tmpRoot && workDir.startsWith(tmpPrefix);
        if (isInsideTmp && existsSync(workDir)) {
            rmSync(workDir, { recursive: true, force: true });
        }
    }
}
|
|
57
|
+
// ---------------------------------------------------------------------------
|
|
58
|
+
// Helpers
|
|
59
|
+
// ---------------------------------------------------------------------------
|
|
60
|
+
/**
 * Walk `dir` depth-first and append the path of every non-directory entry
 * to `files`. Paths are built from `prefix` and the entry names, joined
 * with "/". Symbolic links are skipped and never followed.
 *
 * @param dir - Directory to read
 * @param prefix - Path prefix for entries of `dir` ("" for the walk's root)
 * @param files - Output array; mutated in place
 * @param maxDepth - Remaining number of directory levels to read (default 20)
 */
function collectFilesRecursive(dir, prefix, files, maxDepth = 20) {
    // Depth budget used up: do not read this level at all.
    if (maxDepth > 0) {
        readdirSync(dir, { withFileTypes: true })
            // Skip symlinks to prevent traversal
            .filter((dirent) => !dirent.isSymbolicLink())
            .forEach((dirent) => {
                let relPath = dirent.name;
                if (prefix) {
                    relPath = `${prefix}/${dirent.name}`;
                }
                if (!dirent.isDirectory()) {
                    files.push(relPath);
                    return;
                }
                collectFilesRecursive(resolve(dir, dirent.name), relPath, files, maxDepth - 1);
            });
    }
}
|