@sanity/ailf 0.5.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +0 -1
- package/config/features.ts +23 -0
- package/config/models.ts +95 -0
- package/config/prompts.ts +16 -0
- package/config/rubrics.ts +225 -0
- package/config/schedules.ts +47 -0
- package/config/sinks.ts +37 -0
- package/config/sources.ts +21 -0
- package/config/thresholds.ts +61 -0
- package/dist/_vendor/ailf-core/config-helpers.d.ts +171 -0
- package/dist/_vendor/ailf-core/config-helpers.js +170 -0
- package/dist/_vendor/ailf-core/env-helper.d.ts +35 -0
- package/dist/_vendor/ailf-core/env-helper.js +45 -0
- package/dist/_vendor/ailf-core/examples/index.d.ts +16 -0
- package/dist/_vendor/ailf-core/examples/index.js +25 -0
- package/dist/_vendor/ailf-core/index.d.ts +3 -0
- package/dist/_vendor/ailf-core/index.js +5 -0
- package/dist/_vendor/ailf-core/ports/context.d.ts +17 -2
- package/dist/_vendor/ailf-core/ports/doc-fetcher.d.ts +2 -2
- package/dist/_vendor/ailf-core/ports/index.d.ts +2 -1
- package/dist/_vendor/ailf-core/ports/mode-handler.d.ts +129 -0
- package/dist/_vendor/ailf-core/ports/mode-handler.js +19 -0
- package/dist/_vendor/ailf-core/ports/task-source.d.ts +16 -122
- package/dist/_vendor/ailf-core/ports/task-source.js +7 -7
- package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +8 -2
- package/dist/_vendor/ailf-core/schemas/eval-config.js +17 -2
- package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +9 -3
- package/dist/_vendor/ailf-core/schemas/pipeline-request.js +8 -1
- package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +14 -31
- package/dist/_vendor/ailf-core/schemas/pipeline.js +17 -9
- package/dist/_vendor/ailf-core/schemas/schedules.d.ts +14 -4
- package/dist/_vendor/ailf-core/schemas/schedules.js +6 -2
- package/dist/_vendor/ailf-core/schemas/sinks.d.ts +1 -1
- package/dist/_vendor/ailf-core/services/comparison-formatters.js +57 -19
- package/dist/_vendor/ailf-core/services/index.d.ts +2 -1
- package/dist/_vendor/ailf-core/services/index.js +2 -1
- package/dist/_vendor/ailf-core/services/scoring-engine.d.ts +153 -0
- package/dist/_vendor/ailf-core/services/scoring-engine.js +237 -0
- package/dist/_vendor/ailf-core/services/scoring.d.ts +15 -2
- package/dist/_vendor/ailf-core/services/scoring.js +25 -15
- package/dist/_vendor/ailf-core/types/branded-ids.d.ts +137 -0
- package/dist/_vendor/ailf-core/types/branded-ids.js +136 -0
- package/dist/_vendor/ailf-core/types/eval-mode-config.d.ts +150 -0
- package/dist/_vendor/ailf-core/types/eval-mode-config.js +24 -0
- package/dist/_vendor/ailf-core/types/generalized-task.d.ts +332 -0
- package/dist/_vendor/ailf-core/types/generalized-task.js +13 -0
- package/dist/_vendor/ailf-core/types/index.d.ts +45 -83
- package/dist/_vendor/ailf-core/types/index.js +8 -1
- package/dist/_vendor/ailf-core/types/plugin-registry.d.ts +257 -0
- package/dist/_vendor/ailf-core/types/plugin-registry.js +185 -0
- package/dist/_vendor/ailf-core/types/storage-schema.d.ts +199 -0
- package/dist/_vendor/ailf-core/types/storage-schema.js +39 -0
- package/dist/_vendor/ailf-core/types/task-graph.d.ts +86 -0
- package/dist/_vendor/ailf-core/types/task-graph.js +20 -0
- package/dist/_vendor/ailf-core/types/trace.d.ts +118 -0
- package/dist/_vendor/ailf-core/types/trace.js +18 -0
- package/dist/_vendor/ailf-core/types/variable-envelope.d.ts +80 -0
- package/dist/_vendor/ailf-core/types/variable-envelope.js +16 -0
- package/dist/_vendor/ailf-shared/dimension-names.d.ts +5 -18
- package/dist/_vendor/ailf-shared/dimension-names.js +6 -24
- package/dist/_vendor/ailf-shared/eval-modes.d.ts +38 -6
- package/dist/_vendor/ailf-shared/eval-modes.js +26 -2
- package/dist/_vendor/ailf-shared/index.d.ts +0 -1
- package/dist/_vendor/ailf-shared/index.js +0 -1
- package/dist/adapters/api-client/build-request.js +14 -13
- package/dist/adapters/config-sources/file-config-adapter.d.ts +20 -11
- package/dist/adapters/config-sources/file-config-adapter.js +39 -12
- package/dist/adapters/config-sources/index.d.ts +2 -0
- package/dist/adapters/config-sources/index.js +1 -0
- package/dist/adapters/config-sources/ts-config-loader.d.ts +59 -0
- package/dist/adapters/config-sources/ts-config-loader.js +141 -0
- package/dist/adapters/doc-fetchers/sanity-doc-fetcher.d.ts +3 -2
- package/dist/adapters/doc-fetchers/sanity-doc-fetcher.js +7 -2
- package/dist/adapters/task-sources/composite-task-source.d.ts +3 -3
- package/dist/adapters/task-sources/composite-task-source.js +1 -1
- package/dist/adapters/task-sources/content-lake-task-source.d.ts +7 -6
- package/dist/adapters/task-sources/content-lake-task-source.js +35 -39
- package/dist/adapters/task-sources/index.d.ts +3 -2
- package/dist/adapters/task-sources/index.js +3 -2
- package/dist/adapters/task-sources/repo-schemas.d.ts +218 -16
- package/dist/adapters/task-sources/repo-schemas.js +227 -19
- package/dist/adapters/task-sources/repo-task-source.d.ts +16 -12
- package/dist/adapters/task-sources/repo-task-source.js +92 -80
- package/dist/adapters/task-sources/repo-validation.d.ts +36 -5
- package/dist/adapters/task-sources/repo-validation.js +126 -5
- package/dist/adapters/task-sources/task-file-loader.d.ts +64 -0
- package/dist/adapters/task-sources/task-file-loader.js +83 -0
- package/dist/adapters/task-sources/yaml-task-source.d.ts +6 -6
- package/dist/adapters/task-sources/yaml-task-source.js +19 -16
- package/dist/cli.js +0 -2
- package/dist/commands/baseline.js +4 -1
- package/dist/commands/calculate-scores.js +1 -1
- package/dist/commands/coverage-audit.js +9 -1
- package/dist/commands/explain-handler.js +25 -23
- package/dist/commands/fetch-docs.js +3 -2
- package/dist/commands/generate-configs.js +1 -1
- package/dist/commands/init.d.ts +6 -4
- package/dist/commands/init.js +302 -23
- package/dist/commands/interactive.js +11 -7
- package/dist/commands/pipeline-action.d.ts +2 -0
- package/dist/commands/pipeline-action.js +16 -6
- package/dist/commands/pipeline.d.ts +1 -0
- package/dist/commands/pipeline.js +4 -2
- package/dist/commands/pr-comment.js +1 -1
- package/dist/commands/publish.js +2 -2
- package/dist/commands/readiness-report.js +13 -6
- package/dist/commands/validate-tasks.d.ts +2 -2
- package/dist/commands/validate-tasks.js +26 -15
- package/dist/composition-root.d.ts +13 -1
- package/dist/composition-root.js +99 -4
- package/dist/index.d.ts +41 -0
- package/dist/index.js +48 -0
- package/dist/orchestration/build-app-context.js +1 -0
- package/dist/orchestration/build-step-sequence.js +28 -8
- package/dist/orchestration/steps/calculate-scores-step.js +24 -11
- package/dist/orchestration/steps/fetch-docs-step.js +8 -7
- package/dist/orchestration/steps/gap-analysis-step.js +8 -7
- package/dist/orchestration/steps/generate-configs-step.d.ts +16 -3
- package/dist/orchestration/steps/generate-configs-step.js +261 -51
- package/dist/orchestration/steps/grader-consistency-step.js +7 -4
- package/dist/orchestration/steps/mirror-repo-tasks-step.js +1 -1
- package/dist/orchestration/steps/readiness-step.js +5 -6
- package/dist/orchestration/steps/run-eval-step.d.ts +1 -2
- package/dist/orchestration/steps/run-eval-step.js +8 -7
- package/dist/pipeline/cache.d.ts +1 -1
- package/dist/pipeline/cache.js +36 -8
- package/dist/pipeline/calculate-scores.d.ts +2 -4
- package/dist/pipeline/calculate-scores.js +43 -113
- package/dist/pipeline/checks.js +2 -2
- package/dist/pipeline/compare.js +8 -8
- package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.d.ts +10 -0
- package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js +288 -0
- package/dist/pipeline/compiler/__tests__/assertion-mapper.test.d.ts +9 -0
- package/dist/pipeline/compiler/__tests__/assertion-mapper.test.js +145 -0
- package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.d.ts +10 -0
- package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.js +314 -0
- package/dist/pipeline/compiler/__tests__/literacy-handler.test.d.ts +10 -0
- package/dist/pipeline/compiler/__tests__/literacy-handler.test.js +486 -0
- package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.d.ts +10 -0
- package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +392 -0
- package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.d.ts +9 -0
- package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.js +333 -0
- package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.d.ts +12 -0
- package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.js +210 -0
- package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.d.ts +7 -0
- package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.js +404 -0
- package/dist/pipeline/compiler/__tests__/scoring-bridge.test.d.ts +10 -0
- package/dist/pipeline/compiler/__tests__/scoring-bridge.test.js +184 -0
- package/dist/pipeline/compiler/__tests__/task-graph-builder.test.d.ts +8 -0
- package/dist/pipeline/compiler/__tests__/task-graph-builder.test.js +301 -0
- package/dist/pipeline/compiler/__tests__/telemetry.test.d.ts +9 -0
- package/dist/pipeline/compiler/__tests__/telemetry.test.js +503 -0
- package/dist/pipeline/compiler/assertion-mapper.d.ts +58 -0
- package/dist/pipeline/compiler/assertion-mapper.js +175 -0
- package/dist/pipeline/compiler/compiler-to-yaml.d.ts +51 -0
- package/dist/pipeline/compiler/compiler-to-yaml.js +222 -0
- package/dist/pipeline/compiler/config-loader.d.ts +56 -0
- package/dist/pipeline/compiler/config-loader.js +111 -0
- package/dist/pipeline/compiler/fixture-resolver.d.ts +41 -0
- package/dist/pipeline/compiler/fixture-resolver.js +113 -0
- package/dist/pipeline/compiler/hash.d.ts +11 -0
- package/dist/pipeline/compiler/hash.js +18 -0
- package/dist/pipeline/compiler/ignore-fields.d.ts +53 -0
- package/dist/pipeline/compiler/ignore-fields.js +113 -0
- package/dist/pipeline/compiler/index.d.ts +29 -0
- package/dist/pipeline/compiler/index.js +45 -0
- package/dist/pipeline/compiler/literacy-bridge.d.ts +102 -0
- package/dist/pipeline/compiler/literacy-bridge.js +172 -0
- package/dist/pipeline/compiler/mode-bases/agent-harness.d.ts +10 -0
- package/dist/pipeline/compiler/mode-bases/agent-harness.js +21 -0
- package/dist/pipeline/compiler/mode-bases/index.d.ts +4 -0
- package/dist/pipeline/compiler/mode-bases/index.js +4 -0
- package/dist/pipeline/compiler/mode-bases/knowledge-probe.d.ts +10 -0
- package/dist/pipeline/compiler/mode-bases/knowledge-probe.js +22 -0
- package/dist/pipeline/compiler/mode-bases/literacy.d.ts +12 -0
- package/dist/pipeline/compiler/mode-bases/literacy.js +78 -0
- package/dist/pipeline/compiler/mode-bases/mcp-server.d.ts +10 -0
- package/dist/pipeline/compiler/mode-bases/mcp-server.js +70 -0
- package/dist/pipeline/compiler/mode-handlers/__fixtures__/agent-harness-example-tasks.d.ts +14 -0
- package/dist/pipeline/compiler/mode-handlers/__fixtures__/agent-harness-example-tasks.js +152 -0
- package/dist/pipeline/compiler/mode-handlers/__fixtures__/knowledge-probe-example-tasks.d.ts +32 -0
- package/dist/pipeline/compiler/mode-handlers/__fixtures__/knowledge-probe-example-tasks.js +176 -0
- package/dist/pipeline/compiler/mode-handlers/__fixtures__/mcp-example-tasks.d.ts +49 -0
- package/dist/pipeline/compiler/mode-handlers/__fixtures__/mcp-example-tasks.js +259 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.d.ts +43 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.js +187 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.js +138 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/index.d.ts +16 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/index.js +43 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.js +29 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.d.ts +12 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.js +82 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.d.ts +4 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.js +19 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/types.d.ts +49 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.js +16 -0
- package/dist/pipeline/compiler/mode-handlers/index.d.ts +15 -0
- package/dist/pipeline/compiler/mode-handlers/index.js +19 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.d.ts +16 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.js +61 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.d.ts +18 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.js +112 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.d.ts +26 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.js +49 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.js +28 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.d.ts +44 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.js +24 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/assertions.d.ts +18 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/assertions.js +118 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/compiler.d.ts +14 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/compiler.js +105 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/index.d.ts +11 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/index.js +38 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/prompts.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/prompts.js +74 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/types.d.ts +41 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/validation.d.ts +12 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/validation.js +28 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.d.ts +42 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.js +334 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.js +100 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/index.d.ts +27 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/index.js +54 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.d.ts +8 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.js +28 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.d.ts +28 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.js +104 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/types.d.ts +37 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.js +43 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.d.ts +33 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.js +174 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.js +95 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.js +172 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.d.ts +14 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.js +16 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.d.ts +93 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.js +4 -0
- package/dist/pipeline/compiler/preset-loader.d.ts +22 -0
- package/dist/pipeline/compiler/preset-loader.js +99 -0
- package/dist/pipeline/compiler/presets/index.d.ts +9 -0
- package/dist/pipeline/compiler/presets/index.js +8 -0
- package/dist/pipeline/compiler/presets/sanity-literacy.d.ts +42 -0
- package/dist/pipeline/compiler/presets/sanity-literacy.js +208 -0
- package/dist/pipeline/compiler/promptfoo-compiler.d.ts +96 -0
- package/dist/pipeline/compiler/promptfoo-compiler.js +230 -0
- package/dist/pipeline/compiler/provider-assembler.d.ts +39 -0
- package/dist/pipeline/compiler/provider-assembler.js +137 -0
- package/dist/pipeline/compiler/sandbox/docker-sandbox.d.ts +21 -0
- package/dist/pipeline/compiler/sandbox/docker-sandbox.js +136 -0
- package/dist/pipeline/compiler/sandbox/fixture-provisioner.d.ts +69 -0
- package/dist/pipeline/compiler/sandbox/fixture-provisioner.js +189 -0
- package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.d.ts +20 -0
- package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.js +114 -0
- package/dist/pipeline/compiler/sandbox/index.d.ts +10 -0
- package/dist/pipeline/compiler/sandbox/index.js +11 -0
- package/dist/pipeline/compiler/sandbox/sandbox-selector.d.ts +35 -0
- package/dist/pipeline/compiler/sandbox/sandbox-selector.js +86 -0
- package/dist/pipeline/compiler/sandbox/sandbox-strategy.d.ts +81 -0
- package/dist/pipeline/compiler/sandbox/sandbox-strategy.js +15 -0
- package/dist/pipeline/compiler/sandbox/tempdir-sandbox.d.ts +20 -0
- package/dist/pipeline/compiler/sandbox/tempdir-sandbox.js +74 -0
- package/dist/pipeline/compiler/scoring-bridge.d.ts +49 -0
- package/dist/pipeline/compiler/scoring-bridge.js +114 -0
- package/dist/pipeline/compiler/task-graph-builder.d.ts +54 -0
- package/dist/pipeline/compiler/task-graph-builder.js +291 -0
- package/dist/pipeline/compiler/telemetry/cost-tracker.d.ts +90 -0
- package/dist/pipeline/compiler/telemetry/cost-tracker.js +146 -0
- package/dist/pipeline/compiler/telemetry/index.d.ts +14 -0
- package/dist/pipeline/compiler/telemetry/index.js +19 -0
- package/dist/pipeline/compiler/telemetry/redactor.d.ts +58 -0
- package/dist/pipeline/compiler/telemetry/redactor.js +222 -0
- package/dist/pipeline/compiler/telemetry/tool-classifier.d.ts +32 -0
- package/dist/pipeline/compiler/telemetry/tool-classifier.js +120 -0
- package/dist/pipeline/compiler/telemetry/trace-collector.d.ts +75 -0
- package/dist/pipeline/compiler/telemetry/trace-collector.js +297 -0
- package/dist/pipeline/compiler/telemetry/trace-store.d.ts +78 -0
- package/dist/pipeline/compiler/telemetry/trace-store.js +85 -0
- package/dist/pipeline/compiler/variable-resolver.d.ts +46 -0
- package/dist/pipeline/compiler/variable-resolver.js +115 -0
- package/dist/pipeline/coverage-audit.d.ts +15 -5
- package/dist/pipeline/coverage-audit.js +41 -22
- package/dist/pipeline/eval-constants.d.ts +16 -6
- package/dist/pipeline/eval-constants.js +25 -4
- package/dist/pipeline/eval-fingerprint.d.ts +2 -2
- package/dist/pipeline/eval-fingerprint.js +8 -9
- package/dist/pipeline/expand-tasks.d.ts +19 -10
- package/dist/pipeline/expand-tasks.js +34 -28
- package/dist/pipeline/gap-analysis.d.ts +1 -1
- package/dist/pipeline/gap-analysis.js +2 -2
- package/dist/pipeline/generate-configs.d.ts +22 -4
- package/dist/pipeline/generate-configs.js +53 -24
- package/dist/pipeline/grader-api.d.ts +3 -3
- package/dist/pipeline/grader-api.js +5 -12
- package/dist/pipeline/grader-compare-runner.js +20 -27
- package/dist/pipeline/grader-comparison.d.ts +4 -8
- package/dist/pipeline/grader-comparison.js +11 -17
- package/dist/pipeline/grader-consistency-runner.d.ts +2 -3
- package/dist/pipeline/grader-consistency-runner.js +16 -20
- package/dist/pipeline/grader-consistency.d.ts +6 -10
- package/dist/pipeline/grader-consistency.js +13 -32
- package/dist/pipeline/grader-sensitivity-runner.js +7 -5
- package/dist/pipeline/grader-sensitivity.d.ts +2 -6
- package/dist/pipeline/grader-sensitivity.js +10 -10
- package/dist/pipeline/grader-validate-runner.js +7 -5
- package/dist/pipeline/grader-validation.d.ts +2 -6
- package/dist/pipeline/grader-validation.js +14 -22
- package/dist/pipeline/map-request-to-config.js +7 -1
- package/dist/pipeline/mirror-repo-tasks.d.ts +13 -13
- package/dist/pipeline/mirror-repo-tasks.js +22 -21
- package/dist/pipeline/normalize-mode.d.ts +49 -0
- package/dist/pipeline/normalize-mode.js +64 -0
- package/dist/pipeline/plan.d.ts +5 -2
- package/dist/pipeline/plan.js +134 -78
- package/dist/pipeline/pr-comment.js +2 -0
- package/dist/pipeline/profile-resolution.d.ts +22 -14
- package/dist/pipeline/profile-resolution.js +41 -19
- package/dist/pipeline/provenance.d.ts +2 -2
- package/dist/pipeline/provenance.js +12 -17
- package/dist/pipeline/release-report.js +4 -4
- package/dist/pipeline/repo-threshold-evaluator.d.ts +1 -1
- package/dist/pipeline/repo-threshold-evaluator.js +1 -1
- package/dist/pipeline/rubric-loader.d.ts +20 -0
- package/dist/pipeline/rubric-loader.js +37 -0
- package/dist/pipeline/validate.d.ts +4 -4
- package/dist/pipeline/validate.js +64 -53
- package/dist/schedules/loader.js +18 -8
- package/dist/scripts/migrate-task-mode.d.ts +24 -0
- package/dist/scripts/migrate-task-mode.js +85 -0
- package/dist/scripts/migrate-tasks-to-content-lake.js +11 -10
- package/dist/scripts/validate-task-sources.d.ts +1 -1
- package/dist/scripts/validate-task-sources.js +15 -15
- package/dist/sinks/loader.js +5 -7
- package/dist/sources.d.ts +7 -7
- package/dist/sources.js +22 -24
- package/dist/webhook/dispatch.js +2 -1
- package/package.json +15 -4
- package/tasks/knowledge-probe/define-type-api.task.ts +55 -0
- package/tasks/knowledge-probe/groq-projections.task.ts +59 -0
- package/tasks/literacy/frameworks.task.ts +128 -0
- package/tasks/literacy/functions.task.ts +69 -0
- package/tasks/literacy/groq.task.ts +258 -0
- package/tasks/literacy/nextjs-live.task.ts +75 -0
- package/tasks/literacy/studio-setup.task.ts +131 -0
- package/tasks/literacy/visual-editing.task.ts +146 -0
- package/config/features.yaml +0 -116
- package/config/models.yaml +0 -116
- package/config/prompts.yaml +0 -75
- package/config/rubrics.yaml +0 -81
- package/config/schedules.yaml +0 -43
- package/config/sinks.yaml +0 -54
- package/config/sources.yaml +0 -51
- package/config/thresholds.yaml +0 -49
- package/dist/_vendor/ailf-tasks/cli.d.ts +0 -8
- package/dist/_vendor/ailf-tasks/cli.js +0 -61
- package/dist/_vendor/ailf-tasks/index.d.ts +0 -13
- package/dist/_vendor/ailf-tasks/index.js +0 -16
- package/dist/_vendor/ailf-tasks/parser.d.ts +0 -27
- package/dist/_vendor/ailf-tasks/parser.js +0 -73
- package/dist/_vendor/ailf-tasks/schemas.d.ts +0 -198
- package/dist/_vendor/ailf-tasks/schemas.js +0 -180
- package/dist/_vendor/ailf-tasks/validation.d.ts +0 -47
- package/dist/_vendor/ailf-tasks/validation.js +0 -162
- package/dist/agent-observer/test-imports.d.ts +0 -7
- package/dist/agent-observer/test-imports.js +0 -185
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* provider-assembler.ts — Build per-mode provider arrays from models config.
|
|
3
|
+
*
|
|
4
|
+
* Replicates the provider-building logic from the legacy generate-configs.ts
|
|
5
|
+
* so the new compiler produces identical provider configurations.
|
|
6
|
+
*
|
|
7
|
+
* Separated into its own module so GenerateConfigsStep can import it
|
|
8
|
+
* without pulling in the full legacy generate-configs machinery.
|
|
9
|
+
*/
|
|
10
|
+
import { extractModelName, extractProvider, mergeConfig, modelMatchesMode, } from "../../_vendor/ailf-core/index.js";
|
|
11
|
+
import { LiteracyVariant } from "../normalize-mode.js";
|
|
12
|
+
import { loadConfigFile } from "./config-loader.js";
|
|
13
|
+
// ---------------------------------------------------------------------------
|
|
14
|
+
// Public API
|
|
15
|
+
// ---------------------------------------------------------------------------
|
|
16
|
+
/**
 * Reads the models configuration and derives one provider array per
 * literacy variant.
 *
 * @param rootDir - Forwarded to the models config loader.
 * @param source - Forwarded to the agentic provider builder.
 * @param searchMode - Forwarded to the agentic provider builder.
 * @param allowedOrigins - Forwarded to the agentic provider builder.
 * @returns `{ models, providers }`, where `providers` carries the
 *   `baseline`, `agentic`, and `observed` provider arrays built from `models`.
 */
export function loadModelsAndProviders(rootDir, source, searchMode, allowedOrigins) {
    const models = loadModelsYaml(rootDir);
    // Built in the same order as the keys of the returned `providers` object.
    const baseline = buildBaselineProviders(models);
    const agentic = buildAgenticProviders(models, source, searchMode, allowedOrigins);
    const observed = buildObservedProviders(models);
    return {
        models,
        providers: { baseline, agentic, observed },
    };
}
|
|
34
|
+
// ---------------------------------------------------------------------------
|
|
35
|
+
// Baseline providers
|
|
36
|
+
// ---------------------------------------------------------------------------
|
|
37
|
+
/**
 * Builds the baseline provider list: one entry for every model that
 * `modelMatchesMode` accepts for `LiteracyVariant.STANDARD`.
 *
 * Each entry keeps the model's own `id` and `label`; its `config` is the
 * result of merging `models.defaults` with the model's `config`.
 */
function buildBaselineProviders(models) {
    const isStandard = (candidate) => modelMatchesMode(candidate, LiteracyVariant.STANDARD);
    const toProvider = (entry) => {
        const config = mergeConfig(models.defaults, entry.config);
        return { config, id: entry.id, label: entry.label };
    };
    const eligible = models.models.filter(isStandard);
    return eligible.map(toProvider);
}
|
|
46
|
+
// ---------------------------------------------------------------------------
|
|
47
|
+
// Observed providers
|
|
48
|
+
// ---------------------------------------------------------------------------
|
|
49
|
+
/**
 * Builds the observed provider list: one entry for every model that
 * `modelMatchesMode` accepts for `LiteracyVariant.OBSERVED`.
 *
 * Every entry points at the agent-observer provider script; the model itself
 * is identified through `config.modelName`.
 *
 * NOTE(review): the defaults' `observerOptions` are emitted here under the key
 * `recordOptions`, while the agentic builder in this file emits them under
 * `observerOptions` — confirm this is the key the observed provider reads.
 */
function buildObservedProviders(models) {
    const isObserved = (candidate) => modelMatchesMode(candidate, LiteracyVariant.OBSERVED);
    const toProvider = (entry) => {
        const modelName = extractModelName(entry.id);
        const merged = mergeConfig(models.defaults, entry.config);
        const recordOptions = models.defaults.observerOptions ?? {};
        return {
            // Merged settings first, so the three keys below always win.
            config: { ...merged, modelName, observe: true, recordOptions },
            id: "file://dist/agent-observer/provider.js",
            label: `${entry.label} (Observed)`,
        };
    };
    const eligible = models.models.filter(isObserved);
    return eligible.map(toProvider);
}
|
|
66
|
+
// ---------------------------------------------------------------------------
|
|
67
|
+
// Agentic providers
|
|
68
|
+
// ---------------------------------------------------------------------------
|
|
69
|
+
/**
 * Builds the agentic provider list: first every model matching the
 * "agentic-naive" mode, then every model matching "agentic-optimized".
 *
 * All entries point at the agentic provider script and share the same
 * source-derived settings (doc base URL, llms.txt URL and, when present,
 * allowed origins, custom headers, priority domain and a non-"open" search
 * mode).
 *
 * @param models - Models config; `models.models` and `models.defaults` are read.
 * @param source - Optional source descriptor; when falsy no source settings are added.
 * @param searchMode - Optional search mode; treated as "open" when null/undefined.
 * @param _allowedOrigins - Not read by this function; origins come from `source.allowedOrigins`.
 * @returns The naive-agent providers followed by the optimized-agent providers.
 */
function buildAgenticProviders(models, source, searchMode, _allowedOrigins) {
    const selectByMode = (mode) => models.models.filter((candidate) => modelMatchesMode(candidate, mode));
    const naiveModels = selectByMode("agentic-naive");
    const optimizedModels = selectByMode("agentic-optimized");
    const effectiveSearchMode = searchMode ?? "open";
    // Key order matters for the emitted config, so optional keys are spliced
    // in at fixed positions rather than appended afterwards.
    const describeSource = (src) => ({
        ...(src.allowedOrigins?.length
            ? { allowedOrigins: src.allowedOrigins }
            : {}),
        docBaseUrl: src.baseUrl,
        ...(src.headers && Object.keys(src.headers).length > 0
            ? { customHeaders: src.headers }
            : {}),
        llmsTxtUrl: src.llmsTxt,
        ...(src.priorityDomain
            ? { priorityDomain: src.priorityDomain }
            : {}),
        ...(effectiveSearchMode !== "open"
            ? { searchMode: effectiveSearchMode }
            : {}),
    });
    const sourceConfig = source ? describeSource(source) : {};
    // Shared shape for both agent flavours; only the mode and label suffix differ.
    const assemble = (entry, agentMode, labelSuffix) => {
        const modelName = extractModelName(entry.id);
        const provider = extractProvider(entry.id);
        const merged = mergeConfig(models.defaults, entry.config, {
            agentMode,
            maxToolRounds: models.defaults.maxToolRounds ?? 5,
            model: modelName,
            provider,
        });
        return {
            config: {
                ...merged,
                ...sourceConfig,
                observe: true,
                observerOptions: models.defaults.observerOptions ?? {},
            },
            id: "file://dist/agent-observer/agentic-provider.js",
            label: `${entry.label}${labelSuffix}`,
        };
    };
    return [
        ...naiveModels.map((entry) => assemble(entry, "naive", " (Naive Agent)")),
        ...optimizedModels.map((entry) => assemble(entry, "optimized", " (Optimized Agent)")),
    ];
}
|
|
132
|
+
// ---------------------------------------------------------------------------
|
|
133
|
+
// Helpers
|
|
134
|
+
// ---------------------------------------------------------------------------
|
|
135
|
+
/**
 * Loads the "models" config through `loadConfigFile` and returns its `data`
 * payload.
 */
function loadModelsYaml(rootDir) {
    const { data } = loadConfigFile("models", rootDir);
    return data;
}
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* DockerSandboxStrategy — full isolation via Docker containers.
|
|
3
|
+
*
|
|
4
|
+
* Provides deterministic filesystem, network control, and resource limits.
|
|
5
|
+
* Falls back to TempDirSandboxStrategy when Docker is unavailable.
|
|
6
|
+
*
|
|
7
|
+
* Docker interaction uses the `docker` CLI via `execFileSync` (array form,
|
|
8
|
+
* no shell) to prevent shell injection from task-supplied values like
|
|
9
|
+
* image names or task IDs.
|
|
10
|
+
*
|
|
11
|
+
* @see docs/exec-plans/architecture-overhaul/phase-4-agent-harness.md
|
|
12
|
+
*/
|
|
13
|
+
import type { SandboxArtifacts, SandboxInfo, SandboxProvisionOptions, SandboxStrategy } from "./sandbox-strategy.js";
|
|
14
|
+
/**
 * Sandbox strategy backed by Docker containers.
 */
export declare class DockerSandboxStrategy implements SandboxStrategy {
|
|
15
|
+
/** Display name of this strategy. */
readonly name = "Docker Container";
|
|
16
|
+
/** Strategy discriminator; always the literal "docker". */
readonly type: "docker";
|
|
17
|
+
/** Resolves to whether this strategy can be used in the current environment. */
isAvailable(): Promise<boolean>;
|
|
18
|
+
/** Provisions a sandbox for the given options and resolves with its descriptor. */
provision(options: SandboxProvisionOptions): Promise<SandboxInfo>;
|
|
19
|
+
/** Resolves with the artifacts collected for the given sandbox. */
collectArtifacts(sandbox: SandboxInfo): Promise<SandboxArtifacts>;
|
|
20
|
+
/** Tears down the given sandbox. */
teardown(sandbox: SandboxInfo): Promise<void>;
|
|
21
|
+
}
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* DockerSandboxStrategy — full isolation via Docker containers.
|
|
3
|
+
*
|
|
4
|
+
* Provides deterministic filesystem, network control, and resource limits.
|
|
5
|
+
* Falls back to TempDirSandboxStrategy when Docker is unavailable.
|
|
6
|
+
*
|
|
7
|
+
* Docker interaction uses the `docker` CLI via `execFileSync` (array form,
|
|
8
|
+
* no shell) to prevent shell injection from task-supplied values like
|
|
9
|
+
* image names or task IDs.
|
|
10
|
+
*
|
|
11
|
+
* @see docs/exec-plans/architecture-overhaul/phase-4-agent-harness.md
|
|
12
|
+
*/
|
|
13
|
+
import { randomUUID } from "crypto";
|
|
14
|
+
import { execFileSync } from "child_process";
|
|
15
|
+
import { mkdirSync } from "fs";
|
|
16
|
+
import { tmpdir } from "os";
|
|
17
|
+
import { resolve } from "path";
|
|
18
|
+
/** Image used when the caller does not request a specific one. */
const DEFAULT_IMAGE = "node:22-slim";
/** Path inside the container used as the working directory and mount point. */
const DEFAULT_WORKDIR = "/workspace";
/**
 * Only allow official base images to prevent pulling from untrusted registries.
 *
 * Accepts one of the listed official image names, or a single lowercase
 * path segment under `mcr.microsoft.com/`, optionally followed by `:tag`.
 */
const ALLOWED_IMAGE_PATTERN = /^(node|python|ubuntu|alpine|debian|rust|golang|mcr\.microsoft\.com\/[a-z]+)(:[a-zA-Z0-9._-]+)?$/;
/**
 * Reject any Docker image reference that is not on the allowlist.
 *
 * @param image - Image reference requested for the sandbox.
 * @throws {Error} When `image` is not a string or does not match
 *   `ALLOWED_IMAGE_PATTERN`.
 */
function validateDockerImage(image) {
    // RegExp#test stringifies its argument, so a value such as ["node"] would
    // otherwise pass the pattern and later be pushed into the docker argv.
    if (typeof image !== "string" || !ALLOWED_IMAGE_PATTERN.test(image)) {
        throw new Error(`Docker image "${String(image)}" is not in the allowlist. ` +
            `Only official base images (node, python, ubuntu, alpine, debian, rust, golang) ` +
            `and single-segment mcr.microsoft.com images are permitted.`);
    }
}
|
|
28
|
+
/**
 * Sandbox strategy that runs each sandbox as a hardened Docker container.
 *
 * Every docker invocation goes through `execFileSync` with an argument array,
 * so no shell is involved.
 */
export class DockerSandboxStrategy {
    name = "Docker Container";
    type = "docker";
    /**
     * Check whether a Docker daemon can be reached.
     *
     * @returns `true` when `docker info` exits successfully within 5 seconds,
     *   `false` on any failure.
     */
    async isAvailable() {
        try {
            execFileSync("docker", ["info"], { stdio: "ignore", timeout: 5000 });
            return true;
        }
        catch {
            return false;
        }
    }
    /**
     * Create and start a container with a host staging directory bind-mounted
     * at the container working directory.
     *
     * @param options - Provisioning options; `image` and `limits` are read.
     * @returns Sandbox info whose `workingDir` is the host staging directory.
     * @throws {Error} When the image is not allowlisted, or when creating or
     *   starting the container fails (original error attached as `cause`).
     */
    async provision(options) {
        const image = options.image ?? DEFAULT_IMAGE;
        validateDockerImage(image);
        const id = `ailf-${randomUUID().slice(0, 12)}`;
        // Create a local staging directory for fixture injection
        const stagingDir = resolve(tmpdir(), `${id}-staging`);
        mkdirSync(stagingDir, { recursive: true });
        // Build docker create command as array (no shell, prevents injection)
        const args = ["create", "--name", id, "--workdir", DEFAULT_WORKDIR];
        // Security hardening — defense-in-depth against container escape
        args.push("--cap-drop", "ALL");
        args.push("--security-opt", "no-new-privileges");
        args.push("--read-only");
        args.push("--tmpfs", "/tmp:rw,noexec,nosuid,size=100m");
        // Resource limits
        if (options.limits) {
            if (options.limits.cpus) {
                args.push("--cpus", String(options.limits.cpus));
            }
            if (options.limits.memoryBytes) {
                args.push("--memory", String(options.limits.memoryBytes));
            }
            if (options.limits.networkAccess === false) {
                args.push("--network", "none");
            }
        }
        // Bind mount staging directory
        args.push("-v", `${stagingDir}:${DEFAULT_WORKDIR}`);
        args.push(image);
        args.push("sleep", "infinity"); // Keep container alive
        try {
            const containerId = execFileSync("docker", args, {
                encoding: "utf-8",
                timeout: 30_000,
            }).trim();
            // Start the container
            execFileSync("docker", ["start", id], {
                stdio: "ignore",
                timeout: 10_000,
            });
            return {
                id,
                workingDir: stagingDir,
                strategy: "docker",
                containerId: containerId || id,
                createdAt: new Date().toISOString(),
            };
        }
        catch (err) {
            // The caller never receives a SandboxInfo on this path, so nothing
            // else can clean up: remove a possibly-created container and the
            // staging directory here. Both removals are best-effort.
            try {
                execFileSync("docker", ["rm", "-f", id], {
                    stdio: "ignore",
                    timeout: 10_000,
                });
            }
            catch {
                // Container may never have been created
            }
            try {
                rmSync(stagingDir, { recursive: true, force: true });
            }
            catch {
                // Best-effort cleanup
            }
            const msg = err instanceof Error ? err.message : String(err);
            throw new Error(`Failed to provision Docker sandbox "${id}": ${msg}`, {
                cause: err,
            });
        }
    }
    /**
     * Collect the files changed under the container working directory.
     *
     * @param sandbox - Sandbox returned by `provision`; `id` and `createdAt`
     *   are read.
     * @returns Workdir-relative modified paths, the raw `docker diff` output
     *   (omitted when empty or on failure) and the elapsed milliseconds since
     *   `createdAt`. Docker failures are swallowed.
     */
    async collectArtifacts(sandbox) {
        const modifiedFiles = [];
        // Require the trailing separator so the workdir entry itself and
        // sibling paths such as "/workspace2/x" are not reported.
        const workdirPrefix = `${DEFAULT_WORKDIR}/`;
        try {
            // NOTE(review): `docker diff` inspects the container's own
            // filesystem layer; changes under the bind-mounted staging
            // directory are probably not reported — confirm, and consider
            // scanning sandbox.workingDir on the host instead.
            const diff = execFileSync("docker", ["diff", sandbox.id], {
                encoding: "utf-8",
                timeout: 10_000,
            }).trim();
            if (diff) {
                for (const line of diff.split("\n")) {
                    // docker diff output: C /workspace/file.ts (C=changed, A=added, D=deleted)
                    const match = /^[ACD]\s+(.+)$/.exec(line.trim());
                    if (match && match[1].startsWith(workdirPrefix)) {
                        modifiedFiles.push(match[1].slice(workdirPrefix.length));
                    }
                }
            }
            return {
                modifiedFiles,
                diff: diff || undefined,
                durationMs: Date.now() - new Date(sandbox.createdAt).getTime(),
            };
        }
        catch {
            return {
                modifiedFiles,
                durationMs: Date.now() - new Date(sandbox.createdAt).getTime(),
            };
        }
    }
    /**
     * Force-remove the sandbox container. Failures are ignored.
     *
     * NOTE(review): the host staging directory (`sandbox.workingDir`) is not
     * removed here — confirm whether callers still read it after teardown
     * before adding that cleanup.
     *
     * @param sandbox - Sandbox returned by `provision`; only `id` is read.
     */
    async teardown(sandbox) {
        try {
            execFileSync("docker", ["rm", "-f", sandbox.id], {
                stdio: "ignore",
                timeout: 10_000,
            });
        }
        catch {
            // Best-effort cleanup
        }
    }
}
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Fixture provisioner — five-stage pipeline for preparing sandbox state.
|
|
3
|
+
*
|
|
4
|
+
* Pipeline stages:
|
|
5
|
+
* Resolve → Fetch → Cache → Transform → Inject
|
|
6
|
+
*
|
|
7
|
+
* Handles three URI schemes for v1:
|
|
8
|
+
* - file:// — local filesystem path (relative to task)
|
|
9
|
+
* - template:// — built-in project templates
|
|
10
|
+
* - sanity:// — Content Lake document by ID or query
|
|
11
|
+
*
|
|
12
|
+
* @see docs/design-docs/architecture-overhaul/fixtures-artifacts.md
|
|
13
|
+
* @see docs/exec-plans/architecture-overhaul/phase-4-agent-harness.md
|
|
14
|
+
*/
|
|
15
|
+
import type { SandboxInfo } from "./sandbox-strategy.js";
|
|
16
|
+
/** One fixture entry as declared by a task definition. */
export interface FixtureRef {
    /** Location of the fixture content. */
    uri: string;
    /** Where the fixture content is delivered. */
    inject:
        | "provider_config"
        | "system_prompt"
        | "vars"
        | "working_dir";
    /** Variable name for `vars` injection, or relative path for `working_dir`. */
    key?: string;
    /** Preprocessing applied to the content before it is injected. */
    transform?: FixtureTransform;
}
|
|
27
|
+
/** Names of the content transforms a fixture may request. */
export type FixtureTransform =
    | "extract-text"
    | "none"
    | "strip-html"
    | "truncate";
|
|
29
|
+
/** A fixture whose content has been fetched and is ready to be injected. */
export interface ProvisionedFixture {
    /** URI the fixture was declared with. */
    uri: string;
    /** Fixture content to inject. */
    content: string;
    /** SHA-256 digest of the content. */
    contentHash: string;
    /** Destination the content is injected into. */
    inject: FixtureRef["inject"];
    /** Variable name or relative path, when one was declared. */
    key?: string;
}
|
|
42
|
+
/** Everything the provisioning pipeline produced for one set of fixture refs. */
export interface ProvisioningResult {
    /** Fixtures that were provisioned. */
    fixtures: ProvisionedFixture[];
    /** Variable overrides contributed by `vars`-injected fixtures. */
    vars: Record<string, unknown>;
    /** Non-fatal problems encountered along the way. */
    warnings: string[];
    /** Fixture manifest kept for reproducibility. */
    manifest: Record<string, string>;
}
|
|
53
|
+
/** Configuration accepted by the provisioning pipeline. */
export interface ProvisioningOptions {
    /** Base directory that relative fixture paths are resolved against. */
    rootDir: string;
    /** Sandbox that receives `working_dir` fixtures. */
    sandbox?: SandboxInfo;
    /** Directory used for content-addressable caching of fixture content. */
    cacheDir?: string;
}
|
|
62
|
+
/**
 * Execute the Resolve → Fetch → Cache → Transform → Inject pipeline for a
 * list of fixture references.
 *
 * @param refs - Fixture references taken from the task definition
 * @param options - Provisioning configuration
 * @returns The provisioned fixtures together with injection metadata
 */
export declare function provisionFixtures(
    refs: FixtureRef[],
    options: ProvisioningOptions,
): Promise<ProvisioningResult>;
|
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Fixture provisioner — five-stage pipeline for preparing sandbox state.
|
|
3
|
+
*
|
|
4
|
+
* Pipeline stages:
|
|
5
|
+
* Resolve → Fetch → Cache → Transform → Inject
|
|
6
|
+
*
|
|
7
|
+
* Handles three URI schemes for v1:
|
|
8
|
+
* - file:// — local filesystem path (relative to task)
|
|
9
|
+
* - template:// — built-in project templates
|
|
10
|
+
* - sanity:// — Content Lake document by ID or query
|
|
11
|
+
*
|
|
12
|
+
* @see docs/design-docs/architecture-overhaul/fixtures-artifacts.md
|
|
13
|
+
* @see docs/exec-plans/architecture-overhaul/phase-4-agent-harness.md
|
|
14
|
+
*/
|
|
15
|
+
import { existsSync, mkdirSync, readFileSync, writeFileSync } from "fs";
|
|
16
|
+
import { createHash } from "crypto";
|
|
17
|
+
import { basename, dirname, resolve } from "path";
|
|
18
|
+
// ---------------------------------------------------------------------------
|
|
19
|
+
// Public API
|
|
20
|
+
// ---------------------------------------------------------------------------
|
|
21
|
+
/**
 * Run the five-stage fixture provisioning pipeline.
 *
 * Each reference is processed independently. An error raised while handling
 * one reference is recorded in `warnings` and does not abort the others.
 * A fixture is added to `fixtures` and `manifest` only after its injection
 * step has completed without throwing, so the two collections always agree.
 *
 * @param refs - Fixture references from the task definition
 * @param options - Provisioning configuration
 * @returns Provisioned fixtures and injection metadata
 */
export async function provisionFixtures(refs, options) {
    const fixtures = [];
    const vars = {};
    const warnings = [];
    const manifest = {};
    for (const ref of refs) {
        try {
            // Stage 1: Resolve — parse URI, determine backend
            const resolved = resolveFixtureURI(ref.uri, options.rootDir);
            // Stage 2: Fetch — read content from source
            const content = await fetchFixtureContent(resolved, warnings);
            if (content === null)
                continue;
            // Stage 3: Cache — store in content-addressable cache
            const contentHash = hashContent(content);
            if (options.cacheDir) {
                cacheFixture(options.cacheDir, contentHash, content);
            }
            // Stage 4: Transform — apply preprocessing
            const transformed = applyTransform(content, ref.transform);
            // Stage 5: Inject — place into target
            switch (ref.inject) {
                case "vars":
                    if (ref.key) {
                        vars[ref.key] = transformed;
                    }
                    else {
                        // Previously dropped without any trace.
                        warnings.push(`Fixture "${ref.uri}": inject target "vars" requires a key; no variable was set`);
                    }
                    break;
                case "working_dir":
                    if (options.sandbox) {
                        injectIntoWorkingDir(options.sandbox.workingDir, ref.key ?? basename(ref.uri), transformed);
                    }
                    else {
                        // Previously dropped without any trace.
                        warnings.push(`Fixture "${ref.uri}": inject target "working_dir" requires a sandbox; nothing was written`);
                    }
                    break;
                case "system_prompt":
                case "provider_config":
                    // These are handled by the caller when assembling the Promptfoo config
                    break;
                default:
                    warnings.push(`Fixture "${ref.uri}": unknown inject target "${ref.inject}"`);
                    break;
            }
            // Record the fixture only now: if injection threw above, control
            // is already in the catch block and the fixture is reported as a
            // warning instead of as provisioned.
            manifest[ref.uri] = contentHash;
            fixtures.push({
                uri: ref.uri,
                content: transformed,
                contentHash,
                inject: ref.inject,
                key: ref.key,
            });
        }
        catch (err) {
            const msg = err instanceof Error ? err.message : String(err);
            warnings.push(`Fixture "${ref.uri}": ${msg}`);
        }
    }
    return { fixtures, vars, warnings, manifest };
}
|
|
83
|
+
/**
 * Throw unless `absolutePath` is `rootDir` itself or lies beneath it.
 *
 * Containment is decided by walking up the parent chain of the normalised
 * path, so the result does not depend on the platform's path separator and
 * also works when `rootDir` is the filesystem root. The check is purely
 * lexical: symbolic links are not resolved.
 *
 * @param absolutePath - Path to validate.
 * @param rootDir - Directory the path must stay within.
 * @throws {Error} When the path resolves outside `rootDir`.
 */
function assertPathContained(absolutePath, rootDir) {
    const base = resolve(rootDir);
    // Normalise first so ".." segments cannot hide behind a matching prefix.
    let current = resolve(absolutePath);
    for (;;) {
        if (current === base) {
            return;
        }
        const parent = dirname(current);
        if (parent === current) {
            // Reached the filesystem root without meeting `base`.
            break;
        }
        current = parent;
    }
    throw new Error(`Path traversal detected: "${absolutePath}" resolves outside rootDir "${rootDir}"`);
}
|
|
90
|
+
/**
 * Stage 1: classify a fixture URI by scheme.
 *
 * `file://` URIs and bare paths are resolved against `rootDir` and must stay
 * inside it; `template://` and `sanity://` URIs are returned with the scheme
 * prefix stripped.
 *
 * @param uri - Fixture URI or bare path.
 * @param rootDir - Base directory for file fixtures.
 * @returns `{ scheme, path }`, plus `absolutePath` for file fixtures.
 * @throws {Error} When a file path resolves outside `rootDir`.
 */
function resolveFixtureURI(uri, rootDir) {
    const asFile = (relativePath) => {
        const absolutePath = resolve(rootDir, relativePath);
        assertPathContained(absolutePath, rootDir);
        return { scheme: "file", path: relativePath, absolutePath };
    };
    const filePrefix = "file://";
    if (uri.startsWith(filePrefix)) {
        return asFile(uri.slice(filePrefix.length));
    }
    for (const scheme of ["template", "sanity"]) {
        const prefix = `${scheme}://`;
        if (uri.startsWith(prefix)) {
            return { scheme, path: uri.slice(prefix.length) };
        }
    }
    // Bare path — treat as file
    return asFile(uri);
}
|
|
108
|
+
// ---------------------------------------------------------------------------
|
|
109
|
+
// Stage 2: Fetch
|
|
110
|
+
// ---------------------------------------------------------------------------
|
|
111
|
+
/**
 * Stage 2: obtain the content for a resolved fixture.
 *
 * File fixtures are read from disk as UTF-8. Template and Sanity fixtures
 * are not fetched: each yields a placeholder comment and a warning.
 *
 * @param resolved - Output of the resolve stage (`scheme`, `path`, and
 *   `absolutePath` for file fixtures).
 * @param warnings - Array that warning messages are appended to.
 * @returns The content, or `null` when the file is missing or the scheme is
 *   not recognised.
 */
async function fetchFixtureContent(resolved, warnings) {
    const { scheme, path, absolutePath } = resolved;
    if (scheme === "file") {
        const present = Boolean(absolutePath) && existsSync(absolutePath);
        if (!present) {
            warnings.push(`Fixture file not found: ${absolutePath ?? path}`);
            return null;
        }
        return readFileSync(absolutePath, "utf-8");
    }
    if (scheme === "template") {
        // No template registry exists yet, so hand back a placeholder.
        warnings.push(`Template fixture "${path}" — template registry not yet implemented, ` +
            "returning placeholder content");
        return `<!-- Template: ${path} -->\n`;
    }
    if (scheme === "sanity") {
        // Needs the DocFetcher port, which is only available at eval time.
        warnings.push(`Sanity fixture "${path}" — deferred to eval time ` +
            "(requires DocFetcher port)");
        return `<!-- Sanity document: ${path} -->\n`;
    }
    warnings.push(`Unknown fixture scheme: ${scheme}`);
    return null;
}
|
|
139
|
+
// ---------------------------------------------------------------------------
|
|
140
|
+
// Stage 3: Cache
|
|
141
|
+
// ---------------------------------------------------------------------------
|
|
142
|
+
/**
 * Compute the SHA-256 digest of fixture content.
 *
 * @param content - Content to hash.
 * @returns Lowercase hexadecimal digest.
 */
function hashContent(content) {
    const hasher = createHash("sha256");
    hasher.update(content);
    return hasher.digest("hex");
}
|
|
145
|
+
/**
 * Stage 3: store content in the cache directory under its hash.
 *
 * An existing entry is left untouched.
 *
 * @param cacheDir - Cache directory; created if missing.
 * @param hash - Content hash used as the file name.
 * @param content - Content to write.
 */
function cacheFixture(cacheDir, hash, content) {
    const entryPath = resolve(cacheDir, hash);
    if (existsSync(entryPath)) {
        return;
    }
    mkdirSync(dirname(entryPath), { recursive: true });
    writeFileSync(entryPath, content);
}
|
|
152
|
+
// ---------------------------------------------------------------------------
|
|
153
|
+
// Stage 4: Transform
|
|
154
|
+
// ---------------------------------------------------------------------------
|
|
155
|
+
/**
 * Stage 4: apply the requested preprocessing to fixture content.
 *
 * - `strip-html` deletes `<...>` tags and trims the result.
 * - `extract-text` replaces tags with spaces, collapses whitespace runs to a
 *   single space and trims.
 * - `truncate` keeps the first 10,000 characters and appends a marker when
 *   the content is longer than that.
 *
 * @param content - Fetched fixture content.
 * @param transform - Transform name; a missing, `"none"` or unrecognised
 *   value leaves the content unchanged.
 * @returns The transformed content.
 */
function applyTransform(content, transform) {
    const truncateAt = 10_000;
    const tagPattern = /<[^>]*>/g;
    const transforms = new Map([
        ["strip-html", (text) => text.replace(tagPattern, "").trim()],
        [
            "extract-text",
            (text) => {
                const withoutTags = text.replace(tagPattern, " ");
                return withoutTags.replace(/\s+/g, " ").trim();
            },
        ],
        [
            "truncate",
            (text) => {
                if (text.length > truncateAt) {
                    return text.slice(0, truncateAt) + "\n... (truncated)";
                }
                return text;
            },
        ],
    ]);
    const run = transforms.get(transform);
    return run === undefined ? content : run(content);
}
|
|
176
|
+
// ---------------------------------------------------------------------------
|
|
177
|
+
// Stage 5: Inject
|
|
178
|
+
// ---------------------------------------------------------------------------
|
|
179
|
+
/**
 * Stage 5: write fixture content to a path inside the sandbox working
 * directory, creating parent directories as needed.
 *
 * Containment is decided by walking up the parent chain of the resolved
 * target, so it does not depend on the platform's path separator. The check
 * is purely lexical: symbolic links are not resolved.
 *
 * @param workingDir - Sandbox working directory on the host.
 * @param targetPath - Destination path relative to `workingDir`.
 * @param content - Content to write.
 * @throws {Error} When `targetPath` resolves outside `workingDir`, or to
 *   `workingDir` itself.
 */
function injectIntoWorkingDir(workingDir, targetPath, content) {
    const base = resolve(workingDir);
    const fullPath = resolve(base, targetPath);
    // Path containment: prevent targetPath like "../../etc/cron.d/evil"
    let contained = false;
    let current = fullPath;
    for (;;) {
        if (current === base) {
            contained = true;
            break;
        }
        const parent = dirname(current);
        if (parent === current) {
            // Reached the filesystem root without meeting the sandbox.
            break;
        }
        current = parent;
    }
    if (!contained) {
        throw new Error(`Path traversal detected: "${targetPath}" resolves outside sandbox "${workingDir}"`);
    }
    if (fullPath === base) {
        // Writing a file over the sandbox directory would only fail later
        // with an opaque EISDIR; report the real problem instead.
        throw new Error(`Fixture target "${targetPath}" resolves to the sandbox directory itself, not to a file inside it`);
    }
    mkdirSync(dirname(fullPath), { recursive: true });
    writeFileSync(fullPath, content);
}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* GitWorktreeSandboxStrategy — sandbox using `git worktree` for repo-based tasks.
|
|
3
|
+
*
|
|
4
|
+
* Creates a git worktree at a specific ref, providing a deterministic
|
|
5
|
+
* starting state for tasks that modify a git repository.
|
|
6
|
+
*
|
|
7
|
+
* All git CLI calls use `execFileSync` (array form, no shell) to prevent
|
|
8
|
+
* injection from task-supplied values like git refs or repo paths.
|
|
9
|
+
*
|
|
10
|
+
* @see docs/exec-plans/architecture-overhaul/phase-4-agent-harness.md
|
|
11
|
+
*/
|
|
12
|
+
import type { SandboxArtifacts, SandboxInfo, SandboxProvisionOptions, SandboxStrategy } from "./sandbox-strategy.js";
|
|
13
|
+
/**
 * Sandbox strategy built on `git worktree`, intended for tasks that operate
 * on a git repository.
 */
export declare class GitWorktreeSandboxStrategy implements SandboxStrategy {
    /** Human-readable label for this strategy. */
    readonly name = "Git Worktree";
    /** Discriminator identifying this strategy. */
    readonly type: "git-worktree";
    /** Reports whether this strategy can be used in the current environment. */
    isAvailable(): Promise<boolean>;
    /**
     * Provisions a sandbox for the given options.
     * NOTE(review): presumably creates a worktree at the requested ref —
     * confirm against the implementation.
     */
    provision(options: SandboxProvisionOptions): Promise<SandboxInfo>;
    /** Collects the artifacts produced inside the sandbox. */
    collectArtifacts(sandbox: SandboxInfo): Promise<SandboxArtifacts>;
    /** Releases the resources held by the sandbox. */
    teardown(sandbox: SandboxInfo): Promise<void>;
}
|