@sanity/ailf 1.0.0 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +0 -1
- package/canonical/grader-references/README.md +2 -2
- package/canonical/reference-solutions/content-lake/mutations.ts +160 -0
- package/canonical/reference-solutions/content-lake/realtime.ts +187 -0
- package/canonical/reference-solutions/image-handling/asset-pipeline.tsx +166 -0
- package/canonical/reference-solutions/portable-text/custom-blocks.ts +204 -0
- package/canonical/reference-solutions/portable-text/rendering.tsx +163 -0
- package/config/features.ts +1 -1
- package/config/models.ts +29 -12
- package/config/sources.ts +1 -1
- package/config/thresholds.ts +1 -1
- package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.d.ts +10 -0
- package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.js +185 -0
- package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.d.ts +6 -0
- package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.js +42 -0
- package/dist/_vendor/ailf-core/artifact-capture/noop-collector.d.ts +14 -0
- package/dist/_vendor/ailf-core/artifact-capture/noop-collector.js +25 -0
- package/dist/_vendor/ailf-core/config-helpers.d.ts +20 -17
- package/dist/_vendor/ailf-core/config-helpers.js +51 -2
- package/dist/_vendor/ailf-core/examples/index.d.ts +166 -80
- package/dist/_vendor/ailf-core/examples/index.js +213 -94
- package/dist/_vendor/ailf-core/index.d.ts +3 -2
- package/dist/_vendor/ailf-core/index.js +2 -1
- package/dist/_vendor/ailf-core/ports/artifact-collector.d.ts +94 -0
- package/dist/_vendor/ailf-core/ports/artifact-collector.js +13 -0
- package/dist/_vendor/ailf-core/ports/capture-comparator.d.ts +138 -0
- package/dist/_vendor/ailf-core/ports/capture-comparator.js +10 -0
- package/dist/_vendor/ailf-core/ports/context.d.ts +22 -1
- package/dist/_vendor/ailf-core/ports/eval-runner.d.ts +6 -0
- package/dist/_vendor/ailf-core/ports/index.d.ts +2 -0
- package/dist/_vendor/ailf-core/ports/pipeline-step.d.ts +11 -0
- package/dist/_vendor/ailf-core/ports/task-source.d.ts +3 -3
- package/dist/_vendor/ailf-core/ports/task-source.js +3 -3
- package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +1 -0
- package/dist/_vendor/ailf-core/schemas/eval-config.js +10 -0
- package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +7 -1
- package/dist/_vendor/ailf-core/schemas/pipeline-request.js +16 -2
- package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +0 -2
- package/dist/_vendor/ailf-core/schemas/pipeline.js +0 -1
- package/dist/_vendor/ailf-core/services/config-helpers.d.ts +16 -1
- package/dist/_vendor/ailf-core/services/config-helpers.js +21 -0
- package/dist/_vendor/ailf-core/services/index.d.ts +1 -1
- package/dist/_vendor/ailf-core/services/index.js +1 -1
- package/dist/_vendor/ailf-core/services/scoring.js +9 -0
- package/dist/_vendor/ailf-core/types/generalized-task.d.ts +25 -1
- package/dist/_vendor/ailf-core/types/generalized-task.js +1 -1
- package/dist/_vendor/ailf-core/types/index.d.ts +48 -7
- package/dist/_vendor/ailf-core/types/plugin-registry.d.ts +105 -23
- package/dist/_vendor/ailf-core/types/plugin-registry.js +73 -20
- package/dist/_vendor/ailf-shared/eval-modes.d.ts +15 -0
- package/dist/_vendor/ailf-shared/eval-modes.js +18 -0
- package/dist/adapters/api-client/remediation.js +2 -2
- package/dist/adapters/config-sources/file-config-adapter.js +7 -1
- package/dist/adapters/config-sources/ts-config-loader.js +21 -13
- package/dist/adapters/eval-runners/promptfoo-eval-adapter.js +8 -2
- package/dist/adapters/index.d.ts +0 -1
- package/dist/adapters/index.js +0 -1
- package/dist/adapters/task-sources/composite-task-source.d.ts +1 -1
- package/dist/adapters/task-sources/composite-task-source.js +1 -1
- package/dist/adapters/task-sources/content-lake-task-source.d.ts +4 -6
- package/dist/adapters/task-sources/content-lake-task-source.js +21 -26
- package/dist/adapters/task-sources/index.d.ts +3 -4
- package/dist/adapters/task-sources/index.js +3 -4
- package/dist/adapters/task-sources/repo-schemas.d.ts +219 -17
- package/dist/adapters/task-sources/repo-schemas.js +228 -20
- package/dist/adapters/task-sources/repo-task-source.d.ts +14 -10
- package/dist/adapters/task-sources/repo-task-source.js +81 -122
- package/dist/adapters/task-sources/repo-trigger.d.ts +1 -1
- package/dist/adapters/task-sources/repo-trigger.js +1 -1
- package/dist/adapters/task-sources/repo-validation.d.ts +36 -5
- package/dist/adapters/task-sources/repo-validation.js +126 -5
- package/dist/adapters/task-sources/task-file-loader.d.ts +10 -7
- package/dist/adapters/task-sources/task-file-loader.js +21 -7
- package/dist/agent-observer/test-imports.d.ts +7 -0
- package/dist/agent-observer/test-imports.js +185 -0
- package/dist/artifact-capture/comparator.d.ts +22 -0
- package/dist/artifact-capture/comparator.js +493 -0
- package/dist/artifact-capture/filesystem-collector.d.ts +42 -0
- package/dist/artifact-capture/filesystem-collector.js +237 -0
- package/dist/artifact-capture/redact-artifact.d.ts +20 -0
- package/dist/artifact-capture/redact-artifact.js +115 -0
- package/dist/assertions/source-isolation.d.ts +1 -1
- package/dist/assertions/source-isolation.js +1 -1
- package/dist/cli.js +4 -0
- package/dist/commands/calculate-scores.js +1 -0
- package/dist/commands/capture-compare.d.ts +15 -0
- package/dist/commands/capture-compare.js +253 -0
- package/dist/commands/capture-list.d.ts +12 -0
- package/dist/commands/capture-list.js +147 -0
- package/dist/commands/capture.d.ts +9 -0
- package/dist/commands/capture.js +16 -0
- package/dist/commands/chronic-failures.d.ts +8 -0
- package/dist/commands/chronic-failures.js +33 -0
- package/dist/commands/coverage-audit.js +3 -1
- package/dist/commands/explain-handler.d.ts +1 -1
- package/dist/commands/explain-handler.js +37 -8
- package/dist/commands/fetch-docs.js +1 -0
- package/dist/commands/generate-configs.d.ts +3 -3
- package/dist/commands/generate-configs.js +20 -8
- package/dist/commands/init.d.ts +5 -4
- package/dist/commands/init.js +190 -25
- package/dist/commands/pipeline-action.d.ts +7 -1
- package/dist/commands/pipeline-action.js +43 -19
- package/dist/commands/pipeline.d.ts +6 -1
- package/dist/commands/pipeline.js +7 -2
- package/dist/commands/pr-comment.js +1 -0
- package/dist/commands/publish.js +1 -0
- package/dist/commands/shared/help.js +2 -2
- package/dist/commands/update-quality-scores.d.ts +5 -0
- package/dist/commands/update-quality-scores.js +20 -0
- package/dist/commands/validate-tasks.d.ts +2 -2
- package/dist/commands/validate-tasks.js +26 -15
- package/dist/composition-root.d.ts +15 -4
- package/dist/composition-root.js +100 -55
- package/dist/config/features.ts +23 -0
- package/dist/config/models.ts +100 -0
- package/dist/config/prompts.ts +16 -0
- package/dist/config/rubrics.ts +225 -0
- package/dist/config/schedules.ts +47 -0
- package/dist/config/sinks.ts +37 -0
- package/dist/config/sources.ts +21 -0
- package/dist/config/thresholds.ts +61 -0
- package/dist/index.d.ts +41 -0
- package/dist/index.js +48 -0
- package/dist/lib/agent-behavior-report.d.ts +8 -0
- package/dist/lib/agent-behavior-report.js +185 -0
- package/dist/lib/baseline.d.ts +19 -0
- package/dist/lib/baseline.js +153 -0
- package/dist/lib/calculate-scores.d.ts +23 -0
- package/dist/lib/calculate-scores.js +42 -0
- package/dist/lib/compare.d.ts +18 -0
- package/dist/lib/compare.js +170 -0
- package/dist/lib/coverage-audit.d.ts +4 -0
- package/dist/lib/coverage-audit.js +42 -0
- package/dist/lib/discovery-report.d.ts +13 -0
- package/dist/lib/discovery-report.js +57 -0
- package/dist/lib/fetch-docs.d.ts +30 -0
- package/dist/lib/fetch-docs.js +171 -0
- package/dist/lib/generate-configs.d.ts +25 -0
- package/dist/lib/generate-configs.js +42 -0
- package/dist/lib/grader-api.d.ts +21 -0
- package/dist/lib/grader-api.js +34 -0
- package/dist/lib/grader-compare.d.ts +19 -0
- package/dist/lib/grader-compare.js +91 -0
- package/dist/lib/grader-consistency.d.ts +27 -0
- package/dist/lib/grader-consistency.js +79 -0
- package/dist/lib/grader-sensitivity.d.ts +19 -0
- package/dist/lib/grader-sensitivity.js +75 -0
- package/dist/lib/grader-validate.d.ts +19 -0
- package/dist/lib/grader-validate.js +78 -0
- package/dist/lib/measure-retrieval.d.ts +14 -0
- package/dist/lib/measure-retrieval.js +71 -0
- package/dist/lib/pr-comment.d.ts +16 -0
- package/dist/lib/pr-comment.js +28 -0
- package/dist/lib/readiness-report.d.ts +13 -0
- package/dist/lib/readiness-report.js +108 -0
- package/dist/lib/webhook-server.d.ts +11 -0
- package/dist/lib/webhook-server.js +24 -0
- package/dist/lib/weekly-digest.d.ts +24 -0
- package/dist/lib/weekly-digest.js +148 -0
- package/dist/orchestration/build-app-context.js +13 -0
- package/dist/orchestration/build-step-sequence.js +4 -2
- package/dist/orchestration/cache-context.d.ts +23 -0
- package/dist/orchestration/cache-context.js +43 -0
- package/dist/orchestration/env-bridge.d.ts +21 -0
- package/dist/orchestration/env-bridge.js +66 -0
- package/dist/orchestration/load-pipeline-tasks.d.ts +34 -0
- package/dist/orchestration/load-pipeline-tasks.js +52 -0
- package/dist/orchestration/pipeline-orchestrator.js +75 -5
- package/dist/orchestration/step-runner.js +5 -1
- package/dist/orchestration/steps/calculate-scores-step.d.ts +1 -0
- package/dist/orchestration/steps/calculate-scores-step.js +13 -0
- package/dist/orchestration/steps/callback-step.js +10 -1
- package/dist/orchestration/steps/compare-step.js +6 -3
- package/dist/orchestration/steps/discovery-report-step.js +6 -2
- package/dist/orchestration/steps/fetch-docs-shell.d.ts +17 -0
- package/dist/orchestration/steps/fetch-docs-shell.js +30 -0
- package/dist/orchestration/steps/fetch-docs-step.d.ts +1 -0
- package/dist/orchestration/steps/fetch-docs-step.js +32 -19
- package/dist/orchestration/steps/gap-analysis-step.js +13 -2
- package/dist/orchestration/steps/generate-configs-step.d.ts +1 -0
- package/dist/orchestration/steps/generate-configs-step.js +77 -26
- package/dist/orchestration/steps/mirror-repo-tasks-step.d.ts +1 -1
- package/dist/orchestration/steps/mirror-repo-tasks-step.js +1 -1
- package/dist/orchestration/steps/publish-report-step.js +19 -0
- package/dist/orchestration/steps/readiness-step.js +8 -3
- package/dist/orchestration/steps/report-step.js +17 -4
- package/dist/orchestration/steps/run-eval-step.d.ts +1 -0
- package/dist/orchestration/steps/run-eval-step.js +51 -31
- package/dist/pipeline/agent-behavior-report.js +6 -0
- package/dist/pipeline/attribution.d.ts +1 -1
- package/dist/pipeline/attribution.js +1 -1
- package/dist/pipeline/cache.js +29 -15
- package/dist/pipeline/calculate-scores.d.ts +2 -0
- package/dist/pipeline/calculate-scores.js +70 -33
- package/dist/pipeline/chronic-failures.d.ts +55 -0
- package/dist/pipeline/chronic-failures.js +110 -0
- package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js +1 -1
- package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.js +1 -1
- package/dist/pipeline/compiler/__tests__/literacy-handler.test.js +1 -1
- package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +132 -62
- package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.js +2 -3
- package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.js +33 -100
- package/dist/pipeline/compiler/__tests__/task-bridge.test.d.ts +9 -0
- package/dist/pipeline/compiler/__tests__/task-bridge.test.js +339 -0
- package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.d.ts +10 -0
- package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.js +509 -0
- package/dist/pipeline/compiler/assertion-mapper.d.ts +1 -1
- package/dist/pipeline/compiler/assertion-mapper.js +1 -1
- package/dist/pipeline/compiler/compiler-to-yaml.d.ts +2 -7
- package/dist/pipeline/compiler/compiler-to-yaml.js +2 -7
- package/dist/pipeline/compiler/config-loader.d.ts +14 -0
- package/dist/pipeline/compiler/config-loader.js +42 -2
- package/dist/pipeline/compiler/fixture-resolver.d.ts +1 -1
- package/dist/pipeline/compiler/fixture-resolver.js +1 -1
- package/dist/pipeline/compiler/ignore-fields.d.ts +1 -1
- package/dist/pipeline/compiler/ignore-fields.js +1 -1
- package/dist/pipeline/compiler/index.d.ts +2 -5
- package/dist/pipeline/compiler/index.js +2 -5
- package/dist/pipeline/compiler/literacy-bridge.d.ts +2 -2
- package/dist/pipeline/compiler/literacy-bridge.js +2 -2
- package/dist/pipeline/compiler/mode-bases/agent-harness.d.ts +10 -0
- package/dist/pipeline/compiler/mode-bases/agent-harness.js +21 -0
- package/dist/pipeline/compiler/mode-bases/index.d.ts +4 -0
- package/dist/pipeline/compiler/mode-bases/index.js +4 -0
- package/dist/pipeline/compiler/mode-bases/knowledge-probe.d.ts +10 -0
- package/dist/pipeline/compiler/mode-bases/knowledge-probe.js +22 -0
- package/dist/pipeline/compiler/mode-bases/literacy.d.ts +23 -0
- package/dist/pipeline/compiler/mode-bases/literacy.js +132 -0
- package/dist/pipeline/compiler/mode-bases/mcp-server.d.ts +10 -0
- package/dist/pipeline/compiler/mode-bases/mcp-server.js +70 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.d.ts +43 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.js +187 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.js +138 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/index.d.ts +16 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/index.js +43 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.js +29 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.d.ts +12 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.js +82 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.d.ts +4 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.js +19 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/types.d.ts +49 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.js +16 -0
- package/dist/pipeline/compiler/mode-handlers/index.d.ts +6 -7
- package/dist/pipeline/compiler/mode-handlers/index.js +6 -8
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.d.ts +16 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.js +61 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.d.ts +18 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.js +112 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.d.ts +26 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.js +49 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.js +28 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.d.ts +44 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.js +24 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/assertions.d.ts +18 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/assertions.js +118 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/compiler.d.ts +14 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/compiler.js +105 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/index.d.ts +11 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/index.js +38 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/prompts.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/prompts.js +74 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/types.d.ts +41 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/validation.d.ts +12 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/validation.js +28 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-assertions.js +63 -6
- package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.d.ts +42 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.js +334 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.js +100 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/index.d.ts +27 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/index.js +54 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.d.ts +8 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.js +28 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.d.ts +28 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.js +108 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/types.d.ts +37 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.js +43 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.d.ts +3 -1
- package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.js +65 -67
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.d.ts +33 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.js +191 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.js +101 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.js +172 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.js +323 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.d.ts +103 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.d.ts +65 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.js +368 -0
- package/dist/pipeline/compiler/preset-loader.d.ts +22 -0
- package/dist/pipeline/compiler/preset-loader.js +99 -0
- package/dist/pipeline/compiler/presets/sanity-literacy.d.ts +7 -10
- package/dist/pipeline/compiler/presets/sanity-literacy.js +11 -157
- package/dist/pipeline/compiler/promptfoo-compiler.d.ts +1 -4
- package/dist/pipeline/compiler/promptfoo-compiler.js +3 -12
- package/dist/pipeline/compiler/provider-assembler.js +13 -7
- package/dist/pipeline/compiler/sandbox/docker-sandbox.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/docker-sandbox.js +1 -1
- package/dist/pipeline/compiler/sandbox/fixture-provisioner.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/fixture-provisioner.js +1 -1
- package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.js +1 -1
- package/dist/pipeline/compiler/sandbox/index.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/index.js +1 -1
- package/dist/pipeline/compiler/sandbox/sandbox-selector.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/sandbox-selector.js +1 -1
- package/dist/pipeline/compiler/sandbox/sandbox-strategy.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/sandbox-strategy.js +1 -1
- package/dist/pipeline/compiler/sandbox/tempdir-sandbox.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/tempdir-sandbox.js +1 -1
- package/dist/pipeline/compiler/scoring-bridge.d.ts +1 -1
- package/dist/pipeline/compiler/scoring-bridge.js +1 -1
- package/dist/pipeline/compiler/task-bridge.d.ts +41 -0
- package/dist/pipeline/compiler/task-bridge.js +92 -0
- package/dist/pipeline/compiler/task-graph-builder.d.ts +1 -4
- package/dist/pipeline/compiler/task-graph-builder.js +1 -4
- package/dist/pipeline/compiler/telemetry/index.d.ts +1 -1
- package/dist/pipeline/compiler/telemetry/index.js +1 -1
- package/dist/pipeline/compiler/variable-resolver.d.ts +1 -1
- package/dist/pipeline/compiler/variable-resolver.js +1 -1
- package/dist/pipeline/coverage-audit.d.ts +1 -1
- package/dist/pipeline/coverage-audit.js +1 -1
- package/dist/pipeline/degradations.d.ts +1 -1
- package/dist/pipeline/degradations.js +1 -1
- package/dist/pipeline/expand-tasks.d.ts +2 -2
- package/dist/pipeline/expand-tasks.js +2 -2
- package/dist/pipeline/failure-modes.d.ts +1 -1
- package/dist/pipeline/failure-modes.js +13 -1
- package/dist/pipeline/gap-analysis.d.ts +1 -1
- package/dist/pipeline/gap-analysis.js +3 -1
- package/dist/pipeline/generate-configs.d.ts +2 -2
- package/dist/pipeline/generate-configs.js +16 -9
- package/dist/pipeline/grader-compare-runner.d.ts +1 -1
- package/dist/pipeline/grader-compare-runner.js +7 -1
- package/dist/pipeline/grader-comparison.d.ts +1 -1
- package/dist/pipeline/grader-comparison.js +1 -1
- package/dist/pipeline/grader-consistency-runner.d.ts +1 -1
- package/dist/pipeline/grader-consistency-runner.js +7 -1
- package/dist/pipeline/grader-consistency.d.ts +1 -1
- package/dist/pipeline/grader-consistency.js +1 -1
- package/dist/pipeline/grader-sensitivity-runner.d.ts +1 -1
- package/dist/pipeline/grader-sensitivity-runner.js +1 -1
- package/dist/pipeline/grader-sensitivity.d.ts +1 -1
- package/dist/pipeline/grader-sensitivity.js +1 -1
- package/dist/pipeline/grader-validate-runner.d.ts +1 -1
- package/dist/pipeline/grader-validate-runner.js +2 -2
- package/dist/pipeline/grader-validation.d.ts +1 -1
- package/dist/pipeline/grader-validation.js +1 -1
- package/dist/pipeline/map-request-to-config.js +16 -2
- package/dist/pipeline/mirror-repo-tasks.d.ts +8 -8
- package/dist/pipeline/mirror-repo-tasks.js +10 -10
- package/dist/pipeline/plan-format.d.ts +1 -1
- package/dist/pipeline/plan-format.js +1 -1
- package/dist/pipeline/plan.d.ts +1 -1
- package/dist/pipeline/plan.js +68 -30
- package/dist/pipeline/probe.d.ts +1 -1
- package/dist/pipeline/probe.js +1 -1
- package/dist/pipeline/readiness-report.d.ts +2 -2
- package/dist/pipeline/readiness-report.js +2 -2
- package/dist/pipeline/release-classification.d.ts +1 -1
- package/dist/pipeline/release-classification.js +1 -1
- package/dist/pipeline/release-report.d.ts +1 -1
- package/dist/pipeline/release-report.js +1 -1
- package/dist/pipeline/repo-eval-comment.d.ts +1 -1
- package/dist/pipeline/repo-eval-comment.js +1 -1
- package/dist/pipeline/repo-threshold-evaluator.d.ts +1 -1
- package/dist/pipeline/repo-threshold-evaluator.js +1 -1
- package/dist/pipeline/resolve-mappings.d.ts +6 -6
- package/dist/pipeline/resolve-mappings.js +44 -44
- package/dist/pipeline/retrieval-metrics.d.ts +3 -3
- package/dist/pipeline/retrieval-metrics.js +28 -20
- package/dist/pipeline/steps/calculate-scores-step.d.ts +11 -0
- package/dist/pipeline/steps/calculate-scores-step.js +89 -0
- package/dist/pipeline/steps/compare-step.d.ts +18 -0
- package/dist/pipeline/steps/compare-step.js +90 -0
- package/dist/pipeline/steps/eval-step.d.ts +53 -0
- package/dist/pipeline/steps/eval-step.js +347 -0
- package/dist/pipeline/steps/fetch-docs-step.d.ts +11 -0
- package/dist/pipeline/steps/fetch-docs-step.js +84 -0
- package/dist/pipeline/steps/generate-configs-step.d.ts +11 -0
- package/dist/pipeline/steps/generate-configs-step.js +98 -0
- package/dist/pipeline/steps/grader-consistency-step.d.ts +21 -0
- package/dist/pipeline/steps/grader-consistency-step.js +74 -0
- package/dist/pipeline/steps/publish-report-step.d.ts +57 -0
- package/dist/pipeline/steps/publish-report-step.js +243 -0
- package/dist/pipeline/steps/report-step.d.ts +13 -0
- package/dist/pipeline/steps/report-step.js +56 -0
- package/dist/pipeline/steps/update-scores-step.d.ts +11 -0
- package/dist/pipeline/steps/update-scores-step.js +42 -0
- package/dist/pipeline/targeted-loo.d.ts +1 -1
- package/dist/pipeline/targeted-loo.js +1 -1
- package/dist/pipeline/thresholds.d.ts +1 -1
- package/dist/pipeline/thresholds.js +1 -1
- package/dist/pipeline/validate.js +13 -0
- package/dist/report-store.d.ts +17 -0
- package/dist/report-store.js +24 -0
- package/dist/scripts/agent-behavior-report.d.ts +19 -0
- package/dist/scripts/agent-behavior-report.js +315 -0
- package/dist/scripts/baseline.d.ts +43 -0
- package/dist/scripts/baseline.js +267 -0
- package/dist/scripts/calculate-scores.d.ts +166 -0
- package/dist/scripts/calculate-scores.js +1296 -0
- package/dist/scripts/compare.d.ts +22 -0
- package/dist/scripts/compare.js +334 -0
- package/dist/scripts/coverage-audit.d.ts +44 -0
- package/dist/scripts/coverage-audit.js +209 -0
- package/dist/scripts/debug-eval.d.ts +19 -0
- package/dist/scripts/debug-eval.js +73 -0
- package/dist/scripts/discovery-report.d.ts +58 -0
- package/dist/scripts/discovery-report.js +250 -0
- package/dist/scripts/fetch-docs.d.ts +35 -0
- package/dist/scripts/fetch-docs.js +472 -0
- package/dist/scripts/generate-configs.d.ts +66 -0
- package/dist/scripts/generate-configs.js +459 -0
- package/dist/scripts/grader-api.d.ts +27 -0
- package/dist/scripts/grader-api.js +206 -0
- package/dist/scripts/grader-compare.d.ts +22 -0
- package/dist/scripts/grader-compare.js +368 -0
- package/dist/scripts/grader-consistency.d.ts +20 -0
- package/dist/scripts/grader-consistency.js +313 -0
- package/dist/scripts/grader-sensitivity.d.ts +22 -0
- package/dist/scripts/grader-sensitivity.js +354 -0
- package/dist/scripts/grader-validate.d.ts +19 -0
- package/dist/scripts/grader-validate.js +267 -0
- package/dist/scripts/measure-retrieval.d.ts +10 -0
- package/dist/scripts/measure-retrieval.js +145 -0
- package/dist/scripts/migrate-task-mode.d.ts +1 -1
- package/dist/scripts/migrate-task-mode.js +1 -1
- package/dist/scripts/migrate-tasks-to-content-lake.d.ts +1 -1
- package/dist/scripts/migrate-tasks-to-content-lake.js +1 -1
- package/dist/scripts/pipeline.d.ts +76 -0
- package/dist/scripts/pipeline.js +1031 -0
- package/dist/scripts/pr-comment.d.ts +10 -0
- package/dist/scripts/pr-comment.js +510 -0
- package/dist/scripts/readiness-report.d.ts +88 -0
- package/dist/scripts/readiness-report.js +342 -0
- package/dist/scripts/update-quality-scores.d.ts +15 -0
- package/dist/scripts/update-quality-scores.js +184 -0
- package/dist/scripts/validate-task-sources.d.ts +1 -1
- package/dist/scripts/validate-task-sources.js +1 -1
- package/dist/scripts/validate.d.ts +13 -0
- package/dist/scripts/validate.js +79 -0
- package/dist/scripts/webhook-server.d.ts +26 -0
- package/dist/scripts/webhook-server.js +147 -0
- package/dist/scripts/weekly-digest.d.ts +24 -0
- package/dist/scripts/weekly-digest.js +144 -0
- package/dist/sinks/format-slack.d.ts +64 -0
- package/dist/sinks/format-slack.js +306 -0
- package/dist/sinks/slack-sink.d.ts +27 -0
- package/dist/sinks/slack-sink.js +78 -0
- package/dist/sinks/types.d.ts +1 -1
- package/dist/sinks/types.js +1 -1
- package/dist/sinks/webhook-sink.d.ts +19 -0
- package/dist/sinks/webhook-sink.js +50 -0
- package/dist/tasks/knowledge-probe/define-type-api.task.ts +66 -0
- package/dist/tasks/knowledge-probe/groq-projections.task.ts +62 -0
- package/dist/tasks/literacy/content-lake.task.ts +181 -0
- package/dist/tasks/literacy/frameworks.task.ts +129 -0
- package/dist/tasks/literacy/functions.task.ts +70 -0
- package/dist/tasks/literacy/groq.task.ts +259 -0
- package/dist/tasks/literacy/image-handling.task.ts +95 -0
- package/dist/tasks/literacy/nextjs-live.task.ts +76 -0
- package/dist/tasks/literacy/portable-text.task.ts +169 -0
- package/dist/tasks/literacy/studio-setup.task.ts +134 -0
- package/dist/tasks/literacy/visual-editing.task.ts +147 -0
- package/package.json +32 -24
- package/tasks/.expanded.agentic.yaml +280 -0
- package/tasks/.expanded.yaml +565 -0
- package/tasks/knowledge-probe/define-type-api.task.ts +11 -0
- package/tasks/knowledge-probe/groq-projections.task.ts +3 -0
- package/tasks/literacy/content-lake.task.ts +181 -0
- package/tasks/literacy/frameworks.task.ts +1 -0
- package/tasks/literacy/functions.task.ts +1 -0
- package/tasks/literacy/groq.task.ts +1 -0
- package/tasks/literacy/image-handling.task.ts +95 -0
- package/tasks/literacy/nextjs-live.task.ts +2 -1
- package/tasks/literacy/portable-text.task.ts +169 -0
- package/tasks/literacy/studio-setup.task.ts +5 -2
- package/tasks/literacy/visual-editing.task.ts +1 -0
- package/LICENSE +0 -21
- package/tasks/frameworks.yaml +0 -98
- package/tasks/functions.yaml +0 -51
- package/tasks/groq.yaml +0 -216
- package/tasks/nextjs-live.yaml +0 -62
- package/tasks/studio-setup.yaml +0 -111
- package/tasks/visual-editing.yaml +0 -120
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Environment variable bridge — writes ResolvedConfig values to process.env
|
|
3
|
+
* so that lib/*.ts modules (which still read process.env) work correctly.
|
|
4
|
+
*
|
|
5
|
+
* This replaces the former global applyEnvironment() with an explicit
|
|
6
|
+
* per-step bridge. Each orchestration step calls this before invoking
|
|
7
|
+
* its lib/*.ts main() function.
|
|
8
|
+
*
|
|
9
|
+
* Phase 9 will eliminate this file entirely by giving lib/*.ts main()
|
|
10
|
+
* functions typed option parameters.
|
|
11
|
+
*
|
|
12
|
+
* @see docs/exec-plans/active/ports-and-adapters/phase-8-delete-legacy-step-layer.md
|
|
13
|
+
*/
|
|
14
|
+
import type { ResolvedConfig } from "../_vendor/ailf-core/index.d.ts";
|
|
15
|
+
/**
|
|
16
|
+
* Bridge ResolvedConfig values to process.env.
|
|
17
|
+
*
|
|
18
|
+
* Idempotent — safe to call multiple times. Only sets env vars for
|
|
19
|
+
* config values that are defined (never deletes or resets).
|
|
20
|
+
*/
|
|
21
|
+
export declare function bridgeConfigToEnv(config: ResolvedConfig): void;
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Environment variable bridge — writes ResolvedConfig values to process.env
|
|
3
|
+
* so that lib/*.ts modules (which still read process.env) work correctly.
|
|
4
|
+
*
|
|
5
|
+
* This replaces the former global applyEnvironment() with an explicit
|
|
6
|
+
* per-step bridge. Each orchestration step calls this before invoking
|
|
7
|
+
* its lib/*.ts main() function.
|
|
8
|
+
*
|
|
9
|
+
* Phase 9 will eliminate this file entirely by giving lib/*.ts main()
|
|
10
|
+
* functions typed option parameters.
|
|
11
|
+
*
|
|
12
|
+
* @see docs/exec-plans/active/ports-and-adapters/phase-8-delete-legacy-step-layer.md
|
|
13
|
+
*/
|
|
14
|
+
/**
 * Bridge ResolvedConfig values to process.env.
 *
 * Idempotent — safe to call multiple times. Only sets env vars for
 * config values that are defined (never deletes or resets).
 *
 * Variables written (each only when its config value is present):
 *   EVAL_MODE, EVAL_SEARCH_MODE, DOC_SOURCE, DOC_BASE_URL, SANITY_DATASET,
 *   SANITY_PROJECT_ID, SANITY_PERSPECTIVE, SANITY_STUDIO_ORIGIN,
 *   SANITY_DOCUMENT_IDS, DOC_HEADERS, DOC_ALLOWED_ORIGINS,
 *   EVAL_FILTER_AREAS, EVAL_FILTER_TASKS.
 *
 * @param {object} config - Resolved pipeline configuration.
 * @returns {void}
 */
export function bridgeConfigToEnv(config) {
    // Mode
    // process.env coerces every assigned value to a string, so an absent
    // mode must be skipped — otherwise the literal "undefined" is stored.
    if (config.mode != null) {
        process.env.EVAL_MODE = config.mode;
    }
    // Search mode
    // "open" is never written; a missing value is skipped for the same
    // string-coercion reason as above.
    if (config.searchMode != null && config.searchMode !== "open") {
        process.env.EVAL_SEARCH_MODE = config.searchMode;
    }
    // Source
    if (config.source) {
        process.env.DOC_SOURCE = config.source;
    }
    // URL-derived overrides (only the first URL is bridged)
    if (config.urls?.[0]) {
        process.env.DOC_BASE_URL = config.urls[0];
    }
    // Sanity overrides
    if (config.datasetOverride) {
        process.env.SANITY_DATASET = config.datasetOverride;
    }
    if (config.projectIdOverride) {
        process.env.SANITY_PROJECT_ID = config.projectIdOverride;
    }
    if (config.perspectiveOverride) {
        process.env.SANITY_PERSPECTIVE = config.perspectiveOverride;
    }
    if (config.studioOriginOverride) {
        process.env.SANITY_STUDIO_ORIGIN = config.studioOriginOverride;
    }
    if (config.sanityDocumentArgs?.length) {
        process.env.SANITY_DOCUMENT_IDS = config.sanityDocumentArgs.join(",");
    }
    // Custom headers (serialized as JSON)
    if (config.headers) {
        process.env.DOC_HEADERS = JSON.stringify(config.headers);
    }
    // Allowed origins
    if (config.allowedOrigins?.length) {
        process.env.DOC_ALLOWED_ORIGINS = config.allowedOrigins.join(",");
    }
    // Scoping filters
    // Deliberately a presence check rather than a length check: an empty
    // array still writes "", overwriting a filter set by an earlier call.
    if (config.areas) {
        process.env.EVAL_FILTER_AREAS = config.areas.join(",");
    }
    if (config.tasks) {
        process.env.EVAL_FILTER_TASKS = config.tasks.join(",");
    }
}
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared task loading for pipeline orchestration steps.
|
|
3
|
+
*
|
|
4
|
+
* Both FetchDocsStep and GenerateConfigsStep need to see the same set of
|
|
5
|
+
* tasks. This function loads from filesystem .task.ts files — the
|
|
6
|
+
* authoritative source for the current pipeline architecture.
|
|
7
|
+
*
|
|
8
|
+
* Background: The composition root wires ctx.taskSource to
|
|
9
|
+
* ContentLakeTaskSource by default, but GenerateConfigsStep bypasses it
|
|
10
|
+
* and loads directly from the filesystem. FetchDocsStep must use the
|
|
11
|
+
* same source to avoid a mismatch where configs reference context files
|
|
12
|
+
* that were never fetched.
|
|
13
|
+
*
|
|
14
|
+
* @see packages/eval/src/orchestration/steps/generate-configs-step.ts
|
|
15
|
+
* @see packages/eval/src/orchestration/steps/fetch-docs-step.ts
|
|
16
|
+
*/
|
|
17
|
+
import type { GeneralizedTaskDefinition } from "../_vendor/ailf-core/index.d.ts";
|
|
18
|
+
export interface LoadPipelineTasksOptions {
|
|
19
|
+
/** Absolute path to the eval package root (packages/eval) */
|
|
20
|
+
rootDir: string;
|
|
21
|
+
/** Evaluation mode — determines the tasks/{mode}/ subdirectory */
|
|
22
|
+
mode: string;
|
|
23
|
+
/** Optional extra directory for repo-based tasks (--repo-tasks-path) */
|
|
24
|
+
repoTasksPath?: string;
|
|
25
|
+
}
|
|
26
|
+
/**
|
|
27
|
+
* Load task definitions from the filesystem, matching the pipeline's
|
|
28
|
+
* authoritative task source.
|
|
29
|
+
*
|
|
30
|
+
* Discovers and loads `*.task.ts` files from `tasks/{mode}/` and
|
|
31
|
+
* optionally `--repo-tasks-path`. Tasks whose `mode` field doesn't
|
|
32
|
+
* match the requested mode are excluded.
|
|
33
|
+
*/
|
|
34
|
+
export declare function loadPipelineTasks(opts: LoadPipelineTasksOptions): Promise<GeneralizedTaskDefinition[]>;
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared task loading for pipeline orchestration steps.
|
|
3
|
+
*
|
|
4
|
+
* Both FetchDocsStep and GenerateConfigsStep need to see the same set of
|
|
5
|
+
* tasks. This function loads from filesystem .task.ts files — the
|
|
6
|
+
* authoritative source for the current pipeline architecture.
|
|
7
|
+
*
|
|
8
|
+
* Background: The composition root wires ctx.taskSource to
|
|
9
|
+
* ContentLakeTaskSource by default, but GenerateConfigsStep bypasses it
|
|
10
|
+
* and loads directly from the filesystem. FetchDocsStep must use the
|
|
11
|
+
* same source to avoid a mismatch where configs reference context files
|
|
12
|
+
* that were never fetched.
|
|
13
|
+
*
|
|
14
|
+
* @see packages/eval/src/orchestration/steps/generate-configs-step.ts
|
|
15
|
+
* @see packages/eval/src/orchestration/steps/fetch-docs-step.ts
|
|
16
|
+
*/
|
|
17
|
+
import { resolve } from "path";
|
|
18
|
+
import { discoverTsTaskFiles, loadTsTaskFile, } from "../adapters/task-sources/task-file-loader.js";
|
|
19
|
+
import { resolveVendoredSubdir } from "../pipeline/compiler/config-loader.js";
|
|
20
|
+
/**
 * Read task definitions straight from `*.task.ts` files on disk.
 *
 * The search covers the vendored `tasks/{mode}/` directory and, when
 * `opts.repoTasksPath` is given, that directory as well (resolved to an
 * absolute path and skipped if it is the same directory). A task is kept
 * when it has no `mode` property at all, or when its `mode` equals
 * `opts.mode`.
 *
 * @param opts - `{ rootDir, mode, repoTasksPath? }`
 * @returns Promise resolving to the collected task definitions, in
 *   directory → file → declaration order.
 */
export async function loadPipelineTasks(opts) {
    const searchDirs = [resolveVendoredSubdir(opts.rootDir, `tasks/${opts.mode}`)];
    if (opts.repoTasksPath) {
        const extraDir = resolve(opts.repoTasksPath);
        const alreadyListed = searchDirs.includes(extraDir);
        if (!alreadyListed) {
            searchDirs.push(extraDir);
        }
    }
    const collected = [];
    for (const searchDir of searchDirs) {
        for (const taskFile of discoverTsTaskFiles(searchDir)) {
            // Files are loaded one at a time, in discovery order.
            const loaded = await loadTsTaskFile(taskFile);
            for (const candidate of loaded.tasks) {
                // Drop tasks that declare a different mode; tasks without a
                // `mode` property are always kept.
                if ("mode" in candidate && candidate.mode !== opts.mode) {
                    continue;
                }
                collected.push(candidate);
            }
        }
    }
    return collected;
}
|
|
@@ -20,10 +20,20 @@ import { runStep } from "./step-runner.js";
|
|
|
20
20
|
* underlying Sanity client. Best-effort — failures are logged and
|
|
21
21
|
* never block the pipeline.
|
|
22
22
|
*/
|
|
23
|
-
async function reportJobProgress(ctx, stepName, completedSteps, totalSteps, status, errorInfo) {
|
|
23
|
+
async function reportJobProgress(ctx, stepName, completedSteps, totalSteps, status, errorInfo, jobUpdates) {
|
|
24
24
|
const jobId = ctx.config.jobId;
|
|
25
25
|
if (!jobId)
|
|
26
26
|
return;
|
|
27
|
+
// Accumulate update for artifact capture
|
|
28
|
+
jobUpdates?.push({
|
|
29
|
+
jobId,
|
|
30
|
+
stepName,
|
|
31
|
+
completedSteps,
|
|
32
|
+
totalSteps,
|
|
33
|
+
status,
|
|
34
|
+
errorInfo,
|
|
35
|
+
timestamp: new Date().toISOString(),
|
|
36
|
+
});
|
|
27
37
|
// Use the report store's write capability to patch the job document.
|
|
28
38
|
// The report store exposes a Sanity client — we access it through
|
|
29
39
|
// a best-effort PATCH via the same client infrastructure.
|
|
@@ -59,6 +69,51 @@ async function reportJobProgress(ctx, stepName, completedSteps, totalSteps, stat
|
|
|
59
69
|
}
|
|
60
70
|
}
|
|
61
71
|
// ---------------------------------------------------------------------------
|
|
72
|
+
// Artifact capture
|
|
73
|
+
// ---------------------------------------------------------------------------
|
|
74
|
+
/**
 * Capture a snapshot of the pipeline config, final state, and step results.
 *
 * Secrets are stripped from the config before capture: any property whose
 * name looks sensitive (token, secret, key, password, authorization,
 * cookie) is dropped at every nesting level, and the custom request
 * `headers` map is dropped entirely because it may carry credentials.
 *
 * Does nothing when the collector is disabled.
 *
 * @param ctx - App context; reads `ctx.collector` and `ctx.config`.
 * @param state - Pipeline state; a fixed set of fields is copied.
 * @param results - Map of step name → step result.
 */
function capturePipelineContext(ctx, state, results) {
    if (!ctx.collector.enabled)
        return;
    const sanitized = redactSensitiveEntries(ctx.config, new Set());
    ctx.collector.capture("pipeline", "pipeline-context", {
        config: sanitized,
        state: {
            reportId: state.reportId,
            evalFingerprint: state.evalFingerprint,
            belowCritical: state.belowCritical,
            // Copied into a plain array so the snapshot is serializable.
            remoteCacheHits: state.remoteCacheHits
                ? [...state.remoteCacheHits]
                : undefined,
            releaseAutoScope: state.releaseAutoScope,
            testSummary: state.testSummary,
        },
        steps: Object.entries(results).map(([name, result]) => ({
            name,
            status: result.status,
            // Skipped steps report no duration.
            durationMs: result.status !== "skipped" ? result.durationMs : undefined,
        })),
    });
}
/** Copy an object's own entries, dropping sensitive keys and redacting nested values. */
function redactSensitiveEntries(source, ancestors) {
    const sensitiveKey = /token|secret|key|password|authorization|cookie|^headers$/i;
    ancestors.add(source);
    const kept = Object.entries(source)
        .filter(([k]) => !sensitiveKey.test(k))
        .map(([k, v]) => [k, redactSensitiveValue(v, ancestors)]);
    ancestors.delete(source);
    return Object.fromEntries(kept);
}
/** Redact inside arrays and plain objects; every other value is returned untouched. */
function redactSensitiveValue(value, ancestors) {
    if (value === null || typeof value !== "object")
        return value;
    // Guard against self-referencing config so redaction cannot recurse forever.
    if (ancestors.has(value))
        return "[Circular]";
    if (Array.isArray(value)) {
        ancestors.add(value);
        const items = value.map((item) => redactSensitiveValue(item, ancestors));
        ancestors.delete(value);
        return items;
    }
    const proto = Object.getPrototypeOf(value);
    // Only plain objects are walked; Dates, Maps, class instances are kept as-is.
    if (proto !== Object.prototype && proto !== null)
        return value;
    return redactSensitiveEntries(value, ancestors);
}
|
|
101
|
+
/**
 * Ask the collector to flush whatever it has captured.
 *
 * Best-effort: a successful flush is logged at info level with the artifact
 * count and destination; any error raised while flushing or logging is
 * downgraded to a warning and never propagates to the caller.
 * Does nothing when the collector is disabled.
 *
 * @param ctx - App context; uses `ctx.collector` and `ctx.logger`.
 */
async function flushArtifacts(ctx) {
    if (ctx.collector.enabled) {
        try {
            const summary = await ctx.collector.flush();
            ctx.logger.info(`Captured ${summary.artifactCount} artifacts → ${summary.destination}`);
        }
        catch (failure) {
            // Non-Error throwables are interpolated as-is.
            const detail = failure instanceof Error ? failure.message : failure;
            ctx.logger.warn(`Artifact capture flush failed: ${detail}`);
        }
    }
}
|
|
116
|
+
// ---------------------------------------------------------------------------
|
|
62
117
|
// Orchestrator
|
|
63
118
|
// ---------------------------------------------------------------------------
|
|
64
119
|
/**
|
|
@@ -76,6 +131,7 @@ export async function orchestratePipeline(ctx, steps) {
|
|
|
76
131
|
const validation = { issues: [], valid: true };
|
|
77
132
|
const pipelineStart = Date.now();
|
|
78
133
|
const hasJob = !!ctx.config.jobId;
|
|
134
|
+
const jobUpdates = [];
|
|
79
135
|
ctx.logger.section("ai-literacy-framework — Evaluation Pipeline");
|
|
80
136
|
ctx.logger.debug(`Pipeline starting with ${steps.length} steps`, {
|
|
81
137
|
steps: steps.map((s) => s.name),
|
|
@@ -86,7 +142,7 @@ export async function orchestratePipeline(ctx, steps) {
|
|
|
86
142
|
});
|
|
87
143
|
// Report initial running status
|
|
88
144
|
if (hasJob) {
|
|
89
|
-
await reportJobProgress(ctx, steps[0]?.name ?? "init", 0, steps.length, "running");
|
|
145
|
+
await reportJobProgress(ctx, steps[0]?.name ?? "init", 0, steps.length, "running", undefined, jobUpdates);
|
|
90
146
|
}
|
|
91
147
|
for (let i = 0; i < steps.length; i++) {
|
|
92
148
|
const step = steps[i];
|
|
@@ -94,7 +150,7 @@ export async function orchestratePipeline(ctx, steps) {
|
|
|
94
150
|
ctx.logger.section(step.name);
|
|
95
151
|
// Report current step progress
|
|
96
152
|
if (hasJob) {
|
|
97
|
-
await reportJobProgress(ctx, step.name, i, steps.length, "running");
|
|
153
|
+
await reportJobProgress(ctx, step.name, i, steps.length, "running", undefined, jobUpdates);
|
|
98
154
|
}
|
|
99
155
|
const result = await runStep(step, ctx, state);
|
|
100
156
|
results[step.name] = result;
|
|
@@ -111,8 +167,15 @@ export async function orchestratePipeline(ctx, steps) {
|
|
|
111
167
|
await reportJobProgress(ctx, step.name, i + 1, steps.length, "failed", {
|
|
112
168
|
message: failedError,
|
|
113
169
|
step: step.name,
|
|
114
|
-
});
|
|
170
|
+
}, jobUpdates);
|
|
115
171
|
}
|
|
172
|
+
// Capture pipeline context and job updates before flushing
|
|
173
|
+
capturePipelineContext(ctx, state, results);
|
|
174
|
+
if (jobUpdates.length > 0) {
|
|
175
|
+
ctx.collector.capture("job-store", "job-updates", jobUpdates);
|
|
176
|
+
}
|
|
177
|
+
// Flush captured artifacts even on failure (partial capture is useful)
|
|
178
|
+
await flushArtifacts(ctx);
|
|
116
179
|
return {
|
|
117
180
|
belowCritical: state.belowCritical,
|
|
118
181
|
durationMs: Date.now() - pipelineStart,
|
|
@@ -129,7 +192,7 @@ export async function orchestratePipeline(ctx, steps) {
|
|
|
129
192
|
}
|
|
130
193
|
// Report step completion
|
|
131
194
|
if (hasJob) {
|
|
132
|
-
await reportJobProgress(ctx, step.name, i + 1, steps.length, "running");
|
|
195
|
+
await reportJobProgress(ctx, step.name, i + 1, steps.length, "running", undefined, jobUpdates);
|
|
133
196
|
}
|
|
134
197
|
}
|
|
135
198
|
const durationMs = Date.now() - pipelineStart;
|
|
@@ -166,6 +229,13 @@ export async function orchestratePipeline(ctx, steps) {
|
|
|
166
229
|
ctx.logger.warn("Failed to report job completion — continuing");
|
|
167
230
|
}
|
|
168
231
|
}
|
|
232
|
+
// Capture pipeline context and job updates before flushing
|
|
233
|
+
capturePipelineContext(ctx, state, results);
|
|
234
|
+
if (jobUpdates.length > 0) {
|
|
235
|
+
ctx.collector.capture("job-store", "job-updates", jobUpdates);
|
|
236
|
+
}
|
|
237
|
+
// Flush captured artifacts (non-blocking — failures never affect pipeline result)
|
|
238
|
+
await flushArtifacts(ctx);
|
|
169
239
|
return {
|
|
170
240
|
belowCritical: state.belowCritical,
|
|
171
241
|
durationMs,
|
|
@@ -36,8 +36,12 @@ export async function runStep(step, ctx, state = {}) {
|
|
|
36
36
|
if (canCache) {
|
|
37
37
|
try {
|
|
38
38
|
const inputs = step.cacheInputs(ctx);
|
|
39
|
+
const context = step.cacheContext?.(ctx);
|
|
39
40
|
ctx.logger.debug(`[${step.name}] Cache inputs: ${inputs.length} files`);
|
|
40
|
-
|
|
41
|
+
if (context?.length) {
|
|
42
|
+
ctx.logger.debug(`[${step.name}] Cache context: ${context.join(", ")}`);
|
|
43
|
+
}
|
|
44
|
+
const key = await ctx.cache.computeKey(inputs, context);
|
|
41
45
|
cacheKey = key;
|
|
42
46
|
ctx.logger.debug(`[${step.name}] Cache key: ${key}`);
|
|
43
47
|
const cached = await ctx.cache.lookup(step.name, key);
|
|
@@ -4,9 +4,11 @@
|
|
|
4
4
|
* Calls calculateAndWriteScores() from pipeline/calculate-scores.ts with
|
|
5
5
|
* typed options derived from AppContext. No env bridge needed.
|
|
6
6
|
*/
|
|
7
|
+
import { existsSync } from "node:fs";
|
|
7
8
|
import { join } from "path";
|
|
8
9
|
import { LiteracyVariant } from "../../pipeline/normalize-mode.js";
|
|
9
10
|
import { getStepInputPaths } from "../../pipeline/cache.js";
|
|
11
|
+
import { buildCacheContext } from "../cache-context.js";
|
|
10
12
|
import { calculateAndWriteScores } from "../../pipeline/calculate-scores.js";
|
|
11
13
|
import { checkResultsExist, checkScoreSummaryValid, } from "../../pipeline/checks.js";
|
|
12
14
|
import { resultsFileForMode } from "../../pipeline/eval-constants.js";
|
|
@@ -118,6 +120,14 @@ export class CalculateScoresStep {
|
|
|
118
120
|
if (belowCritical.length > 0) {
|
|
119
121
|
state.belowCritical = belowCritical;
|
|
120
122
|
}
|
|
123
|
+
// Capture score artifacts
|
|
124
|
+
const resultsDir = join(ctx.config.rootDir, "results", "latest");
|
|
125
|
+
for (const file of ["score-summary.json", "grader-judgments.json"]) {
|
|
126
|
+
const filePath = join(resultsDir, file);
|
|
127
|
+
if (existsSync(filePath)) {
|
|
128
|
+
ctx.collector.captureFile("calculate-scores", file.replace(".json", ""), filePath);
|
|
129
|
+
}
|
|
130
|
+
}
|
|
121
131
|
const criticalSuffix = belowCritical.length > 0
|
|
122
132
|
? ` (${belowCritical.length} area(s) below critical threshold: ${belowCritical.join(", ")})`
|
|
123
133
|
: "";
|
|
@@ -130,4 +140,7 @@ export class CalculateScoresStep {
|
|
|
130
140
|
cacheInputs(ctx) {
|
|
131
141
|
return getStepInputPaths(ctx.config.rootDir, "calculate-scores");
|
|
132
142
|
}
|
|
143
|
+
cacheContext(ctx) {
|
|
144
|
+
return buildCacheContext(ctx.config);
|
|
145
|
+
}
|
|
133
146
|
}
|
|
@@ -52,11 +52,20 @@ export class CallbackStep {
|
|
|
52
52
|
}
|
|
53
53
|
// Deliver callback — read reportId from pipeline state (set by PublishReportStep)
|
|
54
54
|
ctx.logger.info(`Delivering results to ${this.callback.url}`);
|
|
55
|
-
const
|
|
55
|
+
const callbackPayload = {
|
|
56
56
|
deliveredAt: new Date().toISOString(),
|
|
57
57
|
jobId: this.jobId,
|
|
58
58
|
reportId: state.reportId,
|
|
59
59
|
summary,
|
|
60
|
+
};
|
|
61
|
+
// Capture callback payload (Tier 2 — no secrets: headers are NOT captured)
|
|
62
|
+
ctx.collector.capture("callback", "callback-payload", callbackPayload);
|
|
63
|
+
const result = await deliverCallback(this.callback, callbackPayload);
|
|
64
|
+
// Capture callback response status (not the body — that's the user's system)
|
|
65
|
+
ctx.collector.capture("callback", "callback-response", {
|
|
66
|
+
ok: result.ok,
|
|
67
|
+
attempts: result.attempts,
|
|
68
|
+
error: result.error,
|
|
60
69
|
});
|
|
61
70
|
if (result.ok) {
|
|
62
71
|
return {
|
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
* inlined directly from the former pipeline/steps/compare-step.ts.
|
|
6
6
|
* This is an optional step — failure doesn't stop the pipeline.
|
|
7
7
|
*/
|
|
8
|
-
import { existsSync, readFileSync, readdirSync, writeFileSync } from "fs";
|
|
8
|
+
import { existsSync, mkdirSync, readFileSync, readdirSync, writeFileSync, } from "fs";
|
|
9
9
|
import { join, resolve } from "path";
|
|
10
10
|
import { compare } from "../../pipeline/compare.js";
|
|
11
11
|
export class CompareStep {
|
|
@@ -65,9 +65,12 @@ export class CompareStep {
|
|
|
65
65
|
? { noiseThreshold: ctx.config.compareThreshold }
|
|
66
66
|
: undefined;
|
|
67
67
|
const report = compare(baseline, experiment, options);
|
|
68
|
-
// Write report
|
|
69
|
-
|
|
68
|
+
// Write report to outputDir (respects --output-dir)
|
|
69
|
+
mkdirSync(ctx.config.outputDir, { recursive: true });
|
|
70
|
+
const reportPath = resolve(ctx.config.outputDir, "comparison-report.json");
|
|
70
71
|
writeFileSync(reportPath, JSON.stringify(report, null, 2));
|
|
72
|
+
// Capture comparison report
|
|
73
|
+
ctx.collector.captureFile("compare", "comparison-report", reportPath);
|
|
71
74
|
// Build summary
|
|
72
75
|
const improved = report.improved.length;
|
|
73
76
|
const regressed = report.regressed.length;
|
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
* Calls pure functions from pipeline/discovery-report.ts directly.
|
|
5
5
|
* Optional step — failure doesn't stop the pipeline.
|
|
6
6
|
*/
|
|
7
|
-
import { existsSync, readFileSync, writeFileSync } from "fs";
|
|
7
|
+
import { existsSync, mkdirSync, readFileSync, writeFileSync } from "fs";
|
|
8
8
|
import { resolve } from "path";
|
|
9
9
|
import { formatDiscoveryMarkdown, generateDiscoveryReport, } from "../../pipeline/discovery-report.js";
|
|
10
10
|
export class DiscoveryReportStep {
|
|
@@ -34,7 +34,11 @@ export class DiscoveryReportStep {
|
|
|
34
34
|
}
|
|
35
35
|
const report = generateDiscoveryReport(scoreSummary, ctx.config.areas);
|
|
36
36
|
const md = formatDiscoveryMarkdown(report);
|
|
37
|
-
|
|
37
|
+
// Write to outputDir (respects --output-dir)
|
|
38
|
+
mkdirSync(ctx.config.outputDir, { recursive: true });
|
|
39
|
+
const discoveryPath = resolve(ctx.config.outputDir, "discovery-report.md");
|
|
40
|
+
writeFileSync(discoveryPath, md);
|
|
41
|
+
ctx.collector.captureFile("discovery-report", "discovery-report", discoveryPath);
|
|
38
42
|
console.log(md);
|
|
39
43
|
const invisible = report.invisibleDocs.length;
|
|
40
44
|
const f1 = report.overall.avgF1.toFixed(2);
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shell delegation for the fetch-docs step.
|
|
3
|
+
*
|
|
4
|
+
* Isolates the execSync call so it can be replaced when the pipeline
|
|
5
|
+
* fully migrates to the DocFetcher port.
|
|
6
|
+
*/
|
|
7
|
+
export interface ShellResult {
|
|
8
|
+
ok: boolean;
|
|
9
|
+
error?: string;
|
|
10
|
+
}
|
|
11
|
+
/**
|
|
12
|
+
* Run `pnpm fetch-docs` via shell.
|
|
13
|
+
*
|
|
14
|
+
* Returns a result object instead of throwing so the step can
|
|
15
|
+
* handle the failure uniformly.
|
|
16
|
+
*/
|
|
17
|
+
export declare function runFetchDocsShell(rootDir: string, source?: string): ShellResult;
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shell delegation for the fetch-docs step.
|
|
3
|
+
*
|
|
4
|
+
* Isolates the execSync call so it can be replaced when the pipeline
|
|
5
|
+
* fully migrates to the DocFetcher port.
|
|
6
|
+
*/
|
|
7
|
+
import { execSync } from "child_process";
|
|
8
|
+
/**
 * Run `pnpm fetch-docs` via shell.
 *
 * Returns a result object instead of throwing so the step can
 * handle the failure uniformly.
 *
 * @param {string} rootDir - Working directory for the command.
 * @param {string} [source] - Optional source name, forwarded as `--source`.
 *   Because it is interpolated into a shell command line, it must consist
 *   only of letters, digits and `_ . @ : / + = , -`; anything else is
 *   rejected without running the command.
 * @returns {{ ok: boolean, error?: string }} `ok: true` on success,
 *   otherwise `ok: false` with the failure message.
 */
export function runFetchDocsShell(rootDir, source) {
    // The command is executed through a shell, so whitespace or shell
    // metacharacters in `source` would split or inject commands. Refuse
    // such values up front rather than attempting platform-specific quoting.
    if (source && !/^[\w.@:/+=,-]+$/.test(source)) {
        return {
            ok: false,
            error: `Invalid source name: ${JSON.stringify(source)}`,
        };
    }
    try {
        const sourceArg = source ? ` --source ${source}` : "";
        execSync(`pnpm fetch-docs${sourceArg}`, {
            cwd: rootDir,
            // Child inherits the current environment and stdio.
            env: process.env,
            stdio: "inherit",
        });
        return { ok: true };
    }
    catch (err) {
        return {
            ok: false,
            error: err instanceof Error ? err.message : String(err),
        };
    }
}
|
|
@@ -10,11 +10,13 @@
|
|
|
10
10
|
* and stores a `releaseAutoScope` entry in PipelineState. Downstream
|
|
11
11
|
* steps (GenerateConfigsStep, RunEvalStep) use this to narrow scope.
|
|
12
12
|
*/
|
|
13
|
-
import { mkdirSync, writeFileSync } from "fs";
|
|
13
|
+
import { existsSync, mkdirSync, writeFileSync } from "fs";
|
|
14
14
|
import { join } from "path";
|
|
15
15
|
import { isIdRef, isPathRef, isSlugRef, } from "../../_vendor/ailf-core/index.js";
|
|
16
16
|
import { getStepInputPaths } from "../../pipeline/cache.js";
|
|
17
|
+
import { buildCacheContext } from "../cache-context.js";
|
|
17
18
|
import { checkCanonicalContextsExist } from "../../pipeline/checks.js";
|
|
19
|
+
import { loadPipelineTasks } from "../load-pipeline-tasks.js";
|
|
18
20
|
import { loadSource } from "../../sources.js";
|
|
19
21
|
import { configToSourceOverrides } from "../config-to-source-overrides.js";
|
|
20
22
|
export class FetchDocsStep {
|
|
@@ -27,16 +29,22 @@ export class FetchDocsStep {
|
|
|
27
29
|
return { status: "skipped", reason: "--skip-fetch" };
|
|
28
30
|
}
|
|
29
31
|
const start = Date.now();
|
|
30
|
-
//
|
|
31
|
-
|
|
32
|
+
// Load tasks from the filesystem — the same source GenerateConfigsStep
|
|
33
|
+
// uses. This replaces ctx.taskSource (ContentLakeTaskSource) which may
|
|
34
|
+
// have no ailf.task documents, causing a mismatch where generated
|
|
35
|
+
// configs reference context files that were never fetched.
|
|
36
|
+
const allTasks = await loadPipelineTasks({
|
|
37
|
+
rootDir: ctx.config.rootDir,
|
|
38
|
+
mode: ctx.config.mode,
|
|
39
|
+
repoTasksPath: ctx.config.repoTasksPath,
|
|
40
|
+
});
|
|
32
41
|
// Bridge: narrow to literacy tasks for canonical doc access
|
|
33
42
|
const literacyTasks = allTasks.filter((t) => t.mode === "literacy");
|
|
34
43
|
const tasksWithDocs = literacyTasks.filter((t) => (t.context?.docs?.length ?? 0) > 0);
|
|
35
44
|
if (tasksWithDocs.length === 0) {
|
|
36
45
|
return {
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
status: "failed",
|
|
46
|
+
status: "skipped",
|
|
47
|
+
reason: "No literacy tasks with canonical_docs — nothing to fetch",
|
|
40
48
|
};
|
|
41
49
|
}
|
|
42
50
|
// Resolve source once with typed overrides
|
|
@@ -72,6 +80,21 @@ export class FetchDocsStep {
|
|
|
72
80
|
if (result.metadata) {
|
|
73
81
|
writeMetadataFiles(ctx.config.rootDir, result.metadata);
|
|
74
82
|
}
|
|
83
|
+
// Capture metadata files (mode-specific extras)
|
|
84
|
+
if (ctx.collector.extrasEnabled) {
|
|
85
|
+
const contextsDir = join(ctx.config.rootDir, "contexts");
|
|
86
|
+
for (const [type, filename] of [
|
|
87
|
+
["document-manifest", "document-manifest.json"],
|
|
88
|
+
["release-impact", "release-impact.json"],
|
|
89
|
+
["document-overlay", "document-overlay.json"],
|
|
90
|
+
["url-fetch", "url-fetch.json"],
|
|
91
|
+
]) {
|
|
92
|
+
const filePath = join(contextsDir, filename);
|
|
93
|
+
if (existsSync(filePath)) {
|
|
94
|
+
ctx.collector.captureFile("fetch-docs", type, filePath);
|
|
95
|
+
}
|
|
96
|
+
}
|
|
97
|
+
}
|
|
75
98
|
}
|
|
76
99
|
catch (err) {
|
|
77
100
|
return {
|
|
@@ -118,19 +141,9 @@ export class FetchDocsStep {
|
|
|
118
141
|
cacheInputs(ctx) {
|
|
119
142
|
return getStepInputPaths(ctx.config.rootDir, "fetch-docs");
|
|
120
143
|
}
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
// ---------------------------------------------------------------------------
|
|
125
|
-
function buildFilter(ctx) {
|
|
126
|
-
const { areas, tasks, tags } = ctx.config;
|
|
127
|
-
if (!areas && !tasks && !tags)
|
|
128
|
-
return undefined;
|
|
129
|
-
return {
|
|
130
|
-
...(areas ? { areas } : {}),
|
|
131
|
-
...(tasks ? { taskIds: tasks } : {}),
|
|
132
|
-
...(tags ? { tags } : {}),
|
|
133
|
-
};
|
|
144
|
+
cacheContext(ctx) {
|
|
145
|
+
return buildCacheContext(ctx.config);
|
|
146
|
+
}
|
|
134
147
|
}
|
|
135
148
|
/**
|
|
136
149
|
* Write metadata files returned by DocFetcher to the contexts/ directory.
|
|
@@ -14,7 +14,7 @@
|
|
|
14
14
|
*
|
|
15
15
|
* This is an optional step — failure doesn't stop the pipeline.
|
|
16
16
|
*/
|
|
17
|
-
import { existsSync, readFileSync, writeFileSync } from "fs";
|
|
17
|
+
import { existsSync, mkdirSync, readFileSync, writeFileSync } from "fs";
|
|
18
18
|
import { join, resolve } from "path";
|
|
19
19
|
import { isSlugRef } from "../../_vendor/ailf-core/index.js";
|
|
20
20
|
export class GapAnalysisStep {
|
|
@@ -56,7 +56,9 @@ export class GapAnalysisStep {
|
|
|
56
56
|
console.log(formatFailureModesConsole(failureModeReport));
|
|
57
57
|
const gapReport = buildGapAnalysisReport(failureModeReport, scoreSummary.scores);
|
|
58
58
|
console.log(formatGapAnalysisConsole(gapReport));
|
|
59
|
-
|
|
59
|
+
// Write user-facing artifacts to outputDir (respects --output-dir)
|
|
60
|
+
const outDir = ctx.config.outputDir;
|
|
61
|
+
mkdirSync(outDir, { recursive: true });
|
|
60
62
|
writeFileSync(join(outDir, "failure-modes.json"), JSON.stringify(failureModeReport, null, 2));
|
|
61
63
|
writeFileSync(join(outDir, "gap-analysis.json"), JSON.stringify(gapReport, null, 2));
|
|
62
64
|
const manifestPath = resolve(root, "contexts", "document-manifest.json");
|
|
@@ -166,6 +168,15 @@ export class GapAnalysisStep {
|
|
|
166
168
|
scores: enrichedScores,
|
|
167
169
|
};
|
|
168
170
|
writeFileSync(scoreSummaryPath, JSON.stringify(enrichedSummary, null, 2));
|
|
171
|
+
// Capture gap analysis artifacts
|
|
172
|
+
const failureModesPath = join(outDir, "failure-modes.json");
|
|
173
|
+
if (existsSync(failureModesPath)) {
|
|
174
|
+
ctx.collector.captureFile("gap-analysis", "failure-modes", failureModesPath);
|
|
175
|
+
}
|
|
176
|
+
const gapReportPath = join(outDir, "gap-analysis.json");
|
|
177
|
+
if (existsSync(gapReportPath)) {
|
|
178
|
+
ctx.collector.captureFile("gap-analysis", "gap-report", gapReportPath);
|
|
179
|
+
}
|
|
169
180
|
const gapCount = gapReport.gaps.length;
|
|
170
181
|
const classRate = failureModeReport.classificationRate.toFixed(0);
|
|
171
182
|
return {
|