@sanity/ailf 2.0.0 → 2.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/canonical/grader-references/README.md +2 -2
- package/canonical/reference-solutions/content-lake/mutations.ts +160 -0
- package/canonical/reference-solutions/content-lake/realtime.ts +187 -0
- package/canonical/reference-solutions/image-handling/asset-pipeline.tsx +166 -0
- package/canonical/reference-solutions/portable-text/custom-blocks.ts +204 -0
- package/canonical/reference-solutions/portable-text/rendering.tsx +163 -0
- package/config/features.ts +1 -1
- package/config/models.ts +28 -23
- package/config/sources.ts +1 -1
- package/config/thresholds.ts +1 -1
- package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.d.ts +10 -0
- package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.js +185 -0
- package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.d.ts +6 -0
- package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.js +42 -0
- package/dist/_vendor/ailf-core/artifact-capture/noop-collector.d.ts +14 -0
- package/dist/_vendor/ailf-core/artifact-capture/noop-collector.js +25 -0
- package/dist/_vendor/ailf-core/config-helpers.d.ts +6 -0
- package/dist/_vendor/ailf-core/config-helpers.js +29 -0
- package/dist/_vendor/ailf-core/examples/index.d.ts +164 -94
- package/dist/_vendor/ailf-core/examples/index.js +208 -114
- package/dist/_vendor/ailf-core/index.d.ts +1 -0
- package/dist/_vendor/ailf-core/index.js +1 -0
- package/dist/_vendor/ailf-core/ports/artifact-collector.d.ts +94 -0
- package/dist/_vendor/ailf-core/ports/artifact-collector.js +13 -0
- package/dist/_vendor/ailf-core/ports/capture-comparator.d.ts +138 -0
- package/dist/_vendor/ailf-core/ports/capture-comparator.js +10 -0
- package/dist/_vendor/ailf-core/ports/context.d.ts +20 -1
- package/dist/_vendor/ailf-core/ports/eval-runner.d.ts +6 -0
- package/dist/_vendor/ailf-core/ports/index.d.ts +2 -0
- package/dist/_vendor/ailf-core/ports/pipeline-step.d.ts +11 -0
- package/dist/_vendor/ailf-core/ports/task-source.d.ts +3 -3
- package/dist/_vendor/ailf-core/ports/task-source.js +3 -3
- package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +6 -1
- package/dist/_vendor/ailf-core/schemas/pipeline-request.js +14 -2
- package/dist/_vendor/ailf-core/services/config-helpers.d.ts +16 -1
- package/dist/_vendor/ailf-core/services/config-helpers.js +21 -0
- package/dist/_vendor/ailf-core/services/index.d.ts +1 -1
- package/dist/_vendor/ailf-core/services/index.js +1 -1
- package/dist/_vendor/ailf-core/services/scoring.js +9 -0
- package/dist/_vendor/ailf-core/types/generalized-task.d.ts +12 -1
- package/dist/_vendor/ailf-core/types/generalized-task.js +1 -1
- package/dist/_vendor/ailf-core/types/index.d.ts +47 -4
- package/dist/_vendor/ailf-core/types/plugin-registry.d.ts +27 -0
- package/dist/_vendor/ailf-shared/eval-modes.d.ts +15 -0
- package/dist/_vendor/ailf-shared/eval-modes.js +18 -0
- package/dist/_vendor/ailf-tasks/cli.d.ts +8 -0
- package/dist/_vendor/ailf-tasks/cli.js +61 -0
- package/dist/_vendor/ailf-tasks/index.d.ts +13 -0
- package/dist/_vendor/ailf-tasks/index.js +16 -0
- package/dist/_vendor/ailf-tasks/parser.d.ts +27 -0
- package/dist/_vendor/ailf-tasks/parser.js +73 -0
- package/dist/_vendor/ailf-tasks/schemas.d.ts +198 -0
- package/dist/_vendor/ailf-tasks/schemas.js +180 -0
- package/dist/_vendor/ailf-tasks/validation.d.ts +47 -0
- package/dist/_vendor/ailf-tasks/validation.js +162 -0
- package/dist/adapters/api-client/remediation.js +2 -2
- package/dist/adapters/config-sources/file-config-adapter.js +6 -1
- package/dist/adapters/eval-runners/promptfoo-eval-adapter.js +8 -2
- package/dist/adapters/index.d.ts +0 -1
- package/dist/adapters/index.js +0 -1
- package/dist/adapters/task-sources/composite-task-source.d.ts +1 -1
- package/dist/adapters/task-sources/composite-task-source.js +1 -1
- package/dist/adapters/task-sources/content-lake-task-source.d.ts +4 -6
- package/dist/adapters/task-sources/content-lake-task-source.js +4 -6
- package/dist/adapters/task-sources/index.d.ts +1 -2
- package/dist/adapters/task-sources/index.js +1 -2
- package/dist/adapters/task-sources/repo-schemas.d.ts +1 -1
- package/dist/adapters/task-sources/repo-schemas.js +2 -2
- package/dist/adapters/task-sources/repo-task-source.js +1 -1
- package/dist/adapters/task-sources/repo-trigger.d.ts +1 -1
- package/dist/adapters/task-sources/repo-trigger.js +1 -1
- package/dist/adapters/task-sources/task-file-loader.d.ts +9 -6
- package/dist/adapters/task-sources/task-file-loader.js +20 -6
- package/dist/agent-observer/test-imports.d.ts +7 -0
- package/dist/agent-observer/test-imports.js +185 -0
- package/dist/artifact-capture/comparator.d.ts +22 -0
- package/dist/artifact-capture/comparator.js +493 -0
- package/dist/artifact-capture/filesystem-collector.d.ts +42 -0
- package/dist/artifact-capture/filesystem-collector.js +237 -0
- package/dist/artifact-capture/redact-artifact.d.ts +20 -0
- package/dist/artifact-capture/redact-artifact.js +115 -0
- package/dist/assertions/source-isolation.d.ts +1 -1
- package/dist/assertions/source-isolation.js +1 -1
- package/dist/cli.js +4 -0
- package/dist/commands/calculate-scores.js +1 -0
- package/dist/commands/capture-compare.d.ts +15 -0
- package/dist/commands/capture-compare.js +253 -0
- package/dist/commands/capture-list.d.ts +12 -0
- package/dist/commands/capture-list.js +147 -0
- package/dist/commands/capture.d.ts +9 -0
- package/dist/commands/capture.js +16 -0
- package/dist/commands/chronic-failures.d.ts +8 -0
- package/dist/commands/chronic-failures.js +33 -0
- package/dist/commands/explain-handler.d.ts +1 -1
- package/dist/commands/explain-handler.js +37 -8
- package/dist/commands/fetch-docs.js +1 -0
- package/dist/commands/generate-configs.d.ts +3 -3
- package/dist/commands/generate-configs.js +20 -8
- package/dist/commands/init.d.ts +2 -3
- package/dist/commands/init.js +56 -170
- package/dist/commands/pipeline-action.d.ts +7 -1
- package/dist/commands/pipeline-action.js +43 -19
- package/dist/commands/pipeline.d.ts +6 -1
- package/dist/commands/pipeline.js +7 -2
- package/dist/commands/pr-comment.js +1 -0
- package/dist/commands/publish.js +1 -0
- package/dist/commands/shared/help.js +2 -2
- package/dist/commands/update-quality-scores.d.ts +5 -0
- package/dist/commands/update-quality-scores.js +20 -0
- package/dist/composition-root.d.ts +2 -3
- package/dist/composition-root.js +27 -14
- package/dist/config/features.ts +23 -0
- package/dist/config/models.ts +100 -0
- package/dist/config/prompts.ts +16 -0
- package/dist/config/rubrics.ts +225 -0
- package/dist/config/schedules.ts +47 -0
- package/dist/config/sinks.ts +37 -0
- package/dist/config/sources.ts +21 -0
- package/dist/config/thresholds.ts +61 -0
- package/dist/lib/agent-behavior-report.d.ts +8 -0
- package/dist/lib/agent-behavior-report.js +185 -0
- package/dist/lib/baseline.d.ts +19 -0
- package/dist/lib/baseline.js +153 -0
- package/dist/lib/calculate-scores.d.ts +23 -0
- package/dist/lib/calculate-scores.js +42 -0
- package/dist/lib/compare.d.ts +18 -0
- package/dist/lib/compare.js +170 -0
- package/dist/lib/coverage-audit.d.ts +4 -0
- package/dist/lib/coverage-audit.js +42 -0
- package/dist/lib/discovery-report.d.ts +13 -0
- package/dist/lib/discovery-report.js +57 -0
- package/dist/lib/fetch-docs.d.ts +30 -0
- package/dist/lib/fetch-docs.js +171 -0
- package/dist/lib/generate-configs.d.ts +25 -0
- package/dist/lib/generate-configs.js +42 -0
- package/dist/lib/grader-api.d.ts +21 -0
- package/dist/lib/grader-api.js +34 -0
- package/dist/lib/grader-compare.d.ts +19 -0
- package/dist/lib/grader-compare.js +91 -0
- package/dist/lib/grader-consistency.d.ts +27 -0
- package/dist/lib/grader-consistency.js +79 -0
- package/dist/lib/grader-sensitivity.d.ts +19 -0
- package/dist/lib/grader-sensitivity.js +75 -0
- package/dist/lib/grader-validate.d.ts +19 -0
- package/dist/lib/grader-validate.js +78 -0
- package/dist/lib/measure-retrieval.d.ts +14 -0
- package/dist/lib/measure-retrieval.js +71 -0
- package/dist/lib/pr-comment.d.ts +16 -0
- package/dist/lib/pr-comment.js +28 -0
- package/dist/lib/readiness-report.d.ts +13 -0
- package/dist/lib/readiness-report.js +108 -0
- package/dist/lib/webhook-server.d.ts +11 -0
- package/dist/lib/webhook-server.js +24 -0
- package/dist/lib/weekly-digest.d.ts +24 -0
- package/dist/lib/weekly-digest.js +148 -0
- package/dist/orchestration/build-app-context.js +13 -0
- package/dist/orchestration/cache-context.d.ts +23 -0
- package/dist/orchestration/cache-context.js +43 -0
- package/dist/orchestration/env-bridge.d.ts +21 -0
- package/dist/orchestration/env-bridge.js +66 -0
- package/dist/orchestration/load-pipeline-tasks.d.ts +34 -0
- package/dist/orchestration/load-pipeline-tasks.js +52 -0
- package/dist/orchestration/pipeline-orchestrator.js +75 -5
- package/dist/orchestration/step-runner.js +5 -1
- package/dist/orchestration/steps/calculate-scores-step.d.ts +1 -0
- package/dist/orchestration/steps/calculate-scores-step.js +13 -0
- package/dist/orchestration/steps/callback-step.js +10 -1
- package/dist/orchestration/steps/compare-step.js +6 -3
- package/dist/orchestration/steps/discovery-report-step.js +6 -2
- package/dist/orchestration/steps/fetch-docs-shell.d.ts +17 -0
- package/dist/orchestration/steps/fetch-docs-shell.js +30 -0
- package/dist/orchestration/steps/fetch-docs-step.d.ts +1 -0
- package/dist/orchestration/steps/fetch-docs-step.js +30 -16
- package/dist/orchestration/steps/gap-analysis-step.js +13 -2
- package/dist/orchestration/steps/generate-configs-step.d.ts +1 -0
- package/dist/orchestration/steps/generate-configs-step.js +50 -15
- package/dist/orchestration/steps/mirror-repo-tasks-step.d.ts +1 -1
- package/dist/orchestration/steps/mirror-repo-tasks-step.js +1 -1
- package/dist/orchestration/steps/publish-report-step.js +19 -0
- package/dist/orchestration/steps/readiness-step.js +8 -3
- package/dist/orchestration/steps/report-step.js +17 -4
- package/dist/orchestration/steps/run-eval-step.d.ts +1 -0
- package/dist/orchestration/steps/run-eval-step.js +52 -32
- package/dist/pipeline/agent-behavior-report.js +6 -0
- package/dist/pipeline/attribution.d.ts +1 -1
- package/dist/pipeline/attribution.js +1 -1
- package/dist/pipeline/cache.js +29 -15
- package/dist/pipeline/calculate-scores.d.ts +2 -0
- package/dist/pipeline/calculate-scores.js +70 -33
- package/dist/pipeline/checks.d.ts +8 -3
- package/dist/pipeline/checks.js +23 -3
- package/dist/pipeline/chronic-failures.d.ts +55 -0
- package/dist/pipeline/chronic-failures.js +110 -0
- package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +33 -0
- package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.js +2 -3
- package/dist/pipeline/compiler/__tests__/task-bridge.test.d.ts +9 -0
- package/dist/pipeline/compiler/__tests__/task-bridge.test.js +339 -0
- package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.d.ts +10 -0
- package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.js +509 -0
- package/dist/pipeline/compiler/assertion-mapper.d.ts +1 -1
- package/dist/pipeline/compiler/assertion-mapper.js +1 -1
- package/dist/pipeline/compiler/compiler-to-yaml.d.ts +2 -7
- package/dist/pipeline/compiler/compiler-to-yaml.js +2 -7
- package/dist/pipeline/compiler/config-loader.d.ts +14 -0
- package/dist/pipeline/compiler/config-loader.js +42 -2
- package/dist/pipeline/compiler/fixture-resolver.d.ts +1 -1
- package/dist/pipeline/compiler/fixture-resolver.js +1 -1
- package/dist/pipeline/compiler/ignore-fields.d.ts +1 -1
- package/dist/pipeline/compiler/ignore-fields.js +1 -1
- package/dist/pipeline/compiler/index.d.ts +2 -5
- package/dist/pipeline/compiler/index.js +2 -5
- package/dist/pipeline/compiler/literacy-bridge.d.ts +1 -1
- package/dist/pipeline/compiler/literacy-bridge.js +1 -1
- package/dist/pipeline/compiler/mode-bases/agent-harness.d.ts +1 -1
- package/dist/pipeline/compiler/mode-bases/agent-harness.js +1 -1
- package/dist/pipeline/compiler/mode-bases/knowledge-probe.d.ts +1 -1
- package/dist/pipeline/compiler/mode-bases/knowledge-probe.js +1 -1
- package/dist/pipeline/compiler/mode-bases/literacy.d.ts +13 -2
- package/dist/pipeline/compiler/mode-bases/literacy.js +55 -1
- package/dist/pipeline/compiler/mode-bases/mcp-server.d.ts +1 -1
- package/dist/pipeline/compiler/mode-bases/mcp-server.js +1 -1
- package/dist/pipeline/compiler/mode-handlers/agent-harness/index.d.ts +1 -1
- package/dist/pipeline/compiler/mode-handlers/agent-harness/index.js +1 -1
- package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.d.ts +70 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.js +485 -0
- package/dist/pipeline/compiler/mode-handlers/index.d.ts +2 -2
- package/dist/pipeline/compiler/mode-handlers/index.js +2 -2
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.d.ts +1 -1
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.js +1 -1
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.d.ts +76 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.js +245 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/index.d.ts +1 -1
- package/dist/pipeline/compiler/mode-handlers/literacy/index.js +1 -1
- package/dist/pipeline/compiler/mode-handlers/literacy-handler.d.ts +89 -0
- package/dist/pipeline/compiler/mode-handlers/literacy-handler.js +379 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-assertions.d.ts +50 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-assertions.js +334 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.d.ts +1 -1
- package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.js +1 -1
- package/dist/pipeline/compiler/mode-handlers/mcp-server/index.d.ts +1 -1
- package/dist/pipeline/compiler/mode-handlers/mcp-server/index.js +1 -1
- package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.d.ts +69 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.js +307 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.js +22 -5
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.js +6 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.d.ts +10 -5
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.js +314 -7
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.d.ts +10 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.d.ts +65 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.js +368 -0
- package/dist/pipeline/compiler/presets/sanity-literacy.d.ts +1 -1
- package/dist/pipeline/compiler/presets/sanity-literacy.js +1 -1
- package/dist/pipeline/compiler/promptfoo-compiler.d.ts +1 -4
- package/dist/pipeline/compiler/promptfoo-compiler.js +3 -12
- package/dist/pipeline/compiler/provider-assembler.js +13 -7
- package/dist/pipeline/compiler/sandbox/docker-sandbox.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/docker-sandbox.js +1 -1
- package/dist/pipeline/compiler/sandbox/fixture-provisioner.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/fixture-provisioner.js +1 -1
- package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.js +1 -1
- package/dist/pipeline/compiler/sandbox/index.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/index.js +1 -1
- package/dist/pipeline/compiler/sandbox/sandbox-selector.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/sandbox-selector.js +1 -1
- package/dist/pipeline/compiler/sandbox/sandbox-strategy.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/sandbox-strategy.js +1 -1
- package/dist/pipeline/compiler/sandbox/tempdir-sandbox.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/tempdir-sandbox.js +1 -1
- package/dist/pipeline/compiler/scoring-bridge.d.ts +1 -1
- package/dist/pipeline/compiler/scoring-bridge.js +1 -1
- package/dist/pipeline/compiler/task-bridge.d.ts +41 -0
- package/dist/pipeline/compiler/task-bridge.js +92 -0
- package/dist/pipeline/compiler/task-graph-builder.d.ts +1 -4
- package/dist/pipeline/compiler/task-graph-builder.js +1 -4
- package/dist/pipeline/compiler/telemetry/index.d.ts +1 -1
- package/dist/pipeline/compiler/telemetry/index.js +1 -1
- package/dist/pipeline/compiler/variable-resolver.d.ts +1 -1
- package/dist/pipeline/compiler/variable-resolver.js +1 -1
- package/dist/pipeline/coverage-audit.d.ts +1 -1
- package/dist/pipeline/coverage-audit.js +1 -1
- package/dist/pipeline/degradations.d.ts +1 -1
- package/dist/pipeline/degradations.js +1 -1
- package/dist/pipeline/failure-modes.d.ts +1 -1
- package/dist/pipeline/failure-modes.js +13 -1
- package/dist/pipeline/gap-analysis.d.ts +1 -1
- package/dist/pipeline/gap-analysis.js +3 -1
- package/dist/pipeline/generate-configs.d.ts +2 -2
- package/dist/pipeline/generate-configs.js +15 -8
- package/dist/pipeline/grader-compare-runner.d.ts +1 -1
- package/dist/pipeline/grader-compare-runner.js +7 -1
- package/dist/pipeline/grader-comparison.d.ts +1 -1
- package/dist/pipeline/grader-comparison.js +1 -1
- package/dist/pipeline/grader-consistency-runner.d.ts +1 -1
- package/dist/pipeline/grader-consistency-runner.js +7 -1
- package/dist/pipeline/grader-consistency.d.ts +1 -1
- package/dist/pipeline/grader-consistency.js +1 -1
- package/dist/pipeline/grader-sensitivity-runner.d.ts +1 -1
- package/dist/pipeline/grader-sensitivity-runner.js +1 -1
- package/dist/pipeline/grader-sensitivity.d.ts +1 -1
- package/dist/pipeline/grader-sensitivity.js +1 -1
- package/dist/pipeline/grader-validate-runner.d.ts +1 -1
- package/dist/pipeline/grader-validate-runner.js +2 -2
- package/dist/pipeline/grader-validation.d.ts +1 -1
- package/dist/pipeline/grader-validation.js +1 -1
- package/dist/pipeline/map-request-to-config.js +15 -2
- package/dist/pipeline/mirror-repo-tasks.d.ts +1 -1
- package/dist/pipeline/mirror-repo-tasks.js +1 -1
- package/dist/pipeline/plan-format.d.ts +1 -1
- package/dist/pipeline/plan-format.js +1 -1
- package/dist/pipeline/plan.d.ts +1 -1
- package/dist/pipeline/plan.js +67 -29
- package/dist/pipeline/probe.d.ts +1 -1
- package/dist/pipeline/probe.js +1 -1
- package/dist/pipeline/readiness-report.d.ts +2 -2
- package/dist/pipeline/readiness-report.js +2 -2
- package/dist/pipeline/release-classification.d.ts +1 -1
- package/dist/pipeline/release-classification.js +1 -1
- package/dist/pipeline/release-report.d.ts +1 -1
- package/dist/pipeline/release-report.js +1 -1
- package/dist/pipeline/repo-eval-comment.d.ts +1 -1
- package/dist/pipeline/repo-eval-comment.js +1 -1
- package/dist/pipeline/repo-threshold-evaluator.d.ts +1 -1
- package/dist/pipeline/repo-threshold-evaluator.js +1 -1
- package/dist/pipeline/resolve-mappings.d.ts +6 -6
- package/dist/pipeline/resolve-mappings.js +44 -44
- package/dist/pipeline/retrieval-metrics.d.ts +3 -3
- package/dist/pipeline/retrieval-metrics.js +28 -20
- package/dist/pipeline/steps/calculate-scores-step.d.ts +11 -0
- package/dist/pipeline/steps/calculate-scores-step.js +89 -0
- package/dist/pipeline/steps/compare-step.d.ts +18 -0
- package/dist/pipeline/steps/compare-step.js +90 -0
- package/dist/pipeline/steps/eval-step.d.ts +53 -0
- package/dist/pipeline/steps/eval-step.js +347 -0
- package/dist/pipeline/steps/fetch-docs-step.d.ts +11 -0
- package/dist/pipeline/steps/fetch-docs-step.js +84 -0
- package/dist/pipeline/steps/generate-configs-step.d.ts +11 -0
- package/dist/pipeline/steps/generate-configs-step.js +98 -0
- package/dist/pipeline/steps/grader-consistency-step.d.ts +21 -0
- package/dist/pipeline/steps/grader-consistency-step.js +74 -0
- package/dist/pipeline/steps/publish-report-step.d.ts +57 -0
- package/dist/pipeline/steps/publish-report-step.js +243 -0
- package/dist/pipeline/steps/report-step.d.ts +13 -0
- package/dist/pipeline/steps/report-step.js +56 -0
- package/dist/pipeline/steps/update-scores-step.d.ts +11 -0
- package/dist/pipeline/steps/update-scores-step.js +42 -0
- package/dist/pipeline/targeted-loo.d.ts +1 -1
- package/dist/pipeline/targeted-loo.js +1 -1
- package/dist/pipeline/thresholds.d.ts +1 -1
- package/dist/pipeline/thresholds.js +1 -1
- package/dist/pipeline/validate.js +13 -0
- package/dist/report-store.d.ts +17 -0
- package/dist/report-store.js +24 -0
- package/dist/scripts/agent-behavior-report.d.ts +19 -0
- package/dist/scripts/agent-behavior-report.js +315 -0
- package/dist/scripts/baseline.d.ts +43 -0
- package/dist/scripts/baseline.js +267 -0
- package/dist/scripts/calculate-scores.d.ts +166 -0
- package/dist/scripts/calculate-scores.js +1296 -0
- package/dist/scripts/compare.d.ts +22 -0
- package/dist/scripts/compare.js +334 -0
- package/dist/scripts/coverage-audit.d.ts +44 -0
- package/dist/scripts/coverage-audit.js +209 -0
- package/dist/scripts/debug-eval.d.ts +19 -0
- package/dist/scripts/debug-eval.js +73 -0
- package/dist/scripts/discovery-report.d.ts +58 -0
- package/dist/scripts/discovery-report.js +250 -0
- package/dist/scripts/fetch-docs.d.ts +35 -0
- package/dist/scripts/fetch-docs.js +472 -0
- package/dist/scripts/generate-configs.d.ts +66 -0
- package/dist/scripts/generate-configs.js +459 -0
- package/dist/scripts/grader-api.d.ts +27 -0
- package/dist/scripts/grader-api.js +206 -0
- package/dist/scripts/grader-compare.d.ts +22 -0
- package/dist/scripts/grader-compare.js +368 -0
- package/dist/scripts/grader-consistency.d.ts +20 -0
- package/dist/scripts/grader-consistency.js +313 -0
- package/dist/scripts/grader-sensitivity.d.ts +22 -0
- package/dist/scripts/grader-sensitivity.js +354 -0
- package/dist/scripts/grader-validate.d.ts +19 -0
- package/dist/scripts/grader-validate.js +267 -0
- package/dist/scripts/measure-retrieval.d.ts +10 -0
- package/dist/scripts/measure-retrieval.js +145 -0
- package/dist/scripts/migrate-task-mode.d.ts +1 -1
- package/dist/scripts/migrate-task-mode.js +1 -1
- package/dist/scripts/migrate-tasks-to-content-lake.d.ts +1 -1
- package/dist/scripts/migrate-tasks-to-content-lake.js +1 -1
- package/dist/scripts/pipeline.d.ts +76 -0
- package/dist/scripts/pipeline.js +1031 -0
- package/dist/scripts/pr-comment.d.ts +10 -0
- package/dist/scripts/pr-comment.js +510 -0
- package/dist/scripts/readiness-report.d.ts +88 -0
- package/dist/scripts/readiness-report.js +342 -0
- package/dist/scripts/update-quality-scores.d.ts +15 -0
- package/dist/scripts/update-quality-scores.js +184 -0
- package/dist/scripts/validate-task-sources.d.ts +1 -1
- package/dist/scripts/validate-task-sources.js +1 -1
- package/dist/scripts/validate.d.ts +13 -0
- package/dist/scripts/validate.js +79 -0
- package/dist/scripts/webhook-server.d.ts +26 -0
- package/dist/scripts/webhook-server.js +147 -0
- package/dist/scripts/weekly-digest.d.ts +24 -0
- package/dist/scripts/weekly-digest.js +144 -0
- package/dist/sinks/format-slack.d.ts +64 -0
- package/dist/sinks/format-slack.js +306 -0
- package/dist/sinks/slack-sink.d.ts +27 -0
- package/dist/sinks/slack-sink.js +78 -0
- package/dist/sinks/types.d.ts +1 -1
- package/dist/sinks/types.js +1 -1
- package/dist/sinks/webhook-sink.d.ts +19 -0
- package/dist/sinks/webhook-sink.js +50 -0
- package/dist/tasks/knowledge-probe/define-type-api.task.ts +66 -0
- package/dist/tasks/knowledge-probe/groq-projections.task.ts +62 -0
- package/dist/tasks/literacy/content-lake.task.ts +181 -0
- package/dist/tasks/literacy/frameworks.task.ts +129 -0
- package/dist/tasks/literacy/functions.task.ts +70 -0
- package/dist/tasks/literacy/groq.task.ts +259 -0
- package/dist/tasks/literacy/image-handling.task.ts +95 -0
- package/dist/tasks/literacy/nextjs-live.task.ts +76 -0
- package/dist/tasks/literacy/portable-text.task.ts +169 -0
- package/dist/tasks/literacy/studio-setup.task.ts +134 -0
- package/dist/tasks/literacy/visual-editing.task.ts +147 -0
- package/package.json +25 -25
- package/tasks/.expanded.agentic.yaml +280 -0
- package/tasks/.expanded.yaml +565 -0
- package/tasks/knowledge-probe/define-type-api.task.ts +11 -0
- package/tasks/knowledge-probe/groq-projections.task.ts +3 -0
- package/tasks/literacy/content-lake.task.ts +181 -0
- package/tasks/literacy/frameworks.task.ts +1 -0
- package/tasks/literacy/functions.task.ts +1 -0
- package/tasks/literacy/groq.task.ts +1 -0
- package/tasks/literacy/image-handling.task.ts +95 -0
- package/tasks/literacy/nextjs-live.task.ts +2 -1
- package/tasks/literacy/portable-text.task.ts +169 -0
- package/tasks/literacy/studio-setup.task.ts +5 -2
- package/tasks/literacy/visual-editing.task.ts +1 -0
- package/LICENSE +0 -21
- package/tasks/frameworks.yaml +0 -98
- package/tasks/functions.yaml +0 -51
- package/tasks/groq.yaml +0 -216
- package/tasks/nextjs-live.yaml +0 -62
- package/tasks/studio-setup.yaml +0 -111
- package/tasks/visual-editing.yaml +0 -120
|
@@ -8,7 +8,9 @@
|
|
|
8
8
|
* Once all commands construct ResolvedConfig directly (or use --config),
|
|
9
9
|
* this bridge can be deleted.
|
|
10
10
|
*/
|
|
11
|
+
import { join } from "node:path";
|
|
11
12
|
import { createAppContext } from "../composition-root.js";
|
|
13
|
+
import { tryLoadConfigFile } from "../pipeline/compiler/config-loader.js";
|
|
12
14
|
/**
|
|
13
15
|
* Map legacy ResolvedOptions to ResolvedConfig.
|
|
14
16
|
*
|
|
@@ -50,6 +52,7 @@ export function mapToResolvedConfig(opts, rootDir) {
|
|
|
50
52
|
noCache: opts.noCache,
|
|
51
53
|
noRemoteCache: opts.noRemoteCache,
|
|
52
54
|
graderReplications: opts.graderReplications,
|
|
55
|
+
outputDir: opts.outputDir,
|
|
53
56
|
outputPath: opts.outputPath,
|
|
54
57
|
urls: opts.urlArgs.length > 0 ? opts.urlArgs : undefined,
|
|
55
58
|
headers: opts.headerArgs.length > 0
|
|
@@ -75,6 +78,10 @@ export function mapToResolvedConfig(opts, rootDir) {
|
|
|
75
78
|
remote: opts.remote ?? false,
|
|
76
79
|
apiUrl: opts.apiUrl ?? "https://ailf-api.sanity.build",
|
|
77
80
|
apiKey: opts.apiKey,
|
|
81
|
+
captureEnabled: opts.captureEnabled ?? false,
|
|
82
|
+
captureDir: opts.captureDir ?? join(rootDir, "results", "captures"),
|
|
83
|
+
captureCompress: opts.captureCompress ?? true,
|
|
84
|
+
captureExtras: opts.captureExtras ?? true,
|
|
78
85
|
};
|
|
79
86
|
}
|
|
80
87
|
/**
|
|
@@ -85,5 +92,11 @@ export function mapToResolvedConfig(opts, rootDir) {
|
|
|
85
92
|
*/
|
|
86
93
|
export function buildAppContext(opts, rootDir) {
    // Start from the CLI-derived configuration.
    const resolved = mapToResolvedConfig(opts, rootDir);
    // evalBudgetMs has no CLI flag: it lives on ModelsConfig, so it is read
    // from the "models" config file and layered onto the resolved config.
    // Optional chaining tolerates a missing file result or a missing `data`.
    const fileBudgetMs = tryLoadConfigFile("models", rootDir)?.data?.evalBudgetMs;
    if (fileBudgetMs) {
        resolved.evalBudgetMs = fileBudgetMs;
    }
    return createAppContext(resolved);
}
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Build cache context strings from the resolved pipeline configuration.
|
|
3
|
+
*
|
|
4
|
+
* These non-file strings participate in cache key computation so that
|
|
5
|
+
* different CLI filters (mode, variant, area, task, tag) produce
|
|
6
|
+
* distinct cache entries. Without them, running `--mode knowledge-probe`
|
|
7
|
+
* after `--mode literacy` would return cached literacy results.
|
|
8
|
+
*
|
|
9
|
+
* @see packages/core/src/ports/pipeline-step.ts — cacheContext() method
|
|
10
|
+
* @see packages/eval/src/pipeline/cache.ts — hashFiles() context parameter
|
|
11
|
+
*/
|
|
12
|
+
import type { ResolvedConfig } from "../_vendor/ailf-core/index.d.ts";
|
|
13
|
+
/**
|
|
14
|
+
* Derive deterministic context strings from the resolved pipeline config.
|
|
15
|
+
*
|
|
16
|
+
* Included in every cacheable step's key so that:
|
|
17
|
+
* - `--mode literacy` and `--mode knowledge-probe` never share cache
|
|
18
|
+
* - `--variant agentic` and `--variant baseline` never share cache
|
|
19
|
+
* - `--area studio` and `--area groq` never share cache
|
|
20
|
+
* - `--task T001` and `--task T002` never share cache
|
|
21
|
+
* - `--tag critical` and `--tag smoke` never share cache
|
|
22
|
+
*/
|
|
23
|
+
export declare function buildCacheContext(config: ResolvedConfig): string[];
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Build cache context strings from the resolved pipeline configuration.
|
|
3
|
+
*
|
|
4
|
+
* These non-file strings participate in cache key computation so that
|
|
5
|
+
* different CLI filters (mode, variant, area, task, tag) produce
|
|
6
|
+
* distinct cache entries. Without them, running `--mode knowledge-probe`
|
|
7
|
+
* after `--mode literacy` would return cached literacy results.
|
|
8
|
+
*
|
|
9
|
+
* @see packages/core/src/ports/pipeline-step.ts — cacheContext() method
|
|
10
|
+
* @see packages/eval/src/pipeline/cache.ts — hashFiles() context parameter
|
|
11
|
+
*/
|
|
12
|
+
/**
 * Derive deterministic context strings from the resolved pipeline config.
 *
 * Included in every cacheable step's key so that:
 * - `--mode literacy` and `--mode knowledge-probe` never share cache
 * - `--variant agentic` and `--variant baseline` never share cache
 * - `--area studio` and `--area groq` never share cache
 * - `--task T001` and `--task T002` never share cache
 * - `--tag critical` and `--tag smoke` never share cache
 *
 * Filter lists are normalized (de-duplicated, then sorted) before being
 * joined, so the same effective filter always yields the same string
 * regardless of the order or repetition of the flags on the command line.
 * The input arrays are never mutated.
 *
 * @param config - Resolved pipeline config; reads `mode`, `variant`,
 *   `areas`, `tasks` and `tags`.
 * @returns Context strings in a fixed order: mode, variant, areas, tasks,
 *   tags. Entries for unset or empty filters are omitted.
 */
export function buildCacheContext(config) {
    // Mode is always emitted; the original code treats it as required.
    const context = [`mode:${config.mode}`];
    // Variant (literacy sub-mode: baseline, agentic, observed, full)
    if (config.variant) {
        context.push(`variant:${config.variant}`);
    }
    // Area, task and tag filters share one normalization path.
    const filters = [
        ["areas", config.areas],
        ["tasks", config.tasks],
        ["tags", config.tags],
    ];
    for (const [label, values] of filters) {
        if (values && values.length > 0) {
            // Copy through a Set: drops repeated flags and leaves the
            // caller's array untouched before the in-place sort.
            const normalized = [...new Set(values)].sort();
            context.push(`${label}:${normalized.join(",")}`);
        }
    }
    return context;
}
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Environment variable bridge — writes ResolvedConfig values to process.env
|
|
3
|
+
* so that lib/*.ts modules (which still read process.env) work correctly.
|
|
4
|
+
*
|
|
5
|
+
* This replaces the former global applyEnvironment() with an explicit
|
|
6
|
+
* per-step bridge. Each orchestration step calls this before invoking
|
|
7
|
+
* its lib/*.ts main() function.
|
|
8
|
+
*
|
|
9
|
+
* Phase 9 will eliminate this file entirely by giving lib/*.ts main()
|
|
10
|
+
* functions typed option parameters.
|
|
11
|
+
*
|
|
12
|
+
* @see docs/exec-plans/active/ports-and-adapters/phase-8-delete-legacy-step-layer.md
|
|
13
|
+
*/
|
|
14
|
+
import type { ResolvedConfig } from "../_vendor/ailf-core/index.d.ts";
|
|
15
|
+
/**
|
|
16
|
+
* Bridge ResolvedConfig values to process.env.
|
|
17
|
+
*
|
|
18
|
+
* Idempotent — safe to call multiple times. Only sets env vars for
|
|
19
|
+
* config values that are defined (never deletes or resets).
|
|
20
|
+
*/
|
|
21
|
+
export declare function bridgeConfigToEnv(config: ResolvedConfig): void;
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Environment variable bridge — writes ResolvedConfig values to process.env
|
|
3
|
+
* so that lib/*.ts modules (which still read process.env) work correctly.
|
|
4
|
+
*
|
|
5
|
+
* This replaces the former global applyEnvironment() with an explicit
|
|
6
|
+
* per-step bridge. Each orchestration step calls this before invoking
|
|
7
|
+
* its lib/*.ts main() function.
|
|
8
|
+
*
|
|
9
|
+
* Phase 9 will eliminate this file entirely by giving lib/*.ts main()
|
|
10
|
+
* functions typed option parameters.
|
|
11
|
+
*
|
|
12
|
+
* @see docs/exec-plans/active/ports-and-adapters/phase-8-delete-legacy-step-layer.md
|
|
13
|
+
*/
|
|
14
|
+
/**
 * Bridge ResolvedConfig values to process.env.
 *
 * Idempotent — safe to call multiple times. Existing environment variables
 * are only ever overwritten, never deleted or reset.
 *
 * @param config - Resolved pipeline configuration. `mode` is written
 *   unconditionally; every other value is written only when it is set on
 *   the config.
 */
export function bridgeConfigToEnv(config) {
    // Mode
    process.env.EVAL_MODE = config.mode;
    // Search mode — "open" is never exported. The truthiness guard matters:
    // assigning undefined to process.env stores the literal string
    // "undefined", so an unset searchMode must be skipped explicitly.
    if (config.searchMode && config.searchMode !== "open") {
        process.env.EVAL_SEARCH_MODE = config.searchMode;
    }
    // Source
    if (config.source) {
        process.env.DOC_SOURCE = config.source;
    }
    // URL-derived overrides — only the first URL is bridged
    if (config.urls?.[0]) {
        process.env.DOC_BASE_URL = config.urls[0];
    }
    // Sanity overrides
    if (config.datasetOverride) {
        process.env.SANITY_DATASET = config.datasetOverride;
    }
    if (config.projectIdOverride) {
        process.env.SANITY_PROJECT_ID = config.projectIdOverride;
    }
    if (config.perspectiveOverride) {
        process.env.SANITY_PERSPECTIVE = config.perspectiveOverride;
    }
    if (config.studioOriginOverride) {
        process.env.SANITY_STUDIO_ORIGIN = config.studioOriginOverride;
    }
    if (config.sanityDocumentArgs?.length) {
        process.env.SANITY_DOCUMENT_IDS = config.sanityDocumentArgs.join(",");
    }
    // Custom headers — serialized as JSON
    if (config.headers) {
        process.env.DOC_HEADERS = JSON.stringify(config.headers);
    }
    // Allowed origins
    if (config.allowedOrigins?.length) {
        process.env.DOC_ALLOWED_ORIGINS = config.allowedOrigins.join(",");
    }
    // Scoping filters — an empty array is written as an empty string
    if (config.areas) {
        process.env.EVAL_FILTER_AREAS = config.areas.join(",");
    }
    if (config.tasks) {
        process.env.EVAL_FILTER_TASKS = config.tasks.join(",");
    }
}
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared task loading for pipeline orchestration steps.
|
|
3
|
+
*
|
|
4
|
+
* Both FetchDocsStep and GenerateConfigsStep need to see the same set of
|
|
5
|
+
* tasks. This function loads from filesystem .task.ts files — the
|
|
6
|
+
* authoritative source for the current pipeline architecture.
|
|
7
|
+
*
|
|
8
|
+
* Background: The composition root wires ctx.taskSource to
|
|
9
|
+
* ContentLakeTaskSource by default, but GenerateConfigsStep bypasses it
|
|
10
|
+
* and loads directly from the filesystem. FetchDocsStep must use the
|
|
11
|
+
* same source to avoid a mismatch where configs reference context files
|
|
12
|
+
* that were never fetched.
|
|
13
|
+
*
|
|
14
|
+
* @see packages/eval/src/orchestration/steps/generate-configs-step.ts
|
|
15
|
+
* @see packages/eval/src/orchestration/steps/fetch-docs-step.ts
|
|
16
|
+
*/
|
|
17
|
+
import type { GeneralizedTaskDefinition } from "../_vendor/ailf-core/index.d.ts";
|
|
18
|
+
/** Inputs accepted by loadPipelineTasks. */
export interface LoadPipelineTasksOptions {
    /** Absolute path of the eval package root (packages/eval). */
    rootDir: string;
    /** Evaluation mode; selects the tasks/{mode}/ subdirectory to scan. */
    mode: string;
    /** Extra directory holding repo-based tasks (--repo-tasks-path), if any. */
    repoTasksPath?: string;
}
|
|
26
|
+
/**
 * Load task definitions from the filesystem — the pipeline's authoritative
 * task source.
 *
 * `*.task.ts` files are discovered under `tasks/{mode}/` and, when given,
 * under `--repo-tasks-path`. A task whose `mode` field does not match the
 * requested mode is left out of the result.
 *
 * @param opts - Root directory, mode, and optional extra task directory.
 * @returns The matching task definitions.
 */
export declare function loadPipelineTasks(
    opts: LoadPipelineTasksOptions,
): Promise<GeneralizedTaskDefinition[]>;
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared task loading for pipeline orchestration steps.
|
|
3
|
+
*
|
|
4
|
+
* Both FetchDocsStep and GenerateConfigsStep need to see the same set of
|
|
5
|
+
* tasks. This function loads from filesystem .task.ts files — the
|
|
6
|
+
* authoritative source for the current pipeline architecture.
|
|
7
|
+
*
|
|
8
|
+
* Background: The composition root wires ctx.taskSource to
|
|
9
|
+
* ContentLakeTaskSource by default, but GenerateConfigsStep bypasses it
|
|
10
|
+
* and loads directly from the filesystem. FetchDocsStep must use the
|
|
11
|
+
* same source to avoid a mismatch where configs reference context files
|
|
12
|
+
* that were never fetched.
|
|
13
|
+
*
|
|
14
|
+
* @see packages/eval/src/orchestration/steps/generate-configs-step.ts
|
|
15
|
+
* @see packages/eval/src/orchestration/steps/fetch-docs-step.ts
|
|
16
|
+
*/
|
|
17
|
+
import { resolve } from "path";
|
|
18
|
+
import { discoverTsTaskFiles, loadTsTaskFile, } from "../adapters/task-sources/task-file-loader.js";
|
|
19
|
+
import { resolveVendoredSubdir } from "../pipeline/compiler/config-loader.js";
|
|
20
|
+
/**
 * Load task definitions from the filesystem — the pipeline's authoritative
 * task source.
 *
 * Scans `tasks/{mode}/` (resolved through resolveVendoredSubdir) and, when
 * `opts.repoTasksPath` is set, that directory too. Files are loaded one at
 * a time, in discovery order. A task that declares a `mode` different from
 * `opts.mode` is dropped; a task with no `mode` field is always kept.
 *
 * @param opts - `{ rootDir, mode, repoTasksPath? }`
 * @returns The collected task definitions, in load order.
 */
export async function loadPipelineTasks(opts) {
    const searchDirs = [
        resolveVendoredSubdir(opts.rootDir, `tasks/${opts.mode}`),
    ];
    // Add the repo task directory unless it is the same path as the built-in one.
    const extraDir = opts.repoTasksPath ? resolve(opts.repoTasksPath) : undefined;
    if (extraDir !== undefined && extraDir !== searchDirs[0]) {
        searchDirs.push(extraDir);
    }
    const collected = [];
    for (const searchDir of searchDirs) {
        for (const taskFile of discoverTsTaskFiles(searchDir)) {
            const loaded = await loadTsTaskFile(taskFile);
            for (const candidate of loaded.tasks) {
                // Skip tasks that explicitly belong to another mode.
                if ("mode" in candidate && candidate.mode !== opts.mode) {
                    continue;
                }
                collected.push(candidate);
            }
        }
    }
    return collected;
}
|
|
@@ -20,10 +20,20 @@ import { runStep } from "./step-runner.js";
|
|
|
20
20
|
* underlying Sanity client. Best-effort — failures are logged and
|
|
21
21
|
* never block the pipeline.
|
|
22
22
|
*/
|
|
23
|
-
async function reportJobProgress(ctx, stepName, completedSteps, totalSteps, status, errorInfo) {
|
|
23
|
+
async function reportJobProgress(ctx, stepName, completedSteps, totalSteps, status, errorInfo, jobUpdates) {
|
|
24
24
|
const jobId = ctx.config.jobId;
|
|
25
25
|
if (!jobId)
|
|
26
26
|
return;
|
|
27
|
+
// Accumulate update for artifact capture
|
|
28
|
+
jobUpdates?.push({
|
|
29
|
+
jobId,
|
|
30
|
+
stepName,
|
|
31
|
+
completedSteps,
|
|
32
|
+
totalSteps,
|
|
33
|
+
status,
|
|
34
|
+
errorInfo,
|
|
35
|
+
timestamp: new Date().toISOString(),
|
|
36
|
+
});
|
|
27
37
|
// Use the report store's write capability to patch the job document.
|
|
28
38
|
// The report store exposes a Sanity client — we access it through
|
|
29
39
|
// a best-effort PATCH via the same client infrastructure.
|
|
@@ -59,6 +69,51 @@ async function reportJobProgress(ctx, stepName, completedSteps, totalSteps, stat
|
|
|
59
69
|
}
|
|
60
70
|
}
|
|
61
71
|
// ---------------------------------------------------------------------------
|
|
72
|
+
// Artifact capture
|
|
73
|
+
// ---------------------------------------------------------------------------
|
|
74
|
+
/**
 * Capture a snapshot of the pipeline config, final state, and step results.
 *
 * Top-level config entries whose key name looks sensitive are dropped before
 * capture: API keys, tokens, secrets, passwords, credentials, and custom
 * headers (which may carry credentials). Does nothing when the collector is
 * disabled.
 *
 * @param ctx - App context; reads `ctx.config` and writes to `ctx.collector`.
 * @param state - Pipeline state; only a fixed set of its fields is captured.
 * @param results - Step results keyed by step name.
 */
function capturePipelineContext(ctx, state, results) {
    if (!ctx.collector.enabled)
        return;
    // Case-insensitive substring match on the key name only; values are not inspected.
    const sensitiveKey = /token|secret|key|password|credential|authorization|headers/i;
    const sanitized = Object.fromEntries(Object.entries(ctx.config).filter(([k]) => !sensitiveKey.test(k)));
    ctx.collector.capture("pipeline", "pipeline-context", {
        config: sanitized,
        state: {
            reportId: state.reportId,
            evalFingerprint: state.evalFingerprint,
            belowCritical: state.belowCritical,
            // Copied into a plain array so the snapshot does not alias pipeline state.
            remoteCacheHits: state.remoteCacheHits
                ? [...state.remoteCacheHits]
                : undefined,
            releaseAutoScope: state.releaseAutoScope,
            testSummary: state.testSummary,
        },
        steps: Object.entries(results).map(([name, result]) => ({
            name,
            status: result.status,
            // Skipped steps report no duration.
            durationMs: result.status !== "skipped" ? result.durationMs : undefined,
        })),
    });
}
|
|
101
|
+
/**
 * Ask the collector to flush whatever it has captured.
 *
 * A disabled collector is left untouched. A successful flush is logged at
 * info level with the artifact count and destination; any error raised
 * along the way is logged as a warning and swallowed, so the pipeline
 * result is never affected.
 *
 * @param ctx - App context providing `collector` and `logger`.
 */
async function flushArtifacts(ctx) {
    const { collector } = ctx;
    if (collector.enabled) {
        try {
            const outcome = await collector.flush();
            ctx.logger.info(`Captured ${outcome.artifactCount} artifacts → ${outcome.destination}`);
        }
        catch (failure) {
            const reason = failure instanceof Error ? failure.message : failure;
            ctx.logger.warn(`Artifact capture flush failed: ${reason}`);
        }
    }
}
|
|
116
|
+
// ---------------------------------------------------------------------------
|
|
62
117
|
// Orchestrator
|
|
63
118
|
// ---------------------------------------------------------------------------
|
|
64
119
|
/**
|
|
@@ -76,6 +131,7 @@ export async function orchestratePipeline(ctx, steps) {
|
|
|
76
131
|
const validation = { issues: [], valid: true };
|
|
77
132
|
const pipelineStart = Date.now();
|
|
78
133
|
const hasJob = !!ctx.config.jobId;
|
|
134
|
+
const jobUpdates = [];
|
|
79
135
|
ctx.logger.section("ai-literacy-framework — Evaluation Pipeline");
|
|
80
136
|
ctx.logger.debug(`Pipeline starting with ${steps.length} steps`, {
|
|
81
137
|
steps: steps.map((s) => s.name),
|
|
@@ -86,7 +142,7 @@ export async function orchestratePipeline(ctx, steps) {
|
|
|
86
142
|
});
|
|
87
143
|
// Report initial running status
|
|
88
144
|
if (hasJob) {
|
|
89
|
-
await reportJobProgress(ctx, steps[0]?.name ?? "init", 0, steps.length, "running");
|
|
145
|
+
await reportJobProgress(ctx, steps[0]?.name ?? "init", 0, steps.length, "running", undefined, jobUpdates);
|
|
90
146
|
}
|
|
91
147
|
for (let i = 0; i < steps.length; i++) {
|
|
92
148
|
const step = steps[i];
|
|
@@ -94,7 +150,7 @@ export async function orchestratePipeline(ctx, steps) {
|
|
|
94
150
|
ctx.logger.section(step.name);
|
|
95
151
|
// Report current step progress
|
|
96
152
|
if (hasJob) {
|
|
97
|
-
await reportJobProgress(ctx, step.name, i, steps.length, "running");
|
|
153
|
+
await reportJobProgress(ctx, step.name, i, steps.length, "running", undefined, jobUpdates);
|
|
98
154
|
}
|
|
99
155
|
const result = await runStep(step, ctx, state);
|
|
100
156
|
results[step.name] = result;
|
|
@@ -111,8 +167,15 @@ export async function orchestratePipeline(ctx, steps) {
|
|
|
111
167
|
await reportJobProgress(ctx, step.name, i + 1, steps.length, "failed", {
|
|
112
168
|
message: failedError,
|
|
113
169
|
step: step.name,
|
|
114
|
-
});
|
|
170
|
+
}, jobUpdates);
|
|
115
171
|
}
|
|
172
|
+
// Capture pipeline context and job updates before flushing
|
|
173
|
+
capturePipelineContext(ctx, state, results);
|
|
174
|
+
if (jobUpdates.length > 0) {
|
|
175
|
+
ctx.collector.capture("job-store", "job-updates", jobUpdates);
|
|
176
|
+
}
|
|
177
|
+
// Flush captured artifacts even on failure (partial capture is useful)
|
|
178
|
+
await flushArtifacts(ctx);
|
|
116
179
|
return {
|
|
117
180
|
belowCritical: state.belowCritical,
|
|
118
181
|
durationMs: Date.now() - pipelineStart,
|
|
@@ -129,7 +192,7 @@ export async function orchestratePipeline(ctx, steps) {
|
|
|
129
192
|
}
|
|
130
193
|
// Report step completion
|
|
131
194
|
if (hasJob) {
|
|
132
|
-
await reportJobProgress(ctx, step.name, i + 1, steps.length, "running");
|
|
195
|
+
await reportJobProgress(ctx, step.name, i + 1, steps.length, "running", undefined, jobUpdates);
|
|
133
196
|
}
|
|
134
197
|
}
|
|
135
198
|
const durationMs = Date.now() - pipelineStart;
|
|
@@ -166,6 +229,13 @@ export async function orchestratePipeline(ctx, steps) {
|
|
|
166
229
|
ctx.logger.warn("Failed to report job completion — continuing");
|
|
167
230
|
}
|
|
168
231
|
}
|
|
232
|
+
// Capture pipeline context and job updates before flushing
|
|
233
|
+
capturePipelineContext(ctx, state, results);
|
|
234
|
+
if (jobUpdates.length > 0) {
|
|
235
|
+
ctx.collector.capture("job-store", "job-updates", jobUpdates);
|
|
236
|
+
}
|
|
237
|
+
// Flush captured artifacts (non-blocking — failures never affect pipeline result)
|
|
238
|
+
await flushArtifacts(ctx);
|
|
169
239
|
return {
|
|
170
240
|
belowCritical: state.belowCritical,
|
|
171
241
|
durationMs,
|
|
@@ -36,8 +36,12 @@ export async function runStep(step, ctx, state = {}) {
|
|
|
36
36
|
if (canCache) {
|
|
37
37
|
try {
|
|
38
38
|
const inputs = step.cacheInputs(ctx);
|
|
39
|
+
const context = step.cacheContext?.(ctx);
|
|
39
40
|
ctx.logger.debug(`[${step.name}] Cache inputs: ${inputs.length} files`);
|
|
40
|
-
|
|
41
|
+
if (context?.length) {
|
|
42
|
+
ctx.logger.debug(`[${step.name}] Cache context: ${context.join(", ")}`);
|
|
43
|
+
}
|
|
44
|
+
const key = await ctx.cache.computeKey(inputs, context);
|
|
41
45
|
cacheKey = key;
|
|
42
46
|
ctx.logger.debug(`[${step.name}] Cache key: ${key}`);
|
|
43
47
|
const cached = await ctx.cache.lookup(step.name, key);
|
|
@@ -4,9 +4,11 @@
|
|
|
4
4
|
* Calls calculateAndWriteScores() from pipeline/calculate-scores.ts with
|
|
5
5
|
* typed options derived from AppContext. No env bridge needed.
|
|
6
6
|
*/
|
|
7
|
+
import { existsSync } from "node:fs";
|
|
7
8
|
import { join } from "path";
|
|
8
9
|
import { LiteracyVariant } from "../../pipeline/normalize-mode.js";
|
|
9
10
|
import { getStepInputPaths } from "../../pipeline/cache.js";
|
|
11
|
+
import { buildCacheContext } from "../cache-context.js";
|
|
10
12
|
import { calculateAndWriteScores } from "../../pipeline/calculate-scores.js";
|
|
11
13
|
import { checkResultsExist, checkScoreSummaryValid, } from "../../pipeline/checks.js";
|
|
12
14
|
import { resultsFileForMode } from "../../pipeline/eval-constants.js";
|
|
@@ -118,6 +120,14 @@ export class CalculateScoresStep {
|
|
|
118
120
|
if (belowCritical.length > 0) {
|
|
119
121
|
state.belowCritical = belowCritical;
|
|
120
122
|
}
|
|
123
|
+
// Capture score artifacts
|
|
124
|
+
const resultsDir = join(ctx.config.rootDir, "results", "latest");
|
|
125
|
+
for (const file of ["score-summary.json", "grader-judgments.json"]) {
|
|
126
|
+
const filePath = join(resultsDir, file);
|
|
127
|
+
if (existsSync(filePath)) {
|
|
128
|
+
ctx.collector.captureFile("calculate-scores", file.replace(".json", ""), filePath);
|
|
129
|
+
}
|
|
130
|
+
}
|
|
121
131
|
const criticalSuffix = belowCritical.length > 0
|
|
122
132
|
? ` (${belowCritical.length} area(s) below critical threshold: ${belowCritical.join(", ")})`
|
|
123
133
|
: "";
|
|
@@ -130,4 +140,7 @@ export class CalculateScoresStep {
|
|
|
130
140
|
cacheInputs(ctx) {
|
|
131
141
|
return getStepInputPaths(ctx.config.rootDir, "calculate-scores");
|
|
132
142
|
}
|
|
143
|
+
cacheContext(ctx) {
|
|
144
|
+
return buildCacheContext(ctx.config);
|
|
145
|
+
}
|
|
133
146
|
}
|
|
@@ -52,11 +52,20 @@ export class CallbackStep {
|
|
|
52
52
|
}
|
|
53
53
|
// Deliver callback — read reportId from pipeline state (set by PublishReportStep)
|
|
54
54
|
ctx.logger.info(`Delivering results to ${this.callback.url}`);
|
|
55
|
-
const
|
|
55
|
+
const callbackPayload = {
|
|
56
56
|
deliveredAt: new Date().toISOString(),
|
|
57
57
|
jobId: this.jobId,
|
|
58
58
|
reportId: state.reportId,
|
|
59
59
|
summary,
|
|
60
|
+
};
|
|
61
|
+
// Capture callback payload (Tier 2 — no secrets: headers are NOT captured)
|
|
62
|
+
ctx.collector.capture("callback", "callback-payload", callbackPayload);
|
|
63
|
+
const result = await deliverCallback(this.callback, callbackPayload);
|
|
64
|
+
// Capture callback response status (not the body — that's the user's system)
|
|
65
|
+
ctx.collector.capture("callback", "callback-response", {
|
|
66
|
+
ok: result.ok,
|
|
67
|
+
attempts: result.attempts,
|
|
68
|
+
error: result.error,
|
|
60
69
|
});
|
|
61
70
|
if (result.ok) {
|
|
62
71
|
return {
|
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
* inlined directly from the former pipeline/steps/compare-step.ts.
|
|
6
6
|
* This is an optional step — failure doesn't stop the pipeline.
|
|
7
7
|
*/
|
|
8
|
-
import { existsSync, readFileSync, readdirSync, writeFileSync } from "fs";
|
|
8
|
+
import { existsSync, mkdirSync, readFileSync, readdirSync, writeFileSync, } from "fs";
|
|
9
9
|
import { join, resolve } from "path";
|
|
10
10
|
import { compare } from "../../pipeline/compare.js";
|
|
11
11
|
export class CompareStep {
|
|
@@ -65,9 +65,12 @@ export class CompareStep {
|
|
|
65
65
|
? { noiseThreshold: ctx.config.compareThreshold }
|
|
66
66
|
: undefined;
|
|
67
67
|
const report = compare(baseline, experiment, options);
|
|
68
|
-
// Write report
|
|
69
|
-
|
|
68
|
+
// Write report to outputDir (respects --output-dir)
|
|
69
|
+
mkdirSync(ctx.config.outputDir, { recursive: true });
|
|
70
|
+
const reportPath = resolve(ctx.config.outputDir, "comparison-report.json");
|
|
70
71
|
writeFileSync(reportPath, JSON.stringify(report, null, 2));
|
|
72
|
+
// Capture comparison report
|
|
73
|
+
ctx.collector.captureFile("compare", "comparison-report", reportPath);
|
|
71
74
|
// Build summary
|
|
72
75
|
const improved = report.improved.length;
|
|
73
76
|
const regressed = report.regressed.length;
|
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
* Calls pure functions from pipeline/discovery-report.ts directly.
|
|
5
5
|
* Optional step — failure doesn't stop the pipeline.
|
|
6
6
|
*/
|
|
7
|
-
import { existsSync, readFileSync, writeFileSync } from "fs";
|
|
7
|
+
import { existsSync, mkdirSync, readFileSync, writeFileSync } from "fs";
|
|
8
8
|
import { resolve } from "path";
|
|
9
9
|
import { formatDiscoveryMarkdown, generateDiscoveryReport, } from "../../pipeline/discovery-report.js";
|
|
10
10
|
export class DiscoveryReportStep {
|
|
@@ -34,7 +34,11 @@ export class DiscoveryReportStep {
|
|
|
34
34
|
}
|
|
35
35
|
const report = generateDiscoveryReport(scoreSummary, ctx.config.areas);
|
|
36
36
|
const md = formatDiscoveryMarkdown(report);
|
|
37
|
-
|
|
37
|
+
// Write to outputDir (respects --output-dir)
|
|
38
|
+
mkdirSync(ctx.config.outputDir, { recursive: true });
|
|
39
|
+
const discoveryPath = resolve(ctx.config.outputDir, "discovery-report.md");
|
|
40
|
+
writeFileSync(discoveryPath, md);
|
|
41
|
+
ctx.collector.captureFile("discovery-report", "discovery-report", discoveryPath);
|
|
38
42
|
console.log(md);
|
|
39
43
|
const invisible = report.invisibleDocs.length;
|
|
40
44
|
const f1 = report.overall.avgF1.toFixed(2);
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shell delegation for the fetch-docs step.
|
|
3
|
+
*
|
|
4
|
+
* Isolates the execSync call so it can be replaced when the pipeline
|
|
5
|
+
* fully migrates to the DocFetcher port.
|
|
6
|
+
*/
|
|
7
|
+
/** Outcome of a shell-delegated command. */
export interface ShellResult {
    /** Whether the command completed without raising an error. */
    ok: boolean;
    /** Message describing the failure; set when `ok` is false. */
    error?: string;
}
|
|
11
|
+
/**
 * Run `pnpm fetch-docs` through the shell.
 *
 * Failures are reported in the returned object rather than thrown, so the
 * calling step can treat every outcome the same way.
 *
 * @param rootDir - Directory the command is run from.
 * @param source - Optional value passed to the command as `--source`.
 * @returns The command outcome.
 */
export declare function runFetchDocsShell(
    rootDir: string,
    source?: string,
): ShellResult;
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shell delegation for the fetch-docs step.
|
|
3
|
+
*
|
|
4
|
+
* Isolates the execSync call so it can be replaced when the pipeline
|
|
5
|
+
* fully migrates to the DocFetcher port.
|
|
6
|
+
*/
|
|
7
|
+
import { execSync } from "child_process";
|
|
8
|
+
/**
 * Run `pnpm fetch-docs` via shell.
 *
 * Returns a result object instead of throwing so the step can
 * handle the failure uniformly.
 *
 * @param rootDir - Working directory for the command.
 * @param source - Optional value forwarded as `--source <value>`. The
 *   command line is built by string interpolation and executed by a shell,
 *   so the value is restricted to characters with no special shell meaning;
 *   anything else is rejected without running the command.
 * @returns `{ ok: true }` when the command succeeds, otherwise
 *   `{ ok: false, error }` with the failure message.
 */
export function runFetchDocsShell(rootDir, source) {
    // Guard against shell injection / word splitting: whitespace, quotes,
    // `;`, `&`, `|`, `$`, backticks and similar never reach the shell.
    if (source && !/^[A-Za-z0-9_.,@:\/=+-]+$/.test(source)) {
        return {
            ok: false,
            error: `Invalid --source value: ${JSON.stringify(source)}`,
        };
    }
    try {
        const sourceArg = source ? ` --source ${source}` : "";
        execSync(`pnpm fetch-docs${sourceArg}`, {
            cwd: rootDir,
            env: process.env,
            // Stream the child's output straight to this process's stdio.
            stdio: "inherit",
        });
        return { ok: true };
    }
    catch (err) {
        return {
            ok: false,
            error: err instanceof Error ? err.message : String(err),
        };
    }
}
|