@sanity/ailf 1.0.0 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +0 -1
- package/canonical/grader-references/README.md +2 -2
- package/canonical/reference-solutions/content-lake/mutations.ts +160 -0
- package/canonical/reference-solutions/content-lake/realtime.ts +187 -0
- package/canonical/reference-solutions/image-handling/asset-pipeline.tsx +166 -0
- package/canonical/reference-solutions/portable-text/custom-blocks.ts +204 -0
- package/canonical/reference-solutions/portable-text/rendering.tsx +163 -0
- package/config/features.ts +1 -1
- package/config/models.ts +29 -12
- package/config/sources.ts +1 -1
- package/config/thresholds.ts +1 -1
- package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.d.ts +10 -0
- package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.js +185 -0
- package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.d.ts +6 -0
- package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.js +42 -0
- package/dist/_vendor/ailf-core/artifact-capture/noop-collector.d.ts +14 -0
- package/dist/_vendor/ailf-core/artifact-capture/noop-collector.js +25 -0
- package/dist/_vendor/ailf-core/config-helpers.d.ts +20 -17
- package/dist/_vendor/ailf-core/config-helpers.js +51 -2
- package/dist/_vendor/ailf-core/examples/index.d.ts +166 -80
- package/dist/_vendor/ailf-core/examples/index.js +213 -94
- package/dist/_vendor/ailf-core/index.d.ts +3 -2
- package/dist/_vendor/ailf-core/index.js +2 -1
- package/dist/_vendor/ailf-core/ports/artifact-collector.d.ts +94 -0
- package/dist/_vendor/ailf-core/ports/artifact-collector.js +13 -0
- package/dist/_vendor/ailf-core/ports/capture-comparator.d.ts +138 -0
- package/dist/_vendor/ailf-core/ports/capture-comparator.js +10 -0
- package/dist/_vendor/ailf-core/ports/context.d.ts +22 -1
- package/dist/_vendor/ailf-core/ports/eval-runner.d.ts +6 -0
- package/dist/_vendor/ailf-core/ports/index.d.ts +2 -0
- package/dist/_vendor/ailf-core/ports/pipeline-step.d.ts +11 -0
- package/dist/_vendor/ailf-core/ports/task-source.d.ts +3 -3
- package/dist/_vendor/ailf-core/ports/task-source.js +3 -3
- package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +1 -0
- package/dist/_vendor/ailf-core/schemas/eval-config.js +10 -0
- package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +7 -1
- package/dist/_vendor/ailf-core/schemas/pipeline-request.js +16 -2
- package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +0 -2
- package/dist/_vendor/ailf-core/schemas/pipeline.js +0 -1
- package/dist/_vendor/ailf-core/services/config-helpers.d.ts +16 -1
- package/dist/_vendor/ailf-core/services/config-helpers.js +21 -0
- package/dist/_vendor/ailf-core/services/index.d.ts +1 -1
- package/dist/_vendor/ailf-core/services/index.js +1 -1
- package/dist/_vendor/ailf-core/services/scoring.js +9 -0
- package/dist/_vendor/ailf-core/types/generalized-task.d.ts +25 -1
- package/dist/_vendor/ailf-core/types/generalized-task.js +1 -1
- package/dist/_vendor/ailf-core/types/index.d.ts +48 -7
- package/dist/_vendor/ailf-core/types/plugin-registry.d.ts +105 -23
- package/dist/_vendor/ailf-core/types/plugin-registry.js +73 -20
- package/dist/_vendor/ailf-shared/eval-modes.d.ts +15 -0
- package/dist/_vendor/ailf-shared/eval-modes.js +18 -0
- package/dist/adapters/api-client/remediation.js +2 -2
- package/dist/adapters/config-sources/file-config-adapter.js +7 -1
- package/dist/adapters/config-sources/ts-config-loader.js +21 -13
- package/dist/adapters/eval-runners/promptfoo-eval-adapter.js +8 -2
- package/dist/adapters/index.d.ts +0 -1
- package/dist/adapters/index.js +0 -1
- package/dist/adapters/task-sources/composite-task-source.d.ts +1 -1
- package/dist/adapters/task-sources/composite-task-source.js +1 -1
- package/dist/adapters/task-sources/content-lake-task-source.d.ts +4 -6
- package/dist/adapters/task-sources/content-lake-task-source.js +21 -26
- package/dist/adapters/task-sources/index.d.ts +3 -4
- package/dist/adapters/task-sources/index.js +3 -4
- package/dist/adapters/task-sources/repo-schemas.d.ts +219 -17
- package/dist/adapters/task-sources/repo-schemas.js +228 -20
- package/dist/adapters/task-sources/repo-task-source.d.ts +14 -10
- package/dist/adapters/task-sources/repo-task-source.js +81 -122
- package/dist/adapters/task-sources/repo-trigger.d.ts +1 -1
- package/dist/adapters/task-sources/repo-trigger.js +1 -1
- package/dist/adapters/task-sources/repo-validation.d.ts +36 -5
- package/dist/adapters/task-sources/repo-validation.js +126 -5
- package/dist/adapters/task-sources/task-file-loader.d.ts +10 -7
- package/dist/adapters/task-sources/task-file-loader.js +21 -7
- package/dist/agent-observer/test-imports.d.ts +7 -0
- package/dist/agent-observer/test-imports.js +185 -0
- package/dist/artifact-capture/comparator.d.ts +22 -0
- package/dist/artifact-capture/comparator.js +493 -0
- package/dist/artifact-capture/filesystem-collector.d.ts +42 -0
- package/dist/artifact-capture/filesystem-collector.js +237 -0
- package/dist/artifact-capture/redact-artifact.d.ts +20 -0
- package/dist/artifact-capture/redact-artifact.js +115 -0
- package/dist/assertions/source-isolation.d.ts +1 -1
- package/dist/assertions/source-isolation.js +1 -1
- package/dist/cli.js +4 -0
- package/dist/commands/calculate-scores.js +1 -0
- package/dist/commands/capture-compare.d.ts +15 -0
- package/dist/commands/capture-compare.js +253 -0
- package/dist/commands/capture-list.d.ts +12 -0
- package/dist/commands/capture-list.js +147 -0
- package/dist/commands/capture.d.ts +9 -0
- package/dist/commands/capture.js +16 -0
- package/dist/commands/chronic-failures.d.ts +8 -0
- package/dist/commands/chronic-failures.js +33 -0
- package/dist/commands/coverage-audit.js +3 -1
- package/dist/commands/explain-handler.d.ts +1 -1
- package/dist/commands/explain-handler.js +37 -8
- package/dist/commands/fetch-docs.js +1 -0
- package/dist/commands/generate-configs.d.ts +3 -3
- package/dist/commands/generate-configs.js +20 -8
- package/dist/commands/init.d.ts +5 -4
- package/dist/commands/init.js +190 -25
- package/dist/commands/pipeline-action.d.ts +7 -1
- package/dist/commands/pipeline-action.js +43 -19
- package/dist/commands/pipeline.d.ts +6 -1
- package/dist/commands/pipeline.js +7 -2
- package/dist/commands/pr-comment.js +1 -0
- package/dist/commands/publish.js +1 -0
- package/dist/commands/shared/help.js +2 -2
- package/dist/commands/update-quality-scores.d.ts +5 -0
- package/dist/commands/update-quality-scores.js +20 -0
- package/dist/commands/validate-tasks.d.ts +2 -2
- package/dist/commands/validate-tasks.js +26 -15
- package/dist/composition-root.d.ts +15 -4
- package/dist/composition-root.js +100 -55
- package/dist/config/features.ts +23 -0
- package/dist/config/models.ts +100 -0
- package/dist/config/prompts.ts +16 -0
- package/dist/config/rubrics.ts +225 -0
- package/dist/config/schedules.ts +47 -0
- package/dist/config/sinks.ts +37 -0
- package/dist/config/sources.ts +21 -0
- package/dist/config/thresholds.ts +61 -0
- package/dist/index.d.ts +41 -0
- package/dist/index.js +48 -0
- package/dist/lib/agent-behavior-report.d.ts +8 -0
- package/dist/lib/agent-behavior-report.js +185 -0
- package/dist/lib/baseline.d.ts +19 -0
- package/dist/lib/baseline.js +153 -0
- package/dist/lib/calculate-scores.d.ts +23 -0
- package/dist/lib/calculate-scores.js +42 -0
- package/dist/lib/compare.d.ts +18 -0
- package/dist/lib/compare.js +170 -0
- package/dist/lib/coverage-audit.d.ts +4 -0
- package/dist/lib/coverage-audit.js +42 -0
- package/dist/lib/discovery-report.d.ts +13 -0
- package/dist/lib/discovery-report.js +57 -0
- package/dist/lib/fetch-docs.d.ts +30 -0
- package/dist/lib/fetch-docs.js +171 -0
- package/dist/lib/generate-configs.d.ts +25 -0
- package/dist/lib/generate-configs.js +42 -0
- package/dist/lib/grader-api.d.ts +21 -0
- package/dist/lib/grader-api.js +34 -0
- package/dist/lib/grader-compare.d.ts +19 -0
- package/dist/lib/grader-compare.js +91 -0
- package/dist/lib/grader-consistency.d.ts +27 -0
- package/dist/lib/grader-consistency.js +79 -0
- package/dist/lib/grader-sensitivity.d.ts +19 -0
- package/dist/lib/grader-sensitivity.js +75 -0
- package/dist/lib/grader-validate.d.ts +19 -0
- package/dist/lib/grader-validate.js +78 -0
- package/dist/lib/measure-retrieval.d.ts +14 -0
- package/dist/lib/measure-retrieval.js +71 -0
- package/dist/lib/pr-comment.d.ts +16 -0
- package/dist/lib/pr-comment.js +28 -0
- package/dist/lib/readiness-report.d.ts +13 -0
- package/dist/lib/readiness-report.js +108 -0
- package/dist/lib/webhook-server.d.ts +11 -0
- package/dist/lib/webhook-server.js +24 -0
- package/dist/lib/weekly-digest.d.ts +24 -0
- package/dist/lib/weekly-digest.js +148 -0
- package/dist/orchestration/build-app-context.js +13 -0
- package/dist/orchestration/build-step-sequence.js +4 -2
- package/dist/orchestration/cache-context.d.ts +23 -0
- package/dist/orchestration/cache-context.js +43 -0
- package/dist/orchestration/env-bridge.d.ts +21 -0
- package/dist/orchestration/env-bridge.js +66 -0
- package/dist/orchestration/load-pipeline-tasks.d.ts +34 -0
- package/dist/orchestration/load-pipeline-tasks.js +52 -0
- package/dist/orchestration/pipeline-orchestrator.js +75 -5
- package/dist/orchestration/step-runner.js +5 -1
- package/dist/orchestration/steps/calculate-scores-step.d.ts +1 -0
- package/dist/orchestration/steps/calculate-scores-step.js +13 -0
- package/dist/orchestration/steps/callback-step.js +10 -1
- package/dist/orchestration/steps/compare-step.js +6 -3
- package/dist/orchestration/steps/discovery-report-step.js +6 -2
- package/dist/orchestration/steps/fetch-docs-shell.d.ts +17 -0
- package/dist/orchestration/steps/fetch-docs-shell.js +30 -0
- package/dist/orchestration/steps/fetch-docs-step.d.ts +1 -0
- package/dist/orchestration/steps/fetch-docs-step.js +32 -19
- package/dist/orchestration/steps/gap-analysis-step.js +13 -2
- package/dist/orchestration/steps/generate-configs-step.d.ts +1 -0
- package/dist/orchestration/steps/generate-configs-step.js +77 -26
- package/dist/orchestration/steps/mirror-repo-tasks-step.d.ts +1 -1
- package/dist/orchestration/steps/mirror-repo-tasks-step.js +1 -1
- package/dist/orchestration/steps/publish-report-step.js +19 -0
- package/dist/orchestration/steps/readiness-step.js +8 -3
- package/dist/orchestration/steps/report-step.js +17 -4
- package/dist/orchestration/steps/run-eval-step.d.ts +1 -0
- package/dist/orchestration/steps/run-eval-step.js +51 -31
- package/dist/pipeline/agent-behavior-report.js +6 -0
- package/dist/pipeline/attribution.d.ts +1 -1
- package/dist/pipeline/attribution.js +1 -1
- package/dist/pipeline/cache.js +29 -15
- package/dist/pipeline/calculate-scores.d.ts +2 -0
- package/dist/pipeline/calculate-scores.js +70 -33
- package/dist/pipeline/chronic-failures.d.ts +55 -0
- package/dist/pipeline/chronic-failures.js +110 -0
- package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js +1 -1
- package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.js +1 -1
- package/dist/pipeline/compiler/__tests__/literacy-handler.test.js +1 -1
- package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +132 -62
- package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.js +2 -3
- package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.js +33 -100
- package/dist/pipeline/compiler/__tests__/task-bridge.test.d.ts +9 -0
- package/dist/pipeline/compiler/__tests__/task-bridge.test.js +339 -0
- package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.d.ts +10 -0
- package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.js +509 -0
- package/dist/pipeline/compiler/assertion-mapper.d.ts +1 -1
- package/dist/pipeline/compiler/assertion-mapper.js +1 -1
- package/dist/pipeline/compiler/compiler-to-yaml.d.ts +2 -7
- package/dist/pipeline/compiler/compiler-to-yaml.js +2 -7
- package/dist/pipeline/compiler/config-loader.d.ts +14 -0
- package/dist/pipeline/compiler/config-loader.js +42 -2
- package/dist/pipeline/compiler/fixture-resolver.d.ts +1 -1
- package/dist/pipeline/compiler/fixture-resolver.js +1 -1
- package/dist/pipeline/compiler/ignore-fields.d.ts +1 -1
- package/dist/pipeline/compiler/ignore-fields.js +1 -1
- package/dist/pipeline/compiler/index.d.ts +2 -5
- package/dist/pipeline/compiler/index.js +2 -5
- package/dist/pipeline/compiler/literacy-bridge.d.ts +2 -2
- package/dist/pipeline/compiler/literacy-bridge.js +2 -2
- package/dist/pipeline/compiler/mode-bases/agent-harness.d.ts +10 -0
- package/dist/pipeline/compiler/mode-bases/agent-harness.js +21 -0
- package/dist/pipeline/compiler/mode-bases/index.d.ts +4 -0
- package/dist/pipeline/compiler/mode-bases/index.js +4 -0
- package/dist/pipeline/compiler/mode-bases/knowledge-probe.d.ts +10 -0
- package/dist/pipeline/compiler/mode-bases/knowledge-probe.js +22 -0
- package/dist/pipeline/compiler/mode-bases/literacy.d.ts +23 -0
- package/dist/pipeline/compiler/mode-bases/literacy.js +132 -0
- package/dist/pipeline/compiler/mode-bases/mcp-server.d.ts +10 -0
- package/dist/pipeline/compiler/mode-bases/mcp-server.js +70 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.d.ts +43 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.js +187 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.js +138 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/index.d.ts +16 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/index.js +43 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.js +29 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.d.ts +12 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.js +82 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.d.ts +4 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.js +19 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/types.d.ts +49 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.js +16 -0
- package/dist/pipeline/compiler/mode-handlers/index.d.ts +6 -7
- package/dist/pipeline/compiler/mode-handlers/index.js +6 -8
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.d.ts +16 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.js +61 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.d.ts +18 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.js +112 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.d.ts +26 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.js +49 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.js +28 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.d.ts +44 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.js +24 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/assertions.d.ts +18 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/assertions.js +118 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/compiler.d.ts +14 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/compiler.js +105 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/index.d.ts +11 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/index.js +38 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/prompts.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/prompts.js +74 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/types.d.ts +41 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/validation.d.ts +12 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/validation.js +28 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-assertions.js +63 -6
- package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.d.ts +42 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.js +334 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.js +100 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/index.d.ts +27 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/index.js +54 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.d.ts +8 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.js +28 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.d.ts +28 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.js +108 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/types.d.ts +37 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.js +43 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.d.ts +3 -1
- package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.js +65 -67
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.d.ts +33 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.js +191 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.js +101 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.js +172 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.js +323 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.d.ts +103 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.d.ts +65 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.js +368 -0
- package/dist/pipeline/compiler/preset-loader.d.ts +22 -0
- package/dist/pipeline/compiler/preset-loader.js +99 -0
- package/dist/pipeline/compiler/presets/sanity-literacy.d.ts +7 -10
- package/dist/pipeline/compiler/presets/sanity-literacy.js +11 -157
- package/dist/pipeline/compiler/promptfoo-compiler.d.ts +1 -4
- package/dist/pipeline/compiler/promptfoo-compiler.js +3 -12
- package/dist/pipeline/compiler/provider-assembler.js +13 -7
- package/dist/pipeline/compiler/sandbox/docker-sandbox.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/docker-sandbox.js +1 -1
- package/dist/pipeline/compiler/sandbox/fixture-provisioner.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/fixture-provisioner.js +1 -1
- package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.js +1 -1
- package/dist/pipeline/compiler/sandbox/index.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/index.js +1 -1
- package/dist/pipeline/compiler/sandbox/sandbox-selector.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/sandbox-selector.js +1 -1
- package/dist/pipeline/compiler/sandbox/sandbox-strategy.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/sandbox-strategy.js +1 -1
- package/dist/pipeline/compiler/sandbox/tempdir-sandbox.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/tempdir-sandbox.js +1 -1
- package/dist/pipeline/compiler/scoring-bridge.d.ts +1 -1
- package/dist/pipeline/compiler/scoring-bridge.js +1 -1
- package/dist/pipeline/compiler/task-bridge.d.ts +41 -0
- package/dist/pipeline/compiler/task-bridge.js +92 -0
- package/dist/pipeline/compiler/task-graph-builder.d.ts +1 -4
- package/dist/pipeline/compiler/task-graph-builder.js +1 -4
- package/dist/pipeline/compiler/telemetry/index.d.ts +1 -1
- package/dist/pipeline/compiler/telemetry/index.js +1 -1
- package/dist/pipeline/compiler/variable-resolver.d.ts +1 -1
- package/dist/pipeline/compiler/variable-resolver.js +1 -1
- package/dist/pipeline/coverage-audit.d.ts +1 -1
- package/dist/pipeline/coverage-audit.js +1 -1
- package/dist/pipeline/degradations.d.ts +1 -1
- package/dist/pipeline/degradations.js +1 -1
- package/dist/pipeline/expand-tasks.d.ts +2 -2
- package/dist/pipeline/expand-tasks.js +2 -2
- package/dist/pipeline/failure-modes.d.ts +1 -1
- package/dist/pipeline/failure-modes.js +13 -1
- package/dist/pipeline/gap-analysis.d.ts +1 -1
- package/dist/pipeline/gap-analysis.js +3 -1
- package/dist/pipeline/generate-configs.d.ts +2 -2
- package/dist/pipeline/generate-configs.js +16 -9
- package/dist/pipeline/grader-compare-runner.d.ts +1 -1
- package/dist/pipeline/grader-compare-runner.js +7 -1
- package/dist/pipeline/grader-comparison.d.ts +1 -1
- package/dist/pipeline/grader-comparison.js +1 -1
- package/dist/pipeline/grader-consistency-runner.d.ts +1 -1
- package/dist/pipeline/grader-consistency-runner.js +7 -1
- package/dist/pipeline/grader-consistency.d.ts +1 -1
- package/dist/pipeline/grader-consistency.js +1 -1
- package/dist/pipeline/grader-sensitivity-runner.d.ts +1 -1
- package/dist/pipeline/grader-sensitivity-runner.js +1 -1
- package/dist/pipeline/grader-sensitivity.d.ts +1 -1
- package/dist/pipeline/grader-sensitivity.js +1 -1
- package/dist/pipeline/grader-validate-runner.d.ts +1 -1
- package/dist/pipeline/grader-validate-runner.js +2 -2
- package/dist/pipeline/grader-validation.d.ts +1 -1
- package/dist/pipeline/grader-validation.js +1 -1
- package/dist/pipeline/map-request-to-config.js +16 -2
- package/dist/pipeline/mirror-repo-tasks.d.ts +8 -8
- package/dist/pipeline/mirror-repo-tasks.js +10 -10
- package/dist/pipeline/plan-format.d.ts +1 -1
- package/dist/pipeline/plan-format.js +1 -1
- package/dist/pipeline/plan.d.ts +1 -1
- package/dist/pipeline/plan.js +68 -30
- package/dist/pipeline/probe.d.ts +1 -1
- package/dist/pipeline/probe.js +1 -1
- package/dist/pipeline/readiness-report.d.ts +2 -2
- package/dist/pipeline/readiness-report.js +2 -2
- package/dist/pipeline/release-classification.d.ts +1 -1
- package/dist/pipeline/release-classification.js +1 -1
- package/dist/pipeline/release-report.d.ts +1 -1
- package/dist/pipeline/release-report.js +1 -1
- package/dist/pipeline/repo-eval-comment.d.ts +1 -1
- package/dist/pipeline/repo-eval-comment.js +1 -1
- package/dist/pipeline/repo-threshold-evaluator.d.ts +1 -1
- package/dist/pipeline/repo-threshold-evaluator.js +1 -1
- package/dist/pipeline/resolve-mappings.d.ts +6 -6
- package/dist/pipeline/resolve-mappings.js +44 -44
- package/dist/pipeline/retrieval-metrics.d.ts +3 -3
- package/dist/pipeline/retrieval-metrics.js +28 -20
- package/dist/pipeline/steps/calculate-scores-step.d.ts +11 -0
- package/dist/pipeline/steps/calculate-scores-step.js +89 -0
- package/dist/pipeline/steps/compare-step.d.ts +18 -0
- package/dist/pipeline/steps/compare-step.js +90 -0
- package/dist/pipeline/steps/eval-step.d.ts +53 -0
- package/dist/pipeline/steps/eval-step.js +347 -0
- package/dist/pipeline/steps/fetch-docs-step.d.ts +11 -0
- package/dist/pipeline/steps/fetch-docs-step.js +84 -0
- package/dist/pipeline/steps/generate-configs-step.d.ts +11 -0
- package/dist/pipeline/steps/generate-configs-step.js +98 -0
- package/dist/pipeline/steps/grader-consistency-step.d.ts +21 -0
- package/dist/pipeline/steps/grader-consistency-step.js +74 -0
- package/dist/pipeline/steps/publish-report-step.d.ts +57 -0
- package/dist/pipeline/steps/publish-report-step.js +243 -0
- package/dist/pipeline/steps/report-step.d.ts +13 -0
- package/dist/pipeline/steps/report-step.js +56 -0
- package/dist/pipeline/steps/update-scores-step.d.ts +11 -0
- package/dist/pipeline/steps/update-scores-step.js +42 -0
- package/dist/pipeline/targeted-loo.d.ts +1 -1
- package/dist/pipeline/targeted-loo.js +1 -1
- package/dist/pipeline/thresholds.d.ts +1 -1
- package/dist/pipeline/thresholds.js +1 -1
- package/dist/pipeline/validate.js +13 -0
- package/dist/report-store.d.ts +17 -0
- package/dist/report-store.js +24 -0
- package/dist/scripts/agent-behavior-report.d.ts +19 -0
- package/dist/scripts/agent-behavior-report.js +315 -0
- package/dist/scripts/baseline.d.ts +43 -0
- package/dist/scripts/baseline.js +267 -0
- package/dist/scripts/calculate-scores.d.ts +166 -0
- package/dist/scripts/calculate-scores.js +1296 -0
- package/dist/scripts/compare.d.ts +22 -0
- package/dist/scripts/compare.js +334 -0
- package/dist/scripts/coverage-audit.d.ts +44 -0
- package/dist/scripts/coverage-audit.js +209 -0
- package/dist/scripts/debug-eval.d.ts +19 -0
- package/dist/scripts/debug-eval.js +73 -0
- package/dist/scripts/discovery-report.d.ts +58 -0
- package/dist/scripts/discovery-report.js +250 -0
- package/dist/scripts/fetch-docs.d.ts +35 -0
- package/dist/scripts/fetch-docs.js +472 -0
- package/dist/scripts/generate-configs.d.ts +66 -0
- package/dist/scripts/generate-configs.js +459 -0
- package/dist/scripts/grader-api.d.ts +27 -0
- package/dist/scripts/grader-api.js +206 -0
- package/dist/scripts/grader-compare.d.ts +22 -0
- package/dist/scripts/grader-compare.js +368 -0
- package/dist/scripts/grader-consistency.d.ts +20 -0
- package/dist/scripts/grader-consistency.js +313 -0
- package/dist/scripts/grader-sensitivity.d.ts +22 -0
- package/dist/scripts/grader-sensitivity.js +354 -0
- package/dist/scripts/grader-validate.d.ts +19 -0
- package/dist/scripts/grader-validate.js +267 -0
- package/dist/scripts/measure-retrieval.d.ts +10 -0
- package/dist/scripts/measure-retrieval.js +145 -0
- package/dist/scripts/migrate-task-mode.d.ts +1 -1
- package/dist/scripts/migrate-task-mode.js +1 -1
- package/dist/scripts/migrate-tasks-to-content-lake.d.ts +1 -1
- package/dist/scripts/migrate-tasks-to-content-lake.js +1 -1
- package/dist/scripts/pipeline.d.ts +76 -0
- package/dist/scripts/pipeline.js +1031 -0
- package/dist/scripts/pr-comment.d.ts +10 -0
- package/dist/scripts/pr-comment.js +510 -0
- package/dist/scripts/readiness-report.d.ts +88 -0
- package/dist/scripts/readiness-report.js +342 -0
- package/dist/scripts/update-quality-scores.d.ts +15 -0
- package/dist/scripts/update-quality-scores.js +184 -0
- package/dist/scripts/validate-task-sources.d.ts +1 -1
- package/dist/scripts/validate-task-sources.js +1 -1
- package/dist/scripts/validate.d.ts +13 -0
- package/dist/scripts/validate.js +79 -0
- package/dist/scripts/webhook-server.d.ts +26 -0
- package/dist/scripts/webhook-server.js +147 -0
- package/dist/scripts/weekly-digest.d.ts +24 -0
- package/dist/scripts/weekly-digest.js +144 -0
- package/dist/sinks/format-slack.d.ts +64 -0
- package/dist/sinks/format-slack.js +306 -0
- package/dist/sinks/slack-sink.d.ts +27 -0
- package/dist/sinks/slack-sink.js +78 -0
- package/dist/sinks/types.d.ts +1 -1
- package/dist/sinks/types.js +1 -1
- package/dist/sinks/webhook-sink.d.ts +19 -0
- package/dist/sinks/webhook-sink.js +50 -0
- package/dist/tasks/knowledge-probe/define-type-api.task.ts +66 -0
- package/dist/tasks/knowledge-probe/groq-projections.task.ts +62 -0
- package/dist/tasks/literacy/content-lake.task.ts +181 -0
- package/dist/tasks/literacy/frameworks.task.ts +129 -0
- package/dist/tasks/literacy/functions.task.ts +70 -0
- package/dist/tasks/literacy/groq.task.ts +259 -0
- package/dist/tasks/literacy/image-handling.task.ts +95 -0
- package/dist/tasks/literacy/nextjs-live.task.ts +76 -0
- package/dist/tasks/literacy/portable-text.task.ts +169 -0
- package/dist/tasks/literacy/studio-setup.task.ts +134 -0
- package/dist/tasks/literacy/visual-editing.task.ts +147 -0
- package/package.json +32 -24
- package/tasks/.expanded.agentic.yaml +280 -0
- package/tasks/.expanded.yaml +565 -0
- package/tasks/knowledge-probe/define-type-api.task.ts +11 -0
- package/tasks/knowledge-probe/groq-projections.task.ts +3 -0
- package/tasks/literacy/content-lake.task.ts +181 -0
- package/tasks/literacy/frameworks.task.ts +1 -0
- package/tasks/literacy/functions.task.ts +1 -0
- package/tasks/literacy/groq.task.ts +1 -0
- package/tasks/literacy/image-handling.task.ts +95 -0
- package/tasks/literacy/nextjs-live.task.ts +2 -1
- package/tasks/literacy/portable-text.task.ts +169 -0
- package/tasks/literacy/studio-setup.task.ts +5 -2
- package/tasks/literacy/visual-editing.task.ts +1 -0
- package/LICENSE +0 -21
- package/tasks/frameworks.yaml +0 -98
- package/tasks/functions.yaml +0 -51
- package/tasks/groq.yaml +0 -216
- package/tasks/nextjs-live.yaml +0 -62
- package/tasks/studio-setup.yaml +0 -111
- package/tasks/visual-editing.yaml +0 -120
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* lib/grader-validate.ts — DEPRECATED re-export shim.
|
|
3
|
+
*
|
|
4
|
+
* The real implementation has moved to pipeline/grader-validate-runner.ts.
|
|
5
|
+
* The pure analysis functions live in pipeline/grader-validation.ts.
|
|
6
|
+
* This shim preserves backward compatibility for direct CLI invocation.
|
|
7
|
+
*
|
|
8
|
+
* TODO: Update all importers to use pipeline/ modules, then delete this file.
|
|
9
|
+
*
|
|
10
|
+
* @deprecated Import from ../pipeline/grader-validate-runner.js instead.
|
|
11
|
+
*/
|
|
12
|
+
// oxlint-disable-next-line import/no-unassigned-import -- side-effect: loads .env into process.env
|
|
13
|
+
import "dotenv/config";
|
|
14
|
+
import { dirname, resolve } from "path";
|
|
15
|
+
import { fileURLToPath } from "url";
|
|
16
|
+
// Re-export from pipeline modules
|
|
17
|
+
export { formatValidationReport, runGraderValidate, } from "../pipeline/grader-validate-runner.js";
|
|
18
|
+
export { classifyCorrelation, validateGrader, } from "../pipeline/grader-validation.js";
|
|
19
|
+
import { runGraderValidate } from "../pipeline/grader-validate-runner.js";
|
|
20
|
+
// Directory containing this module, derived from import.meta.url.
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
21
|
+
// Two directory levels above this module's directory; handed to runGraderValidate() as rootDir.
const ROOT = resolve(__dirname, "..", "..");
|
|
22
|
+
// ---------------------------------------------------------------------------
|
|
23
|
+
// CLI argument parsing
|
|
24
|
+
// ---------------------------------------------------------------------------
|
|
25
|
+
/**
 * Parse the command-line options for the legacy grader-validate CLI.
 *
 * Reads `process.argv.slice(2)`. When a help flag is present the usage text
 * is printed and the process exits with status 0.
 *
 * @returns {{ graderOverride: (string|undefined), maeThreshold: number }}
 *   `graderOverride` is the value following `--grader` (undefined when absent);
 *   `maeThreshold` is the numeric value following `--threshold`, or 10 when
 *   the option is absent or empty.
 * @throws {Error} When `--threshold` is given a value that does not parse as a number.
 */
function parseCliArgs() {
    const args = process.argv.slice(2);
    // Value-taking option: returns the token following `--name`, if any.
    function getOption(name) {
        const idx = args.indexOf(`--${name}`);
        return idx !== -1 && idx + 1 < args.length ? args[idx + 1] : undefined;
    }
    // Boolean long flag: true when `--name` appears anywhere in the arguments.
    function getFlag(name) {
        return args.includes(`--${name}`);
    }
    // The usage text advertises `-h`, so the single-dash short form must be
    // matched explicitly; `--h` stays accepted for backward compatibility.
    const showHelp = getFlag("help") || args.includes("-h") || getFlag("h");
    if (showHelp) {
        console.log(`
Usage: pnpm grader-validate [options]

Validate grader accuracy against human reference grades.

Options:
--grader <model> Grader model to validate (default: from config/models.yaml)
--threshold <n> MAE threshold for pass/fail (default: 10)
--help, -h Show this help
`);
        process.exit(0);
    }
    const thresholdStr = getOption("threshold");
    const maeThreshold = thresholdStr ? Number.parseFloat(thresholdStr) : 10;
    // Fail fast on a non-numeric threshold instead of passing NaN downstream,
    // where it would surface as a misleading "exceeds threshold NaN" failure.
    if (Number.isNaN(maeThreshold)) {
        throw new Error(`Invalid --threshold value "${thresholdStr}": expected a number`);
    }
    return {
        graderOverride: getOption("grader"),
        maeThreshold,
    };
}
|
|
54
|
+
/**
 * Legacy CLI entry point for grader validation.
 *
 * Reads the CLI arguments, runs the validation against the package root,
 * and terminates the process with exit code 1 when the result does not
 * pass the MAE threshold.
 *
 * @deprecated Use runGraderValidate() from pipeline/grader-validate-runner.js instead.
 * @returns {Promise<void>}
 */
export async function main() {
  const cliArgs = parseCliArgs();
  const outcome = await runGraderValidate({
    graderModel: cliArgs.graderOverride,
    maeThreshold: cliArgs.maeThreshold,
    rootDir: ROOT,
  });

  if (outcome.passesThreshold) {
    return;
  }

  // Threshold not met: report and signal failure through the exit code.
  console.error(`\n ❌ VALIDATION FAILED: MAE ${outcome.overallMae} exceeds threshold ${cliArgs.maeThreshold}`);
  process.exit(1);
}
|
|
71
|
+
// Run main() only when the executed script path (process.argv[1]) ends with
// this module's file name; a plain import does not trigger it.
if (["grader-validate.ts", "grader-validate.js"].some((suffix) => process.argv[1]?.endsWith(suffix))) {
  main().catch((err) => {
    console.error("❌ Fatal error:", err);
    process.exit(1);
  });
}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* lib/measure-retrieval.ts — DEPRECATED re-export shim.
|
|
3
|
+
*
|
|
4
|
+
* The real implementation has moved to pipeline/measure-retrieval.ts.
|
|
5
|
+
* This shim preserves backward compatibility for direct CLI invocation.
|
|
6
|
+
*
|
|
7
|
+
* TODO: Update all importers to use pipeline/measure-retrieval.ts, then delete this file.
|
|
8
|
+
*
|
|
9
|
+
* @deprecated Import from ../pipeline/measure-retrieval.js instead.
|
|
10
|
+
*/
|
|
11
|
+
import "dotenv/config";
|
|
12
|
+
export { calculateNDCG, calculateRecall, formatRetrievalTable, measureRetrieval, type MeasureRetrievalOptions, type RetrievalResult, type RetrievalSummary, type RetrieverFn, } from "../pipeline/measure-retrieval.js";
|
|
13
|
+
/** @deprecated Use measureRetrieval() from pipeline/measure-retrieval.js instead. */
|
|
14
|
+
export declare function main(): Promise<void>;
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* lib/measure-retrieval.ts — DEPRECATED re-export shim.
|
|
3
|
+
*
|
|
4
|
+
* The real implementation has moved to pipeline/measure-retrieval.ts.
|
|
5
|
+
* This shim preserves backward compatibility for direct CLI invocation.
|
|
6
|
+
*
|
|
7
|
+
* TODO: Update all importers to use pipeline/measure-retrieval.ts, then delete this file.
|
|
8
|
+
*
|
|
9
|
+
* @deprecated Import from ../pipeline/measure-retrieval.js instead.
|
|
10
|
+
*/
|
|
11
|
+
// oxlint-disable-next-line import/no-unassigned-import -- side-effect: loads .env into process.env
|
|
12
|
+
import "dotenv/config";
|
|
13
|
+
import { writeFileSync, mkdirSync } from "fs";
|
|
14
|
+
import { join, dirname } from "path";
|
|
15
|
+
import { getSanityClient } from "../sanity/client.js";
|
|
16
|
+
import { formatRetrievalTable, measureRetrieval, } from "../pipeline/measure-retrieval.js";
|
|
17
|
+
// Re-export pipeline types and functions
|
|
18
|
+
export { calculateNDCG, calculateRecall, formatRetrievalTable, measureRetrieval, } from "../pipeline/measure-retrieval.js";
|
|
19
|
+
// ---------------------------------------------------------------------------
|
|
20
|
+
// Sanity text search retriever (side-effecting — uses Sanity client)
|
|
21
|
+
// ---------------------------------------------------------------------------
|
|
22
|
+
/**
 * Run a scored text search over published articles and return their slugs.
 *
 * The GROQ query excludes drafts, boosts title matches (3) over body text
 * matches (1), orders by score descending, and keeps the first `k` results.
 *
 * @param {string} query - Search text bound to `$query`.
 * @param {number} [k=10] - Maximum number of results, bound to `$k`.
 * @returns {Promise<Array>} The `slug` value of each result, in score order.
 */
async function retrieveDocsForQuery(query, k = 10) {
  const groq = `
*[_type == "article" && !(_id in path("drafts.**"))]
| score(
boost(title match $query, 3),
boost(pt::text(content) match $query, 1)
)
| order(_score desc)
[0...$k] {
"slug": slug.current,
_score
}
`;
  const params = { k, query };
  const sanity = getSanityClient();
  const hits = await sanity.fetch(groq, params);
  return hits.map((hit) => hit.slug);
}
|
|
38
|
+
// ---------------------------------------------------------------------------
|
|
39
|
+
// Legacy main() entry point
|
|
40
|
+
// ---------------------------------------------------------------------------
|
|
41
|
+
/**
 * Legacy CLI entry point: measures retrieval quality with the Sanity text
 * search retriever, prints per-task metrics and a summary table, and writes
 * the summary to results/latest/retrieval-results.json under the package root.
 *
 * @deprecated Use measureRetrieval() from pipeline/measure-retrieval.js instead.
 * @returns {Promise<void>}
 */
export async function main() {
  console.log("=== Sanity AI Literacy — Retrieval Quality Measurement ===\n");

  // URL#pathname keeps percent-escapes (a space stays "%20") and yields
  // "/C:/..." on Windows, so it is not a usable filesystem path.
  // fileURLToPath() performs the correct conversion. It is imported here
  // because this module's top-level imports do not include it.
  const { fileURLToPath } = await import("node:url");
  const ROOT = join(dirname(fileURLToPath(import.meta.url)), "..", "..");

  const summary = await measureRetrieval({
    // The first callback argument (area) is not printed.
    onProgress: (_area, taskId, result) => {
      console.log(` ${taskId}:`);
      console.log(` Recall@5: ${(result.recall_at_5 * 100).toFixed(1)}%`);
      console.log(` Recall@10: ${(result.recall_at_10 * 100).toFixed(1)}%`);
      console.log(` NDCG@10: ${(result.ndcg_at_10 * 100).toFixed(1)}%`);
    },
    retriever: retrieveDocsForQuery,
    rootDir: ROOT,
  });

  // Print summary
  console.log();
  console.log(formatRetrievalTable(summary));

  // Persist results
  const outDir = join(ROOT, "results", "latest");
  mkdirSync(outDir, { recursive: true });
  writeFileSync(join(outDir, "retrieval-results.json"), JSON.stringify(summary, null, 2));
  console.log("\nResults written to results/latest/retrieval-results.json");
}
|
|
64
|
+
// Run main() only when the executed script path (process.argv[1]) ends with
// this module's file name; importing the module does not trigger it.
if (["measure-retrieval.ts", "measure-retrieval.js"].some((suffix) => process.argv[1]?.endsWith(suffix))) {
  main().catch((err) => {
    console.error("Fatal error:", err);
    process.exit(1);
  });
}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* lib/pr-comment.ts — DEPRECATED re-export shim.
|
|
3
|
+
*
|
|
4
|
+
* The real implementation has moved to pipeline/pr-comment.ts.
|
|
5
|
+
*
|
|
6
|
+
* @deprecated Import from ../pipeline/pr-comment.js instead.
|
|
7
|
+
*/
|
|
8
|
+
export { generatePrComment, type PrCommentOptions, } from "../pipeline/pr-comment.js";
|
|
9
|
+
import type { PrCommentOptions } from "../pipeline/pr-comment.js";
|
|
10
|
+
/**
|
|
11
|
+
* Legacy main() entry point.
|
|
12
|
+
* @deprecated Use generatePrComment() from pipeline/pr-comment.ts instead.
|
|
13
|
+
*/
|
|
14
|
+
export declare function main(options?: Omit<PrCommentOptions, "rootDir"> & {
|
|
15
|
+
rootDir?: string;
|
|
16
|
+
}): void;
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* lib/pr-comment.ts — DEPRECATED re-export shim.
|
|
3
|
+
*
|
|
4
|
+
* The real implementation has moved to pipeline/pr-comment.ts.
|
|
5
|
+
*
|
|
6
|
+
* @deprecated Import from ../pipeline/pr-comment.js instead.
|
|
7
|
+
*/
|
|
8
|
+
import { dirname, resolve } from "path";
|
|
9
|
+
import { fileURLToPath } from "url";
|
|
10
|
+
export { generatePrComment, } from "../pipeline/pr-comment.js";
|
|
11
|
+
import { generatePrComment } from "../pipeline/pr-comment.js";
|
|
12
|
+
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
13
|
+
const ROOT = resolve(__dirname, "..", "..");
|
|
14
|
+
/**
 * Legacy entry point that forwards to generatePrComment().
 *
 * @deprecated Use generatePrComment() from pipeline/pr-comment.ts instead.
 * @param {object} [options] - Optional overrides.
 * @param {string} [options.rootDir] - Root directory; falls back to the
 *   module-level ROOT when null or undefined.
 * @param {*} [options.outputPath] - Forwarded unchanged.
 * @param {*} [options.promptfooUrl] - Forwarded unchanged.
 * @returns {void}
 */
export function main(options) {
  const rootDir = options?.rootDir ?? ROOT;
  const outputPath = options?.outputPath;
  const promptfooUrl = options?.promptfooUrl;
  generatePrComment({ rootDir, outputPath, promptfooUrl });
}
|
|
25
|
+
// Run main() only when the executed script path (process.argv[1]) ends with
// this module's file name.
if (["pr-comment.ts", "pr-comment.js"].some((suffix) => process.argv[1]?.endsWith(suffix))) {
  main();
}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* lib/readiness-report.ts — DEPRECATED re-export shim.
|
|
3
|
+
*
|
|
4
|
+
* The real implementation has moved to pipeline/readiness-report.ts.
|
|
5
|
+
*
|
|
6
|
+
* @deprecated Import from ../pipeline/readiness-report.js instead.
|
|
7
|
+
*/
|
|
8
|
+
export { formatReadinessMarkdown, generateReadinessReport, type DimensionCheck, type HistoryEntry, type ReadinessReport, } from "../pipeline/readiness-report.js";
|
|
9
|
+
/**
|
|
10
|
+
* Legacy main() entry point.
|
|
11
|
+
* @deprecated Use generateReadinessReport() + formatReadinessMarkdown() directly.
|
|
12
|
+
*/
|
|
13
|
+
export declare function main(): void;
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* lib/readiness-report.ts — DEPRECATED re-export shim.
|
|
3
|
+
*
|
|
4
|
+
* The real implementation has moved to pipeline/readiness-report.ts.
|
|
5
|
+
*
|
|
6
|
+
* @deprecated Import from ../pipeline/readiness-report.js instead.
|
|
7
|
+
*/
|
|
8
|
+
import { existsSync, readFileSync, readdirSync, writeFileSync } from "node:fs";
|
|
9
|
+
import { dirname, join, resolve } from "node:path";
|
|
10
|
+
import { fileURLToPath } from "node:url";
|
|
11
|
+
import { load } from "js-yaml";
|
|
12
|
+
import { ThresholdConfigSchema, } from "../pipeline/schemas.js";
|
|
13
|
+
export { formatReadinessMarkdown, generateReadinessReport, } from "../pipeline/readiness-report.js";
|
|
14
|
+
import { generateReadinessReport, formatReadinessMarkdown, } from "../pipeline/readiness-report.js";
|
|
15
|
+
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
16
|
+
const ROOT = resolve(__dirname, "..", "..");
|
|
17
|
+
const SCORE_SUMMARY_PATH = join(ROOT, "results", "latest", "score-summary.json");
|
|
18
|
+
const GAP_ANALYSIS_PATH = join(ROOT, "results", "latest", "gap-analysis.json");
|
|
19
|
+
const THRESHOLDS_PATH = join(ROOT, "config", "thresholds.yaml");
|
|
20
|
+
const BASELINES_DIR = join(ROOT, "results", "baselines");
|
|
21
|
+
/**
 * Legacy CLI entry point for the readiness report.
 *
 * Parses `--area <area>`, `--history` and `--output <file>` from
 * process.argv, loads the score summary, threshold config, optional gap
 * analysis and (with `--history`) baseline files, then renders the report
 * as markdown to the output file or stdout. Exits with code 1 when no area
 * is given or when the report does not pass.
 *
 * @deprecated Use generateReadinessReport() + formatReadinessMarkdown() directly.
 * @returns {void}
 * @throws {Error} When the score summary or threshold config file is missing.
 */
export function main() {
  const cliArgs = process.argv.slice(2);
  let area;
  let output;
  let history = false;

  let cursor = 0;
  while (cursor < cliArgs.length) {
    // Value-taking options are honoured only when a following argument exists.
    const hasValue = cursor + 1 < cliArgs.length;
    switch (cliArgs[cursor]) {
      case "--area":
        if (hasValue) {
          area = cliArgs[++cursor];
        }
        break;
      case "--history":
        history = true;
        break;
      case "--output":
        if (hasValue) {
          output = cliArgs[++cursor];
        }
        break;
      default:
        break;
    }
    cursor += 1;
  }

  if (!area) {
    console.error("Usage: readiness-report --area <area> [--history] [--output <file>]");
    process.exit(1);
  }

  // Required input: score summary
  if (!existsSync(SCORE_SUMMARY_PATH)) {
    throw new Error(`Score summary not found at ${SCORE_SUMMARY_PATH}. Run \`pnpm pipeline\` first.`);
  }
  const scoreSummary = JSON.parse(readFileSync(SCORE_SUMMARY_PATH, "utf-8"));

  // Required input: threshold config (YAML, validated against the schema)
  if (!existsSync(THRESHOLDS_PATH)) {
    throw new Error(`Threshold config not found at ${THRESHOLDS_PATH}.`);
  }
  const thresholdConfig = ThresholdConfigSchema.parse(load(readFileSync(THRESHOLDS_PATH, "utf-8")));

  // Optional input: gap analysis
  const gapAnalysis = existsSync(GAP_ANALYSIS_PATH)
    ? JSON.parse(readFileSync(GAP_ANALYSIS_PATH, "utf-8"))
    : undefined;

  // Optional input: one history entry per baseline file that has a score
  // for the requested area, in sorted file-name order.
  const historyEntries = [];
  if (history && existsSync(BASELINES_DIR)) {
    const baselineFiles = readdirSync(BASELINES_DIR)
      .filter((fileName) => fileName.endsWith(".json"))
      .sort();
    baselineFiles.forEach((fileName) => {
      try {
        const snapshot = JSON.parse(readFileSync(join(BASELINES_DIR, fileName), "utf-8"));
        const areaScore = snapshot.scores?.find((entry) => entry.feature === area);
        if (areaScore) {
          const baseName = fileName.replace(/\.json$/, "");
          const segments = baseName.split("_");
          historyEntries.push({
            score: areaScore.totalScore,
            // Everything after the fourth "_"-separated segment is the tag.
            tag: segments.length > 4 ? segments.slice(4).join("_") : undefined,
            timestamp: snapshot.timestamp ?? baseName,
          });
        }
      } catch {
        // Skip malformed baseline files
      }
    });
  }

  const report = generateReadinessReport({
    area,
    gapAnalysis,
    history: historyEntries,
    scoreSummary,
    thresholdConfig,
  });
  const markdown = formatReadinessMarkdown(report);

  if (output) {
    writeFileSync(output, markdown, "utf-8");
    console.error(`✅ Readiness report written to ${output}`);
  } else {
    console.log(markdown);
  }

  if (!report.pass) {
    process.exit(1);
  }
}
|
|
105
|
+
// Run main() only when the executed script path (process.argv[1]) ends with
// this module's file name.
if (["readiness-report.ts", "readiness-report.js"].some((suffix) => process.argv[1]?.endsWith(suffix))) {
  main();
}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* lib/webhook-server.ts — DEPRECATED re-export shim.
|
|
3
|
+
*
|
|
4
|
+
* The real implementation has moved to pipeline/webhook-server.ts.
|
|
5
|
+
* This shim preserves backward compatibility for direct CLI invocation.
|
|
6
|
+
*
|
|
7
|
+
* TODO: Update all importers to use pipeline/webhook-server.ts, then delete this file.
|
|
8
|
+
*
|
|
9
|
+
* @deprecated Import from ../pipeline/webhook-server.js instead.
|
|
10
|
+
*/
|
|
11
|
+
export { startWebhookServer, type WebhookServerOptions, } from "../pipeline/webhook-server.js";
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* lib/webhook-server.ts — DEPRECATED re-export shim.
|
|
3
|
+
*
|
|
4
|
+
* The real implementation has moved to pipeline/webhook-server.ts.
|
|
5
|
+
* This shim preserves backward compatibility for direct CLI invocation.
|
|
6
|
+
*
|
|
7
|
+
* TODO: Update all importers to use pipeline/webhook-server.ts, then delete this file.
|
|
8
|
+
*
|
|
9
|
+
* @deprecated Import from ../pipeline/webhook-server.js instead.
|
|
10
|
+
*/
|
|
11
|
+
import { dirname, resolve } from "path";
|
|
12
|
+
import { fileURLToPath } from "url";
|
|
13
|
+
import { startWebhookServer } from "../pipeline/webhook-server.js";
|
|
14
|
+
export { startWebhookServer, } from "../pipeline/webhook-server.js";
|
|
15
|
+
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
16
|
+
const ROOT = resolve(__dirname, "..", "..");
|
|
17
|
+
// Start the webhook server as soon as this module is evaluated (e.g. via
// `tsx src/lib/webhook-server.ts`). There is no invocation guard here, so
// any import of this shim starts the server as well.
// Settings come from the environment, with string defaults parsed as
// base-10 integers.
{
  const {
    WEBHOOK_DAILY_BUDGET,
    WEBHOOK_DEBOUNCE_MS,
    GITHUB_TOKEN,
    WEBHOOK_PORT,
  } = process.env;
  startWebhookServer({
    dailyBudget: parseInt(WEBHOOK_DAILY_BUDGET ?? "20", 10),
    debounceMs: parseInt(WEBHOOK_DEBOUNCE_MS ?? "10000", 10),
    githubToken: GITHUB_TOKEN ?? "",
    port: parseInt(WEBHOOK_PORT ?? "3333", 10),
    rootDir: ROOT,
  });
}
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* weekly-digest.ts
|
|
3
|
+
*
|
|
4
|
+
* CLI script to generate and deliver a weekly evaluation digest.
|
|
5
|
+
*
|
|
6
|
+
* Queries the Sanity Content Lake for all reports within the configured
|
|
7
|
+
* lookback window, computes trend analysis, and delivers the digest
|
|
8
|
+
* via configured channels (Slack, stdout, or both).
|
|
9
|
+
*
|
|
10
|
+
* Usage:
|
|
11
|
+
* pnpm weekly-digest # send to configured Slack webhook
|
|
12
|
+
* pnpm weekly-digest --dry-run # print to stdout only
|
|
13
|
+
* pnpm weekly-digest --lookback 14 # 14-day lookback window
|
|
14
|
+
* pnpm weekly-digest --json # output raw JSON
|
|
15
|
+
*
|
|
16
|
+
* Environment variables:
|
|
17
|
+
* SLACK_WEBHOOK_URL — Slack incoming webhook URL
|
|
18
|
+
* SANITY_API_TOKEN — Sanity read token
|
|
19
|
+
* AILF_TRIGGER_TYPE — set to "scheduled" by the cron workflow
|
|
20
|
+
* AILF_SCHEDULE — the schedule name (e.g., "weekly-digest")
|
|
21
|
+
*
|
|
22
|
+
* @see docs/design-docs/report-store/implementation.md — Phase 5
|
|
23
|
+
*/
|
|
24
|
+
export declare function main(): Promise<void>;
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* weekly-digest.ts
|
|
3
|
+
*
|
|
4
|
+
* CLI script to generate and deliver a weekly evaluation digest.
|
|
5
|
+
*
|
|
6
|
+
* Queries the Sanity Content Lake for all reports within the configured
|
|
7
|
+
* lookback window, computes trend analysis, and delivers the digest
|
|
8
|
+
* via configured channels (Slack, stdout, or both).
|
|
9
|
+
*
|
|
10
|
+
* Usage:
|
|
11
|
+
* pnpm weekly-digest # send to configured Slack webhook
|
|
12
|
+
* pnpm weekly-digest --dry-run # print to stdout only
|
|
13
|
+
* pnpm weekly-digest --lookback 14 # 14-day lookback window
|
|
14
|
+
* pnpm weekly-digest --json # output raw JSON
|
|
15
|
+
*
|
|
16
|
+
* Environment variables:
|
|
17
|
+
* SLACK_WEBHOOK_URL — Slack incoming webhook URL
|
|
18
|
+
* SANITY_API_TOKEN — Sanity read token
|
|
19
|
+
* AILF_TRIGGER_TYPE — set to "scheduled" by the cron workflow
|
|
20
|
+
* AILF_SCHEDULE — the schedule name (e.g., "weekly-digest")
|
|
21
|
+
*
|
|
22
|
+
* @see docs/design-docs/report-store/implementation.md — Phase 5
|
|
23
|
+
*/
|
|
24
|
+
import { config as dotenvConfig } from "dotenv";
|
|
25
|
+
import { existsSync } from "fs";
|
|
26
|
+
import { dirname, resolve } from "path";
|
|
27
|
+
import { fileURLToPath } from "url";
|
|
28
|
+
import { generateDigest } from "../schedules/digest.js";
|
|
29
|
+
import { getDigestConfig } from "../schedules/loader.js";
|
|
30
|
+
import { formatWeeklyDigest } from "../sinks/slack/format.js";
|
|
31
|
+
// Load root .env (same override behavior as pipeline.ts)
|
|
32
|
+
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
33
|
+
const envPath = resolve(__dirname, "..", "..", "..", "..", ".env");
|
|
34
|
+
if (existsSync(envPath)) {
|
|
35
|
+
dotenvConfig({ override: true, path: envPath });
|
|
36
|
+
}
|
|
37
|
+
// ---------------------------------------------------------------------------
|
|
38
|
+
// CLI argument parsing
|
|
39
|
+
// ---------------------------------------------------------------------------
|
|
40
|
+
// Everything after the node binary and the script path.
const args = process.argv.slice(2);

/**
 * Look up the value that follows `--<name>` on the command line.
 *
 * @param {string} name - Option name without the leading dashes.
 * @returns {string|undefined} The next argument, or undefined when the
 *   option is absent or is the last argument.
 */
function getOption(name) {
  const position = args.indexOf(`--${name}`);
  if (position < 0) {
    return undefined;
  }
  const valueIndex = position + 1;
  return valueIndex < args.length ? args[valueIndex] : undefined;
}

/**
 * Report whether `--<name>` appears anywhere on the command line.
 *
 * @param {string} name - Flag name without the leading dashes.
 * @returns {boolean}
 */
function hasFlag(name) {
  return args.indexOf(`--${name}`) !== -1;
}

// Flags and options are read once, at module load.
const DRY_RUN = hasFlag("dry-run");
const JSON_OUTPUT = hasFlag("json");
const lookbackOverride = getOption("lookback");
|
|
51
|
+
// ---------------------------------------------------------------------------
|
|
52
|
+
// Main
|
|
53
|
+
// ---------------------------------------------------------------------------
|
|
54
|
+
/**
 * Generate the weekly digest and deliver it.
 *
 * Prints a short summary, then either dumps the digest as JSON (`--json`),
 * previews the Slack message (`--dry-run`), or POSTs the message to the
 * configured Slack webhook. When no webhook URL is configured the message
 * text is printed instead. Exits the process with code 0 when
 * generateDigest() returns nothing. Slack delivery failures are logged as
 * warnings and do not throw.
 *
 * @returns {Promise<void>}
 * @throws {Error} When `--lookback` is not a non-negative number of days.
 */
export async function main() {
  console.log();
  console.log("=== AI Literacy Weekly Digest ===");
  console.log();
  // Load digest config
  const digestConfig = getDigestConfig();
  const lookbackDays = lookbackOverride
    ? parseInt(lookbackOverride, 10)
    : (digestConfig?.lookbackDays ?? 7);
  // parseInt() returns NaN for non-numeric input; without this check a
  // mistyped --lookback would reach generateDigest() as NaN (or as a
  // negative window) instead of failing visibly.
  if (lookbackOverride && (Number.isNaN(lookbackDays) || lookbackDays < 0)) {
    throw new Error(`Invalid --lookback value "${lookbackOverride}": expected a non-negative number of days`);
  }
  console.log(` Lookback: ${lookbackDays} days`);
  console.log(` Mode: ${DRY_RUN ? "dry run (stdout only)" : "live"}`);
  console.log();
  // Generate digest — uses AILF_REPORT_* env vars for report store access,
  // independent of SANITY_DATASET/SANITY_PROJECT_ID (which control doc evaluation)
  const digest = await generateDigest({
    dataset: process.env.AILF_REPORT_DATASET,
    lookbackDays,
    projectId: process.env.AILF_REPORT_PROJECT_ID,
    token: process.env.AILF_REPORT_SANITY_API_TOKEN ?? process.env.SANITY_API_TOKEN,
  });
  if (!digest) {
    console.log(" No reports found in the lookback window. Nothing to send.");
    process.exit(0);
  }
  // Output
  console.log(` Reports found: ${digest.reportCount}`);
  console.log(` Overall: ${Math.round(digest.overallLatest)} (${digest.overallTrend})`);
  console.log(` Improved: ${digest.improved.join(", ") || "none"}`);
  console.log(` Regressed: ${digest.regressed.join(", ") || "none"}`);
  console.log(` Stable: ${digest.stable.join(", ") || "none"}`);
  console.log();
  if (JSON_OUTPUT) {
    console.log(JSON.stringify(digest, null, 2));
    return;
  }
  // Format for Slack
  const message = formatWeeklyDigest(digest);
  if (DRY_RUN) {
    console.log(" --- Slack Message Preview ---");
    console.log(` Text: ${message.text}`);
    console.log();
    for (const block of message.blocks) {
      if (block.text) {
        console.log(` [${block.type}] ${block.text.text}`);
      }
      if (block.fields) {
        for (const field of block.fields) {
          console.log(` [field] ${field.text}`);
        }
      }
      if (block.elements) {
        for (const el of block.elements) {
          console.log(` [element] ${el.text}`);
        }
      }
    }
    console.log();
    return;
  }
  // Deliver via Slack; the configured URL takes precedence over the env var.
  const webhookUrl = digestConfig?.slackWebhookUrl ?? process.env.SLACK_WEBHOOK_URL;
  if (!webhookUrl) {
    console.warn(" ⚠️ No Slack webhook URL configured. Set SLACK_WEBHOOK_URL or configure in schedules.yaml");
    console.log(" Printing digest to stdout instead:");
    console.log();
    console.log(` ${message.text}`);
    return;
  }
  console.log(" Sending to Slack...");
  try {
    const response = await fetch(webhookUrl, {
      body: JSON.stringify(message),
      headers: { "Content-Type": "application/json" },
      method: "POST",
    });
    if (response.ok) {
      console.log(" ✅ Digest sent successfully");
    }
    else {
      const text = await response.text();
      console.warn(` ⚠️ Slack delivery failed: ${response.status} ${text}`);
    }
  }
  catch (error) {
    // Delivery is best-effort: network errors are reported, not rethrown.
    console.warn(` ⚠️ Slack delivery error: ${error instanceof Error ? error.message : String(error)}`);
  }
}
|
|
141
|
+
// Run main() only when the executed script path (process.argv[1]) ends with
// this module's file name; importing the module does not trigger it.
if (["weekly-digest.ts", "weekly-digest.js"].some((suffix) => process.argv[1]?.endsWith(suffix))) {
  main().catch((error) => {
    console.error("Fatal error:", error);
    process.exit(1);
  });
}
|
|
@@ -8,7 +8,9 @@
|
|
|
8
8
|
* Once all commands construct ResolvedConfig directly (or use --config),
|
|
9
9
|
* this bridge can be deleted.
|
|
10
10
|
*/
|
|
11
|
+
import { join } from "node:path";
|
|
11
12
|
import { createAppContext } from "../composition-root.js";
|
|
13
|
+
import { tryLoadConfigFile } from "../pipeline/compiler/config-loader.js";
|
|
12
14
|
/**
|
|
13
15
|
* Map legacy ResolvedOptions to ResolvedConfig.
|
|
14
16
|
*
|
|
@@ -50,6 +52,7 @@ export function mapToResolvedConfig(opts, rootDir) {
|
|
|
50
52
|
noCache: opts.noCache,
|
|
51
53
|
noRemoteCache: opts.noRemoteCache,
|
|
52
54
|
graderReplications: opts.graderReplications,
|
|
55
|
+
outputDir: opts.outputDir,
|
|
53
56
|
outputPath: opts.outputPath,
|
|
54
57
|
urls: opts.urlArgs.length > 0 ? opts.urlArgs : undefined,
|
|
55
58
|
headers: opts.headerArgs.length > 0
|
|
@@ -75,6 +78,10 @@ export function mapToResolvedConfig(opts, rootDir) {
|
|
|
75
78
|
remote: opts.remote ?? false,
|
|
76
79
|
apiUrl: opts.apiUrl ?? "https://ailf-api.sanity.build",
|
|
77
80
|
apiKey: opts.apiKey,
|
|
81
|
+
captureEnabled: opts.captureEnabled ?? false,
|
|
82
|
+
captureDir: opts.captureDir ?? join(rootDir, "results", "captures"),
|
|
83
|
+
captureCompress: opts.captureCompress ?? true,
|
|
84
|
+
captureExtras: opts.captureExtras ?? true,
|
|
78
85
|
};
|
|
79
86
|
}
|
|
80
87
|
/**
|
|
@@ -85,5 +92,11 @@ export function mapToResolvedConfig(opts, rootDir) {
|
|
|
85
92
|
*/
|
|
86
93
|
export function buildAppContext(opts, rootDir) {
  const resolved = mapToResolvedConfig(opts, rootDir);
  // evalBudgetMs has no CLI flag; it is read from the "models" config file
  // and copied onto the resolved config only when it is present and truthy.
  const modelsFile = tryLoadConfigFile("models", rootDir);
  const hasBudget = Boolean(modelsFile?.data?.evalBudgetMs);
  if (hasBudget) {
    resolved.evalBudgetMs = modelsFile.data.evalBudgetMs;
  }
  return createAppContext(resolved);
}
|
|
@@ -35,8 +35,10 @@ export function buildStepSequence(ctx, pipelineStart = Date.now()) {
|
|
|
35
35
|
if (config.repoTasksPath) {
|
|
36
36
|
steps.push(new MirrorRepoTasksStep());
|
|
37
37
|
}
|
|
38
|
-
// Step 1: Fetch documentation (
|
|
39
|
-
|
|
38
|
+
// Step 1: Fetch documentation (literacy mode only — other modes don't use canonical docs)
|
|
39
|
+
if (config.mode === "literacy") {
|
|
40
|
+
steps.push(new FetchDocsStep());
|
|
41
|
+
}
|
|
40
42
|
// Step 2: Generate Promptfoo configs
|
|
41
43
|
steps.push(new GenerateConfigsStep());
|
|
42
44
|
// Step 3: Run evaluation (steps handle --skip-eval internally)
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Build cache context strings from the resolved pipeline configuration.
|
|
3
|
+
*
|
|
4
|
+
* These non-file strings participate in cache key computation so that
|
|
5
|
+
* different CLI filters (mode, variant, area, task, tag) produce
|
|
6
|
+
* distinct cache entries. Without them, running `--mode knowledge-probe`
|
|
7
|
+
* after `--mode literacy` would return cached literacy results.
|
|
8
|
+
*
|
|
9
|
+
* @see packages/core/src/ports/pipeline-step.ts — cacheContext() method
|
|
10
|
+
* @see packages/eval/src/pipeline/cache.ts — hashFiles() context parameter
|
|
11
|
+
*/
|
|
12
|
+
import type { ResolvedConfig } from "../_vendor/ailf-core/index.d.ts";
|
|
13
|
+
/**
|
|
14
|
+
* Derive deterministic context strings from the resolved pipeline config.
|
|
15
|
+
*
|
|
16
|
+
* Included in every cacheable step's key so that:
|
|
17
|
+
* - `--mode literacy` and `--mode knowledge-probe` never share cache
|
|
18
|
+
* - `--variant agentic` and `--variant baseline` never share cache
|
|
19
|
+
* - `--area studio` and `--area groq` never share cache
|
|
20
|
+
* - `--task T001` and `--task T002` never share cache
|
|
21
|
+
* - `--tag critical` and `--tag smoke` never share cache
|
|
22
|
+
*/
|
|
23
|
+
export declare function buildCacheContext(config: ResolvedConfig): string[];
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Build cache context strings from the resolved pipeline configuration.
|
|
3
|
+
*
|
|
4
|
+
* These non-file strings participate in cache key computation so that
|
|
5
|
+
* different CLI filters (mode, variant, area, task, tag) produce
|
|
6
|
+
* distinct cache entries. Without them, running `--mode knowledge-probe`
|
|
7
|
+
* after `--mode literacy` would return cached literacy results.
|
|
8
|
+
*
|
|
9
|
+
* @see packages/core/src/ports/pipeline-step.ts — cacheContext() method
|
|
10
|
+
* @see packages/eval/src/pipeline/cache.ts — hashFiles() context parameter
|
|
11
|
+
*/
|
|
12
|
+
/**
 * Turn the resolved pipeline config into a list of cache-context strings.
 *
 * The list always starts with `mode:<mode>`. It is followed by
 * `variant:<variant>` when a variant is set, and then by `areas:`, `tasks:`
 * and `tags:` entries for each filter that is a non-empty array. Filter
 * values are sorted on a copy and comma-joined, so the result does not
 * depend on the order the filters were given in and the input arrays are
 * not modified.
 *
 * @param {object} config - Resolved pipeline config; reads `mode`,
 *   `variant`, `areas`, `tasks` and `tags`.
 * @returns {string[]} Context strings in the fixed order described above.
 */
export function buildCacheContext(config) {
  const entries = [`mode:${config.mode}`];

  if (config.variant) {
    entries.push(`variant:${config.variant}`);
  }

  // Each filter contributes at most one "<label>:<sorted values>" entry.
  const filters = [
    ["areas", config.areas],
    ["tasks", config.tasks],
    ["tags", config.tags],
  ];
  for (const [label, values] of filters) {
    if (!values || values.length === 0) {
      continue;
    }
    const sortedValues = [...values].sort();
    entries.push(`${label}:${sortedValues.join(",")}`);
  }

  return entries;
}
|