npm - @sanity/ailf - Versions diffs - 2.0.0 → 2.0.1 - Mend

@sanity/ailf 2.0.0 → 2.0.1

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (442) hide show

package/canonical/grader-references/README.md +2 -2
package/canonical/reference-solutions/content-lake/mutations.ts +160 -0
package/canonical/reference-solutions/content-lake/realtime.ts +187 -0
package/canonical/reference-solutions/image-handling/asset-pipeline.tsx +166 -0
package/canonical/reference-solutions/portable-text/custom-blocks.ts +204 -0
package/canonical/reference-solutions/portable-text/rendering.tsx +163 -0
package/config/features.ts +1 -1
package/config/models.ts +28 -23
package/config/sources.ts +1 -1
package/config/thresholds.ts +1 -1
package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.d.ts +10 -0
package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.js +185 -0
package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.d.ts +6 -0
package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.js +42 -0
package/dist/_vendor/ailf-core/artifact-capture/noop-collector.d.ts +14 -0
package/dist/_vendor/ailf-core/artifact-capture/noop-collector.js +25 -0
package/dist/_vendor/ailf-core/config-helpers.d.ts +6 -0
package/dist/_vendor/ailf-core/config-helpers.js +29 -0
package/dist/_vendor/ailf-core/examples/index.d.ts +164 -94
package/dist/_vendor/ailf-core/examples/index.js +208 -114
package/dist/_vendor/ailf-core/index.d.ts +1 -0
package/dist/_vendor/ailf-core/index.js +1 -0
package/dist/_vendor/ailf-core/ports/artifact-collector.d.ts +94 -0
package/dist/_vendor/ailf-core/ports/artifact-collector.js +13 -0
package/dist/_vendor/ailf-core/ports/capture-comparator.d.ts +138 -0
package/dist/_vendor/ailf-core/ports/capture-comparator.js +10 -0
package/dist/_vendor/ailf-core/ports/context.d.ts +20 -1
package/dist/_vendor/ailf-core/ports/eval-runner.d.ts +6 -0
package/dist/_vendor/ailf-core/ports/index.d.ts +2 -0
package/dist/_vendor/ailf-core/ports/pipeline-step.d.ts +11 -0
package/dist/_vendor/ailf-core/ports/task-source.d.ts +3 -3
package/dist/_vendor/ailf-core/ports/task-source.js +3 -3
package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +6 -1
package/dist/_vendor/ailf-core/schemas/pipeline-request.js +14 -2
package/dist/_vendor/ailf-core/services/config-helpers.d.ts +16 -1
package/dist/_vendor/ailf-core/services/config-helpers.js +21 -0
package/dist/_vendor/ailf-core/services/index.d.ts +1 -1
package/dist/_vendor/ailf-core/services/index.js +1 -1
package/dist/_vendor/ailf-core/services/scoring.js +9 -0
package/dist/_vendor/ailf-core/types/generalized-task.d.ts +12 -1
package/dist/_vendor/ailf-core/types/generalized-task.js +1 -1
package/dist/_vendor/ailf-core/types/index.d.ts +47 -4
package/dist/_vendor/ailf-core/types/plugin-registry.d.ts +27 -0
package/dist/_vendor/ailf-shared/eval-modes.d.ts +15 -0
package/dist/_vendor/ailf-shared/eval-modes.js +18 -0
package/dist/_vendor/ailf-tasks/cli.d.ts +8 -0
package/dist/_vendor/ailf-tasks/cli.js +61 -0
package/dist/_vendor/ailf-tasks/index.d.ts +13 -0
package/dist/_vendor/ailf-tasks/index.js +16 -0
package/dist/_vendor/ailf-tasks/parser.d.ts +27 -0
package/dist/_vendor/ailf-tasks/parser.js +73 -0
package/dist/_vendor/ailf-tasks/schemas.d.ts +198 -0
package/dist/_vendor/ailf-tasks/schemas.js +180 -0
package/dist/_vendor/ailf-tasks/validation.d.ts +47 -0
package/dist/_vendor/ailf-tasks/validation.js +162 -0
package/dist/adapters/api-client/remediation.js +2 -2
package/dist/adapters/config-sources/file-config-adapter.js +6 -1
package/dist/adapters/eval-runners/promptfoo-eval-adapter.js +8 -2
package/dist/adapters/index.d.ts +0 -1
package/dist/adapters/index.js +0 -1
package/dist/adapters/task-sources/composite-task-source.d.ts +1 -1
package/dist/adapters/task-sources/composite-task-source.js +1 -1
package/dist/adapters/task-sources/content-lake-task-source.d.ts +4 -6
package/dist/adapters/task-sources/content-lake-task-source.js +4 -6
package/dist/adapters/task-sources/index.d.ts +1 -2
package/dist/adapters/task-sources/index.js +1 -2
package/dist/adapters/task-sources/repo-schemas.d.ts +1 -1
package/dist/adapters/task-sources/repo-schemas.js +2 -2
package/dist/adapters/task-sources/repo-task-source.js +1 -1
package/dist/adapters/task-sources/repo-trigger.d.ts +1 -1
package/dist/adapters/task-sources/repo-trigger.js +1 -1
package/dist/adapters/task-sources/task-file-loader.d.ts +9 -6
package/dist/adapters/task-sources/task-file-loader.js +20 -6
package/dist/agent-observer/test-imports.d.ts +7 -0
package/dist/agent-observer/test-imports.js +185 -0
package/dist/artifact-capture/comparator.d.ts +22 -0
package/dist/artifact-capture/comparator.js +493 -0
package/dist/artifact-capture/filesystem-collector.d.ts +42 -0
package/dist/artifact-capture/filesystem-collector.js +237 -0
package/dist/artifact-capture/redact-artifact.d.ts +20 -0
package/dist/artifact-capture/redact-artifact.js +115 -0
package/dist/assertions/source-isolation.d.ts +1 -1
package/dist/assertions/source-isolation.js +1 -1
package/dist/cli.js +4 -0
package/dist/commands/calculate-scores.js +1 -0
package/dist/commands/capture-compare.d.ts +15 -0
package/dist/commands/capture-compare.js +253 -0
package/dist/commands/capture-list.d.ts +12 -0
package/dist/commands/capture-list.js +147 -0
package/dist/commands/capture.d.ts +9 -0
package/dist/commands/capture.js +16 -0
package/dist/commands/chronic-failures.d.ts +8 -0
package/dist/commands/chronic-failures.js +33 -0
package/dist/commands/explain-handler.d.ts +1 -1
package/dist/commands/explain-handler.js +37 -8
package/dist/commands/fetch-docs.js +1 -0
package/dist/commands/generate-configs.d.ts +3 -3
package/dist/commands/generate-configs.js +20 -8
package/dist/commands/init.d.ts +2 -3
package/dist/commands/init.js +56 -170
package/dist/commands/pipeline-action.d.ts +7 -1
package/dist/commands/pipeline-action.js +43 -19
package/dist/commands/pipeline.d.ts +6 -1
package/dist/commands/pipeline.js +7 -2
package/dist/commands/pr-comment.js +1 -0
package/dist/commands/publish.js +1 -0
package/dist/commands/shared/help.js +2 -2
package/dist/commands/update-quality-scores.d.ts +5 -0
package/dist/commands/update-quality-scores.js +20 -0
package/dist/composition-root.d.ts +2 -3
package/dist/composition-root.js +27 -14
package/dist/config/features.ts +23 -0
package/dist/config/models.ts +100 -0
package/dist/config/prompts.ts +16 -0
package/dist/config/rubrics.ts +225 -0
package/dist/config/schedules.ts +47 -0
package/dist/config/sinks.ts +37 -0
package/dist/config/sources.ts +21 -0
package/dist/config/thresholds.ts +61 -0
package/dist/lib/agent-behavior-report.d.ts +8 -0
package/dist/lib/agent-behavior-report.js +185 -0
package/dist/lib/baseline.d.ts +19 -0
package/dist/lib/baseline.js +153 -0
package/dist/lib/calculate-scores.d.ts +23 -0
package/dist/lib/calculate-scores.js +42 -0
package/dist/lib/compare.d.ts +18 -0
package/dist/lib/compare.js +170 -0
package/dist/lib/coverage-audit.d.ts +4 -0
package/dist/lib/coverage-audit.js +42 -0
package/dist/lib/discovery-report.d.ts +13 -0
package/dist/lib/discovery-report.js +57 -0
package/dist/lib/fetch-docs.d.ts +30 -0
package/dist/lib/fetch-docs.js +171 -0
package/dist/lib/generate-configs.d.ts +25 -0
package/dist/lib/generate-configs.js +42 -0
package/dist/lib/grader-api.d.ts +21 -0
package/dist/lib/grader-api.js +34 -0
package/dist/lib/grader-compare.d.ts +19 -0
package/dist/lib/grader-compare.js +91 -0
package/dist/lib/grader-consistency.d.ts +27 -0
package/dist/lib/grader-consistency.js +79 -0
package/dist/lib/grader-sensitivity.d.ts +19 -0
package/dist/lib/grader-sensitivity.js +75 -0
package/dist/lib/grader-validate.d.ts +19 -0
package/dist/lib/grader-validate.js +78 -0
package/dist/lib/measure-retrieval.d.ts +14 -0
package/dist/lib/measure-retrieval.js +71 -0
package/dist/lib/pr-comment.d.ts +16 -0
package/dist/lib/pr-comment.js +28 -0
package/dist/lib/readiness-report.d.ts +13 -0
package/dist/lib/readiness-report.js +108 -0
package/dist/lib/webhook-server.d.ts +11 -0
package/dist/lib/webhook-server.js +24 -0
package/dist/lib/weekly-digest.d.ts +24 -0
package/dist/lib/weekly-digest.js +148 -0
package/dist/orchestration/build-app-context.js +13 -0
package/dist/orchestration/cache-context.d.ts +23 -0
package/dist/orchestration/cache-context.js +43 -0
package/dist/orchestration/env-bridge.d.ts +21 -0
package/dist/orchestration/env-bridge.js +66 -0
package/dist/orchestration/load-pipeline-tasks.d.ts +34 -0
package/dist/orchestration/load-pipeline-tasks.js +52 -0
package/dist/orchestration/pipeline-orchestrator.js +75 -5
package/dist/orchestration/step-runner.js +5 -1
package/dist/orchestration/steps/calculate-scores-step.d.ts +1 -0
package/dist/orchestration/steps/calculate-scores-step.js +13 -0
package/dist/orchestration/steps/callback-step.js +10 -1
package/dist/orchestration/steps/compare-step.js +6 -3
package/dist/orchestration/steps/discovery-report-step.js +6 -2
package/dist/orchestration/steps/fetch-docs-shell.d.ts +17 -0
package/dist/orchestration/steps/fetch-docs-shell.js +30 -0
package/dist/orchestration/steps/fetch-docs-step.d.ts +1 -0
package/dist/orchestration/steps/fetch-docs-step.js +30 -16
package/dist/orchestration/steps/gap-analysis-step.js +13 -2
package/dist/orchestration/steps/generate-configs-step.d.ts +1 -0
package/dist/orchestration/steps/generate-configs-step.js +50 -15
package/dist/orchestration/steps/mirror-repo-tasks-step.d.ts +1 -1
package/dist/orchestration/steps/mirror-repo-tasks-step.js +1 -1
package/dist/orchestration/steps/publish-report-step.js +19 -0
package/dist/orchestration/steps/readiness-step.js +8 -3
package/dist/orchestration/steps/report-step.js +17 -4
package/dist/orchestration/steps/run-eval-step.d.ts +1 -0
package/dist/orchestration/steps/run-eval-step.js +51 -31
package/dist/pipeline/agent-behavior-report.js +6 -0
package/dist/pipeline/attribution.d.ts +1 -1
package/dist/pipeline/attribution.js +1 -1
package/dist/pipeline/cache.js +29 -15
package/dist/pipeline/calculate-scores.d.ts +2 -0
package/dist/pipeline/calculate-scores.js +70 -33
package/dist/pipeline/chronic-failures.d.ts +55 -0
package/dist/pipeline/chronic-failures.js +110 -0
package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +33 -0
package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.js +2 -3
package/dist/pipeline/compiler/__tests__/task-bridge.test.d.ts +9 -0
package/dist/pipeline/compiler/__tests__/task-bridge.test.js +339 -0
package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.d.ts +10 -0
package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.js +509 -0
package/dist/pipeline/compiler/assertion-mapper.d.ts +1 -1
package/dist/pipeline/compiler/assertion-mapper.js +1 -1
package/dist/pipeline/compiler/compiler-to-yaml.d.ts +2 -7
package/dist/pipeline/compiler/compiler-to-yaml.js +2 -7
package/dist/pipeline/compiler/config-loader.d.ts +14 -0
package/dist/pipeline/compiler/config-loader.js +42 -2
package/dist/pipeline/compiler/fixture-resolver.d.ts +1 -1
package/dist/pipeline/compiler/fixture-resolver.js +1 -1
package/dist/pipeline/compiler/ignore-fields.d.ts +1 -1
package/dist/pipeline/compiler/ignore-fields.js +1 -1
package/dist/pipeline/compiler/index.d.ts +2 -5
package/dist/pipeline/compiler/index.js +2 -5
package/dist/pipeline/compiler/literacy-bridge.d.ts +1 -1
package/dist/pipeline/compiler/literacy-bridge.js +1 -1
package/dist/pipeline/compiler/mode-bases/agent-harness.d.ts +1 -1
package/dist/pipeline/compiler/mode-bases/agent-harness.js +1 -1
package/dist/pipeline/compiler/mode-bases/knowledge-probe.d.ts +1 -1
package/dist/pipeline/compiler/mode-bases/knowledge-probe.js +1 -1
package/dist/pipeline/compiler/mode-bases/literacy.d.ts +13 -2
package/dist/pipeline/compiler/mode-bases/literacy.js +55 -1
package/dist/pipeline/compiler/mode-bases/mcp-server.d.ts +1 -1
package/dist/pipeline/compiler/mode-bases/mcp-server.js +1 -1
package/dist/pipeline/compiler/mode-handlers/agent-harness/index.d.ts +1 -1
package/dist/pipeline/compiler/mode-handlers/agent-harness/index.js +1 -1
package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.d.ts +70 -0
package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.js +485 -0
package/dist/pipeline/compiler/mode-handlers/index.d.ts +2 -2
package/dist/pipeline/compiler/mode-handlers/index.js +2 -2
package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.d.ts +1 -1
package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.js +1 -1
package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.d.ts +76 -0
package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.js +245 -0
package/dist/pipeline/compiler/mode-handlers/literacy/index.d.ts +1 -1
package/dist/pipeline/compiler/mode-handlers/literacy/index.js +1 -1
package/dist/pipeline/compiler/mode-handlers/literacy-handler.d.ts +89 -0
package/dist/pipeline/compiler/mode-handlers/literacy-handler.js +379 -0
package/dist/pipeline/compiler/mode-handlers/mcp-assertions.d.ts +50 -0
package/dist/pipeline/compiler/mode-handlers/mcp-assertions.js +334 -0
package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.d.ts +1 -1
package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.js +1 -1
package/dist/pipeline/compiler/mode-handlers/mcp-server/index.d.ts +1 -1
package/dist/pipeline/compiler/mode-handlers/mcp-server/index.js +1 -1
package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.js +4 -0
package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.d.ts +69 -0
package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.js +307 -0
package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.js +22 -5
package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.js +6 -0
package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.d.ts +10 -5
package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.js +314 -7
package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.d.ts +10 -0
package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.d.ts +65 -0
package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.js +368 -0
package/dist/pipeline/compiler/presets/sanity-literacy.d.ts +1 -1
package/dist/pipeline/compiler/presets/sanity-literacy.js +1 -1
package/dist/pipeline/compiler/promptfoo-compiler.d.ts +1 -4
package/dist/pipeline/compiler/promptfoo-compiler.js +3 -12
package/dist/pipeline/compiler/provider-assembler.js +13 -7
package/dist/pipeline/compiler/sandbox/docker-sandbox.d.ts +1 -1
package/dist/pipeline/compiler/sandbox/docker-sandbox.js +1 -1
package/dist/pipeline/compiler/sandbox/fixture-provisioner.d.ts +1 -1
package/dist/pipeline/compiler/sandbox/fixture-provisioner.js +1 -1
package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.d.ts +1 -1
package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.js +1 -1
package/dist/pipeline/compiler/sandbox/index.d.ts +1 -1
package/dist/pipeline/compiler/sandbox/index.js +1 -1
package/dist/pipeline/compiler/sandbox/sandbox-selector.d.ts +1 -1
package/dist/pipeline/compiler/sandbox/sandbox-selector.js +1 -1
package/dist/pipeline/compiler/sandbox/sandbox-strategy.d.ts +1 -1
package/dist/pipeline/compiler/sandbox/sandbox-strategy.js +1 -1
package/dist/pipeline/compiler/sandbox/tempdir-sandbox.d.ts +1 -1
package/dist/pipeline/compiler/sandbox/tempdir-sandbox.js +1 -1
package/dist/pipeline/compiler/scoring-bridge.d.ts +1 -1
package/dist/pipeline/compiler/scoring-bridge.js +1 -1
package/dist/pipeline/compiler/task-bridge.d.ts +41 -0
package/dist/pipeline/compiler/task-bridge.js +92 -0
package/dist/pipeline/compiler/task-graph-builder.d.ts +1 -4
package/dist/pipeline/compiler/task-graph-builder.js +1 -4
package/dist/pipeline/compiler/telemetry/index.d.ts +1 -1
package/dist/pipeline/compiler/telemetry/index.js +1 -1
package/dist/pipeline/compiler/variable-resolver.d.ts +1 -1
package/dist/pipeline/compiler/variable-resolver.js +1 -1
package/dist/pipeline/coverage-audit.d.ts +1 -1
package/dist/pipeline/coverage-audit.js +1 -1
package/dist/pipeline/degradations.d.ts +1 -1
package/dist/pipeline/degradations.js +1 -1
package/dist/pipeline/failure-modes.d.ts +1 -1
package/dist/pipeline/failure-modes.js +13 -1
package/dist/pipeline/gap-analysis.d.ts +1 -1
package/dist/pipeline/gap-analysis.js +3 -1
package/dist/pipeline/generate-configs.d.ts +2 -2
package/dist/pipeline/generate-configs.js +15 -8
package/dist/pipeline/grader-compare-runner.d.ts +1 -1
package/dist/pipeline/grader-compare-runner.js +7 -1
package/dist/pipeline/grader-comparison.d.ts +1 -1
package/dist/pipeline/grader-comparison.js +1 -1
package/dist/pipeline/grader-consistency-runner.d.ts +1 -1
package/dist/pipeline/grader-consistency-runner.js +7 -1
package/dist/pipeline/grader-consistency.d.ts +1 -1
package/dist/pipeline/grader-consistency.js +1 -1
package/dist/pipeline/grader-sensitivity-runner.d.ts +1 -1
package/dist/pipeline/grader-sensitivity-runner.js +1 -1
package/dist/pipeline/grader-sensitivity.d.ts +1 -1
package/dist/pipeline/grader-sensitivity.js +1 -1
package/dist/pipeline/grader-validate-runner.d.ts +1 -1
package/dist/pipeline/grader-validate-runner.js +2 -2
package/dist/pipeline/grader-validation.d.ts +1 -1
package/dist/pipeline/grader-validation.js +1 -1
package/dist/pipeline/map-request-to-config.js +15 -2
package/dist/pipeline/mirror-repo-tasks.d.ts +1 -1
package/dist/pipeline/mirror-repo-tasks.js +1 -1
package/dist/pipeline/plan-format.d.ts +1 -1
package/dist/pipeline/plan-format.js +1 -1
package/dist/pipeline/plan.d.ts +1 -1
package/dist/pipeline/plan.js +67 -29
package/dist/pipeline/probe.d.ts +1 -1
package/dist/pipeline/probe.js +1 -1
package/dist/pipeline/readiness-report.d.ts +2 -2
package/dist/pipeline/readiness-report.js +2 -2
package/dist/pipeline/release-classification.d.ts +1 -1
package/dist/pipeline/release-classification.js +1 -1
package/dist/pipeline/release-report.d.ts +1 -1
package/dist/pipeline/release-report.js +1 -1
package/dist/pipeline/repo-eval-comment.d.ts +1 -1
package/dist/pipeline/repo-eval-comment.js +1 -1
package/dist/pipeline/repo-threshold-evaluator.d.ts +1 -1
package/dist/pipeline/repo-threshold-evaluator.js +1 -1
package/dist/pipeline/resolve-mappings.d.ts +6 -6
package/dist/pipeline/resolve-mappings.js +44 -44
package/dist/pipeline/retrieval-metrics.d.ts +3 -3
package/dist/pipeline/retrieval-metrics.js +28 -20
package/dist/pipeline/steps/calculate-scores-step.d.ts +11 -0
package/dist/pipeline/steps/calculate-scores-step.js +89 -0
package/dist/pipeline/steps/compare-step.d.ts +18 -0
package/dist/pipeline/steps/compare-step.js +90 -0
package/dist/pipeline/steps/eval-step.d.ts +53 -0
package/dist/pipeline/steps/eval-step.js +347 -0
package/dist/pipeline/steps/fetch-docs-step.d.ts +11 -0
package/dist/pipeline/steps/fetch-docs-step.js +84 -0
package/dist/pipeline/steps/generate-configs-step.d.ts +11 -0
package/dist/pipeline/steps/generate-configs-step.js +98 -0
package/dist/pipeline/steps/grader-consistency-step.d.ts +21 -0
package/dist/pipeline/steps/grader-consistency-step.js +74 -0
package/dist/pipeline/steps/publish-report-step.d.ts +57 -0
package/dist/pipeline/steps/publish-report-step.js +243 -0
package/dist/pipeline/steps/report-step.d.ts +13 -0
package/dist/pipeline/steps/report-step.js +56 -0
package/dist/pipeline/steps/update-scores-step.d.ts +11 -0
package/dist/pipeline/steps/update-scores-step.js +42 -0
package/dist/pipeline/targeted-loo.d.ts +1 -1
package/dist/pipeline/targeted-loo.js +1 -1
package/dist/pipeline/thresholds.d.ts +1 -1
package/dist/pipeline/thresholds.js +1 -1
package/dist/pipeline/validate.js +13 -0
package/dist/report-store.d.ts +17 -0
package/dist/report-store.js +24 -0
package/dist/scripts/agent-behavior-report.d.ts +19 -0
package/dist/scripts/agent-behavior-report.js +315 -0
package/dist/scripts/baseline.d.ts +43 -0
package/dist/scripts/baseline.js +267 -0
package/dist/scripts/calculate-scores.d.ts +166 -0
package/dist/scripts/calculate-scores.js +1296 -0
package/dist/scripts/compare.d.ts +22 -0
package/dist/scripts/compare.js +334 -0
package/dist/scripts/coverage-audit.d.ts +44 -0
package/dist/scripts/coverage-audit.js +209 -0
package/dist/scripts/debug-eval.d.ts +19 -0
package/dist/scripts/debug-eval.js +73 -0
package/dist/scripts/discovery-report.d.ts +58 -0
package/dist/scripts/discovery-report.js +250 -0
package/dist/scripts/fetch-docs.d.ts +35 -0
package/dist/scripts/fetch-docs.js +472 -0
package/dist/scripts/generate-configs.d.ts +66 -0
package/dist/scripts/generate-configs.js +459 -0
package/dist/scripts/grader-api.d.ts +27 -0
package/dist/scripts/grader-api.js +206 -0
package/dist/scripts/grader-compare.d.ts +22 -0
package/dist/scripts/grader-compare.js +368 -0
package/dist/scripts/grader-consistency.d.ts +20 -0
package/dist/scripts/grader-consistency.js +313 -0
package/dist/scripts/grader-sensitivity.d.ts +22 -0
package/dist/scripts/grader-sensitivity.js +354 -0
package/dist/scripts/grader-validate.d.ts +19 -0
package/dist/scripts/grader-validate.js +267 -0
package/dist/scripts/measure-retrieval.d.ts +10 -0
package/dist/scripts/measure-retrieval.js +145 -0
package/dist/scripts/migrate-task-mode.d.ts +1 -1
package/dist/scripts/migrate-task-mode.js +1 -1
package/dist/scripts/migrate-tasks-to-content-lake.d.ts +1 -1
package/dist/scripts/migrate-tasks-to-content-lake.js +1 -1
package/dist/scripts/pipeline.d.ts +76 -0
package/dist/scripts/pipeline.js +1031 -0
package/dist/scripts/pr-comment.d.ts +10 -0
package/dist/scripts/pr-comment.js +510 -0
package/dist/scripts/readiness-report.d.ts +88 -0
package/dist/scripts/readiness-report.js +342 -0
package/dist/scripts/update-quality-scores.d.ts +15 -0
package/dist/scripts/update-quality-scores.js +184 -0
package/dist/scripts/validate-task-sources.d.ts +1 -1
package/dist/scripts/validate-task-sources.js +1 -1
package/dist/scripts/validate.d.ts +13 -0
package/dist/scripts/validate.js +79 -0
package/dist/scripts/webhook-server.d.ts +26 -0
package/dist/scripts/webhook-server.js +147 -0
package/dist/scripts/weekly-digest.d.ts +24 -0
package/dist/scripts/weekly-digest.js +144 -0
package/dist/sinks/format-slack.d.ts +64 -0
package/dist/sinks/format-slack.js +306 -0
package/dist/sinks/slack-sink.d.ts +27 -0
package/dist/sinks/slack-sink.js +78 -0
package/dist/sinks/types.d.ts +1 -1
package/dist/sinks/types.js +1 -1
package/dist/sinks/webhook-sink.d.ts +19 -0
package/dist/sinks/webhook-sink.js +50 -0
package/dist/tasks/knowledge-probe/define-type-api.task.ts +66 -0
package/dist/tasks/knowledge-probe/groq-projections.task.ts +62 -0
package/dist/tasks/literacy/content-lake.task.ts +181 -0
package/dist/tasks/literacy/frameworks.task.ts +129 -0
package/dist/tasks/literacy/functions.task.ts +70 -0
package/dist/tasks/literacy/groq.task.ts +259 -0
package/dist/tasks/literacy/image-handling.task.ts +95 -0
package/dist/tasks/literacy/nextjs-live.task.ts +76 -0
package/dist/tasks/literacy/portable-text.task.ts +169 -0
package/dist/tasks/literacy/studio-setup.task.ts +134 -0
package/dist/tasks/literacy/visual-editing.task.ts +147 -0
package/package.json +24 -24
package/tasks/.expanded.agentic.yaml +280 -0
package/tasks/.expanded.yaml +565 -0
package/tasks/knowledge-probe/define-type-api.task.ts +11 -0
package/tasks/knowledge-probe/groq-projections.task.ts +3 -0
package/tasks/literacy/content-lake.task.ts +181 -0
package/tasks/literacy/frameworks.task.ts +1 -0
package/tasks/literacy/functions.task.ts +1 -0
package/tasks/literacy/groq.task.ts +1 -0
package/tasks/literacy/image-handling.task.ts +95 -0
package/tasks/literacy/nextjs-live.task.ts +2 -1
package/tasks/literacy/portable-text.task.ts +169 -0
package/tasks/literacy/studio-setup.task.ts +5 -2
package/tasks/literacy/visual-editing.task.ts +1 -0
package/LICENSE +0 -21
package/tasks/frameworks.yaml +0 -98
package/tasks/functions.yaml +0 -51
package/tasks/groq.yaml +0 -216
package/tasks/nextjs-live.yaml +0 -62
package/tasks/studio-setup.yaml +0 -111
package/tasks/visual-editing.yaml +0 -120

package/dist/commands/init.d.ts CHANGED Viewed

@@ -6,9 +6,8 @@
  * not live evaluation tasks.
  *
  * TypeScript output (default) uses define* helpers from @sanity/ailf-core
- * for full IDE autocomplete and type checking. YAML output preserves
- * inline comments from the source files. JSON output is a plain
- * serialization of the parsed data.
+ * for full IDE autocomplete and type checking. YAML output serializes the
+ * parsed task data. JSON output is a plain serialization of the parsed data.
  *
  * Usage:
  *   ailf init                        # TypeScript output (default)

package/dist/commands/init.js CHANGED Viewed

@@ -6,9 +6,8 @@
  * not live evaluation tasks.
  *
  * TypeScript output (default) uses define* helpers from @sanity/ailf-core
- * for full IDE autocomplete and type checking. YAML output preserves
- * inline comments from the source files. JSON output is a plain
- * serialization of the parsed data.
+ * for full IDE autocomplete and type checking. YAML output serializes the
+ * parsed task data. JSON output is a plain serialization of the parsed data.
  *
  * Usage:
  *   ailf init                        # TypeScript output (default)
@@ -20,7 +19,7 @@
 import { Command } from "commander";
 import { existsSync, mkdirSync, writeFileSync } from "fs";
 import { resolve, relative } from "path";
-import { ailfConfigData, ailfConfigYaml, ailfConfigTs, taskYamlFiles, taskTsFiles, TASK_FILE_NAMES, TASK_TS_FILE_NAMES, allTaskData, workflowYaml, } from "../_vendor/ailf-core/index.js";
+import { ailfConfigData, ailfConfigYaml, ailfConfigTs, taskYamlFiles, taskTsFiles, TASK_FILE_NAMES, TASK_EXAMPLES, allTaskData, workflowYaml, } from "../_vendor/ailf-core/index.js";
 // ---------------------------------------------------------------------------
 // Command factory
 // ---------------------------------------------------------------------------
@@ -54,6 +53,10 @@ function rel(from, to) {
     const r = relative(from, to);
     return r.startsWith(".") ? r : `./${r}`;
 }
+/** Filter task stems by mode using TASK_EXAMPLES metadata */
+function taskStemsForMode(mode) {
+    return TASK_EXAMPLES.filter((t) => t.mode === mode).map((t) => t.stem);
+}
 // ---------------------------------------------------------------------------
 // Init logic
 // ---------------------------------------------------------------------------
@@ -66,6 +69,11 @@ async function runInit(opts) {
     }
     const format = opts.outputFormat;
     const force = opts.force;
+    if (format === "yaml") {
+        console.warn("  ⚠ --output-format yaml is deprecated. TypeScript (default) is the\n" +
+            "    recommended format — it provides full IDE autocomplete via defineTask().\n" +
+            "    YAML output will be removed in a future release.\n");
+    }
     // Resolve target from the caller's actual working directory
     const callerCwd = process.env.AILF_CALLER_CWD ?? process.cwd();
     const targetDir = resolve(callerCwd, opts.path);
@@ -115,71 +123,52 @@ async function runInit(opts) {
     // 3. Write example tasks to .ailf/tasks/
     const modeFilter = opts.mode;
     const isCustomMode = modeFilter === "custom";
+    // Determine which task stems to write based on mode filter
+    let stemsToWrite;
+    if (isCustomMode) {
+        // Custom mode: write one literacy example as a starting point
+        stemsToWrite = taskStemsForMode("literacy").slice(0, 1);
+    }
+    else if (modeFilter === "literacy") {
+        stemsToWrite = taskStemsForMode("literacy");
+    }
+    else if (modeFilter === "mcp-server") {
+        stemsToWrite = taskStemsForMode("mcp-server");
+    }
+    else if (modeFilter === "knowledge-probe") {
+        stemsToWrite = taskStemsForMode("knowledge-probe");
+    }
+    else {
+        // Default (no --mode): write all tasks
+        stemsToWrite = [...TASK_FILE_NAMES];
+    }
     if (format === "ts") {
-        // TypeScript: *.task.ts files with defineTask helper
-        // Default (no --mode): write literacy examples + draft MCP/probe examples
-        // --mode literacy: only literacy examples
-        // --mode mcp-server: only MCP examples (active, not draft)
-        // --mode custom: only a custom example task
-        if (!modeFilter || modeFilter === "literacy") {
-            for (const stem of TASK_TS_FILE_NAMES) {
-                const taskPath = resolve(tasksDir, `${stem}.task.ts`);
-                const content = taskTsFiles[stem];
-                if (writeIfNew(taskPath, content, force)) {
-                    written.push(rel(targetDir, taskPath));
-                }
-                else {
-                    skipped.push(rel(targetDir, taskPath));
-                }
-            }
-        }
-        // Draft examples for other modes (default init only)
-        if (!modeFilter) {
-            const mcpPath = resolve(tasksDir, "example-mcp-tool-usage.task.ts");
-            if (writeIfNew(mcpPath, MCP_DRAFT_TASK_TS, force)) {
-                written.push(rel(targetDir, mcpPath));
-            }
-            else {
-                skipped.push(rel(targetDir, mcpPath));
-            }
-            const probePath = resolve(tasksDir, "example-knowledge-probe.task.ts");
-            if (writeIfNew(probePath, PROBE_DRAFT_TASK_TS, force)) {
-                written.push(rel(targetDir, probePath));
-            }
-            else {
-                skipped.push(rel(targetDir, probePath));
+        for (const stem of stemsToWrite) {
+            let content = taskTsFiles[stem];
+            if (!content)
+                continue;
+            // For MCP-only init, activate the draft task
+            if (modeFilter === "mcp-server") {
+                content = content.replace('status: "draft",', '// status: "active", // Activated — this task runs in evaluations');
             }
-        }
-        // MCP-only init
-        if (modeFilter === "mcp-server") {
-            const mcpContent = MCP_DRAFT_TASK_TS.replace('status: "draft",', '// status: "active", // Activated — this task runs in evaluations');
-            const mcpPath = resolve(tasksDir, "example-mcp-tool-usage.task.ts");
-            if (writeIfNew(mcpPath, mcpContent, force)) {
-                written.push(rel(targetDir, mcpPath));
+            const fileName = isCustomMode && stem === stemsToWrite[0]
+                ? "example-custom.task.ts"
+                : `${stem}.task.ts`;
+            const taskPath = resolve(tasksDir, fileName);
+            if (writeIfNew(taskPath, content, force)) {
+                written.push(rel(targetDir, taskPath));
             }
             else {
-                skipped.push(rel(targetDir, mcpPath));
-            }
-        }
-        // Custom preset scaffold
-        if (isCustomMode) {
-            const customTaskPath = resolve(tasksDir, "example-custom.task.ts");
-            // Reuse the GROQ literacy task as a starting point
-            if (taskTsFiles[TASK_TS_FILE_NAMES[0]]) {
-                if (writeIfNew(customTaskPath, taskTsFiles[TASK_TS_FILE_NAMES[0]], force)) {
-                    written.push(rel(targetDir, customTaskPath));
-                }
-                else {
-                    skipped.push(rel(targetDir, customTaskPath));
-                }
+                skipped.push(rel(targetDir, taskPath));
             }
         }
     }
     else if (format === "yaml") {
-        // YAML: raw string passthrough (preserves comments)
-        for (const stem of TASK_FILE_NAMES) {
-            const taskPath = resolve(tasksDir, `${stem}.yaml`);
+        for (const stem of stemsToWrite) {
             const content = taskYamlFiles[stem];
+            if (!content)
+                continue;
+            const taskPath = resolve(tasksDir, `${stem}.yaml`);
             if (writeIfNew(taskPath, content, force)) {
                 written.push(rel(targetDir, taskPath));
             }
@@ -193,8 +182,12 @@ async function runInit(opts) {
         const tasks = Array.isArray(allTaskData)
             ? allTaskData
             : [allTaskData];
+        // Build a set of task IDs that match the selected stems
+        const selectedIds = new Set(stemsToWrite.flatMap((s) => TASK_EXAMPLES.filter((t) => t.stem === s).map((t) => t.stem)));
         for (const task of tasks) {
             const taskId = task.id;
+            if (!selectedIds.has(taskId))
+                continue;
             const taskPath = resolve(tasksDir, `${taskId}.json`);
             const content = JSON.stringify([task], null, 2) + "\n";
             if (writeIfNew(taskPath, content, force)) {
@@ -263,8 +256,7 @@ async function runInit(opts) {
     if (format === "ts") {
         console.log();
         console.log(`  💡 TypeScript tasks (${taskExt}) give you full IDE autocomplete`);
-        console.log("     via defineTask() from @sanity/ailf-core. YAML and JSON are");
-        console.log("     also supported — re-run with --output-format yaml if preferred.");
+        console.log("     via defineTask() from @sanity/ailf-core.");
     }
     console.log();
     console.log("  🔑 Retrieve secrets from 1Password (Sanity employees):");
@@ -282,114 +274,8 @@ async function runInit(opts) {
     console.log();
 }
 // ---------------------------------------------------------------------------
-// Draft example templates for non-literacy modes
+// Custom preset scaffold template
 // ---------------------------------------------------------------------------
-const MCP_DRAFT_TASK_TS = `/**
- * Example Task: MCP Server tool-use evaluation (DRAFT).
- *
- * Tests whether an LLM can correctly discover and invoke Sanity MCP server
- * tools. Connects to the hosted Sanity MCP server at https://mcp.sanity.io.
- *
- * Prerequisites:
- *   - A Sanity API token with read access (for token-based auth)
- *   - Or: OAuth authentication will be prompted on first connect
- *
- * Authentication options:
- *   1. Token-based: set SANITY_API_TOKEN env var
- *   2. OAuth: the server prompts for login on first connect
- *
- * Setup: npx sanity@latest mcp configure
- * Docs:  https://www.sanity.io/docs/ai/mcp-server
- *
- * This task is a DRAFT — it won't run unless activated or explicitly targeted.
- * To activate: change status to "active" or remove the status field.
- */
-import { defineTask } from "../_vendor/ailf-core/index.js"
-export default defineTask({
-  mode: "mcp-server",
-  id: "example-mcp-tool-usage",
-  title: "MCP tool discovery and invocation",
-  description: "Example — tests Sanity MCP server tool-use (draft)",
-  area: "mcp",
-  // ── Server configuration ────────────────────────────────────
-  // The Sanity MCP server is hosted remotely at https://mcp.sanity.io.
-  // Authentication via API token header or OAuth.
-  //
-  // For token auth, set SANITY_API_TOKEN in your environment.
-  serverConfig: {
-    transport: "streamable-http",
-    url: "https://mcp.sanity.io",
-    env: {
-      SANITY_API_TOKEN: process.env.SANITY_API_TOKEN ?? "",
-    },
-  },
-  prompt: {
-    text: \`Use the available MCP tools to query all documents of type "article"
-in the Sanity dataset. Return the title and slug for each document.
-Limit results to 5 documents.\`,
-  },
-  assertions: [
-    {
-      type: "llm-rubric",
-      template: "mcp-input-validation",
-      criteria: [
-        "Correctly identifies the query_documents tool",
-        "Passes a valid GROQ query to filter by document type",
-        "Requests only the needed fields (title, slug)",
-      ],
-    },
-  ],
-  status: "draft",
-})
-`;
-const PROBE_DRAFT_TASK_TS = `/**
- * Example Task: Knowledge probe baseline (DRAFT).
- *
- * Tests what the model knows about a topic without providing documentation.
- * Used to establish a baseline for comparison with literacy evaluations.
- * This task is a DRAFT — it won't run unless activated or explicitly targeted.
- *
- * To activate: change status to "active" or remove the status field.
- */
-import { defineTask } from "../_vendor/ailf-core/index.js"
-export default defineTask({
-  mode: "knowledge-probe",
-  id: "example-knowledge-probe",
-  title: "Model knowledge of GROQ syntax",
-  description: "Example — probes baseline model knowledge (draft)",
-  area: "groq",
-  prompt: {
-    text: \`Explain the GROQ query language used by Sanity. Cover:
-1. Basic query syntax and projections
-2. How to filter and sort results
-3. Common patterns for fetching related documents
-Provide working code examples.\`,
-  },
-  assertions: [
-    {
-      type: "llm-rubric",
-      template: "task-completion",
-      criteria: [
-        "Demonstrates understanding of GROQ query syntax",
-        "Shows filtering and projection patterns",
-        "Code examples use valid GROQ syntax",
-      ],
-    },
-  ],
-  status: "draft",
-})
-`;
 const CUSTOM_PRESET_TS = `/**
  * Custom preset — your domain-specific evaluation configuration.
  *
@@ -401,7 +287,7 @@ const CUSTOM_PRESET_TS = `/**
  * To use a different mode (e.g., "mcp-server"), change the mode field.
  * Available built-in modes: literacy, mcp-server, knowledge-probe, agent-harness.
  *
- * @see https://github.com/sanity-labs/ai-literacy-framework/blob/main/docs/PRESETS.md
+ * @see https://github.com/sanity-labs/ai-literacy-framework/blob/main/docs/presets.md
  */
 import { definePreset } from "../_vendor/ailf-core/index.js"

package/dist/commands/pipeline-action.d.ts CHANGED Viewed

@@ -36,6 +36,8 @@ export interface ResolvedOptions {
     noAutoScope: boolean;
     noCache: boolean;
     noRemoteCache: boolean;
+    /** Base directory for user-facing pipeline output artifacts (always resolved). */
+    outputDir: string;
     outputPath?: string;
     perspectiveOverride?: string;
     projectIdOverride?: string;
@@ -57,10 +59,14 @@ export interface ResolvedOptions {
     repoTasksPath?: string;
     taskOption?: string;
     tagOption?: string[];
-    taskSourceType?: "content-lake" | "repo" | "yaml";
+    taskSourceType?: "content-lake" | "repo";
     urlArgs: string[];
     apiUrl: string;
     apiKey?: string;
+    captureEnabled: boolean;
+    captureDir?: string;
+    captureCompress: boolean;
+    captureExtras: boolean;
 }
 /**
  * Pure option resolution — computes ResolvedOptions from CLI flags without

package/dist/commands/pipeline-action.js CHANGED Viewed

@@ -10,7 +10,7 @@
  *
  * @see packages/eval/src/orchestration/ for the step-based pipeline
  */
-import { existsSync, readFileSync, writeFileSync } from "fs";
+import { existsSync, mkdirSync, readFileSync, writeFileSync } from "fs";
 import { dirname, resolve } from "path";
 import { fileURLToPath } from "url";
 import { classifyUrls } from "../pipeline/classify-url.js";
@@ -209,6 +209,23 @@ export function computeResolvedOptions(opts) {
     const remote = opts.remote || process.env.AILF_REMOTE === "1";
     const apiUrl = opts.apiUrl ?? process.env.AILF_API_URL ?? "https://ailf-api.sanity.build";
     const apiKey = process.env.AILF_API_KEY ?? undefined;
+    // Output directory: explicit flag → repo-task heuristic → default
+    const resolvedRepoTasksPath = opts.repoTasksPath
+        ? resolve(callerCwd, opts.repoTasksPath)
+        : undefined;
+    const resolvedTaskSourceType = resolveTaskSourceType(opts.taskSource);
+    let outputDir;
+    if (opts.outputDir) {
+        outputDir = resolve(callerCwd, opts.outputDir);
+    }
+    else if (resolvedTaskSourceType === "repo" || resolvedRepoTasksPath) {
+        outputDir = resolvedRepoTasksPath
+            ? resolve(resolvedRepoTasksPath, "..", "results", "latest")
+            : resolve(callerCwd, ".ailf", "results", "latest");
+    }
+    else {
+        outputDir = resolve(ROOT, "results", "latest");
+    }
     return {
         allowedOriginArgs,
         apiKey,
@@ -233,6 +250,7 @@ export function computeResolvedOptions(opts) {
         noAutoScope: opts.autoScope === false,
         noCache: !opts.cache,
         noRemoteCache: opts.remoteCache === false,
+        outputDir,
         outputPath: opts.output,
         perspectiveOverride,
         projectIdOverride,
@@ -250,24 +268,25 @@ export function computeResolvedOptions(opts) {
         skipFetch: opts.skipFetch,
         source: opts.source,
         studioOriginOverride,
-        repoTasksPath: opts.repoTasksPath
-            ? resolve(callerCwd, opts.repoTasksPath)
-            : undefined,
+        repoTasksPath: resolvedRepoTasksPath,
         taskOption,
         tagOption,
-        taskSourceType: resolveTaskSourceType(opts.taskSource),
+        taskSourceType: resolvedTaskSourceType,
         urlArgs,
+        captureEnabled: opts.capture || process.env.AILF_CAPTURE === "1",
+        captureDir: opts.captureDir ?? process.env.AILF_CAPTURE_DIR,
+        captureCompress: opts.captureCompress !== false &&
+            process.env.AILF_CAPTURE_COMPRESS !== "0",
+        captureExtras: opts.captureExtras !== false && process.env.AILF_CAPTURE_EXTRAS !== "0",
     };
 }
 /** Resolve and validate the --task-source flag value. */
 function resolveTaskSourceType(raw) {
     if (!raw || raw === "content-lake")
         return undefined; // default — Content Lake
-    if (raw === "yaml")
-        return "yaml";
     if (raw === "repo")
         return "repo";
-    console.error(`❌ Invalid --task-source "${raw}". Must be "yaml", "repo", or "content-lake".`);
+    console.error(`❌ Invalid --task-source "${raw}". Must be "repo" or "content-lake".`);
     process.exit(1);
 }
 // ---------------------------------------------------------------------------
@@ -304,17 +323,26 @@ export async function executePipeline(cliOpts) {
         if (cliOpts.output) {
             config.outputPath = resolve(callerCwd, cliOpts.output);
         }
+        // Output dir: explicit CLI flag → repo-task heuristic → file-config default
+        if (cliOpts.outputDir) {
+            config.outputDir = resolve(callerCwd, cliOpts.outputDir);
+        }
+        else if (config.repoTasksPath) {
+            config.outputDir = resolve(config.repoTasksPath, "..", "results", "latest");
+        }
         // Create AppContext directly from the merged config so adapters
         // (especially taskSource) are wired from the file config's
         // taskSourceType — not from CLI defaults.
+        console.log(`  📂 Output directory: ${config.outputDir}`);
         const ctx = createAppContext(config);
         const pipelineStart = Date.now();
         const steps = buildStepSequence(ctx, pipelineStart);
         const result = await orchestratePipeline(ctx, steps);
-        writePipelineResult(result);
+        writePipelineResult(result, config.outputDir);
         process.exit(result.success ? 0 : 1);
     }
     const o = resolveOptions(cliOpts);
+    console.log(`  📂 Output directory: ${o.outputDir}`);
     // Remote mode — submit to AILF API instead of running locally.
     // Use the caller's working directory (not the package root) because
     // remote mode reads .ailf/tasks/ from the user's repo, not from
@@ -350,7 +378,7 @@ export async function executePipeline(cliOpts) {
     const pipelineStart = Date.now();
     const steps = buildStepSequence(ctx, pipelineStart);
     const result = await orchestratePipeline(ctx, steps);
-    writePipelineResult(result);
+    writePipelineResult(result, o.outputDir);
     process.exit(result.success ? 0 : 1);
 }
 // ---------------------------------------------------------------------------
@@ -362,15 +390,11 @@ export async function executePipeline(cliOpts) {
 function resolveOptions(opts) {
     return computeResolvedOptions(opts);
 }
-function writePipelineResult(result) {
-    const resultFile = resolve(ROOT, "results", "latest", "pipeline-result.json");
-    try {
-        writeFileSync(resultFile, JSON.stringify(result, null, 2));
-        console.log(`  📄 Pipeline result: ${resultFile}\n`);
-    }
-    catch {
-        // results/latest/ may not exist yet — not critical
-    }
+function writePipelineResult(result, outputDir) {
+    mkdirSync(outputDir, { recursive: true });
+    const resultFile = resolve(outputDir, "pipeline-result.json");
+    writeFileSync(resultFile, JSON.stringify(result, null, 2));
+    console.log(`  📄 Pipeline result: ${resultFile}\n`);
 }
 /**
  * Load .ailf/config.yaml if --repo-tasks-path is set and the config file

package/dist/commands/pipeline.d.ts CHANGED Viewed

@@ -5,7 +5,7 @@
  * options object, bridges to process.env for downstream modules, and
  * delegates to runPipeline().
  *
- * @see docs/CLI.md for the full flag reference.
+ * @see docs/cli.md for the full flag reference.
  */
 import { Command } from "commander";
 /**
@@ -37,6 +37,7 @@ export interface PipelineCliOptions {
     mode: string;
     variant?: string;
     output?: string;
+    outputDir?: string;
     promptfooUrl?: string;
     publish?: boolean;
     publishTag?: string;
@@ -63,5 +64,9 @@ export interface PipelineCliOptions {
     url: string[];
     urls: string[];
     apiUrl?: string;
+    capture: boolean;
+    captureDir?: string;
+    captureCompress: boolean;
+    captureExtras: boolean;
 }
 export declare function createPipelineCommand(): Command;

package/dist/commands/pipeline.js CHANGED Viewed

@@ -5,7 +5,7 @@
  * options object, bridges to process.env for downstream modules, and
  * delegates to runPipeline().
  *
- * @see docs/CLI.md for the full flag reference.
+ * @see docs/cli.md for the full flag reference.
  */
 import { Command } from "commander";
 import { LiteracyVariant } from "../pipeline/normalize-mode.js";
@@ -48,11 +48,16 @@ export function createPipelineCommand() {
         .option("--report-project <id>", "Sanity project ID for report store")
         .option("--config <path>", "Load pipeline config from a TS/JS/YAML/JSON file (overrides most CLI flags)")
         .option("-o, --output <path>", "Write PR comment markdown to file")
+        .option("--output-dir <path>", "Base directory for pipeline output artifacts (default: inferred from execution context)")
         .option("--promptfoo-url <url>", "Promptfoo share URL for report")
-        .option("--task-source <type>", "Task definition source: content-lake (default — Sanity Content Lake), repo (repo tasks only, no Content Lake merge), yaml (tasks/*.yaml files, legacy)", "content-lake")
+        .option("--task-source <type>", "Task definition source: content-lake (default — Sanity Content Lake), repo (repo tasks only, no Content Lake merge)", "content-lake")
         .option("--repo-tasks-path <path>", "Path to repo-based task definitions (.ailf/tasks/ directory)")
         .option("--remote", "Submit evaluation to the AILF API instead of running locally", false)
         .option("--api-url <url>", "AILF API base URL (default: https://ailf-api.sanity.build)")
+        .option("--capture", "Enable artifact capture for this run", false)
+        .option("--capture-dir <path>", "Base directory for capture output (default: results/captures/)")
+        .option("--no-capture-compress", "Disable tar.gz compression of captures")
+        .option("--no-capture-extras", "Exclude mode-specific artifacts from captures")
         .action(async (opts) => {
         const { executePipeline } = await import("./pipeline-action.js");
         await executePipeline(opts);

package/dist/commands/pr-comment.js CHANGED Viewed

@@ -20,6 +20,7 @@ export function createPrCommentCommand() {
         try {
             const ctx = createAppContext({
                 rootDir: ROOT,
+                outputDir: resolve(ROOT, "results", "latest"),
                 mode: "literacy",
                 noAutoScope: false,
                 skipFetch: true,

package/dist/commands/publish.js CHANGED Viewed

@@ -87,6 +87,7 @@ async function runPublishCommand(summaryPath, opts) {
         noAutoScope: false,
         noCache: true,
         noRemoteCache: true,
+        outputDir: resolve(ROOT, "results", "latest"),
         publishEnabled: true,
         publishTag: opts.tag,
         readinessEnabled: false,

package/dist/commands/shared/help.js CHANGED Viewed

@@ -74,8 +74,8 @@ Quick Start:
 Documentation:
   Repository   https://github.com/sanity-io/ai-literacy-framework
-  CLI Guide    https://github.com/sanity-io/ai-literacy-framework/blob/main/docs/CLI.md
-  Getting Started  https://github.com/sanity-io/ai-literacy-framework/blob/main/docs/GETTING_STARTED.md
+  CLI Guide    https://github.com/sanity-io/ai-literacy-framework/blob/main/docs/cli.md
+  Getting Started  https://github.com/sanity-io/ai-literacy-framework/blob/main/docs/getting-started.md
 Run ailf <command> --help for detailed usage of any command.`;
 // ---------------------------------------------------------------------------

package/dist/commands/update-quality-scores.d.ts ADDED Viewed

@@ -0,0 +1,5 @@
+/**
+ * update-quality-scores command — update QUALITY_SCORE.md from scores.
+ */
+import { Command } from "commander";
+export declare function createUpdateQualityScoresCommand(): Command;

package/dist/commands/update-quality-scores.js ADDED Viewed

@@ -0,0 +1,20 @@
+/**
+ * update-quality-scores command — update QUALITY_SCORE.md from scores.
+ */
+import { Command } from "commander";
+export function createUpdateQualityScoresCommand() {
+    return new Command("update-quality-scores")
+        .description("Update docs/QUALITY_SCORE.md from score-summary.json")
+        .action(async () => {
+        const { updateQualityScores } = await import("../scripts/update-quality-scores.js");
+        console.log("=== Updating QUALITY_SCORE.md from score-summary.json ===\n");
+        const result = updateQualityScores();
+        if (result.success) {
+            console.log(`  ✅ ${result.message}`);
+        }
+        else {
+            console.error(`  ❌ ${result.message}`);
+            process.exit(1);
+        }
+    });
+}

package/dist/composition-root.d.ts CHANGED Viewed

@@ -13,15 +13,14 @@
  * - After: one factory, one place to change adapter wiring
  *
  * @see packages/core/src/ports/context.ts — AppContext interface
- * @see docs/exec-plans/ports-and-adapters/phase-7-composition-root.md
+ * @see docs/archive/exec-plans/ports-and-adapters/phase-7-composition-root.md
  */
 import { type AppContext, type AssertionRegistration, type ResolvedConfig } from "./_vendor/ailf-core/index.d.ts";
 /**
  * Create a fully wired AppContext from resolved configuration.
  *
  * Every adapter is constructed here and nowhere else (outside of tests).
- * Swapping an adapter (e.g., YamlTaskSource → ContentLakeTaskSource)
- * is a one-line change in this function.
+ * Swapping an adapter is a one-line change in this function.
  */
 export declare function createAppContext(config: ResolvedConfig): AppContext;
 /**