@sanity/ailf 2.0.0 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/canonical/grader-references/README.md +2 -2
- package/canonical/reference-solutions/content-lake/mutations.ts +160 -0
- package/canonical/reference-solutions/content-lake/realtime.ts +187 -0
- package/canonical/reference-solutions/image-handling/asset-pipeline.tsx +166 -0
- package/canonical/reference-solutions/portable-text/custom-blocks.ts +204 -0
- package/canonical/reference-solutions/portable-text/rendering.tsx +163 -0
- package/config/features.ts +1 -1
- package/config/models.ts +28 -23
- package/config/sources.ts +1 -1
- package/config/thresholds.ts +1 -1
- package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.d.ts +10 -0
- package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.js +185 -0
- package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.d.ts +6 -0
- package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.js +42 -0
- package/dist/_vendor/ailf-core/artifact-capture/noop-collector.d.ts +14 -0
- package/dist/_vendor/ailf-core/artifact-capture/noop-collector.js +25 -0
- package/dist/_vendor/ailf-core/config-helpers.d.ts +6 -0
- package/dist/_vendor/ailf-core/config-helpers.js +29 -0
- package/dist/_vendor/ailf-core/examples/index.d.ts +164 -94
- package/dist/_vendor/ailf-core/examples/index.js +208 -114
- package/dist/_vendor/ailf-core/index.d.ts +1 -0
- package/dist/_vendor/ailf-core/index.js +1 -0
- package/dist/_vendor/ailf-core/ports/artifact-collector.d.ts +94 -0
- package/dist/_vendor/ailf-core/ports/artifact-collector.js +13 -0
- package/dist/_vendor/ailf-core/ports/capture-comparator.d.ts +138 -0
- package/dist/_vendor/ailf-core/ports/capture-comparator.js +10 -0
- package/dist/_vendor/ailf-core/ports/context.d.ts +20 -1
- package/dist/_vendor/ailf-core/ports/eval-runner.d.ts +6 -0
- package/dist/_vendor/ailf-core/ports/index.d.ts +2 -0
- package/dist/_vendor/ailf-core/ports/pipeline-step.d.ts +11 -0
- package/dist/_vendor/ailf-core/ports/task-source.d.ts +3 -3
- package/dist/_vendor/ailf-core/ports/task-source.js +3 -3
- package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +6 -1
- package/dist/_vendor/ailf-core/schemas/pipeline-request.js +14 -2
- package/dist/_vendor/ailf-core/services/config-helpers.d.ts +16 -1
- package/dist/_vendor/ailf-core/services/config-helpers.js +21 -0
- package/dist/_vendor/ailf-core/services/index.d.ts +1 -1
- package/dist/_vendor/ailf-core/services/index.js +1 -1
- package/dist/_vendor/ailf-core/services/scoring.js +9 -0
- package/dist/_vendor/ailf-core/types/generalized-task.d.ts +12 -1
- package/dist/_vendor/ailf-core/types/generalized-task.js +1 -1
- package/dist/_vendor/ailf-core/types/index.d.ts +47 -4
- package/dist/_vendor/ailf-core/types/plugin-registry.d.ts +27 -0
- package/dist/_vendor/ailf-shared/eval-modes.d.ts +15 -0
- package/dist/_vendor/ailf-shared/eval-modes.js +18 -0
- package/dist/_vendor/ailf-tasks/cli.d.ts +8 -0
- package/dist/_vendor/ailf-tasks/cli.js +61 -0
- package/dist/_vendor/ailf-tasks/index.d.ts +13 -0
- package/dist/_vendor/ailf-tasks/index.js +16 -0
- package/dist/_vendor/ailf-tasks/parser.d.ts +27 -0
- package/dist/_vendor/ailf-tasks/parser.js +73 -0
- package/dist/_vendor/ailf-tasks/schemas.d.ts +198 -0
- package/dist/_vendor/ailf-tasks/schemas.js +180 -0
- package/dist/_vendor/ailf-tasks/validation.d.ts +47 -0
- package/dist/_vendor/ailf-tasks/validation.js +162 -0
- package/dist/adapters/api-client/remediation.js +2 -2
- package/dist/adapters/config-sources/file-config-adapter.js +6 -1
- package/dist/adapters/eval-runners/promptfoo-eval-adapter.js +8 -2
- package/dist/adapters/index.d.ts +0 -1
- package/dist/adapters/index.js +0 -1
- package/dist/adapters/task-sources/composite-task-source.d.ts +1 -1
- package/dist/adapters/task-sources/composite-task-source.js +1 -1
- package/dist/adapters/task-sources/content-lake-task-source.d.ts +4 -6
- package/dist/adapters/task-sources/content-lake-task-source.js +4 -6
- package/dist/adapters/task-sources/index.d.ts +1 -2
- package/dist/adapters/task-sources/index.js +1 -2
- package/dist/adapters/task-sources/repo-schemas.d.ts +1 -1
- package/dist/adapters/task-sources/repo-schemas.js +2 -2
- package/dist/adapters/task-sources/repo-task-source.js +1 -1
- package/dist/adapters/task-sources/repo-trigger.d.ts +1 -1
- package/dist/adapters/task-sources/repo-trigger.js +1 -1
- package/dist/adapters/task-sources/task-file-loader.d.ts +9 -6
- package/dist/adapters/task-sources/task-file-loader.js +20 -6
- package/dist/agent-observer/test-imports.d.ts +7 -0
- package/dist/agent-observer/test-imports.js +185 -0
- package/dist/artifact-capture/comparator.d.ts +22 -0
- package/dist/artifact-capture/comparator.js +493 -0
- package/dist/artifact-capture/filesystem-collector.d.ts +42 -0
- package/dist/artifact-capture/filesystem-collector.js +237 -0
- package/dist/artifact-capture/redact-artifact.d.ts +20 -0
- package/dist/artifact-capture/redact-artifact.js +115 -0
- package/dist/assertions/source-isolation.d.ts +1 -1
- package/dist/assertions/source-isolation.js +1 -1
- package/dist/cli.js +4 -0
- package/dist/commands/calculate-scores.js +1 -0
- package/dist/commands/capture-compare.d.ts +15 -0
- package/dist/commands/capture-compare.js +253 -0
- package/dist/commands/capture-list.d.ts +12 -0
- package/dist/commands/capture-list.js +147 -0
- package/dist/commands/capture.d.ts +9 -0
- package/dist/commands/capture.js +16 -0
- package/dist/commands/chronic-failures.d.ts +8 -0
- package/dist/commands/chronic-failures.js +33 -0
- package/dist/commands/explain-handler.d.ts +1 -1
- package/dist/commands/explain-handler.js +37 -8
- package/dist/commands/fetch-docs.js +1 -0
- package/dist/commands/generate-configs.d.ts +3 -3
- package/dist/commands/generate-configs.js +20 -8
- package/dist/commands/init.d.ts +2 -3
- package/dist/commands/init.js +56 -170
- package/dist/commands/pipeline-action.d.ts +7 -1
- package/dist/commands/pipeline-action.js +43 -19
- package/dist/commands/pipeline.d.ts +6 -1
- package/dist/commands/pipeline.js +7 -2
- package/dist/commands/pr-comment.js +1 -0
- package/dist/commands/publish.js +1 -0
- package/dist/commands/shared/help.js +2 -2
- package/dist/commands/update-quality-scores.d.ts +5 -0
- package/dist/commands/update-quality-scores.js +20 -0
- package/dist/composition-root.d.ts +2 -3
- package/dist/composition-root.js +27 -14
- package/dist/config/features.ts +23 -0
- package/dist/config/models.ts +100 -0
- package/dist/config/prompts.ts +16 -0
- package/dist/config/rubrics.ts +225 -0
- package/dist/config/schedules.ts +47 -0
- package/dist/config/sinks.ts +37 -0
- package/dist/config/sources.ts +21 -0
- package/dist/config/thresholds.ts +61 -0
- package/dist/lib/agent-behavior-report.d.ts +8 -0
- package/dist/lib/agent-behavior-report.js +185 -0
- package/dist/lib/baseline.d.ts +19 -0
- package/dist/lib/baseline.js +153 -0
- package/dist/lib/calculate-scores.d.ts +23 -0
- package/dist/lib/calculate-scores.js +42 -0
- package/dist/lib/compare.d.ts +18 -0
- package/dist/lib/compare.js +170 -0
- package/dist/lib/coverage-audit.d.ts +4 -0
- package/dist/lib/coverage-audit.js +42 -0
- package/dist/lib/discovery-report.d.ts +13 -0
- package/dist/lib/discovery-report.js +57 -0
- package/dist/lib/fetch-docs.d.ts +30 -0
- package/dist/lib/fetch-docs.js +171 -0
- package/dist/lib/generate-configs.d.ts +25 -0
- package/dist/lib/generate-configs.js +42 -0
- package/dist/lib/grader-api.d.ts +21 -0
- package/dist/lib/grader-api.js +34 -0
- package/dist/lib/grader-compare.d.ts +19 -0
- package/dist/lib/grader-compare.js +91 -0
- package/dist/lib/grader-consistency.d.ts +27 -0
- package/dist/lib/grader-consistency.js +79 -0
- package/dist/lib/grader-sensitivity.d.ts +19 -0
- package/dist/lib/grader-sensitivity.js +75 -0
- package/dist/lib/grader-validate.d.ts +19 -0
- package/dist/lib/grader-validate.js +78 -0
- package/dist/lib/measure-retrieval.d.ts +14 -0
- package/dist/lib/measure-retrieval.js +71 -0
- package/dist/lib/pr-comment.d.ts +16 -0
- package/dist/lib/pr-comment.js +28 -0
- package/dist/lib/readiness-report.d.ts +13 -0
- package/dist/lib/readiness-report.js +108 -0
- package/dist/lib/webhook-server.d.ts +11 -0
- package/dist/lib/webhook-server.js +24 -0
- package/dist/lib/weekly-digest.d.ts +24 -0
- package/dist/lib/weekly-digest.js +148 -0
- package/dist/orchestration/build-app-context.js +13 -0
- package/dist/orchestration/cache-context.d.ts +23 -0
- package/dist/orchestration/cache-context.js +43 -0
- package/dist/orchestration/env-bridge.d.ts +21 -0
- package/dist/orchestration/env-bridge.js +66 -0
- package/dist/orchestration/load-pipeline-tasks.d.ts +34 -0
- package/dist/orchestration/load-pipeline-tasks.js +52 -0
- package/dist/orchestration/pipeline-orchestrator.js +75 -5
- package/dist/orchestration/step-runner.js +5 -1
- package/dist/orchestration/steps/calculate-scores-step.d.ts +1 -0
- package/dist/orchestration/steps/calculate-scores-step.js +13 -0
- package/dist/orchestration/steps/callback-step.js +10 -1
- package/dist/orchestration/steps/compare-step.js +6 -3
- package/dist/orchestration/steps/discovery-report-step.js +6 -2
- package/dist/orchestration/steps/fetch-docs-shell.d.ts +17 -0
- package/dist/orchestration/steps/fetch-docs-shell.js +30 -0
- package/dist/orchestration/steps/fetch-docs-step.d.ts +1 -0
- package/dist/orchestration/steps/fetch-docs-step.js +30 -16
- package/dist/orchestration/steps/gap-analysis-step.js +13 -2
- package/dist/orchestration/steps/generate-configs-step.d.ts +1 -0
- package/dist/orchestration/steps/generate-configs-step.js +50 -15
- package/dist/orchestration/steps/mirror-repo-tasks-step.d.ts +1 -1
- package/dist/orchestration/steps/mirror-repo-tasks-step.js +1 -1
- package/dist/orchestration/steps/publish-report-step.js +19 -0
- package/dist/orchestration/steps/readiness-step.js +8 -3
- package/dist/orchestration/steps/report-step.js +17 -4
- package/dist/orchestration/steps/run-eval-step.d.ts +1 -0
- package/dist/orchestration/steps/run-eval-step.js +51 -31
- package/dist/pipeline/agent-behavior-report.js +6 -0
- package/dist/pipeline/attribution.d.ts +1 -1
- package/dist/pipeline/attribution.js +1 -1
- package/dist/pipeline/cache.js +29 -15
- package/dist/pipeline/calculate-scores.d.ts +2 -0
- package/dist/pipeline/calculate-scores.js +70 -33
- package/dist/pipeline/chronic-failures.d.ts +55 -0
- package/dist/pipeline/chronic-failures.js +110 -0
- package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +33 -0
- package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.js +2 -3
- package/dist/pipeline/compiler/__tests__/task-bridge.test.d.ts +9 -0
- package/dist/pipeline/compiler/__tests__/task-bridge.test.js +339 -0
- package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.d.ts +10 -0
- package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.js +509 -0
- package/dist/pipeline/compiler/assertion-mapper.d.ts +1 -1
- package/dist/pipeline/compiler/assertion-mapper.js +1 -1
- package/dist/pipeline/compiler/compiler-to-yaml.d.ts +2 -7
- package/dist/pipeline/compiler/compiler-to-yaml.js +2 -7
- package/dist/pipeline/compiler/config-loader.d.ts +14 -0
- package/dist/pipeline/compiler/config-loader.js +42 -2
- package/dist/pipeline/compiler/fixture-resolver.d.ts +1 -1
- package/dist/pipeline/compiler/fixture-resolver.js +1 -1
- package/dist/pipeline/compiler/ignore-fields.d.ts +1 -1
- package/dist/pipeline/compiler/ignore-fields.js +1 -1
- package/dist/pipeline/compiler/index.d.ts +2 -5
- package/dist/pipeline/compiler/index.js +2 -5
- package/dist/pipeline/compiler/literacy-bridge.d.ts +1 -1
- package/dist/pipeline/compiler/literacy-bridge.js +1 -1
- package/dist/pipeline/compiler/mode-bases/agent-harness.d.ts +1 -1
- package/dist/pipeline/compiler/mode-bases/agent-harness.js +1 -1
- package/dist/pipeline/compiler/mode-bases/knowledge-probe.d.ts +1 -1
- package/dist/pipeline/compiler/mode-bases/knowledge-probe.js +1 -1
- package/dist/pipeline/compiler/mode-bases/literacy.d.ts +13 -2
- package/dist/pipeline/compiler/mode-bases/literacy.js +55 -1
- package/dist/pipeline/compiler/mode-bases/mcp-server.d.ts +1 -1
- package/dist/pipeline/compiler/mode-bases/mcp-server.js +1 -1
- package/dist/pipeline/compiler/mode-handlers/agent-harness/index.d.ts +1 -1
- package/dist/pipeline/compiler/mode-handlers/agent-harness/index.js +1 -1
- package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.d.ts +70 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.js +485 -0
- package/dist/pipeline/compiler/mode-handlers/index.d.ts +2 -2
- package/dist/pipeline/compiler/mode-handlers/index.js +2 -2
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.d.ts +1 -1
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.js +1 -1
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.d.ts +76 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.js +245 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/index.d.ts +1 -1
- package/dist/pipeline/compiler/mode-handlers/literacy/index.js +1 -1
- package/dist/pipeline/compiler/mode-handlers/literacy-handler.d.ts +89 -0
- package/dist/pipeline/compiler/mode-handlers/literacy-handler.js +379 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-assertions.d.ts +50 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-assertions.js +334 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.d.ts +1 -1
- package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.js +1 -1
- package/dist/pipeline/compiler/mode-handlers/mcp-server/index.d.ts +1 -1
- package/dist/pipeline/compiler/mode-handlers/mcp-server/index.js +1 -1
- package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.d.ts +69 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.js +307 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.js +22 -5
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.js +6 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.d.ts +10 -5
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.js +314 -7
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.d.ts +10 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.d.ts +65 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.js +368 -0
- package/dist/pipeline/compiler/presets/sanity-literacy.d.ts +1 -1
- package/dist/pipeline/compiler/presets/sanity-literacy.js +1 -1
- package/dist/pipeline/compiler/promptfoo-compiler.d.ts +1 -4
- package/dist/pipeline/compiler/promptfoo-compiler.js +3 -12
- package/dist/pipeline/compiler/provider-assembler.js +13 -7
- package/dist/pipeline/compiler/sandbox/docker-sandbox.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/docker-sandbox.js +1 -1
- package/dist/pipeline/compiler/sandbox/fixture-provisioner.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/fixture-provisioner.js +1 -1
- package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.js +1 -1
- package/dist/pipeline/compiler/sandbox/index.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/index.js +1 -1
- package/dist/pipeline/compiler/sandbox/sandbox-selector.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/sandbox-selector.js +1 -1
- package/dist/pipeline/compiler/sandbox/sandbox-strategy.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/sandbox-strategy.js +1 -1
- package/dist/pipeline/compiler/sandbox/tempdir-sandbox.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/tempdir-sandbox.js +1 -1
- package/dist/pipeline/compiler/scoring-bridge.d.ts +1 -1
- package/dist/pipeline/compiler/scoring-bridge.js +1 -1
- package/dist/pipeline/compiler/task-bridge.d.ts +41 -0
- package/dist/pipeline/compiler/task-bridge.js +92 -0
- package/dist/pipeline/compiler/task-graph-builder.d.ts +1 -4
- package/dist/pipeline/compiler/task-graph-builder.js +1 -4
- package/dist/pipeline/compiler/telemetry/index.d.ts +1 -1
- package/dist/pipeline/compiler/telemetry/index.js +1 -1
- package/dist/pipeline/compiler/variable-resolver.d.ts +1 -1
- package/dist/pipeline/compiler/variable-resolver.js +1 -1
- package/dist/pipeline/coverage-audit.d.ts +1 -1
- package/dist/pipeline/coverage-audit.js +1 -1
- package/dist/pipeline/degradations.d.ts +1 -1
- package/dist/pipeline/degradations.js +1 -1
- package/dist/pipeline/failure-modes.d.ts +1 -1
- package/dist/pipeline/failure-modes.js +13 -1
- package/dist/pipeline/gap-analysis.d.ts +1 -1
- package/dist/pipeline/gap-analysis.js +3 -1
- package/dist/pipeline/generate-configs.d.ts +2 -2
- package/dist/pipeline/generate-configs.js +15 -8
- package/dist/pipeline/grader-compare-runner.d.ts +1 -1
- package/dist/pipeline/grader-compare-runner.js +7 -1
- package/dist/pipeline/grader-comparison.d.ts +1 -1
- package/dist/pipeline/grader-comparison.js +1 -1
- package/dist/pipeline/grader-consistency-runner.d.ts +1 -1
- package/dist/pipeline/grader-consistency-runner.js +7 -1
- package/dist/pipeline/grader-consistency.d.ts +1 -1
- package/dist/pipeline/grader-consistency.js +1 -1
- package/dist/pipeline/grader-sensitivity-runner.d.ts +1 -1
- package/dist/pipeline/grader-sensitivity-runner.js +1 -1
- package/dist/pipeline/grader-sensitivity.d.ts +1 -1
- package/dist/pipeline/grader-sensitivity.js +1 -1
- package/dist/pipeline/grader-validate-runner.d.ts +1 -1
- package/dist/pipeline/grader-validate-runner.js +2 -2
- package/dist/pipeline/grader-validation.d.ts +1 -1
- package/dist/pipeline/grader-validation.js +1 -1
- package/dist/pipeline/map-request-to-config.js +15 -2
- package/dist/pipeline/mirror-repo-tasks.d.ts +1 -1
- package/dist/pipeline/mirror-repo-tasks.js +1 -1
- package/dist/pipeline/plan-format.d.ts +1 -1
- package/dist/pipeline/plan-format.js +1 -1
- package/dist/pipeline/plan.d.ts +1 -1
- package/dist/pipeline/plan.js +67 -29
- package/dist/pipeline/probe.d.ts +1 -1
- package/dist/pipeline/probe.js +1 -1
- package/dist/pipeline/readiness-report.d.ts +2 -2
- package/dist/pipeline/readiness-report.js +2 -2
- package/dist/pipeline/release-classification.d.ts +1 -1
- package/dist/pipeline/release-classification.js +1 -1
- package/dist/pipeline/release-report.d.ts +1 -1
- package/dist/pipeline/release-report.js +1 -1
- package/dist/pipeline/repo-eval-comment.d.ts +1 -1
- package/dist/pipeline/repo-eval-comment.js +1 -1
- package/dist/pipeline/repo-threshold-evaluator.d.ts +1 -1
- package/dist/pipeline/repo-threshold-evaluator.js +1 -1
- package/dist/pipeline/resolve-mappings.d.ts +6 -6
- package/dist/pipeline/resolve-mappings.js +44 -44
- package/dist/pipeline/retrieval-metrics.d.ts +3 -3
- package/dist/pipeline/retrieval-metrics.js +28 -20
- package/dist/pipeline/steps/calculate-scores-step.d.ts +11 -0
- package/dist/pipeline/steps/calculate-scores-step.js +89 -0
- package/dist/pipeline/steps/compare-step.d.ts +18 -0
- package/dist/pipeline/steps/compare-step.js +90 -0
- package/dist/pipeline/steps/eval-step.d.ts +53 -0
- package/dist/pipeline/steps/eval-step.js +347 -0
- package/dist/pipeline/steps/fetch-docs-step.d.ts +11 -0
- package/dist/pipeline/steps/fetch-docs-step.js +84 -0
- package/dist/pipeline/steps/generate-configs-step.d.ts +11 -0
- package/dist/pipeline/steps/generate-configs-step.js +98 -0
- package/dist/pipeline/steps/grader-consistency-step.d.ts +21 -0
- package/dist/pipeline/steps/grader-consistency-step.js +74 -0
- package/dist/pipeline/steps/publish-report-step.d.ts +57 -0
- package/dist/pipeline/steps/publish-report-step.js +243 -0
- package/dist/pipeline/steps/report-step.d.ts +13 -0
- package/dist/pipeline/steps/report-step.js +56 -0
- package/dist/pipeline/steps/update-scores-step.d.ts +11 -0
- package/dist/pipeline/steps/update-scores-step.js +42 -0
- package/dist/pipeline/targeted-loo.d.ts +1 -1
- package/dist/pipeline/targeted-loo.js +1 -1
- package/dist/pipeline/thresholds.d.ts +1 -1
- package/dist/pipeline/thresholds.js +1 -1
- package/dist/pipeline/validate.js +13 -0
- package/dist/report-store.d.ts +17 -0
- package/dist/report-store.js +24 -0
- package/dist/scripts/agent-behavior-report.d.ts +19 -0
- package/dist/scripts/agent-behavior-report.js +315 -0
- package/dist/scripts/baseline.d.ts +43 -0
- package/dist/scripts/baseline.js +267 -0
- package/dist/scripts/calculate-scores.d.ts +166 -0
- package/dist/scripts/calculate-scores.js +1296 -0
- package/dist/scripts/compare.d.ts +22 -0
- package/dist/scripts/compare.js +334 -0
- package/dist/scripts/coverage-audit.d.ts +44 -0
- package/dist/scripts/coverage-audit.js +209 -0
- package/dist/scripts/debug-eval.d.ts +19 -0
- package/dist/scripts/debug-eval.js +73 -0
- package/dist/scripts/discovery-report.d.ts +58 -0
- package/dist/scripts/discovery-report.js +250 -0
- package/dist/scripts/fetch-docs.d.ts +35 -0
- package/dist/scripts/fetch-docs.js +472 -0
- package/dist/scripts/generate-configs.d.ts +66 -0
- package/dist/scripts/generate-configs.js +459 -0
- package/dist/scripts/grader-api.d.ts +27 -0
- package/dist/scripts/grader-api.js +206 -0
- package/dist/scripts/grader-compare.d.ts +22 -0
- package/dist/scripts/grader-compare.js +368 -0
- package/dist/scripts/grader-consistency.d.ts +20 -0
- package/dist/scripts/grader-consistency.js +313 -0
- package/dist/scripts/grader-sensitivity.d.ts +22 -0
- package/dist/scripts/grader-sensitivity.js +354 -0
- package/dist/scripts/grader-validate.d.ts +19 -0
- package/dist/scripts/grader-validate.js +267 -0
- package/dist/scripts/measure-retrieval.d.ts +10 -0
- package/dist/scripts/measure-retrieval.js +145 -0
- package/dist/scripts/migrate-task-mode.d.ts +1 -1
- package/dist/scripts/migrate-task-mode.js +1 -1
- package/dist/scripts/migrate-tasks-to-content-lake.d.ts +1 -1
- package/dist/scripts/migrate-tasks-to-content-lake.js +1 -1
- package/dist/scripts/pipeline.d.ts +76 -0
- package/dist/scripts/pipeline.js +1031 -0
- package/dist/scripts/pr-comment.d.ts +10 -0
- package/dist/scripts/pr-comment.js +510 -0
- package/dist/scripts/readiness-report.d.ts +88 -0
- package/dist/scripts/readiness-report.js +342 -0
- package/dist/scripts/update-quality-scores.d.ts +15 -0
- package/dist/scripts/update-quality-scores.js +184 -0
- package/dist/scripts/validate-task-sources.d.ts +1 -1
- package/dist/scripts/validate-task-sources.js +1 -1
- package/dist/scripts/validate.d.ts +13 -0
- package/dist/scripts/validate.js +79 -0
- package/dist/scripts/webhook-server.d.ts +26 -0
- package/dist/scripts/webhook-server.js +147 -0
- package/dist/scripts/weekly-digest.d.ts +24 -0
- package/dist/scripts/weekly-digest.js +144 -0
- package/dist/sinks/format-slack.d.ts +64 -0
- package/dist/sinks/format-slack.js +306 -0
- package/dist/sinks/slack-sink.d.ts +27 -0
- package/dist/sinks/slack-sink.js +78 -0
- package/dist/sinks/types.d.ts +1 -1
- package/dist/sinks/types.js +1 -1
- package/dist/sinks/webhook-sink.d.ts +19 -0
- package/dist/sinks/webhook-sink.js +50 -0
- package/dist/tasks/knowledge-probe/define-type-api.task.ts +66 -0
- package/dist/tasks/knowledge-probe/groq-projections.task.ts +62 -0
- package/dist/tasks/literacy/content-lake.task.ts +181 -0
- package/dist/tasks/literacy/frameworks.task.ts +129 -0
- package/dist/tasks/literacy/functions.task.ts +70 -0
- package/dist/tasks/literacy/groq.task.ts +259 -0
- package/dist/tasks/literacy/image-handling.task.ts +95 -0
- package/dist/tasks/literacy/nextjs-live.task.ts +76 -0
- package/dist/tasks/literacy/portable-text.task.ts +169 -0
- package/dist/tasks/literacy/studio-setup.task.ts +134 -0
- package/dist/tasks/literacy/visual-editing.task.ts +147 -0
- package/package.json +24 -24
- package/tasks/.expanded.agentic.yaml +280 -0
- package/tasks/.expanded.yaml +565 -0
- package/tasks/knowledge-probe/define-type-api.task.ts +11 -0
- package/tasks/knowledge-probe/groq-projections.task.ts +3 -0
- package/tasks/literacy/content-lake.task.ts +181 -0
- package/tasks/literacy/frameworks.task.ts +1 -0
- package/tasks/literacy/functions.task.ts +1 -0
- package/tasks/literacy/groq.task.ts +1 -0
- package/tasks/literacy/image-handling.task.ts +95 -0
- package/tasks/literacy/nextjs-live.task.ts +2 -1
- package/tasks/literacy/portable-text.task.ts +169 -0
- package/tasks/literacy/studio-setup.task.ts +5 -2
- package/tasks/literacy/visual-editing.task.ts +1 -0
- package/LICENSE +0 -21
- package/tasks/frameworks.yaml +0 -98
- package/tasks/functions.yaml +0 -51
- package/tasks/groq.yaml +0 -216
- package/tasks/nextjs-live.yaml +0 -62
- package/tasks/studio-setup.yaml +0 -111
- package/tasks/visual-editing.yaml +0 -120
package/dist/commands/init.d.ts
CHANGED
|
@@ -6,9 +6,8 @@
|
|
|
6
6
|
* not live evaluation tasks.
|
|
7
7
|
*
|
|
8
8
|
* TypeScript output (default) uses define* helpers from @sanity/ailf-core
|
|
9
|
-
* for full IDE autocomplete and type checking. YAML output
|
|
10
|
-
*
|
|
11
|
-
* serialization of the parsed data.
|
|
9
|
+
* for full IDE autocomplete and type checking. YAML output serializes the
|
|
10
|
+
* parsed task data. JSON output is a plain serialization of the parsed data.
|
|
12
11
|
*
|
|
13
12
|
* Usage:
|
|
14
13
|
* ailf init # TypeScript output (default)
|
package/dist/commands/init.js
CHANGED
|
@@ -6,9 +6,8 @@
|
|
|
6
6
|
* not live evaluation tasks.
|
|
7
7
|
*
|
|
8
8
|
* TypeScript output (default) uses define* helpers from @sanity/ailf-core
|
|
9
|
-
* for full IDE autocomplete and type checking. YAML output
|
|
10
|
-
*
|
|
11
|
-
* serialization of the parsed data.
|
|
9
|
+
* for full IDE autocomplete and type checking. YAML output serializes the
|
|
10
|
+
* parsed task data. JSON output is a plain serialization of the parsed data.
|
|
12
11
|
*
|
|
13
12
|
* Usage:
|
|
14
13
|
* ailf init # TypeScript output (default)
|
|
@@ -20,7 +19,7 @@
|
|
|
20
19
|
import { Command } from "commander";
|
|
21
20
|
import { existsSync, mkdirSync, writeFileSync } from "fs";
|
|
22
21
|
import { resolve, relative } from "path";
|
|
23
|
-
import { ailfConfigData, ailfConfigYaml, ailfConfigTs, taskYamlFiles, taskTsFiles, TASK_FILE_NAMES,
|
|
22
|
+
import { ailfConfigData, ailfConfigYaml, ailfConfigTs, taskYamlFiles, taskTsFiles, TASK_FILE_NAMES, TASK_EXAMPLES, allTaskData, workflowYaml, } from "../_vendor/ailf-core/index.js";
|
|
24
23
|
// ---------------------------------------------------------------------------
|
|
25
24
|
// Command factory
|
|
26
25
|
// ---------------------------------------------------------------------------
|
|
@@ -54,6 +53,10 @@ function rel(from, to) {
|
|
|
54
53
|
const r = relative(from, to);
|
|
55
54
|
return r.startsWith(".") ? r : `./${r}`;
|
|
56
55
|
}
|
|
56
|
+
/** Filter task stems by mode using TASK_EXAMPLES metadata */
|
|
57
|
+
function taskStemsForMode(mode) {
|
|
58
|
+
return TASK_EXAMPLES.filter((t) => t.mode === mode).map((t) => t.stem);
|
|
59
|
+
}
|
|
57
60
|
// ---------------------------------------------------------------------------
|
|
58
61
|
// Init logic
|
|
59
62
|
// ---------------------------------------------------------------------------
|
|
@@ -66,6 +69,11 @@ async function runInit(opts) {
|
|
|
66
69
|
}
|
|
67
70
|
const format = opts.outputFormat;
|
|
68
71
|
const force = opts.force;
|
|
72
|
+
if (format === "yaml") {
|
|
73
|
+
console.warn(" ⚠ --output-format yaml is deprecated. TypeScript (default) is the\n" +
|
|
74
|
+
" recommended format — it provides full IDE autocomplete via defineTask().\n" +
|
|
75
|
+
" YAML output will be removed in a future release.\n");
|
|
76
|
+
}
|
|
69
77
|
// Resolve target from the caller's actual working directory
|
|
70
78
|
const callerCwd = process.env.AILF_CALLER_CWD ?? process.cwd();
|
|
71
79
|
const targetDir = resolve(callerCwd, opts.path);
|
|
@@ -115,71 +123,52 @@ async function runInit(opts) {
|
|
|
115
123
|
// 3. Write example tasks to .ailf/tasks/
|
|
116
124
|
const modeFilter = opts.mode;
|
|
117
125
|
const isCustomMode = modeFilter === "custom";
|
|
126
|
+
// Determine which task stems to write based on mode filter
|
|
127
|
+
let stemsToWrite;
|
|
128
|
+
if (isCustomMode) {
|
|
129
|
+
// Custom mode: write one literacy example as a starting point
|
|
130
|
+
stemsToWrite = taskStemsForMode("literacy").slice(0, 1);
|
|
131
|
+
}
|
|
132
|
+
else if (modeFilter === "literacy") {
|
|
133
|
+
stemsToWrite = taskStemsForMode("literacy");
|
|
134
|
+
}
|
|
135
|
+
else if (modeFilter === "mcp-server") {
|
|
136
|
+
stemsToWrite = taskStemsForMode("mcp-server");
|
|
137
|
+
}
|
|
138
|
+
else if (modeFilter === "knowledge-probe") {
|
|
139
|
+
stemsToWrite = taskStemsForMode("knowledge-probe");
|
|
140
|
+
}
|
|
141
|
+
else {
|
|
142
|
+
// Default (no --mode): write all tasks
|
|
143
|
+
stemsToWrite = [...TASK_FILE_NAMES];
|
|
144
|
+
}
|
|
118
145
|
if (format === "ts") {
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
const taskPath = resolve(tasksDir, `${stem}.task.ts`);
|
|
127
|
-
const content = taskTsFiles[stem];
|
|
128
|
-
if (writeIfNew(taskPath, content, force)) {
|
|
129
|
-
written.push(rel(targetDir, taskPath));
|
|
130
|
-
}
|
|
131
|
-
else {
|
|
132
|
-
skipped.push(rel(targetDir, taskPath));
|
|
133
|
-
}
|
|
134
|
-
}
|
|
135
|
-
}
|
|
136
|
-
// Draft examples for other modes (default init only)
|
|
137
|
-
if (!modeFilter) {
|
|
138
|
-
const mcpPath = resolve(tasksDir, "example-mcp-tool-usage.task.ts");
|
|
139
|
-
if (writeIfNew(mcpPath, MCP_DRAFT_TASK_TS, force)) {
|
|
140
|
-
written.push(rel(targetDir, mcpPath));
|
|
141
|
-
}
|
|
142
|
-
else {
|
|
143
|
-
skipped.push(rel(targetDir, mcpPath));
|
|
144
|
-
}
|
|
145
|
-
const probePath = resolve(tasksDir, "example-knowledge-probe.task.ts");
|
|
146
|
-
if (writeIfNew(probePath, PROBE_DRAFT_TASK_TS, force)) {
|
|
147
|
-
written.push(rel(targetDir, probePath));
|
|
148
|
-
}
|
|
149
|
-
else {
|
|
150
|
-
skipped.push(rel(targetDir, probePath));
|
|
146
|
+
for (const stem of stemsToWrite) {
|
|
147
|
+
let content = taskTsFiles[stem];
|
|
148
|
+
if (!content)
|
|
149
|
+
continue;
|
|
150
|
+
// For MCP-only init, activate the draft task
|
|
151
|
+
if (modeFilter === "mcp-server") {
|
|
152
|
+
content = content.replace('status: "draft",', '// status: "active", // Activated — this task runs in evaluations');
|
|
151
153
|
}
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
const
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
written.push(rel(targetDir, mcpPath));
|
|
154
|
+
const fileName = isCustomMode && stem === stemsToWrite[0]
|
|
155
|
+
? "example-custom.task.ts"
|
|
156
|
+
: `${stem}.task.ts`;
|
|
157
|
+
const taskPath = resolve(tasksDir, fileName);
|
|
158
|
+
if (writeIfNew(taskPath, content, force)) {
|
|
159
|
+
written.push(rel(targetDir, taskPath));
|
|
159
160
|
}
|
|
160
161
|
else {
|
|
161
|
-
skipped.push(rel(targetDir,
|
|
162
|
-
}
|
|
163
|
-
}
|
|
164
|
-
// Custom preset scaffold
|
|
165
|
-
if (isCustomMode) {
|
|
166
|
-
const customTaskPath = resolve(tasksDir, "example-custom.task.ts");
|
|
167
|
-
// Reuse the GROQ literacy task as a starting point
|
|
168
|
-
if (taskTsFiles[TASK_TS_FILE_NAMES[0]]) {
|
|
169
|
-
if (writeIfNew(customTaskPath, taskTsFiles[TASK_TS_FILE_NAMES[0]], force)) {
|
|
170
|
-
written.push(rel(targetDir, customTaskPath));
|
|
171
|
-
}
|
|
172
|
-
else {
|
|
173
|
-
skipped.push(rel(targetDir, customTaskPath));
|
|
174
|
-
}
|
|
162
|
+
skipped.push(rel(targetDir, taskPath));
|
|
175
163
|
}
|
|
176
164
|
}
|
|
177
165
|
}
|
|
178
166
|
else if (format === "yaml") {
|
|
179
|
-
|
|
180
|
-
for (const stem of TASK_FILE_NAMES) {
|
|
181
|
-
const taskPath = resolve(tasksDir, `${stem}.yaml`);
|
|
167
|
+
for (const stem of stemsToWrite) {
|
|
182
168
|
const content = taskYamlFiles[stem];
|
|
169
|
+
if (!content)
|
|
170
|
+
continue;
|
|
171
|
+
const taskPath = resolve(tasksDir, `${stem}.yaml`);
|
|
183
172
|
if (writeIfNew(taskPath, content, force)) {
|
|
184
173
|
written.push(rel(targetDir, taskPath));
|
|
185
174
|
}
|
|
@@ -193,8 +182,12 @@ async function runInit(opts) {
|
|
|
193
182
|
const tasks = Array.isArray(allTaskData)
|
|
194
183
|
? allTaskData
|
|
195
184
|
: [allTaskData];
|
|
185
|
+
// Build a set of task IDs that match the selected stems
|
|
186
|
+
const selectedIds = new Set(stemsToWrite.flatMap((s) => TASK_EXAMPLES.filter((t) => t.stem === s).map((t) => t.stem)));
|
|
196
187
|
for (const task of tasks) {
|
|
197
188
|
const taskId = task.id;
|
|
189
|
+
if (!selectedIds.has(taskId))
|
|
190
|
+
continue;
|
|
198
191
|
const taskPath = resolve(tasksDir, `${taskId}.json`);
|
|
199
192
|
const content = JSON.stringify([task], null, 2) + "\n";
|
|
200
193
|
if (writeIfNew(taskPath, content, force)) {
|
|
@@ -263,8 +256,7 @@ async function runInit(opts) {
|
|
|
263
256
|
if (format === "ts") {
|
|
264
257
|
console.log();
|
|
265
258
|
console.log(` 💡 TypeScript tasks (${taskExt}) give you full IDE autocomplete`);
|
|
266
|
-
console.log(" via defineTask() from @sanity/ailf-core.
|
|
267
|
-
console.log(" also supported — re-run with --output-format yaml if preferred.");
|
|
259
|
+
console.log(" via defineTask() from @sanity/ailf-core.");
|
|
268
260
|
}
|
|
269
261
|
console.log();
|
|
270
262
|
console.log(" 🔑 Retrieve secrets from 1Password (Sanity employees):");
|
|
@@ -282,114 +274,8 @@ async function runInit(opts) {
|
|
|
282
274
|
console.log();
|
|
283
275
|
}
|
|
284
276
|
// ---------------------------------------------------------------------------
|
|
285
|
-
//
|
|
277
|
+
// Custom preset scaffold template
|
|
286
278
|
// ---------------------------------------------------------------------------
|
|
287
|
-
const MCP_DRAFT_TASK_TS = `/**
|
|
288
|
-
* Example Task: MCP Server tool-use evaluation (DRAFT).
|
|
289
|
-
*
|
|
290
|
-
* Tests whether an LLM can correctly discover and invoke Sanity MCP server
|
|
291
|
-
* tools. Connects to the hosted Sanity MCP server at https://mcp.sanity.io.
|
|
292
|
-
*
|
|
293
|
-
* Prerequisites:
|
|
294
|
-
* - A Sanity API token with read access (for token-based auth)
|
|
295
|
-
* - Or: OAuth authentication will be prompted on first connect
|
|
296
|
-
*
|
|
297
|
-
* Authentication options:
|
|
298
|
-
* 1. Token-based: set SANITY_API_TOKEN env var
|
|
299
|
-
* 2. OAuth: the server prompts for login on first connect
|
|
300
|
-
*
|
|
301
|
-
* Setup: npx sanity@latest mcp configure
|
|
302
|
-
* Docs: https://www.sanity.io/docs/ai/mcp-server
|
|
303
|
-
*
|
|
304
|
-
* This task is a DRAFT — it won't run unless activated or explicitly targeted.
|
|
305
|
-
* To activate: change status to "active" or remove the status field.
|
|
306
|
-
*/
|
|
307
|
-
|
|
308
|
-
import { defineTask } from "../_vendor/ailf-core/index.js"
|
|
309
|
-
|
|
310
|
-
export default defineTask({
|
|
311
|
-
mode: "mcp-server",
|
|
312
|
-
id: "example-mcp-tool-usage",
|
|
313
|
-
title: "MCP tool discovery and invocation",
|
|
314
|
-
description: "Example — tests Sanity MCP server tool-use (draft)",
|
|
315
|
-
area: "mcp",
|
|
316
|
-
|
|
317
|
-
// ── Server configuration ────────────────────────────────────
|
|
318
|
-
// The Sanity MCP server is hosted remotely at https://mcp.sanity.io.
|
|
319
|
-
// Authentication via API token header or OAuth.
|
|
320
|
-
//
|
|
321
|
-
// For token auth, set SANITY_API_TOKEN in your environment.
|
|
322
|
-
serverConfig: {
|
|
323
|
-
transport: "streamable-http",
|
|
324
|
-
url: "https://mcp.sanity.io",
|
|
325
|
-
env: {
|
|
326
|
-
SANITY_API_TOKEN: process.env.SANITY_API_TOKEN ?? "",
|
|
327
|
-
},
|
|
328
|
-
},
|
|
329
|
-
|
|
330
|
-
prompt: {
|
|
331
|
-
text: \`Use the available MCP tools to query all documents of type "article"
|
|
332
|
-
in the Sanity dataset. Return the title and slug for each document.
|
|
333
|
-
Limit results to 5 documents.\`,
|
|
334
|
-
},
|
|
335
|
-
|
|
336
|
-
assertions: [
|
|
337
|
-
{
|
|
338
|
-
type: "llm-rubric",
|
|
339
|
-
template: "mcp-input-validation",
|
|
340
|
-
criteria: [
|
|
341
|
-
"Correctly identifies the query_documents tool",
|
|
342
|
-
"Passes a valid GROQ query to filter by document type",
|
|
343
|
-
"Requests only the needed fields (title, slug)",
|
|
344
|
-
],
|
|
345
|
-
},
|
|
346
|
-
],
|
|
347
|
-
|
|
348
|
-
status: "draft",
|
|
349
|
-
})
|
|
350
|
-
`;
|
|
351
|
-
const PROBE_DRAFT_TASK_TS = `/**
|
|
352
|
-
* Example Task: Knowledge probe baseline (DRAFT).
|
|
353
|
-
*
|
|
354
|
-
* Tests what the model knows about a topic without providing documentation.
|
|
355
|
-
* Used to establish a baseline for comparison with literacy evaluations.
|
|
356
|
-
* This task is a DRAFT — it won't run unless activated or explicitly targeted.
|
|
357
|
-
*
|
|
358
|
-
* To activate: change status to "active" or remove the status field.
|
|
359
|
-
*/
|
|
360
|
-
|
|
361
|
-
import { defineTask } from "../_vendor/ailf-core/index.js"
|
|
362
|
-
|
|
363
|
-
export default defineTask({
|
|
364
|
-
mode: "knowledge-probe",
|
|
365
|
-
id: "example-knowledge-probe",
|
|
366
|
-
title: "Model knowledge of GROQ syntax",
|
|
367
|
-
description: "Example — probes baseline model knowledge (draft)",
|
|
368
|
-
area: "groq",
|
|
369
|
-
|
|
370
|
-
prompt: {
|
|
371
|
-
text: \`Explain the GROQ query language used by Sanity. Cover:
|
|
372
|
-
1. Basic query syntax and projections
|
|
373
|
-
2. How to filter and sort results
|
|
374
|
-
3. Common patterns for fetching related documents
|
|
375
|
-
Provide working code examples.\`,
|
|
376
|
-
},
|
|
377
|
-
|
|
378
|
-
assertions: [
|
|
379
|
-
{
|
|
380
|
-
type: "llm-rubric",
|
|
381
|
-
template: "task-completion",
|
|
382
|
-
criteria: [
|
|
383
|
-
"Demonstrates understanding of GROQ query syntax",
|
|
384
|
-
"Shows filtering and projection patterns",
|
|
385
|
-
"Code examples use valid GROQ syntax",
|
|
386
|
-
],
|
|
387
|
-
},
|
|
388
|
-
],
|
|
389
|
-
|
|
390
|
-
status: "draft",
|
|
391
|
-
})
|
|
392
|
-
`;
|
|
393
279
|
const CUSTOM_PRESET_TS = `/**
|
|
394
280
|
* Custom preset — your domain-specific evaluation configuration.
|
|
395
281
|
*
|
|
@@ -401,7 +287,7 @@ const CUSTOM_PRESET_TS = `/**
|
|
|
401
287
|
* To use a different mode (e.g., "mcp-server"), change the mode field.
|
|
402
288
|
* Available built-in modes: literacy, mcp-server, knowledge-probe, agent-harness.
|
|
403
289
|
*
|
|
404
|
-
* @see https://github.com/sanity-labs/ai-literacy-framework/blob/main/docs/
|
|
290
|
+
* @see https://github.com/sanity-labs/ai-literacy-framework/blob/main/docs/presets.md
|
|
405
291
|
*/
|
|
406
292
|
|
|
407
293
|
import { definePreset } from "../_vendor/ailf-core/index.js"
|
|
@@ -36,6 +36,8 @@ export interface ResolvedOptions {
|
|
|
36
36
|
noAutoScope: boolean;
|
|
37
37
|
noCache: boolean;
|
|
38
38
|
noRemoteCache: boolean;
|
|
39
|
+
/** Base directory for user-facing pipeline output artifacts (always resolved). */
|
|
40
|
+
outputDir: string;
|
|
39
41
|
outputPath?: string;
|
|
40
42
|
perspectiveOverride?: string;
|
|
41
43
|
projectIdOverride?: string;
|
|
@@ -57,10 +59,14 @@ export interface ResolvedOptions {
|
|
|
57
59
|
repoTasksPath?: string;
|
|
58
60
|
taskOption?: string;
|
|
59
61
|
tagOption?: string[];
|
|
60
|
-
taskSourceType?: "content-lake" | "repo"
|
|
62
|
+
taskSourceType?: "content-lake" | "repo";
|
|
61
63
|
urlArgs: string[];
|
|
62
64
|
apiUrl: string;
|
|
63
65
|
apiKey?: string;
|
|
66
|
+
captureEnabled: boolean;
|
|
67
|
+
captureDir?: string;
|
|
68
|
+
captureCompress: boolean;
|
|
69
|
+
captureExtras: boolean;
|
|
64
70
|
}
|
|
65
71
|
/**
|
|
66
72
|
* Pure option resolution — computes ResolvedOptions from CLI flags without
|
|
@@ -10,7 +10,7 @@
|
|
|
10
10
|
*
|
|
11
11
|
* @see packages/eval/src/orchestration/ for the step-based pipeline
|
|
12
12
|
*/
|
|
13
|
-
import { existsSync, readFileSync, writeFileSync } from "fs";
|
|
13
|
+
import { existsSync, mkdirSync, readFileSync, writeFileSync } from "fs";
|
|
14
14
|
import { dirname, resolve } from "path";
|
|
15
15
|
import { fileURLToPath } from "url";
|
|
16
16
|
import { classifyUrls } from "../pipeline/classify-url.js";
|
|
@@ -209,6 +209,23 @@ export function computeResolvedOptions(opts) {
|
|
|
209
209
|
const remote = opts.remote || process.env.AILF_REMOTE === "1";
|
|
210
210
|
const apiUrl = opts.apiUrl ?? process.env.AILF_API_URL ?? "https://ailf-api.sanity.build";
|
|
211
211
|
const apiKey = process.env.AILF_API_KEY ?? undefined;
|
|
212
|
+
// Output directory: explicit flag → repo-task heuristic → default
|
|
213
|
+
const resolvedRepoTasksPath = opts.repoTasksPath
|
|
214
|
+
? resolve(callerCwd, opts.repoTasksPath)
|
|
215
|
+
: undefined;
|
|
216
|
+
const resolvedTaskSourceType = resolveTaskSourceType(opts.taskSource);
|
|
217
|
+
let outputDir;
|
|
218
|
+
if (opts.outputDir) {
|
|
219
|
+
outputDir = resolve(callerCwd, opts.outputDir);
|
|
220
|
+
}
|
|
221
|
+
else if (resolvedTaskSourceType === "repo" || resolvedRepoTasksPath) {
|
|
222
|
+
outputDir = resolvedRepoTasksPath
|
|
223
|
+
? resolve(resolvedRepoTasksPath, "..", "results", "latest")
|
|
224
|
+
: resolve(callerCwd, ".ailf", "results", "latest");
|
|
225
|
+
}
|
|
226
|
+
else {
|
|
227
|
+
outputDir = resolve(ROOT, "results", "latest");
|
|
228
|
+
}
|
|
212
229
|
return {
|
|
213
230
|
allowedOriginArgs,
|
|
214
231
|
apiKey,
|
|
@@ -233,6 +250,7 @@ export function computeResolvedOptions(opts) {
|
|
|
233
250
|
noAutoScope: opts.autoScope === false,
|
|
234
251
|
noCache: !opts.cache,
|
|
235
252
|
noRemoteCache: opts.remoteCache === false,
|
|
253
|
+
outputDir,
|
|
236
254
|
outputPath: opts.output,
|
|
237
255
|
perspectiveOverride,
|
|
238
256
|
projectIdOverride,
|
|
@@ -250,24 +268,25 @@ export function computeResolvedOptions(opts) {
|
|
|
250
268
|
skipFetch: opts.skipFetch,
|
|
251
269
|
source: opts.source,
|
|
252
270
|
studioOriginOverride,
|
|
253
|
-
repoTasksPath:
|
|
254
|
-
? resolve(callerCwd, opts.repoTasksPath)
|
|
255
|
-
: undefined,
|
|
271
|
+
repoTasksPath: resolvedRepoTasksPath,
|
|
256
272
|
taskOption,
|
|
257
273
|
tagOption,
|
|
258
|
-
taskSourceType:
|
|
274
|
+
taskSourceType: resolvedTaskSourceType,
|
|
259
275
|
urlArgs,
|
|
276
|
+
captureEnabled: opts.capture || process.env.AILF_CAPTURE === "1",
|
|
277
|
+
captureDir: opts.captureDir ?? process.env.AILF_CAPTURE_DIR,
|
|
278
|
+
captureCompress: opts.captureCompress !== false &&
|
|
279
|
+
process.env.AILF_CAPTURE_COMPRESS !== "0",
|
|
280
|
+
captureExtras: opts.captureExtras !== false && process.env.AILF_CAPTURE_EXTRAS !== "0",
|
|
260
281
|
};
|
|
261
282
|
}
|
|
262
283
|
/** Resolve and validate the --task-source flag value. */
|
|
263
284
|
function resolveTaskSourceType(raw) {
|
|
264
285
|
if (!raw || raw === "content-lake")
|
|
265
286
|
return undefined; // default — Content Lake
|
|
266
|
-
if (raw === "yaml")
|
|
267
|
-
return "yaml";
|
|
268
287
|
if (raw === "repo")
|
|
269
288
|
return "repo";
|
|
270
|
-
console.error(`❌ Invalid --task-source "${raw}". Must be "
|
|
289
|
+
console.error(`❌ Invalid --task-source "${raw}". Must be "repo" or "content-lake".`);
|
|
271
290
|
process.exit(1);
|
|
272
291
|
}
|
|
273
292
|
// ---------------------------------------------------------------------------
|
|
@@ -304,17 +323,26 @@ export async function executePipeline(cliOpts) {
|
|
|
304
323
|
if (cliOpts.output) {
|
|
305
324
|
config.outputPath = resolve(callerCwd, cliOpts.output);
|
|
306
325
|
}
|
|
326
|
+
// Output dir: explicit CLI flag → repo-task heuristic → file-config default
|
|
327
|
+
if (cliOpts.outputDir) {
|
|
328
|
+
config.outputDir = resolve(callerCwd, cliOpts.outputDir);
|
|
329
|
+
}
|
|
330
|
+
else if (config.repoTasksPath) {
|
|
331
|
+
config.outputDir = resolve(config.repoTasksPath, "..", "results", "latest");
|
|
332
|
+
}
|
|
307
333
|
// Create AppContext directly from the merged config so adapters
|
|
308
334
|
// (especially taskSource) are wired from the file config's
|
|
309
335
|
// taskSourceType — not from CLI defaults.
|
|
336
|
+
console.log(` 📂 Output directory: ${config.outputDir}`);
|
|
310
337
|
const ctx = createAppContext(config);
|
|
311
338
|
const pipelineStart = Date.now();
|
|
312
339
|
const steps = buildStepSequence(ctx, pipelineStart);
|
|
313
340
|
const result = await orchestratePipeline(ctx, steps);
|
|
314
|
-
writePipelineResult(result);
|
|
341
|
+
writePipelineResult(result, config.outputDir);
|
|
315
342
|
process.exit(result.success ? 0 : 1);
|
|
316
343
|
}
|
|
317
344
|
const o = resolveOptions(cliOpts);
|
|
345
|
+
console.log(` 📂 Output directory: ${o.outputDir}`);
|
|
318
346
|
// Remote mode — submit to AILF API instead of running locally.
|
|
319
347
|
// Use the caller's working directory (not the package root) because
|
|
320
348
|
// remote mode reads .ailf/tasks/ from the user's repo, not from
|
|
@@ -350,7 +378,7 @@ export async function executePipeline(cliOpts) {
|
|
|
350
378
|
const pipelineStart = Date.now();
|
|
351
379
|
const steps = buildStepSequence(ctx, pipelineStart);
|
|
352
380
|
const result = await orchestratePipeline(ctx, steps);
|
|
353
|
-
writePipelineResult(result);
|
|
381
|
+
writePipelineResult(result, o.outputDir);
|
|
354
382
|
process.exit(result.success ? 0 : 1);
|
|
355
383
|
}
|
|
356
384
|
// ---------------------------------------------------------------------------
|
|
@@ -362,15 +390,11 @@ export async function executePipeline(cliOpts) {
|
|
|
362
390
|
function resolveOptions(opts) {
|
|
363
391
|
return computeResolvedOptions(opts);
|
|
364
392
|
}
|
|
365
|
-
function writePipelineResult(result) {
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
}
|
|
371
|
-
catch {
|
|
372
|
-
// results/latest/ may not exist yet — not critical
|
|
373
|
-
}
|
|
393
|
+
function writePipelineResult(result, outputDir) {
|
|
394
|
+
mkdirSync(outputDir, { recursive: true });
|
|
395
|
+
const resultFile = resolve(outputDir, "pipeline-result.json");
|
|
396
|
+
writeFileSync(resultFile, JSON.stringify(result, null, 2));
|
|
397
|
+
console.log(` 📄 Pipeline result: ${resultFile}\n`);
|
|
374
398
|
}
|
|
375
399
|
/**
|
|
376
400
|
* Load .ailf/config.yaml if --repo-tasks-path is set and the config file
|
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
* options object, bridges to process.env for downstream modules, and
|
|
6
6
|
* delegates to runPipeline().
|
|
7
7
|
*
|
|
8
|
-
* @see docs/
|
|
8
|
+
* @see docs/cli.md for the full flag reference.
|
|
9
9
|
*/
|
|
10
10
|
import { Command } from "commander";
|
|
11
11
|
/**
|
|
@@ -37,6 +37,7 @@ export interface PipelineCliOptions {
|
|
|
37
37
|
mode: string;
|
|
38
38
|
variant?: string;
|
|
39
39
|
output?: string;
|
|
40
|
+
outputDir?: string;
|
|
40
41
|
promptfooUrl?: string;
|
|
41
42
|
publish?: boolean;
|
|
42
43
|
publishTag?: string;
|
|
@@ -63,5 +64,9 @@ export interface PipelineCliOptions {
|
|
|
63
64
|
url: string[];
|
|
64
65
|
urls: string[];
|
|
65
66
|
apiUrl?: string;
|
|
67
|
+
capture: boolean;
|
|
68
|
+
captureDir?: string;
|
|
69
|
+
captureCompress: boolean;
|
|
70
|
+
captureExtras: boolean;
|
|
66
71
|
}
|
|
67
72
|
export declare function createPipelineCommand(): Command;
|
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
* options object, bridges to process.env for downstream modules, and
|
|
6
6
|
* delegates to runPipeline().
|
|
7
7
|
*
|
|
8
|
-
* @see docs/
|
|
8
|
+
* @see docs/cli.md for the full flag reference.
|
|
9
9
|
*/
|
|
10
10
|
import { Command } from "commander";
|
|
11
11
|
import { LiteracyVariant } from "../pipeline/normalize-mode.js";
|
|
@@ -48,11 +48,16 @@ export function createPipelineCommand() {
|
|
|
48
48
|
.option("--report-project <id>", "Sanity project ID for report store")
|
|
49
49
|
.option("--config <path>", "Load pipeline config from a TS/JS/YAML/JSON file (overrides most CLI flags)")
|
|
50
50
|
.option("-o, --output <path>", "Write PR comment markdown to file")
|
|
51
|
+
.option("--output-dir <path>", "Base directory for pipeline output artifacts (default: inferred from execution context)")
|
|
51
52
|
.option("--promptfoo-url <url>", "Promptfoo share URL for report")
|
|
52
|
-
.option("--task-source <type>", "Task definition source: content-lake (default — Sanity Content Lake), repo (repo tasks only, no Content Lake merge)
|
|
53
|
+
.option("--task-source <type>", "Task definition source: content-lake (default — Sanity Content Lake), repo (repo tasks only, no Content Lake merge)", "content-lake")
|
|
53
54
|
.option("--repo-tasks-path <path>", "Path to repo-based task definitions (.ailf/tasks/ directory)")
|
|
54
55
|
.option("--remote", "Submit evaluation to the AILF API instead of running locally", false)
|
|
55
56
|
.option("--api-url <url>", "AILF API base URL (default: https://ailf-api.sanity.build)")
|
|
57
|
+
.option("--capture", "Enable artifact capture for this run", false)
|
|
58
|
+
.option("--capture-dir <path>", "Base directory for capture output (default: results/captures/)")
|
|
59
|
+
.option("--no-capture-compress", "Disable tar.gz compression of captures")
|
|
60
|
+
.option("--no-capture-extras", "Exclude mode-specific artifacts from captures")
|
|
56
61
|
.action(async (opts) => {
|
|
57
62
|
const { executePipeline } = await import("./pipeline-action.js");
|
|
58
63
|
await executePipeline(opts);
|
package/dist/commands/publish.js
CHANGED
|
@@ -74,8 +74,8 @@ Quick Start:
|
|
|
74
74
|
|
|
75
75
|
Documentation:
|
|
76
76
|
Repository https://github.com/sanity-io/ai-literacy-framework
|
|
77
|
-
CLI Guide https://github.com/sanity-io/ai-literacy-framework/blob/main/docs/
|
|
78
|
-
Getting Started https://github.com/sanity-io/ai-literacy-framework/blob/main/docs/
|
|
77
|
+
CLI Guide https://github.com/sanity-io/ai-literacy-framework/blob/main/docs/cli.md
|
|
78
|
+
Getting Started https://github.com/sanity-io/ai-literacy-framework/blob/main/docs/getting-started.md
|
|
79
79
|
|
|
80
80
|
Run ailf <command> --help for detailed usage of any command.`;
|
|
81
81
|
// ---------------------------------------------------------------------------
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* update-quality-scores command — update QUALITY_SCORE.md from scores.
|
|
3
|
+
*/
|
|
4
|
+
import { Command } from "commander";
|
|
5
|
+
export function createUpdateQualityScoresCommand() {
|
|
6
|
+
return new Command("update-quality-scores")
|
|
7
|
+
.description("Update docs/QUALITY_SCORE.md from score-summary.json")
|
|
8
|
+
.action(async () => {
|
|
9
|
+
const { updateQualityScores } = await import("../scripts/update-quality-scores.js");
|
|
10
|
+
console.log("=== Updating QUALITY_SCORE.md from score-summary.json ===\n");
|
|
11
|
+
const result = updateQualityScores();
|
|
12
|
+
if (result.success) {
|
|
13
|
+
console.log(` ✅ ${result.message}`);
|
|
14
|
+
}
|
|
15
|
+
else {
|
|
16
|
+
console.error(` ❌ ${result.message}`);
|
|
17
|
+
process.exit(1);
|
|
18
|
+
}
|
|
19
|
+
});
|
|
20
|
+
}
|
|
@@ -13,15 +13,14 @@
|
|
|
13
13
|
* - After: one factory, one place to change adapter wiring
|
|
14
14
|
*
|
|
15
15
|
* @see packages/core/src/ports/context.ts — AppContext interface
|
|
16
|
-
* @see docs/exec-plans/ports-and-adapters/phase-7-composition-root.md
|
|
16
|
+
* @see docs/archive/exec-plans/ports-and-adapters/phase-7-composition-root.md
|
|
17
17
|
*/
|
|
18
18
|
import { type AppContext, type AssertionRegistration, type ResolvedConfig } from "./_vendor/ailf-core/index.d.ts";
|
|
19
19
|
/**
|
|
20
20
|
* Create a fully wired AppContext from resolved configuration.
|
|
21
21
|
*
|
|
22
22
|
* Every adapter is constructed here and nowhere else (outside of tests).
|
|
23
|
-
* Swapping an adapter
|
|
24
|
-
* is a one-line change in this function.
|
|
23
|
+
* Swapping an adapter is a one-line change in this function.
|
|
25
24
|
*/
|
|
26
25
|
export declare function createAppContext(config: ResolvedConfig): AppContext;
|
|
27
26
|
/**
|