@sanity/ailf 1.0.0 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +0 -1
- package/canonical/grader-references/README.md +2 -2
- package/canonical/reference-solutions/content-lake/mutations.ts +160 -0
- package/canonical/reference-solutions/content-lake/realtime.ts +187 -0
- package/canonical/reference-solutions/image-handling/asset-pipeline.tsx +166 -0
- package/canonical/reference-solutions/portable-text/custom-blocks.ts +204 -0
- package/canonical/reference-solutions/portable-text/rendering.tsx +163 -0
- package/config/features.ts +1 -1
- package/config/models.ts +29 -12
- package/config/sources.ts +1 -1
- package/config/thresholds.ts +1 -1
- package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.d.ts +10 -0
- package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.js +185 -0
- package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.d.ts +6 -0
- package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.js +42 -0
- package/dist/_vendor/ailf-core/artifact-capture/noop-collector.d.ts +14 -0
- package/dist/_vendor/ailf-core/artifact-capture/noop-collector.js +25 -0
- package/dist/_vendor/ailf-core/config-helpers.d.ts +20 -17
- package/dist/_vendor/ailf-core/config-helpers.js +51 -2
- package/dist/_vendor/ailf-core/examples/index.d.ts +166 -80
- package/dist/_vendor/ailf-core/examples/index.js +213 -94
- package/dist/_vendor/ailf-core/index.d.ts +3 -2
- package/dist/_vendor/ailf-core/index.js +2 -1
- package/dist/_vendor/ailf-core/ports/artifact-collector.d.ts +94 -0
- package/dist/_vendor/ailf-core/ports/artifact-collector.js +13 -0
- package/dist/_vendor/ailf-core/ports/capture-comparator.d.ts +138 -0
- package/dist/_vendor/ailf-core/ports/capture-comparator.js +10 -0
- package/dist/_vendor/ailf-core/ports/context.d.ts +22 -1
- package/dist/_vendor/ailf-core/ports/eval-runner.d.ts +6 -0
- package/dist/_vendor/ailf-core/ports/index.d.ts +2 -0
- package/dist/_vendor/ailf-core/ports/pipeline-step.d.ts +11 -0
- package/dist/_vendor/ailf-core/ports/task-source.d.ts +3 -3
- package/dist/_vendor/ailf-core/ports/task-source.js +3 -3
- package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +1 -0
- package/dist/_vendor/ailf-core/schemas/eval-config.js +10 -0
- package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +7 -1
- package/dist/_vendor/ailf-core/schemas/pipeline-request.js +16 -2
- package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +0 -2
- package/dist/_vendor/ailf-core/schemas/pipeline.js +0 -1
- package/dist/_vendor/ailf-core/services/config-helpers.d.ts +16 -1
- package/dist/_vendor/ailf-core/services/config-helpers.js +21 -0
- package/dist/_vendor/ailf-core/services/index.d.ts +1 -1
- package/dist/_vendor/ailf-core/services/index.js +1 -1
- package/dist/_vendor/ailf-core/services/scoring.js +9 -0
- package/dist/_vendor/ailf-core/types/generalized-task.d.ts +25 -1
- package/dist/_vendor/ailf-core/types/generalized-task.js +1 -1
- package/dist/_vendor/ailf-core/types/index.d.ts +48 -7
- package/dist/_vendor/ailf-core/types/plugin-registry.d.ts +105 -23
- package/dist/_vendor/ailf-core/types/plugin-registry.js +73 -20
- package/dist/_vendor/ailf-shared/eval-modes.d.ts +15 -0
- package/dist/_vendor/ailf-shared/eval-modes.js +18 -0
- package/dist/adapters/api-client/remediation.js +2 -2
- package/dist/adapters/config-sources/file-config-adapter.js +7 -1
- package/dist/adapters/config-sources/ts-config-loader.js +21 -13
- package/dist/adapters/eval-runners/promptfoo-eval-adapter.js +8 -2
- package/dist/adapters/index.d.ts +0 -1
- package/dist/adapters/index.js +0 -1
- package/dist/adapters/task-sources/composite-task-source.d.ts +1 -1
- package/dist/adapters/task-sources/composite-task-source.js +1 -1
- package/dist/adapters/task-sources/content-lake-task-source.d.ts +4 -6
- package/dist/adapters/task-sources/content-lake-task-source.js +21 -26
- package/dist/adapters/task-sources/index.d.ts +3 -4
- package/dist/adapters/task-sources/index.js +3 -4
- package/dist/adapters/task-sources/repo-schemas.d.ts +219 -17
- package/dist/adapters/task-sources/repo-schemas.js +228 -20
- package/dist/adapters/task-sources/repo-task-source.d.ts +14 -10
- package/dist/adapters/task-sources/repo-task-source.js +81 -122
- package/dist/adapters/task-sources/repo-trigger.d.ts +1 -1
- package/dist/adapters/task-sources/repo-trigger.js +1 -1
- package/dist/adapters/task-sources/repo-validation.d.ts +36 -5
- package/dist/adapters/task-sources/repo-validation.js +126 -5
- package/dist/adapters/task-sources/task-file-loader.d.ts +10 -7
- package/dist/adapters/task-sources/task-file-loader.js +21 -7
- package/dist/agent-observer/test-imports.d.ts +7 -0
- package/dist/agent-observer/test-imports.js +185 -0
- package/dist/artifact-capture/comparator.d.ts +22 -0
- package/dist/artifact-capture/comparator.js +493 -0
- package/dist/artifact-capture/filesystem-collector.d.ts +42 -0
- package/dist/artifact-capture/filesystem-collector.js +237 -0
- package/dist/artifact-capture/redact-artifact.d.ts +20 -0
- package/dist/artifact-capture/redact-artifact.js +115 -0
- package/dist/assertions/source-isolation.d.ts +1 -1
- package/dist/assertions/source-isolation.js +1 -1
- package/dist/cli.js +4 -0
- package/dist/commands/calculate-scores.js +1 -0
- package/dist/commands/capture-compare.d.ts +15 -0
- package/dist/commands/capture-compare.js +253 -0
- package/dist/commands/capture-list.d.ts +12 -0
- package/dist/commands/capture-list.js +147 -0
- package/dist/commands/capture.d.ts +9 -0
- package/dist/commands/capture.js +16 -0
- package/dist/commands/chronic-failures.d.ts +8 -0
- package/dist/commands/chronic-failures.js +33 -0
- package/dist/commands/coverage-audit.js +3 -1
- package/dist/commands/explain-handler.d.ts +1 -1
- package/dist/commands/explain-handler.js +37 -8
- package/dist/commands/fetch-docs.js +1 -0
- package/dist/commands/generate-configs.d.ts +3 -3
- package/dist/commands/generate-configs.js +20 -8
- package/dist/commands/init.d.ts +5 -4
- package/dist/commands/init.js +190 -25
- package/dist/commands/pipeline-action.d.ts +7 -1
- package/dist/commands/pipeline-action.js +43 -19
- package/dist/commands/pipeline.d.ts +6 -1
- package/dist/commands/pipeline.js +7 -2
- package/dist/commands/pr-comment.js +1 -0
- package/dist/commands/publish.js +1 -0
- package/dist/commands/shared/help.js +2 -2
- package/dist/commands/update-quality-scores.d.ts +5 -0
- package/dist/commands/update-quality-scores.js +20 -0
- package/dist/commands/validate-tasks.d.ts +2 -2
- package/dist/commands/validate-tasks.js +26 -15
- package/dist/composition-root.d.ts +15 -4
- package/dist/composition-root.js +100 -55
- package/dist/config/features.ts +23 -0
- package/dist/config/models.ts +100 -0
- package/dist/config/prompts.ts +16 -0
- package/dist/config/rubrics.ts +225 -0
- package/dist/config/schedules.ts +47 -0
- package/dist/config/sinks.ts +37 -0
- package/dist/config/sources.ts +21 -0
- package/dist/config/thresholds.ts +61 -0
- package/dist/index.d.ts +41 -0
- package/dist/index.js +48 -0
- package/dist/lib/agent-behavior-report.d.ts +8 -0
- package/dist/lib/agent-behavior-report.js +185 -0
- package/dist/lib/baseline.d.ts +19 -0
- package/dist/lib/baseline.js +153 -0
- package/dist/lib/calculate-scores.d.ts +23 -0
- package/dist/lib/calculate-scores.js +42 -0
- package/dist/lib/compare.d.ts +18 -0
- package/dist/lib/compare.js +170 -0
- package/dist/lib/coverage-audit.d.ts +4 -0
- package/dist/lib/coverage-audit.js +42 -0
- package/dist/lib/discovery-report.d.ts +13 -0
- package/dist/lib/discovery-report.js +57 -0
- package/dist/lib/fetch-docs.d.ts +30 -0
- package/dist/lib/fetch-docs.js +171 -0
- package/dist/lib/generate-configs.d.ts +25 -0
- package/dist/lib/generate-configs.js +42 -0
- package/dist/lib/grader-api.d.ts +21 -0
- package/dist/lib/grader-api.js +34 -0
- package/dist/lib/grader-compare.d.ts +19 -0
- package/dist/lib/grader-compare.js +91 -0
- package/dist/lib/grader-consistency.d.ts +27 -0
- package/dist/lib/grader-consistency.js +79 -0
- package/dist/lib/grader-sensitivity.d.ts +19 -0
- package/dist/lib/grader-sensitivity.js +75 -0
- package/dist/lib/grader-validate.d.ts +19 -0
- package/dist/lib/grader-validate.js +78 -0
- package/dist/lib/measure-retrieval.d.ts +14 -0
- package/dist/lib/measure-retrieval.js +71 -0
- package/dist/lib/pr-comment.d.ts +16 -0
- package/dist/lib/pr-comment.js +28 -0
- package/dist/lib/readiness-report.d.ts +13 -0
- package/dist/lib/readiness-report.js +108 -0
- package/dist/lib/webhook-server.d.ts +11 -0
- package/dist/lib/webhook-server.js +24 -0
- package/dist/lib/weekly-digest.d.ts +24 -0
- package/dist/lib/weekly-digest.js +148 -0
- package/dist/orchestration/build-app-context.js +13 -0
- package/dist/orchestration/build-step-sequence.js +4 -2
- package/dist/orchestration/cache-context.d.ts +23 -0
- package/dist/orchestration/cache-context.js +43 -0
- package/dist/orchestration/env-bridge.d.ts +21 -0
- package/dist/orchestration/env-bridge.js +66 -0
- package/dist/orchestration/load-pipeline-tasks.d.ts +34 -0
- package/dist/orchestration/load-pipeline-tasks.js +52 -0
- package/dist/orchestration/pipeline-orchestrator.js +75 -5
- package/dist/orchestration/step-runner.js +5 -1
- package/dist/orchestration/steps/calculate-scores-step.d.ts +1 -0
- package/dist/orchestration/steps/calculate-scores-step.js +13 -0
- package/dist/orchestration/steps/callback-step.js +10 -1
- package/dist/orchestration/steps/compare-step.js +6 -3
- package/dist/orchestration/steps/discovery-report-step.js +6 -2
- package/dist/orchestration/steps/fetch-docs-shell.d.ts +17 -0
- package/dist/orchestration/steps/fetch-docs-shell.js +30 -0
- package/dist/orchestration/steps/fetch-docs-step.d.ts +1 -0
- package/dist/orchestration/steps/fetch-docs-step.js +32 -19
- package/dist/orchestration/steps/gap-analysis-step.js +13 -2
- package/dist/orchestration/steps/generate-configs-step.d.ts +1 -0
- package/dist/orchestration/steps/generate-configs-step.js +77 -26
- package/dist/orchestration/steps/mirror-repo-tasks-step.d.ts +1 -1
- package/dist/orchestration/steps/mirror-repo-tasks-step.js +1 -1
- package/dist/orchestration/steps/publish-report-step.js +19 -0
- package/dist/orchestration/steps/readiness-step.js +8 -3
- package/dist/orchestration/steps/report-step.js +17 -4
- package/dist/orchestration/steps/run-eval-step.d.ts +1 -0
- package/dist/orchestration/steps/run-eval-step.js +51 -31
- package/dist/pipeline/agent-behavior-report.js +6 -0
- package/dist/pipeline/attribution.d.ts +1 -1
- package/dist/pipeline/attribution.js +1 -1
- package/dist/pipeline/cache.js +29 -15
- package/dist/pipeline/calculate-scores.d.ts +2 -0
- package/dist/pipeline/calculate-scores.js +70 -33
- package/dist/pipeline/chronic-failures.d.ts +55 -0
- package/dist/pipeline/chronic-failures.js +110 -0
- package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js +1 -1
- package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.js +1 -1
- package/dist/pipeline/compiler/__tests__/literacy-handler.test.js +1 -1
- package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +132 -62
- package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.js +2 -3
- package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.js +33 -100
- package/dist/pipeline/compiler/__tests__/task-bridge.test.d.ts +9 -0
- package/dist/pipeline/compiler/__tests__/task-bridge.test.js +339 -0
- package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.d.ts +10 -0
- package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.js +509 -0
- package/dist/pipeline/compiler/assertion-mapper.d.ts +1 -1
- package/dist/pipeline/compiler/assertion-mapper.js +1 -1
- package/dist/pipeline/compiler/compiler-to-yaml.d.ts +2 -7
- package/dist/pipeline/compiler/compiler-to-yaml.js +2 -7
- package/dist/pipeline/compiler/config-loader.d.ts +14 -0
- package/dist/pipeline/compiler/config-loader.js +42 -2
- package/dist/pipeline/compiler/fixture-resolver.d.ts +1 -1
- package/dist/pipeline/compiler/fixture-resolver.js +1 -1
- package/dist/pipeline/compiler/ignore-fields.d.ts +1 -1
- package/dist/pipeline/compiler/ignore-fields.js +1 -1
- package/dist/pipeline/compiler/index.d.ts +2 -5
- package/dist/pipeline/compiler/index.js +2 -5
- package/dist/pipeline/compiler/literacy-bridge.d.ts +2 -2
- package/dist/pipeline/compiler/literacy-bridge.js +2 -2
- package/dist/pipeline/compiler/mode-bases/agent-harness.d.ts +10 -0
- package/dist/pipeline/compiler/mode-bases/agent-harness.js +21 -0
- package/dist/pipeline/compiler/mode-bases/index.d.ts +4 -0
- package/dist/pipeline/compiler/mode-bases/index.js +4 -0
- package/dist/pipeline/compiler/mode-bases/knowledge-probe.d.ts +10 -0
- package/dist/pipeline/compiler/mode-bases/knowledge-probe.js +22 -0
- package/dist/pipeline/compiler/mode-bases/literacy.d.ts +23 -0
- package/dist/pipeline/compiler/mode-bases/literacy.js +132 -0
- package/dist/pipeline/compiler/mode-bases/mcp-server.d.ts +10 -0
- package/dist/pipeline/compiler/mode-bases/mcp-server.js +70 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.d.ts +43 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.js +187 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.js +138 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/index.d.ts +16 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/index.js +43 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.js +29 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.d.ts +12 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.js +82 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.d.ts +4 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.js +19 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/types.d.ts +49 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.js +16 -0
- package/dist/pipeline/compiler/mode-handlers/index.d.ts +6 -7
- package/dist/pipeline/compiler/mode-handlers/index.js +6 -8
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.d.ts +16 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.js +61 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.d.ts +18 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.js +112 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.d.ts +26 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.js +49 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.js +28 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.d.ts +44 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.js +24 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/assertions.d.ts +18 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/assertions.js +118 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/compiler.d.ts +14 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/compiler.js +105 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/index.d.ts +11 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/index.js +38 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/prompts.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/prompts.js +74 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/types.d.ts +41 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/validation.d.ts +12 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/validation.js +28 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-assertions.js +63 -6
- package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.d.ts +42 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.js +334 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.js +100 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/index.d.ts +27 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/index.js +54 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.d.ts +8 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.js +28 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.d.ts +28 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.js +108 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/types.d.ts +37 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.js +43 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.d.ts +3 -1
- package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.js +65 -67
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.d.ts +33 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.js +191 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.js +101 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.js +172 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.js +323 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.d.ts +103 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.d.ts +65 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.js +368 -0
- package/dist/pipeline/compiler/preset-loader.d.ts +22 -0
- package/dist/pipeline/compiler/preset-loader.js +99 -0
- package/dist/pipeline/compiler/presets/sanity-literacy.d.ts +7 -10
- package/dist/pipeline/compiler/presets/sanity-literacy.js +11 -157
- package/dist/pipeline/compiler/promptfoo-compiler.d.ts +1 -4
- package/dist/pipeline/compiler/promptfoo-compiler.js +3 -12
- package/dist/pipeline/compiler/provider-assembler.js +13 -7
- package/dist/pipeline/compiler/sandbox/docker-sandbox.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/docker-sandbox.js +1 -1
- package/dist/pipeline/compiler/sandbox/fixture-provisioner.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/fixture-provisioner.js +1 -1
- package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.js +1 -1
- package/dist/pipeline/compiler/sandbox/index.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/index.js +1 -1
- package/dist/pipeline/compiler/sandbox/sandbox-selector.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/sandbox-selector.js +1 -1
- package/dist/pipeline/compiler/sandbox/sandbox-strategy.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/sandbox-strategy.js +1 -1
- package/dist/pipeline/compiler/sandbox/tempdir-sandbox.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/tempdir-sandbox.js +1 -1
- package/dist/pipeline/compiler/scoring-bridge.d.ts +1 -1
- package/dist/pipeline/compiler/scoring-bridge.js +1 -1
- package/dist/pipeline/compiler/task-bridge.d.ts +41 -0
- package/dist/pipeline/compiler/task-bridge.js +92 -0
- package/dist/pipeline/compiler/task-graph-builder.d.ts +1 -4
- package/dist/pipeline/compiler/task-graph-builder.js +1 -4
- package/dist/pipeline/compiler/telemetry/index.d.ts +1 -1
- package/dist/pipeline/compiler/telemetry/index.js +1 -1
- package/dist/pipeline/compiler/variable-resolver.d.ts +1 -1
- package/dist/pipeline/compiler/variable-resolver.js +1 -1
- package/dist/pipeline/coverage-audit.d.ts +1 -1
- package/dist/pipeline/coverage-audit.js +1 -1
- package/dist/pipeline/degradations.d.ts +1 -1
- package/dist/pipeline/degradations.js +1 -1
- package/dist/pipeline/expand-tasks.d.ts +2 -2
- package/dist/pipeline/expand-tasks.js +2 -2
- package/dist/pipeline/failure-modes.d.ts +1 -1
- package/dist/pipeline/failure-modes.js +13 -1
- package/dist/pipeline/gap-analysis.d.ts +1 -1
- package/dist/pipeline/gap-analysis.js +3 -1
- package/dist/pipeline/generate-configs.d.ts +2 -2
- package/dist/pipeline/generate-configs.js +16 -9
- package/dist/pipeline/grader-compare-runner.d.ts +1 -1
- package/dist/pipeline/grader-compare-runner.js +7 -1
- package/dist/pipeline/grader-comparison.d.ts +1 -1
- package/dist/pipeline/grader-comparison.js +1 -1
- package/dist/pipeline/grader-consistency-runner.d.ts +1 -1
- package/dist/pipeline/grader-consistency-runner.js +7 -1
- package/dist/pipeline/grader-consistency.d.ts +1 -1
- package/dist/pipeline/grader-consistency.js +1 -1
- package/dist/pipeline/grader-sensitivity-runner.d.ts +1 -1
- package/dist/pipeline/grader-sensitivity-runner.js +1 -1
- package/dist/pipeline/grader-sensitivity.d.ts +1 -1
- package/dist/pipeline/grader-sensitivity.js +1 -1
- package/dist/pipeline/grader-validate-runner.d.ts +1 -1
- package/dist/pipeline/grader-validate-runner.js +2 -2
- package/dist/pipeline/grader-validation.d.ts +1 -1
- package/dist/pipeline/grader-validation.js +1 -1
- package/dist/pipeline/map-request-to-config.js +16 -2
- package/dist/pipeline/mirror-repo-tasks.d.ts +8 -8
- package/dist/pipeline/mirror-repo-tasks.js +10 -10
- package/dist/pipeline/plan-format.d.ts +1 -1
- package/dist/pipeline/plan-format.js +1 -1
- package/dist/pipeline/plan.d.ts +1 -1
- package/dist/pipeline/plan.js +68 -30
- package/dist/pipeline/probe.d.ts +1 -1
- package/dist/pipeline/probe.js +1 -1
- package/dist/pipeline/readiness-report.d.ts +2 -2
- package/dist/pipeline/readiness-report.js +2 -2
- package/dist/pipeline/release-classification.d.ts +1 -1
- package/dist/pipeline/release-classification.js +1 -1
- package/dist/pipeline/release-report.d.ts +1 -1
- package/dist/pipeline/release-report.js +1 -1
- package/dist/pipeline/repo-eval-comment.d.ts +1 -1
- package/dist/pipeline/repo-eval-comment.js +1 -1
- package/dist/pipeline/repo-threshold-evaluator.d.ts +1 -1
- package/dist/pipeline/repo-threshold-evaluator.js +1 -1
- package/dist/pipeline/resolve-mappings.d.ts +6 -6
- package/dist/pipeline/resolve-mappings.js +44 -44
- package/dist/pipeline/retrieval-metrics.d.ts +3 -3
- package/dist/pipeline/retrieval-metrics.js +28 -20
- package/dist/pipeline/steps/calculate-scores-step.d.ts +11 -0
- package/dist/pipeline/steps/calculate-scores-step.js +89 -0
- package/dist/pipeline/steps/compare-step.d.ts +18 -0
- package/dist/pipeline/steps/compare-step.js +90 -0
- package/dist/pipeline/steps/eval-step.d.ts +53 -0
- package/dist/pipeline/steps/eval-step.js +347 -0
- package/dist/pipeline/steps/fetch-docs-step.d.ts +11 -0
- package/dist/pipeline/steps/fetch-docs-step.js +84 -0
- package/dist/pipeline/steps/generate-configs-step.d.ts +11 -0
- package/dist/pipeline/steps/generate-configs-step.js +98 -0
- package/dist/pipeline/steps/grader-consistency-step.d.ts +21 -0
- package/dist/pipeline/steps/grader-consistency-step.js +74 -0
- package/dist/pipeline/steps/publish-report-step.d.ts +57 -0
- package/dist/pipeline/steps/publish-report-step.js +243 -0
- package/dist/pipeline/steps/report-step.d.ts +13 -0
- package/dist/pipeline/steps/report-step.js +56 -0
- package/dist/pipeline/steps/update-scores-step.d.ts +11 -0
- package/dist/pipeline/steps/update-scores-step.js +42 -0
- package/dist/pipeline/targeted-loo.d.ts +1 -1
- package/dist/pipeline/targeted-loo.js +1 -1
- package/dist/pipeline/thresholds.d.ts +1 -1
- package/dist/pipeline/thresholds.js +1 -1
- package/dist/pipeline/validate.js +13 -0
- package/dist/report-store.d.ts +17 -0
- package/dist/report-store.js +24 -0
- package/dist/scripts/agent-behavior-report.d.ts +19 -0
- package/dist/scripts/agent-behavior-report.js +315 -0
- package/dist/scripts/baseline.d.ts +43 -0
- package/dist/scripts/baseline.js +267 -0
- package/dist/scripts/calculate-scores.d.ts +166 -0
- package/dist/scripts/calculate-scores.js +1296 -0
- package/dist/scripts/compare.d.ts +22 -0
- package/dist/scripts/compare.js +334 -0
- package/dist/scripts/coverage-audit.d.ts +44 -0
- package/dist/scripts/coverage-audit.js +209 -0
- package/dist/scripts/debug-eval.d.ts +19 -0
- package/dist/scripts/debug-eval.js +73 -0
- package/dist/scripts/discovery-report.d.ts +58 -0
- package/dist/scripts/discovery-report.js +250 -0
- package/dist/scripts/fetch-docs.d.ts +35 -0
- package/dist/scripts/fetch-docs.js +472 -0
- package/dist/scripts/generate-configs.d.ts +66 -0
- package/dist/scripts/generate-configs.js +459 -0
- package/dist/scripts/grader-api.d.ts +27 -0
- package/dist/scripts/grader-api.js +206 -0
- package/dist/scripts/grader-compare.d.ts +22 -0
- package/dist/scripts/grader-compare.js +368 -0
- package/dist/scripts/grader-consistency.d.ts +20 -0
- package/dist/scripts/grader-consistency.js +313 -0
- package/dist/scripts/grader-sensitivity.d.ts +22 -0
- package/dist/scripts/grader-sensitivity.js +354 -0
- package/dist/scripts/grader-validate.d.ts +19 -0
- package/dist/scripts/grader-validate.js +267 -0
- package/dist/scripts/measure-retrieval.d.ts +10 -0
- package/dist/scripts/measure-retrieval.js +145 -0
- package/dist/scripts/migrate-task-mode.d.ts +1 -1
- package/dist/scripts/migrate-task-mode.js +1 -1
- package/dist/scripts/migrate-tasks-to-content-lake.d.ts +1 -1
- package/dist/scripts/migrate-tasks-to-content-lake.js +1 -1
- package/dist/scripts/pipeline.d.ts +76 -0
- package/dist/scripts/pipeline.js +1031 -0
- package/dist/scripts/pr-comment.d.ts +10 -0
- package/dist/scripts/pr-comment.js +510 -0
- package/dist/scripts/readiness-report.d.ts +88 -0
- package/dist/scripts/readiness-report.js +342 -0
- package/dist/scripts/update-quality-scores.d.ts +15 -0
- package/dist/scripts/update-quality-scores.js +184 -0
- package/dist/scripts/validate-task-sources.d.ts +1 -1
- package/dist/scripts/validate-task-sources.js +1 -1
- package/dist/scripts/validate.d.ts +13 -0
- package/dist/scripts/validate.js +79 -0
- package/dist/scripts/webhook-server.d.ts +26 -0
- package/dist/scripts/webhook-server.js +147 -0
- package/dist/scripts/weekly-digest.d.ts +24 -0
- package/dist/scripts/weekly-digest.js +144 -0
- package/dist/sinks/format-slack.d.ts +64 -0
- package/dist/sinks/format-slack.js +306 -0
- package/dist/sinks/slack-sink.d.ts +27 -0
- package/dist/sinks/slack-sink.js +78 -0
- package/dist/sinks/types.d.ts +1 -1
- package/dist/sinks/types.js +1 -1
- package/dist/sinks/webhook-sink.d.ts +19 -0
- package/dist/sinks/webhook-sink.js +50 -0
- package/dist/tasks/knowledge-probe/define-type-api.task.ts +66 -0
- package/dist/tasks/knowledge-probe/groq-projections.task.ts +62 -0
- package/dist/tasks/literacy/content-lake.task.ts +181 -0
- package/dist/tasks/literacy/frameworks.task.ts +129 -0
- package/dist/tasks/literacy/functions.task.ts +70 -0
- package/dist/tasks/literacy/groq.task.ts +259 -0
- package/dist/tasks/literacy/image-handling.task.ts +95 -0
- package/dist/tasks/literacy/nextjs-live.task.ts +76 -0
- package/dist/tasks/literacy/portable-text.task.ts +169 -0
- package/dist/tasks/literacy/studio-setup.task.ts +134 -0
- package/dist/tasks/literacy/visual-editing.task.ts +147 -0
- package/package.json +32 -24
- package/tasks/.expanded.agentic.yaml +280 -0
- package/tasks/.expanded.yaml +565 -0
- package/tasks/knowledge-probe/define-type-api.task.ts +11 -0
- package/tasks/knowledge-probe/groq-projections.task.ts +3 -0
- package/tasks/literacy/content-lake.task.ts +181 -0
- package/tasks/literacy/frameworks.task.ts +1 -0
- package/tasks/literacy/functions.task.ts +1 -0
- package/tasks/literacy/groq.task.ts +1 -0
- package/tasks/literacy/image-handling.task.ts +95 -0
- package/tasks/literacy/nextjs-live.task.ts +2 -1
- package/tasks/literacy/portable-text.task.ts +169 -0
- package/tasks/literacy/studio-setup.task.ts +5 -2
- package/tasks/literacy/visual-editing.task.ts +1 -0
- package/LICENSE +0 -21
- package/tasks/frameworks.yaml +0 -98
- package/tasks/functions.yaml +0 -51
- package/tasks/groq.yaml +0 -216
- package/tasks/nextjs-live.yaml +0 -62
- package/tasks/studio-setup.yaml +0 -111
- package/tasks/visual-editing.yaml +0 -120
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* generate-configs command — generate promptfoo config files
|
|
2
|
+
* generate-configs command — generate promptfoo config files via the compiler pipeline.
|
|
3
3
|
*
|
|
4
|
-
* Uses the composition root to wire adapters, then
|
|
5
|
-
* directly — the same code path as the pipeline.
|
|
4
|
+
* Uses the composition root to wire adapters, then runs GenerateConfigsStep
|
|
5
|
+
* directly — the same code path as the full pipeline.
|
|
6
6
|
*/
|
|
7
7
|
import { dirname, resolve } from "path";
|
|
8
8
|
import { fileURLToPath } from "url";
|
|
9
9
|
import { Command } from "commander";
|
|
10
10
|
import { createAppContext } from "../composition-root.js";
|
|
11
|
-
import {
|
|
11
|
+
import { GenerateConfigsStep } from "../orchestration/steps/generate-configs-step.js";
|
|
12
12
|
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
13
13
|
const ROOT = resolve(__dirname, "..", "..");
|
|
14
14
|
export function createGenerateConfigsCommand() {
|
|
@@ -19,6 +19,7 @@ export function createGenerateConfigsCommand() {
|
|
|
19
19
|
try {
|
|
20
20
|
const ctx = createAppContext({
|
|
21
21
|
rootDir: ROOT,
|
|
22
|
+
outputDir: resolve(ROOT, "results", "latest"),
|
|
22
23
|
mode: "literacy",
|
|
23
24
|
noAutoScope: false,
|
|
24
25
|
skipFetch: true,
|
|
@@ -35,10 +36,21 @@ export function createGenerateConfigsCommand() {
|
|
|
35
36
|
remote: false,
|
|
36
37
|
apiUrl: "https://ailf-api.sanity.build",
|
|
37
38
|
});
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
39
|
+
const step = new GenerateConfigsStep();
|
|
40
|
+
// Run validation checks first
|
|
41
|
+
const issues = step.check(ctx);
|
|
42
|
+
if (issues.length > 0) {
|
|
43
|
+
for (const issue of issues) {
|
|
44
|
+
console.error(` ❌ ${issue.message}`);
|
|
45
|
+
}
|
|
46
|
+
process.exitCode = 1;
|
|
47
|
+
return;
|
|
48
|
+
}
|
|
49
|
+
const result = await step.execute(ctx, {});
|
|
50
|
+
if (result.status === "failed") {
|
|
51
|
+
console.error(`❌ ${result.error}`);
|
|
52
|
+
process.exitCode = 1;
|
|
53
|
+
}
|
|
42
54
|
}
|
|
43
55
|
catch (err) {
|
|
44
56
|
process.exitCode = 1;
|
package/dist/commands/init.d.ts
CHANGED
|
@@ -5,12 +5,13 @@
|
|
|
5
5
|
* task files. The generated files are ready-to-edit starting points —
|
|
6
6
|
* not live evaluation tasks.
|
|
7
7
|
*
|
|
8
|
-
*
|
|
9
|
-
*
|
|
10
|
-
* serialization of the parsed data
|
|
8
|
+
* TypeScript output (default) uses define* helpers from @sanity/ailf-core
|
|
9
|
+
* for full IDE autocomplete and type checking. YAML output serializes the
|
|
10
|
+
* parsed task data. JSON output is a plain serialization of the parsed data.
|
|
11
11
|
*
|
|
12
12
|
* Usage:
|
|
13
|
-
* ailf init #
|
|
13
|
+
* ailf init # TypeScript output (default)
|
|
14
|
+
* ailf init --output-format yaml # YAML output
|
|
14
15
|
* ailf init --output-format json # JSON output
|
|
15
16
|
* ailf init --force # overwrite existing files
|
|
16
17
|
* ailf init --path ./my-dir # target a specific directory
|
package/dist/commands/init.js
CHANGED
|
@@ -5,12 +5,13 @@
|
|
|
5
5
|
* task files. The generated files are ready-to-edit starting points —
|
|
6
6
|
* not live evaluation tasks.
|
|
7
7
|
*
|
|
8
|
-
*
|
|
9
|
-
*
|
|
10
|
-
* serialization of the parsed data
|
|
8
|
+
* TypeScript output (default) uses define* helpers from @sanity/ailf-core
|
|
9
|
+
* for full IDE autocomplete and type checking. YAML output serializes the
|
|
10
|
+
* parsed task data. JSON output is a plain serialization of the parsed data.
|
|
11
11
|
*
|
|
12
12
|
* Usage:
|
|
13
|
-
* ailf init #
|
|
13
|
+
* ailf init # TypeScript output (default)
|
|
14
|
+
* ailf init --output-format yaml # YAML output
|
|
14
15
|
* ailf init --output-format json # JSON output
|
|
15
16
|
* ailf init --force # overwrite existing files
|
|
16
17
|
* ailf init --path ./my-dir # target a specific directory
|
|
@@ -18,16 +19,17 @@
|
|
|
18
19
|
import { Command } from "commander";
|
|
19
20
|
import { existsSync, mkdirSync, writeFileSync } from "fs";
|
|
20
21
|
import { resolve, relative } from "path";
|
|
21
|
-
import { ailfConfigData, ailfConfigYaml, taskYamlFiles, TASK_FILE_NAMES, allTaskData, workflowYaml, } from "../_vendor/ailf-core/index.js";
|
|
22
|
+
import { ailfConfigData, ailfConfigYaml, ailfConfigTs, taskYamlFiles, taskTsFiles, TASK_FILE_NAMES, TASK_EXAMPLES, allTaskData, workflowYaml, } from "../_vendor/ailf-core/index.js";
|
|
22
23
|
// ---------------------------------------------------------------------------
|
|
23
24
|
// Command factory
|
|
24
25
|
// ---------------------------------------------------------------------------
|
|
25
26
|
export function createInitCommand() {
|
|
26
27
|
return new Command("init")
|
|
27
28
|
.description("Initialize a directory for AI Literacy Framework evaluation")
|
|
28
|
-
.option("--output-format <fmt>", 'Output format for generated files: "
|
|
29
|
+
.option("--output-format <fmt>", 'Output format for generated files: "ts" (default), "yaml", or "json"', "ts")
|
|
29
30
|
.option("--force", "Overwrite existing files", false)
|
|
30
31
|
.option("--path <dir>", "Target directory (default: current directory)", ".")
|
|
32
|
+
.option("--mode <mode>", "Scaffold for a specific mode: literacy, mcp-server, custom (default: all modes)")
|
|
31
33
|
.action(async (opts) => {
|
|
32
34
|
await runInit(opts);
|
|
33
35
|
});
|
|
@@ -51,13 +53,27 @@ function rel(from, to) {
|
|
|
51
53
|
const r = relative(from, to);
|
|
52
54
|
return r.startsWith(".") ? r : `./${r}`;
|
|
53
55
|
}
|
|
56
|
+
/**
 * Collect the file stems of the example tasks registered for one mode.
 *
 * @param {string} mode - Mode name compared against each TASK_EXAMPLES entry's `mode`.
 * @returns {string[]} The `stem` of every matching entry, in TASK_EXAMPLES order
 *   (empty when no entry matches).
 */
function taskStemsForMode(mode) {
    const stems = [];
    for (const example of TASK_EXAMPLES) {
        if (example.mode === mode) {
            stems.push(example.stem);
        }
    }
    return stems;
}
|
|
54
60
|
// ---------------------------------------------------------------------------
|
|
55
61
|
// Init logic
|
|
56
62
|
// ---------------------------------------------------------------------------
|
|
57
63
|
async function runInit(opts) {
|
|
58
|
-
const
|
|
59
|
-
|
|
64
|
+
const validFormats = new Set(["ts", "yaml", "json"]);
|
|
65
|
+
if (!validFormats.has(opts.outputFormat)) {
|
|
66
|
+
console.error(` ✗ Invalid output format "${opts.outputFormat}". Valid options: ts, yaml, json`);
|
|
67
|
+
process.exitCode = 1;
|
|
68
|
+
return;
|
|
69
|
+
}
|
|
70
|
+
const format = opts.outputFormat;
|
|
60
71
|
const force = opts.force;
|
|
72
|
+
if (format === "yaml") {
|
|
73
|
+
console.warn(" ⚠ --output-format yaml is deprecated. TypeScript (default) is the\n" +
|
|
74
|
+
" recommended format — it provides full IDE autocomplete via defineTask().\n" +
|
|
75
|
+
" YAML output will be removed in a future release.\n");
|
|
76
|
+
}
|
|
61
77
|
// Resolve target from the caller's actual working directory
|
|
62
78
|
const callerCwd = process.env.AILF_CALLER_CWD ?? process.cwd();
|
|
63
79
|
const targetDir = resolve(callerCwd, opts.path);
|
|
@@ -72,27 +88,87 @@ async function runInit(opts) {
|
|
|
72
88
|
console.log(` ✓ Created ${rel(targetDir, tasksDir)}/`);
|
|
73
89
|
const written = [];
|
|
74
90
|
const skipped = [];
|
|
75
|
-
// 2. Write
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
91
|
+
// 2. Write project config
|
|
92
|
+
if (format === "ts") {
|
|
93
|
+
// TypeScript: ailf.config.ts with defineConfig helper
|
|
94
|
+
const configPath = resolve(ailfDir, "ailf.config.ts");
|
|
95
|
+
if (writeIfNew(configPath, ailfConfigTs, force)) {
|
|
96
|
+
written.push(rel(targetDir, configPath));
|
|
97
|
+
}
|
|
98
|
+
else {
|
|
99
|
+
skipped.push(rel(targetDir, configPath));
|
|
100
|
+
}
|
|
101
|
+
}
|
|
102
|
+
else if (format === "yaml") {
|
|
103
|
+
// YAML: raw string passthrough (preserves comments)
|
|
104
|
+
const configPath = resolve(ailfDir, "config.yaml");
|
|
105
|
+
if (writeIfNew(configPath, ailfConfigYaml, force)) {
|
|
106
|
+
written.push(rel(targetDir, configPath));
|
|
107
|
+
}
|
|
108
|
+
else {
|
|
109
|
+
skipped.push(rel(targetDir, configPath));
|
|
110
|
+
}
|
|
84
111
|
}
|
|
85
112
|
else {
|
|
86
|
-
|
|
113
|
+
// JSON: serialize the parsed data
|
|
114
|
+
const configPath = resolve(ailfDir, "config.json");
|
|
115
|
+
const content = JSON.stringify(ailfConfigData, null, 2) + "\n";
|
|
116
|
+
if (writeIfNew(configPath, content, force)) {
|
|
117
|
+
written.push(rel(targetDir, configPath));
|
|
118
|
+
}
|
|
119
|
+
else {
|
|
120
|
+
skipped.push(rel(targetDir, configPath));
|
|
121
|
+
}
|
|
87
122
|
}
|
|
88
123
|
// 3. Write example tasks to .ailf/tasks/
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
124
|
+
const modeFilter = opts.mode;
|
|
125
|
+
const isCustomMode = modeFilter === "custom";
|
|
126
|
+
// Determine which task stems to write based on mode filter
|
|
127
|
+
let stemsToWrite;
|
|
128
|
+
if (isCustomMode) {
|
|
129
|
+
// Custom mode: write one literacy example as a starting point
|
|
130
|
+
stemsToWrite = taskStemsForMode("literacy").slice(0, 1);
|
|
131
|
+
}
|
|
132
|
+
else if (modeFilter === "literacy") {
|
|
133
|
+
stemsToWrite = taskStemsForMode("literacy");
|
|
134
|
+
}
|
|
135
|
+
else if (modeFilter === "mcp-server") {
|
|
136
|
+
stemsToWrite = taskStemsForMode("mcp-server");
|
|
137
|
+
}
|
|
138
|
+
else if (modeFilter === "knowledge-probe") {
|
|
139
|
+
stemsToWrite = taskStemsForMode("knowledge-probe");
|
|
140
|
+
}
|
|
141
|
+
else {
|
|
142
|
+
// Default (no --mode): write all tasks
|
|
143
|
+
stemsToWrite = [...TASK_FILE_NAMES];
|
|
144
|
+
}
|
|
145
|
+
if (format === "ts") {
|
|
146
|
+
for (const stem of stemsToWrite) {
|
|
147
|
+
let content = taskTsFiles[stem];
|
|
148
|
+
if (!content)
|
|
149
|
+
continue;
|
|
150
|
+
// For MCP-only init, activate the draft task
|
|
151
|
+
if (modeFilter === "mcp-server") {
|
|
152
|
+
content = content.replace('status: "draft",', '// status: "active", // Activated — this task runs in evaluations');
|
|
153
|
+
}
|
|
154
|
+
const fileName = isCustomMode && stem === stemsToWrite[0]
|
|
155
|
+
? "example-custom.task.ts"
|
|
156
|
+
: `${stem}.task.ts`;
|
|
157
|
+
const taskPath = resolve(tasksDir, fileName);
|
|
158
|
+
if (writeIfNew(taskPath, content, force)) {
|
|
159
|
+
written.push(rel(targetDir, taskPath));
|
|
160
|
+
}
|
|
161
|
+
else {
|
|
162
|
+
skipped.push(rel(targetDir, taskPath));
|
|
163
|
+
}
|
|
164
|
+
}
|
|
165
|
+
}
|
|
166
|
+
else if (format === "yaml") {
|
|
167
|
+
for (const stem of stemsToWrite) {
|
|
95
168
|
const content = taskYamlFiles[stem];
|
|
169
|
+
if (!content)
|
|
170
|
+
continue;
|
|
171
|
+
const taskPath = resolve(tasksDir, `${stem}.yaml`);
|
|
96
172
|
if (writeIfNew(taskPath, content, force)) {
|
|
97
173
|
written.push(rel(targetDir, taskPath));
|
|
98
174
|
}
|
|
@@ -106,8 +182,12 @@ async function runInit(opts) {
|
|
|
106
182
|
const tasks = Array.isArray(allTaskData)
|
|
107
183
|
? allTaskData
|
|
108
184
|
: [allTaskData];
|
|
185
|
+
// Build a set of task IDs that match the selected stems
|
|
186
|
+
const selectedIds = new Set(stemsToWrite.flatMap((s) => TASK_EXAMPLES.filter((t) => t.stem === s).map((t) => t.stem)));
|
|
109
187
|
for (const task of tasks) {
|
|
110
188
|
const taskId = task.id;
|
|
189
|
+
if (!selectedIds.has(taskId))
|
|
190
|
+
continue;
|
|
111
191
|
const taskPath = resolve(tasksDir, `${taskId}.json`);
|
|
112
192
|
const content = JSON.stringify([task], null, 2) + "\n";
|
|
113
193
|
if (writeIfNew(taskPath, content, force)) {
|
|
@@ -118,6 +198,16 @@ async function runInit(opts) {
|
|
|
118
198
|
}
|
|
119
199
|
}
|
|
120
200
|
}
|
|
201
|
+
// 3b. Write custom preset scaffold (--mode custom only)
|
|
202
|
+
if (isCustomMode && format === "ts") {
|
|
203
|
+
const presetPath = resolve(ailfDir, "preset.ts");
|
|
204
|
+
if (writeIfNew(presetPath, CUSTOM_PRESET_TS, force)) {
|
|
205
|
+
written.push(rel(targetDir, presetPath));
|
|
206
|
+
}
|
|
207
|
+
else {
|
|
208
|
+
skipped.push(rel(targetDir, presetPath));
|
|
209
|
+
}
|
|
210
|
+
}
|
|
121
211
|
// 4. Write .gitignore in .ailf/ (keep results out of version control)
|
|
122
212
|
const gitignorePath = resolve(ailfDir, ".gitignore");
|
|
123
213
|
const gitignoreContent = `# AILF generated files\nresults/\ncontexts/\n`;
|
|
@@ -150,18 +240,24 @@ async function runInit(opts) {
|
|
|
150
240
|
console.log(` ⊘ Skipped ${f} (already exists, use --force to overwrite)`);
|
|
151
241
|
}
|
|
152
242
|
}
|
|
243
|
+
const taskExt = format === "ts" ? ".task.ts" : format === "yaml" ? ".yaml" : ".json";
|
|
153
244
|
console.log();
|
|
154
245
|
console.log(" Next steps:");
|
|
155
246
|
console.log();
|
|
156
247
|
console.log(` 1. Edit the example tasks in ${rel(targetDir, tasksDir)}/ — update`);
|
|
157
248
|
console.log(" slugs and prompts for your documentation");
|
|
158
|
-
console.log(
|
|
249
|
+
console.log(` 2. Validate locally: npx @sanity/ailf@latest validate-tasks .ailf/tasks/`);
|
|
159
250
|
console.log(" 3. Add two GitHub Actions secrets");
|
|
160
251
|
console.log(" (Settings → Secrets and variables → Actions):");
|
|
161
252
|
console.log(" • AILF_API_KEY — your API key (starts with ailf_live_sk_)");
|
|
162
253
|
console.log(" • NPM_TOKEN — npm token with read access to @sanity scope");
|
|
163
254
|
console.log(" 4. Push — the workflow at .github/workflows/ailf-eval.yml runs");
|
|
164
255
|
console.log(" automatically on PRs");
|
|
256
|
+
if (format === "ts") {
|
|
257
|
+
console.log();
|
|
258
|
+
console.log(` 💡 TypeScript tasks (${taskExt}) give you full IDE autocomplete`);
|
|
259
|
+
console.log(" via defineTask() from @sanity/ailf-core.");
|
|
260
|
+
}
|
|
165
261
|
console.log();
|
|
166
262
|
console.log(" 🔑 Retrieve secrets from 1Password (Sanity employees):");
|
|
167
263
|
console.log();
|
|
@@ -177,3 +273,72 @@ async function runInit(opts) {
|
|
|
177
273
|
console.log(" AILF_API_KEY=... npx @sanity/ailf@latest pipeline --remote --debug");
|
|
178
274
|
console.log();
|
|
179
275
|
}
|
|
276
|
+
// ---------------------------------------------------------------------------
|
|
277
|
+
// Custom preset scaffold template
|
|
278
|
+
// ---------------------------------------------------------------------------
|
|
279
|
+
// Scaffold written verbatim to `preset.ts` by `ailf init --mode custom` (TypeScript
// output only). The generated file lives in the user's project, so its import has
// to be a package specifier: a relative path into this package's internal
// `_vendor` directory cannot resolve from there.
const CUSTOM_PRESET_TS = `/**
 * Custom preset — your domain-specific evaluation configuration.
 *
 * This preset targets the "literacy" mode base and inherits its evaluation
 * methodology (rubrics, scoring profiles, prompt templates). You only need
 * to provide domain-specific configuration: where your docs live, what
 * features to track, and how to fetch documentation.
 *
 * To use a different mode (e.g., "mcp-server"), change the mode field.
 * Available built-in modes: literacy, mcp-server, knowledge-probe, agent-harness.
 *
 * @see https://github.com/sanity-labs/ai-literacy-framework/blob/main/docs/presets.md
 */

import { definePreset } from "@sanity/ailf-core"

export default definePreset({
  name: "my-docs-evaluation",
  manifest: {
    name: "my-docs-evaluation",
    version: "1.0.0",
    description: "Documentation literacy evaluation for my project.",
    pluginApiVersion: 1,
  },

  // Target the literacy mode base — inherits rubrics, scoring, prompts.
  // Change to "mcp-server" to evaluate MCP tool usage instead.
  mode: "literacy",

  // Source definitions — where your documentation lives.
  sourceDefs: [
    {
      name: "production",
      baseUrl: "https://docs.example.com",
      // projectId: "your-sanity-project-id",
      // dataset: "production",
    },
  ],

  // Feature registry — what product features you're tracking coverage for.
  featureDefs: {
    features: [
      {
        id: "getting-started",
        name: "Getting Started Guide",
        sections: ["guides"],
        status: "covered",
        area: "guides",
        priority: "critical",
      },
      {
        id: "api-reference",
        name: "API Reference",
        sections: ["reference"],
        status: "uncovered",
        priority: "high",
      },
    ],
  },

  // Optional: override mode base rubrics, scoring, or prompts here.
  // rubricTemplates: [{ ... }],
  // scoringProfiles: { ... },
  // promptTemplates: { ... },
})
`;
|
|
@@ -36,6 +36,8 @@ export interface ResolvedOptions {
|
|
|
36
36
|
noAutoScope: boolean;
|
|
37
37
|
noCache: boolean;
|
|
38
38
|
noRemoteCache: boolean;
|
|
39
|
+
/** Base directory for user-facing pipeline output artifacts (always resolved). */
|
|
40
|
+
outputDir: string;
|
|
39
41
|
outputPath?: string;
|
|
40
42
|
perspectiveOverride?: string;
|
|
41
43
|
projectIdOverride?: string;
|
|
@@ -57,10 +59,14 @@ export interface ResolvedOptions {
|
|
|
57
59
|
repoTasksPath?: string;
|
|
58
60
|
taskOption?: string;
|
|
59
61
|
tagOption?: string[];
|
|
60
|
-
taskSourceType?: "content-lake" | "repo"
|
|
62
|
+
taskSourceType?: "content-lake" | "repo";
|
|
61
63
|
urlArgs: string[];
|
|
62
64
|
apiUrl: string;
|
|
63
65
|
apiKey?: string;
|
|
66
|
+
captureEnabled: boolean;
|
|
67
|
+
captureDir?: string;
|
|
68
|
+
captureCompress: boolean;
|
|
69
|
+
captureExtras: boolean;
|
|
64
70
|
}
|
|
65
71
|
/**
|
|
66
72
|
* Pure option resolution — computes ResolvedOptions from CLI flags without
|
|
@@ -10,7 +10,7 @@
|
|
|
10
10
|
*
|
|
11
11
|
* @see packages/eval/src/orchestration/ for the step-based pipeline
|
|
12
12
|
*/
|
|
13
|
-
import { existsSync, readFileSync, writeFileSync } from "fs";
|
|
13
|
+
import { existsSync, mkdirSync, readFileSync, writeFileSync } from "fs";
|
|
14
14
|
import { dirname, resolve } from "path";
|
|
15
15
|
import { fileURLToPath } from "url";
|
|
16
16
|
import { classifyUrls } from "../pipeline/classify-url.js";
|
|
@@ -209,6 +209,23 @@ export function computeResolvedOptions(opts) {
|
|
|
209
209
|
const remote = opts.remote || process.env.AILF_REMOTE === "1";
|
|
210
210
|
const apiUrl = opts.apiUrl ?? process.env.AILF_API_URL ?? "https://ailf-api.sanity.build";
|
|
211
211
|
const apiKey = process.env.AILF_API_KEY ?? undefined;
|
|
212
|
+
// Output directory: explicit flag → repo-task heuristic → default
|
|
213
|
+
const resolvedRepoTasksPath = opts.repoTasksPath
|
|
214
|
+
? resolve(callerCwd, opts.repoTasksPath)
|
|
215
|
+
: undefined;
|
|
216
|
+
const resolvedTaskSourceType = resolveTaskSourceType(opts.taskSource);
|
|
217
|
+
let outputDir;
|
|
218
|
+
if (opts.outputDir) {
|
|
219
|
+
outputDir = resolve(callerCwd, opts.outputDir);
|
|
220
|
+
}
|
|
221
|
+
else if (resolvedTaskSourceType === "repo" || resolvedRepoTasksPath) {
|
|
222
|
+
outputDir = resolvedRepoTasksPath
|
|
223
|
+
? resolve(resolvedRepoTasksPath, "..", "results", "latest")
|
|
224
|
+
: resolve(callerCwd, ".ailf", "results", "latest");
|
|
225
|
+
}
|
|
226
|
+
else {
|
|
227
|
+
outputDir = resolve(ROOT, "results", "latest");
|
|
228
|
+
}
|
|
212
229
|
return {
|
|
213
230
|
allowedOriginArgs,
|
|
214
231
|
apiKey,
|
|
@@ -233,6 +250,7 @@ export function computeResolvedOptions(opts) {
|
|
|
233
250
|
noAutoScope: opts.autoScope === false,
|
|
234
251
|
noCache: !opts.cache,
|
|
235
252
|
noRemoteCache: opts.remoteCache === false,
|
|
253
|
+
outputDir,
|
|
236
254
|
outputPath: opts.output,
|
|
237
255
|
perspectiveOverride,
|
|
238
256
|
projectIdOverride,
|
|
@@ -250,24 +268,25 @@ export function computeResolvedOptions(opts) {
|
|
|
250
268
|
skipFetch: opts.skipFetch,
|
|
251
269
|
source: opts.source,
|
|
252
270
|
studioOriginOverride,
|
|
253
|
-
repoTasksPath:
|
|
254
|
-
? resolve(callerCwd, opts.repoTasksPath)
|
|
255
|
-
: undefined,
|
|
271
|
+
repoTasksPath: resolvedRepoTasksPath,
|
|
256
272
|
taskOption,
|
|
257
273
|
tagOption,
|
|
258
|
-
taskSourceType:
|
|
274
|
+
taskSourceType: resolvedTaskSourceType,
|
|
259
275
|
urlArgs,
|
|
276
|
+
captureEnabled: opts.capture || process.env.AILF_CAPTURE === "1",
|
|
277
|
+
captureDir: opts.captureDir ?? process.env.AILF_CAPTURE_DIR,
|
|
278
|
+
captureCompress: opts.captureCompress !== false &&
|
|
279
|
+
process.env.AILF_CAPTURE_COMPRESS !== "0",
|
|
280
|
+
captureExtras: opts.captureExtras !== false && process.env.AILF_CAPTURE_EXTRAS !== "0",
|
|
260
281
|
};
|
|
261
282
|
}
|
|
262
283
|
/**
 * Validate the raw --task-source value and map it to the internal source type.
 *
 * @param {string | undefined} raw - Flag value as received from the CLI.
 * @returns {"repo" | undefined} "repo" for repo-only tasks; undefined for the
 *   Content Lake default (flag missing, empty, or "content-lake").
 *
 * Any other value is reported on stderr and the process is terminated with
 * exit code 1.
 */
function resolveTaskSourceType(raw) {
    switch (raw || "content-lake") {
        case "content-lake":
            return undefined; // default — Content Lake
        case "repo":
            return "repo";
        default:
            console.error(`❌ Invalid --task-source "${raw}". Must be "repo" or "content-lake".`);
            process.exit(1);
    }
}
|
|
273
292
|
// ---------------------------------------------------------------------------
|
|
@@ -304,17 +323,26 @@ export async function executePipeline(cliOpts) {
|
|
|
304
323
|
if (cliOpts.output) {
|
|
305
324
|
config.outputPath = resolve(callerCwd, cliOpts.output);
|
|
306
325
|
}
|
|
326
|
+
// Output dir: explicit CLI flag → repo-task heuristic → file-config default
|
|
327
|
+
if (cliOpts.outputDir) {
|
|
328
|
+
config.outputDir = resolve(callerCwd, cliOpts.outputDir);
|
|
329
|
+
}
|
|
330
|
+
else if (config.repoTasksPath) {
|
|
331
|
+
config.outputDir = resolve(config.repoTasksPath, "..", "results", "latest");
|
|
332
|
+
}
|
|
307
333
|
// Create AppContext directly from the merged config so adapters
|
|
308
334
|
// (especially taskSource) are wired from the file config's
|
|
309
335
|
// taskSourceType — not from CLI defaults.
|
|
336
|
+
console.log(` 📂 Output directory: ${config.outputDir}`);
|
|
310
337
|
const ctx = createAppContext(config);
|
|
311
338
|
const pipelineStart = Date.now();
|
|
312
339
|
const steps = buildStepSequence(ctx, pipelineStart);
|
|
313
340
|
const result = await orchestratePipeline(ctx, steps);
|
|
314
|
-
writePipelineResult(result);
|
|
341
|
+
writePipelineResult(result, config.outputDir);
|
|
315
342
|
process.exit(result.success ? 0 : 1);
|
|
316
343
|
}
|
|
317
344
|
const o = resolveOptions(cliOpts);
|
|
345
|
+
console.log(` 📂 Output directory: ${o.outputDir}`);
|
|
318
346
|
// Remote mode — submit to AILF API instead of running locally.
|
|
319
347
|
// Use the caller's working directory (not the package root) because
|
|
320
348
|
// remote mode reads .ailf/tasks/ from the user's repo, not from
|
|
@@ -350,7 +378,7 @@ export async function executePipeline(cliOpts) {
|
|
|
350
378
|
const pipelineStart = Date.now();
|
|
351
379
|
const steps = buildStepSequence(ctx, pipelineStart);
|
|
352
380
|
const result = await orchestratePipeline(ctx, steps);
|
|
353
|
-
writePipelineResult(result);
|
|
381
|
+
writePipelineResult(result, o.outputDir);
|
|
354
382
|
process.exit(result.success ? 0 : 1);
|
|
355
383
|
}
|
|
356
384
|
// ---------------------------------------------------------------------------
|
|
@@ -362,15 +390,11 @@ export async function executePipeline(cliOpts) {
|
|
|
362
390
|
/**
 * Turn the raw CLI options into resolved pipeline options.
 *
 * Delegates entirely to computeResolvedOptions and returns its result unchanged.
 *
 * @param opts - Options object handed over by the CLI layer.
 * @returns The value produced by computeResolvedOptions(opts).
 */
function resolveOptions(opts) {
    const resolved = computeResolvedOptions(opts);
    return resolved;
}
|
|
365
|
-
function writePipelineResult(result) {
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
}
|
|
371
|
-
catch {
|
|
372
|
-
// results/latest/ may not exist yet — not critical
|
|
373
|
-
}
|
|
393
|
+
/**
 * Persist the pipeline result as `pipeline-result.json` inside `outputDir`.
 *
 * The directory is created recursively when it does not exist yet. Writing is
 * best-effort: callers go on to exit with the pipeline's own status code, so a
 * filesystem or serialization failure is reported as a warning instead of
 * being thrown and masking that status.
 *
 * @param {unknown} result - Pipeline result; serialized with JSON.stringify.
 * @param {string} outputDir - Directory that receives the result file.
 * @returns {void}
 */
function writePipelineResult(result, outputDir) {
    const resultFile = resolve(outputDir, "pipeline-result.json");
    try {
        mkdirSync(outputDir, { recursive: true });
        writeFileSync(resultFile, JSON.stringify(result, null, 2));
    }
    catch (err) {
        const reason = err instanceof Error ? err.message : String(err);
        console.warn(` ⚠ Could not write pipeline result to ${resultFile}: ${reason}`);
        return;
    }
    console.log(` 📄 Pipeline result: ${resultFile}\n`);
}
|
|
375
399
|
/**
|
|
376
400
|
* Load .ailf/config.yaml if --repo-tasks-path is set and the config file
|
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
* options object, bridges to process.env for downstream modules, and
|
|
6
6
|
* delegates to runPipeline().
|
|
7
7
|
*
|
|
8
|
-
* @see docs/
|
|
8
|
+
* @see docs/cli.md for the full flag reference.
|
|
9
9
|
*/
|
|
10
10
|
import { Command } from "commander";
|
|
11
11
|
/**
|
|
@@ -37,6 +37,7 @@ export interface PipelineCliOptions {
|
|
|
37
37
|
mode: string;
|
|
38
38
|
variant?: string;
|
|
39
39
|
output?: string;
|
|
40
|
+
outputDir?: string;
|
|
40
41
|
promptfooUrl?: string;
|
|
41
42
|
publish?: boolean;
|
|
42
43
|
publishTag?: string;
|
|
@@ -63,5 +64,9 @@ export interface PipelineCliOptions {
|
|
|
63
64
|
url: string[];
|
|
64
65
|
urls: string[];
|
|
65
66
|
apiUrl?: string;
|
|
67
|
+
capture: boolean;
|
|
68
|
+
captureDir?: string;
|
|
69
|
+
captureCompress: boolean;
|
|
70
|
+
captureExtras: boolean;
|
|
66
71
|
}
|
|
67
72
|
export declare function createPipelineCommand(): Command;
|
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
* options object, bridges to process.env for downstream modules, and
|
|
6
6
|
* delegates to runPipeline().
|
|
7
7
|
*
|
|
8
|
-
* @see docs/
|
|
8
|
+
* @see docs/cli.md for the full flag reference.
|
|
9
9
|
*/
|
|
10
10
|
import { Command } from "commander";
|
|
11
11
|
import { LiteracyVariant } from "../pipeline/normalize-mode.js";
|
|
@@ -48,11 +48,16 @@ export function createPipelineCommand() {
|
|
|
48
48
|
.option("--report-project <id>", "Sanity project ID for report store")
|
|
49
49
|
.option("--config <path>", "Load pipeline config from a TS/JS/YAML/JSON file (overrides most CLI flags)")
|
|
50
50
|
.option("-o, --output <path>", "Write PR comment markdown to file")
|
|
51
|
+
.option("--output-dir <path>", "Base directory for pipeline output artifacts (default: inferred from execution context)")
|
|
51
52
|
.option("--promptfoo-url <url>", "Promptfoo share URL for report")
|
|
52
|
-
.option("--task-source <type>", "Task definition source: content-lake (default — Sanity Content Lake), repo (repo tasks only, no Content Lake merge)
|
|
53
|
+
.option("--task-source <type>", "Task definition source: content-lake (default — Sanity Content Lake), repo (repo tasks only, no Content Lake merge)", "content-lake")
|
|
53
54
|
.option("--repo-tasks-path <path>", "Path to repo-based task definitions (.ailf/tasks/ directory)")
|
|
54
55
|
.option("--remote", "Submit evaluation to the AILF API instead of running locally", false)
|
|
55
56
|
.option("--api-url <url>", "AILF API base URL (default: https://ailf-api.sanity.build)")
|
|
57
|
+
.option("--capture", "Enable artifact capture for this run", false)
|
|
58
|
+
.option("--capture-dir <path>", "Base directory for capture output (default: results/captures/)")
|
|
59
|
+
.option("--no-capture-compress", "Disable tar.gz compression of captures")
|
|
60
|
+
.option("--no-capture-extras", "Exclude mode-specific artifacts from captures")
|
|
56
61
|
.action(async (opts) => {
|
|
57
62
|
const { executePipeline } = await import("./pipeline-action.js");
|
|
58
63
|
await executePipeline(opts);
|
package/dist/commands/publish.js
CHANGED
|
@@ -74,8 +74,8 @@ Quick Start:
|
|
|
74
74
|
|
|
75
75
|
Documentation:
|
|
76
76
|
Repository https://github.com/sanity-io/ai-literacy-framework
|
|
77
|
-
CLI Guide https://github.com/sanity-io/ai-literacy-framework/blob/main/docs/
|
|
78
|
-
Getting Started https://github.com/sanity-io/ai-literacy-framework/blob/main/docs/
|
|
77
|
+
CLI Guide https://github.com/sanity-io/ai-literacy-framework/blob/main/docs/cli.md
|
|
78
|
+
Getting Started https://github.com/sanity-io/ai-literacy-framework/blob/main/docs/getting-started.md
|
|
79
79
|
|
|
80
80
|
Run ailf <command> --help for detailed usage of any command.`;
|
|
81
81
|
// ---------------------------------------------------------------------------
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* update-quality-scores command — update QUALITY_SCORE.md from scores.
|
|
3
|
+
*/
|
|
4
|
+
import { Command } from "commander";
|
|
5
|
+
/**
 * Build the `update-quality-scores` CLI command.
 *
 * When invoked, the action lazily imports ../scripts/update-quality-scores.js,
 * calls updateQualityScores(), and prints the returned message. A result whose
 * `success` is falsy is printed to stderr and ends the process with exit code 1.
 *
 * @returns {Command} The configured commander command.
 */
export function createUpdateQualityScoresCommand() {
    const command = new Command("update-quality-scores");
    command.description("Update docs/QUALITY_SCORE.md from score-summary.json");
    command.action(async () => {
        // Loaded on demand so the script is only evaluated when the command runs.
        const { updateQualityScores } = await import("../scripts/update-quality-scores.js");
        console.log("=== Updating QUALITY_SCORE.md from score-summary.json ===\n");
        const outcome = updateQualityScores();
        if (!outcome.success) {
            console.error(` ❌ ${outcome.message}`);
            process.exit(1);
        }
        else {
            console.log(` ✅ ${outcome.message}`);
        }
    });
    return command;
}
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* validate-tasks command — standalone validation of
|
|
2
|
+
* validate-tasks command — standalone validation of task files.
|
|
3
3
|
*
|
|
4
|
-
* Validates .ailf/tasks/*.yaml files against the
|
|
4
|
+
* Validates .ailf/tasks/*.yaml files against the CanonicalTaskSchema without
|
|
5
5
|
* running the full pipeline. Useful for pre-commit hooks and CI checks
|
|
6
6
|
* in external repos.
|
|
7
7
|
*
|