@sanity/ailf 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +89 -0
- package/bin/ailf.js +64 -0
- package/canonical/grader-references/README.md +88 -0
- package/canonical/grader-references/groq.yaml +234 -0
- package/canonical/grader-references/studio-setup.yaml +275 -0
- package/canonical/reference-solutions/.gitkeep +1 -0
- package/canonical/reference-solutions/frameworks/nuxt.ts +119 -0
- package/canonical/reference-solutions/frameworks/remix.tsx +100 -0
- package/canonical/reference-solutions/functions/publish-webhook.ts +60 -0
- package/canonical/reference-solutions/groq/advanced-filtering.ts +379 -0
- package/canonical/reference-solutions/groq/blog-queries.ts +137 -0
- package/canonical/reference-solutions/groq/joins-references.ts +300 -0
- package/canonical/reference-solutions/nextjs/app-router-integration.tsx +128 -0
- package/canonical/reference-solutions/studio-setup/blog-schema.ts +143 -0
- package/canonical/reference-solutions/studio-setup/custom-tool.tsx +78 -0
- package/canonical/reference-solutions/visual-editing/live-preview.tsx +137 -0
- package/canonical/reference-solutions/visual-editing/presentation-nextjs.tsx +130 -0
- package/config/airbyte/ai_literacy_framework.connector.yaml +639 -0
- package/config/bigquery/README.md +74 -0
- package/config/bigquery/views/area_scores.sql +87 -0
- package/config/bigquery/views/reports.sql +49 -0
- package/config/features.yaml +116 -0
- package/config/models.yaml +115 -0
- package/config/prompts.yaml +75 -0
- package/config/rubrics.yaml +62 -0
- package/config/schedules.yaml +43 -0
- package/config/sinks.yaml +54 -0
- package/config/sources.yaml +51 -0
- package/config/thresholds.yaml +49 -0
- package/dist/_vendor/ailf-core/examples/index.d.ts +190 -0
- package/dist/_vendor/ailf-core/examples/index.js +285 -0
- package/dist/_vendor/ailf-core/index.d.ts +17 -0
- package/dist/_vendor/ailf-core/index.js +17 -0
- package/dist/_vendor/ailf-core/ports/cache-store.d.ts +72 -0
- package/dist/_vendor/ailf-core/ports/cache-store.js +17 -0
- package/dist/_vendor/ailf-core/ports/config-source.d.ts +33 -0
- package/dist/_vendor/ailf-core/ports/config-source.js +15 -0
- package/dist/_vendor/ailf-core/ports/context.d.ts +172 -0
- package/dist/_vendor/ailf-core/ports/context.js +14 -0
- package/dist/_vendor/ailf-core/ports/doc-fetcher.d.ts +131 -0
- package/dist/_vendor/ailf-core/ports/doc-fetcher.js +12 -0
- package/dist/_vendor/ailf-core/ports/eval-runner.d.ts +24 -0
- package/dist/_vendor/ailf-core/ports/eval-runner.js +8 -0
- package/dist/_vendor/ailf-core/ports/index.d.ts +15 -0
- package/dist/_vendor/ailf-core/ports/index.js +7 -0
- package/dist/_vendor/ailf-core/ports/logger.d.ts +36 -0
- package/dist/_vendor/ailf-core/ports/logger.js +11 -0
- package/dist/_vendor/ailf-core/ports/pipeline-step.d.ts +46 -0
- package/dist/_vendor/ailf-core/ports/pipeline-step.js +8 -0
- package/dist/_vendor/ailf-core/ports/task-source.d.ts +159 -0
- package/dist/_vendor/ailf-core/ports/task-source.js +72 -0
- package/dist/_vendor/ailf-core/schemas/callback-payload.d.ts +24 -0
- package/dist/_vendor/ailf-core/schemas/callback-payload.js +29 -0
- package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +55 -0
- package/dist/_vendor/ailf-core/schemas/eval-config.js +78 -0
- package/dist/_vendor/ailf-core/schemas/index.d.ts +16 -0
- package/dist/_vendor/ailf-core/schemas/index.js +16 -0
- package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +125 -0
- package/dist/_vendor/ailf-core/schemas/pipeline-request.js +67 -0
- package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +531 -0
- package/dist/_vendor/ailf-core/schemas/pipeline.js +318 -0
- package/dist/_vendor/ailf-core/schemas/schedules.d.ts +68 -0
- package/dist/_vendor/ailf-core/schemas/schedules.js +74 -0
- package/dist/_vendor/ailf-core/schemas/sinks.d.ts +207 -0
- package/dist/_vendor/ailf-core/schemas/sinks.js +108 -0
- package/dist/_vendor/ailf-core/services/comparison-formatters.d.ts +18 -0
- package/dist/_vendor/ailf-core/services/comparison-formatters.js +189 -0
- package/dist/_vendor/ailf-core/services/config-helpers.d.ts +41 -0
- package/dist/_vendor/ailf-core/services/config-helpers.js +86 -0
- package/dist/_vendor/ailf-core/services/index.d.ts +12 -0
- package/dist/_vendor/ailf-core/services/index.js +12 -0
- package/dist/_vendor/ailf-core/services/scoring.d.ts +49 -0
- package/dist/_vendor/ailf-core/services/scoring.js +222 -0
- package/dist/_vendor/ailf-core/types/index.d.ts +1082 -0
- package/dist/_vendor/ailf-core/types/index.js +21 -0
- package/dist/_vendor/ailf-core/types/scoring-input.d.ts +54 -0
- package/dist/_vendor/ailf-core/types/scoring-input.js +9 -0
- package/dist/_vendor/ailf-shared/dimension-names.d.ts +21 -0
- package/dist/_vendor/ailf-shared/dimension-names.js +27 -0
- package/dist/_vendor/ailf-shared/document-ref.d.ts +29 -0
- package/dist/_vendor/ailf-shared/document-ref.js +1 -0
- package/dist/_vendor/ailf-shared/eval-modes.d.ts +12 -0
- package/dist/_vendor/ailf-shared/eval-modes.js +8 -0
- package/dist/_vendor/ailf-shared/index.d.ts +16 -0
- package/dist/_vendor/ailf-shared/index.js +16 -0
- package/dist/_vendor/ailf-shared/noise-threshold.d.ts +9 -0
- package/dist/_vendor/ailf-shared/noise-threshold.js +9 -0
- package/dist/_vendor/ailf-shared/score-grades.d.ts +17 -0
- package/dist/_vendor/ailf-shared/score-grades.js +23 -0
- package/dist/adapters/cache/content-lake-cache.d.ts +24 -0
- package/dist/adapters/cache/content-lake-cache.js +59 -0
- package/dist/adapters/cache/filesystem-cache.d.ts +18 -0
- package/dist/adapters/cache/filesystem-cache.js +54 -0
- package/dist/adapters/cache/index.d.ts +2 -0
- package/dist/adapters/cache/index.js +2 -0
- package/dist/adapters/config-sources/cli-config-adapter.d.ts +17 -0
- package/dist/adapters/config-sources/cli-config-adapter.js +23 -0
- package/dist/adapters/config-sources/file-config-adapter.d.ts +26 -0
- package/dist/adapters/config-sources/file-config-adapter.js +96 -0
- package/dist/adapters/config-sources/index.d.ts +2 -0
- package/dist/adapters/config-sources/index.js +2 -0
- package/dist/adapters/doc-fetchers/index.d.ts +1 -0
- package/dist/adapters/doc-fetchers/index.js +1 -0
- package/dist/adapters/doc-fetchers/sanity-doc-fetcher.d.ts +76 -0
- package/dist/adapters/doc-fetchers/sanity-doc-fetcher.js +620 -0
- package/dist/adapters/eval-runners/index.d.ts +1 -0
- package/dist/adapters/eval-runners/index.js +1 -0
- package/dist/adapters/eval-runners/promptfoo-eval-adapter.d.ts +14 -0
- package/dist/adapters/eval-runners/promptfoo-eval-adapter.js +63 -0
- package/dist/adapters/index.d.ts +12 -0
- package/dist/adapters/index.js +12 -0
- package/dist/adapters/loggers/console-logger.d.ts +22 -0
- package/dist/adapters/loggers/console-logger.js +54 -0
- package/dist/adapters/loggers/index.d.ts +9 -0
- package/dist/adapters/loggers/index.js +9 -0
- package/dist/adapters/loggers/json-logger.d.ts +18 -0
- package/dist/adapters/loggers/json-logger.js +33 -0
- package/dist/adapters/loggers/quiet-logger.d.ts +16 -0
- package/dist/adapters/loggers/quiet-logger.js +30 -0
- package/dist/adapters/task-sources/composite-task-source.d.ts +20 -0
- package/dist/adapters/task-sources/composite-task-source.js +59 -0
- package/dist/adapters/task-sources/content-lake-task-source.d.ts +20 -0
- package/dist/adapters/task-sources/content-lake-task-source.js +219 -0
- package/dist/adapters/task-sources/index.d.ts +7 -0
- package/dist/adapters/task-sources/index.js +7 -0
- package/dist/adapters/task-sources/repo-schemas.d.ts +245 -0
- package/dist/adapters/task-sources/repo-schemas.js +234 -0
- package/dist/adapters/task-sources/repo-task-source.d.ts +22 -0
- package/dist/adapters/task-sources/repo-task-source.js +104 -0
- package/dist/adapters/task-sources/repo-trigger.d.ts +52 -0
- package/dist/adapters/task-sources/repo-trigger.js +153 -0
- package/dist/adapters/task-sources/repo-validation.d.ts +49 -0
- package/dist/adapters/task-sources/repo-validation.js +164 -0
- package/dist/adapters/task-sources/yaml-task-source.d.ts +18 -0
- package/dist/adapters/task-sources/yaml-task-source.js +136 -0
- package/dist/agent-observer/agentic-provider.d.ts +132 -0
- package/dist/agent-observer/agentic-provider.js +983 -0
- package/dist/agent-observer/classifier.d.ts +62 -0
- package/dist/agent-observer/classifier.js +269 -0
- package/dist/agent-observer/index.d.ts +7 -0
- package/dist/agent-observer/index.js +4 -0
- package/dist/agent-observer/pricing.d.ts +35 -0
- package/dist/agent-observer/pricing.js +82 -0
- package/dist/agent-observer/provider.d.ts +77 -0
- package/dist/agent-observer/provider.js +151 -0
- package/dist/agent-observer/proxy.d.ts +91 -0
- package/dist/agent-observer/proxy.js +321 -0
- package/dist/agent-observer/test-imports.d.ts +7 -0
- package/dist/agent-observer/test-imports.js +185 -0
- package/dist/agent-observer/types.d.ts +137 -0
- package/dist/agent-observer/types.js +16 -0
- package/dist/assertions/source-isolation.d.ts +72 -0
- package/dist/assertions/source-isolation.js +117 -0
- package/dist/cli.d.ts +24 -0
- package/dist/cli.js +199 -0
- package/dist/commands/agent-report.d.ts +5 -0
- package/dist/commands/agent-report.js +69 -0
- package/dist/commands/baseline.d.ts +9 -0
- package/dist/commands/baseline.js +141 -0
- package/dist/commands/cache.d.ts +13 -0
- package/dist/commands/cache.js +135 -0
- package/dist/commands/calculate-scores.d.ts +8 -0
- package/dist/commands/calculate-scores.js +48 -0
- package/dist/commands/compare.d.ts +8 -0
- package/dist/commands/compare.js +120 -0
- package/dist/commands/completion.d.ts +18 -0
- package/dist/commands/completion.js +260 -0
- package/dist/commands/coverage-audit.d.ts +7 -0
- package/dist/commands/coverage-audit.js +40 -0
- package/dist/commands/discovery-report.d.ts +10 -0
- package/dist/commands/discovery-report.js +44 -0
- package/dist/commands/eval.d.ts +9 -0
- package/dist/commands/eval.js +35 -0
- package/dist/commands/explain-handler.d.ts +34 -0
- package/dist/commands/explain-handler.js +719 -0
- package/dist/commands/fetch-docs.d.ts +8 -0
- package/dist/commands/fetch-docs.js +128 -0
- package/dist/commands/generate-configs.d.ts +8 -0
- package/dist/commands/generate-configs.js +46 -0
- package/dist/commands/grader/index.d.ts +11 -0
- package/dist/commands/grader/index.js +118 -0
- package/dist/commands/init.d.ts +19 -0
- package/dist/commands/init.js +150 -0
- package/dist/commands/interactive.d.ts +12 -0
- package/dist/commands/interactive.js +238 -0
- package/dist/commands/lookup-doc.d.ts +15 -0
- package/dist/commands/lookup-doc.js +84 -0
- package/dist/commands/measure-retrieval.d.ts +5 -0
- package/dist/commands/measure-retrieval.js +65 -0
- package/dist/commands/pipeline-action.d.ts +71 -0
- package/dist/commands/pipeline-action.js +305 -0
- package/dist/commands/pipeline.d.ts +62 -0
- package/dist/commands/pipeline.js +53 -0
- package/dist/commands/pr-comment.d.ts +8 -0
- package/dist/commands/pr-comment.js +47 -0
- package/dist/commands/publish.d.ts +26 -0
- package/dist/commands/publish.js +253 -0
- package/dist/commands/readiness-report.d.ts +10 -0
- package/dist/commands/readiness-report.js +104 -0
- package/dist/commands/shared/options.d.ts +29 -0
- package/dist/commands/shared/options.js +57 -0
- package/dist/commands/update-quality-scores.d.ts +5 -0
- package/dist/commands/update-quality-scores.js +20 -0
- package/dist/commands/validate-tasks.d.ts +16 -0
- package/dist/commands/validate-tasks.js +93 -0
- package/dist/commands/validate.d.ts +9 -0
- package/dist/commands/validate.js +73 -0
- package/dist/commands/webhook-server.d.ts +5 -0
- package/dist/commands/webhook-server.js +30 -0
- package/dist/commands/weekly-digest.d.ts +10 -0
- package/dist/commands/weekly-digest.js +104 -0
- package/dist/composition-root.d.ts +26 -0
- package/dist/composition-root.js +107 -0
- package/dist/interpolate.d.ts +26 -0
- package/dist/interpolate.js +70 -0
- package/dist/job-store.d.ts +104 -0
- package/dist/job-store.js +188 -0
- package/dist/lib/agent-behavior-report.d.ts +8 -0
- package/dist/lib/agent-behavior-report.js +185 -0
- package/dist/lib/baseline.d.ts +19 -0
- package/dist/lib/baseline.js +153 -0
- package/dist/lib/calculate-scores.d.ts +23 -0
- package/dist/lib/calculate-scores.js +42 -0
- package/dist/lib/compare.d.ts +18 -0
- package/dist/lib/compare.js +170 -0
- package/dist/lib/coverage-audit.d.ts +4 -0
- package/dist/lib/coverage-audit.js +42 -0
- package/dist/lib/discovery-report.d.ts +13 -0
- package/dist/lib/discovery-report.js +57 -0
- package/dist/lib/fetch-docs.d.ts +30 -0
- package/dist/lib/fetch-docs.js +171 -0
- package/dist/lib/generate-configs.d.ts +25 -0
- package/dist/lib/generate-configs.js +42 -0
- package/dist/lib/grader-api.d.ts +21 -0
- package/dist/lib/grader-api.js +34 -0
- package/dist/lib/grader-compare.d.ts +19 -0
- package/dist/lib/grader-compare.js +91 -0
- package/dist/lib/grader-consistency.d.ts +27 -0
- package/dist/lib/grader-consistency.js +79 -0
- package/dist/lib/grader-sensitivity.d.ts +19 -0
- package/dist/lib/grader-sensitivity.js +75 -0
- package/dist/lib/grader-validate.d.ts +19 -0
- package/dist/lib/grader-validate.js +78 -0
- package/dist/lib/measure-retrieval.d.ts +14 -0
- package/dist/lib/measure-retrieval.js +71 -0
- package/dist/lib/pr-comment.d.ts +16 -0
- package/dist/lib/pr-comment.js +28 -0
- package/dist/lib/readiness-report.d.ts +13 -0
- package/dist/lib/readiness-report.js +108 -0
- package/dist/lib/webhook-server.d.ts +11 -0
- package/dist/lib/webhook-server.js +24 -0
- package/dist/lib/weekly-digest.d.ts +24 -0
- package/dist/lib/weekly-digest.js +148 -0
- package/dist/orchestration/build-app-context.d.ts +27 -0
- package/dist/orchestration/build-app-context.js +81 -0
- package/dist/orchestration/build-step-sequence.d.ts +15 -0
- package/dist/orchestration/build-step-sequence.js +84 -0
- package/dist/orchestration/config-to-source-overrides.d.ts +9 -0
- package/dist/orchestration/config-to-source-overrides.js +28 -0
- package/dist/orchestration/env-bridge.d.ts +21 -0
- package/dist/orchestration/env-bridge.js +66 -0
- package/dist/orchestration/index.d.ts +11 -0
- package/dist/orchestration/index.js +11 -0
- package/dist/orchestration/pipeline-orchestrator.d.ts +24 -0
- package/dist/orchestration/pipeline-orchestrator.js +153 -0
- package/dist/orchestration/step-runner.d.ts +20 -0
- package/dist/orchestration/step-runner.js +88 -0
- package/dist/orchestration/steps/calculate-scores-step.d.ts +13 -0
- package/dist/orchestration/steps/calculate-scores-step.js +95 -0
- package/dist/orchestration/steps/callback-step.d.ts +24 -0
- package/dist/orchestration/steps/callback-step.js +76 -0
- package/dist/orchestration/steps/compare-step.d.ts +14 -0
- package/dist/orchestration/steps/compare-step.js +92 -0
- package/dist/orchestration/steps/discovery-report-step.d.ts +13 -0
- package/dist/orchestration/steps/discovery-report-step.js +55 -0
- package/dist/orchestration/steps/fetch-docs-shell.d.ts +17 -0
- package/dist/orchestration/steps/fetch-docs-shell.js +30 -0
- package/dist/orchestration/steps/fetch-docs-step.d.ts +14 -0
- package/dist/orchestration/steps/fetch-docs-step.js +135 -0
- package/dist/orchestration/steps/gap-analysis-step.d.ts +16 -0
- package/dist/orchestration/steps/gap-analysis-step.js +136 -0
- package/dist/orchestration/steps/generate-configs-step.d.ts +14 -0
- package/dist/orchestration/steps/generate-configs-step.js +85 -0
- package/dist/orchestration/steps/grader-consistency-step.d.ts +13 -0
- package/dist/orchestration/steps/grader-consistency-step.js +64 -0
- package/dist/orchestration/steps/index.d.ts +19 -0
- package/dist/orchestration/steps/index.js +19 -0
- package/dist/orchestration/steps/mirror-repo-tasks-step.d.ts +21 -0
- package/dist/orchestration/steps/mirror-repo-tasks-step.js +94 -0
- package/dist/orchestration/steps/publish-report-step.d.ts +26 -0
- package/dist/orchestration/steps/publish-report-step.js +216 -0
- package/dist/orchestration/steps/readiness-step.d.ts +13 -0
- package/dist/orchestration/steps/readiness-step.js +91 -0
- package/dist/orchestration/steps/report-step.d.ts +12 -0
- package/dist/orchestration/steps/report-step.js +49 -0
- package/dist/orchestration/steps/run-eval-step.d.ts +17 -0
- package/dist/orchestration/steps/run-eval-step.js +195 -0
- package/dist/orchestration/steps/validate-step.d.ts +12 -0
- package/dist/orchestration/steps/validate-step.js +41 -0
- package/dist/pipeline/agent-behavior-report.d.ts +53 -0
- package/dist/pipeline/agent-behavior-report.js +132 -0
- package/dist/pipeline/attribution.d.ts +47 -0
- package/dist/pipeline/attribution.js +226 -0
- package/dist/pipeline/baseline.d.ts +37 -0
- package/dist/pipeline/baseline.js +141 -0
- package/dist/pipeline/cache.d.ts +101 -0
- package/dist/pipeline/cache.js +283 -0
- package/dist/pipeline/calculate-scores.d.ts +102 -0
- package/dist/pipeline/calculate-scores.js +1128 -0
- package/dist/pipeline/callback-delivery.d.ts +50 -0
- package/dist/pipeline/callback-delivery.js +89 -0
- package/dist/pipeline/checks.d.ts +39 -0
- package/dist/pipeline/checks.js +280 -0
- package/dist/pipeline/classify-url.d.ts +61 -0
- package/dist/pipeline/classify-url.js +93 -0
- package/dist/pipeline/compare.d.ts +31 -0
- package/dist/pipeline/compare.js +208 -0
- package/dist/pipeline/coverage-audit.d.ts +39 -0
- package/dist/pipeline/coverage-audit.js +165 -0
- package/dist/pipeline/degradations.d.ts +85 -0
- package/dist/pipeline/degradations.js +242 -0
- package/dist/pipeline/discovery-report.d.ts +55 -0
- package/dist/pipeline/discovery-report.js +178 -0
- package/dist/pipeline/eval-constants.d.ts +68 -0
- package/dist/pipeline/eval-constants.js +111 -0
- package/dist/pipeline/eval-fingerprint.d.ts +66 -0
- package/dist/pipeline/eval-fingerprint.js +175 -0
- package/dist/pipeline/expand-tasks.d.ts +220 -0
- package/dist/pipeline/expand-tasks.js +421 -0
- package/dist/pipeline/failure-modes.d.ts +46 -0
- package/dist/pipeline/failure-modes.js +348 -0
- package/dist/pipeline/fetch-url-content.d.ts +44 -0
- package/dist/pipeline/fetch-url-content.js +93 -0
- package/dist/pipeline/gap-analysis.d.ts +48 -0
- package/dist/pipeline/gap-analysis.js +231 -0
- package/dist/pipeline/generate-configs.d.ts +72 -0
- package/dist/pipeline/generate-configs.js +395 -0
- package/dist/pipeline/grader-api.d.ts +49 -0
- package/dist/pipeline/grader-api.js +200 -0
- package/dist/pipeline/grader-compare-runner.d.ts +44 -0
- package/dist/pipeline/grader-compare-runner.js +301 -0
- package/dist/pipeline/grader-comparison.d.ts +111 -0
- package/dist/pipeline/grader-comparison.js +161 -0
- package/dist/pipeline/grader-consistency-runner.d.ts +60 -0
- package/dist/pipeline/grader-consistency-runner.js +270 -0
- package/dist/pipeline/grader-consistency.d.ts +103 -0
- package/dist/pipeline/grader-consistency.js +146 -0
- package/dist/pipeline/grader-sensitivity-runner.d.ts +40 -0
- package/dist/pipeline/grader-sensitivity-runner.js +282 -0
- package/dist/pipeline/grader-sensitivity.d.ts +94 -0
- package/dist/pipeline/grader-sensitivity.js +144 -0
- package/dist/pipeline/grader-validate-runner.d.ts +38 -0
- package/dist/pipeline/grader-validate-runner.js +229 -0
- package/dist/pipeline/grader-validation.d.ts +107 -0
- package/dist/pipeline/grader-validation.js +169 -0
- package/dist/pipeline/map-request-to-config.d.ts +19 -0
- package/dist/pipeline/map-request-to-config.js +80 -0
- package/dist/pipeline/measure-retrieval.d.ts +59 -0
- package/dist/pipeline/measure-retrieval.js +111 -0
- package/dist/pipeline/mirror-repo-tasks.d.ts +86 -0
- package/dist/pipeline/mirror-repo-tasks.js +350 -0
- package/dist/pipeline/plan-format.d.ts +33 -0
- package/dist/pipeline/plan-format.js +202 -0
- package/dist/pipeline/plan.d.ts +169 -0
- package/dist/pipeline/plan.js +708 -0
- package/dist/pipeline/pr-comment.d.ts +19 -0
- package/dist/pipeline/pr-comment.js +502 -0
- package/dist/pipeline/probe.d.ts +52 -0
- package/dist/pipeline/probe.js +390 -0
- package/dist/pipeline/provenance.d.ts +47 -0
- package/dist/pipeline/provenance.js +146 -0
- package/dist/pipeline/readiness-report.d.ts +87 -0
- package/dist/pipeline/readiness-report.js +205 -0
- package/dist/pipeline/release-classification.d.ts +54 -0
- package/dist/pipeline/release-classification.js +238 -0
- package/dist/pipeline/release-report.d.ts +37 -0
- package/dist/pipeline/release-report.js +222 -0
- package/dist/pipeline/repo-eval-comment.d.ts +37 -0
- package/dist/pipeline/repo-eval-comment.js +165 -0
- package/dist/pipeline/repo-threshold-evaluator.d.ts +89 -0
- package/dist/pipeline/repo-threshold-evaluator.js +162 -0
- package/dist/pipeline/resolve-mappings.d.ts +35 -0
- package/dist/pipeline/resolve-mappings.js +72 -0
- package/dist/pipeline/retrieval-metrics.d.ts +39 -0
- package/dist/pipeline/retrieval-metrics.js +136 -0
- package/dist/pipeline/reverse-mapping.d.ts +67 -0
- package/dist/pipeline/reverse-mapping.js +88 -0
- package/dist/pipeline/schemas.d.ts +9 -0
- package/dist/pipeline/schemas.js +9 -0
- package/dist/pipeline/steps/calculate-scores-step.d.ts +11 -0
- package/dist/pipeline/steps/calculate-scores-step.js +89 -0
- package/dist/pipeline/steps/compare-step.d.ts +18 -0
- package/dist/pipeline/steps/compare-step.js +90 -0
- package/dist/pipeline/steps/eval-step.d.ts +53 -0
- package/dist/pipeline/steps/eval-step.js +347 -0
- package/dist/pipeline/steps/fetch-docs-step.d.ts +11 -0
- package/dist/pipeline/steps/fetch-docs-step.js +84 -0
- package/dist/pipeline/steps/generate-configs-step.d.ts +11 -0
- package/dist/pipeline/steps/generate-configs-step.js +98 -0
- package/dist/pipeline/steps/grader-consistency-step.d.ts +21 -0
- package/dist/pipeline/steps/grader-consistency-step.js +74 -0
- package/dist/pipeline/steps/publish-report-step.d.ts +57 -0
- package/dist/pipeline/steps/publish-report-step.js +243 -0
- package/dist/pipeline/steps/report-step.d.ts +13 -0
- package/dist/pipeline/steps/report-step.js +56 -0
- package/dist/pipeline/steps/update-scores-step.d.ts +11 -0
- package/dist/pipeline/steps/update-scores-step.js +42 -0
- package/dist/pipeline/targeted-loo.d.ts +88 -0
- package/dist/pipeline/targeted-loo.js +203 -0
- package/dist/pipeline/thresholds.d.ts +27 -0
- package/dist/pipeline/thresholds.js +245 -0
- package/dist/pipeline/types.d.ts +10 -0
- package/dist/pipeline/types.js +10 -0
- package/dist/pipeline/validate.d.ts +67 -0
- package/dist/pipeline/validate.js +406 -0
- package/dist/pipeline/webhook-server.d.ts +37 -0
- package/dist/pipeline/webhook-server.js +133 -0
- package/dist/report-store.d.ts +84 -0
- package/dist/report-store.js +208 -0
- package/dist/sanity/client.d.ts +38 -0
- package/dist/sanity/client.js +86 -0
- package/dist/sanity/portable-text.d.ts +11 -0
- package/dist/sanity/portable-text.js +211 -0
- package/dist/sanity/queries.d.ts +133 -0
- package/dist/sanity/queries.js +300 -0
- package/dist/schedules/digest.d.ts +116 -0
- package/dist/schedules/digest.js +156 -0
- package/dist/schedules/index.d.ts +12 -0
- package/dist/schedules/index.js +10 -0
- package/dist/schedules/loader.d.ts +31 -0
- package/dist/schedules/loader.js +73 -0
- package/dist/schedules/schema.d.ts +9 -0
- package/dist/schedules/schema.js +9 -0
- package/dist/scripts/agent-behavior-report.d.ts +19 -0
- package/dist/scripts/agent-behavior-report.js +315 -0
- package/dist/scripts/baseline.d.ts +43 -0
- package/dist/scripts/baseline.js +267 -0
- package/dist/scripts/calculate-scores.d.ts +166 -0
- package/dist/scripts/calculate-scores.js +1296 -0
- package/dist/scripts/compare.d.ts +22 -0
- package/dist/scripts/compare.js +334 -0
- package/dist/scripts/coverage-audit.d.ts +44 -0
- package/dist/scripts/coverage-audit.js +209 -0
- package/dist/scripts/debug-eval.d.ts +19 -0
- package/dist/scripts/debug-eval.js +73 -0
- package/dist/scripts/discovery-report.d.ts +58 -0
- package/dist/scripts/discovery-report.js +250 -0
- package/dist/scripts/fetch-docs.d.ts +35 -0
- package/dist/scripts/fetch-docs.js +472 -0
- package/dist/scripts/generate-configs.d.ts +66 -0
- package/dist/scripts/generate-configs.js +459 -0
- package/dist/scripts/grader-api.d.ts +27 -0
- package/dist/scripts/grader-api.js +206 -0
- package/dist/scripts/grader-compare.d.ts +22 -0
- package/dist/scripts/grader-compare.js +368 -0
- package/dist/scripts/grader-consistency.d.ts +20 -0
- package/dist/scripts/grader-consistency.js +313 -0
- package/dist/scripts/grader-sensitivity.d.ts +22 -0
- package/dist/scripts/grader-sensitivity.js +354 -0
- package/dist/scripts/grader-validate.d.ts +19 -0
- package/dist/scripts/grader-validate.js +267 -0
- package/dist/scripts/measure-retrieval.d.ts +10 -0
- package/dist/scripts/measure-retrieval.js +145 -0
- package/dist/scripts/migrate-tasks-to-content-lake.d.ts +24 -0
- package/dist/scripts/migrate-tasks-to-content-lake.js +327 -0
- package/dist/scripts/pipeline.d.ts +76 -0
- package/dist/scripts/pipeline.js +1031 -0
- package/dist/scripts/pr-comment.d.ts +10 -0
- package/dist/scripts/pr-comment.js +510 -0
- package/dist/scripts/readiness-report.d.ts +88 -0
- package/dist/scripts/readiness-report.js +342 -0
- package/dist/scripts/update-quality-scores.d.ts +15 -0
- package/dist/scripts/update-quality-scores.js +184 -0
- package/dist/scripts/validate-task-sources.d.ts +21 -0
- package/dist/scripts/validate-task-sources.js +210 -0
- package/dist/scripts/validate.d.ts +13 -0
- package/dist/scripts/validate.js +79 -0
- package/dist/scripts/webhook-server.d.ts +26 -0
- package/dist/scripts/webhook-server.js +147 -0
- package/dist/scripts/weekly-digest.d.ts +24 -0
- package/dist/scripts/weekly-digest.js +144 -0
- package/dist/sinks/bigquery/index.d.ts +131 -0
- package/dist/sinks/bigquery/index.js +222 -0
- package/dist/sinks/format-slack.d.ts +64 -0
- package/dist/sinks/format-slack.js +306 -0
- package/dist/sinks/index.d.ts +23 -0
- package/dist/sinks/index.js +18 -0
- package/dist/sinks/loader.d.ts +18 -0
- package/dist/sinks/loader.js +82 -0
- package/dist/sinks/retry.d.ts +24 -0
- package/dist/sinks/retry.js +52 -0
- package/dist/sinks/schema.d.ts +9 -0
- package/dist/sinks/schema.js +9 -0
- package/dist/sinks/slack/format.d.ts +65 -0
- package/dist/sinks/slack/format.js +327 -0
- package/dist/sinks/slack/index.d.ts +27 -0
- package/dist/sinks/slack/index.js +78 -0
- package/dist/sinks/slack-sink.d.ts +27 -0
- package/dist/sinks/slack-sink.js +78 -0
- package/dist/sinks/types.d.ts +59 -0
- package/dist/sinks/types.js +44 -0
- package/dist/sinks/webhook/index.d.ts +19 -0
- package/dist/sinks/webhook/index.js +50 -0
- package/dist/sinks/webhook-sink.d.ts +19 -0
- package/dist/sinks/webhook-sink.js +50 -0
- package/dist/sources.d.ts +104 -0
- package/dist/sources.js +292 -0
- package/dist/webhook/budget.d.ts +42 -0
- package/dist/webhook/budget.js +60 -0
- package/dist/webhook/debounce.d.ts +67 -0
- package/dist/webhook/debounce.js +76 -0
- package/dist/webhook/dispatch.d.ts +45 -0
- package/dist/webhook/dispatch.js +84 -0
- package/dist/webhook/eval-request-handler.d.ts +87 -0
- package/dist/webhook/eval-request-handler.js +181 -0
- package/dist/webhook/handler.d.ts +88 -0
- package/dist/webhook/handler.js +203 -0
- package/dist/webhook/index.d.ts +17 -0
- package/dist/webhook/index.js +12 -0
- package/dist/webhook/types.d.ts +109 -0
- package/dist/webhook/types.js +10 -0
- package/package.json +72 -0
- package/tasks/.expanded.agentic.yaml +51 -0
- package/tasks/.expanded.yaml +66 -0
- package/tasks/frameworks.yaml +98 -0
- package/tasks/functions.yaml +51 -0
- package/tasks/groq.yaml +216 -0
- package/tasks/nextjs-live.yaml +62 -0
- package/tasks/studio-setup.yaml +111 -0
- package/tasks/visual-editing.yaml +120 -0
|
@@ -0,0 +1,719 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* explain-handler.ts
|
|
3
|
+
*
|
|
4
|
+
* Handles the global --explain flag for any CLI command.
|
|
5
|
+
*
|
|
6
|
+
* Called from the Commander preAction hook in cli.ts. Inspects which
|
|
7
|
+
* command is about to run, builds the appropriate execution plan, and
|
|
8
|
+
* prints it. If --yes is also set, prompts the user to confirm execution.
|
|
9
|
+
*
|
|
10
|
+
* ## How it works
|
|
11
|
+
*
|
|
12
|
+
* Every command is registered in `EXPLAIN_REGISTRY` with either:
|
|
13
|
+
* - **Static metadata** — description, filesRead, filesCreated, steps
|
|
14
|
+
* - **A builder function** — for commands that need to inspect CLI options
|
|
15
|
+
* or perform async work (e.g., pipeline, init)
|
|
16
|
+
*
|
|
17
|
+
* To add --explain support for a new command, add one registry entry.
|
|
18
|
+
* Commands not in the registry fall back to a minimal generic plan.
|
|
19
|
+
*
|
|
20
|
+
* @see docs/exec-plans/active/execution-preview.md
|
|
21
|
+
*/
|
|
22
|
+
import { TASK_FILE_NAMES } from "../_vendor/ailf-core/index.js";
|
|
23
|
+
import { buildPipelinePlan, buildSimpleCommandPlan, } from "../pipeline/plan.js";
|
|
24
|
+
import { formatPlanConsole, formatPlanJson } from "../pipeline/plan-format.js";
|
|
25
|
+
import { computeResolvedOptions } from "./pipeline-action.js";
|
|
26
|
+
// ---------------------------------------------------------------------------
|
|
27
|
+
// Registry
|
|
28
|
+
// ---------------------------------------------------------------------------
|
|
29
|
+
/**
 * Build one plan step for a static registry entry.
 *
 * Every static step in this file is reported the same way: it will run and
 * its cache status is "miss". Only the label and the explanation differ.
 *
 * @param name - Short step label
 * @param reason - One-line explanation of what the step does
 */
const pendingStep = (name, reason) => ({
    cacheStatus: "miss",
    name,
    reason,
    willRun: true,
});
/**
 * --explain metadata, keyed by command name.
 *
 * A value is either a builder function (called by `handleExplain` with the
 * Commander command and rootDir) or a static record with `description`,
 * optional `filesCreated` / `filesRead`, and `steps`, which `handleExplain`
 * forwards to `buildSimpleCommandPlan`.
 *
 * Supporting --explain for a new command means adding one entry here.
 */
const EXPLAIN_REGISTRY = {
    // ── Dynamic builders (inspect CLI options) ────────────────────────
    baseline: buildBaselineExplainPlan,
    init: buildInitExplainPlan,
    pipeline: buildPipelineExplainPlan,
    // ── Static metadata ───────────────────────────────────────────────
    "agent-report": {
        description: "Generate an agent behavior observation report from eval results",
        filesCreated: [
            "results/latest/agent-report.json",
            "results/latest/agent-report.md",
        ],
        filesRead: ["results/latest/eval-results.json"],
        steps: [
            pendingStep("Load eval results", "Parse eval-results.json for agent behavior data"),
            pendingStep("Analyze agent behavior", "Extract tool usage, search patterns, and retrieval strategies"),
            pendingStep("Write report", "Generate JSON + Markdown observation report"),
        ],
    },
    cache: {
        description: "Manage the local pipeline cache (clear manifests or show status)",
        filesRead: ["results/cache/*.json"],
        steps: [
            pendingStep("Inspect cache directory", "Read results/cache/ for manifest files"),
        ],
    },
    "calculate-scores": {
        description: "Calculate AI Literacy Scores from Promptfoo evaluation results",
        filesCreated: ["results/latest/score-summary.json"],
        filesRead: [
            "results/latest/eval-results.json",
            "config/rubrics.yaml",
            "config/models.yaml",
        ],
        steps: [
            pendingStep("Load eval results", "Parse eval-results.json for graded responses"),
            pendingStep("Compute scores", "Calculate per-area, per-model, and overall AI Literacy Scores"),
            pendingStep("Write summary", "Persist score-summary.json to results/latest/"),
        ],
    },
    compare: {
        description: "Compare current evaluation scores against a saved baseline",
        filesCreated: ["results/latest/comparison-report.json"],
        filesRead: [
            "results/latest/score-summary.json",
            "results/baselines/*.json",
        ],
        steps: [
            pendingStep("Load current scores", "Read results/latest/score-summary.json"),
            pendingStep("Load baseline", "Find and load latest baseline snapshot"),
            pendingStep("Compare", "Compute per-area deltas with noise threshold"),
        ],
    },
    "coverage-audit": {
        description: "Cross-reference feature registry against evaluation tasks for coverage gaps",
        filesRead: ["config/features.yaml", "tasks/*.yaml"],
        steps: [
            pendingStep("Load feature registry", "Parse config/features.yaml for product feature list"),
            pendingStep("Scan tasks", "Read all task YAML files for feature area references"),
            pendingStep("Compute coverage", "Match features to tasks, identify gaps, count document utilization"),
        ],
    },
    "discovery-report": {
        description: "Generate agent discoverability report from agentic retrieval metrics",
        filesCreated: ["results/latest/discovery-report.md"],
        filesRead: ["results/latest/score-summary.json"],
        steps: [
            pendingStep("Load score summary", "Read retrieval metrics from score-summary.json"),
            pendingStep("Analyze discoverability", "Per-area retrieval breakdown, invisible docs, recommendations"),
            pendingStep("Format report", "Generate Markdown discovery report"),
        ],
    },
    eval: {
        description: "Run Promptfoo evaluation directly (passthrough — all flags forwarded to promptfoo)",
        filesCreated: ["results/latest/eval-results.json"],
        filesRead: ["promptfooconfig.yaml"],
        steps: [
            pendingStep("Exec promptfoo eval", "Spawn `promptfoo eval` subprocess with forwarded arguments"),
        ],
    },
    "fetch-docs": {
        description: "Fetch documentation from Sanity CMS and generate canonical context files",
        filesCreated: ["contexts/canonical/*.md"],
        filesRead: ["config/sources.yaml", "config/models.yaml"],
        steps: [
            pendingStep("Extract canonical doc slugs", "Read task definitions for canonical_docs fields"),
            pendingStep("Fetch from Sanity", "Query Sanity Content Lake via GROQ for each document"),
            pendingStep("Write context files", "Convert Portable Text → Markdown, write per-task context files"),
        ],
    },
    "generate-configs": {
        description: "Generate Promptfoo config files from models.yaml and task definitions",
        filesCreated: [
            "promptfooconfig.yaml",
            "promptfooconfig.observed.yaml",
            "promptfooconfig.agentic.yaml",
            "tasks/.expanded.yaml",
        ],
        filesRead: [
            "config/models.yaml",
            "config/prompts.yaml",
            "config/rubrics.yaml",
            "config/sources.yaml",
        ],
        steps: [
            pendingStep("Load models", "Parse config/models.yaml for active model list"),
            pendingStep("Expand tasks", "Single-definition → gold + baseline test entries"),
            pendingStep("Generate configs", "Write baseline, observed, and agentic Promptfoo configs"),
        ],
    },
    grader: {
        description: "Grader reliability tools (consistency, compare, sensitivity, validate)",
        filesRead: [
            "results/latest/eval-results.json",
            "config/rubrics.yaml",
            "canonical/reference-solutions/",
        ],
        steps: [
            pendingStep("Load eval results", "Parse eval-results.json for graded responses to re-evaluate"),
            pendingStep("Run grader analysis", "Execute selected subcommand (consistency / compare / sensitivity / validate)"),
        ],
    },
    "lookup-doc": {
        description: "Search Sanity for documentation articles by keyword (find slugs for canonicalDocs)",
        steps: [
            pendingStep("Query Sanity", "Search articles by title and slug keyword match"),
        ],
    },
    "measure-retrieval": {
        description: "Measure Sanity text search retrieval quality against canonical document annotations",
        filesCreated: ["results/latest/retrieval-metrics.json"],
        filesRead: ["tasks/*.yaml"],
        steps: [
            pendingStep("Extract queries + ground truth", "Build query → expected docs pairs from task canonical_docs"),
            pendingStep("Run retrieval queries", "Execute Sanity text search for each task query"),
            pendingStep("Compute metrics", "Calculate precision, recall, F1 per query and overall"),
        ],
    },
    "pr-comment": {
        description: "Generate a markdown PR comment from evaluation scores for CI posting",
        filesRead: ["results/latest/score-summary.json"],
        steps: [
            pendingStep("Load scores", "Read score-summary.json"),
            pendingStep("Generate comment", "Build markdown with score tables, model breakdown, and Promptfoo link"),
        ],
    },
    publish: {
        description: "Publish a local evaluation report to the Sanity Content Lake (standalone)",
        filesRead: ["results/latest/score-summary.json"],
        steps: [
            pendingStep("Load score summary", "Read and validate score-summary.json"),
            pendingStep("Build provenance", "Collect git SHA, branch, CI metadata, source info"),
            pendingStep("Write to Sanity", "Create immutable ailf.report document in Content Lake"),
            pendingStep("Fan out to sinks", "Deliver to configured sinks (Slack, BigQuery, webhooks)"),
        ],
    },
    "readiness-report": {
        description: "Generate launch readiness checklist for a feature area with threshold evaluation",
        filesRead: [
            "results/latest/score-summary.json",
            "results/latest/gap-analysis.json",
            "config/thresholds.yaml",
            "results/baselines/",
        ],
        filesCreated: ["results/latest/readiness-report.md"],
        steps: [
            pendingStep("Load scores + thresholds", "Read score-summary.json and thresholds.yaml for gate evaluation"),
            pendingStep("Evaluate readiness", "Check per-dimension scores against thresholds, build go/no-go checklist"),
            pendingStep("Format report", "Generate Markdown readiness report with historical trends"),
        ],
    },
    validate: {
        description: "Validate all YAML config files, task definitions, reference solutions, and environment",
        filesRead: [
            "config/models.yaml",
            "config/rubrics.yaml",
            "config/features.yaml",
            "config/thresholds.yaml",
        ],
        steps: [
            pendingStep("Validate configuration", "Parse all YAML configs through Zod schemas, cross-reference mappings"),
            pendingStep("Check environment", "Verify OPENAI_API_KEY, SANITY_API_TOKEN, AILF_REPORT_SANITY_API_TOKEN are set"),
        ],
    },
    "validate-tasks": {
        description: "Validate repo-based task YAML files (.ailf/tasks/) against the schema",
        filesRead: [".ailf/tasks/*.yaml"],
        steps: [
            pendingStep("Scan task files", "Find all .yaml/.yml files in the target directory"),
            pendingStep("Validate schemas", "Parse each file through RepoTaskSchema (Zod) and cross-reference IDs"),
            pendingStep("Check invariants", "Verify canonical_docs references, doc_coverage completeness, unique IDs"),
        ],
    },
    "webhook-server": {
        description: "Start a local webhook development server for testing content-change triggers",
        steps: [
            pendingStep("Start HTTP server", "Listen for Sanity webhook payloads with debounce + budget control"),
        ],
    },
    "weekly-digest": {
        description: "Generate and deliver a weekly evaluation trend digest via Slack",
        filesRead: ["config/schedules.yaml", "config/sinks.yaml"],
        steps: [
            pendingStep("Load digest config", "Read schedules.yaml for lookback window and delivery targets"),
            pendingStep("Query report store", "Fetch reports within lookback window from Sanity Content Lake"),
            pendingStep("Compute trends", "Calculate score deltas, area regressions, and improvements"),
            pendingStep("Deliver digest", "Format and send to Slack (or stdout in --dry-run mode)"),
        ],
    },
};
|
|
486
|
+
// ---------------------------------------------------------------------------
|
|
487
|
+
// Main handler
|
|
488
|
+
// ---------------------------------------------------------------------------
|
|
489
|
+
/**
 * Handle the --explain flag for any command.
 *
 * Resolves the command's entry in `EXPLAIN_REGISTRY` and turns it into a plan:
 * - a function entry is awaited with `(actionCommand, rootDir)`;
 * - a static entry is passed to `buildSimpleCommandPlan`;
 * - a command without an entry gets a minimal generic plan.
 *
 * The plan is printed with `formatPlanJson` when the root program's
 * `--format` option is "json", otherwise with `formatPlanConsole`.
 *
 * @param actionCommand - The Commander command about to execute
 * @param confirmExecution - If true (--yes), prompt to proceed after showing the plan
 * @param rootDir - Path to the eval package root (packages/eval)
 * @returns Resolves with no value when execution should not continue.
 * @throws A plain `{ __proceedArgv: string[] }` sentinel (not an Error) when
 *   the user confirms; it carries `process.argv` without `--explain`/`--yes`.
 */
export async function handleExplain(actionCommand, confirmExecution, rootDir) {
    const commandName = actionCommand.name();
    // Own-property lookup only: a command called "toString" or "constructor"
    // must not resolve to an inherited Object.prototype member and then be
    // invoked as if it were a dynamic builder.
    const entry = Object.prototype.hasOwnProperty.call(EXPLAIN_REGISTRY, commandName)
        ? EXPLAIN_REGISTRY[commandName]
        : undefined;
    let plan;
    if (typeof entry === "function") {
        // Dynamic builder
        plan = await entry(actionCommand, rootDir);
    }
    else if (entry) {
        // Static metadata → simple plan
        plan = buildSimpleCommandPlan({
            command: commandName,
            description: entry.description,
            filesCreated: entry.filesCreated,
            filesRead: entry.filesRead,
            rootDir,
            steps: entry.steps,
        });
    }
    else {
        // Unregistered command — minimal fallback
        plan = buildSimpleCommandPlan({
            command: commandName,
            description: `Run the '${commandName}' command`,
            rootDir,
        });
    }
    // --format is a global option on the root program (actionCommand.parent)
    const globalParentOpts = actionCommand.parent?.opts();
    const formatOpt = globalParentOpts?.format ?? "console";
    if (formatOpt === "json") {
        console.log(formatPlanJson(plan));
    }
    else {
        console.log();
        console.log(formatPlanConsole(plan));
    }
    // --yes: only offer to continue when the plan reported no errors.
    if (!confirmExecution || plan.errors.length !== 0) {
        return;
    }
    let proceed = false;
    try {
        // Loaded lazily so the prompt library is only needed on this path.
        const { confirm } = await import("@inquirer/prompts");
        proceed = await confirm({
            default: true,
            message: "Proceed with execution?",
        });
    }
    catch {
        // User cancelled or the prompt failed — treat it as "do not proceed".
    }
    if (proceed) {
        // Hand the argv without --explain / --yes back to the caller so it can
        // re-parse and run the command for real. The existing comments in this
        // file say the caller (cli.ts) would otherwise exit, so a sentinel is
        // thrown for it to catch. It is raised outside the try block above, so
        // it can never be mistaken for a prompt failure.
        const filteredArgv = process.argv.filter((a) => a !== "--explain" && a !== "--yes");
        // oxlint-disable-next-line @typescript-eslint/only-throw-error
        throw { __proceedArgv: filteredArgv };
    }
}
|
|
567
|
+
// ---------------------------------------------------------------------------
|
|
568
|
+
// Dynamic plan builders
|
|
569
|
+
// ---------------------------------------------------------------------------
|
|
570
|
+
/**
 * Plan builder for the `init` command.
 *
 * Lists the files `init` would create under `<path>/.ailf`, where `<path>`
 * is the --path option (default ".") and the file extension follows
 * --output-format ("json" gives `.json`; anything else gives `.yaml`).
 *
 * @param actionCommand - Commander command; only `opts()` is read
 * @param rootDir - Forwarded unchanged to `buildSimpleCommandPlan`
 * @returns Whatever `buildSimpleCommandPlan` returns for the assembled spec
 */
function buildInitExplainPlan(actionCommand, rootDir) {
    const { outputFormat, path: requestedPath } = actionCommand.opts();
    const wantsJson = outputFormat === "json";
    const ext = wantsJson ? ".json" : ".yaml";
    const formatLabel = (wantsJson ? "json" : "yaml").toUpperCase();
    // AILF_CALLER_CWD, when set, takes precedence over the process cwd.
    const callerCwd = process.env.AILF_CALLER_CWD ?? process.cwd();
    const targetDir = requestedPath ?? ".";
    const ailfDir = `${targetDir}/.ailf`;
    const tasksDir = `${ailfDir}/tasks`;
    const taskFileNames = [...TASK_FILE_NAMES];
    const taskFiles = taskFileNames.map((stem) => `${tasksDir}/${stem}${ext}`);
    // "." is dropped from the description so it reads "<cwd>/" rather than "<cwd>/.".
    const shownTarget = targetDir === "." ? "" : targetDir;
    const step = (name, reason) => ({ cacheStatus: "miss", name, reason, willRun: true });
    return buildSimpleCommandPlan({
        command: "init",
        description: `Initialize .ailf/ directory structure in ${callerCwd}/${shownTarget}`,
        filesCreated: [`${ailfDir}/config${ext}`, ...taskFiles, `${ailfDir}/.gitignore`],
        filesRead: [],
        rootDir,
        steps: [
            step("Create directories", `Create ${ailfDir}/ and ${tasksDir}/`),
            step(`Write config${ext}`, `Project configuration template (${formatLabel} format)`),
            step(`Write example tasks (${taskFileNames.length} files)`, `Commented starter tasks in ${tasksDir}/ (${formatLabel} format)`),
            step("Write .gitignore", "Exclude results/ and contexts/ from version control"),
        ],
    });
}
|
|
624
|
+
/**
 * Build a plan for the `baseline` command.
 *
 * The first positional argument selects the subcommand (default "save").
 * Known subcommands (save / compare / history) get a tailored description;
 * anything else gets a generic "Baseline operation: <name>" description.
 * Only "save" is reported as creating a file.
 *
 * @param actionCommand - Commander command; only `args[0]` is read
 * @param rootDir - Forwarded unchanged to `buildSimpleCommandPlan`
 * @returns Whatever `buildSimpleCommandPlan` returns for the assembled spec
 */
function buildBaselineExplainPlan(actionCommand, rootDir) {
    const subcommand = actionCommand.args[0] ?? "save";
    // A Map rather than an object literal: the key comes from the command line,
    // and a plain object would answer "toString" or "constructor" with an
    // inherited Object.prototype function instead of falling through.
    const descriptions = new Map([
        ["compare", "Compare current scores against the latest saved baseline"],
        ["history", "List all saved baseline snapshots with dates and scores"],
        ["save", "Save current evaluation scores as a new baseline snapshot"],
    ]);
    return buildSimpleCommandPlan({
        command: `baseline ${subcommand}`,
        description: descriptions.get(subcommand) ?? `Baseline operation: ${subcommand}`,
        filesCreated: subcommand === "save" ? ["results/baselines/<timestamp>.json"] : [],
        filesRead: ["results/latest/score-summary.json", "results/baselines/"],
        rootDir,
    });
}
|
|
645
|
+
/**
 * Plan builder for the `pipeline` command.
 *
 * Normalizes the Commander options (filling in defaults for list and boolean
 * flags), runs them through `computeResolvedOptions`, then passes the subset
 * of resolved fields listed below to `buildPipelinePlan`.
 *
 * @param actionCommand - Commander command; only `opts()` is read
 * @param rootDir - Forwarded unchanged to `buildPipelinePlan`
 * @returns The awaited result of `buildPipelinePlan`
 */
async function buildPipelineExplainPlan(actionCommand, rootDir) {
    const cliOpts = actionCommand.opts();
    // Fallback factories, so every list default is a fresh array.
    const emptyList = () => [];
    const off = () => false;
    const on = () => true;
    // One row per option handed to computeResolvedOptions, in key order.
    // A row without a factory passes the raw value through (possibly undefined).
    const optionTable = [
        ["allowedOrigin", emptyList],
        ["allowedOrigins", emptyList],
        ["area"],
        ["before"],
        ["cache", on],
        ["changedDocs"],
        ["compare", off],
        ["compareBaseline"],
        ["concurrency"],
        ["debug", off],
        ["debugN"],
        ["debugPattern"],
        ["debugSample"],
        ["discoveryReport", off],
        ["dryRun", off],
        ["gapAnalysis", on],
        ["graderReplications"],
        ["header", emptyList],
        ["headers", emptyList],
        ["mode", () => "full"],
        ["output"],
        ["promptfooUrl"],
        ["publish"],
        ["publishTag"],
        ["readiness", off],
        ["reportDataset"],
        ["reportProject"],
        ["sanityDataset"],
        ["sanityDocument", emptyList],
        ["sanityDocuments", emptyList],
        ["sanityPerspective"],
        ["sanityProject"],
        ["sanityStudioOrigin"],
        ["search"],
        ["skipEval", off],
        ["skipFetch", off],
        ["source"],
        ["task"],
        ["threshold"],
        ["url", emptyList],
        ["urls", emptyList],
    ];
    const withDefaults = {};
    for (const [key, fallback] of optionTable) {
        const given = cliOpts[key];
        withDefaults[key] = fallback === undefined ? given : (given ?? fallback());
    }
    const resolved = computeResolvedOptions(withDefaults);
    // Resolved fields forwarded to the plan builder, in key order.
    const planKeys = [
        "areaOption",
        "beforeOption",
        "compareBaseline",
        "compareEnabled",
        "compareThreshold",
        "concurrency",
        "debug",
        "discoveryReportEnabled",
        "dryRun",
        "gapAnalysisEnabled",
        "graderReplications",
        "mode",
        "noCache",
        "publishEnabled",
        "readinessEnabled",
        "skipEval",
        "skipFetch",
        "source",
        "repoTasksPath",
        "taskOption",
    ];
    const planOpts = Object.fromEntries(planKeys.map((key) => [key, resolved[key]]));
    const plan = await buildPipelinePlan(planOpts, rootDir);
    return plan;
}
|