@sanity/ailf 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +89 -0
- package/bin/ailf.js +64 -0
- package/canonical/grader-references/README.md +88 -0
- package/canonical/grader-references/groq.yaml +234 -0
- package/canonical/grader-references/studio-setup.yaml +275 -0
- package/canonical/reference-solutions/.gitkeep +1 -0
- package/canonical/reference-solutions/frameworks/nuxt.ts +119 -0
- package/canonical/reference-solutions/frameworks/remix.tsx +100 -0
- package/canonical/reference-solutions/functions/publish-webhook.ts +60 -0
- package/canonical/reference-solutions/groq/advanced-filtering.ts +379 -0
- package/canonical/reference-solutions/groq/blog-queries.ts +137 -0
- package/canonical/reference-solutions/groq/joins-references.ts +300 -0
- package/canonical/reference-solutions/nextjs/app-router-integration.tsx +128 -0
- package/canonical/reference-solutions/studio-setup/blog-schema.ts +143 -0
- package/canonical/reference-solutions/studio-setup/custom-tool.tsx +78 -0
- package/canonical/reference-solutions/visual-editing/live-preview.tsx +137 -0
- package/canonical/reference-solutions/visual-editing/presentation-nextjs.tsx +130 -0
- package/config/airbyte/ai_literacy_framework.connector.yaml +639 -0
- package/config/bigquery/README.md +74 -0
- package/config/bigquery/views/area_scores.sql +87 -0
- package/config/bigquery/views/reports.sql +49 -0
- package/config/features.yaml +116 -0
- package/config/models.yaml +115 -0
- package/config/prompts.yaml +75 -0
- package/config/rubrics.yaml +62 -0
- package/config/schedules.yaml +43 -0
- package/config/sinks.yaml +54 -0
- package/config/sources.yaml +51 -0
- package/config/thresholds.yaml +49 -0
- package/dist/_vendor/ailf-core/examples/index.d.ts +190 -0
- package/dist/_vendor/ailf-core/examples/index.js +285 -0
- package/dist/_vendor/ailf-core/index.d.ts +17 -0
- package/dist/_vendor/ailf-core/index.js +17 -0
- package/dist/_vendor/ailf-core/ports/cache-store.d.ts +72 -0
- package/dist/_vendor/ailf-core/ports/cache-store.js +17 -0
- package/dist/_vendor/ailf-core/ports/config-source.d.ts +33 -0
- package/dist/_vendor/ailf-core/ports/config-source.js +15 -0
- package/dist/_vendor/ailf-core/ports/context.d.ts +172 -0
- package/dist/_vendor/ailf-core/ports/context.js +14 -0
- package/dist/_vendor/ailf-core/ports/doc-fetcher.d.ts +131 -0
- package/dist/_vendor/ailf-core/ports/doc-fetcher.js +12 -0
- package/dist/_vendor/ailf-core/ports/eval-runner.d.ts +24 -0
- package/dist/_vendor/ailf-core/ports/eval-runner.js +8 -0
- package/dist/_vendor/ailf-core/ports/index.d.ts +15 -0
- package/dist/_vendor/ailf-core/ports/index.js +7 -0
- package/dist/_vendor/ailf-core/ports/logger.d.ts +36 -0
- package/dist/_vendor/ailf-core/ports/logger.js +11 -0
- package/dist/_vendor/ailf-core/ports/pipeline-step.d.ts +46 -0
- package/dist/_vendor/ailf-core/ports/pipeline-step.js +8 -0
- package/dist/_vendor/ailf-core/ports/task-source.d.ts +159 -0
- package/dist/_vendor/ailf-core/ports/task-source.js +72 -0
- package/dist/_vendor/ailf-core/schemas/callback-payload.d.ts +24 -0
- package/dist/_vendor/ailf-core/schemas/callback-payload.js +29 -0
- package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +55 -0
- package/dist/_vendor/ailf-core/schemas/eval-config.js +78 -0
- package/dist/_vendor/ailf-core/schemas/index.d.ts +16 -0
- package/dist/_vendor/ailf-core/schemas/index.js +16 -0
- package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +125 -0
- package/dist/_vendor/ailf-core/schemas/pipeline-request.js +67 -0
- package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +531 -0
- package/dist/_vendor/ailf-core/schemas/pipeline.js +318 -0
- package/dist/_vendor/ailf-core/schemas/schedules.d.ts +68 -0
- package/dist/_vendor/ailf-core/schemas/schedules.js +74 -0
- package/dist/_vendor/ailf-core/schemas/sinks.d.ts +207 -0
- package/dist/_vendor/ailf-core/schemas/sinks.js +108 -0
- package/dist/_vendor/ailf-core/services/comparison-formatters.d.ts +18 -0
- package/dist/_vendor/ailf-core/services/comparison-formatters.js +189 -0
- package/dist/_vendor/ailf-core/services/config-helpers.d.ts +41 -0
- package/dist/_vendor/ailf-core/services/config-helpers.js +86 -0
- package/dist/_vendor/ailf-core/services/index.d.ts +12 -0
- package/dist/_vendor/ailf-core/services/index.js +12 -0
- package/dist/_vendor/ailf-core/services/scoring.d.ts +49 -0
- package/dist/_vendor/ailf-core/services/scoring.js +222 -0
- package/dist/_vendor/ailf-core/types/index.d.ts +1082 -0
- package/dist/_vendor/ailf-core/types/index.js +21 -0
- package/dist/_vendor/ailf-core/types/scoring-input.d.ts +54 -0
- package/dist/_vendor/ailf-core/types/scoring-input.js +9 -0
- package/dist/_vendor/ailf-shared/dimension-names.d.ts +21 -0
- package/dist/_vendor/ailf-shared/dimension-names.js +27 -0
- package/dist/_vendor/ailf-shared/document-ref.d.ts +29 -0
- package/dist/_vendor/ailf-shared/document-ref.js +1 -0
- package/dist/_vendor/ailf-shared/eval-modes.d.ts +12 -0
- package/dist/_vendor/ailf-shared/eval-modes.js +8 -0
- package/dist/_vendor/ailf-shared/index.d.ts +16 -0
- package/dist/_vendor/ailf-shared/index.js +16 -0
- package/dist/_vendor/ailf-shared/noise-threshold.d.ts +9 -0
- package/dist/_vendor/ailf-shared/noise-threshold.js +9 -0
- package/dist/_vendor/ailf-shared/score-grades.d.ts +17 -0
- package/dist/_vendor/ailf-shared/score-grades.js +23 -0
- package/dist/adapters/cache/content-lake-cache.d.ts +24 -0
- package/dist/adapters/cache/content-lake-cache.js +59 -0
- package/dist/adapters/cache/filesystem-cache.d.ts +18 -0
- package/dist/adapters/cache/filesystem-cache.js +54 -0
- package/dist/adapters/cache/index.d.ts +2 -0
- package/dist/adapters/cache/index.js +2 -0
- package/dist/adapters/config-sources/cli-config-adapter.d.ts +17 -0
- package/dist/adapters/config-sources/cli-config-adapter.js +23 -0
- package/dist/adapters/config-sources/file-config-adapter.d.ts +26 -0
- package/dist/adapters/config-sources/file-config-adapter.js +96 -0
- package/dist/adapters/config-sources/index.d.ts +2 -0
- package/dist/adapters/config-sources/index.js +2 -0
- package/dist/adapters/doc-fetchers/index.d.ts +1 -0
- package/dist/adapters/doc-fetchers/index.js +1 -0
- package/dist/adapters/doc-fetchers/sanity-doc-fetcher.d.ts +76 -0
- package/dist/adapters/doc-fetchers/sanity-doc-fetcher.js +620 -0
- package/dist/adapters/eval-runners/index.d.ts +1 -0
- package/dist/adapters/eval-runners/index.js +1 -0
- package/dist/adapters/eval-runners/promptfoo-eval-adapter.d.ts +14 -0
- package/dist/adapters/eval-runners/promptfoo-eval-adapter.js +63 -0
- package/dist/adapters/index.d.ts +12 -0
- package/dist/adapters/index.js +12 -0
- package/dist/adapters/loggers/console-logger.d.ts +22 -0
- package/dist/adapters/loggers/console-logger.js +54 -0
- package/dist/adapters/loggers/index.d.ts +9 -0
- package/dist/adapters/loggers/index.js +9 -0
- package/dist/adapters/loggers/json-logger.d.ts +18 -0
- package/dist/adapters/loggers/json-logger.js +33 -0
- package/dist/adapters/loggers/quiet-logger.d.ts +16 -0
- package/dist/adapters/loggers/quiet-logger.js +30 -0
- package/dist/adapters/task-sources/composite-task-source.d.ts +20 -0
- package/dist/adapters/task-sources/composite-task-source.js +59 -0
- package/dist/adapters/task-sources/content-lake-task-source.d.ts +20 -0
- package/dist/adapters/task-sources/content-lake-task-source.js +219 -0
- package/dist/adapters/task-sources/index.d.ts +7 -0
- package/dist/adapters/task-sources/index.js +7 -0
- package/dist/adapters/task-sources/repo-schemas.d.ts +245 -0
- package/dist/adapters/task-sources/repo-schemas.js +234 -0
- package/dist/adapters/task-sources/repo-task-source.d.ts +22 -0
- package/dist/adapters/task-sources/repo-task-source.js +104 -0
- package/dist/adapters/task-sources/repo-trigger.d.ts +52 -0
- package/dist/adapters/task-sources/repo-trigger.js +153 -0
- package/dist/adapters/task-sources/repo-validation.d.ts +49 -0
- package/dist/adapters/task-sources/repo-validation.js +164 -0
- package/dist/adapters/task-sources/yaml-task-source.d.ts +18 -0
- package/dist/adapters/task-sources/yaml-task-source.js +136 -0
- package/dist/agent-observer/agentic-provider.d.ts +132 -0
- package/dist/agent-observer/agentic-provider.js +983 -0
- package/dist/agent-observer/classifier.d.ts +62 -0
- package/dist/agent-observer/classifier.js +269 -0
- package/dist/agent-observer/index.d.ts +7 -0
- package/dist/agent-observer/index.js +4 -0
- package/dist/agent-observer/pricing.d.ts +35 -0
- package/dist/agent-observer/pricing.js +82 -0
- package/dist/agent-observer/provider.d.ts +77 -0
- package/dist/agent-observer/provider.js +151 -0
- package/dist/agent-observer/proxy.d.ts +91 -0
- package/dist/agent-observer/proxy.js +321 -0
- package/dist/agent-observer/test-imports.d.ts +7 -0
- package/dist/agent-observer/test-imports.js +185 -0
- package/dist/agent-observer/types.d.ts +137 -0
- package/dist/agent-observer/types.js +16 -0
- package/dist/assertions/source-isolation.d.ts +72 -0
- package/dist/assertions/source-isolation.js +117 -0
- package/dist/cli.d.ts +24 -0
- package/dist/cli.js +199 -0
- package/dist/commands/agent-report.d.ts +5 -0
- package/dist/commands/agent-report.js +69 -0
- package/dist/commands/baseline.d.ts +9 -0
- package/dist/commands/baseline.js +141 -0
- package/dist/commands/cache.d.ts +13 -0
- package/dist/commands/cache.js +135 -0
- package/dist/commands/calculate-scores.d.ts +8 -0
- package/dist/commands/calculate-scores.js +48 -0
- package/dist/commands/compare.d.ts +8 -0
- package/dist/commands/compare.js +120 -0
- package/dist/commands/completion.d.ts +18 -0
- package/dist/commands/completion.js +260 -0
- package/dist/commands/coverage-audit.d.ts +7 -0
- package/dist/commands/coverage-audit.js +40 -0
- package/dist/commands/discovery-report.d.ts +10 -0
- package/dist/commands/discovery-report.js +44 -0
- package/dist/commands/eval.d.ts +9 -0
- package/dist/commands/eval.js +35 -0
- package/dist/commands/explain-handler.d.ts +34 -0
- package/dist/commands/explain-handler.js +719 -0
- package/dist/commands/fetch-docs.d.ts +8 -0
- package/dist/commands/fetch-docs.js +128 -0
- package/dist/commands/generate-configs.d.ts +8 -0
- package/dist/commands/generate-configs.js +46 -0
- package/dist/commands/grader/index.d.ts +11 -0
- package/dist/commands/grader/index.js +118 -0
- package/dist/commands/init.d.ts +19 -0
- package/dist/commands/init.js +150 -0
- package/dist/commands/interactive.d.ts +12 -0
- package/dist/commands/interactive.js +238 -0
- package/dist/commands/lookup-doc.d.ts +15 -0
- package/dist/commands/lookup-doc.js +84 -0
- package/dist/commands/measure-retrieval.d.ts +5 -0
- package/dist/commands/measure-retrieval.js +65 -0
- package/dist/commands/pipeline-action.d.ts +71 -0
- package/dist/commands/pipeline-action.js +305 -0
- package/dist/commands/pipeline.d.ts +62 -0
- package/dist/commands/pipeline.js +53 -0
- package/dist/commands/pr-comment.d.ts +8 -0
- package/dist/commands/pr-comment.js +47 -0
- package/dist/commands/publish.d.ts +26 -0
- package/dist/commands/publish.js +253 -0
- package/dist/commands/readiness-report.d.ts +10 -0
- package/dist/commands/readiness-report.js +104 -0
- package/dist/commands/shared/options.d.ts +29 -0
- package/dist/commands/shared/options.js +57 -0
- package/dist/commands/update-quality-scores.d.ts +5 -0
- package/dist/commands/update-quality-scores.js +20 -0
- package/dist/commands/validate-tasks.d.ts +16 -0
- package/dist/commands/validate-tasks.js +93 -0
- package/dist/commands/validate.d.ts +9 -0
- package/dist/commands/validate.js +73 -0
- package/dist/commands/webhook-server.d.ts +5 -0
- package/dist/commands/webhook-server.js +30 -0
- package/dist/commands/weekly-digest.d.ts +10 -0
- package/dist/commands/weekly-digest.js +104 -0
- package/dist/composition-root.d.ts +26 -0
- package/dist/composition-root.js +107 -0
- package/dist/interpolate.d.ts +26 -0
- package/dist/interpolate.js +70 -0
- package/dist/job-store.d.ts +104 -0
- package/dist/job-store.js +188 -0
- package/dist/lib/agent-behavior-report.d.ts +8 -0
- package/dist/lib/agent-behavior-report.js +185 -0
- package/dist/lib/baseline.d.ts +19 -0
- package/dist/lib/baseline.js +153 -0
- package/dist/lib/calculate-scores.d.ts +23 -0
- package/dist/lib/calculate-scores.js +42 -0
- package/dist/lib/compare.d.ts +18 -0
- package/dist/lib/compare.js +170 -0
- package/dist/lib/coverage-audit.d.ts +4 -0
- package/dist/lib/coverage-audit.js +42 -0
- package/dist/lib/discovery-report.d.ts +13 -0
- package/dist/lib/discovery-report.js +57 -0
- package/dist/lib/fetch-docs.d.ts +30 -0
- package/dist/lib/fetch-docs.js +171 -0
- package/dist/lib/generate-configs.d.ts +25 -0
- package/dist/lib/generate-configs.js +42 -0
- package/dist/lib/grader-api.d.ts +21 -0
- package/dist/lib/grader-api.js +34 -0
- package/dist/lib/grader-compare.d.ts +19 -0
- package/dist/lib/grader-compare.js +91 -0
- package/dist/lib/grader-consistency.d.ts +27 -0
- package/dist/lib/grader-consistency.js +79 -0
- package/dist/lib/grader-sensitivity.d.ts +19 -0
- package/dist/lib/grader-sensitivity.js +75 -0
- package/dist/lib/grader-validate.d.ts +19 -0
- package/dist/lib/grader-validate.js +78 -0
- package/dist/lib/measure-retrieval.d.ts +14 -0
- package/dist/lib/measure-retrieval.js +71 -0
- package/dist/lib/pr-comment.d.ts +16 -0
- package/dist/lib/pr-comment.js +28 -0
- package/dist/lib/readiness-report.d.ts +13 -0
- package/dist/lib/readiness-report.js +108 -0
- package/dist/lib/webhook-server.d.ts +11 -0
- package/dist/lib/webhook-server.js +24 -0
- package/dist/lib/weekly-digest.d.ts +24 -0
- package/dist/lib/weekly-digest.js +148 -0
- package/dist/orchestration/build-app-context.d.ts +27 -0
- package/dist/orchestration/build-app-context.js +81 -0
- package/dist/orchestration/build-step-sequence.d.ts +15 -0
- package/dist/orchestration/build-step-sequence.js +84 -0
- package/dist/orchestration/config-to-source-overrides.d.ts +9 -0
- package/dist/orchestration/config-to-source-overrides.js +28 -0
- package/dist/orchestration/env-bridge.d.ts +21 -0
- package/dist/orchestration/env-bridge.js +66 -0
- package/dist/orchestration/index.d.ts +11 -0
- package/dist/orchestration/index.js +11 -0
- package/dist/orchestration/pipeline-orchestrator.d.ts +24 -0
- package/dist/orchestration/pipeline-orchestrator.js +153 -0
- package/dist/orchestration/step-runner.d.ts +20 -0
- package/dist/orchestration/step-runner.js +88 -0
- package/dist/orchestration/steps/calculate-scores-step.d.ts +13 -0
- package/dist/orchestration/steps/calculate-scores-step.js +95 -0
- package/dist/orchestration/steps/callback-step.d.ts +24 -0
- package/dist/orchestration/steps/callback-step.js +76 -0
- package/dist/orchestration/steps/compare-step.d.ts +14 -0
- package/dist/orchestration/steps/compare-step.js +92 -0
- package/dist/orchestration/steps/discovery-report-step.d.ts +13 -0
- package/dist/orchestration/steps/discovery-report-step.js +55 -0
- package/dist/orchestration/steps/fetch-docs-shell.d.ts +17 -0
- package/dist/orchestration/steps/fetch-docs-shell.js +30 -0
- package/dist/orchestration/steps/fetch-docs-step.d.ts +14 -0
- package/dist/orchestration/steps/fetch-docs-step.js +135 -0
- package/dist/orchestration/steps/gap-analysis-step.d.ts +16 -0
- package/dist/orchestration/steps/gap-analysis-step.js +136 -0
- package/dist/orchestration/steps/generate-configs-step.d.ts +14 -0
- package/dist/orchestration/steps/generate-configs-step.js +85 -0
- package/dist/orchestration/steps/grader-consistency-step.d.ts +13 -0
- package/dist/orchestration/steps/grader-consistency-step.js +64 -0
- package/dist/orchestration/steps/index.d.ts +19 -0
- package/dist/orchestration/steps/index.js +19 -0
- package/dist/orchestration/steps/mirror-repo-tasks-step.d.ts +21 -0
- package/dist/orchestration/steps/mirror-repo-tasks-step.js +94 -0
- package/dist/orchestration/steps/publish-report-step.d.ts +26 -0
- package/dist/orchestration/steps/publish-report-step.js +216 -0
- package/dist/orchestration/steps/readiness-step.d.ts +13 -0
- package/dist/orchestration/steps/readiness-step.js +91 -0
- package/dist/orchestration/steps/report-step.d.ts +12 -0
- package/dist/orchestration/steps/report-step.js +49 -0
- package/dist/orchestration/steps/run-eval-step.d.ts +17 -0
- package/dist/orchestration/steps/run-eval-step.js +195 -0
- package/dist/orchestration/steps/validate-step.d.ts +12 -0
- package/dist/orchestration/steps/validate-step.js +41 -0
- package/dist/pipeline/agent-behavior-report.d.ts +53 -0
- package/dist/pipeline/agent-behavior-report.js +132 -0
- package/dist/pipeline/attribution.d.ts +47 -0
- package/dist/pipeline/attribution.js +226 -0
- package/dist/pipeline/baseline.d.ts +37 -0
- package/dist/pipeline/baseline.js +141 -0
- package/dist/pipeline/cache.d.ts +101 -0
- package/dist/pipeline/cache.js +283 -0
- package/dist/pipeline/calculate-scores.d.ts +102 -0
- package/dist/pipeline/calculate-scores.js +1128 -0
- package/dist/pipeline/callback-delivery.d.ts +50 -0
- package/dist/pipeline/callback-delivery.js +89 -0
- package/dist/pipeline/checks.d.ts +39 -0
- package/dist/pipeline/checks.js +280 -0
- package/dist/pipeline/classify-url.d.ts +61 -0
- package/dist/pipeline/classify-url.js +93 -0
- package/dist/pipeline/compare.d.ts +31 -0
- package/dist/pipeline/compare.js +208 -0
- package/dist/pipeline/coverage-audit.d.ts +39 -0
- package/dist/pipeline/coverage-audit.js +165 -0
- package/dist/pipeline/degradations.d.ts +85 -0
- package/dist/pipeline/degradations.js +242 -0
- package/dist/pipeline/discovery-report.d.ts +55 -0
- package/dist/pipeline/discovery-report.js +178 -0
- package/dist/pipeline/eval-constants.d.ts +68 -0
- package/dist/pipeline/eval-constants.js +111 -0
- package/dist/pipeline/eval-fingerprint.d.ts +66 -0
- package/dist/pipeline/eval-fingerprint.js +175 -0
- package/dist/pipeline/expand-tasks.d.ts +220 -0
- package/dist/pipeline/expand-tasks.js +421 -0
- package/dist/pipeline/failure-modes.d.ts +46 -0
- package/dist/pipeline/failure-modes.js +348 -0
- package/dist/pipeline/fetch-url-content.d.ts +44 -0
- package/dist/pipeline/fetch-url-content.js +93 -0
- package/dist/pipeline/gap-analysis.d.ts +48 -0
- package/dist/pipeline/gap-analysis.js +231 -0
- package/dist/pipeline/generate-configs.d.ts +72 -0
- package/dist/pipeline/generate-configs.js +395 -0
- package/dist/pipeline/grader-api.d.ts +49 -0
- package/dist/pipeline/grader-api.js +200 -0
- package/dist/pipeline/grader-compare-runner.d.ts +44 -0
- package/dist/pipeline/grader-compare-runner.js +301 -0
- package/dist/pipeline/grader-comparison.d.ts +111 -0
- package/dist/pipeline/grader-comparison.js +161 -0
- package/dist/pipeline/grader-consistency-runner.d.ts +60 -0
- package/dist/pipeline/grader-consistency-runner.js +270 -0
- package/dist/pipeline/grader-consistency.d.ts +103 -0
- package/dist/pipeline/grader-consistency.js +146 -0
- package/dist/pipeline/grader-sensitivity-runner.d.ts +40 -0
- package/dist/pipeline/grader-sensitivity-runner.js +282 -0
- package/dist/pipeline/grader-sensitivity.d.ts +94 -0
- package/dist/pipeline/grader-sensitivity.js +144 -0
- package/dist/pipeline/grader-validate-runner.d.ts +38 -0
- package/dist/pipeline/grader-validate-runner.js +229 -0
- package/dist/pipeline/grader-validation.d.ts +107 -0
- package/dist/pipeline/grader-validation.js +169 -0
- package/dist/pipeline/map-request-to-config.d.ts +19 -0
- package/dist/pipeline/map-request-to-config.js +80 -0
- package/dist/pipeline/measure-retrieval.d.ts +59 -0
- package/dist/pipeline/measure-retrieval.js +111 -0
- package/dist/pipeline/mirror-repo-tasks.d.ts +86 -0
- package/dist/pipeline/mirror-repo-tasks.js +350 -0
- package/dist/pipeline/plan-format.d.ts +33 -0
- package/dist/pipeline/plan-format.js +202 -0
- package/dist/pipeline/plan.d.ts +169 -0
- package/dist/pipeline/plan.js +708 -0
- package/dist/pipeline/pr-comment.d.ts +19 -0
- package/dist/pipeline/pr-comment.js +502 -0
- package/dist/pipeline/probe.d.ts +52 -0
- package/dist/pipeline/probe.js +390 -0
- package/dist/pipeline/provenance.d.ts +47 -0
- package/dist/pipeline/provenance.js +146 -0
- package/dist/pipeline/readiness-report.d.ts +87 -0
- package/dist/pipeline/readiness-report.js +205 -0
- package/dist/pipeline/release-classification.d.ts +54 -0
- package/dist/pipeline/release-classification.js +238 -0
- package/dist/pipeline/release-report.d.ts +37 -0
- package/dist/pipeline/release-report.js +222 -0
- package/dist/pipeline/repo-eval-comment.d.ts +37 -0
- package/dist/pipeline/repo-eval-comment.js +165 -0
- package/dist/pipeline/repo-threshold-evaluator.d.ts +89 -0
- package/dist/pipeline/repo-threshold-evaluator.js +162 -0
- package/dist/pipeline/resolve-mappings.d.ts +35 -0
- package/dist/pipeline/resolve-mappings.js +72 -0
- package/dist/pipeline/retrieval-metrics.d.ts +39 -0
- package/dist/pipeline/retrieval-metrics.js +136 -0
- package/dist/pipeline/reverse-mapping.d.ts +67 -0
- package/dist/pipeline/reverse-mapping.js +88 -0
- package/dist/pipeline/schemas.d.ts +9 -0
- package/dist/pipeline/schemas.js +9 -0
- package/dist/pipeline/steps/calculate-scores-step.d.ts +11 -0
- package/dist/pipeline/steps/calculate-scores-step.js +89 -0
- package/dist/pipeline/steps/compare-step.d.ts +18 -0
- package/dist/pipeline/steps/compare-step.js +90 -0
- package/dist/pipeline/steps/eval-step.d.ts +53 -0
- package/dist/pipeline/steps/eval-step.js +347 -0
- package/dist/pipeline/steps/fetch-docs-step.d.ts +11 -0
- package/dist/pipeline/steps/fetch-docs-step.js +84 -0
- package/dist/pipeline/steps/generate-configs-step.d.ts +11 -0
- package/dist/pipeline/steps/generate-configs-step.js +98 -0
- package/dist/pipeline/steps/grader-consistency-step.d.ts +21 -0
- package/dist/pipeline/steps/grader-consistency-step.js +74 -0
- package/dist/pipeline/steps/publish-report-step.d.ts +57 -0
- package/dist/pipeline/steps/publish-report-step.js +243 -0
- package/dist/pipeline/steps/report-step.d.ts +13 -0
- package/dist/pipeline/steps/report-step.js +56 -0
- package/dist/pipeline/steps/update-scores-step.d.ts +11 -0
- package/dist/pipeline/steps/update-scores-step.js +42 -0
- package/dist/pipeline/targeted-loo.d.ts +88 -0
- package/dist/pipeline/targeted-loo.js +203 -0
- package/dist/pipeline/thresholds.d.ts +27 -0
- package/dist/pipeline/thresholds.js +245 -0
- package/dist/pipeline/types.d.ts +10 -0
- package/dist/pipeline/types.js +10 -0
- package/dist/pipeline/validate.d.ts +67 -0
- package/dist/pipeline/validate.js +406 -0
- package/dist/pipeline/webhook-server.d.ts +37 -0
- package/dist/pipeline/webhook-server.js +133 -0
- package/dist/report-store.d.ts +84 -0
- package/dist/report-store.js +208 -0
- package/dist/sanity/client.d.ts +38 -0
- package/dist/sanity/client.js +86 -0
- package/dist/sanity/portable-text.d.ts +11 -0
- package/dist/sanity/portable-text.js +211 -0
- package/dist/sanity/queries.d.ts +133 -0
- package/dist/sanity/queries.js +300 -0
- package/dist/schedules/digest.d.ts +116 -0
- package/dist/schedules/digest.js +156 -0
- package/dist/schedules/index.d.ts +12 -0
- package/dist/schedules/index.js +10 -0
- package/dist/schedules/loader.d.ts +31 -0
- package/dist/schedules/loader.js +73 -0
- package/dist/schedules/schema.d.ts +9 -0
- package/dist/schedules/schema.js +9 -0
- package/dist/scripts/agent-behavior-report.d.ts +19 -0
- package/dist/scripts/agent-behavior-report.js +315 -0
- package/dist/scripts/baseline.d.ts +43 -0
- package/dist/scripts/baseline.js +267 -0
- package/dist/scripts/calculate-scores.d.ts +166 -0
- package/dist/scripts/calculate-scores.js +1296 -0
- package/dist/scripts/compare.d.ts +22 -0
- package/dist/scripts/compare.js +334 -0
- package/dist/scripts/coverage-audit.d.ts +44 -0
- package/dist/scripts/coverage-audit.js +209 -0
- package/dist/scripts/debug-eval.d.ts +19 -0
- package/dist/scripts/debug-eval.js +73 -0
- package/dist/scripts/discovery-report.d.ts +58 -0
- package/dist/scripts/discovery-report.js +250 -0
- package/dist/scripts/fetch-docs.d.ts +35 -0
- package/dist/scripts/fetch-docs.js +472 -0
- package/dist/scripts/generate-configs.d.ts +66 -0
- package/dist/scripts/generate-configs.js +459 -0
- package/dist/scripts/grader-api.d.ts +27 -0
- package/dist/scripts/grader-api.js +206 -0
- package/dist/scripts/grader-compare.d.ts +22 -0
- package/dist/scripts/grader-compare.js +368 -0
- package/dist/scripts/grader-consistency.d.ts +20 -0
- package/dist/scripts/grader-consistency.js +313 -0
- package/dist/scripts/grader-sensitivity.d.ts +22 -0
- package/dist/scripts/grader-sensitivity.js +354 -0
- package/dist/scripts/grader-validate.d.ts +19 -0
- package/dist/scripts/grader-validate.js +267 -0
- package/dist/scripts/measure-retrieval.d.ts +10 -0
- package/dist/scripts/measure-retrieval.js +145 -0
- package/dist/scripts/migrate-tasks-to-content-lake.d.ts +24 -0
- package/dist/scripts/migrate-tasks-to-content-lake.js +327 -0
- package/dist/scripts/pipeline.d.ts +76 -0
- package/dist/scripts/pipeline.js +1031 -0
- package/dist/scripts/pr-comment.d.ts +10 -0
- package/dist/scripts/pr-comment.js +510 -0
- package/dist/scripts/readiness-report.d.ts +88 -0
- package/dist/scripts/readiness-report.js +342 -0
- package/dist/scripts/update-quality-scores.d.ts +15 -0
- package/dist/scripts/update-quality-scores.js +184 -0
- package/dist/scripts/validate-task-sources.d.ts +21 -0
- package/dist/scripts/validate-task-sources.js +210 -0
- package/dist/scripts/validate.d.ts +13 -0
- package/dist/scripts/validate.js +79 -0
- package/dist/scripts/webhook-server.d.ts +26 -0
- package/dist/scripts/webhook-server.js +147 -0
- package/dist/scripts/weekly-digest.d.ts +24 -0
- package/dist/scripts/weekly-digest.js +144 -0
- package/dist/sinks/bigquery/index.d.ts +131 -0
- package/dist/sinks/bigquery/index.js +222 -0
- package/dist/sinks/format-slack.d.ts +64 -0
- package/dist/sinks/format-slack.js +306 -0
- package/dist/sinks/index.d.ts +23 -0
- package/dist/sinks/index.js +18 -0
- package/dist/sinks/loader.d.ts +18 -0
- package/dist/sinks/loader.js +82 -0
- package/dist/sinks/retry.d.ts +24 -0
- package/dist/sinks/retry.js +52 -0
- package/dist/sinks/schema.d.ts +9 -0
- package/dist/sinks/schema.js +9 -0
- package/dist/sinks/slack/format.d.ts +65 -0
- package/dist/sinks/slack/format.js +327 -0
- package/dist/sinks/slack/index.d.ts +27 -0
- package/dist/sinks/slack/index.js +78 -0
- package/dist/sinks/slack-sink.d.ts +27 -0
- package/dist/sinks/slack-sink.js +78 -0
- package/dist/sinks/types.d.ts +59 -0
- package/dist/sinks/types.js +44 -0
- package/dist/sinks/webhook/index.d.ts +19 -0
- package/dist/sinks/webhook/index.js +50 -0
- package/dist/sinks/webhook-sink.d.ts +19 -0
- package/dist/sinks/webhook-sink.js +50 -0
- package/dist/sources.d.ts +104 -0
- package/dist/sources.js +292 -0
- package/dist/webhook/budget.d.ts +42 -0
- package/dist/webhook/budget.js +60 -0
- package/dist/webhook/debounce.d.ts +67 -0
- package/dist/webhook/debounce.js +76 -0
- package/dist/webhook/dispatch.d.ts +45 -0
- package/dist/webhook/dispatch.js +84 -0
- package/dist/webhook/eval-request-handler.d.ts +87 -0
- package/dist/webhook/eval-request-handler.js +181 -0
- package/dist/webhook/handler.d.ts +88 -0
- package/dist/webhook/handler.js +203 -0
- package/dist/webhook/index.d.ts +17 -0
- package/dist/webhook/index.js +12 -0
- package/dist/webhook/types.d.ts +109 -0
- package/dist/webhook/types.js +10 -0
- package/package.json +72 -0
- package/tasks/.expanded.agentic.yaml +51 -0
- package/tasks/.expanded.yaml +66 -0
- package/tasks/frameworks.yaml +98 -0
- package/tasks/functions.yaml +51 -0
- package/tasks/groq.yaml +216 -0
- package/tasks/nextjs-live.yaml +62 -0
- package/tasks/studio-setup.yaml +111 -0
- package/tasks/visual-editing.yaml +120 -0
|
@@ -0,0 +1,305 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pipeline action — resolves CLI options and orchestrates pipeline steps.
|
|
3
|
+
*
|
|
4
|
+
* This file is the thin CLI-to-orchestrator bridge. The bulk of execution
|
|
5
|
+
* logic lives in packages/eval/src/orchestration/.
|
|
6
|
+
*
|
|
7
|
+
* Responsibilities:
|
|
8
|
+
* - Resolve CLI flags into typed ResolvedOptions
|
|
9
|
+
* - Delegate to the PipelineOrchestrator for step execution
|
|
10
|
+
*
|
|
11
|
+
* @see packages/eval/src/orchestration/ for the step-based pipeline
|
|
12
|
+
*/
|
|
13
|
+
import { writeFileSync } from "fs";
|
|
14
|
+
import { dirname, resolve } from "path";
|
|
15
|
+
import { fileURLToPath } from "url";
|
|
16
|
+
import { classifyUrls } from "../pipeline/classify-url.js";
|
|
17
|
+
import { assessImpact, buildReverseMapping, } from "../pipeline/reverse-mapping.js";
|
|
18
|
+
import { buildAppContext } from "../orchestration/build-app-context.js";
|
|
19
|
+
import { buildStepSequence } from "../orchestration/build-step-sequence.js";
|
|
20
|
+
import { orchestratePipeline } from "../orchestration/pipeline-orchestrator.js";
|
|
21
|
+
// ES modules have no built-in __dirname; derive this module's directory from import.meta.url.
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
22
|
+
// Absolute path of the directory two levels above this module's directory.
const ROOT = resolve(__dirname, "..", "..");
|
|
23
|
+
// ---------------------------------------------------------------------------
|
|
24
|
+
// Valid modes & search modes
|
|
25
|
+
// ---------------------------------------------------------------------------
|
|
26
|
+
// Accepted values for `opts.mode`; computeResolvedOptions rejects anything else.
const VALID_MODES = [
    "baseline",
    "observed",
    "agentic",
    "full",
];
|
|
27
|
+
// Accepted search modes, taken from `opts.search` or EVAL_SEARCH_MODE (default "open").
const VALID_SEARCH_MODES = [
    "open",
    "origin-only",
    "off",
];
|
|
28
|
+
/**
|
|
29
|
+
* Pure option resolution — computes ResolvedOptions from CLI flags without
|
|
30
|
+
* mutating process.env (it does read env vars, and it prints an error and calls process.exit(1) on an invalid mode, search mode, or header). Safe to call from --explain.
|
|
31
|
+
*
|
|
32
|
+
* Exported so the plan builder can call it independently.
|
|
33
|
+
*/
|
|
34
|
+
export function computeResolvedOptions(opts) {
|
|
35
|
+
// Validate mode
|
|
36
|
+
const mode = opts.mode;
|
|
37
|
+
if (!VALID_MODES.includes(mode)) {
|
|
38
|
+
console.error(`❌ Invalid mode "${opts.mode}". Must be one of: ${VALID_MODES.join(", ")}`);
|
|
39
|
+
process.exit(1);
|
|
40
|
+
}
|
|
41
|
+
// Debug options — any sub-flag (--debug-n, --debug-pattern, --debug-sample)
|
|
42
|
+
// implies --debug, so users don't need to pass both.
|
|
43
|
+
// When DEBUG_EVAL is explicitly "0", ignore the sub-flags from env.
|
|
44
|
+
// CLI flags (--debug-n, --debug-pattern, --debug-sample) always win.
|
|
45
|
+
const debugEnvDisabled = process.env.DEBUG_EVAL === "0";
|
|
46
|
+
const debugN = opts.debugN ??
|
|
47
|
+
(process.env.DEBUG_EVAL_N && !debugEnvDisabled
|
|
48
|
+
? parseInt(process.env.DEBUG_EVAL_N, 10)
|
|
49
|
+
: undefined);
|
|
50
|
+
const debugPattern = opts.debugPattern ??
|
|
51
|
+
(process.env.DEBUG_EVAL_PATTERN && !debugEnvDisabled
|
|
52
|
+
? process.env.DEBUG_EVAL_PATTERN
|
|
53
|
+
: undefined);
|
|
54
|
+
const debugSample = opts.debugSample ??
|
|
55
|
+
(process.env.DEBUG_EVAL_SAMPLE && !debugEnvDisabled
|
|
56
|
+
? parseInt(process.env.DEBUG_EVAL_SAMPLE, 10)
|
|
57
|
+
: undefined);
|
|
58
|
+
const debugEnabled = opts.debug ||
|
|
59
|
+
process.env.DEBUG_EVAL === "1" ||
|
|
60
|
+
debugN !== undefined ||
|
|
61
|
+
debugPattern !== undefined ||
|
|
62
|
+
debugSample !== undefined;
|
|
63
|
+
const debug = debugEnabled
|
|
64
|
+
? {
|
|
65
|
+
enabled: true,
|
|
66
|
+
firstN: debugN,
|
|
67
|
+
pattern: debugPattern,
|
|
68
|
+
sample: debugSample,
|
|
69
|
+
}
|
|
70
|
+
: undefined;
|
|
71
|
+
// Search mode validation
|
|
72
|
+
const searchMode = opts.search ?? process.env.EVAL_SEARCH_MODE ?? "open";
|
|
73
|
+
if (!VALID_SEARCH_MODES.includes(searchMode)) {
|
|
74
|
+
console.error(`❌ Invalid --search mode "${searchMode}". Must be one of: ${VALID_SEARCH_MODES.join(", ")}`);
|
|
75
|
+
process.exit(1);
|
|
76
|
+
}
|
|
77
|
+
// Merge repeatable args (singular + plural aliases)
|
|
78
|
+
const urlArgs = [...opts.url, ...opts.urls];
|
|
79
|
+
const headerArgs = [...opts.header, ...opts.headers];
|
|
80
|
+
const allowedOriginArgs = [...opts.allowedOrigin, ...opts.allowedOrigins];
|
|
81
|
+
const sanityDocumentArgs = [...opts.sanityDocument, ...opts.sanityDocuments];
|
|
82
|
+
// Source overrides
|
|
83
|
+
const datasetOverride = opts.sanityDataset;
|
|
84
|
+
const projectIdOverride = opts.sanityProject;
|
|
85
|
+
const perspectiveOverride = opts.sanityPerspective;
|
|
86
|
+
const studioOriginOverride = opts.sanityStudioOrigin;
|
|
87
|
+
// URL classification (pure computation — results captured, not applied to env)
|
|
88
|
+
if (urlArgs.length > 0) {
|
|
89
|
+
const classification = classifyUrls(urlArgs);
|
|
90
|
+
if (classification.documentIds.length > 0) {
|
|
91
|
+
const existing = sanityDocumentArgs.length > 0 ? sanityDocumentArgs : [];
|
|
92
|
+
const merged = [...new Set([...existing, ...classification.documentIds])];
|
|
93
|
+
sanityDocumentArgs.length = 0;
|
|
94
|
+
sanityDocumentArgs.push(...merged);
|
|
95
|
+
}
|
|
96
|
+
}
|
|
97
|
+
// Validate custom headers (early error)
|
|
98
|
+
if (headerArgs.length > 0) {
|
|
99
|
+
for (const h of headerArgs) {
|
|
100
|
+
const colonIdx = h.indexOf(":");
|
|
101
|
+
if (colonIdx === -1) {
|
|
102
|
+
console.error(`❌ Invalid header format: "${h}". Expected "Key: Value".`);
|
|
103
|
+
process.exit(1);
|
|
104
|
+
}
|
|
105
|
+
const key = h.slice(0, colonIdx).trim();
|
|
106
|
+
if (!key) {
|
|
107
|
+
console.error(`❌ Invalid header: empty key in "${h}"`);
|
|
108
|
+
process.exit(1);
|
|
109
|
+
}
|
|
110
|
+
}
|
|
111
|
+
}
|
|
112
|
+
// Auto-infer allowed origin from --url
|
|
113
|
+
if (urlArgs.length > 0 && allowedOriginArgs.length === 0) {
|
|
114
|
+
try {
|
|
115
|
+
const hostname = new URL(urlArgs[0]).hostname.replace(/^www\./, "");
|
|
116
|
+
allowedOriginArgs.push(hostname);
|
|
117
|
+
}
|
|
118
|
+
catch {
|
|
119
|
+
// Invalid URL — will be caught later in validation
|
|
120
|
+
}
|
|
121
|
+
}
|
|
122
|
+
// Scoping
|
|
123
|
+
const areaOption = opts.area ?? process.env.EVAL_FILTER_AREAS ?? undefined;
|
|
124
|
+
const taskOption = opts.task ?? process.env.EVAL_FILTER_TASKS ?? undefined;
|
|
125
|
+
const changedDocsOption = opts.changedDocs ?? process.env.EVAL_CHANGED_DOCS ?? undefined;
|
|
126
|
+
// Document-driven scoping (pure — computes impactSummary without env writes)
|
|
127
|
+
let impactSummary;
|
|
128
|
+
if (changedDocsOption) {
|
|
129
|
+
const changedSlugs = changedDocsOption
|
|
130
|
+
.split(",")
|
|
131
|
+
.map((s) => s.trim())
|
|
132
|
+
.filter(Boolean);
|
|
133
|
+
if (changedSlugs.length > 0) {
|
|
134
|
+
const reverseMapping = buildReverseMapping(ROOT);
|
|
135
|
+
impactSummary = assessImpact(changedSlugs, reverseMapping);
|
|
136
|
+
if (impactSummary.areas.length === 0) {
|
|
137
|
+
console.warn(`\n⚠️ No evaluation tasks reference any of the changed documents:`);
|
|
138
|
+
for (const slug of changedSlugs) {
|
|
139
|
+
console.warn(` - ${slug}`);
|
|
140
|
+
}
|
|
141
|
+
console.warn(`\n Score impact cannot be measured for these documents.\n`);
|
|
142
|
+
}
|
|
143
|
+
}
|
|
144
|
+
}
|
|
145
|
+
// Comparison: --before auto-enables --compare
|
|
146
|
+
const beforeOption = opts.before;
|
|
147
|
+
const compareEnabled = opts.compare || beforeOption !== undefined;
|
|
148
|
+
// Publish: smart default — auto-publish full runs when report store is configured
|
|
149
|
+
const reportStoreToken = process.env.AILF_REPORT_SANITY_API_TOKEN ?? process.env.SANITY_API_TOKEN;
|
|
150
|
+
const reportStoreConfigured = Boolean(reportStoreToken);
|
|
151
|
+
let publishEnabled;
|
|
152
|
+
if (opts.publish !== undefined) {
|
|
153
|
+
// Explicit --publish or --no-publish always wins
|
|
154
|
+
publishEnabled = opts.publish;
|
|
155
|
+
}
|
|
156
|
+
else if (process.env.AILF_PUBLISH === "1") {
|
|
157
|
+
publishEnabled = true;
|
|
158
|
+
}
|
|
159
|
+
else if (process.env.AILF_PUBLISH === "0") {
|
|
160
|
+
publishEnabled = false;
|
|
161
|
+
}
|
|
162
|
+
else {
|
|
163
|
+
// Smart default: full runs auto-publish when store is configured
|
|
164
|
+
publishEnabled = reportStoreConfigured && !debugEnabled;
|
|
165
|
+
}
|
|
166
|
+
// Report store overrides — fall back to the eval dataset so that
|
|
167
|
+
// perspective evaluations publish reports to the same dataset the
|
|
168
|
+
// Studio is reading from. AILF_REPORT_DATASET wins when set explicitly.
|
|
169
|
+
const reportDataset = opts.reportDataset ??
|
|
170
|
+
process.env.AILF_REPORT_DATASET ??
|
|
171
|
+
datasetOverride ??
|
|
172
|
+
undefined;
|
|
173
|
+
const reportProjectId = opts.reportProject ?? process.env.AILF_REPORT_PROJECT_ID ?? undefined;
|
|
174
|
+
return {
|
|
175
|
+
allowedOriginArgs,
|
|
176
|
+
areaOption,
|
|
177
|
+
beforeOption,
|
|
178
|
+
changedDocsOption,
|
|
179
|
+
compareBaseline: opts.compareBaseline,
|
|
180
|
+
compareEnabled,
|
|
181
|
+
compareThreshold: opts.threshold,
|
|
182
|
+
concurrency: opts.concurrency,
|
|
183
|
+
datasetOverride,
|
|
184
|
+
debug,
|
|
185
|
+
discoveryReportEnabled: opts.discoveryReport,
|
|
186
|
+
dryRun: opts.dryRun,
|
|
187
|
+
gapAnalysisEnabled: opts.gapAnalysis,
|
|
188
|
+
graderReplications: opts.graderReplications,
|
|
189
|
+
headerArgs,
|
|
190
|
+
impactSummary,
|
|
191
|
+
mode,
|
|
192
|
+
noCache: !opts.cache,
|
|
193
|
+
noRemoteCache: opts.remoteCache === false,
|
|
194
|
+
outputPath: opts.output,
|
|
195
|
+
perspectiveOverride,
|
|
196
|
+
projectIdOverride,
|
|
197
|
+
promptfooUrl: opts.promptfooUrl,
|
|
198
|
+
publishEnabled,
|
|
199
|
+
publishTag: opts.publishTag,
|
|
200
|
+
readinessEnabled: opts.readiness,
|
|
201
|
+
reportDataset,
|
|
202
|
+
reportProjectId,
|
|
203
|
+
sanityDocumentArgs,
|
|
204
|
+
searchMode,
|
|
205
|
+
skipEval: opts.skipEval,
|
|
206
|
+
skipFetch: opts.skipFetch,
|
|
207
|
+
source: opts.source,
|
|
208
|
+
studioOriginOverride,
|
|
209
|
+
repoTasksPath: opts.repoTasksPath,
|
|
210
|
+
taskOption,
|
|
211
|
+
taskSourceType: resolveTaskSourceType(opts.taskSource),
|
|
212
|
+
urlArgs,
|
|
213
|
+
};
|
|
214
|
+
}
|
|
215
|
+
/**
 * Translate the raw --task-source flag into the internal task source type.
 *
 * @param raw  Value of --task-source as parsed by Commander (may be missing).
 * @returns `undefined` when the value is missing/empty or "content-lake"
 *          (the default source), or "yaml" when "yaml" was requested.
 *          Any other value prints an error and exits the process with code 1.
 */
function resolveTaskSourceType(raw) {
    // A missing or empty flag is treated the same as the explicit default.
    const requested = raw ? raw : "content-lake";
    switch (requested) {
        case "content-lake":
            return undefined;
        case "yaml":
            return "yaml";
        default:
            console.error(`❌ Invalid --task-source "${raw}". Must be "yaml" or "content-lake".`);
            process.exit(1);
    }
}
|
|
224
|
+
// ---------------------------------------------------------------------------
|
|
225
|
+
// Pipeline entry point
|
|
226
|
+
// ---------------------------------------------------------------------------
|
|
227
|
+
/**
 * Run the evaluation pipeline for one CLI invocation, then exit the process.
 *
 * Two paths:
 *  - `--config <path>`: the file must exist (otherwise error + exit 1). It is
 *    resolved through FileConfigAdapter and the resulting config replaces the
 *    `config` of the AppContext built from the CLI options.
 *  - no `--config`: CLI options are resolved; with `--dry-run` only
 *    configuration and environment checks run and the process exits
 *    (1 if any issue has severity "error", 0 otherwise).
 *
 * On both executing paths the step list comes from buildStepSequence(), is
 * run by orchestratePipeline(), the result is passed to writePipelineResult(),
 * and the exit code is 0 when `result.success` is truthy, 1 otherwise.
 *
 * @param cliOpts  Raw options object produced by Commander.
 */
export async function executePipeline(cliOpts) {
    const configPath = cliOpts.config;
    if (configPath) {
        // File-based configuration takes over from flag-derived configuration.
        const { existsSync } = await import("fs");
        if (!existsSync(configPath)) {
            console.error(`❌ Config file not found: ${configPath}`);
            process.exit(1);
        }
        const { FileConfigAdapter } = await import("../adapters/config-sources/file-config-adapter.js");
        const fileAdapter = new FileConfigAdapter(configPath, ROOT);
        const fileConfig = await fileAdapter.resolve();
        // The context is still built from CLI-derived options; only its
        // `config` member is swapped for the file-resolved one.
        const cliCtx = buildAppContext(computeResolvedOptions(cliOpts), ROOT);
        const fileCtx = { ...cliCtx, config: fileConfig };
        const startedAt = Date.now();
        const fileSteps = buildStepSequence(fileCtx, startedAt);
        const fileOutcome = await orchestratePipeline(fileCtx, fileSteps);
        writePipelineResult(fileOutcome);
        process.exit(fileOutcome.success ? 0 : 1);
    }
    const resolved = resolveOptions(cliOpts);
    if (resolved.dryRun) {
        // Validation only: nothing below this branch runs in dry-run mode.
        const { validateConfiguration } = await import("../pipeline/validate.js");
        const { checkEnvironment } = await import("../pipeline/checks.js");
        const report = validateConfiguration(ROOT);
        const environmentIssues = checkEnvironment(ROOT);
        report.issues.push(...environmentIssues);
        const fatalIssues = report.issues.filter((issue) => issue.severity === "error");
        if (fatalIssues.length > 0) {
            console.error("❌ Configuration validation failed:\n");
            fatalIssues.forEach((issue) => {
                console.error(` ERROR [${issue.source}] ${issue.message}`);
            });
            process.exit(1);
        }
        console.log("\n ✅ Configuration is valid");
        console.log(" Pipeline configuration is valid. No steps were executed.");
        console.log(" Remove --dry-run to execute the full pipeline.\n");
        process.exit(0);
    }
    const appCtx = buildAppContext(resolved, ROOT);
    const startedAt = Date.now();
    const stepList = buildStepSequence(appCtx, startedAt);
    const outcome = await orchestratePipeline(appCtx, stepList);
    writePipelineResult(outcome);
    process.exit(outcome.success ? 0 : 1);
}
|
|
287
|
+
// ---------------------------------------------------------------------------
|
|
288
|
+
// Internal helpers
|
|
289
|
+
// ---------------------------------------------------------------------------
|
|
290
|
+
/**
 * Convert the raw Commander options into the resolved options object.
 * All of the work is done by computeResolvedOptions(); this is a local alias.
 */
function resolveOptions(opts) {
    const resolved = computeResolvedOptions(opts);
    return resolved;
}
|
|
296
|
+
/**
 * Best-effort dump of the pipeline result as pretty-printed JSON to
 * `<ROOT>/results/latest/pipeline-result.json`, logging the path on success.
 *
 * Any failure while serializing or writing is ignored on purpose, so a
 * missing output directory never fails the run.
 */
function writePipelineResult(result) {
    const destination = resolve(ROOT, "results", "latest", "pipeline-result.json");
    try {
        const serialized = JSON.stringify(result, null, 2);
        writeFileSync(destination, serialized);
        console.log(` 📄 Pipeline result: ${destination}\n`);
    }
    catch {
        // Intentionally ignored: results/latest/ may not exist yet.
    }
}
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* pipeline command — the main evaluation pipeline orchestrator.
|
|
3
|
+
*
|
|
4
|
+
* Defines all 36+ CLI flags via Commander, resolves them into a typed
|
|
5
|
+
* options object, bridges to process.env for downstream modules, and
|
|
6
|
+
* delegates to executePipeline() (loaded lazily from ./pipeline-action.js).
|
|
7
|
+
*
|
|
8
|
+
* @see docs/API.md for the full flag reference.
|
|
9
|
+
*/
|
|
10
|
+
import { Command } from "commander";
|
|
11
|
+
/**
 * Shape of the options object Commander hands to the `pipeline` action.
 *
 * Property names are the camelCase form of the kebab-case flags. Negatable
 * flags (`--no-cache`, `--no-gap-analysis`, ...) appear under their positive
 * name. Fields are grouped by concern; order carries no meaning.
 */
export interface PipelineCliOptions {
    // --- Run mode and configuration ---------------------------------------
    /** `--mode`: evaluation mode; validated by the action before use. */
    mode: string;
    /** `--config`: path to a pipeline config file. */
    config?: string;
    /** `--dry-run`: validate configuration only, no execution. */
    dryRun: boolean;
    /** `--source`: documentation source name. */
    source?: string;
    /** `--task-source`: where task definitions come from. */
    taskSource?: string;
    /** `--repo-tasks-path`: path to repo-based task definitions. */
    repoTasksPath?: string;
    /** Search mode flag; the action also validates it. */
    search?: string;

    // --- Skipping and caching ---------------------------------------------
    skipFetch: boolean;
    skipEval: boolean;
    /** `false` when `--no-cache` is passed. */
    cache: boolean;
    /** `false` when `--no-remote-cache` is passed. */
    remoteCache?: boolean;

    // --- Scoping -----------------------------------------------------------
    /** `--area`: comma-separated feature areas. */
    area?: string;
    /** `--task`: a specific task ID. */
    task?: string;
    /** `--changed-docs`: document slugs used to auto-scope tasks. */
    changedDocs?: string;

    // --- Evaluation tuning -------------------------------------------------
    concurrency?: number;
    graderReplications?: number;

    // --- Comparison --------------------------------------------------------
    /** `--before`: before-state for impact evaluation. */
    before?: string;
    compare: boolean;
    compareBaseline?: string;
    /** `--threshold`: noise threshold for comparison. */
    threshold?: number;

    // --- Optional analyses and reports --------------------------------------
    /** `false` when `--no-gap-analysis` is passed. */
    gapAnalysis: boolean;
    readiness: boolean;
    discoveryReport: boolean;
    /** `--output`: file to write the PR comment markdown to. */
    output?: string;
    promptfooUrl?: string;

    // --- Publishing ----------------------------------------------------------
    /** Left undefined when neither `--publish` nor `--no-publish` is given. */
    publish?: boolean;
    publishTag?: string;
    reportDataset?: string;
    reportProject?: string;

    // --- Debug ---------------------------------------------------------------
    debug: boolean;
    debugN?: number;
    debugPattern?: string;
    debugSample?: number;

    // --- Sanity source overrides ---------------------------------------------
    sanityProject?: string;
    sanityDataset?: string;
    sanityPerspective?: string;
    sanityStudioOrigin?: string;

    // --- Repeatable inputs (singular + plural aliases, merged by the action) --
    url: string[];
    urls: string[];
    header: string[];
    headers: string[];
    allowedOrigin: string[];
    allowedOrigins: string[];
    sanityDocument: string[];
    sanityDocuments: string[];
}
|
|
62
|
+
/** Build the Commander `pipeline` command ("Run the full evaluation pipeline"). */
export declare function createPipelineCommand(): Command;
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* pipeline command — the main evaluation pipeline orchestrator.
|
|
3
|
+
*
|
|
4
|
+
* Defines all 36+ CLI flags via Commander, resolves them into a typed
|
|
5
|
+
* options object, bridges to process.env for downstream modules, and
|
|
6
|
+
* delegates to executePipeline() (loaded lazily from ./pipeline-action.js).
|
|
7
|
+
*
|
|
8
|
+
* @see docs/API.md for the full flag reference.
|
|
9
|
+
*/
|
|
10
|
+
import { Command } from "commander";
|
|
11
|
+
import { addAgenticOptions, addDebugOptions, addSanitySourceOptions, } from "./shared/options.js";
|
|
12
|
+
/**
 * Commander argument parser for integer-valued flags.
 *
 * Commander calls custom parsers as `(value, previousValue)`. Handing it the
 * global `parseInt` directly therefore uses `previousValue` as the radix, so
 * a repeated flag such as `-j 4 -j 8` evaluates `parseInt("8", 4)` and yields
 * NaN. This wrapper ignores the previous value and always parses base 10.
 *
 * @param value      Raw flag value from the command line.
 * @param _previous  Previously parsed value supplied by Commander; unused.
 * @returns The parsed integer, or NaN when `value` is not numeric.
 */
function parseDecimalInt(value, _previous) {
    return parseInt(value, 10);
}
/**
 * Build the `pipeline` command: declares its flags, attaches the shared
 * debug / Sanity-source / agentic option groups, and wires the action.
 *
 * The action imports `./pipeline-action.js` lazily and forwards the parsed
 * options to `executePipeline()`.
 *
 * @returns The configured Commander command.
 */
export function createPipelineCommand() {
    const cmd = new Command("pipeline")
        .description("Run the full evaluation pipeline")
        .option("-m, --mode <mode>", "Evaluation mode: full (default — floor + ceiling + actual), baseline (floor + ceiling only), agentic (actual only), observed", "full")
        .option("-s, --source <name>", "Documentation source name (from sources.yaml)")
        .option("-n, --dry-run", "Validate configuration only, no execution", false)
        .option("--skip-fetch", "Reuse cached documentation contexts", false)
        .option("--skip-eval", "Recalculate from existing eval results", false)
        .option("--no-cache", "Bypass all pipeline-level caching")
        .option("--no-remote-cache", "Disable Content Lake cache lookup (local cache still active)")
        .option("-a, --area <areas>", "Scope to feature areas (comma-separated)")
        .option("-t, --task <id>", "Scope to specific task ID")
        .option("--changed-docs <slugs>", "Auto-scope to tasks affected by these document slugs")
        // Integer flags go through parseDecimalInt, not bare parseInt (see above).
        .option("-j, --concurrency <n>", "Max parallel API calls during evaluation", parseDecimalInt)
        .option("--grader-replications <n>", "Grader consistency replications", parseDecimalInt)
        .option("--before <source>", "Before-state for impact evaluation")
        .option("-c, --compare", "Compare scores against latest baseline", false)
        .option("--compare-baseline <path>", "Specific baseline file to compare")
        // parseFloat ignores its second argument, so it is safe to pass directly.
        .option("--threshold <n>", "Noise threshold for comparison (default: 2)", parseFloat)
        .option("--no-gap-analysis", "Skip failure mode + impact analysis")
        .option("--readiness", "Generate launch readiness checklist", false)
        .option("--discovery-report", "Generate agent discoverability report", false)
        // No default for --publish / --no-publish: the value stays undefined
        // unless one of the two flags is passed explicitly.
        .option("-p, --publish", "Write report to Sanity + fan out to sinks (auto-enabled for full runs when report store is configured)")
        .option("--no-publish", "Suppress auto-publishing")
        .option("--publish-tag <tag>", "Label for published report")
        .option("--report-dataset <name>", "Sanity dataset for report store")
        .option("--report-project <id>", "Sanity project ID for report store")
        .option("--config <path>", "Load pipeline config from a JSON/YAML file (overrides most CLI flags)")
        .option("-o, --output <path>", "Write PR comment markdown to file")
        .option("--promptfoo-url <url>", "Promptfoo share URL for report")
        .option("--task-source <type>", "Task definition source: content-lake (default — Sanity Content Lake), yaml (tasks/*.yaml files, legacy)", "content-lake")
        .option("--repo-tasks-path <path>", "Path to repo-based task definitions (.ailf/tasks/ directory)")
        .action(async (opts) => {
            // Lazy import: the action module is only loaded when the command runs.
            const { executePipeline } = await import("./pipeline-action.js");
            await executePipeline(opts);
        });
    // Add shared option groups
    addDebugOptions(cmd);
    addSanitySourceOptions(cmd);
    addAgenticOptions(cmd);
    return cmd;
}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* pr-comment command — generate a PR comment from evaluation scores.
|
|
3
|
+
*
|
|
4
|
+
* Uses the composition root to wire adapters, then calls
|
|
5
|
+
* generatePrComment() directly from pipeline/.
|
|
6
|
+
*/
|
|
7
|
+
import { Command } from "commander";
|
|
8
|
+
/** Build the Commander `pr-comment` command. */
export declare function createPrCommentCommand(): Command;
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* pr-comment command — generate a PR comment from evaluation scores.
|
|
3
|
+
*
|
|
4
|
+
* Uses the composition root to wire adapters, then calls
|
|
5
|
+
* generatePrComment() directly from pipeline/.
|
|
6
|
+
*/
|
|
7
|
+
import { dirname, resolve } from "path";
|
|
8
|
+
import { fileURLToPath } from "url";
|
|
9
|
+
import { Command } from "commander";
|
|
10
|
+
import { createAppContext } from "../composition-root.js";
|
|
11
|
+
import { generatePrComment } from "../pipeline/pr-comment.js";
|
|
12
|
+
// Directory containing this module, derived from import.meta.url.
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
13
|
+
// Two directory levels above this module; passed to createAppContext() as rootDir.
const ROOT = resolve(__dirname, "..", "..");
|
|
14
|
+
/**
 * Build the `pr-comment` command, which renders a markdown PR comment from
 * evaluation scores.
 *
 * The action builds an AppContext with fetching, evaluation, comparison,
 * analyses, publishing and caching all switched off, then calls
 * generatePrComment() with the context's root directory.
 *
 * Failures do not throw out of the action: the process exit code is set to 1
 * and the error is printed to stderr.
 *
 * @returns The configured Commander command.
 */
export function createPrCommentCommand() {
    return new Command("pr-comment")
        .description("Generate a markdown PR comment from evaluation scores")
        .option("--output <path>", "Write comment to file (default: stdout)")
        .option("--promptfoo-url <url>", "Promptfoo share URL to include")
        .action(async (opts) => {
            try {
                const ctx = createAppContext({
                    rootDir: ROOT,
                    mode: "baseline",
                    skipFetch: true,
                    skipEval: true,
                    compareEnabled: false,
                    gapAnalysisEnabled: false,
                    readinessEnabled: false,
                    discoveryReportEnabled: false,
                    publishEnabled: false,
                    noCache: true,
                    noRemoteCache: true,
                    searchMode: "open",
                });
                // Awaited so that a rejection (should generatePrComment return
                // a promise) reaches the catch below. Awaiting a plain value
                // is harmless.
                await generatePrComment({
                    outputPath: opts.output,
                    promptfooUrl: opts.promptfooUrl,
                    rootDir: ctx.config.rootDir,
                });
            }
            catch (err) {
                process.exitCode = 1;
                // Always report something: non-Error throwables used to fail
                // with exit code 1 and no output at all.
                console.error(err instanceof Error ? err.message : String(err));
            }
        });
}
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* publish command — manually publish a local evaluation report to the
|
|
3
|
+
* Sanity Content Lake without re-running the pipeline.
|
|
4
|
+
*
|
|
5
|
+
* Reads a score-summary.json (defaulting to results/latest/score-summary.json),
|
|
6
|
+
* builds provenance, writes the report to Sanity, and fans out to configured
|
|
7
|
+
* sinks — exactly the same as the publish step in `ailf pipeline`, but
|
|
8
|
+
* standalone.
|
|
9
|
+
*
|
|
10
|
+
* Uses createAppContext() (composition root) for all infrastructure access.
|
|
11
|
+
*
|
|
12
|
+
* @example
|
|
13
|
+
* ailf publish # default path
|
|
14
|
+
* ailf publish ./my-results/score-summary.json # custom path
|
|
15
|
+
* ailf publish --tag "manual-2026-03-13" # with a label
|
|
16
|
+
* ailf publish --dry-run # preview without writing
|
|
17
|
+
*
|
|
18
|
+
* @see packages/eval/src/composition-root.ts
|
|
19
|
+
* @see docs/design-docs/report-store/architecture.md
|
|
20
|
+
*/
|
|
21
|
+
import { Command } from "commander";
|
|
22
|
+
/** Options accepted by the `publish` command. */
export interface PublishCommandOptions {
    /** Optional label for the published report (`--tag`). */
    tag?: string;
    /** `--dry-run`: preview without writing. */
    dryRun: boolean;
}
|
|
26
|
+
/** Factory for the `publish` command described in this file's header; returns a Commander Command. */
export declare function createPublishCommand(): Command;
|