@sanity/ailf 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +89 -0
- package/bin/ailf.js +64 -0
- package/canonical/grader-references/README.md +88 -0
- package/canonical/grader-references/groq.yaml +234 -0
- package/canonical/grader-references/studio-setup.yaml +275 -0
- package/canonical/reference-solutions/.gitkeep +1 -0
- package/canonical/reference-solutions/frameworks/nuxt.ts +119 -0
- package/canonical/reference-solutions/frameworks/remix.tsx +100 -0
- package/canonical/reference-solutions/functions/publish-webhook.ts +60 -0
- package/canonical/reference-solutions/groq/advanced-filtering.ts +379 -0
- package/canonical/reference-solutions/groq/blog-queries.ts +137 -0
- package/canonical/reference-solutions/groq/joins-references.ts +300 -0
- package/canonical/reference-solutions/nextjs/app-router-integration.tsx +128 -0
- package/canonical/reference-solutions/studio-setup/blog-schema.ts +143 -0
- package/canonical/reference-solutions/studio-setup/custom-tool.tsx +78 -0
- package/canonical/reference-solutions/visual-editing/live-preview.tsx +137 -0
- package/canonical/reference-solutions/visual-editing/presentation-nextjs.tsx +130 -0
- package/config/airbyte/ai_literacy_framework.connector.yaml +639 -0
- package/config/bigquery/README.md +74 -0
- package/config/bigquery/views/area_scores.sql +87 -0
- package/config/bigquery/views/reports.sql +49 -0
- package/config/features.yaml +116 -0
- package/config/models.yaml +115 -0
- package/config/prompts.yaml +75 -0
- package/config/rubrics.yaml +62 -0
- package/config/schedules.yaml +43 -0
- package/config/sinks.yaml +54 -0
- package/config/sources.yaml +51 -0
- package/config/thresholds.yaml +49 -0
- package/dist/_vendor/ailf-core/examples/index.d.ts +190 -0
- package/dist/_vendor/ailf-core/examples/index.js +285 -0
- package/dist/_vendor/ailf-core/index.d.ts +17 -0
- package/dist/_vendor/ailf-core/index.js +17 -0
- package/dist/_vendor/ailf-core/ports/cache-store.d.ts +72 -0
- package/dist/_vendor/ailf-core/ports/cache-store.js +17 -0
- package/dist/_vendor/ailf-core/ports/config-source.d.ts +33 -0
- package/dist/_vendor/ailf-core/ports/config-source.js +15 -0
- package/dist/_vendor/ailf-core/ports/context.d.ts +172 -0
- package/dist/_vendor/ailf-core/ports/context.js +14 -0
- package/dist/_vendor/ailf-core/ports/doc-fetcher.d.ts +131 -0
- package/dist/_vendor/ailf-core/ports/doc-fetcher.js +12 -0
- package/dist/_vendor/ailf-core/ports/eval-runner.d.ts +24 -0
- package/dist/_vendor/ailf-core/ports/eval-runner.js +8 -0
- package/dist/_vendor/ailf-core/ports/index.d.ts +15 -0
- package/dist/_vendor/ailf-core/ports/index.js +7 -0
- package/dist/_vendor/ailf-core/ports/logger.d.ts +36 -0
- package/dist/_vendor/ailf-core/ports/logger.js +11 -0
- package/dist/_vendor/ailf-core/ports/pipeline-step.d.ts +46 -0
- package/dist/_vendor/ailf-core/ports/pipeline-step.js +8 -0
- package/dist/_vendor/ailf-core/ports/task-source.d.ts +159 -0
- package/dist/_vendor/ailf-core/ports/task-source.js +72 -0
- package/dist/_vendor/ailf-core/schemas/callback-payload.d.ts +24 -0
- package/dist/_vendor/ailf-core/schemas/callback-payload.js +29 -0
- package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +55 -0
- package/dist/_vendor/ailf-core/schemas/eval-config.js +78 -0
- package/dist/_vendor/ailf-core/schemas/index.d.ts +16 -0
- package/dist/_vendor/ailf-core/schemas/index.js +16 -0
- package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +125 -0
- package/dist/_vendor/ailf-core/schemas/pipeline-request.js +67 -0
- package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +531 -0
- package/dist/_vendor/ailf-core/schemas/pipeline.js +318 -0
- package/dist/_vendor/ailf-core/schemas/schedules.d.ts +68 -0
- package/dist/_vendor/ailf-core/schemas/schedules.js +74 -0
- package/dist/_vendor/ailf-core/schemas/sinks.d.ts +207 -0
- package/dist/_vendor/ailf-core/schemas/sinks.js +108 -0
- package/dist/_vendor/ailf-core/services/comparison-formatters.d.ts +18 -0
- package/dist/_vendor/ailf-core/services/comparison-formatters.js +189 -0
- package/dist/_vendor/ailf-core/services/config-helpers.d.ts +41 -0
- package/dist/_vendor/ailf-core/services/config-helpers.js +86 -0
- package/dist/_vendor/ailf-core/services/index.d.ts +12 -0
- package/dist/_vendor/ailf-core/services/index.js +12 -0
- package/dist/_vendor/ailf-core/services/scoring.d.ts +49 -0
- package/dist/_vendor/ailf-core/services/scoring.js +222 -0
- package/dist/_vendor/ailf-core/types/index.d.ts +1082 -0
- package/dist/_vendor/ailf-core/types/index.js +21 -0
- package/dist/_vendor/ailf-core/types/scoring-input.d.ts +54 -0
- package/dist/_vendor/ailf-core/types/scoring-input.js +9 -0
- package/dist/_vendor/ailf-shared/dimension-names.d.ts +21 -0
- package/dist/_vendor/ailf-shared/dimension-names.js +27 -0
- package/dist/_vendor/ailf-shared/document-ref.d.ts +29 -0
- package/dist/_vendor/ailf-shared/document-ref.js +1 -0
- package/dist/_vendor/ailf-shared/eval-modes.d.ts +12 -0
- package/dist/_vendor/ailf-shared/eval-modes.js +8 -0
- package/dist/_vendor/ailf-shared/index.d.ts +16 -0
- package/dist/_vendor/ailf-shared/index.js +16 -0
- package/dist/_vendor/ailf-shared/noise-threshold.d.ts +9 -0
- package/dist/_vendor/ailf-shared/noise-threshold.js +9 -0
- package/dist/_vendor/ailf-shared/score-grades.d.ts +17 -0
- package/dist/_vendor/ailf-shared/score-grades.js +23 -0
- package/dist/adapters/cache/content-lake-cache.d.ts +24 -0
- package/dist/adapters/cache/content-lake-cache.js +59 -0
- package/dist/adapters/cache/filesystem-cache.d.ts +18 -0
- package/dist/adapters/cache/filesystem-cache.js +54 -0
- package/dist/adapters/cache/index.d.ts +2 -0
- package/dist/adapters/cache/index.js +2 -0
- package/dist/adapters/config-sources/cli-config-adapter.d.ts +17 -0
- package/dist/adapters/config-sources/cli-config-adapter.js +23 -0
- package/dist/adapters/config-sources/file-config-adapter.d.ts +26 -0
- package/dist/adapters/config-sources/file-config-adapter.js +96 -0
- package/dist/adapters/config-sources/index.d.ts +2 -0
- package/dist/adapters/config-sources/index.js +2 -0
- package/dist/adapters/doc-fetchers/index.d.ts +1 -0
- package/dist/adapters/doc-fetchers/index.js +1 -0
- package/dist/adapters/doc-fetchers/sanity-doc-fetcher.d.ts +76 -0
- package/dist/adapters/doc-fetchers/sanity-doc-fetcher.js +620 -0
- package/dist/adapters/eval-runners/index.d.ts +1 -0
- package/dist/adapters/eval-runners/index.js +1 -0
- package/dist/adapters/eval-runners/promptfoo-eval-adapter.d.ts +14 -0
- package/dist/adapters/eval-runners/promptfoo-eval-adapter.js +63 -0
- package/dist/adapters/index.d.ts +12 -0
- package/dist/adapters/index.js +12 -0
- package/dist/adapters/loggers/console-logger.d.ts +22 -0
- package/dist/adapters/loggers/console-logger.js +54 -0
- package/dist/adapters/loggers/index.d.ts +9 -0
- package/dist/adapters/loggers/index.js +9 -0
- package/dist/adapters/loggers/json-logger.d.ts +18 -0
- package/dist/adapters/loggers/json-logger.js +33 -0
- package/dist/adapters/loggers/quiet-logger.d.ts +16 -0
- package/dist/adapters/loggers/quiet-logger.js +30 -0
- package/dist/adapters/task-sources/composite-task-source.d.ts +20 -0
- package/dist/adapters/task-sources/composite-task-source.js +59 -0
- package/dist/adapters/task-sources/content-lake-task-source.d.ts +20 -0
- package/dist/adapters/task-sources/content-lake-task-source.js +219 -0
- package/dist/adapters/task-sources/index.d.ts +7 -0
- package/dist/adapters/task-sources/index.js +7 -0
- package/dist/adapters/task-sources/repo-schemas.d.ts +245 -0
- package/dist/adapters/task-sources/repo-schemas.js +234 -0
- package/dist/adapters/task-sources/repo-task-source.d.ts +22 -0
- package/dist/adapters/task-sources/repo-task-source.js +104 -0
- package/dist/adapters/task-sources/repo-trigger.d.ts +52 -0
- package/dist/adapters/task-sources/repo-trigger.js +153 -0
- package/dist/adapters/task-sources/repo-validation.d.ts +49 -0
- package/dist/adapters/task-sources/repo-validation.js +164 -0
- package/dist/adapters/task-sources/yaml-task-source.d.ts +18 -0
- package/dist/adapters/task-sources/yaml-task-source.js +136 -0
- package/dist/agent-observer/agentic-provider.d.ts +132 -0
- package/dist/agent-observer/agentic-provider.js +983 -0
- package/dist/agent-observer/classifier.d.ts +62 -0
- package/dist/agent-observer/classifier.js +269 -0
- package/dist/agent-observer/index.d.ts +7 -0
- package/dist/agent-observer/index.js +4 -0
- package/dist/agent-observer/pricing.d.ts +35 -0
- package/dist/agent-observer/pricing.js +82 -0
- package/dist/agent-observer/provider.d.ts +77 -0
- package/dist/agent-observer/provider.js +151 -0
- package/dist/agent-observer/proxy.d.ts +91 -0
- package/dist/agent-observer/proxy.js +321 -0
- package/dist/agent-observer/test-imports.d.ts +7 -0
- package/dist/agent-observer/test-imports.js +185 -0
- package/dist/agent-observer/types.d.ts +137 -0
- package/dist/agent-observer/types.js +16 -0
- package/dist/assertions/source-isolation.d.ts +72 -0
- package/dist/assertions/source-isolation.js +117 -0
- package/dist/cli.d.ts +24 -0
- package/dist/cli.js +199 -0
- package/dist/commands/agent-report.d.ts +5 -0
- package/dist/commands/agent-report.js +69 -0
- package/dist/commands/baseline.d.ts +9 -0
- package/dist/commands/baseline.js +141 -0
- package/dist/commands/cache.d.ts +13 -0
- package/dist/commands/cache.js +135 -0
- package/dist/commands/calculate-scores.d.ts +8 -0
- package/dist/commands/calculate-scores.js +48 -0
- package/dist/commands/compare.d.ts +8 -0
- package/dist/commands/compare.js +120 -0
- package/dist/commands/completion.d.ts +18 -0
- package/dist/commands/completion.js +260 -0
- package/dist/commands/coverage-audit.d.ts +7 -0
- package/dist/commands/coverage-audit.js +40 -0
- package/dist/commands/discovery-report.d.ts +10 -0
- package/dist/commands/discovery-report.js +44 -0
- package/dist/commands/eval.d.ts +9 -0
- package/dist/commands/eval.js +35 -0
- package/dist/commands/explain-handler.d.ts +34 -0
- package/dist/commands/explain-handler.js +719 -0
- package/dist/commands/fetch-docs.d.ts +8 -0
- package/dist/commands/fetch-docs.js +128 -0
- package/dist/commands/generate-configs.d.ts +8 -0
- package/dist/commands/generate-configs.js +46 -0
- package/dist/commands/grader/index.d.ts +11 -0
- package/dist/commands/grader/index.js +118 -0
- package/dist/commands/init.d.ts +19 -0
- package/dist/commands/init.js +150 -0
- package/dist/commands/interactive.d.ts +12 -0
- package/dist/commands/interactive.js +238 -0
- package/dist/commands/lookup-doc.d.ts +15 -0
- package/dist/commands/lookup-doc.js +84 -0
- package/dist/commands/measure-retrieval.d.ts +5 -0
- package/dist/commands/measure-retrieval.js +65 -0
- package/dist/commands/pipeline-action.d.ts +71 -0
- package/dist/commands/pipeline-action.js +305 -0
- package/dist/commands/pipeline.d.ts +62 -0
- package/dist/commands/pipeline.js +53 -0
- package/dist/commands/pr-comment.d.ts +8 -0
- package/dist/commands/pr-comment.js +47 -0
- package/dist/commands/publish.d.ts +26 -0
- package/dist/commands/publish.js +253 -0
- package/dist/commands/readiness-report.d.ts +10 -0
- package/dist/commands/readiness-report.js +104 -0
- package/dist/commands/shared/options.d.ts +29 -0
- package/dist/commands/shared/options.js +57 -0
- package/dist/commands/update-quality-scores.d.ts +5 -0
- package/dist/commands/update-quality-scores.js +20 -0
- package/dist/commands/validate-tasks.d.ts +16 -0
- package/dist/commands/validate-tasks.js +93 -0
- package/dist/commands/validate.d.ts +9 -0
- package/dist/commands/validate.js +73 -0
- package/dist/commands/webhook-server.d.ts +5 -0
- package/dist/commands/webhook-server.js +30 -0
- package/dist/commands/weekly-digest.d.ts +10 -0
- package/dist/commands/weekly-digest.js +104 -0
- package/dist/composition-root.d.ts +26 -0
- package/dist/composition-root.js +107 -0
- package/dist/interpolate.d.ts +26 -0
- package/dist/interpolate.js +70 -0
- package/dist/job-store.d.ts +104 -0
- package/dist/job-store.js +188 -0
- package/dist/lib/agent-behavior-report.d.ts +8 -0
- package/dist/lib/agent-behavior-report.js +185 -0
- package/dist/lib/baseline.d.ts +19 -0
- package/dist/lib/baseline.js +153 -0
- package/dist/lib/calculate-scores.d.ts +23 -0
- package/dist/lib/calculate-scores.js +42 -0
- package/dist/lib/compare.d.ts +18 -0
- package/dist/lib/compare.js +170 -0
- package/dist/lib/coverage-audit.d.ts +4 -0
- package/dist/lib/coverage-audit.js +42 -0
- package/dist/lib/discovery-report.d.ts +13 -0
- package/dist/lib/discovery-report.js +57 -0
- package/dist/lib/fetch-docs.d.ts +30 -0
- package/dist/lib/fetch-docs.js +171 -0
- package/dist/lib/generate-configs.d.ts +25 -0
- package/dist/lib/generate-configs.js +42 -0
- package/dist/lib/grader-api.d.ts +21 -0
- package/dist/lib/grader-api.js +34 -0
- package/dist/lib/grader-compare.d.ts +19 -0
- package/dist/lib/grader-compare.js +91 -0
- package/dist/lib/grader-consistency.d.ts +27 -0
- package/dist/lib/grader-consistency.js +79 -0
- package/dist/lib/grader-sensitivity.d.ts +19 -0
- package/dist/lib/grader-sensitivity.js +75 -0
- package/dist/lib/grader-validate.d.ts +19 -0
- package/dist/lib/grader-validate.js +78 -0
- package/dist/lib/measure-retrieval.d.ts +14 -0
- package/dist/lib/measure-retrieval.js +71 -0
- package/dist/lib/pr-comment.d.ts +16 -0
- package/dist/lib/pr-comment.js +28 -0
- package/dist/lib/readiness-report.d.ts +13 -0
- package/dist/lib/readiness-report.js +108 -0
- package/dist/lib/webhook-server.d.ts +11 -0
- package/dist/lib/webhook-server.js +24 -0
- package/dist/lib/weekly-digest.d.ts +24 -0
- package/dist/lib/weekly-digest.js +148 -0
- package/dist/orchestration/build-app-context.d.ts +27 -0
- package/dist/orchestration/build-app-context.js +81 -0
- package/dist/orchestration/build-step-sequence.d.ts +15 -0
- package/dist/orchestration/build-step-sequence.js +84 -0
- package/dist/orchestration/config-to-source-overrides.d.ts +9 -0
- package/dist/orchestration/config-to-source-overrides.js +28 -0
- package/dist/orchestration/env-bridge.d.ts +21 -0
- package/dist/orchestration/env-bridge.js +66 -0
- package/dist/orchestration/index.d.ts +11 -0
- package/dist/orchestration/index.js +11 -0
- package/dist/orchestration/pipeline-orchestrator.d.ts +24 -0
- package/dist/orchestration/pipeline-orchestrator.js +153 -0
- package/dist/orchestration/step-runner.d.ts +20 -0
- package/dist/orchestration/step-runner.js +88 -0
- package/dist/orchestration/steps/calculate-scores-step.d.ts +13 -0
- package/dist/orchestration/steps/calculate-scores-step.js +95 -0
- package/dist/orchestration/steps/callback-step.d.ts +24 -0
- package/dist/orchestration/steps/callback-step.js +76 -0
- package/dist/orchestration/steps/compare-step.d.ts +14 -0
- package/dist/orchestration/steps/compare-step.js +92 -0
- package/dist/orchestration/steps/discovery-report-step.d.ts +13 -0
- package/dist/orchestration/steps/discovery-report-step.js +55 -0
- package/dist/orchestration/steps/fetch-docs-shell.d.ts +17 -0
- package/dist/orchestration/steps/fetch-docs-shell.js +30 -0
- package/dist/orchestration/steps/fetch-docs-step.d.ts +14 -0
- package/dist/orchestration/steps/fetch-docs-step.js +135 -0
- package/dist/orchestration/steps/gap-analysis-step.d.ts +16 -0
- package/dist/orchestration/steps/gap-analysis-step.js +136 -0
- package/dist/orchestration/steps/generate-configs-step.d.ts +14 -0
- package/dist/orchestration/steps/generate-configs-step.js +85 -0
- package/dist/orchestration/steps/grader-consistency-step.d.ts +13 -0
- package/dist/orchestration/steps/grader-consistency-step.js +64 -0
- package/dist/orchestration/steps/index.d.ts +19 -0
- package/dist/orchestration/steps/index.js +19 -0
- package/dist/orchestration/steps/mirror-repo-tasks-step.d.ts +21 -0
- package/dist/orchestration/steps/mirror-repo-tasks-step.js +94 -0
- package/dist/orchestration/steps/publish-report-step.d.ts +26 -0
- package/dist/orchestration/steps/publish-report-step.js +216 -0
- package/dist/orchestration/steps/readiness-step.d.ts +13 -0
- package/dist/orchestration/steps/readiness-step.js +91 -0
- package/dist/orchestration/steps/report-step.d.ts +12 -0
- package/dist/orchestration/steps/report-step.js +49 -0
- package/dist/orchestration/steps/run-eval-step.d.ts +17 -0
- package/dist/orchestration/steps/run-eval-step.js +195 -0
- package/dist/orchestration/steps/validate-step.d.ts +12 -0
- package/dist/orchestration/steps/validate-step.js +41 -0
- package/dist/pipeline/agent-behavior-report.d.ts +53 -0
- package/dist/pipeline/agent-behavior-report.js +132 -0
- package/dist/pipeline/attribution.d.ts +47 -0
- package/dist/pipeline/attribution.js +226 -0
- package/dist/pipeline/baseline.d.ts +37 -0
- package/dist/pipeline/baseline.js +141 -0
- package/dist/pipeline/cache.d.ts +101 -0
- package/dist/pipeline/cache.js +283 -0
- package/dist/pipeline/calculate-scores.d.ts +102 -0
- package/dist/pipeline/calculate-scores.js +1128 -0
- package/dist/pipeline/callback-delivery.d.ts +50 -0
- package/dist/pipeline/callback-delivery.js +89 -0
- package/dist/pipeline/checks.d.ts +39 -0
- package/dist/pipeline/checks.js +280 -0
- package/dist/pipeline/classify-url.d.ts +61 -0
- package/dist/pipeline/classify-url.js +93 -0
- package/dist/pipeline/compare.d.ts +31 -0
- package/dist/pipeline/compare.js +208 -0
- package/dist/pipeline/coverage-audit.d.ts +39 -0
- package/dist/pipeline/coverage-audit.js +165 -0
- package/dist/pipeline/degradations.d.ts +85 -0
- package/dist/pipeline/degradations.js +242 -0
- package/dist/pipeline/discovery-report.d.ts +55 -0
- package/dist/pipeline/discovery-report.js +178 -0
- package/dist/pipeline/eval-constants.d.ts +68 -0
- package/dist/pipeline/eval-constants.js +111 -0
- package/dist/pipeline/eval-fingerprint.d.ts +66 -0
- package/dist/pipeline/eval-fingerprint.js +175 -0
- package/dist/pipeline/expand-tasks.d.ts +220 -0
- package/dist/pipeline/expand-tasks.js +421 -0
- package/dist/pipeline/failure-modes.d.ts +46 -0
- package/dist/pipeline/failure-modes.js +348 -0
- package/dist/pipeline/fetch-url-content.d.ts +44 -0
- package/dist/pipeline/fetch-url-content.js +93 -0
- package/dist/pipeline/gap-analysis.d.ts +48 -0
- package/dist/pipeline/gap-analysis.js +231 -0
- package/dist/pipeline/generate-configs.d.ts +72 -0
- package/dist/pipeline/generate-configs.js +395 -0
- package/dist/pipeline/grader-api.d.ts +49 -0
- package/dist/pipeline/grader-api.js +200 -0
- package/dist/pipeline/grader-compare-runner.d.ts +44 -0
- package/dist/pipeline/grader-compare-runner.js +301 -0
- package/dist/pipeline/grader-comparison.d.ts +111 -0
- package/dist/pipeline/grader-comparison.js +161 -0
- package/dist/pipeline/grader-consistency-runner.d.ts +60 -0
- package/dist/pipeline/grader-consistency-runner.js +270 -0
- package/dist/pipeline/grader-consistency.d.ts +103 -0
- package/dist/pipeline/grader-consistency.js +146 -0
- package/dist/pipeline/grader-sensitivity-runner.d.ts +40 -0
- package/dist/pipeline/grader-sensitivity-runner.js +282 -0
- package/dist/pipeline/grader-sensitivity.d.ts +94 -0
- package/dist/pipeline/grader-sensitivity.js +144 -0
- package/dist/pipeline/grader-validate-runner.d.ts +38 -0
- package/dist/pipeline/grader-validate-runner.js +229 -0
- package/dist/pipeline/grader-validation.d.ts +107 -0
- package/dist/pipeline/grader-validation.js +169 -0
- package/dist/pipeline/map-request-to-config.d.ts +19 -0
- package/dist/pipeline/map-request-to-config.js +80 -0
- package/dist/pipeline/measure-retrieval.d.ts +59 -0
- package/dist/pipeline/measure-retrieval.js +111 -0
- package/dist/pipeline/mirror-repo-tasks.d.ts +86 -0
- package/dist/pipeline/mirror-repo-tasks.js +350 -0
- package/dist/pipeline/plan-format.d.ts +33 -0
- package/dist/pipeline/plan-format.js +202 -0
- package/dist/pipeline/plan.d.ts +169 -0
- package/dist/pipeline/plan.js +708 -0
- package/dist/pipeline/pr-comment.d.ts +19 -0
- package/dist/pipeline/pr-comment.js +502 -0
- package/dist/pipeline/probe.d.ts +52 -0
- package/dist/pipeline/probe.js +390 -0
- package/dist/pipeline/provenance.d.ts +47 -0
- package/dist/pipeline/provenance.js +146 -0
- package/dist/pipeline/readiness-report.d.ts +87 -0
- package/dist/pipeline/readiness-report.js +205 -0
- package/dist/pipeline/release-classification.d.ts +54 -0
- package/dist/pipeline/release-classification.js +238 -0
- package/dist/pipeline/release-report.d.ts +37 -0
- package/dist/pipeline/release-report.js +222 -0
- package/dist/pipeline/repo-eval-comment.d.ts +37 -0
- package/dist/pipeline/repo-eval-comment.js +165 -0
- package/dist/pipeline/repo-threshold-evaluator.d.ts +89 -0
- package/dist/pipeline/repo-threshold-evaluator.js +162 -0
- package/dist/pipeline/resolve-mappings.d.ts +35 -0
- package/dist/pipeline/resolve-mappings.js +72 -0
- package/dist/pipeline/retrieval-metrics.d.ts +39 -0
- package/dist/pipeline/retrieval-metrics.js +136 -0
- package/dist/pipeline/reverse-mapping.d.ts +67 -0
- package/dist/pipeline/reverse-mapping.js +88 -0
- package/dist/pipeline/schemas.d.ts +9 -0
- package/dist/pipeline/schemas.js +9 -0
- package/dist/pipeline/steps/calculate-scores-step.d.ts +11 -0
- package/dist/pipeline/steps/calculate-scores-step.js +89 -0
- package/dist/pipeline/steps/compare-step.d.ts +18 -0
- package/dist/pipeline/steps/compare-step.js +90 -0
- package/dist/pipeline/steps/eval-step.d.ts +53 -0
- package/dist/pipeline/steps/eval-step.js +347 -0
- package/dist/pipeline/steps/fetch-docs-step.d.ts +11 -0
- package/dist/pipeline/steps/fetch-docs-step.js +84 -0
- package/dist/pipeline/steps/generate-configs-step.d.ts +11 -0
- package/dist/pipeline/steps/generate-configs-step.js +98 -0
- package/dist/pipeline/steps/grader-consistency-step.d.ts +21 -0
- package/dist/pipeline/steps/grader-consistency-step.js +74 -0
- package/dist/pipeline/steps/publish-report-step.d.ts +57 -0
- package/dist/pipeline/steps/publish-report-step.js +243 -0
- package/dist/pipeline/steps/report-step.d.ts +13 -0
- package/dist/pipeline/steps/report-step.js +56 -0
- package/dist/pipeline/steps/update-scores-step.d.ts +11 -0
- package/dist/pipeline/steps/update-scores-step.js +42 -0
- package/dist/pipeline/targeted-loo.d.ts +88 -0
- package/dist/pipeline/targeted-loo.js +203 -0
- package/dist/pipeline/thresholds.d.ts +27 -0
- package/dist/pipeline/thresholds.js +245 -0
- package/dist/pipeline/types.d.ts +10 -0
- package/dist/pipeline/types.js +10 -0
- package/dist/pipeline/validate.d.ts +67 -0
- package/dist/pipeline/validate.js +406 -0
- package/dist/pipeline/webhook-server.d.ts +37 -0
- package/dist/pipeline/webhook-server.js +133 -0
- package/dist/report-store.d.ts +84 -0
- package/dist/report-store.js +208 -0
- package/dist/sanity/client.d.ts +38 -0
- package/dist/sanity/client.js +86 -0
- package/dist/sanity/portable-text.d.ts +11 -0
- package/dist/sanity/portable-text.js +211 -0
- package/dist/sanity/queries.d.ts +133 -0
- package/dist/sanity/queries.js +300 -0
- package/dist/schedules/digest.d.ts +116 -0
- package/dist/schedules/digest.js +156 -0
- package/dist/schedules/index.d.ts +12 -0
- package/dist/schedules/index.js +10 -0
- package/dist/schedules/loader.d.ts +31 -0
- package/dist/schedules/loader.js +73 -0
- package/dist/schedules/schema.d.ts +9 -0
- package/dist/schedules/schema.js +9 -0
- package/dist/scripts/agent-behavior-report.d.ts +19 -0
- package/dist/scripts/agent-behavior-report.js +315 -0
- package/dist/scripts/baseline.d.ts +43 -0
- package/dist/scripts/baseline.js +267 -0
- package/dist/scripts/calculate-scores.d.ts +166 -0
- package/dist/scripts/calculate-scores.js +1296 -0
- package/dist/scripts/compare.d.ts +22 -0
- package/dist/scripts/compare.js +334 -0
- package/dist/scripts/coverage-audit.d.ts +44 -0
- package/dist/scripts/coverage-audit.js +209 -0
- package/dist/scripts/debug-eval.d.ts +19 -0
- package/dist/scripts/debug-eval.js +73 -0
- package/dist/scripts/discovery-report.d.ts +58 -0
- package/dist/scripts/discovery-report.js +250 -0
- package/dist/scripts/fetch-docs.d.ts +35 -0
- package/dist/scripts/fetch-docs.js +472 -0
- package/dist/scripts/generate-configs.d.ts +66 -0
- package/dist/scripts/generate-configs.js +459 -0
- package/dist/scripts/grader-api.d.ts +27 -0
- package/dist/scripts/grader-api.js +206 -0
- package/dist/scripts/grader-compare.d.ts +22 -0
- package/dist/scripts/grader-compare.js +368 -0
- package/dist/scripts/grader-consistency.d.ts +20 -0
- package/dist/scripts/grader-consistency.js +313 -0
- package/dist/scripts/grader-sensitivity.d.ts +22 -0
- package/dist/scripts/grader-sensitivity.js +354 -0
- package/dist/scripts/grader-validate.d.ts +19 -0
- package/dist/scripts/grader-validate.js +267 -0
- package/dist/scripts/measure-retrieval.d.ts +10 -0
- package/dist/scripts/measure-retrieval.js +145 -0
- package/dist/scripts/migrate-tasks-to-content-lake.d.ts +24 -0
- package/dist/scripts/migrate-tasks-to-content-lake.js +327 -0
- package/dist/scripts/pipeline.d.ts +76 -0
- package/dist/scripts/pipeline.js +1031 -0
- package/dist/scripts/pr-comment.d.ts +10 -0
- package/dist/scripts/pr-comment.js +510 -0
- package/dist/scripts/readiness-report.d.ts +88 -0
- package/dist/scripts/readiness-report.js +342 -0
- package/dist/scripts/update-quality-scores.d.ts +15 -0
- package/dist/scripts/update-quality-scores.js +184 -0
- package/dist/scripts/validate-task-sources.d.ts +21 -0
- package/dist/scripts/validate-task-sources.js +210 -0
- package/dist/scripts/validate.d.ts +13 -0
- package/dist/scripts/validate.js +79 -0
- package/dist/scripts/webhook-server.d.ts +26 -0
- package/dist/scripts/webhook-server.js +147 -0
- package/dist/scripts/weekly-digest.d.ts +24 -0
- package/dist/scripts/weekly-digest.js +144 -0
- package/dist/sinks/bigquery/index.d.ts +131 -0
- package/dist/sinks/bigquery/index.js +222 -0
- package/dist/sinks/format-slack.d.ts +64 -0
- package/dist/sinks/format-slack.js +306 -0
- package/dist/sinks/index.d.ts +23 -0
- package/dist/sinks/index.js +18 -0
- package/dist/sinks/loader.d.ts +18 -0
- package/dist/sinks/loader.js +82 -0
- package/dist/sinks/retry.d.ts +24 -0
- package/dist/sinks/retry.js +52 -0
- package/dist/sinks/schema.d.ts +9 -0
- package/dist/sinks/schema.js +9 -0
- package/dist/sinks/slack/format.d.ts +65 -0
- package/dist/sinks/slack/format.js +327 -0
- package/dist/sinks/slack/index.d.ts +27 -0
- package/dist/sinks/slack/index.js +78 -0
- package/dist/sinks/slack-sink.d.ts +27 -0
- package/dist/sinks/slack-sink.js +78 -0
- package/dist/sinks/types.d.ts +59 -0
- package/dist/sinks/types.js +44 -0
- package/dist/sinks/webhook/index.d.ts +19 -0
- package/dist/sinks/webhook/index.js +50 -0
- package/dist/sinks/webhook-sink.d.ts +19 -0
- package/dist/sinks/webhook-sink.js +50 -0
- package/dist/sources.d.ts +104 -0
- package/dist/sources.js +292 -0
- package/dist/webhook/budget.d.ts +42 -0
- package/dist/webhook/budget.js +60 -0
- package/dist/webhook/debounce.d.ts +67 -0
- package/dist/webhook/debounce.js +76 -0
- package/dist/webhook/dispatch.d.ts +45 -0
- package/dist/webhook/dispatch.js +84 -0
- package/dist/webhook/eval-request-handler.d.ts +87 -0
- package/dist/webhook/eval-request-handler.js +181 -0
- package/dist/webhook/handler.d.ts +88 -0
- package/dist/webhook/handler.js +203 -0
- package/dist/webhook/index.d.ts +17 -0
- package/dist/webhook/index.js +12 -0
- package/dist/webhook/types.d.ts +109 -0
- package/dist/webhook/types.js +10 -0
- package/package.json +72 -0
- package/tasks/.expanded.agentic.yaml +51 -0
- package/tasks/.expanded.yaml +66 -0
- package/tasks/frameworks.yaml +98 -0
- package/tasks/functions.yaml +51 -0
- package/tasks/groq.yaml +216 -0
- package/tasks/nextjs-live.yaml +62 -0
- package/tasks/studio-setup.yaml +111 -0
- package/tasks/visual-editing.yaml +120 -0
|
@@ -0,0 +1,231 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* pipeline/gap-analysis.ts
|
|
3
|
+
*
|
|
4
|
+
* Expected impact estimation for identified gaps.
|
|
5
|
+
*
|
|
6
|
+
* Phase 3b of the Scenario Matrix implementation.
|
|
7
|
+
*
|
|
8
|
+
* Given failure modes and scores, estimates the score lift that fixing
|
|
9
|
+
* each gap would produce. Gaps are prioritized by estimated lift × task count
|
|
10
|
+
* to produce an actionable remediation plan.
|
|
11
|
+
*
|
|
12
|
+
* The estimation model is conservative: it assumes fixing a gap raises the
|
|
13
|
+
* bottleneck dimension to the median of non-bottlenecked dimensions (not 100).
|
|
14
|
+
* This produces realistic estimates rather than theoretical maximums.
|
|
15
|
+
*
|
|
16
|
+
* @see docs/exec-plans/completed/scenario-matrix-implementation/phase-3-gap-analysis.md
|
|
17
|
+
*/
|
|
18
|
+
// ---------------------------------------------------------------------------
|
|
19
|
+
// Constants
|
|
20
|
+
// ---------------------------------------------------------------------------
|
|
21
|
+
/**
 * Fallback weight for each scoring dimension.
 *
 * Used as the default value of the `weights` parameter of `estimateImpact`.
 * The three weights sum to 1 and must be kept in sync with rubrics.yaml.
 */
const DEFAULT_WEIGHTS = {
    "code-correctness": 0.25,
    "doc-coverage": 0.25,
    "task-completion": 0.5,
};
|
|
27
|
+
/**
 * Dimensions that each failure mode typically bottlenecks.
 *
 * Keys are failure-mode names; each value lists the scoring dimensions that
 * mode is expected to hold back. An empty list means no dimension is
 * attributed to the mode, and `estimateImpact` skips such modes.
 */
const MODE_BOTTLENECKS = {
    "incorrect-docs": [
        "code-correctness",
        "task-completion",
    ],
    "missing-docs": [
        "task-completion",
        "doc-coverage",
    ],
    // Not a docs problem, so no dimension is attributed to it.
    "model-limitation": [],
    "outdated-docs": [
        "code-correctness",
        "doc-coverage",
    ],
    "poor-structure": [
        "doc-coverage",
        "task-completion",
    ],
    unclassified: [],
};
|
|
36
|
+
/**
 * Human-readable remediation advice, keyed by failure-mode name.
 *
 * Covers the same six failure modes as `MODE_BOTTLENECKS`.
 */
const REMEDIATION_MAP = {
    // Documentation exists but is wrong or stale.
    "incorrect-docs": "Fix factual errors in existing documentation",
    "missing-docs": "Write new documentation for uncovered functionality",
    "model-limitation": "Not a documentation problem — track for model improvement",
    "outdated-docs": "Update documentation to reflect current API/patterns",
    "poor-structure": "Restructure documentation for clarity and discoverability",
    // Cause could not be determined automatically.
    unclassified: "Flag for manual review — cause unclear",
};
|
|
45
|
+
// ---------------------------------------------------------------------------
|
|
46
|
+
// Public API
|
|
47
|
+
// ---------------------------------------------------------------------------
|
|
48
|
+
/**
 * Assemble the complete gap analysis report.
 *
 * Delegates the per-gap estimation to `estimateImpact`, then adds the
 * aggregate lift across all gaps and a generation timestamp.
 *
 * @param failureModeReport - Classified failure modes from Phase 3a
 * @param scores - Per-area feature scores
 * @param weights - Dimension weights; forwarded unchanged to
 *   `estimateImpact`, whose own default applies when this is `undefined`
 * @returns Report object with `gaps` (exactly as returned by
 *   `estimateImpact`), `generatedAt` (ISO-8601 timestamp of this call) and
 *   `totalPotentialLift` (sum of every gap's `estimatedLift`, rounded to
 *   one decimal place)
 */
export function buildGapAnalysisReport(failureModeReport, scores, weights) {
    const gaps = estimateImpact(failureModeReport, scores, weights);
    // Accumulate the raw lift first; rounding happens once on the total so
    // per-gap rounding error does not compound.
    let liftSum = 0;
    for (const gap of gaps) {
        liftSum += gap.estimatedLift;
    }
    const roundedLift = Math.round(liftSum * 10) / 10;
    return {
        gaps,
        generatedAt: new Date().toISOString(),
        totalPotentialLift: roundedLift,
    };
}
|
|
65
|
+
/**
 * Estimate the impact of fixing each identified gap.
 *
 * For every area that appears in both `failureModeReport.byArea` and
 * `scores`, and for each failure mode with a non-zero count in that area,
 * calculates the potential composite score lift from fixing the issue. The
 * model is conservative: bottleneck dimensions are assumed to rise only to
 * the median of the area's non-bottlenecked dimensions (or 70 when every
 * dimension is a bottleneck), not to 100.
 *
 * Skipped entirely:
 * - areas with no matching score entry,
 * - the "unclassified" and "model-limitation" modes,
 * - modes with no (or an empty) `MODE_BOTTLENECKS` entry.
 *
 * Note that `affectedTaskIds` and the task multiplier in `priority` are
 * area-wide: they cover every task in the area with any actionable failure,
 * not only the tasks exhibiting the gap's own failure mode.
 *
 * @param failureModeReport - Classified failure modes from Phase 3a
 * @param scores - Per-area feature scores
 * @param weights - Dimension weights (defaults to `DEFAULT_WEIGHTS`)
 * @returns Gap estimates sorted by priority (highest first)
 */
export function estimateImpact(failureModeReport, scores, weights = DEFAULT_WEIGHTS) {
    const gaps = [];
    const scoreByArea = new Map();
    for (const score of scores) {
        scoreByArea.set(score.feature, score);
    }
    // Group failure modes by area + mode combination
    for (const [area, areaData] of Object.entries(failureModeReport.byArea)) {
        const areaScore = scoreByArea.get(area);
        if (!areaScore)
            continue;
        // Judgments are attributed to an area by task-id prefix. Filter once
        // per area instead of re-scanning every judgment for each mode.
        const areaJudgments = failureModeReport.classifiedJudgments.filter((cj) => cj.judgment.taskId.startsWith(area));
        // Tasks in this area with at least one actionable failure
        const actionableTaskIds = areaJudgments
            .filter((cj) => cj.classification.mode !== "unclassified" &&
            cj.classification.mode !== "model-limitation")
            .map((cj) => cj.judgment.taskId);
        const uniqueTasks = [...new Set(actionableTaskIds)];
        // Dimension scores depend only on the area, so compute them once
        const dimensionScores = getDimensionScores(areaScore);
        // For each non-trivial failure mode in this area
        for (const [mode, count] of Object.entries(areaData.modes)) {
            if (count === 0)
                continue;
            if (mode === "unclassified" || mode === "model-limitation")
                continue;
            const bottleneckDimensions = MODE_BOTTLENECKS[mode];
            // A mode key without a bottleneck mapping (e.g. one introduced by a
            // newer classifier) is not actionable here; skip it instead of
            // throwing on `.length` of undefined.
            if (!bottleneckDimensions || bottleneckDimensions.length === 0)
                continue;
            const currentDimensionScores = {};
            for (const dim of bottleneckDimensions) {
                currentDimensionScores[dim] = dimensionScores[dim] ?? 0;
            }
            // Conservative estimation: target median of non-bottlenecked dimensions
            const nonBottleneckScores = Object.entries(dimensionScores)
                .filter(([dim]) => !bottleneckDimensions.includes(dim))
                .map(([, score]) => score);
            const targetScore = nonBottleneckScores.length > 0 ? median(nonBottleneckScores) : 70; // Fallback target
            // Estimate composite lift
            const estimatedLift = estimateCompositeLift(dimensionScores, bottleneckDimensions, targetScore, weights);
            // Determine confidence based on failure mode confidence distribution:
            // "high" when a strict majority of this mode's judgments are high
            // confidence, "medium" when at least one is, otherwise "low".
            const modeJudgments = areaJudgments.filter((cj) => cj.classification.mode === mode);
            const highConfCount = modeJudgments.filter((cj) => cj.classification.confidence === "high").length;
            let confidence;
            if (highConfCount > modeJudgments.length / 2) {
                confidence = "high";
            }
            else if (highConfCount > 0) {
                confidence = "medium";
            }
            else {
                confidence = "low";
            }
            gaps.push({
                // Fall back to the area name when no individual task could be attributed
                affectedTaskIds: uniqueTasks.length > 0 ? uniqueTasks : [area],
                area,
                bottleneckDimensions,
                confidence,
                currentDimensionScores,
                estimatedLift: Math.round(estimatedLift * 10) / 10,
                failureMode: mode,
                // Priority scales the lift by the number of affected tasks (at least 1)
                priority: Math.round(estimatedLift * Math.max(uniqueTasks.length, 1) * 10) / 10,
                remediation: REMEDIATION_MAP[mode],
            });
        }
    }
    // Sort by priority (highest first)
    return gaps.sort((a, b) => b.priority - a.priority);
}
|
|
141
|
+
// ---------------------------------------------------------------------------
|
|
142
|
+
// Formatting
|
|
143
|
+
// ---------------------------------------------------------------------------
|
|
144
|
+
/**
 * Render a gap analysis report as plain text for console output.
 *
 * Produces a title, then either a "no gaps" notice or a total-lift line
 * followed by a fixed-width table with one row per gap, in the order the
 * gaps appear in `report.gaps`. The result always ends with a newline.
 *
 * @param report - Gap analysis report (reads `gaps` and `totalPotentialLift`)
 * @returns The formatted multi-line string
 */
export function formatGapAnalysisConsole(report) {
    const title = ["📋 PRIORITIZED REMEDIATION PLAN", ""];
    if (report.gaps.length === 0) {
        return [...title, " No actionable gaps identified.", ""].join("\n");
    }
    const tableHead = [
        ` Total potential lift: +${report.totalPotentialLift.toFixed(1)} points`,
        "",
        // Priority table
        " Priority Area Failure Mode Est. Lift Tasks Action",
        " ──────── ──────────────── ─────────────── ───────── ───── ──────────────────────────────",
    ];
    const tableRows = report.gaps.map((gap, index) => {
        const rank = String(index + 1).padEnd(6);
        const lift = `+${gap.estimatedLift.toFixed(1)}`.padStart(9);
        const taskCount = String(gap.affectedTaskIds.length).padStart(5);
        return ` #${rank} ${gap.area.padEnd(16)} ${gap.failureMode.padEnd(15)} ${lift} ${taskCount} ${gap.remediation}`;
    });
    return [...title, ...tableHead, ...tableRows, ""].join("\n");
}
|
|
169
|
+
/**
 * Render a gap analysis report as a markdown section for PR comments.
 *
 * Emits a level-3 heading, then either a "no gaps" notice or a bold
 * total-lift line followed by a table with one row per gap. Each row's
 * failure mode is prefixed with a confidence marker: 🟢 for "high",
 * 🟡 for "medium", 🔴 for anything else.
 *
 * @param report - Gap analysis report (reads `gaps` and `totalPotentialLift`)
 * @returns The markdown string, ending with a newline
 */
export function formatGapAnalysisMarkdown(report) {
    const out = ["### 📋 Prioritized Remediation Plan", ""];
    if (report.gaps.length === 0) {
        out.push("No actionable gaps identified.", "");
        return out.join("\n");
    }
    out.push(`**Total potential lift: +${report.totalPotentialLift.toFixed(1)} points**`, "", "| Priority | Area | Failure Mode | Est. Lift | Tasks | Action |", "|----------|------|--------------|-----------|-------|--------|");
    report.gaps.forEach((gap, index) => {
        let confIcon;
        switch (gap.confidence) {
            case "high":
                confIcon = "🟢";
                break;
            case "medium":
                confIcon = "🟡";
                break;
            default:
                confIcon = "🔴";
        }
        const cells = [
            `#${index + 1}`,
            gap.area,
            `${confIcon} ${gap.failureMode}`,
            `+${gap.estimatedLift.toFixed(1)}`,
            `${gap.affectedTaskIds.length}`,
            `${gap.remediation}`,
        ];
        out.push(`| ${cells.join(" | ")} |`);
    });
    out.push("");
    return out.join("\n");
}
|
|
197
|
+
// ---------------------------------------------------------------------------
|
|
198
|
+
// Internal helpers
|
|
199
|
+
// ---------------------------------------------------------------------------
|
|
200
|
+
/**
 * Compute the composite score gain from lifting each bottleneck dimension
 * up to `targetScore`.
 *
 * For every bottleneck dimension, the headroom (target minus current score,
 * floored at zero) is multiplied by that dimension's weight; the products
 * are summed. Dimensions missing from `dimensionScores` count as a current
 * score of 0, and dimensions missing from `weights` contribute nothing.
 */
function estimateCompositeLift(dimensionScores, bottleneckDimensions, targetScore, weights) {
    return bottleneckDimensions.reduce((total, dim) => {
        const currentScore = dimensionScores[dim] ?? 0;
        const dimWeight = weights[dim] ?? 0;
        // Dimensions already at or above the target add no lift
        const gain = Math.max(0, targetScore - currentScore);
        return total + gain * dimWeight;
    }, 0);
}
|
|
214
|
+
/**
 * Map a FeatureScore's camelCase score fields onto the kebab-case
 * dimension keys ("code-correctness", "doc-coverage", "task-completion").
 */
function getDimensionScores(score) {
    const { codeCorrectness, docCoverage, taskCompletion } = score;
    const byDimension = {
        "code-correctness": codeCorrectness,
        "doc-coverage": docCoverage,
        "task-completion": taskCompletion,
    };
    return byDimension;
}
|
|
222
|
+
/**
 * Return the median of a list of numbers, or 0 for an empty list.
 * The input is not modified; an even-length list yields the mean of the
 * two middle values.
 */
function median(values) {
    const count = values.length;
    if (count === 0) {
        return 0;
    }
    // Sort a copy numerically (the default sort compares as strings)
    const ordered = Array.from(values);
    ordered.sort((x, y) => x - y);
    const upperMid = Math.floor(count / 2);
    if (count % 2 === 0) {
        return (ordered[upperMid - 1] + ordered[upperMid]) / 2;
    }
    return ordered[upperMid];
}
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* pipeline/generate-configs.ts
|
|
3
|
+
*
|
|
4
|
+
* Reads config/models.yaml (the central model registry) and generates all
|
|
5
|
+
* promptfoo config files with the correct provider entries.
|
|
6
|
+
*
|
|
7
|
+
* This keeps model definitions in one place — add a model to config/models.yaml
|
|
8
|
+
* and run `pnpm generate-configs` to propagate it to all eval modes.
|
|
9
|
+
*
|
|
10
|
+
* Generated configs:
|
|
11
|
+
* - promptfooconfig.yaml (baseline: with-docs vs without-docs)
|
|
12
|
+
* - promptfooconfig.observed.yaml (instrumented HTTP recording)
|
|
13
|
+
* - promptfooconfig.agentic.yaml (agentic tool-calling: naive vs optimized)
|
|
14
|
+
*
|
|
15
|
+
* All functions accept rootDir as a parameter — no module-level constants.
|
|
16
|
+
* No process.argv parsing. No env var fallbacks. Callers provide typed options.
|
|
17
|
+
*
|
|
18
|
+
* @see config/models.yaml — the central model registry
|
|
19
|
+
* @see docs/exec-plans/active/eliminate-lib-layer.md
|
|
20
|
+
*/
|
|
21
|
+
import { type TaskDefinition } from "../_vendor/ailf-core/index.d.ts";
|
|
22
|
+
import type { FilterOptions } from "./types.js";
|
|
23
|
+
import { type ResolvedSourceConfig } from "../sources.js";
|
|
24
|
+
export { extractModelName, extractProvider, mergeConfig, modelMatchesMode, } from "../_vendor/ailf-core/index.d.ts";
|
|
25
|
+
/**
 * Auto-discover all task YAML files in the tasks/ directory.
 *
 * @param rootDir - Directory the tasks/ folder is looked up in
 *   (presumably the eval package root; confirm against the implementation)
 * @returns The discovered task files as strings (presumably file paths;
 *   whether they are absolute or relative is not visible from this declaration)
 */
export declare function discoverTaskFiles(rootDir: string): string[];
|
|
27
|
+
/**
 * Prompt templates as returned by `loadPrompts`.
 *
 * Holds one entry per prompt variant; every entry has the same
 * `id` / `label` / `raw` shape.
 */
interface LoadedPrompts {
    /** Prompt entry for the agentic variant (presumably the agentic tool-calling eval mode; confirm). */
    agentic: {
        id: string;
        label: string;
        raw: string;
    };
    /** Prompt entry for the with-docs variant (presumably the baseline run that includes documentation; confirm). */
    withDocs: {
        id: string;
        label: string;
        raw: string;
    };
    /** Prompt entry for the without-docs variant (presumably the baseline run without documentation; confirm). */
    withoutDocs: {
        id: string;
        label: string;
        raw: string;
    };
}
|
|
44
|
+
/**
 * Load prompt templates from config/prompts.yaml. Throws if missing or malformed.
 *
 * @param rootDir - Directory that config/prompts.yaml is resolved against
 *   (presumably the eval package root; confirm against the implementation)
 * @returns The agentic, with-docs and without-docs prompt entries
 */
export declare function loadPrompts(rootDir: string): LoadedPrompts;
|
|
46
|
+
/**
 * Options for the generateConfigs function.
 *
 * Only `rootDir` is required; every other field is optional.
 */
export interface GenerateConfigsOptions {
    /** Allowed origins for agentic mode (controls source isolation assertion) */
    allowedOrigins?: string[];
    /** Filter to specific feature areas or task IDs */
    filter?: FilterOptions;
    /** Pre-resolved source config (skips loadSource() call) */
    resolvedSource?: ResolvedSourceConfig;
    /** Root directory of the eval package (required) */
    rootDir: string;
    /** Search mode for agentic mode */
    searchMode?: string;
    /** Documentation source name (e.g., "branch", "local") */
    source?: string;
    /** Pre-loaded task definitions from a TaskSource adapter.
     *  When provided, expandTaskDefinitions() is used instead of
     *  loadAndExpandTasks() (which reads from tasks/*.yaml files). */
    tasks?: TaskDefinition[];
}
|
|
65
|
+
/**
 * Generate Promptfoo config files from models.yaml + task definitions.
 *
 * All parameters are passed via the typed options object — no process.argv
 * parsing or env var fallbacks. Callers (command handlers, orchestration
 * steps) are responsible for resolving options from their own context.
 *
 * @param options - Generation options; `rootDir` is the only required field
 * @returns Nothing; the declared return type is `void`, so results are not
 *   returned to the caller (presumably the configs are written to disk — confirm)
 */
export declare function generateConfigs(options: GenerateConfigsOptions): void;
|