@sanity/ailf 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +89 -0
- package/bin/ailf.js +64 -0
- package/canonical/grader-references/README.md +88 -0
- package/canonical/grader-references/groq.yaml +234 -0
- package/canonical/grader-references/studio-setup.yaml +275 -0
- package/canonical/reference-solutions/.gitkeep +1 -0
- package/canonical/reference-solutions/frameworks/nuxt.ts +119 -0
- package/canonical/reference-solutions/frameworks/remix.tsx +100 -0
- package/canonical/reference-solutions/functions/publish-webhook.ts +60 -0
- package/canonical/reference-solutions/groq/advanced-filtering.ts +379 -0
- package/canonical/reference-solutions/groq/blog-queries.ts +137 -0
- package/canonical/reference-solutions/groq/joins-references.ts +300 -0
- package/canonical/reference-solutions/nextjs/app-router-integration.tsx +128 -0
- package/canonical/reference-solutions/studio-setup/blog-schema.ts +143 -0
- package/canonical/reference-solutions/studio-setup/custom-tool.tsx +78 -0
- package/canonical/reference-solutions/visual-editing/live-preview.tsx +137 -0
- package/canonical/reference-solutions/visual-editing/presentation-nextjs.tsx +130 -0
- package/config/airbyte/ai_literacy_framework.connector.yaml +639 -0
- package/config/bigquery/README.md +74 -0
- package/config/bigquery/views/area_scores.sql +87 -0
- package/config/bigquery/views/reports.sql +49 -0
- package/config/features.yaml +116 -0
- package/config/models.yaml +115 -0
- package/config/prompts.yaml +75 -0
- package/config/rubrics.yaml +62 -0
- package/config/schedules.yaml +43 -0
- package/config/sinks.yaml +54 -0
- package/config/sources.yaml +51 -0
- package/config/thresholds.yaml +49 -0
- package/dist/_vendor/ailf-core/examples/index.d.ts +190 -0
- package/dist/_vendor/ailf-core/examples/index.js +285 -0
- package/dist/_vendor/ailf-core/index.d.ts +17 -0
- package/dist/_vendor/ailf-core/index.js +17 -0
- package/dist/_vendor/ailf-core/ports/cache-store.d.ts +72 -0
- package/dist/_vendor/ailf-core/ports/cache-store.js +17 -0
- package/dist/_vendor/ailf-core/ports/config-source.d.ts +33 -0
- package/dist/_vendor/ailf-core/ports/config-source.js +15 -0
- package/dist/_vendor/ailf-core/ports/context.d.ts +172 -0
- package/dist/_vendor/ailf-core/ports/context.js +14 -0
- package/dist/_vendor/ailf-core/ports/doc-fetcher.d.ts +131 -0
- package/dist/_vendor/ailf-core/ports/doc-fetcher.js +12 -0
- package/dist/_vendor/ailf-core/ports/eval-runner.d.ts +24 -0
- package/dist/_vendor/ailf-core/ports/eval-runner.js +8 -0
- package/dist/_vendor/ailf-core/ports/index.d.ts +15 -0
- package/dist/_vendor/ailf-core/ports/index.js +7 -0
- package/dist/_vendor/ailf-core/ports/logger.d.ts +36 -0
- package/dist/_vendor/ailf-core/ports/logger.js +11 -0
- package/dist/_vendor/ailf-core/ports/pipeline-step.d.ts +46 -0
- package/dist/_vendor/ailf-core/ports/pipeline-step.js +8 -0
- package/dist/_vendor/ailf-core/ports/task-source.d.ts +159 -0
- package/dist/_vendor/ailf-core/ports/task-source.js +72 -0
- package/dist/_vendor/ailf-core/schemas/callback-payload.d.ts +24 -0
- package/dist/_vendor/ailf-core/schemas/callback-payload.js +29 -0
- package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +55 -0
- package/dist/_vendor/ailf-core/schemas/eval-config.js +78 -0
- package/dist/_vendor/ailf-core/schemas/index.d.ts +16 -0
- package/dist/_vendor/ailf-core/schemas/index.js +16 -0
- package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +125 -0
- package/dist/_vendor/ailf-core/schemas/pipeline-request.js +67 -0
- package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +531 -0
- package/dist/_vendor/ailf-core/schemas/pipeline.js +318 -0
- package/dist/_vendor/ailf-core/schemas/schedules.d.ts +68 -0
- package/dist/_vendor/ailf-core/schemas/schedules.js +74 -0
- package/dist/_vendor/ailf-core/schemas/sinks.d.ts +207 -0
- package/dist/_vendor/ailf-core/schemas/sinks.js +108 -0
- package/dist/_vendor/ailf-core/services/comparison-formatters.d.ts +18 -0
- package/dist/_vendor/ailf-core/services/comparison-formatters.js +189 -0
- package/dist/_vendor/ailf-core/services/config-helpers.d.ts +41 -0
- package/dist/_vendor/ailf-core/services/config-helpers.js +86 -0
- package/dist/_vendor/ailf-core/services/index.d.ts +12 -0
- package/dist/_vendor/ailf-core/services/index.js +12 -0
- package/dist/_vendor/ailf-core/services/scoring.d.ts +49 -0
- package/dist/_vendor/ailf-core/services/scoring.js +222 -0
- package/dist/_vendor/ailf-core/types/index.d.ts +1082 -0
- package/dist/_vendor/ailf-core/types/index.js +21 -0
- package/dist/_vendor/ailf-core/types/scoring-input.d.ts +54 -0
- package/dist/_vendor/ailf-core/types/scoring-input.js +9 -0
- package/dist/_vendor/ailf-shared/dimension-names.d.ts +21 -0
- package/dist/_vendor/ailf-shared/dimension-names.js +27 -0
- package/dist/_vendor/ailf-shared/document-ref.d.ts +29 -0
- package/dist/_vendor/ailf-shared/document-ref.js +1 -0
- package/dist/_vendor/ailf-shared/eval-modes.d.ts +12 -0
- package/dist/_vendor/ailf-shared/eval-modes.js +8 -0
- package/dist/_vendor/ailf-shared/index.d.ts +16 -0
- package/dist/_vendor/ailf-shared/index.js +16 -0
- package/dist/_vendor/ailf-shared/noise-threshold.d.ts +9 -0
- package/dist/_vendor/ailf-shared/noise-threshold.js +9 -0
- package/dist/_vendor/ailf-shared/score-grades.d.ts +17 -0
- package/dist/_vendor/ailf-shared/score-grades.js +23 -0
- package/dist/adapters/cache/content-lake-cache.d.ts +24 -0
- package/dist/adapters/cache/content-lake-cache.js +59 -0
- package/dist/adapters/cache/filesystem-cache.d.ts +18 -0
- package/dist/adapters/cache/filesystem-cache.js +54 -0
- package/dist/adapters/cache/index.d.ts +2 -0
- package/dist/adapters/cache/index.js +2 -0
- package/dist/adapters/config-sources/cli-config-adapter.d.ts +17 -0
- package/dist/adapters/config-sources/cli-config-adapter.js +23 -0
- package/dist/adapters/config-sources/file-config-adapter.d.ts +26 -0
- package/dist/adapters/config-sources/file-config-adapter.js +96 -0
- package/dist/adapters/config-sources/index.d.ts +2 -0
- package/dist/adapters/config-sources/index.js +2 -0
- package/dist/adapters/doc-fetchers/index.d.ts +1 -0
- package/dist/adapters/doc-fetchers/index.js +1 -0
- package/dist/adapters/doc-fetchers/sanity-doc-fetcher.d.ts +76 -0
- package/dist/adapters/doc-fetchers/sanity-doc-fetcher.js +620 -0
- package/dist/adapters/eval-runners/index.d.ts +1 -0
- package/dist/adapters/eval-runners/index.js +1 -0
- package/dist/adapters/eval-runners/promptfoo-eval-adapter.d.ts +14 -0
- package/dist/adapters/eval-runners/promptfoo-eval-adapter.js +63 -0
- package/dist/adapters/index.d.ts +12 -0
- package/dist/adapters/index.js +12 -0
- package/dist/adapters/loggers/console-logger.d.ts +22 -0
- package/dist/adapters/loggers/console-logger.js +54 -0
- package/dist/adapters/loggers/index.d.ts +9 -0
- package/dist/adapters/loggers/index.js +9 -0
- package/dist/adapters/loggers/json-logger.d.ts +18 -0
- package/dist/adapters/loggers/json-logger.js +33 -0
- package/dist/adapters/loggers/quiet-logger.d.ts +16 -0
- package/dist/adapters/loggers/quiet-logger.js +30 -0
- package/dist/adapters/task-sources/composite-task-source.d.ts +20 -0
- package/dist/adapters/task-sources/composite-task-source.js +59 -0
- package/dist/adapters/task-sources/content-lake-task-source.d.ts +20 -0
- package/dist/adapters/task-sources/content-lake-task-source.js +219 -0
- package/dist/adapters/task-sources/index.d.ts +7 -0
- package/dist/adapters/task-sources/index.js +7 -0
- package/dist/adapters/task-sources/repo-schemas.d.ts +245 -0
- package/dist/adapters/task-sources/repo-schemas.js +234 -0
- package/dist/adapters/task-sources/repo-task-source.d.ts +22 -0
- package/dist/adapters/task-sources/repo-task-source.js +104 -0
- package/dist/adapters/task-sources/repo-trigger.d.ts +52 -0
- package/dist/adapters/task-sources/repo-trigger.js +153 -0
- package/dist/adapters/task-sources/repo-validation.d.ts +49 -0
- package/dist/adapters/task-sources/repo-validation.js +164 -0
- package/dist/adapters/task-sources/yaml-task-source.d.ts +18 -0
- package/dist/adapters/task-sources/yaml-task-source.js +136 -0
- package/dist/agent-observer/agentic-provider.d.ts +132 -0
- package/dist/agent-observer/agentic-provider.js +983 -0
- package/dist/agent-observer/classifier.d.ts +62 -0
- package/dist/agent-observer/classifier.js +269 -0
- package/dist/agent-observer/index.d.ts +7 -0
- package/dist/agent-observer/index.js +4 -0
- package/dist/agent-observer/pricing.d.ts +35 -0
- package/dist/agent-observer/pricing.js +82 -0
- package/dist/agent-observer/provider.d.ts +77 -0
- package/dist/agent-observer/provider.js +151 -0
- package/dist/agent-observer/proxy.d.ts +91 -0
- package/dist/agent-observer/proxy.js +321 -0
- package/dist/agent-observer/test-imports.d.ts +7 -0
- package/dist/agent-observer/test-imports.js +185 -0
- package/dist/agent-observer/types.d.ts +137 -0
- package/dist/agent-observer/types.js +16 -0
- package/dist/assertions/source-isolation.d.ts +72 -0
- package/dist/assertions/source-isolation.js +117 -0
- package/dist/cli.d.ts +24 -0
- package/dist/cli.js +199 -0
- package/dist/commands/agent-report.d.ts +5 -0
- package/dist/commands/agent-report.js +69 -0
- package/dist/commands/baseline.d.ts +9 -0
- package/dist/commands/baseline.js +141 -0
- package/dist/commands/cache.d.ts +13 -0
- package/dist/commands/cache.js +135 -0
- package/dist/commands/calculate-scores.d.ts +8 -0
- package/dist/commands/calculate-scores.js +48 -0
- package/dist/commands/compare.d.ts +8 -0
- package/dist/commands/compare.js +120 -0
- package/dist/commands/completion.d.ts +18 -0
- package/dist/commands/completion.js +260 -0
- package/dist/commands/coverage-audit.d.ts +7 -0
- package/dist/commands/coverage-audit.js +40 -0
- package/dist/commands/discovery-report.d.ts +10 -0
- package/dist/commands/discovery-report.js +44 -0
- package/dist/commands/eval.d.ts +9 -0
- package/dist/commands/eval.js +35 -0
- package/dist/commands/explain-handler.d.ts +34 -0
- package/dist/commands/explain-handler.js +719 -0
- package/dist/commands/fetch-docs.d.ts +8 -0
- package/dist/commands/fetch-docs.js +128 -0
- package/dist/commands/generate-configs.d.ts +8 -0
- package/dist/commands/generate-configs.js +46 -0
- package/dist/commands/grader/index.d.ts +11 -0
- package/dist/commands/grader/index.js +118 -0
- package/dist/commands/init.d.ts +19 -0
- package/dist/commands/init.js +150 -0
- package/dist/commands/interactive.d.ts +12 -0
- package/dist/commands/interactive.js +238 -0
- package/dist/commands/lookup-doc.d.ts +15 -0
- package/dist/commands/lookup-doc.js +84 -0
- package/dist/commands/measure-retrieval.d.ts +5 -0
- package/dist/commands/measure-retrieval.js +65 -0
- package/dist/commands/pipeline-action.d.ts +71 -0
- package/dist/commands/pipeline-action.js +305 -0
- package/dist/commands/pipeline.d.ts +62 -0
- package/dist/commands/pipeline.js +53 -0
- package/dist/commands/pr-comment.d.ts +8 -0
- package/dist/commands/pr-comment.js +47 -0
- package/dist/commands/publish.d.ts +26 -0
- package/dist/commands/publish.js +253 -0
- package/dist/commands/readiness-report.d.ts +10 -0
- package/dist/commands/readiness-report.js +104 -0
- package/dist/commands/shared/options.d.ts +29 -0
- package/dist/commands/shared/options.js +57 -0
- package/dist/commands/update-quality-scores.d.ts +5 -0
- package/dist/commands/update-quality-scores.js +20 -0
- package/dist/commands/validate-tasks.d.ts +16 -0
- package/dist/commands/validate-tasks.js +93 -0
- package/dist/commands/validate.d.ts +9 -0
- package/dist/commands/validate.js +73 -0
- package/dist/commands/webhook-server.d.ts +5 -0
- package/dist/commands/webhook-server.js +30 -0
- package/dist/commands/weekly-digest.d.ts +10 -0
- package/dist/commands/weekly-digest.js +104 -0
- package/dist/composition-root.d.ts +26 -0
- package/dist/composition-root.js +107 -0
- package/dist/interpolate.d.ts +26 -0
- package/dist/interpolate.js +70 -0
- package/dist/job-store.d.ts +104 -0
- package/dist/job-store.js +188 -0
- package/dist/lib/agent-behavior-report.d.ts +8 -0
- package/dist/lib/agent-behavior-report.js +185 -0
- package/dist/lib/baseline.d.ts +19 -0
- package/dist/lib/baseline.js +153 -0
- package/dist/lib/calculate-scores.d.ts +23 -0
- package/dist/lib/calculate-scores.js +42 -0
- package/dist/lib/compare.d.ts +18 -0
- package/dist/lib/compare.js +170 -0
- package/dist/lib/coverage-audit.d.ts +4 -0
- package/dist/lib/coverage-audit.js +42 -0
- package/dist/lib/discovery-report.d.ts +13 -0
- package/dist/lib/discovery-report.js +57 -0
- package/dist/lib/fetch-docs.d.ts +30 -0
- package/dist/lib/fetch-docs.js +171 -0
- package/dist/lib/generate-configs.d.ts +25 -0
- package/dist/lib/generate-configs.js +42 -0
- package/dist/lib/grader-api.d.ts +21 -0
- package/dist/lib/grader-api.js +34 -0
- package/dist/lib/grader-compare.d.ts +19 -0
- package/dist/lib/grader-compare.js +91 -0
- package/dist/lib/grader-consistency.d.ts +27 -0
- package/dist/lib/grader-consistency.js +79 -0
- package/dist/lib/grader-sensitivity.d.ts +19 -0
- package/dist/lib/grader-sensitivity.js +75 -0
- package/dist/lib/grader-validate.d.ts +19 -0
- package/dist/lib/grader-validate.js +78 -0
- package/dist/lib/measure-retrieval.d.ts +14 -0
- package/dist/lib/measure-retrieval.js +71 -0
- package/dist/lib/pr-comment.d.ts +16 -0
- package/dist/lib/pr-comment.js +28 -0
- package/dist/lib/readiness-report.d.ts +13 -0
- package/dist/lib/readiness-report.js +108 -0
- package/dist/lib/webhook-server.d.ts +11 -0
- package/dist/lib/webhook-server.js +24 -0
- package/dist/lib/weekly-digest.d.ts +24 -0
- package/dist/lib/weekly-digest.js +148 -0
- package/dist/orchestration/build-app-context.d.ts +27 -0
- package/dist/orchestration/build-app-context.js +81 -0
- package/dist/orchestration/build-step-sequence.d.ts +15 -0
- package/dist/orchestration/build-step-sequence.js +84 -0
- package/dist/orchestration/config-to-source-overrides.d.ts +9 -0
- package/dist/orchestration/config-to-source-overrides.js +28 -0
- package/dist/orchestration/env-bridge.d.ts +21 -0
- package/dist/orchestration/env-bridge.js +66 -0
- package/dist/orchestration/index.d.ts +11 -0
- package/dist/orchestration/index.js +11 -0
- package/dist/orchestration/pipeline-orchestrator.d.ts +24 -0
- package/dist/orchestration/pipeline-orchestrator.js +153 -0
- package/dist/orchestration/step-runner.d.ts +20 -0
- package/dist/orchestration/step-runner.js +88 -0
- package/dist/orchestration/steps/calculate-scores-step.d.ts +13 -0
- package/dist/orchestration/steps/calculate-scores-step.js +95 -0
- package/dist/orchestration/steps/callback-step.d.ts +24 -0
- package/dist/orchestration/steps/callback-step.js +76 -0
- package/dist/orchestration/steps/compare-step.d.ts +14 -0
- package/dist/orchestration/steps/compare-step.js +92 -0
- package/dist/orchestration/steps/discovery-report-step.d.ts +13 -0
- package/dist/orchestration/steps/discovery-report-step.js +55 -0
- package/dist/orchestration/steps/fetch-docs-shell.d.ts +17 -0
- package/dist/orchestration/steps/fetch-docs-shell.js +30 -0
- package/dist/orchestration/steps/fetch-docs-step.d.ts +14 -0
- package/dist/orchestration/steps/fetch-docs-step.js +135 -0
- package/dist/orchestration/steps/gap-analysis-step.d.ts +16 -0
- package/dist/orchestration/steps/gap-analysis-step.js +136 -0
- package/dist/orchestration/steps/generate-configs-step.d.ts +14 -0
- package/dist/orchestration/steps/generate-configs-step.js +85 -0
- package/dist/orchestration/steps/grader-consistency-step.d.ts +13 -0
- package/dist/orchestration/steps/grader-consistency-step.js +64 -0
- package/dist/orchestration/steps/index.d.ts +19 -0
- package/dist/orchestration/steps/index.js +19 -0
- package/dist/orchestration/steps/mirror-repo-tasks-step.d.ts +21 -0
- package/dist/orchestration/steps/mirror-repo-tasks-step.js +94 -0
- package/dist/orchestration/steps/publish-report-step.d.ts +26 -0
- package/dist/orchestration/steps/publish-report-step.js +216 -0
- package/dist/orchestration/steps/readiness-step.d.ts +13 -0
- package/dist/orchestration/steps/readiness-step.js +91 -0
- package/dist/orchestration/steps/report-step.d.ts +12 -0
- package/dist/orchestration/steps/report-step.js +49 -0
- package/dist/orchestration/steps/run-eval-step.d.ts +17 -0
- package/dist/orchestration/steps/run-eval-step.js +195 -0
- package/dist/orchestration/steps/validate-step.d.ts +12 -0
- package/dist/orchestration/steps/validate-step.js +41 -0
- package/dist/pipeline/agent-behavior-report.d.ts +53 -0
- package/dist/pipeline/agent-behavior-report.js +132 -0
- package/dist/pipeline/attribution.d.ts +47 -0
- package/dist/pipeline/attribution.js +226 -0
- package/dist/pipeline/baseline.d.ts +37 -0
- package/dist/pipeline/baseline.js +141 -0
- package/dist/pipeline/cache.d.ts +101 -0
- package/dist/pipeline/cache.js +283 -0
- package/dist/pipeline/calculate-scores.d.ts +102 -0
- package/dist/pipeline/calculate-scores.js +1128 -0
- package/dist/pipeline/callback-delivery.d.ts +50 -0
- package/dist/pipeline/callback-delivery.js +89 -0
- package/dist/pipeline/checks.d.ts +39 -0
- package/dist/pipeline/checks.js +280 -0
- package/dist/pipeline/classify-url.d.ts +61 -0
- package/dist/pipeline/classify-url.js +93 -0
- package/dist/pipeline/compare.d.ts +31 -0
- package/dist/pipeline/compare.js +208 -0
- package/dist/pipeline/coverage-audit.d.ts +39 -0
- package/dist/pipeline/coverage-audit.js +165 -0
- package/dist/pipeline/degradations.d.ts +85 -0
- package/dist/pipeline/degradations.js +242 -0
- package/dist/pipeline/discovery-report.d.ts +55 -0
- package/dist/pipeline/discovery-report.js +178 -0
- package/dist/pipeline/eval-constants.d.ts +68 -0
- package/dist/pipeline/eval-constants.js +111 -0
- package/dist/pipeline/eval-fingerprint.d.ts +66 -0
- package/dist/pipeline/eval-fingerprint.js +175 -0
- package/dist/pipeline/expand-tasks.d.ts +220 -0
- package/dist/pipeline/expand-tasks.js +421 -0
- package/dist/pipeline/failure-modes.d.ts +46 -0
- package/dist/pipeline/failure-modes.js +348 -0
- package/dist/pipeline/fetch-url-content.d.ts +44 -0
- package/dist/pipeline/fetch-url-content.js +93 -0
- package/dist/pipeline/gap-analysis.d.ts +48 -0
- package/dist/pipeline/gap-analysis.js +231 -0
- package/dist/pipeline/generate-configs.d.ts +72 -0
- package/dist/pipeline/generate-configs.js +395 -0
- package/dist/pipeline/grader-api.d.ts +49 -0
- package/dist/pipeline/grader-api.js +200 -0
- package/dist/pipeline/grader-compare-runner.d.ts +44 -0
- package/dist/pipeline/grader-compare-runner.js +301 -0
- package/dist/pipeline/grader-comparison.d.ts +111 -0
- package/dist/pipeline/grader-comparison.js +161 -0
- package/dist/pipeline/grader-consistency-runner.d.ts +60 -0
- package/dist/pipeline/grader-consistency-runner.js +270 -0
- package/dist/pipeline/grader-consistency.d.ts +103 -0
- package/dist/pipeline/grader-consistency.js +146 -0
- package/dist/pipeline/grader-sensitivity-runner.d.ts +40 -0
- package/dist/pipeline/grader-sensitivity-runner.js +282 -0
- package/dist/pipeline/grader-sensitivity.d.ts +94 -0
- package/dist/pipeline/grader-sensitivity.js +144 -0
- package/dist/pipeline/grader-validate-runner.d.ts +38 -0
- package/dist/pipeline/grader-validate-runner.js +229 -0
- package/dist/pipeline/grader-validation.d.ts +107 -0
- package/dist/pipeline/grader-validation.js +169 -0
- package/dist/pipeline/map-request-to-config.d.ts +19 -0
- package/dist/pipeline/map-request-to-config.js +80 -0
- package/dist/pipeline/measure-retrieval.d.ts +59 -0
- package/dist/pipeline/measure-retrieval.js +111 -0
- package/dist/pipeline/mirror-repo-tasks.d.ts +86 -0
- package/dist/pipeline/mirror-repo-tasks.js +350 -0
- package/dist/pipeline/plan-format.d.ts +33 -0
- package/dist/pipeline/plan-format.js +202 -0
- package/dist/pipeline/plan.d.ts +169 -0
- package/dist/pipeline/plan.js +708 -0
- package/dist/pipeline/pr-comment.d.ts +19 -0
- package/dist/pipeline/pr-comment.js +502 -0
- package/dist/pipeline/probe.d.ts +52 -0
- package/dist/pipeline/probe.js +390 -0
- package/dist/pipeline/provenance.d.ts +47 -0
- package/dist/pipeline/provenance.js +146 -0
- package/dist/pipeline/readiness-report.d.ts +87 -0
- package/dist/pipeline/readiness-report.js +205 -0
- package/dist/pipeline/release-classification.d.ts +54 -0
- package/dist/pipeline/release-classification.js +238 -0
- package/dist/pipeline/release-report.d.ts +37 -0
- package/dist/pipeline/release-report.js +222 -0
- package/dist/pipeline/repo-eval-comment.d.ts +37 -0
- package/dist/pipeline/repo-eval-comment.js +165 -0
- package/dist/pipeline/repo-threshold-evaluator.d.ts +89 -0
- package/dist/pipeline/repo-threshold-evaluator.js +162 -0
- package/dist/pipeline/resolve-mappings.d.ts +35 -0
- package/dist/pipeline/resolve-mappings.js +72 -0
- package/dist/pipeline/retrieval-metrics.d.ts +39 -0
- package/dist/pipeline/retrieval-metrics.js +136 -0
- package/dist/pipeline/reverse-mapping.d.ts +67 -0
- package/dist/pipeline/reverse-mapping.js +88 -0
- package/dist/pipeline/schemas.d.ts +9 -0
- package/dist/pipeline/schemas.js +9 -0
- package/dist/pipeline/steps/calculate-scores-step.d.ts +11 -0
- package/dist/pipeline/steps/calculate-scores-step.js +89 -0
- package/dist/pipeline/steps/compare-step.d.ts +18 -0
- package/dist/pipeline/steps/compare-step.js +90 -0
- package/dist/pipeline/steps/eval-step.d.ts +53 -0
- package/dist/pipeline/steps/eval-step.js +347 -0
- package/dist/pipeline/steps/fetch-docs-step.d.ts +11 -0
- package/dist/pipeline/steps/fetch-docs-step.js +84 -0
- package/dist/pipeline/steps/generate-configs-step.d.ts +11 -0
- package/dist/pipeline/steps/generate-configs-step.js +98 -0
- package/dist/pipeline/steps/grader-consistency-step.d.ts +21 -0
- package/dist/pipeline/steps/grader-consistency-step.js +74 -0
- package/dist/pipeline/steps/publish-report-step.d.ts +57 -0
- package/dist/pipeline/steps/publish-report-step.js +243 -0
- package/dist/pipeline/steps/report-step.d.ts +13 -0
- package/dist/pipeline/steps/report-step.js +56 -0
- package/dist/pipeline/steps/update-scores-step.d.ts +11 -0
- package/dist/pipeline/steps/update-scores-step.js +42 -0
- package/dist/pipeline/targeted-loo.d.ts +88 -0
- package/dist/pipeline/targeted-loo.js +203 -0
- package/dist/pipeline/thresholds.d.ts +27 -0
- package/dist/pipeline/thresholds.js +245 -0
- package/dist/pipeline/types.d.ts +10 -0
- package/dist/pipeline/types.js +10 -0
- package/dist/pipeline/validate.d.ts +67 -0
- package/dist/pipeline/validate.js +406 -0
- package/dist/pipeline/webhook-server.d.ts +37 -0
- package/dist/pipeline/webhook-server.js +133 -0
- package/dist/report-store.d.ts +84 -0
- package/dist/report-store.js +208 -0
- package/dist/sanity/client.d.ts +38 -0
- package/dist/sanity/client.js +86 -0
- package/dist/sanity/portable-text.d.ts +11 -0
- package/dist/sanity/portable-text.js +211 -0
- package/dist/sanity/queries.d.ts +133 -0
- package/dist/sanity/queries.js +300 -0
- package/dist/schedules/digest.d.ts +116 -0
- package/dist/schedules/digest.js +156 -0
- package/dist/schedules/index.d.ts +12 -0
- package/dist/schedules/index.js +10 -0
- package/dist/schedules/loader.d.ts +31 -0
- package/dist/schedules/loader.js +73 -0
- package/dist/schedules/schema.d.ts +9 -0
- package/dist/schedules/schema.js +9 -0
- package/dist/scripts/agent-behavior-report.d.ts +19 -0
- package/dist/scripts/agent-behavior-report.js +315 -0
- package/dist/scripts/baseline.d.ts +43 -0
- package/dist/scripts/baseline.js +267 -0
- package/dist/scripts/calculate-scores.d.ts +166 -0
- package/dist/scripts/calculate-scores.js +1296 -0
- package/dist/scripts/compare.d.ts +22 -0
- package/dist/scripts/compare.js +334 -0
- package/dist/scripts/coverage-audit.d.ts +44 -0
- package/dist/scripts/coverage-audit.js +209 -0
- package/dist/scripts/debug-eval.d.ts +19 -0
- package/dist/scripts/debug-eval.js +73 -0
- package/dist/scripts/discovery-report.d.ts +58 -0
- package/dist/scripts/discovery-report.js +250 -0
- package/dist/scripts/fetch-docs.d.ts +35 -0
- package/dist/scripts/fetch-docs.js +472 -0
- package/dist/scripts/generate-configs.d.ts +66 -0
- package/dist/scripts/generate-configs.js +459 -0
- package/dist/scripts/grader-api.d.ts +27 -0
- package/dist/scripts/grader-api.js +206 -0
- package/dist/scripts/grader-compare.d.ts +22 -0
- package/dist/scripts/grader-compare.js +368 -0
- package/dist/scripts/grader-consistency.d.ts +20 -0
- package/dist/scripts/grader-consistency.js +313 -0
- package/dist/scripts/grader-sensitivity.d.ts +22 -0
- package/dist/scripts/grader-sensitivity.js +354 -0
- package/dist/scripts/grader-validate.d.ts +19 -0
- package/dist/scripts/grader-validate.js +267 -0
- package/dist/scripts/measure-retrieval.d.ts +10 -0
- package/dist/scripts/measure-retrieval.js +145 -0
- package/dist/scripts/migrate-tasks-to-content-lake.d.ts +24 -0
- package/dist/scripts/migrate-tasks-to-content-lake.js +327 -0
- package/dist/scripts/pipeline.d.ts +76 -0
- package/dist/scripts/pipeline.js +1031 -0
- package/dist/scripts/pr-comment.d.ts +10 -0
- package/dist/scripts/pr-comment.js +510 -0
- package/dist/scripts/readiness-report.d.ts +88 -0
- package/dist/scripts/readiness-report.js +342 -0
- package/dist/scripts/update-quality-scores.d.ts +15 -0
- package/dist/scripts/update-quality-scores.js +184 -0
- package/dist/scripts/validate-task-sources.d.ts +21 -0
- package/dist/scripts/validate-task-sources.js +210 -0
- package/dist/scripts/validate.d.ts +13 -0
- package/dist/scripts/validate.js +79 -0
- package/dist/scripts/webhook-server.d.ts +26 -0
- package/dist/scripts/webhook-server.js +147 -0
- package/dist/scripts/weekly-digest.d.ts +24 -0
- package/dist/scripts/weekly-digest.js +144 -0
- package/dist/sinks/bigquery/index.d.ts +131 -0
- package/dist/sinks/bigquery/index.js +222 -0
- package/dist/sinks/format-slack.d.ts +64 -0
- package/dist/sinks/format-slack.js +306 -0
- package/dist/sinks/index.d.ts +23 -0
- package/dist/sinks/index.js +18 -0
- package/dist/sinks/loader.d.ts +18 -0
- package/dist/sinks/loader.js +82 -0
- package/dist/sinks/retry.d.ts +24 -0
- package/dist/sinks/retry.js +52 -0
- package/dist/sinks/schema.d.ts +9 -0
- package/dist/sinks/schema.js +9 -0
- package/dist/sinks/slack/format.d.ts +65 -0
- package/dist/sinks/slack/format.js +327 -0
- package/dist/sinks/slack/index.d.ts +27 -0
- package/dist/sinks/slack/index.js +78 -0
- package/dist/sinks/slack-sink.d.ts +27 -0
- package/dist/sinks/slack-sink.js +78 -0
- package/dist/sinks/types.d.ts +59 -0
- package/dist/sinks/types.js +44 -0
- package/dist/sinks/webhook/index.d.ts +19 -0
- package/dist/sinks/webhook/index.js +50 -0
- package/dist/sinks/webhook-sink.d.ts +19 -0
- package/dist/sinks/webhook-sink.js +50 -0
- package/dist/sources.d.ts +104 -0
- package/dist/sources.js +292 -0
- package/dist/webhook/budget.d.ts +42 -0
- package/dist/webhook/budget.js +60 -0
- package/dist/webhook/debounce.d.ts +67 -0
- package/dist/webhook/debounce.js +76 -0
- package/dist/webhook/dispatch.d.ts +45 -0
- package/dist/webhook/dispatch.js +84 -0
- package/dist/webhook/eval-request-handler.d.ts +87 -0
- package/dist/webhook/eval-request-handler.js +181 -0
- package/dist/webhook/handler.d.ts +88 -0
- package/dist/webhook/handler.js +203 -0
- package/dist/webhook/index.d.ts +17 -0
- package/dist/webhook/index.js +12 -0
- package/dist/webhook/types.d.ts +109 -0
- package/dist/webhook/types.js +10 -0
- package/package.json +72 -0
- package/tasks/.expanded.agentic.yaml +51 -0
- package/tasks/.expanded.yaml +66 -0
- package/tasks/frameworks.yaml +98 -0
- package/tasks/functions.yaml +51 -0
- package/tasks/groq.yaml +216 -0
- package/tasks/nextjs-live.yaml +62 -0
- package/tasks/studio-setup.yaml +111 -0
- package/tasks/visual-editing.yaml +120 -0
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* pipeline/callback-delivery.ts
|
|
3
|
+
*
|
|
4
|
+
* Delivers evaluation results to a callback URL with HMAC signature.
|
|
5
|
+
* Used by the API-triggered pipeline flow: after the report is published
|
|
6
|
+
* to the Content Lake (system of record), the result is also POSTed
|
|
7
|
+
* to the caller's callback URL.
|
|
8
|
+
*
|
|
9
|
+
* Fire-and-forget with 3 retries and exponential backoff. If callback
|
|
10
|
+
* fails, log a warning — the result is still in the Content Lake.
|
|
11
|
+
*
|
|
12
|
+
* @see docs/design-docs/api-service-gateway.md
|
|
13
|
+
*/
|
|
14
|
+
import type { CallbackPayload } from "../_vendor/ailf-core/index.d.ts";
|
|
15
|
+
export type { CallbackPayload } from "../_vendor/ailf-core/index.d.ts";
|
|
16
|
+
export interface CallbackConfig {
|
|
17
|
+
/** Optional custom headers for the callback request. They are spread after the default Content-Type and X-AILF-Signature headers, so same-named entries replace the defaults. */
|
|
18
|
+
headers?: Record<string, string>;
|
|
19
|
+
/** The callback URL to POST results to. deliverCallback also passes this string to computeSignature as the HMAC signing key. */
|
|
20
|
+
url: string;
|
|
21
|
+
}
|
|
22
|
+
/**
|
|
23
|
+
* Generate a hex-encoded HMAC-SHA256 signature for the callback payload.
|
|
24
|
+
*
|
|
25
|
+
* The signature is computed over the raw JSON body using the callback URL
|
|
26
|
+
* as the signing key, so the receiver needs no pre-shared secret to check it.
|
|
27
|
+
* NOTE(review): the URL is not a secret — anyone who knows it can forge a valid signature, so this is not strong proof of origin.
|
|
28
|
+
*
|
|
29
|
+
* The receiver should:
|
|
30
|
+
* 1. Read the raw request body (the exact bytes, before any JSON parsing)
|
|
31
|
+
* 2. Compute HMAC-SHA256 over that body with the callback URL as the key, hex-encoded
|
|
32
|
+
* 3. Compare the result with the X-AILF-Signature header
|
|
33
|
+
*/
|
|
34
|
+
export declare function computeSignature(body: string, signingKey: string): string;
|
|
35
|
+
/**
|
|
36
|
+
* Deliver evaluation results to a callback URL.
|
|
37
|
+
*
|
|
38
|
+
* POSTs the payload as JSON with:
|
|
39
|
+
* - Content-Type: application/json
|
|
40
|
+
* - X-AILF-Signature: hex HMAC-SHA256 of the body, keyed with the callback URL
|
|
41
|
+
* - Any custom headers from the callback config (applied last, so they can replace the two above)
|
|
42
|
+
*
|
|
43
|
+
* Retries up to 3 times after the first attempt (at most 4 requests) with exponential backoff (1s → 2s → 4s); each request is aborted after 10 s.
|
|
44
|
+
* Returns success/failure status; fetch and HTTP failures are captured, not thrown. NOTE(review): the payload is serialized before the try block, so a non-serializable payload can still throw.
|
|
45
|
+
*/
|
|
46
|
+
export declare function deliverCallback(callback: CallbackConfig, payload: CallbackPayload): Promise<{
|
|
47
|
+
ok: boolean; // true once a request returned an ok (2xx) response
|
|
48
|
+
error?: string; // presumably the last failure message (HTTP status line or thrown error text) — TODO confirm against the failure return
|
|
49
|
+
attempts: number; // number of requests made so far, counted from 1
|
|
50
|
+
}>;
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* pipeline/callback-delivery.ts
|
|
3
|
+
*
|
|
4
|
+
* Delivers evaluation results to a callback URL with HMAC signature.
|
|
5
|
+
* Used by the API-triggered pipeline flow: after the report is published
|
|
6
|
+
* to the Content Lake (system of record), the result is also POSTed
|
|
7
|
+
* to the caller's callback URL.
|
|
8
|
+
*
|
|
9
|
+
* Fire-and-forget with 3 retries and exponential backoff. If callback
|
|
10
|
+
* fails, log a warning — the result is still in the Content Lake.
|
|
11
|
+
*
|
|
12
|
+
* @see docs/design-docs/api-service-gateway.md
|
|
13
|
+
*/
|
|
14
|
+
import { createHmac } from "crypto";
|
|
15
|
+
// ---------------------------------------------------------------------------
|
|
16
|
+
// Constants
|
|
17
|
+
// ---------------------------------------------------------------------------
|
|
18
|
+
const MAX_RETRIES = 3; // retries after the first attempt; the delivery loop runs attempts 0..MAX_RETRIES, i.e. at most MAX_RETRIES + 1 requests
|
|
19
|
+
const BASE_DELAY_MS = 1000; // base backoff delay in milliseconds (documented schedule: 1s → 2s → 4s) — presumably doubled per retry; usage is outside this view
|
|
20
|
+
const CALLBACK_TIMEOUT_MS = 10_000; // per-request timeout in milliseconds; the fetch is aborted when it elapses
|
|
21
|
+
// ---------------------------------------------------------------------------
|
|
22
|
+
// HMAC Signature
|
|
23
|
+
// ---------------------------------------------------------------------------
|
|
24
|
+
/**
|
|
25
|
+
* Generate a hex-encoded HMAC-SHA256 signature for the callback payload.
|
|
26
|
+
*
|
|
27
|
+
* The signature is computed over the raw JSON body using the callback URL
|
|
28
|
+
* as the signing key, so the receiver needs no pre-shared secret to check it.
|
|
29
|
+
* NOTE(review): the URL is not a secret — anyone who knows it can forge a valid signature, so this is not strong proof of origin.
|
|
30
|
+
*
|
|
31
|
+
* The receiver should:
|
|
32
|
+
* 1. Read the raw request body (the exact bytes, before any JSON parsing)
|
|
33
|
+
* 2. Compute HMAC-SHA256 over that body with the callback URL as the key, hex-encoded
|
|
34
|
+
* 3. Compare the result with the X-AILF-Signature header
|
|
35
|
+
*/
|
|
36
|
+
export function computeSignature(body, signingKey) {
|
|
37
|
+
return createHmac("sha256", signingKey).update(body).digest("hex"); // hex string is sent verbatim as the X-AILF-Signature header value
|
|
38
|
+
}
|
|
39
|
+
// ---------------------------------------------------------------------------
|
|
40
|
+
// Delivery
|
|
41
|
+
// ---------------------------------------------------------------------------
|
|
42
|
+
/**
 * Deliver evaluation results to a callback URL.
 *
 * POSTs the payload as JSON with:
 * - Content-Type: application/json
 * - X-AILF-Signature: HMAC-SHA256 signature of the body, keyed by the URL
 * - Any custom headers from the callback config (spread last, so they take
 *   precedence over the two headers above when names collide)
 *
 * Makes one initial attempt plus up to MAX_RETRIES retries, sleeping
 * BASE_DELAY_MS * 2^attempt between attempts (1s → 2s → 4s with the current
 * constants). Each attempt is aborted after CALLBACK_TIMEOUT_MS. Any non-2xx
 * response or thrown error counts as a failed attempt.
 *
 * Never throws: a payload that cannot be serialized or signed is reported as
 * `{ ok: false, attempts: 0 }` instead of rejecting.
 *
 * @param callback - Callback config; reads `url` and optional `headers`
 * @param payload - JSON-serializable value to send as the request body
 * @returns `ok`, the number of HTTP attempts made, and the last error message
 *   when delivery failed
 */
export async function deliverCallback(callback, payload) {
  let body;
  let signature;
  try {
    // Serialization (e.g. circular references) and signing can both throw;
    // keep them inside the "never throws" contract.
    body = JSON.stringify(payload);
    signature = computeSignature(body, callback.url);
  } catch (err) {
    return {
      ok: false,
      error: err instanceof Error ? err.message : String(err),
      attempts: 0,
    };
  }

  let lastError;
  let attempts = 0;
  for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
    attempts = attempt + 1;
    const controller = new AbortController();
    const timeout = setTimeout(() => controller.abort(), CALLBACK_TIMEOUT_MS);
    try {
      const response = await fetch(callback.url, {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
          "X-AILF-Signature": signature,
          ...(callback.headers ?? {}),
        },
        body,
        signal: controller.signal,
      });
      if (response.ok) {
        return { ok: true, attempts };
      }
      lastError = `HTTP ${response.status}: ${response.statusText}`;
    } catch (err) {
      lastError = err instanceof Error ? err.message : String(err);
    } finally {
      // Always release the abort timer, including when fetch itself throws,
      // so a failed attempt does not leave a pending timer behind.
      clearTimeout(timeout);
    }
    // Don't delay after the last attempt
    if (attempt < MAX_RETRIES) {
      const delayMs = BASE_DELAY_MS * Math.pow(2, attempt);
      await new Promise((resolve) => setTimeout(resolve, delayMs));
    }
  }
  return { ok: false, error: lastError, attempts };
}
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* pipeline/checks.ts
|
|
3
|
+
*
|
|
4
|
+
* Pre/postcondition checking for pipeline steps.
|
|
5
|
+
* Each function verifies filesystem or environment state and returns
|
|
6
|
+
* ValidationIssue[] — an empty array means all checks passed.
|
|
7
|
+
*/
|
|
8
|
+
import type { ValidationIssue } from "./types.js";
|
|
9
|
+
/**
|
|
10
|
+
* Check that `contexts/canonical/<taskId>.md` exists and is non-empty
|
|
11
|
+
* for every task ID.
|
|
12
|
+
*/
|
|
13
|
+
export declare function checkCanonicalContextsExist(rootDir: string, taskIds: string[]): ValidationIssue[];
|
|
14
|
+
/**
|
|
15
|
+
* Check that `contexts/<area>.md` exists and is non-empty for every area.
|
|
16
|
+
* This is the precondition for the eval step — it cannot run without docs.
|
|
17
|
+
*/
|
|
18
|
+
export declare function checkContextsExist(rootDir: string, areas: string[]): ValidationIssue[];
|
|
19
|
+
/**
|
|
20
|
+
* Check that required environment variables are set.
|
|
21
|
+
* Loads the root `.env` file first (with override, matching the dotenv CLI
|
|
22
|
+
* `-o` flag used by other scripts), then checks for required keys.
|
|
23
|
+
*/
|
|
24
|
+
export declare function checkEnvironment(rootDir: string): ValidationIssue[];
|
|
25
|
+
/**
|
|
26
|
+
* Check that the baseline `promptfooconfig.yaml` exists. Optionally check
|
|
27
|
+
* for `promptfooconfig.observed.yaml` and `promptfooconfig.agentic.yaml`.
|
|
28
|
+
*/
|
|
29
|
+
export declare function checkGeneratedConfigsExist(rootDir: string): ValidationIssue[];
|
|
30
|
+
/**
|
|
31
|
+
* Check that the eval results JSON file exists, is valid JSON, and contains
|
|
32
|
+
* a `results` array.
|
|
33
|
+
*/
|
|
34
|
+
export declare function checkResultsExist(rootDir: string, resultsPath?: string): ValidationIssue[];
|
|
35
|
+
/**
|
|
36
|
+
* Check that `results/latest/score-summary.json` exists, parses as JSON,
|
|
37
|
+
* contains feature area scores, and no scores are NaN or out of 0–100.
|
|
38
|
+
*/
|
|
39
|
+
export declare function checkScoreSummaryValid(rootDir: string): ValidationIssue[];
|
|
@@ -0,0 +1,280 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* pipeline/checks.ts
|
|
3
|
+
*
|
|
4
|
+
* Pre/postcondition checking for pipeline steps.
|
|
5
|
+
* Each function verifies filesystem or environment state and returns
|
|
6
|
+
* ValidationIssue[] — an empty array means all checks passed.
|
|
7
|
+
*/
|
|
8
|
+
import { config as loadEnv } from "dotenv";
|
|
9
|
+
import { existsSync, readFileSync, statSync } from "fs";
|
|
10
|
+
import { join, resolve } from "path";
|
|
11
|
+
// ---------------------------------------------------------------------------
|
|
12
|
+
// Precondition: canonical context files exist per task
|
|
13
|
+
// ---------------------------------------------------------------------------
|
|
14
|
+
/**
 * Check that `contexts/canonical/<taskId>.md` exists as a regular, non-empty
 * file for every task ID.
 *
 * A path that cannot be stat'ed, or that is not a regular file (for example
 * a directory with that name), is reported as missing. A zero-byte file is
 * reported as empty.
 *
 * @param rootDir - Directory containing the `contexts/` folder
 * @param taskIds - Task IDs to look up
 * @returns One error-severity issue per missing or empty file; empty array
 *   when every file is present
 */
export function checkCanonicalContextsExist(rootDir, taskIds) {
  const source = "checkCanonicalContextsExist";
  const canonicalDir = resolve(rootDir, "contexts", "canonical");
  const issues = [];
  for (const taskId of taskIds) {
    const filePath = join(canonicalDir, `${taskId}.md`);
    // Single stat call: avoids the exists-then-stat race and lets us verify
    // the entry is actually a file.
    let stat;
    try {
      stat = statSync(filePath);
    } catch {
      stat = undefined;
    }
    if (!stat || !stat.isFile()) {
      issues.push({
        message: `Missing canonical context for task "${taskId}". Run \`pnpm fetch-docs\` to generate it.`,
        path: filePath,
        severity: "error",
        source,
      });
      continue;
    }
    if (stat.size === 0) {
      issues.push({
        message: `Canonical context for task "${taskId}" is empty. Re-run \`pnpm fetch-docs\`.`,
        path: filePath,
        severity: "error",
        source,
      });
    }
  }
  return issues;
}
|
|
44
|
+
// ---------------------------------------------------------------------------
|
|
45
|
+
// Precondition: contexts exist for each feature area
|
|
46
|
+
// ---------------------------------------------------------------------------
|
|
47
|
+
/**
 * Check that `contexts/<area>.md` exists as a regular, non-empty file for
 * every area. This is the precondition for the eval step — it cannot run
 * without docs.
 *
 * A path that cannot be stat'ed, or that is not a regular file (for example
 * a directory with that name), is reported as missing. A zero-byte file is
 * reported as empty.
 *
 * @param rootDir - Directory containing the `contexts/` folder
 * @param areas - Feature area names to look up
 * @returns One error-severity issue per missing or empty file; empty array
 *   when every file is present
 */
export function checkContextsExist(rootDir, areas) {
  const source = "checkContextsExist";
  const contextsDir = resolve(rootDir, "contexts");
  const issues = [];
  for (const area of areas) {
    const filePath = join(contextsDir, `${area}.md`);
    // Single stat call: avoids the exists-then-stat race and lets us verify
    // the entry is actually a file.
    let stat;
    try {
      stat = statSync(filePath);
    } catch {
      stat = undefined;
    }
    if (!stat || !stat.isFile()) {
      issues.push({
        message: `Missing context file for area "${area}". Run \`pnpm fetch-docs\` to generate it.`,
        path: filePath,
        severity: "error",
        source,
      });
      continue;
    }
    if (stat.size === 0) {
      issues.push({
        message: `Context file for area "${area}" is empty. Re-run \`pnpm fetch-docs\`.`,
        path: filePath,
        severity: "error",
        source,
      });
    }
  }
  return issues;
}
|
|
77
|
+
// ---------------------------------------------------------------------------
|
|
78
|
+
// Environment variable checks
|
|
79
|
+
// ---------------------------------------------------------------------------
|
|
80
|
+
/**
 * Verify that the API keys the pipeline relies on are present.
 *
 * If `<rootDir>/../../.env` exists it is loaded first with `override: true`
 * (mirroring `dotenv -e ../../.env -o` used by other scripts), so values in
 * that file replace anything already in `process.env`. Each required
 * variable that is unset or empty afterwards yields one warning.
 *
 * @param rootDir - Package directory; the `.env` file is looked up two
 *   levels above it
 * @returns Warning-severity issues, in the order OPENAI_API_KEY then
 *   SANITY_API_TOKEN; empty array when both are set
 */
export function checkEnvironment(rootDir) {
  const dotenvFile = resolve(rootDir, "..", "..", ".env");
  if (existsSync(dotenvFile)) {
    loadEnv({ override: true, path: dotenvFile });
  }

  // [variable name, message reported when it is missing]
  const required = [
    [
      "OPENAI_API_KEY",
      "OPENAI_API_KEY is not set. Ensure it is defined in the root .env file.",
    ],
    [
      "SANITY_API_TOKEN",
      "SANITY_API_TOKEN is not set. It is needed for 'turbo fetch-docs'. Ensure it is defined in the root .env file.",
    ],
  ];

  return required
    .filter(([name]) => !process.env[name])
    .map(([, message]) => ({
      message,
      severity: "warning",
      source: "checkEnvironment",
    }));
}
|
|
108
|
+
// ---------------------------------------------------------------------------
|
|
109
|
+
// Precondition: generated promptfoo configs exist
|
|
110
|
+
// ---------------------------------------------------------------------------
|
|
111
|
+
/**
 * Verify that the generated promptfoo configs are present in `rootDir`.
 *
 * The baseline `promptfooconfig.yaml` is mandatory and its absence is an
 * error. `promptfooconfig.observed.yaml` and `promptfooconfig.agentic.yaml`
 * are optional; each one that is absent produces a warning.
 *
 * @param rootDir - Directory in which the config files are expected
 * @returns Issues in the order baseline, observed, agentic; empty array when
 *   all three files exist
 */
export function checkGeneratedConfigsExist(rootDir) {
  const source = "checkGeneratedConfigsExist";
  const found = [];

  const baseline = resolve(rootDir, "promptfooconfig.yaml");
  if (!existsSync(baseline)) {
    found.push({
      message: "Baseline config 'promptfooconfig.yaml' not found. Run 'pnpm generate-configs'.",
      path: baseline,
      severity: "error",
      source,
    });
  }

  const optionalNames = ["promptfooconfig.observed.yaml", "promptfooconfig.agentic.yaml"];
  for (const fileName of optionalNames) {
    const candidate = resolve(rootDir, fileName);
    if (existsSync(candidate)) {
      continue;
    }
    found.push({
      message: `Optional config \`${fileName}\` not found. Run \`pnpm generate-configs\` to create it.`,
      path: candidate,
      severity: "warning",
      source,
    });
  }

  return found;
}
|
|
143
|
+
// ---------------------------------------------------------------------------
|
|
144
|
+
// Postcondition: eval results exist and are valid JSON
|
|
145
|
+
// ---------------------------------------------------------------------------
|
|
146
|
+
/**
 * Validate the eval results file produced by the eval step.
 *
 * The file must exist, parse as JSON, be an object with a `results` key, and
 * that key must hold either an array or an object whose own `results` is an
 * array (promptfoo's `{ results: { results: [...], stats: {...} } }` shape).
 * Validation stops at the first failure, so at most one issue is returned.
 *
 * @param rootDir - Base directory used to resolve the results path
 * @param resultsPath - Optional path (relative to `rootDir` or absolute);
 *   defaults to `results/latest/eval-results.json`
 * @returns A single error-severity issue, or an empty array when valid
 */
export function checkResultsExist(rootDir, resultsPath) {
  const filePath = resolve(rootDir, resultsPath ?? join("results", "latest", "eval-results.json"));
  // Every failure here has the same shape; only the message differs.
  const fail = (message) => [
    { message, path: filePath, severity: "error", source: "checkResultsExist" },
  ];

  if (!existsSync(filePath)) {
    return fail("Eval results file not found. Run 'pnpm eval' first.");
  }

  let parsed;
  try {
    parsed = JSON.parse(readFileSync(filePath, "utf-8"));
  } catch (err) {
    const detail = err instanceof Error ? err.message : String(err);
    return fail(`Eval results file is not valid JSON: ${detail}`);
  }

  const isObject = parsed !== null && typeof parsed === "object";
  if (!isObject || !("results" in parsed)) {
    return fail('Eval results file is missing the "results" key.');
  }

  // Promptfoo wraps results in: { results: { results: [...], stats: {...} } }
  const outer = parsed.results;
  const nestedIsArray =
    outer !== null &&
    typeof outer === "object" &&
    "results" in outer &&
    Array.isArray(outer.results);

  if (Array.isArray(outer) || nestedIsArray) {
    return [];
  }
  return fail('The "results" key in eval results is not an array (or results.results).');
}
|
|
205
|
+
// ---------------------------------------------------------------------------
|
|
206
|
+
// Postcondition: score summary is valid
|
|
207
|
+
// ---------------------------------------------------------------------------
|
|
208
|
+
/**
 * Check that `results/latest/score-summary.json` exists, parses as JSON, is
 * a non-empty plain object, and that none of its top-level numeric values
 * is NaN or outside 0–100.
 *
 * Structural failures (missing file, invalid JSON, not an object, no keys)
 * return a single error immediately. Otherwise every top-level key is
 * inspected: non-number values are skipped, NaN is an error, and a number
 * outside 0–100 is a warning.
 *
 * A top-level JSON array is rejected as "not a JSON object": its indices
 * would otherwise be treated as feature-area names.
 *
 * @param rootDir - Directory containing `results/latest/score-summary.json`
 * @returns Issues found; empty array when the summary is valid
 */
export function checkScoreSummaryValid(rootDir) {
  const filePath = resolve(rootDir, "results", "latest", "score-summary.json");
  const issue = (severity, message) => ({
    message,
    path: filePath,
    severity,
    source: "checkScoreSummaryValid",
  });

  if (!existsSync(filePath)) {
    return [issue("error", "Score summary not found. Run 'pnpm report' to generate it.")];
  }

  let parsed;
  try {
    parsed = JSON.parse(readFileSync(filePath, "utf-8"));
  } catch (err) {
    const detail = err instanceof Error ? err.message : String(err);
    return [issue("error", `Score summary is not valid JSON: ${detail}`)];
  }

  // `typeof [] === "object"`, so arrays need an explicit rejection.
  if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
    return [issue("error", "Score summary is not a JSON object.")];
  }

  const entries = Object.entries(parsed);
  if (entries.length === 0) {
    return [issue("error", "Score summary contains no feature area scores.")];
  }

  const issues = [];
  for (const [key, value] of entries) {
    if (typeof value !== "number") {
      continue;
    }
    // JSON.parse never yields NaN (it is not valid JSON), so this branch is
    // purely defensive.
    if (Number.isNaN(value)) {
      issues.push(issue("error", `Score for "${key}" is NaN.`));
    } else if (value < 0 || value > 100) {
      issues.push(
        issue("warning", `Score for "${key}" is ${value}, which is outside the 0–100 range.`),
      );
    }
  }
  return issues;
}
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* classify-url.ts
|
|
3
|
+
*
|
|
4
|
+
* Classifies URLs passed via --url/--urls into specific source types.
|
|
5
|
+
* Enables intelligent inference: a Sanity Studio release URL automatically
|
|
6
|
+
* sets the perspective, a Studio document URL extracts the document ID, etc.
|
|
7
|
+
*
|
|
8
|
+
* Classification rules (applied in order):
|
|
9
|
+
* 1. /releases/<id> → Sanity Release (extract perspective)
|
|
10
|
+
* 2. /structure/...;<uuid> → Sanity Studio Document (extract doc ID + optional perspective)
|
|
11
|
+
* 3. everything else → Direct URL (fetch as documentation)
|
|
12
|
+
*/
|
|
13
|
+
/** Discriminated union of all classified URL types */
|
|
14
|
+
export type ClassifiedUrl = DirectUrl | SanityDocumentUrl | SanityReleaseUrl;
|
|
15
|
+
/** A URL classified as a direct documentation page (HTML, Markdown, etc.) */
|
|
16
|
+
export interface DirectUrl {
|
|
17
|
+
type: "direct-url";
|
|
18
|
+
url: string;
|
|
19
|
+
}
|
|
20
|
+
/** A URL classified as a Sanity Studio document URL */
|
|
21
|
+
export interface SanityDocumentUrl {
|
|
22
|
+
documentId: string;
|
|
23
|
+
perspectiveId?: string;
|
|
24
|
+
studioOrigin: string;
|
|
25
|
+
type: "sanity-document";
|
|
26
|
+
}
|
|
27
|
+
/** A URL classified as a Sanity Studio release URL */
|
|
28
|
+
export interface SanityReleaseUrl {
|
|
29
|
+
perspectiveId: string;
|
|
30
|
+
studioOrigin: string;
|
|
31
|
+
type: "sanity-release";
|
|
32
|
+
}
|
|
33
|
+
/** Result of classifying multiple URLs */
|
|
34
|
+
export interface ClassificationResult {
|
|
35
|
+
/** URLs classified as direct documentation pages */
|
|
36
|
+
directUrls: string[];
|
|
37
|
+
/** Sanity document IDs extracted from Studio document URLs */
|
|
38
|
+
documentIds: string[];
|
|
39
|
+
/** Perspective ID inferred from release or document URLs (first wins) */
|
|
40
|
+
inferredPerspective?: string;
|
|
41
|
+
/** Studio origin inferred from Sanity URLs (first wins) */
|
|
42
|
+
inferredStudioOrigin?: string;
|
|
43
|
+
}
|
|
44
|
+
/**
|
|
45
|
+
* Classify a single URL into one of the known source types.
|
|
46
|
+
*
|
|
47
|
+
* @param input - A URL string to classify
|
|
48
|
+
* @returns A discriminated union describing the URL type and extracted metadata
|
|
49
|
+
* @throws If the input is not a valid URL
|
|
50
|
+
*/
|
|
51
|
+
export declare function classifyUrl(input: string): ClassifiedUrl;
|
|
52
|
+
/**
|
|
53
|
+
* Classify an array of URLs and aggregate the results.
|
|
54
|
+
*
|
|
55
|
+
* Collects direct URLs and document IDs into separate lists. Perspective
|
|
56
|
+
* and studio origin are inferred from the first Sanity URL that provides them.
|
|
57
|
+
*
|
|
58
|
+
* @param urls - Array of URL strings to classify
|
|
59
|
+
* @returns Aggregated classification result
|
|
60
|
+
*/
|
|
61
|
+
export declare function classifyUrls(urls: string[]): ClassificationResult;
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* classify-url.ts
|
|
3
|
+
*
|
|
4
|
+
* Classifies URLs passed via --url/--urls into specific source types.
|
|
5
|
+
* Enables intelligent inference: a Sanity Studio release URL automatically
|
|
6
|
+
* sets the perspective, a Studio document URL extracts the document ID, etc.
|
|
7
|
+
*
|
|
8
|
+
* Classification rules (applied in order):
|
|
9
|
+
* 1. /releases/<id> → Sanity Release (extract perspective)
|
|
10
|
+
* 2. /structure/...;<uuid> → Sanity Studio Document (extract doc ID + optional perspective)
|
|
11
|
+
* 3. everything else → Direct URL (fetch as documentation)
|
|
12
|
+
*/
|
|
13
|
+
// ---------------------------------------------------------------------------
|
|
14
|
+
// Patterns
|
|
15
|
+
// ---------------------------------------------------------------------------
|
|
16
|
+
/**
 * 8-4-4-4-12 hexadecimal UUID, any version, case-insensitive. Unanchored:
 * it matches when the tested string merely contains a UUID.
 */
const UUID_PATTERN = /[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}/i;
/**
 * Matches /releases/<perspective-id> at the end of a pathname, with an
 * optional trailing slash.
 */
const RELEASE_PATH = /\/releases\/([\w-]+)\/?$/;
/**
 * Matches /structure/...;<last-segment> where the last semicolon-delimited
 * segment is extracted. The UUID check is applied separately.
 */
const STRUCTURE_PATH = /\/structure\/.+;([^;?]+)/;
// ---------------------------------------------------------------------------
// Single-URL classification
// ---------------------------------------------------------------------------
/**
 * Extract the document ID from the last pane segment of a Studio URL.
 *
 * The segment is percent-decoded and cut at the first comma, because a pane
 * segment may carry router parameters after the ID
 * (e.g. `<id>%2Ctemplate%3Dpost`). A malformed percent-escape leaves the
 * segment undecoded rather than throwing.
 */
function paneDocumentId(segment) {
  let decoded = segment;
  try {
    decoded = decodeURIComponent(segment);
  } catch {
    // Malformed escape sequence: fall back to the raw segment.
  }
  return decoded.split(",")[0];
}
/**
 * Classify a single URL into one of the known source types.
 *
 * Rules are applied in order:
 * 1. Pathname ends in `/releases/<id>` → `sanity-release`, with the ID as
 *    `perspectiveId`
 * 2. Pathname contains `/structure/...;<segment>` and the segment's ID part
 *    contains a UUID → `sanity-document`, with that ID as `documentId` and
 *    the `perspective` query parameter (if any) as `perspectiveId`
 * 3. Anything else → `direct-url`, carrying the input string unchanged
 *
 * @param input - A URL string to classify
 * @returns A discriminated union describing the URL type and extracted metadata
 * @throws If the input is not a valid URL
 */
export function classifyUrl(input) {
  const parsed = new URL(input);
  // Rule 1: Sanity Studio release URL — /releases/<perspective-id>
  const releaseMatch = parsed.pathname.match(RELEASE_PATH);
  if (releaseMatch) {
    return {
      perspectiveId: releaseMatch[1],
      studioOrigin: parsed.origin,
      type: "sanity-release",
    };
  }
  // Rule 2: Sanity Studio document URL — /structure/...;<uuid>
  const structureMatch = parsed.pathname.match(STRUCTURE_PATH);
  if (structureMatch) {
    const documentId = paneDocumentId(structureMatch[1]);
    if (UUID_PATTERN.test(documentId)) {
      return {
        documentId,
        perspectiveId: parsed.searchParams.get("perspective") ?? undefined,
        studioOrigin: parsed.origin,
        type: "sanity-document",
      };
    }
  }
  // Rule 3: Default — direct documentation URL
  return { type: "direct-url", url: input };
}
|
|
59
|
+
/**
 * Classify every URL in a list and fold the results into one summary.
 *
 * Direct URLs and Studio document IDs are collected in input order. The
 * perspective and studio origin are each taken from the first Sanity URL
 * (document or release) that supplies one; later values are ignored. The
 * `inferred*` properties are only set when a value was found.
 *
 * Each URL is classified with `classifyUrl`, so an invalid URL anywhere in
 * the list makes this function throw.
 *
 * @param urls - Array of URL strings to classify
 * @returns Aggregated classification result
 */
export function classifyUrls(urls) {
  const summary = {
    directUrls: [],
    documentIds: [],
  };
  for (const url of urls) {
    const item = classifyUrl(url);
    if (item.type === "direct-url") {
      summary.directUrls.push(item.url);
      continue;
    }
    if (item.type === "sanity-document") {
      summary.documentIds.push(item.documentId);
      // A document URL only carries a perspective when ?perspective= is set.
      if (item.perspectiveId && !summary.inferredPerspective) {
        summary.inferredPerspective = item.perspectiveId;
      }
    } else if (item.type === "sanity-release") {
      if (summary.inferredPerspective == null) {
        summary.inferredPerspective = item.perspectiveId;
      }
    } else {
      continue;
    }
    // Both Sanity URL kinds can supply the studio origin; first one wins.
    if (summary.inferredStudioOrigin == null) {
      summary.inferredStudioOrigin = item.studioOrigin;
    }
  }
  return summary;
}
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* pipeline/compare.ts
|
|
3
|
+
*
|
|
4
|
+
* Core comparison primitive for the evaluation framework.
|
|
5
|
+
*
|
|
6
|
+
* Takes two ScoreSummary objects (baseline and experiment) and produces a
|
|
7
|
+
* structured ComparisonReport with overall, per-area, and per-dimension
|
|
8
|
+
* deltas, plus improved/regressed/unchanged classification.
|
|
9
|
+
*
|
|
10
|
+
* This is the single function that backs all comparison scenarios:
|
|
11
|
+
* doc improvement, model comparison, branch validation, etc.
|
|
12
|
+
* What varies is what produced each ScoreSummary — the comparison
|
|
13
|
+
* logic is always the same.
|
|
14
|
+
*
|
|
15
|
+
* @see docs/ideas/evaluation-roadmap.md — BP5: Make comparison a primitive
|
|
16
|
+
* @see docs/ideas/metrics-design.md — Tier 4: Comparison results
|
|
17
|
+
*/
|
|
18
|
+
import { type ChangeClass, type CompareOptions, type ComparisonReport, type ScoreSummary } from "./types.js";
|
|
19
|
+
/** Classify a delta as improved, regressed, or unchanged given a threshold */
|
|
20
|
+
export declare function classifyChange(delta: number, threshold: number): ChangeClass;
|
|
21
|
+
/**
|
|
22
|
+
* Compare two evaluation score summaries and produce a structured report.
|
|
23
|
+
*
|
|
24
|
+
* This is a pure function — no side effects, no file I/O.
|
|
25
|
+
*
|
|
26
|
+
* @param baseline The "before" or "control" score summary
|
|
27
|
+
* @param experiment The "after" or "treatment" score summary
|
|
28
|
+
* @param options Optional configuration (noise threshold, etc.)
|
|
29
|
+
* @returns A ComparisonReport with deltas, classifications, and breakdowns
|
|
30
|
+
*/
|
|
31
|
+
export declare function compare(baseline: ScoreSummary, experiment: ScoreSummary, options?: CompareOptions): ComparisonReport;
|