@sanity/ailf 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +89 -0
- package/bin/ailf.js +64 -0
- package/canonical/grader-references/README.md +88 -0
- package/canonical/grader-references/groq.yaml +234 -0
- package/canonical/grader-references/studio-setup.yaml +275 -0
- package/canonical/reference-solutions/.gitkeep +1 -0
- package/canonical/reference-solutions/frameworks/nuxt.ts +119 -0
- package/canonical/reference-solutions/frameworks/remix.tsx +100 -0
- package/canonical/reference-solutions/functions/publish-webhook.ts +60 -0
- package/canonical/reference-solutions/groq/advanced-filtering.ts +379 -0
- package/canonical/reference-solutions/groq/blog-queries.ts +137 -0
- package/canonical/reference-solutions/groq/joins-references.ts +300 -0
- package/canonical/reference-solutions/nextjs/app-router-integration.tsx +128 -0
- package/canonical/reference-solutions/studio-setup/blog-schema.ts +143 -0
- package/canonical/reference-solutions/studio-setup/custom-tool.tsx +78 -0
- package/canonical/reference-solutions/visual-editing/live-preview.tsx +137 -0
- package/canonical/reference-solutions/visual-editing/presentation-nextjs.tsx +130 -0
- package/config/airbyte/ai_literacy_framework.connector.yaml +639 -0
- package/config/bigquery/README.md +74 -0
- package/config/bigquery/views/area_scores.sql +87 -0
- package/config/bigquery/views/reports.sql +49 -0
- package/config/features.yaml +116 -0
- package/config/models.yaml +115 -0
- package/config/prompts.yaml +75 -0
- package/config/rubrics.yaml +62 -0
- package/config/schedules.yaml +43 -0
- package/config/sinks.yaml +54 -0
- package/config/sources.yaml +51 -0
- package/config/thresholds.yaml +49 -0
- package/dist/_vendor/ailf-core/examples/index.d.ts +190 -0
- package/dist/_vendor/ailf-core/examples/index.js +285 -0
- package/dist/_vendor/ailf-core/index.d.ts +17 -0
- package/dist/_vendor/ailf-core/index.js +17 -0
- package/dist/_vendor/ailf-core/ports/cache-store.d.ts +72 -0
- package/dist/_vendor/ailf-core/ports/cache-store.js +17 -0
- package/dist/_vendor/ailf-core/ports/config-source.d.ts +33 -0
- package/dist/_vendor/ailf-core/ports/config-source.js +15 -0
- package/dist/_vendor/ailf-core/ports/context.d.ts +172 -0
- package/dist/_vendor/ailf-core/ports/context.js +14 -0
- package/dist/_vendor/ailf-core/ports/doc-fetcher.d.ts +131 -0
- package/dist/_vendor/ailf-core/ports/doc-fetcher.js +12 -0
- package/dist/_vendor/ailf-core/ports/eval-runner.d.ts +24 -0
- package/dist/_vendor/ailf-core/ports/eval-runner.js +8 -0
- package/dist/_vendor/ailf-core/ports/index.d.ts +15 -0
- package/dist/_vendor/ailf-core/ports/index.js +7 -0
- package/dist/_vendor/ailf-core/ports/logger.d.ts +36 -0
- package/dist/_vendor/ailf-core/ports/logger.js +11 -0
- package/dist/_vendor/ailf-core/ports/pipeline-step.d.ts +46 -0
- package/dist/_vendor/ailf-core/ports/pipeline-step.js +8 -0
- package/dist/_vendor/ailf-core/ports/task-source.d.ts +159 -0
- package/dist/_vendor/ailf-core/ports/task-source.js +72 -0
- package/dist/_vendor/ailf-core/schemas/callback-payload.d.ts +24 -0
- package/dist/_vendor/ailf-core/schemas/callback-payload.js +29 -0
- package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +55 -0
- package/dist/_vendor/ailf-core/schemas/eval-config.js +78 -0
- package/dist/_vendor/ailf-core/schemas/index.d.ts +16 -0
- package/dist/_vendor/ailf-core/schemas/index.js +16 -0
- package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +125 -0
- package/dist/_vendor/ailf-core/schemas/pipeline-request.js +67 -0
- package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +531 -0
- package/dist/_vendor/ailf-core/schemas/pipeline.js +318 -0
- package/dist/_vendor/ailf-core/schemas/schedules.d.ts +68 -0
- package/dist/_vendor/ailf-core/schemas/schedules.js +74 -0
- package/dist/_vendor/ailf-core/schemas/sinks.d.ts +207 -0
- package/dist/_vendor/ailf-core/schemas/sinks.js +108 -0
- package/dist/_vendor/ailf-core/services/comparison-formatters.d.ts +18 -0
- package/dist/_vendor/ailf-core/services/comparison-formatters.js +189 -0
- package/dist/_vendor/ailf-core/services/config-helpers.d.ts +41 -0
- package/dist/_vendor/ailf-core/services/config-helpers.js +86 -0
- package/dist/_vendor/ailf-core/services/index.d.ts +12 -0
- package/dist/_vendor/ailf-core/services/index.js +12 -0
- package/dist/_vendor/ailf-core/services/scoring.d.ts +49 -0
- package/dist/_vendor/ailf-core/services/scoring.js +222 -0
- package/dist/_vendor/ailf-core/types/index.d.ts +1082 -0
- package/dist/_vendor/ailf-core/types/index.js +21 -0
- package/dist/_vendor/ailf-core/types/scoring-input.d.ts +54 -0
- package/dist/_vendor/ailf-core/types/scoring-input.js +9 -0
- package/dist/_vendor/ailf-shared/dimension-names.d.ts +21 -0
- package/dist/_vendor/ailf-shared/dimension-names.js +27 -0
- package/dist/_vendor/ailf-shared/document-ref.d.ts +29 -0
- package/dist/_vendor/ailf-shared/document-ref.js +1 -0
- package/dist/_vendor/ailf-shared/eval-modes.d.ts +12 -0
- package/dist/_vendor/ailf-shared/eval-modes.js +8 -0
- package/dist/_vendor/ailf-shared/index.d.ts +16 -0
- package/dist/_vendor/ailf-shared/index.js +16 -0
- package/dist/_vendor/ailf-shared/noise-threshold.d.ts +9 -0
- package/dist/_vendor/ailf-shared/noise-threshold.js +9 -0
- package/dist/_vendor/ailf-shared/score-grades.d.ts +17 -0
- package/dist/_vendor/ailf-shared/score-grades.js +23 -0
- package/dist/adapters/cache/content-lake-cache.d.ts +24 -0
- package/dist/adapters/cache/content-lake-cache.js +59 -0
- package/dist/adapters/cache/filesystem-cache.d.ts +18 -0
- package/dist/adapters/cache/filesystem-cache.js +54 -0
- package/dist/adapters/cache/index.d.ts +2 -0
- package/dist/adapters/cache/index.js +2 -0
- package/dist/adapters/config-sources/cli-config-adapter.d.ts +17 -0
- package/dist/adapters/config-sources/cli-config-adapter.js +23 -0
- package/dist/adapters/config-sources/file-config-adapter.d.ts +26 -0
- package/dist/adapters/config-sources/file-config-adapter.js +96 -0
- package/dist/adapters/config-sources/index.d.ts +2 -0
- package/dist/adapters/config-sources/index.js +2 -0
- package/dist/adapters/doc-fetchers/index.d.ts +1 -0
- package/dist/adapters/doc-fetchers/index.js +1 -0
- package/dist/adapters/doc-fetchers/sanity-doc-fetcher.d.ts +76 -0
- package/dist/adapters/doc-fetchers/sanity-doc-fetcher.js +620 -0
- package/dist/adapters/eval-runners/index.d.ts +1 -0
- package/dist/adapters/eval-runners/index.js +1 -0
- package/dist/adapters/eval-runners/promptfoo-eval-adapter.d.ts +14 -0
- package/dist/adapters/eval-runners/promptfoo-eval-adapter.js +63 -0
- package/dist/adapters/index.d.ts +12 -0
- package/dist/adapters/index.js +12 -0
- package/dist/adapters/loggers/console-logger.d.ts +22 -0
- package/dist/adapters/loggers/console-logger.js +54 -0
- package/dist/adapters/loggers/index.d.ts +9 -0
- package/dist/adapters/loggers/index.js +9 -0
- package/dist/adapters/loggers/json-logger.d.ts +18 -0
- package/dist/adapters/loggers/json-logger.js +33 -0
- package/dist/adapters/loggers/quiet-logger.d.ts +16 -0
- package/dist/adapters/loggers/quiet-logger.js +30 -0
- package/dist/adapters/task-sources/composite-task-source.d.ts +20 -0
- package/dist/adapters/task-sources/composite-task-source.js +59 -0
- package/dist/adapters/task-sources/content-lake-task-source.d.ts +20 -0
- package/dist/adapters/task-sources/content-lake-task-source.js +219 -0
- package/dist/adapters/task-sources/index.d.ts +7 -0
- package/dist/adapters/task-sources/index.js +7 -0
- package/dist/adapters/task-sources/repo-schemas.d.ts +245 -0
- package/dist/adapters/task-sources/repo-schemas.js +234 -0
- package/dist/adapters/task-sources/repo-task-source.d.ts +22 -0
- package/dist/adapters/task-sources/repo-task-source.js +104 -0
- package/dist/adapters/task-sources/repo-trigger.d.ts +52 -0
- package/dist/adapters/task-sources/repo-trigger.js +153 -0
- package/dist/adapters/task-sources/repo-validation.d.ts +49 -0
- package/dist/adapters/task-sources/repo-validation.js +164 -0
- package/dist/adapters/task-sources/yaml-task-source.d.ts +18 -0
- package/dist/adapters/task-sources/yaml-task-source.js +136 -0
- package/dist/agent-observer/agentic-provider.d.ts +132 -0
- package/dist/agent-observer/agentic-provider.js +983 -0
- package/dist/agent-observer/classifier.d.ts +62 -0
- package/dist/agent-observer/classifier.js +269 -0
- package/dist/agent-observer/index.d.ts +7 -0
- package/dist/agent-observer/index.js +4 -0
- package/dist/agent-observer/pricing.d.ts +35 -0
- package/dist/agent-observer/pricing.js +82 -0
- package/dist/agent-observer/provider.d.ts +77 -0
- package/dist/agent-observer/provider.js +151 -0
- package/dist/agent-observer/proxy.d.ts +91 -0
- package/dist/agent-observer/proxy.js +321 -0
- package/dist/agent-observer/test-imports.d.ts +7 -0
- package/dist/agent-observer/test-imports.js +185 -0
- package/dist/agent-observer/types.d.ts +137 -0
- package/dist/agent-observer/types.js +16 -0
- package/dist/assertions/source-isolation.d.ts +72 -0
- package/dist/assertions/source-isolation.js +117 -0
- package/dist/cli.d.ts +24 -0
- package/dist/cli.js +199 -0
- package/dist/commands/agent-report.d.ts +5 -0
- package/dist/commands/agent-report.js +69 -0
- package/dist/commands/baseline.d.ts +9 -0
- package/dist/commands/baseline.js +141 -0
- package/dist/commands/cache.d.ts +13 -0
- package/dist/commands/cache.js +135 -0
- package/dist/commands/calculate-scores.d.ts +8 -0
- package/dist/commands/calculate-scores.js +48 -0
- package/dist/commands/compare.d.ts +8 -0
- package/dist/commands/compare.js +120 -0
- package/dist/commands/completion.d.ts +18 -0
- package/dist/commands/completion.js +260 -0
- package/dist/commands/coverage-audit.d.ts +7 -0
- package/dist/commands/coverage-audit.js +40 -0
- package/dist/commands/discovery-report.d.ts +10 -0
- package/dist/commands/discovery-report.js +44 -0
- package/dist/commands/eval.d.ts +9 -0
- package/dist/commands/eval.js +35 -0
- package/dist/commands/explain-handler.d.ts +34 -0
- package/dist/commands/explain-handler.js +719 -0
- package/dist/commands/fetch-docs.d.ts +8 -0
- package/dist/commands/fetch-docs.js +128 -0
- package/dist/commands/generate-configs.d.ts +8 -0
- package/dist/commands/generate-configs.js +46 -0
- package/dist/commands/grader/index.d.ts +11 -0
- package/dist/commands/grader/index.js +118 -0
- package/dist/commands/init.d.ts +19 -0
- package/dist/commands/init.js +150 -0
- package/dist/commands/interactive.d.ts +12 -0
- package/dist/commands/interactive.js +238 -0
- package/dist/commands/lookup-doc.d.ts +15 -0
- package/dist/commands/lookup-doc.js +84 -0
- package/dist/commands/measure-retrieval.d.ts +5 -0
- package/dist/commands/measure-retrieval.js +65 -0
- package/dist/commands/pipeline-action.d.ts +71 -0
- package/dist/commands/pipeline-action.js +305 -0
- package/dist/commands/pipeline.d.ts +62 -0
- package/dist/commands/pipeline.js +53 -0
- package/dist/commands/pr-comment.d.ts +8 -0
- package/dist/commands/pr-comment.js +47 -0
- package/dist/commands/publish.d.ts +26 -0
- package/dist/commands/publish.js +253 -0
- package/dist/commands/readiness-report.d.ts +10 -0
- package/dist/commands/readiness-report.js +104 -0
- package/dist/commands/shared/options.d.ts +29 -0
- package/dist/commands/shared/options.js +57 -0
- package/dist/commands/update-quality-scores.d.ts +5 -0
- package/dist/commands/update-quality-scores.js +20 -0
- package/dist/commands/validate-tasks.d.ts +16 -0
- package/dist/commands/validate-tasks.js +93 -0
- package/dist/commands/validate.d.ts +9 -0
- package/dist/commands/validate.js +73 -0
- package/dist/commands/webhook-server.d.ts +5 -0
- package/dist/commands/webhook-server.js +30 -0
- package/dist/commands/weekly-digest.d.ts +10 -0
- package/dist/commands/weekly-digest.js +104 -0
- package/dist/composition-root.d.ts +26 -0
- package/dist/composition-root.js +107 -0
- package/dist/interpolate.d.ts +26 -0
- package/dist/interpolate.js +70 -0
- package/dist/job-store.d.ts +104 -0
- package/dist/job-store.js +188 -0
- package/dist/lib/agent-behavior-report.d.ts +8 -0
- package/dist/lib/agent-behavior-report.js +185 -0
- package/dist/lib/baseline.d.ts +19 -0
- package/dist/lib/baseline.js +153 -0
- package/dist/lib/calculate-scores.d.ts +23 -0
- package/dist/lib/calculate-scores.js +42 -0
- package/dist/lib/compare.d.ts +18 -0
- package/dist/lib/compare.js +170 -0
- package/dist/lib/coverage-audit.d.ts +4 -0
- package/dist/lib/coverage-audit.js +42 -0
- package/dist/lib/discovery-report.d.ts +13 -0
- package/dist/lib/discovery-report.js +57 -0
- package/dist/lib/fetch-docs.d.ts +30 -0
- package/dist/lib/fetch-docs.js +171 -0
- package/dist/lib/generate-configs.d.ts +25 -0
- package/dist/lib/generate-configs.js +42 -0
- package/dist/lib/grader-api.d.ts +21 -0
- package/dist/lib/grader-api.js +34 -0
- package/dist/lib/grader-compare.d.ts +19 -0
- package/dist/lib/grader-compare.js +91 -0
- package/dist/lib/grader-consistency.d.ts +27 -0
- package/dist/lib/grader-consistency.js +79 -0
- package/dist/lib/grader-sensitivity.d.ts +19 -0
- package/dist/lib/grader-sensitivity.js +75 -0
- package/dist/lib/grader-validate.d.ts +19 -0
- package/dist/lib/grader-validate.js +78 -0
- package/dist/lib/measure-retrieval.d.ts +14 -0
- package/dist/lib/measure-retrieval.js +71 -0
- package/dist/lib/pr-comment.d.ts +16 -0
- package/dist/lib/pr-comment.js +28 -0
- package/dist/lib/readiness-report.d.ts +13 -0
- package/dist/lib/readiness-report.js +108 -0
- package/dist/lib/webhook-server.d.ts +11 -0
- package/dist/lib/webhook-server.js +24 -0
- package/dist/lib/weekly-digest.d.ts +24 -0
- package/dist/lib/weekly-digest.js +148 -0
- package/dist/orchestration/build-app-context.d.ts +27 -0
- package/dist/orchestration/build-app-context.js +81 -0
- package/dist/orchestration/build-step-sequence.d.ts +15 -0
- package/dist/orchestration/build-step-sequence.js +84 -0
- package/dist/orchestration/config-to-source-overrides.d.ts +9 -0
- package/dist/orchestration/config-to-source-overrides.js +28 -0
- package/dist/orchestration/env-bridge.d.ts +21 -0
- package/dist/orchestration/env-bridge.js +66 -0
- package/dist/orchestration/index.d.ts +11 -0
- package/dist/orchestration/index.js +11 -0
- package/dist/orchestration/pipeline-orchestrator.d.ts +24 -0
- package/dist/orchestration/pipeline-orchestrator.js +153 -0
- package/dist/orchestration/step-runner.d.ts +20 -0
- package/dist/orchestration/step-runner.js +88 -0
- package/dist/orchestration/steps/calculate-scores-step.d.ts +13 -0
- package/dist/orchestration/steps/calculate-scores-step.js +95 -0
- package/dist/orchestration/steps/callback-step.d.ts +24 -0
- package/dist/orchestration/steps/callback-step.js +76 -0
- package/dist/orchestration/steps/compare-step.d.ts +14 -0
- package/dist/orchestration/steps/compare-step.js +92 -0
- package/dist/orchestration/steps/discovery-report-step.d.ts +13 -0
- package/dist/orchestration/steps/discovery-report-step.js +55 -0
- package/dist/orchestration/steps/fetch-docs-shell.d.ts +17 -0
- package/dist/orchestration/steps/fetch-docs-shell.js +30 -0
- package/dist/orchestration/steps/fetch-docs-step.d.ts +14 -0
- package/dist/orchestration/steps/fetch-docs-step.js +135 -0
- package/dist/orchestration/steps/gap-analysis-step.d.ts +16 -0
- package/dist/orchestration/steps/gap-analysis-step.js +136 -0
- package/dist/orchestration/steps/generate-configs-step.d.ts +14 -0
- package/dist/orchestration/steps/generate-configs-step.js +85 -0
- package/dist/orchestration/steps/grader-consistency-step.d.ts +13 -0
- package/dist/orchestration/steps/grader-consistency-step.js +64 -0
- package/dist/orchestration/steps/index.d.ts +19 -0
- package/dist/orchestration/steps/index.js +19 -0
- package/dist/orchestration/steps/mirror-repo-tasks-step.d.ts +21 -0
- package/dist/orchestration/steps/mirror-repo-tasks-step.js +94 -0
- package/dist/orchestration/steps/publish-report-step.d.ts +26 -0
- package/dist/orchestration/steps/publish-report-step.js +216 -0
- package/dist/orchestration/steps/readiness-step.d.ts +13 -0
- package/dist/orchestration/steps/readiness-step.js +91 -0
- package/dist/orchestration/steps/report-step.d.ts +12 -0
- package/dist/orchestration/steps/report-step.js +49 -0
- package/dist/orchestration/steps/run-eval-step.d.ts +17 -0
- package/dist/orchestration/steps/run-eval-step.js +195 -0
- package/dist/orchestration/steps/validate-step.d.ts +12 -0
- package/dist/orchestration/steps/validate-step.js +41 -0
- package/dist/pipeline/agent-behavior-report.d.ts +53 -0
- package/dist/pipeline/agent-behavior-report.js +132 -0
- package/dist/pipeline/attribution.d.ts +47 -0
- package/dist/pipeline/attribution.js +226 -0
- package/dist/pipeline/baseline.d.ts +37 -0
- package/dist/pipeline/baseline.js +141 -0
- package/dist/pipeline/cache.d.ts +101 -0
- package/dist/pipeline/cache.js +283 -0
- package/dist/pipeline/calculate-scores.d.ts +102 -0
- package/dist/pipeline/calculate-scores.js +1128 -0
- package/dist/pipeline/callback-delivery.d.ts +50 -0
- package/dist/pipeline/callback-delivery.js +89 -0
- package/dist/pipeline/checks.d.ts +39 -0
- package/dist/pipeline/checks.js +280 -0
- package/dist/pipeline/classify-url.d.ts +61 -0
- package/dist/pipeline/classify-url.js +93 -0
- package/dist/pipeline/compare.d.ts +31 -0
- package/dist/pipeline/compare.js +208 -0
- package/dist/pipeline/coverage-audit.d.ts +39 -0
- package/dist/pipeline/coverage-audit.js +165 -0
- package/dist/pipeline/degradations.d.ts +85 -0
- package/dist/pipeline/degradations.js +242 -0
- package/dist/pipeline/discovery-report.d.ts +55 -0
- package/dist/pipeline/discovery-report.js +178 -0
- package/dist/pipeline/eval-constants.d.ts +68 -0
- package/dist/pipeline/eval-constants.js +111 -0
- package/dist/pipeline/eval-fingerprint.d.ts +66 -0
- package/dist/pipeline/eval-fingerprint.js +175 -0
- package/dist/pipeline/expand-tasks.d.ts +220 -0
- package/dist/pipeline/expand-tasks.js +421 -0
- package/dist/pipeline/failure-modes.d.ts +46 -0
- package/dist/pipeline/failure-modes.js +348 -0
- package/dist/pipeline/fetch-url-content.d.ts +44 -0
- package/dist/pipeline/fetch-url-content.js +93 -0
- package/dist/pipeline/gap-analysis.d.ts +48 -0
- package/dist/pipeline/gap-analysis.js +231 -0
- package/dist/pipeline/generate-configs.d.ts +72 -0
- package/dist/pipeline/generate-configs.js +395 -0
- package/dist/pipeline/grader-api.d.ts +49 -0
- package/dist/pipeline/grader-api.js +200 -0
- package/dist/pipeline/grader-compare-runner.d.ts +44 -0
- package/dist/pipeline/grader-compare-runner.js +301 -0
- package/dist/pipeline/grader-comparison.d.ts +111 -0
- package/dist/pipeline/grader-comparison.js +161 -0
- package/dist/pipeline/grader-consistency-runner.d.ts +60 -0
- package/dist/pipeline/grader-consistency-runner.js +270 -0
- package/dist/pipeline/grader-consistency.d.ts +103 -0
- package/dist/pipeline/grader-consistency.js +146 -0
- package/dist/pipeline/grader-sensitivity-runner.d.ts +40 -0
- package/dist/pipeline/grader-sensitivity-runner.js +282 -0
- package/dist/pipeline/grader-sensitivity.d.ts +94 -0
- package/dist/pipeline/grader-sensitivity.js +144 -0
- package/dist/pipeline/grader-validate-runner.d.ts +38 -0
- package/dist/pipeline/grader-validate-runner.js +229 -0
- package/dist/pipeline/grader-validation.d.ts +107 -0
- package/dist/pipeline/grader-validation.js +169 -0
- package/dist/pipeline/map-request-to-config.d.ts +19 -0
- package/dist/pipeline/map-request-to-config.js +80 -0
- package/dist/pipeline/measure-retrieval.d.ts +59 -0
- package/dist/pipeline/measure-retrieval.js +111 -0
- package/dist/pipeline/mirror-repo-tasks.d.ts +86 -0
- package/dist/pipeline/mirror-repo-tasks.js +350 -0
- package/dist/pipeline/plan-format.d.ts +33 -0
- package/dist/pipeline/plan-format.js +202 -0
- package/dist/pipeline/plan.d.ts +169 -0
- package/dist/pipeline/plan.js +708 -0
- package/dist/pipeline/pr-comment.d.ts +19 -0
- package/dist/pipeline/pr-comment.js +502 -0
- package/dist/pipeline/probe.d.ts +52 -0
- package/dist/pipeline/probe.js +390 -0
- package/dist/pipeline/provenance.d.ts +47 -0
- package/dist/pipeline/provenance.js +146 -0
- package/dist/pipeline/readiness-report.d.ts +87 -0
- package/dist/pipeline/readiness-report.js +205 -0
- package/dist/pipeline/release-classification.d.ts +54 -0
- package/dist/pipeline/release-classification.js +238 -0
- package/dist/pipeline/release-report.d.ts +37 -0
- package/dist/pipeline/release-report.js +222 -0
- package/dist/pipeline/repo-eval-comment.d.ts +37 -0
- package/dist/pipeline/repo-eval-comment.js +165 -0
- package/dist/pipeline/repo-threshold-evaluator.d.ts +89 -0
- package/dist/pipeline/repo-threshold-evaluator.js +162 -0
- package/dist/pipeline/resolve-mappings.d.ts +35 -0
- package/dist/pipeline/resolve-mappings.js +72 -0
- package/dist/pipeline/retrieval-metrics.d.ts +39 -0
- package/dist/pipeline/retrieval-metrics.js +136 -0
- package/dist/pipeline/reverse-mapping.d.ts +67 -0
- package/dist/pipeline/reverse-mapping.js +88 -0
- package/dist/pipeline/schemas.d.ts +9 -0
- package/dist/pipeline/schemas.js +9 -0
- package/dist/pipeline/steps/calculate-scores-step.d.ts +11 -0
- package/dist/pipeline/steps/calculate-scores-step.js +89 -0
- package/dist/pipeline/steps/compare-step.d.ts +18 -0
- package/dist/pipeline/steps/compare-step.js +90 -0
- package/dist/pipeline/steps/eval-step.d.ts +53 -0
- package/dist/pipeline/steps/eval-step.js +347 -0
- package/dist/pipeline/steps/fetch-docs-step.d.ts +11 -0
- package/dist/pipeline/steps/fetch-docs-step.js +84 -0
- package/dist/pipeline/steps/generate-configs-step.d.ts +11 -0
- package/dist/pipeline/steps/generate-configs-step.js +98 -0
- package/dist/pipeline/steps/grader-consistency-step.d.ts +21 -0
- package/dist/pipeline/steps/grader-consistency-step.js +74 -0
- package/dist/pipeline/steps/publish-report-step.d.ts +57 -0
- package/dist/pipeline/steps/publish-report-step.js +243 -0
- package/dist/pipeline/steps/report-step.d.ts +13 -0
- package/dist/pipeline/steps/report-step.js +56 -0
- package/dist/pipeline/steps/update-scores-step.d.ts +11 -0
- package/dist/pipeline/steps/update-scores-step.js +42 -0
- package/dist/pipeline/targeted-loo.d.ts +88 -0
- package/dist/pipeline/targeted-loo.js +203 -0
- package/dist/pipeline/thresholds.d.ts +27 -0
- package/dist/pipeline/thresholds.js +245 -0
- package/dist/pipeline/types.d.ts +10 -0
- package/dist/pipeline/types.js +10 -0
- package/dist/pipeline/validate.d.ts +67 -0
- package/dist/pipeline/validate.js +406 -0
- package/dist/pipeline/webhook-server.d.ts +37 -0
- package/dist/pipeline/webhook-server.js +133 -0
- package/dist/report-store.d.ts +84 -0
- package/dist/report-store.js +208 -0
- package/dist/sanity/client.d.ts +38 -0
- package/dist/sanity/client.js +86 -0
- package/dist/sanity/portable-text.d.ts +11 -0
- package/dist/sanity/portable-text.js +211 -0
- package/dist/sanity/queries.d.ts +133 -0
- package/dist/sanity/queries.js +300 -0
- package/dist/schedules/digest.d.ts +116 -0
- package/dist/schedules/digest.js +156 -0
- package/dist/schedules/index.d.ts +12 -0
- package/dist/schedules/index.js +10 -0
- package/dist/schedules/loader.d.ts +31 -0
- package/dist/schedules/loader.js +73 -0
- package/dist/schedules/schema.d.ts +9 -0
- package/dist/schedules/schema.js +9 -0
- package/dist/scripts/agent-behavior-report.d.ts +19 -0
- package/dist/scripts/agent-behavior-report.js +315 -0
- package/dist/scripts/baseline.d.ts +43 -0
- package/dist/scripts/baseline.js +267 -0
- package/dist/scripts/calculate-scores.d.ts +166 -0
- package/dist/scripts/calculate-scores.js +1296 -0
- package/dist/scripts/compare.d.ts +22 -0
- package/dist/scripts/compare.js +334 -0
- package/dist/scripts/coverage-audit.d.ts +44 -0
- package/dist/scripts/coverage-audit.js +209 -0
- package/dist/scripts/debug-eval.d.ts +19 -0
- package/dist/scripts/debug-eval.js +73 -0
- package/dist/scripts/discovery-report.d.ts +58 -0
- package/dist/scripts/discovery-report.js +250 -0
- package/dist/scripts/fetch-docs.d.ts +35 -0
- package/dist/scripts/fetch-docs.js +472 -0
- package/dist/scripts/generate-configs.d.ts +66 -0
- package/dist/scripts/generate-configs.js +459 -0
- package/dist/scripts/grader-api.d.ts +27 -0
- package/dist/scripts/grader-api.js +206 -0
- package/dist/scripts/grader-compare.d.ts +22 -0
- package/dist/scripts/grader-compare.js +368 -0
- package/dist/scripts/grader-consistency.d.ts +20 -0
- package/dist/scripts/grader-consistency.js +313 -0
- package/dist/scripts/grader-sensitivity.d.ts +22 -0
- package/dist/scripts/grader-sensitivity.js +354 -0
- package/dist/scripts/grader-validate.d.ts +19 -0
- package/dist/scripts/grader-validate.js +267 -0
- package/dist/scripts/measure-retrieval.d.ts +10 -0
- package/dist/scripts/measure-retrieval.js +145 -0
- package/dist/scripts/migrate-tasks-to-content-lake.d.ts +24 -0
- package/dist/scripts/migrate-tasks-to-content-lake.js +327 -0
- package/dist/scripts/pipeline.d.ts +76 -0
- package/dist/scripts/pipeline.js +1031 -0
- package/dist/scripts/pr-comment.d.ts +10 -0
- package/dist/scripts/pr-comment.js +510 -0
- package/dist/scripts/readiness-report.d.ts +88 -0
- package/dist/scripts/readiness-report.js +342 -0
- package/dist/scripts/update-quality-scores.d.ts +15 -0
- package/dist/scripts/update-quality-scores.js +184 -0
- package/dist/scripts/validate-task-sources.d.ts +21 -0
- package/dist/scripts/validate-task-sources.js +210 -0
- package/dist/scripts/validate.d.ts +13 -0
- package/dist/scripts/validate.js +79 -0
- package/dist/scripts/webhook-server.d.ts +26 -0
- package/dist/scripts/webhook-server.js +147 -0
- package/dist/scripts/weekly-digest.d.ts +24 -0
- package/dist/scripts/weekly-digest.js +144 -0
- package/dist/sinks/bigquery/index.d.ts +131 -0
- package/dist/sinks/bigquery/index.js +222 -0
- package/dist/sinks/format-slack.d.ts +64 -0
- package/dist/sinks/format-slack.js +306 -0
- package/dist/sinks/index.d.ts +23 -0
- package/dist/sinks/index.js +18 -0
- package/dist/sinks/loader.d.ts +18 -0
- package/dist/sinks/loader.js +82 -0
- package/dist/sinks/retry.d.ts +24 -0
- package/dist/sinks/retry.js +52 -0
- package/dist/sinks/schema.d.ts +9 -0
- package/dist/sinks/schema.js +9 -0
- package/dist/sinks/slack/format.d.ts +65 -0
- package/dist/sinks/slack/format.js +327 -0
- package/dist/sinks/slack/index.d.ts +27 -0
- package/dist/sinks/slack/index.js +78 -0
- package/dist/sinks/slack-sink.d.ts +27 -0
- package/dist/sinks/slack-sink.js +78 -0
- package/dist/sinks/types.d.ts +59 -0
- package/dist/sinks/types.js +44 -0
- package/dist/sinks/webhook/index.d.ts +19 -0
- package/dist/sinks/webhook/index.js +50 -0
- package/dist/sinks/webhook-sink.d.ts +19 -0
- package/dist/sinks/webhook-sink.js +50 -0
- package/dist/sources.d.ts +104 -0
- package/dist/sources.js +292 -0
- package/dist/webhook/budget.d.ts +42 -0
- package/dist/webhook/budget.js +60 -0
- package/dist/webhook/debounce.d.ts +67 -0
- package/dist/webhook/debounce.js +76 -0
- package/dist/webhook/dispatch.d.ts +45 -0
- package/dist/webhook/dispatch.js +84 -0
- package/dist/webhook/eval-request-handler.d.ts +87 -0
- package/dist/webhook/eval-request-handler.js +181 -0
- package/dist/webhook/handler.d.ts +88 -0
- package/dist/webhook/handler.js +203 -0
- package/dist/webhook/index.d.ts +17 -0
- package/dist/webhook/index.js +12 -0
- package/dist/webhook/types.d.ts +109 -0
- package/dist/webhook/types.js +10 -0
- package/package.json +72 -0
- package/tasks/.expanded.agentic.yaml +51 -0
- package/tasks/.expanded.yaml +66 -0
- package/tasks/frameworks.yaml +98 -0
- package/tasks/functions.yaml +51 -0
- package/tasks/groq.yaml +216 -0
- package/tasks/nextjs-live.yaml +62 -0
- package/tasks/studio-setup.yaml +111 -0
- package/tasks/visual-editing.yaml +120 -0
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pipeline step: Publish evaluation report to the report store.
|
|
3
|
+
*
|
|
4
|
+
* Inlines the logic from the former pipeline/steps/publish-report-step.ts.
|
|
5
|
+
* Uses ctx.reportStore and ctx.sinks from the composition root instead of
|
|
6
|
+
* constructing ReportStore and loadSinks() internally.
|
|
7
|
+
*
|
|
8
|
+
* Design principles:
|
|
9
|
+
* - P1: Reports are immutable events (write-once to Sanity)
|
|
10
|
+
* - P5: Local-first (pipeline never fails because of a store write)
|
|
11
|
+
* - P6: Sinks are fire-and-forget (failures logged, not thrown)
|
|
12
|
+
*/
|
|
13
|
+
import type { AppContext, PipelineState, PipelineStep, PromptfooUrlEntry, StepResult, ValidationIssue } from "../../_vendor/ailf-core/index.d.ts";
|
|
14
|
+
/**
 * Pipeline step that publishes the evaluation report.
 *
 * The compiled implementation reads `results/latest/score-summary.json`
 * under `ctx.config.rootDir`, builds provenance, writes the report through
 * `ctx.reportStore` when one is present, and then runs the sinks.
 */
export declare class PublishReportStep implements PipelineStep {
|
|
15
|
+
/** Start value given to the constructor; the implementation computes the report's `durationMs` as `Date.now() - pipelineStart`. */
private readonly pipelineStart;
|
|
16
|
+
/** Constructor options, kept as fallbacks for values that upstream steps may have placed on the pipeline state. */
private readonly options;
|
|
17
|
+
/** Fixed step name. */
readonly name = "publish-report";
|
|
18
|
+
/** NOTE(review): presumably tells the orchestrator that a failure of this step is non-fatal — confirm against the step runner. */
readonly optional = true;
|
|
19
|
+
/**
 * @param pipelineStart - Pipeline start time; compared against `Date.now()`, so it is expected to be epoch milliseconds.
 * @param options - Optional fallbacks; the implementation defaults this to `{}` when omitted.
 */
constructor(pipelineStart: number, options?: {
|
|
20
|
+
/** Used only when `state.evalFingerprint` is null or undefined. */
evalFingerprint?: string;
|
|
21
|
+
/** Used only when `state.promptfooUrls` is null or undefined. */
promptfooUrls?: PromptfooUrlEntry[];
|
|
22
|
+
/** Report tag; takes precedence over `ctx.config.publishTag`. */
publishTag?: string;
|
|
23
|
+
});
|
|
24
|
+
/** Pre-run validation hook; the compiled implementation always returns an empty array. */
check(): ValidationIssue[];
|
|
25
|
+
/**
 * Builds and publishes the report, and sets `state.reportId` for downstream steps.
 *
 * @returns A step result; the implementation resolves with `status: "failed"` and an
 * `error` message when the score summary check reports errors or the summary file cannot be read or parsed.
 */
execute(ctx: AppContext, state: PipelineState): Promise<StepResult>;
|
|
26
|
+
}
|
|
@@ -0,0 +1,216 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pipeline step: Publish evaluation report to the report store.
|
|
3
|
+
*
|
|
4
|
+
* Inlines the logic from the former pipeline/steps/publish-report-step.ts.
|
|
5
|
+
* Uses ctx.reportStore and ctx.sinks from the composition root instead of
|
|
6
|
+
* constructing ReportStore and loadSinks() internally.
|
|
7
|
+
*
|
|
8
|
+
* Design principles:
|
|
9
|
+
* - P1: Reports are immutable events (write-once to Sanity)
|
|
10
|
+
* - P5: Local-first (pipeline never fails because of a store write)
|
|
11
|
+
* - P6: Sinks are fire-and-forget (failures logged, not thrown)
|
|
12
|
+
*/
|
|
13
|
+
import { readFileSync } from "fs";
|
|
14
|
+
import { resolve } from "path";
|
|
15
|
+
import { checkScoreSummaryValid } from "../../pipeline/checks.js";
|
|
16
|
+
import { buildProvenance, } from "../../pipeline/provenance.js";
|
|
17
|
+
import { generateReportId } from "../../report-store.js";
|
|
18
|
+
import { withRetry } from "../../sinks/retry.js";
|
|
19
|
+
export class PublishReportStep {
|
|
20
|
+
pipelineStart;
|
|
21
|
+
options;
|
|
22
|
+
name = "publish-report";
|
|
23
|
+
optional = true;
|
|
24
|
+
/**
 * Stores the pipeline start time and the optional fallback options.
 *
 * @param pipelineStart - Later subtracted from `Date.now()` in `execute` to compute the report duration.
 * @param options - Fallbacks (`evalFingerprint`, `promptfooUrls`, `publishTag`) read by `execute`; defaults to an empty object.
 */
constructor(pipelineStart, options = {}) {
|
|
25
|
+
this.pipelineStart = pipelineStart;
|
|
26
|
+
this.options = options;
|
|
27
|
+
}
|
|
28
|
+
/**
 * Pre-run validation hook.
 *
 * @returns Always an empty array; the score summary precondition is checked inside `execute` instead.
 */
check() {
|
|
29
|
+
return [];
|
|
30
|
+
}
|
|
31
|
+
async execute(ctx, state) {
|
|
32
|
+
const start = Date.now();
|
|
33
|
+
const { rootDir } = ctx.config;
|
|
34
|
+
// Precondition: score summary exists
|
|
35
|
+
const summaryIssues = checkScoreSummaryValid(rootDir);
|
|
36
|
+
const summaryErrors = summaryIssues.filter((i) => i.severity === "error");
|
|
37
|
+
if (summaryErrors.length > 0) {
|
|
38
|
+
return {
|
|
39
|
+
durationMs: Date.now() - start,
|
|
40
|
+
error: `Score summary missing: ${summaryErrors.map((e) => e.message).join("; ")}`,
|
|
41
|
+
status: "failed",
|
|
42
|
+
};
|
|
43
|
+
}
|
|
44
|
+
// Read score summary
|
|
45
|
+
let summary;
|
|
46
|
+
try {
|
|
47
|
+
const summaryPath = resolve(rootDir, "results", "latest", "score-summary.json");
|
|
48
|
+
summary = JSON.parse(readFileSync(summaryPath, "utf-8"));
|
|
49
|
+
}
|
|
50
|
+
catch (err) {
|
|
51
|
+
return {
|
|
52
|
+
durationMs: Date.now() - start,
|
|
53
|
+
error: `Failed to read score-summary.json: ${err instanceof Error ? err.message : String(err)}`,
|
|
54
|
+
status: "failed",
|
|
55
|
+
};
|
|
56
|
+
}
|
|
57
|
+
// Build provenance — prefer state values from upstream steps,
|
|
58
|
+
// fall back to constructor options for backward compatibility
|
|
59
|
+
const provenanceOptions = {
|
|
60
|
+
evalFingerprint: state.evalFingerprint ?? this.options.evalFingerprint,
|
|
61
|
+
promptfooUrls: state.promptfooUrls ?? this.options.promptfooUrls,
|
|
62
|
+
};
|
|
63
|
+
const provenanceInput = buildProvenanceInput(summary, ctx, provenanceOptions);
|
|
64
|
+
const provenance = buildProvenance(provenanceInput);
|
|
65
|
+
// Create report
|
|
66
|
+
const now = new Date().toISOString();
|
|
67
|
+
const reportId = generateReportId();
|
|
68
|
+
const durationMs = Date.now() - this.pipelineStart;
|
|
69
|
+
// Auto-compare against most recent comparable baseline
|
|
70
|
+
const comparison = ctx.reportStore
|
|
71
|
+
? (await ctx.reportStore.autoCompare(summary, provenance, now))
|
|
72
|
+
: null;
|
|
73
|
+
const report = {
|
|
74
|
+
comparison: comparison ?? undefined,
|
|
75
|
+
completedAt: now,
|
|
76
|
+
durationMs,
|
|
77
|
+
id: reportId,
|
|
78
|
+
provenance,
|
|
79
|
+
summary,
|
|
80
|
+
tag: this.options.publishTag ?? ctx.config.publishTag,
|
|
81
|
+
};
|
|
82
|
+
// Share reportId with downstream steps (CallbackStep + orchestrator job update)
|
|
83
|
+
state.reportId = reportId;
|
|
84
|
+
// Write to store (system of record — best-effort, P5)
|
|
85
|
+
const sanityResult = ctx.reportStore
|
|
86
|
+
? await ctx.reportStore.write(report)
|
|
87
|
+
: null;
|
|
88
|
+
// Run sinks (fire-and-forget, P6)
|
|
89
|
+
const publishResult = await runSinks(report, ctx);
|
|
90
|
+
// Build result summary
|
|
91
|
+
const parts = [];
|
|
92
|
+
if (sanityResult) {
|
|
93
|
+
parts.push(`report:${sanityResult}`);
|
|
94
|
+
}
|
|
95
|
+
else {
|
|
96
|
+
parts.push("Sanity write skipped (no token or unreachable)");
|
|
97
|
+
}
|
|
98
|
+
if (comparison) {
|
|
99
|
+
const delta = comparison.deltas.overall;
|
|
100
|
+
const sign = delta >= 0 ? "+" : "";
|
|
101
|
+
parts.push(`vs baseline: ${sign}${delta.toFixed(1)}`);
|
|
102
|
+
}
|
|
103
|
+
if (publishResult.sinkResults.length > 0) {
|
|
104
|
+
const succeeded = publishResult.sinkResults.filter((r) => r.result.status === "success").length;
|
|
105
|
+
const total = publishResult.sinkResults.length;
|
|
106
|
+
parts.push(`sinks: ${succeeded}/${total}`);
|
|
107
|
+
}
|
|
108
|
+
return {
|
|
109
|
+
durationMs: Date.now() - start,
|
|
110
|
+
status: "success",
|
|
111
|
+
summary: `Published — ${parts.join(", ")}`,
|
|
112
|
+
};
|
|
113
|
+
}
|
|
114
|
+
}
|
|
115
|
+
// ---------------------------------------------------------------------------
|
|
116
|
+
// Helpers
|
|
117
|
+
// ---------------------------------------------------------------------------
|
|
118
|
+
/**
|
|
119
|
+
* Assemble provenance input from the score summary and pipeline context.
|
|
120
|
+
*/
|
|
121
|
+
function buildProvenanceInput(summary, ctx, options) {
|
|
122
|
+
const areas = summary.scores.map((s) => s.feature);
|
|
123
|
+
const mode = ctx.config.mode;
|
|
124
|
+
// Read document IDs from config
|
|
125
|
+
const sanityDocumentIds = ctx.config.sanityDocumentArgs;
|
|
126
|
+
// Read task filter from config
|
|
127
|
+
const taskIds = ctx.config.tasks;
|
|
128
|
+
// Build source from summary metadata or config
|
|
129
|
+
const source = {
|
|
130
|
+
baseUrl: summary.source?.baseUrl ?? "https://www.sanity.io/docs",
|
|
131
|
+
dataset: summary.source?.dataset ?? ctx.config.datasetOverride ?? "next",
|
|
132
|
+
documentIds: [],
|
|
133
|
+
llmsTxt: (summary.source?.baseUrl ?? "https://www.sanity.io/docs") + "/llms.txt",
|
|
134
|
+
name: summary.source?.name ?? "production",
|
|
135
|
+
perspective: summary.source?.perspective ??
|
|
136
|
+
ctx.config.perspectiveOverride ??
|
|
137
|
+
undefined,
|
|
138
|
+
priorityDomain: "sanity.io",
|
|
139
|
+
projectId: summary.source?.projectId ?? ctx.config.projectIdOverride ?? "3do82whm",
|
|
140
|
+
studioOrigin: "https://admin.sanity.io",
|
|
141
|
+
urls: [],
|
|
142
|
+
};
|
|
143
|
+
// Debug runs don't store fingerprints
|
|
144
|
+
const evalFingerprint = !ctx.config.debug?.enabled
|
|
145
|
+
? options.evalFingerprint
|
|
146
|
+
: undefined;
|
|
147
|
+
return {
|
|
148
|
+
areas,
|
|
149
|
+
evalFingerprint,
|
|
150
|
+
mode,
|
|
151
|
+
promptfooUrls: options.promptfooUrls,
|
|
152
|
+
rootDir: ctx.config.rootDir,
|
|
153
|
+
sanityDocumentIds,
|
|
154
|
+
source,
|
|
155
|
+
taskIds,
|
|
156
|
+
};
|
|
157
|
+
}
|
|
158
|
+
/**
|
|
159
|
+
* Fan out a report to all configured sinks.
|
|
160
|
+
*
|
|
161
|
+
* Uses ctx.sinks from the composition root instead of loadSinks().
|
|
162
|
+
* Each sink is run with retry logic (3 attempts, exponential backoff).
|
|
163
|
+
* Failures are logged but never block the pipeline.
|
|
164
|
+
*/
|
|
165
|
+
async function runSinks(report, ctx) {
|
|
166
|
+
const sinks = (ctx.sinks ?? []);
|
|
167
|
+
const sinkResults = [];
|
|
168
|
+
if (sinks.length === 0) {
|
|
169
|
+
return { report, sinkResults };
|
|
170
|
+
}
|
|
171
|
+
// Health check all sinks first (non-blocking)
|
|
172
|
+
for (const sink of sinks) {
|
|
173
|
+
if (sink.healthCheck) {
|
|
174
|
+
try {
|
|
175
|
+
const health = await sink.healthCheck();
|
|
176
|
+
if (!health.healthy) {
|
|
177
|
+
console.warn(` ⚠️ Sink ${sink.name} health check failed: ${health.reason}`);
|
|
178
|
+
}
|
|
179
|
+
}
|
|
180
|
+
catch (err) {
|
|
181
|
+
console.warn(` ⚠️ Sink ${sink.name} health check error: ${err instanceof Error ? err.message : String(err)}`);
|
|
182
|
+
}
|
|
183
|
+
}
|
|
184
|
+
}
|
|
185
|
+
// Publish to all sinks in parallel (fire-and-forget with retries)
|
|
186
|
+
const settled = await Promise.allSettled(sinks.map(async (sink) => {
|
|
187
|
+
const result = await withRetry(() => sink.publish(report));
|
|
188
|
+
return { name: sink.name, result };
|
|
189
|
+
}));
|
|
190
|
+
for (const outcome of settled) {
|
|
191
|
+
if (outcome.status === "fulfilled") {
|
|
192
|
+
sinkResults.push(outcome.value);
|
|
193
|
+
const { name, result } = outcome.value;
|
|
194
|
+
if (result.status === "failed") {
|
|
195
|
+
console.warn(` ⚠️ Sink ${name} failed: ${result.error}`);
|
|
196
|
+
}
|
|
197
|
+
else if (result.status === "skipped") {
|
|
198
|
+
console.log(` ⏭️ Sink ${name} skipped: ${result.reason}`);
|
|
199
|
+
}
|
|
200
|
+
else {
|
|
201
|
+
console.log(` ✅ Sink ${name} delivered${result.detail ? ` (${result.detail})` : ""}`);
|
|
202
|
+
}
|
|
203
|
+
}
|
|
204
|
+
else {
|
|
205
|
+
const error = outcome.reason instanceof Error
|
|
206
|
+
? outcome.reason.message
|
|
207
|
+
: String(outcome.reason);
|
|
208
|
+
sinkResults.push({
|
|
209
|
+
name: "unknown",
|
|
210
|
+
result: { error, status: "failed" },
|
|
211
|
+
});
|
|
212
|
+
console.warn(` ⚠️ Sink delivery error: ${error}`);
|
|
213
|
+
}
|
|
214
|
+
}
|
|
215
|
+
return { report, sinkResults };
|
|
216
|
+
}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pipeline step: Launch readiness report.
|
|
3
|
+
*
|
|
4
|
+
* Calls pure functions from pipeline/readiness-report.ts directly.
|
|
5
|
+
* Optional step — failure doesn't stop the pipeline.
|
|
6
|
+
*/
|
|
7
|
+
import type { AppContext, PipelineStep, StepResult, ValidationIssue } from "../../_vendor/ailf-core/index.d.ts";
|
|
8
|
+
/**
 * Launch-readiness report step.
 *
 * Marked optional: per the module header, a failure here does not stop the
 * pipeline.
 */
export declare class ReadinessStep implements PipelineStep {
    /** Step identifier. */
    readonly name: "readiness";
    /** Always `true` for this step. */
    readonly optional: true;
    /** Up-front validation issues for this step. */
    check(): ValidationIssue[];
    /** Run the step against the given application context. */
    execute(ctx: AppContext): Promise<StepResult>;
}
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pipeline step: Launch readiness report.
|
|
3
|
+
*
|
|
4
|
+
* Calls pure functions from pipeline/readiness-report.ts directly.
|
|
5
|
+
* Optional step — failure doesn't stop the pipeline.
|
|
6
|
+
*/
|
|
7
|
+
import { existsSync, readFileSync, writeFileSync } from "fs";
|
|
8
|
+
import { resolve } from "path";
|
|
9
|
+
import { load } from "js-yaml";
|
|
10
|
+
import { formatReadinessMarkdown, generateReadinessReport, } from "../../pipeline/readiness-report.js";
|
|
11
|
+
import { ThresholdConfigSchema } from "../../pipeline/schemas.js";
|
|
12
|
+
export class ReadinessStep {
|
|
13
|
+
name = "readiness";
|
|
14
|
+
optional = true;
|
|
15
|
+
check() {
|
|
16
|
+
return [];
|
|
17
|
+
}
|
|
18
|
+
async execute(ctx) {
|
|
19
|
+
const root = ctx.config.rootDir;
|
|
20
|
+
const start = Date.now();
|
|
21
|
+
try {
|
|
22
|
+
const scoreSummaryPath = resolve(root, "results", "latest", "score-summary.json");
|
|
23
|
+
const thresholdsPath = resolve(root, "config", "thresholds.yaml");
|
|
24
|
+
if (!existsSync(scoreSummaryPath)) {
|
|
25
|
+
return {
|
|
26
|
+
durationMs: Date.now() - start,
|
|
27
|
+
error: "score-summary.json not found",
|
|
28
|
+
status: "failed",
|
|
29
|
+
};
|
|
30
|
+
}
|
|
31
|
+
if (!existsSync(thresholdsPath)) {
|
|
32
|
+
return {
|
|
33
|
+
durationMs: Date.now() - start,
|
|
34
|
+
error: "config/thresholds.yaml not found",
|
|
35
|
+
status: "failed",
|
|
36
|
+
};
|
|
37
|
+
}
|
|
38
|
+
const scoreSummary = JSON.parse(readFileSync(scoreSummaryPath, "utf-8"));
|
|
39
|
+
const rawThresholds = load(readFileSync(thresholdsPath, "utf-8"));
|
|
40
|
+
const thresholdConfig = ThresholdConfigSchema.parse(rawThresholds);
|
|
41
|
+
const gapPath = resolve(root, "results", "latest", "gap-analysis.json");
|
|
42
|
+
const gapAnalysis = existsSync(gapPath)
|
|
43
|
+
? JSON.parse(readFileSync(gapPath, "utf-8"))
|
|
44
|
+
: undefined;
|
|
45
|
+
const readinessAreas = ctx.config.areas ?? scoreSummary.scores.map((s) => s.feature);
|
|
46
|
+
const readinessLines = [];
|
|
47
|
+
for (const area of readinessAreas) {
|
|
48
|
+
const areaScore = scoreSummary.scores.find((s) => s.feature === area);
|
|
49
|
+
if (!areaScore) {
|
|
50
|
+
ctx.logger.warn(`Area "${area}" not found in scores — skipping`);
|
|
51
|
+
continue;
|
|
52
|
+
}
|
|
53
|
+
const report = generateReadinessReport({
|
|
54
|
+
area,
|
|
55
|
+
gapAnalysis,
|
|
56
|
+
scoreSummary,
|
|
57
|
+
thresholdConfig,
|
|
58
|
+
});
|
|
59
|
+
const md = formatReadinessMarkdown(report);
|
|
60
|
+
readinessLines.push(md);
|
|
61
|
+
console.log(md);
|
|
62
|
+
}
|
|
63
|
+
if (readinessLines.length > 0) {
|
|
64
|
+
writeFileSync(resolve(root, "results", "latest", "readiness-report.md"), readinessLines.join("\n---\n\n"));
|
|
65
|
+
}
|
|
66
|
+
const passCount = readinessAreas.filter((area) => {
|
|
67
|
+
const areaScore = scoreSummary.scores.find((s) => s.feature === area);
|
|
68
|
+
if (!areaScore)
|
|
69
|
+
return false;
|
|
70
|
+
const report = generateReadinessReport({
|
|
71
|
+
area,
|
|
72
|
+
scoreSummary,
|
|
73
|
+
thresholdConfig,
|
|
74
|
+
});
|
|
75
|
+
return report.pass;
|
|
76
|
+
}).length;
|
|
77
|
+
return {
|
|
78
|
+
durationMs: Date.now() - start,
|
|
79
|
+
status: "success",
|
|
80
|
+
summary: `${passCount}/${readinessAreas.length} areas ready`,
|
|
81
|
+
};
|
|
82
|
+
}
|
|
83
|
+
catch (err) {
|
|
84
|
+
return {
|
|
85
|
+
durationMs: Date.now() - start,
|
|
86
|
+
error: err instanceof Error ? err.message : String(err),
|
|
87
|
+
status: "failed",
|
|
88
|
+
};
|
|
89
|
+
}
|
|
90
|
+
}
|
|
91
|
+
}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pipeline step: Generate PR comment / report from scores.
|
|
3
|
+
*
|
|
4
|
+
* Calls generatePrComment() from pipeline/pr-comment.ts with typed options.
|
|
5
|
+
* No env bridge or process.argv manipulation needed.
|
|
6
|
+
*/
|
|
7
|
+
import type { AppContext, PipelineStep, StepResult, ValidationIssue } from "../../_vendor/ailf-core/index.d.ts";
|
|
8
|
+
/**
 * Pipeline step that generates the PR comment / report from scores.
 */
export declare class ReportStep implements PipelineStep {
    /** Step identifier. */
    readonly name: "report";
    /** Up-front validation issues for this step. */
    check(): ValidationIssue[];
    /** Run the step against the given application context. */
    execute(ctx: AppContext): Promise<StepResult>;
}
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pipeline step: Generate PR comment / report from scores.
|
|
3
|
+
*
|
|
4
|
+
* Calls generatePrComment() from pipeline/pr-comment.ts with typed options.
|
|
5
|
+
* No env bridge or process.argv manipulation needed.
|
|
6
|
+
*/
|
|
7
|
+
import { resolve } from "path";
|
|
8
|
+
import { checkScoreSummaryValid } from "../../pipeline/checks.js";
|
|
9
|
+
import { generatePrComment } from "../../pipeline/pr-comment.js";
|
|
10
|
+
const DEFAULT_REPORT_PATH = "results/latest/pr-comment.md";
|
|
11
|
+
export class ReportStep {
|
|
12
|
+
name = "report";
|
|
13
|
+
check() {
|
|
14
|
+
return [];
|
|
15
|
+
}
|
|
16
|
+
async execute(ctx) {
|
|
17
|
+
const start = Date.now();
|
|
18
|
+
// Precondition: score summary exists
|
|
19
|
+
const summaryIssues = checkScoreSummaryValid(ctx.config.rootDir);
|
|
20
|
+
const summaryErrors = summaryIssues.filter((i) => i.severity === "error");
|
|
21
|
+
if (summaryErrors.length > 0) {
|
|
22
|
+
return {
|
|
23
|
+
durationMs: Date.now() - start,
|
|
24
|
+
error: `Score summary missing: ${summaryErrors.map((e) => e.message).join("; ")}`,
|
|
25
|
+
status: "failed",
|
|
26
|
+
};
|
|
27
|
+
}
|
|
28
|
+
const resolvedOutput = ctx.config.outputPath ?? resolve(ctx.config.rootDir, DEFAULT_REPORT_PATH);
|
|
29
|
+
try {
|
|
30
|
+
generatePrComment({
|
|
31
|
+
outputPath: resolvedOutput,
|
|
32
|
+
promptfooUrl: ctx.config.promptfooUrl,
|
|
33
|
+
rootDir: ctx.config.rootDir,
|
|
34
|
+
});
|
|
35
|
+
}
|
|
36
|
+
catch (err) {
|
|
37
|
+
return {
|
|
38
|
+
durationMs: Date.now() - start,
|
|
39
|
+
error: `pr-comment failed: ${err instanceof Error ? err.message : String(err)}`,
|
|
40
|
+
status: "failed",
|
|
41
|
+
};
|
|
42
|
+
}
|
|
43
|
+
return {
|
|
44
|
+
durationMs: Date.now() - start,
|
|
45
|
+
status: "success",
|
|
46
|
+
summary: `Report written to ${resolvedOutput}`,
|
|
47
|
+
};
|
|
48
|
+
}
|
|
49
|
+
}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pipeline step: Run Promptfoo evaluation.
|
|
3
|
+
*
|
|
4
|
+
* Uses ctx.evalRunner (the EvalRunner port) for the actual Promptfoo
|
|
5
|
+
* invocation. Builds a clean env object for the subprocess instead of
|
|
6
|
+
* polluting global process.env.
|
|
7
|
+
*/
|
|
8
|
+
import type { ConcreteEvalMode } from "../../_vendor/ailf-shared/index.d.ts";
|
|
9
|
+
import type { AppContext, PipelineState, PipelineStep, StepResult, ValidationIssue } from "../../_vendor/ailf-core/index.d.ts";
|
|
10
|
+
/**
 * Pipeline step that runs the Promptfoo evaluation for one concrete mode.
 */
export declare class RunEvalStep implements PipelineStep {
    /** Step name; the implementation sets it to `eval-<mode>`. */
    readonly name: string;
    /** The evaluation mode this step instance was created for. */
    private readonly mode;
    constructor(mode: ConcreteEvalMode);
    /** Up-front validation issues for this step. */
    check(): ValidationIssue[];
    /** Run the evaluation; may record values on the shared pipeline state. */
    execute(ctx: AppContext, state: PipelineState): Promise<StepResult>;
    /** Paths whose contents determine whether this step's cached output is reusable. */
    cacheInputs(ctx: AppContext): string[];
}
|
|
@@ -0,0 +1,195 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pipeline step: Run Promptfoo evaluation.
|
|
3
|
+
*
|
|
4
|
+
* Uses ctx.evalRunner (the EvalRunner port) for the actual Promptfoo
|
|
5
|
+
* invocation. Builds a clean env object for the subprocess instead of
|
|
6
|
+
* polluting global process.env.
|
|
7
|
+
*/
|
|
8
|
+
import { existsSync, mkdirSync, writeFileSync } from "fs";
|
|
9
|
+
import { resolve } from "path";
|
|
10
|
+
import { getStepInputPaths } from "../../pipeline/cache.js";
|
|
11
|
+
import { checkCanonicalContextsExist, checkGeneratedConfigsExist, checkResultsExist, } from "../../pipeline/checks.js";
|
|
12
|
+
import { computeEvalFingerprint } from "../../pipeline/eval-fingerprint.js";
|
|
13
|
+
import { buildFilterFlags, CONFIG_FILES, RESULTS_FILES, scanResultsForErrors, } from "../../pipeline/eval-constants.js";
|
|
14
|
+
export class RunEvalStep {
|
|
15
|
+
mode;
|
|
16
|
+
name;
|
|
17
|
+
constructor(mode) {
|
|
18
|
+
this.mode = mode;
|
|
19
|
+
this.name = `eval-${mode}`;
|
|
20
|
+
}
|
|
21
|
+
check() {
|
|
22
|
+
return [];
|
|
23
|
+
}
|
|
24
|
+
async execute(ctx, state) {
|
|
25
|
+
if (ctx.config.skipEval) {
|
|
26
|
+
return { status: "skipped", reason: "--skip-eval" };
|
|
27
|
+
}
|
|
28
|
+
const start = Date.now();
|
|
29
|
+
const { rootDir, debug, concurrency, noCache } = ctx.config;
|
|
30
|
+
// Precondition: config file exists
|
|
31
|
+
const configIssues = checkGeneratedConfigsExist(rootDir);
|
|
32
|
+
const configErrors = configIssues.filter((i) => i.severity === "error");
|
|
33
|
+
if (configErrors.length > 0) {
|
|
34
|
+
return {
|
|
35
|
+
durationMs: Date.now() - start,
|
|
36
|
+
error: `Config files missing: ${configErrors.map((e) => e.message).join("; ")}`,
|
|
37
|
+
status: "failed",
|
|
38
|
+
};
|
|
39
|
+
}
|
|
40
|
+
// Precondition: canonical context files exist for filtered tasks.
|
|
41
|
+
// Must apply the same area/task filter as fetch-docs so we only
|
|
42
|
+
// check contexts that were actually fetched.
|
|
43
|
+
const filter = ctx.config.areas || ctx.config.tasks
|
|
44
|
+
? {
|
|
45
|
+
...(ctx.config.areas ? { areas: ctx.config.areas } : {}),
|
|
46
|
+
...(ctx.config.tasks ? { taskIds: ctx.config.tasks } : {}),
|
|
47
|
+
}
|
|
48
|
+
: undefined;
|
|
49
|
+
const tasks = await ctx.taskSource.loadTasks(filter);
|
|
50
|
+
const taskIds = tasks.map((t) => t.id);
|
|
51
|
+
const contextIssues = checkCanonicalContextsExist(rootDir, taskIds);
|
|
52
|
+
const contextErrors = contextIssues.filter((i) => i.severity === "error");
|
|
53
|
+
if (contextErrors.length > 0) {
|
|
54
|
+
return {
|
|
55
|
+
durationMs: Date.now() - start,
|
|
56
|
+
error: `Context files missing. Run fetch-docs first. ${contextErrors.map((e) => e.message).join("; ")}`,
|
|
57
|
+
status: "failed",
|
|
58
|
+
};
|
|
59
|
+
}
|
|
60
|
+
// -----------------------------------------------------------------
|
|
61
|
+
// Compute eval fingerprint (for remote cache + provenance)
|
|
62
|
+
// -----------------------------------------------------------------
|
|
63
|
+
let evalFingerprint;
|
|
64
|
+
if (!debug?.enabled) {
|
|
65
|
+
try {
|
|
66
|
+
evalFingerprint = computeEvalFingerprint({
|
|
67
|
+
filter: ctx.config.areas || ctx.config.tasks
|
|
68
|
+
? {
|
|
69
|
+
areas: ctx.config.areas,
|
|
70
|
+
taskIds: ctx.config.tasks,
|
|
71
|
+
}
|
|
72
|
+
: undefined,
|
|
73
|
+
graderModel: "default",
|
|
74
|
+
mode: this.mode,
|
|
75
|
+
rootDir,
|
|
76
|
+
});
|
|
77
|
+
// Share fingerprint with downstream steps (PublishReportStep)
|
|
78
|
+
state.evalFingerprint = evalFingerprint;
|
|
79
|
+
}
|
|
80
|
+
catch (err) {
|
|
81
|
+
console.warn(` ⚠️ Could not compute eval fingerprint: ${err instanceof Error ? err.message : String(err)}`);
|
|
82
|
+
}
|
|
83
|
+
}
|
|
84
|
+
// -----------------------------------------------------------------
|
|
85
|
+
// Remote cache check
|
|
86
|
+
// -----------------------------------------------------------------
|
|
87
|
+
if (evalFingerprint &&
|
|
88
|
+
!noCache &&
|
|
89
|
+
!ctx.config.noRemoteCache &&
|
|
90
|
+
ctx.reportStore) {
|
|
91
|
+
const remoteCacheResult = await checkRemoteCache(evalFingerprint, ctx.reportStore, rootDir);
|
|
92
|
+
if (remoteCacheResult) {
|
|
93
|
+
return {
|
|
94
|
+
durationMs: Date.now() - start,
|
|
95
|
+
status: "success",
|
|
96
|
+
summary: `Skipped (remote cache hit) — reusing report ${remoteCacheResult.reportId} from ${remoteCacheResult.completedAt}`,
|
|
97
|
+
};
|
|
98
|
+
}
|
|
99
|
+
}
|
|
100
|
+
// -----------------------------------------------------------------
|
|
101
|
+
// Build subprocess env explicitly (no global mutation)
|
|
102
|
+
// -----------------------------------------------------------------
|
|
103
|
+
const subprocessEnv = {
|
|
104
|
+
PROMPTFOO_DISABLE_SHARE_EMAIL_REQUEST: "1",
|
|
105
|
+
};
|
|
106
|
+
// Only set env vars that differ from defaults — the subprocess inherits
|
|
107
|
+
// process.env via PromptfooEvalAdapter's { ...process.env, ...config.env }
|
|
108
|
+
if (ctx.config.mode !== "baseline") {
|
|
109
|
+
subprocessEnv.EVAL_MODE = ctx.config.mode;
|
|
110
|
+
}
|
|
111
|
+
if (ctx.config.searchMode !== "open") {
|
|
112
|
+
subprocessEnv.EVAL_SEARCH_MODE = ctx.config.searchMode;
|
|
113
|
+
}
|
|
114
|
+
if (ctx.config.allowedOrigins?.length) {
|
|
115
|
+
subprocessEnv.DOC_ALLOWED_ORIGINS = ctx.config.allowedOrigins.join(",");
|
|
116
|
+
}
|
|
117
|
+
// -----------------------------------------------------------------
|
|
118
|
+
// Execute — use the EvalRunner port
|
|
119
|
+
// -----------------------------------------------------------------
|
|
120
|
+
const configFile = CONFIG_FILES[this.mode];
|
|
121
|
+
const filterFlags = buildFilterFlags(debug);
|
|
122
|
+
const result = await ctx.evalRunner.run({
|
|
123
|
+
concurrency,
|
|
124
|
+
configPath: configFile,
|
|
125
|
+
env: subprocessEnv,
|
|
126
|
+
filterFlags: filterFlags.trim() || undefined,
|
|
127
|
+
});
|
|
128
|
+
// Check if results were written despite non-zero exit
|
|
129
|
+
if (result.status === "failed") {
|
|
130
|
+
const resultsExist = checkResultsExist(rootDir, RESULTS_FILES[this.mode]);
|
|
131
|
+
const hasResults = resultsExist.filter((i) => i.severity === "error").length === 0;
|
|
132
|
+
if (!hasResults) {
|
|
133
|
+
return {
|
|
134
|
+
durationMs: Date.now() - start,
|
|
135
|
+
error: result.error ?? `promptfoo eval failed (mode: ${this.mode})`,
|
|
136
|
+
status: "failed",
|
|
137
|
+
};
|
|
138
|
+
}
|
|
139
|
+
}
|
|
140
|
+
// Postcondition: results file exists
|
|
141
|
+
const resultsIssues = checkResultsExist(rootDir, RESULTS_FILES[this.mode]);
|
|
142
|
+
const resultsErrors = resultsIssues.filter((i) => i.severity === "error");
|
|
143
|
+
if (resultsErrors.length > 0) {
|
|
144
|
+
return {
|
|
145
|
+
durationMs: Date.now() - start,
|
|
146
|
+
error: `Postcondition failed: ${resultsErrors.map((e) => e.message).join("; ")}`,
|
|
147
|
+
status: "failed",
|
|
148
|
+
};
|
|
149
|
+
}
|
|
150
|
+
// Scan results for errors
|
|
151
|
+
const errorSummary = scanResultsForErrors(resolve(rootDir, RESULTS_FILES[this.mode]));
|
|
152
|
+
if (errorSummary) {
|
|
153
|
+
console.log();
|
|
154
|
+
console.log(errorSummary);
|
|
155
|
+
}
|
|
156
|
+
const durationMs = Date.now() - start;
|
|
157
|
+
return {
|
|
158
|
+
durationMs,
|
|
159
|
+
status: "success",
|
|
160
|
+
summary: `Evaluation complete (mode: ${this.mode}${debug?.enabled ? ", debug" : ""})`,
|
|
161
|
+
};
|
|
162
|
+
}
|
|
163
|
+
cacheInputs(ctx) {
|
|
164
|
+
return getStepInputPaths(ctx.config.rootDir, `eval-${this.mode}`);
|
|
165
|
+
}
|
|
166
|
+
}
|
|
167
|
+
// ---------------------------------------------------------------------------
|
|
168
|
+
// Remote cache helpers
|
|
169
|
+
// ---------------------------------------------------------------------------
|
|
170
|
+
async function checkRemoteCache(fingerprint, reportStore, rootDir) {
|
|
171
|
+
try {
|
|
172
|
+
const startQuery = Date.now();
|
|
173
|
+
const cachedReport = (await reportStore.findByFingerprint(fingerprint));
|
|
174
|
+
const queryMs = Date.now() - startQuery;
|
|
175
|
+
if (!cachedReport) {
|
|
176
|
+
console.log(` ℹ️ Remote cache miss — no report matches fingerprint (${queryMs}ms)`);
|
|
177
|
+
return null;
|
|
178
|
+
}
|
|
179
|
+
const outDir = resolve(rootDir, "results", "latest");
|
|
180
|
+
if (!existsSync(outDir)) {
|
|
181
|
+
mkdirSync(outDir, { recursive: true });
|
|
182
|
+
}
|
|
183
|
+
writeFileSync(resolve(outDir, "score-summary.json"), JSON.stringify(cachedReport.summary, null, 2));
|
|
184
|
+
console.log(` ✅ Remote cache hit — reusing report ${cachedReport.id} from ${cachedReport.completedAt}`);
|
|
185
|
+
console.log(` ℹ️ Fingerprint: ${fingerprint.slice(0, 16)}... (${queryMs}ms)`);
|
|
186
|
+
return {
|
|
187
|
+
completedAt: cachedReport.completedAt,
|
|
188
|
+
reportId: cachedReport.id,
|
|
189
|
+
};
|
|
190
|
+
}
|
|
191
|
+
catch (err) {
|
|
192
|
+
console.warn(` ⚠️ Remote cache check failed: ${err instanceof Error ? err.message : String(err)}`);
|
|
193
|
+
return null;
|
|
194
|
+
}
|
|
195
|
+
}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pipeline step: Validate configuration.
|
|
3
|
+
*
|
|
4
|
+
* Wraps the existing validateConfiguration() + checkEnvironment() logic
|
|
5
|
+
* behind the PipelineStep interface.
|
|
6
|
+
*/
|
|
7
|
+
import type { AppContext, PipelineStep, StepResult, ValidationIssue } from "../../_vendor/ailf-core/index.d.ts";
|
|
8
|
+
/**
 * Pipeline step that validates configuration.
 */
export declare class ValidateStep implements PipelineStep {
    /** Step identifier. */
    readonly name: "validate";
    /** Up-front validation issues for this step. */
    check(): ValidationIssue[];
    /** Run the step against the given application context. */
    execute(ctx: AppContext): Promise<StepResult>;
}
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pipeline step: Validate configuration.
|
|
3
|
+
*
|
|
4
|
+
* Wraps the existing validateConfiguration() + checkEnvironment() logic
|
|
5
|
+
* behind the PipelineStep interface.
|
|
6
|
+
*/
|
|
7
|
+
import { checkEnvironment } from "../../pipeline/checks.js";
|
|
8
|
+
import { validateConfiguration } from "../../pipeline/validate.js";
|
|
9
|
+
export class ValidateStep {
|
|
10
|
+
name = "validate";
|
|
11
|
+
check() {
|
|
12
|
+
// Validation step has no preconditions — it IS the precondition check.
|
|
13
|
+
return [];
|
|
14
|
+
}
|
|
15
|
+
async execute(ctx) {
|
|
16
|
+
const start = Date.now();
|
|
17
|
+
const validation = validateConfiguration(ctx.config.rootDir);
|
|
18
|
+
const envIssues = checkEnvironment(ctx.config.rootDir);
|
|
19
|
+
validation.issues.push(...envIssues);
|
|
20
|
+
const errors = validation.issues.filter((i) => i.severity === "error");
|
|
21
|
+
const warnings = validation.issues.filter((i) => i.severity === "warning");
|
|
22
|
+
for (const w of warnings) {
|
|
23
|
+
ctx.logger.warn(`[${w.source}] ${w.message}`);
|
|
24
|
+
}
|
|
25
|
+
if (errors.length > 0) {
|
|
26
|
+
for (const e of errors) {
|
|
27
|
+
ctx.logger.error(`[${e.source}] ${e.message}${e.path ? ` at ${e.path}` : ""}`);
|
|
28
|
+
}
|
|
29
|
+
return {
|
|
30
|
+
durationMs: Date.now() - start,
|
|
31
|
+
error: `Configuration invalid: ${errors.length} error(s)`,
|
|
32
|
+
status: "failed",
|
|
33
|
+
};
|
|
34
|
+
}
|
|
35
|
+
return {
|
|
36
|
+
durationMs: Date.now() - start,
|
|
37
|
+
status: "success",
|
|
38
|
+
summary: `Configuration valid (${warnings.length} warning(s))`,
|
|
39
|
+
};
|
|
40
|
+
}
|
|
41
|
+
}
|