@sanity/ailf 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +89 -0
- package/bin/ailf.js +64 -0
- package/canonical/grader-references/README.md +88 -0
- package/canonical/grader-references/groq.yaml +234 -0
- package/canonical/grader-references/studio-setup.yaml +275 -0
- package/canonical/reference-solutions/.gitkeep +1 -0
- package/canonical/reference-solutions/frameworks/nuxt.ts +119 -0
- package/canonical/reference-solutions/frameworks/remix.tsx +100 -0
- package/canonical/reference-solutions/functions/publish-webhook.ts +60 -0
- package/canonical/reference-solutions/groq/advanced-filtering.ts +379 -0
- package/canonical/reference-solutions/groq/blog-queries.ts +137 -0
- package/canonical/reference-solutions/groq/joins-references.ts +300 -0
- package/canonical/reference-solutions/nextjs/app-router-integration.tsx +128 -0
- package/canonical/reference-solutions/studio-setup/blog-schema.ts +143 -0
- package/canonical/reference-solutions/studio-setup/custom-tool.tsx +78 -0
- package/canonical/reference-solutions/visual-editing/live-preview.tsx +137 -0
- package/canonical/reference-solutions/visual-editing/presentation-nextjs.tsx +130 -0
- package/config/airbyte/ai_literacy_framework.connector.yaml +639 -0
- package/config/bigquery/README.md +74 -0
- package/config/bigquery/views/area_scores.sql +87 -0
- package/config/bigquery/views/reports.sql +49 -0
- package/config/features.yaml +116 -0
- package/config/models.yaml +115 -0
- package/config/prompts.yaml +75 -0
- package/config/rubrics.yaml +62 -0
- package/config/schedules.yaml +43 -0
- package/config/sinks.yaml +54 -0
- package/config/sources.yaml +51 -0
- package/config/thresholds.yaml +49 -0
- package/dist/_vendor/ailf-core/examples/index.d.ts +190 -0
- package/dist/_vendor/ailf-core/examples/index.js +285 -0
- package/dist/_vendor/ailf-core/index.d.ts +17 -0
- package/dist/_vendor/ailf-core/index.js +17 -0
- package/dist/_vendor/ailf-core/ports/cache-store.d.ts +72 -0
- package/dist/_vendor/ailf-core/ports/cache-store.js +17 -0
- package/dist/_vendor/ailf-core/ports/config-source.d.ts +33 -0
- package/dist/_vendor/ailf-core/ports/config-source.js +15 -0
- package/dist/_vendor/ailf-core/ports/context.d.ts +172 -0
- package/dist/_vendor/ailf-core/ports/context.js +14 -0
- package/dist/_vendor/ailf-core/ports/doc-fetcher.d.ts +131 -0
- package/dist/_vendor/ailf-core/ports/doc-fetcher.js +12 -0
- package/dist/_vendor/ailf-core/ports/eval-runner.d.ts +24 -0
- package/dist/_vendor/ailf-core/ports/eval-runner.js +8 -0
- package/dist/_vendor/ailf-core/ports/index.d.ts +15 -0
- package/dist/_vendor/ailf-core/ports/index.js +7 -0
- package/dist/_vendor/ailf-core/ports/logger.d.ts +36 -0
- package/dist/_vendor/ailf-core/ports/logger.js +11 -0
- package/dist/_vendor/ailf-core/ports/pipeline-step.d.ts +46 -0
- package/dist/_vendor/ailf-core/ports/pipeline-step.js +8 -0
- package/dist/_vendor/ailf-core/ports/task-source.d.ts +159 -0
- package/dist/_vendor/ailf-core/ports/task-source.js +72 -0
- package/dist/_vendor/ailf-core/schemas/callback-payload.d.ts +24 -0
- package/dist/_vendor/ailf-core/schemas/callback-payload.js +29 -0
- package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +55 -0
- package/dist/_vendor/ailf-core/schemas/eval-config.js +78 -0
- package/dist/_vendor/ailf-core/schemas/index.d.ts +16 -0
- package/dist/_vendor/ailf-core/schemas/index.js +16 -0
- package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +125 -0
- package/dist/_vendor/ailf-core/schemas/pipeline-request.js +67 -0
- package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +531 -0
- package/dist/_vendor/ailf-core/schemas/pipeline.js +318 -0
- package/dist/_vendor/ailf-core/schemas/schedules.d.ts +68 -0
- package/dist/_vendor/ailf-core/schemas/schedules.js +74 -0
- package/dist/_vendor/ailf-core/schemas/sinks.d.ts +207 -0
- package/dist/_vendor/ailf-core/schemas/sinks.js +108 -0
- package/dist/_vendor/ailf-core/services/comparison-formatters.d.ts +18 -0
- package/dist/_vendor/ailf-core/services/comparison-formatters.js +189 -0
- package/dist/_vendor/ailf-core/services/config-helpers.d.ts +41 -0
- package/dist/_vendor/ailf-core/services/config-helpers.js +86 -0
- package/dist/_vendor/ailf-core/services/index.d.ts +12 -0
- package/dist/_vendor/ailf-core/services/index.js +12 -0
- package/dist/_vendor/ailf-core/services/scoring.d.ts +49 -0
- package/dist/_vendor/ailf-core/services/scoring.js +222 -0
- package/dist/_vendor/ailf-core/types/index.d.ts +1082 -0
- package/dist/_vendor/ailf-core/types/index.js +21 -0
- package/dist/_vendor/ailf-core/types/scoring-input.d.ts +54 -0
- package/dist/_vendor/ailf-core/types/scoring-input.js +9 -0
- package/dist/_vendor/ailf-shared/dimension-names.d.ts +21 -0
- package/dist/_vendor/ailf-shared/dimension-names.js +27 -0
- package/dist/_vendor/ailf-shared/document-ref.d.ts +29 -0
- package/dist/_vendor/ailf-shared/document-ref.js +1 -0
- package/dist/_vendor/ailf-shared/eval-modes.d.ts +12 -0
- package/dist/_vendor/ailf-shared/eval-modes.js +8 -0
- package/dist/_vendor/ailf-shared/index.d.ts +16 -0
- package/dist/_vendor/ailf-shared/index.js +16 -0
- package/dist/_vendor/ailf-shared/noise-threshold.d.ts +9 -0
- package/dist/_vendor/ailf-shared/noise-threshold.js +9 -0
- package/dist/_vendor/ailf-shared/score-grades.d.ts +17 -0
- package/dist/_vendor/ailf-shared/score-grades.js +23 -0
- package/dist/adapters/cache/content-lake-cache.d.ts +24 -0
- package/dist/adapters/cache/content-lake-cache.js +59 -0
- package/dist/adapters/cache/filesystem-cache.d.ts +18 -0
- package/dist/adapters/cache/filesystem-cache.js +54 -0
- package/dist/adapters/cache/index.d.ts +2 -0
- package/dist/adapters/cache/index.js +2 -0
- package/dist/adapters/config-sources/cli-config-adapter.d.ts +17 -0
- package/dist/adapters/config-sources/cli-config-adapter.js +23 -0
- package/dist/adapters/config-sources/file-config-adapter.d.ts +26 -0
- package/dist/adapters/config-sources/file-config-adapter.js +96 -0
- package/dist/adapters/config-sources/index.d.ts +2 -0
- package/dist/adapters/config-sources/index.js +2 -0
- package/dist/adapters/doc-fetchers/index.d.ts +1 -0
- package/dist/adapters/doc-fetchers/index.js +1 -0
- package/dist/adapters/doc-fetchers/sanity-doc-fetcher.d.ts +76 -0
- package/dist/adapters/doc-fetchers/sanity-doc-fetcher.js +620 -0
- package/dist/adapters/eval-runners/index.d.ts +1 -0
- package/dist/adapters/eval-runners/index.js +1 -0
- package/dist/adapters/eval-runners/promptfoo-eval-adapter.d.ts +14 -0
- package/dist/adapters/eval-runners/promptfoo-eval-adapter.js +63 -0
- package/dist/adapters/index.d.ts +12 -0
- package/dist/adapters/index.js +12 -0
- package/dist/adapters/loggers/console-logger.d.ts +22 -0
- package/dist/adapters/loggers/console-logger.js +54 -0
- package/dist/adapters/loggers/index.d.ts +9 -0
- package/dist/adapters/loggers/index.js +9 -0
- package/dist/adapters/loggers/json-logger.d.ts +18 -0
- package/dist/adapters/loggers/json-logger.js +33 -0
- package/dist/adapters/loggers/quiet-logger.d.ts +16 -0
- package/dist/adapters/loggers/quiet-logger.js +30 -0
- package/dist/adapters/task-sources/composite-task-source.d.ts +20 -0
- package/dist/adapters/task-sources/composite-task-source.js +59 -0
- package/dist/adapters/task-sources/content-lake-task-source.d.ts +20 -0
- package/dist/adapters/task-sources/content-lake-task-source.js +219 -0
- package/dist/adapters/task-sources/index.d.ts +7 -0
- package/dist/adapters/task-sources/index.js +7 -0
- package/dist/adapters/task-sources/repo-schemas.d.ts +245 -0
- package/dist/adapters/task-sources/repo-schemas.js +234 -0
- package/dist/adapters/task-sources/repo-task-source.d.ts +22 -0
- package/dist/adapters/task-sources/repo-task-source.js +104 -0
- package/dist/adapters/task-sources/repo-trigger.d.ts +52 -0
- package/dist/adapters/task-sources/repo-trigger.js +153 -0
- package/dist/adapters/task-sources/repo-validation.d.ts +49 -0
- package/dist/adapters/task-sources/repo-validation.js +164 -0
- package/dist/adapters/task-sources/yaml-task-source.d.ts +18 -0
- package/dist/adapters/task-sources/yaml-task-source.js +136 -0
- package/dist/agent-observer/agentic-provider.d.ts +132 -0
- package/dist/agent-observer/agentic-provider.js +983 -0
- package/dist/agent-observer/classifier.d.ts +62 -0
- package/dist/agent-observer/classifier.js +269 -0
- package/dist/agent-observer/index.d.ts +7 -0
- package/dist/agent-observer/index.js +4 -0
- package/dist/agent-observer/pricing.d.ts +35 -0
- package/dist/agent-observer/pricing.js +82 -0
- package/dist/agent-observer/provider.d.ts +77 -0
- package/dist/agent-observer/provider.js +151 -0
- package/dist/agent-observer/proxy.d.ts +91 -0
- package/dist/agent-observer/proxy.js +321 -0
- package/dist/agent-observer/test-imports.d.ts +7 -0
- package/dist/agent-observer/test-imports.js +185 -0
- package/dist/agent-observer/types.d.ts +137 -0
- package/dist/agent-observer/types.js +16 -0
- package/dist/assertions/source-isolation.d.ts +72 -0
- package/dist/assertions/source-isolation.js +117 -0
- package/dist/cli.d.ts +24 -0
- package/dist/cli.js +199 -0
- package/dist/commands/agent-report.d.ts +5 -0
- package/dist/commands/agent-report.js +69 -0
- package/dist/commands/baseline.d.ts +9 -0
- package/dist/commands/baseline.js +141 -0
- package/dist/commands/cache.d.ts +13 -0
- package/dist/commands/cache.js +135 -0
- package/dist/commands/calculate-scores.d.ts +8 -0
- package/dist/commands/calculate-scores.js +48 -0
- package/dist/commands/compare.d.ts +8 -0
- package/dist/commands/compare.js +120 -0
- package/dist/commands/completion.d.ts +18 -0
- package/dist/commands/completion.js +260 -0
- package/dist/commands/coverage-audit.d.ts +7 -0
- package/dist/commands/coverage-audit.js +40 -0
- package/dist/commands/discovery-report.d.ts +10 -0
- package/dist/commands/discovery-report.js +44 -0
- package/dist/commands/eval.d.ts +9 -0
- package/dist/commands/eval.js +35 -0
- package/dist/commands/explain-handler.d.ts +34 -0
- package/dist/commands/explain-handler.js +719 -0
- package/dist/commands/fetch-docs.d.ts +8 -0
- package/dist/commands/fetch-docs.js +128 -0
- package/dist/commands/generate-configs.d.ts +8 -0
- package/dist/commands/generate-configs.js +46 -0
- package/dist/commands/grader/index.d.ts +11 -0
- package/dist/commands/grader/index.js +118 -0
- package/dist/commands/init.d.ts +19 -0
- package/dist/commands/init.js +150 -0
- package/dist/commands/interactive.d.ts +12 -0
- package/dist/commands/interactive.js +238 -0
- package/dist/commands/lookup-doc.d.ts +15 -0
- package/dist/commands/lookup-doc.js +84 -0
- package/dist/commands/measure-retrieval.d.ts +5 -0
- package/dist/commands/measure-retrieval.js +65 -0
- package/dist/commands/pipeline-action.d.ts +71 -0
- package/dist/commands/pipeline-action.js +305 -0
- package/dist/commands/pipeline.d.ts +62 -0
- package/dist/commands/pipeline.js +53 -0
- package/dist/commands/pr-comment.d.ts +8 -0
- package/dist/commands/pr-comment.js +47 -0
- package/dist/commands/publish.d.ts +26 -0
- package/dist/commands/publish.js +253 -0
- package/dist/commands/readiness-report.d.ts +10 -0
- package/dist/commands/readiness-report.js +104 -0
- package/dist/commands/shared/options.d.ts +29 -0
- package/dist/commands/shared/options.js +57 -0
- package/dist/commands/update-quality-scores.d.ts +5 -0
- package/dist/commands/update-quality-scores.js +20 -0
- package/dist/commands/validate-tasks.d.ts +16 -0
- package/dist/commands/validate-tasks.js +93 -0
- package/dist/commands/validate.d.ts +9 -0
- package/dist/commands/validate.js +73 -0
- package/dist/commands/webhook-server.d.ts +5 -0
- package/dist/commands/webhook-server.js +30 -0
- package/dist/commands/weekly-digest.d.ts +10 -0
- package/dist/commands/weekly-digest.js +104 -0
- package/dist/composition-root.d.ts +26 -0
- package/dist/composition-root.js +107 -0
- package/dist/interpolate.d.ts +26 -0
- package/dist/interpolate.js +70 -0
- package/dist/job-store.d.ts +104 -0
- package/dist/job-store.js +188 -0
- package/dist/lib/agent-behavior-report.d.ts +8 -0
- package/dist/lib/agent-behavior-report.js +185 -0
- package/dist/lib/baseline.d.ts +19 -0
- package/dist/lib/baseline.js +153 -0
- package/dist/lib/calculate-scores.d.ts +23 -0
- package/dist/lib/calculate-scores.js +42 -0
- package/dist/lib/compare.d.ts +18 -0
- package/dist/lib/compare.js +170 -0
- package/dist/lib/coverage-audit.d.ts +4 -0
- package/dist/lib/coverage-audit.js +42 -0
- package/dist/lib/discovery-report.d.ts +13 -0
- package/dist/lib/discovery-report.js +57 -0
- package/dist/lib/fetch-docs.d.ts +30 -0
- package/dist/lib/fetch-docs.js +171 -0
- package/dist/lib/generate-configs.d.ts +25 -0
- package/dist/lib/generate-configs.js +42 -0
- package/dist/lib/grader-api.d.ts +21 -0
- package/dist/lib/grader-api.js +34 -0
- package/dist/lib/grader-compare.d.ts +19 -0
- package/dist/lib/grader-compare.js +91 -0
- package/dist/lib/grader-consistency.d.ts +27 -0
- package/dist/lib/grader-consistency.js +79 -0
- package/dist/lib/grader-sensitivity.d.ts +19 -0
- package/dist/lib/grader-sensitivity.js +75 -0
- package/dist/lib/grader-validate.d.ts +19 -0
- package/dist/lib/grader-validate.js +78 -0
- package/dist/lib/measure-retrieval.d.ts +14 -0
- package/dist/lib/measure-retrieval.js +71 -0
- package/dist/lib/pr-comment.d.ts +16 -0
- package/dist/lib/pr-comment.js +28 -0
- package/dist/lib/readiness-report.d.ts +13 -0
- package/dist/lib/readiness-report.js +108 -0
- package/dist/lib/webhook-server.d.ts +11 -0
- package/dist/lib/webhook-server.js +24 -0
- package/dist/lib/weekly-digest.d.ts +24 -0
- package/dist/lib/weekly-digest.js +148 -0
- package/dist/orchestration/build-app-context.d.ts +27 -0
- package/dist/orchestration/build-app-context.js +81 -0
- package/dist/orchestration/build-step-sequence.d.ts +15 -0
- package/dist/orchestration/build-step-sequence.js +84 -0
- package/dist/orchestration/config-to-source-overrides.d.ts +9 -0
- package/dist/orchestration/config-to-source-overrides.js +28 -0
- package/dist/orchestration/env-bridge.d.ts +21 -0
- package/dist/orchestration/env-bridge.js +66 -0
- package/dist/orchestration/index.d.ts +11 -0
- package/dist/orchestration/index.js +11 -0
- package/dist/orchestration/pipeline-orchestrator.d.ts +24 -0
- package/dist/orchestration/pipeline-orchestrator.js +153 -0
- package/dist/orchestration/step-runner.d.ts +20 -0
- package/dist/orchestration/step-runner.js +88 -0
- package/dist/orchestration/steps/calculate-scores-step.d.ts +13 -0
- package/dist/orchestration/steps/calculate-scores-step.js +95 -0
- package/dist/orchestration/steps/callback-step.d.ts +24 -0
- package/dist/orchestration/steps/callback-step.js +76 -0
- package/dist/orchestration/steps/compare-step.d.ts +14 -0
- package/dist/orchestration/steps/compare-step.js +92 -0
- package/dist/orchestration/steps/discovery-report-step.d.ts +13 -0
- package/dist/orchestration/steps/discovery-report-step.js +55 -0
- package/dist/orchestration/steps/fetch-docs-shell.d.ts +17 -0
- package/dist/orchestration/steps/fetch-docs-shell.js +30 -0
- package/dist/orchestration/steps/fetch-docs-step.d.ts +14 -0
- package/dist/orchestration/steps/fetch-docs-step.js +135 -0
- package/dist/orchestration/steps/gap-analysis-step.d.ts +16 -0
- package/dist/orchestration/steps/gap-analysis-step.js +136 -0
- package/dist/orchestration/steps/generate-configs-step.d.ts +14 -0
- package/dist/orchestration/steps/generate-configs-step.js +85 -0
- package/dist/orchestration/steps/grader-consistency-step.d.ts +13 -0
- package/dist/orchestration/steps/grader-consistency-step.js +64 -0
- package/dist/orchestration/steps/index.d.ts +19 -0
- package/dist/orchestration/steps/index.js +19 -0
- package/dist/orchestration/steps/mirror-repo-tasks-step.d.ts +21 -0
- package/dist/orchestration/steps/mirror-repo-tasks-step.js +94 -0
- package/dist/orchestration/steps/publish-report-step.d.ts +26 -0
- package/dist/orchestration/steps/publish-report-step.js +216 -0
- package/dist/orchestration/steps/readiness-step.d.ts +13 -0
- package/dist/orchestration/steps/readiness-step.js +91 -0
- package/dist/orchestration/steps/report-step.d.ts +12 -0
- package/dist/orchestration/steps/report-step.js +49 -0
- package/dist/orchestration/steps/run-eval-step.d.ts +17 -0
- package/dist/orchestration/steps/run-eval-step.js +195 -0
- package/dist/orchestration/steps/validate-step.d.ts +12 -0
- package/dist/orchestration/steps/validate-step.js +41 -0
- package/dist/pipeline/agent-behavior-report.d.ts +53 -0
- package/dist/pipeline/agent-behavior-report.js +132 -0
- package/dist/pipeline/attribution.d.ts +47 -0
- package/dist/pipeline/attribution.js +226 -0
- package/dist/pipeline/baseline.d.ts +37 -0
- package/dist/pipeline/baseline.js +141 -0
- package/dist/pipeline/cache.d.ts +101 -0
- package/dist/pipeline/cache.js +283 -0
- package/dist/pipeline/calculate-scores.d.ts +102 -0
- package/dist/pipeline/calculate-scores.js +1128 -0
- package/dist/pipeline/callback-delivery.d.ts +50 -0
- package/dist/pipeline/callback-delivery.js +89 -0
- package/dist/pipeline/checks.d.ts +39 -0
- package/dist/pipeline/checks.js +280 -0
- package/dist/pipeline/classify-url.d.ts +61 -0
- package/dist/pipeline/classify-url.js +93 -0
- package/dist/pipeline/compare.d.ts +31 -0
- package/dist/pipeline/compare.js +208 -0
- package/dist/pipeline/coverage-audit.d.ts +39 -0
- package/dist/pipeline/coverage-audit.js +165 -0
- package/dist/pipeline/degradations.d.ts +85 -0
- package/dist/pipeline/degradations.js +242 -0
- package/dist/pipeline/discovery-report.d.ts +55 -0
- package/dist/pipeline/discovery-report.js +178 -0
- package/dist/pipeline/eval-constants.d.ts +68 -0
- package/dist/pipeline/eval-constants.js +111 -0
- package/dist/pipeline/eval-fingerprint.d.ts +66 -0
- package/dist/pipeline/eval-fingerprint.js +175 -0
- package/dist/pipeline/expand-tasks.d.ts +220 -0
- package/dist/pipeline/expand-tasks.js +421 -0
- package/dist/pipeline/failure-modes.d.ts +46 -0
- package/dist/pipeline/failure-modes.js +348 -0
- package/dist/pipeline/fetch-url-content.d.ts +44 -0
- package/dist/pipeline/fetch-url-content.js +93 -0
- package/dist/pipeline/gap-analysis.d.ts +48 -0
- package/dist/pipeline/gap-analysis.js +231 -0
- package/dist/pipeline/generate-configs.d.ts +72 -0
- package/dist/pipeline/generate-configs.js +395 -0
- package/dist/pipeline/grader-api.d.ts +49 -0
- package/dist/pipeline/grader-api.js +200 -0
- package/dist/pipeline/grader-compare-runner.d.ts +44 -0
- package/dist/pipeline/grader-compare-runner.js +301 -0
- package/dist/pipeline/grader-comparison.d.ts +111 -0
- package/dist/pipeline/grader-comparison.js +161 -0
- package/dist/pipeline/grader-consistency-runner.d.ts +60 -0
- package/dist/pipeline/grader-consistency-runner.js +270 -0
- package/dist/pipeline/grader-consistency.d.ts +103 -0
- package/dist/pipeline/grader-consistency.js +146 -0
- package/dist/pipeline/grader-sensitivity-runner.d.ts +40 -0
- package/dist/pipeline/grader-sensitivity-runner.js +282 -0
- package/dist/pipeline/grader-sensitivity.d.ts +94 -0
- package/dist/pipeline/grader-sensitivity.js +144 -0
- package/dist/pipeline/grader-validate-runner.d.ts +38 -0
- package/dist/pipeline/grader-validate-runner.js +229 -0
- package/dist/pipeline/grader-validation.d.ts +107 -0
- package/dist/pipeline/grader-validation.js +169 -0
- package/dist/pipeline/map-request-to-config.d.ts +19 -0
- package/dist/pipeline/map-request-to-config.js +80 -0
- package/dist/pipeline/measure-retrieval.d.ts +59 -0
- package/dist/pipeline/measure-retrieval.js +111 -0
- package/dist/pipeline/mirror-repo-tasks.d.ts +86 -0
- package/dist/pipeline/mirror-repo-tasks.js +350 -0
- package/dist/pipeline/plan-format.d.ts +33 -0
- package/dist/pipeline/plan-format.js +202 -0
- package/dist/pipeline/plan.d.ts +169 -0
- package/dist/pipeline/plan.js +708 -0
- package/dist/pipeline/pr-comment.d.ts +19 -0
- package/dist/pipeline/pr-comment.js +502 -0
- package/dist/pipeline/probe.d.ts +52 -0
- package/dist/pipeline/probe.js +390 -0
- package/dist/pipeline/provenance.d.ts +47 -0
- package/dist/pipeline/provenance.js +146 -0
- package/dist/pipeline/readiness-report.d.ts +87 -0
- package/dist/pipeline/readiness-report.js +205 -0
- package/dist/pipeline/release-classification.d.ts +54 -0
- package/dist/pipeline/release-classification.js +238 -0
- package/dist/pipeline/release-report.d.ts +37 -0
- package/dist/pipeline/release-report.js +222 -0
- package/dist/pipeline/repo-eval-comment.d.ts +37 -0
- package/dist/pipeline/repo-eval-comment.js +165 -0
- package/dist/pipeline/repo-threshold-evaluator.d.ts +89 -0
- package/dist/pipeline/repo-threshold-evaluator.js +162 -0
- package/dist/pipeline/resolve-mappings.d.ts +35 -0
- package/dist/pipeline/resolve-mappings.js +72 -0
- package/dist/pipeline/retrieval-metrics.d.ts +39 -0
- package/dist/pipeline/retrieval-metrics.js +136 -0
- package/dist/pipeline/reverse-mapping.d.ts +67 -0
- package/dist/pipeline/reverse-mapping.js +88 -0
- package/dist/pipeline/schemas.d.ts +9 -0
- package/dist/pipeline/schemas.js +9 -0
- package/dist/pipeline/steps/calculate-scores-step.d.ts +11 -0
- package/dist/pipeline/steps/calculate-scores-step.js +89 -0
- package/dist/pipeline/steps/compare-step.d.ts +18 -0
- package/dist/pipeline/steps/compare-step.js +90 -0
- package/dist/pipeline/steps/eval-step.d.ts +53 -0
- package/dist/pipeline/steps/eval-step.js +347 -0
- package/dist/pipeline/steps/fetch-docs-step.d.ts +11 -0
- package/dist/pipeline/steps/fetch-docs-step.js +84 -0
- package/dist/pipeline/steps/generate-configs-step.d.ts +11 -0
- package/dist/pipeline/steps/generate-configs-step.js +98 -0
- package/dist/pipeline/steps/grader-consistency-step.d.ts +21 -0
- package/dist/pipeline/steps/grader-consistency-step.js +74 -0
- package/dist/pipeline/steps/publish-report-step.d.ts +57 -0
- package/dist/pipeline/steps/publish-report-step.js +243 -0
- package/dist/pipeline/steps/report-step.d.ts +13 -0
- package/dist/pipeline/steps/report-step.js +56 -0
- package/dist/pipeline/steps/update-scores-step.d.ts +11 -0
- package/dist/pipeline/steps/update-scores-step.js +42 -0
- package/dist/pipeline/targeted-loo.d.ts +88 -0
- package/dist/pipeline/targeted-loo.js +203 -0
- package/dist/pipeline/thresholds.d.ts +27 -0
- package/dist/pipeline/thresholds.js +245 -0
- package/dist/pipeline/types.d.ts +10 -0
- package/dist/pipeline/types.js +10 -0
- package/dist/pipeline/validate.d.ts +67 -0
- package/dist/pipeline/validate.js +406 -0
- package/dist/pipeline/webhook-server.d.ts +37 -0
- package/dist/pipeline/webhook-server.js +133 -0
- package/dist/report-store.d.ts +84 -0
- package/dist/report-store.js +208 -0
- package/dist/sanity/client.d.ts +38 -0
- package/dist/sanity/client.js +86 -0
- package/dist/sanity/portable-text.d.ts +11 -0
- package/dist/sanity/portable-text.js +211 -0
- package/dist/sanity/queries.d.ts +133 -0
- package/dist/sanity/queries.js +300 -0
- package/dist/schedules/digest.d.ts +116 -0
- package/dist/schedules/digest.js +156 -0
- package/dist/schedules/index.d.ts +12 -0
- package/dist/schedules/index.js +10 -0
- package/dist/schedules/loader.d.ts +31 -0
- package/dist/schedules/loader.js +73 -0
- package/dist/schedules/schema.d.ts +9 -0
- package/dist/schedules/schema.js +9 -0
- package/dist/scripts/agent-behavior-report.d.ts +19 -0
- package/dist/scripts/agent-behavior-report.js +315 -0
- package/dist/scripts/baseline.d.ts +43 -0
- package/dist/scripts/baseline.js +267 -0
- package/dist/scripts/calculate-scores.d.ts +166 -0
- package/dist/scripts/calculate-scores.js +1296 -0
- package/dist/scripts/compare.d.ts +22 -0
- package/dist/scripts/compare.js +334 -0
- package/dist/scripts/coverage-audit.d.ts +44 -0
- package/dist/scripts/coverage-audit.js +209 -0
- package/dist/scripts/debug-eval.d.ts +19 -0
- package/dist/scripts/debug-eval.js +73 -0
- package/dist/scripts/discovery-report.d.ts +58 -0
- package/dist/scripts/discovery-report.js +250 -0
- package/dist/scripts/fetch-docs.d.ts +35 -0
- package/dist/scripts/fetch-docs.js +472 -0
- package/dist/scripts/generate-configs.d.ts +66 -0
- package/dist/scripts/generate-configs.js +459 -0
- package/dist/scripts/grader-api.d.ts +27 -0
- package/dist/scripts/grader-api.js +206 -0
- package/dist/scripts/grader-compare.d.ts +22 -0
- package/dist/scripts/grader-compare.js +368 -0
- package/dist/scripts/grader-consistency.d.ts +20 -0
- package/dist/scripts/grader-consistency.js +313 -0
- package/dist/scripts/grader-sensitivity.d.ts +22 -0
- package/dist/scripts/grader-sensitivity.js +354 -0
- package/dist/scripts/grader-validate.d.ts +19 -0
- package/dist/scripts/grader-validate.js +267 -0
- package/dist/scripts/measure-retrieval.d.ts +10 -0
- package/dist/scripts/measure-retrieval.js +145 -0
- package/dist/scripts/migrate-tasks-to-content-lake.d.ts +24 -0
- package/dist/scripts/migrate-tasks-to-content-lake.js +327 -0
- package/dist/scripts/pipeline.d.ts +76 -0
- package/dist/scripts/pipeline.js +1031 -0
- package/dist/scripts/pr-comment.d.ts +10 -0
- package/dist/scripts/pr-comment.js +510 -0
- package/dist/scripts/readiness-report.d.ts +88 -0
- package/dist/scripts/readiness-report.js +342 -0
- package/dist/scripts/update-quality-scores.d.ts +15 -0
- package/dist/scripts/update-quality-scores.js +184 -0
- package/dist/scripts/validate-task-sources.d.ts +21 -0
- package/dist/scripts/validate-task-sources.js +210 -0
- package/dist/scripts/validate.d.ts +13 -0
- package/dist/scripts/validate.js +79 -0
- package/dist/scripts/webhook-server.d.ts +26 -0
- package/dist/scripts/webhook-server.js +147 -0
- package/dist/scripts/weekly-digest.d.ts +24 -0
- package/dist/scripts/weekly-digest.js +144 -0
- package/dist/sinks/bigquery/index.d.ts +131 -0
- package/dist/sinks/bigquery/index.js +222 -0
- package/dist/sinks/format-slack.d.ts +64 -0
- package/dist/sinks/format-slack.js +306 -0
- package/dist/sinks/index.d.ts +23 -0
- package/dist/sinks/index.js +18 -0
- package/dist/sinks/loader.d.ts +18 -0
- package/dist/sinks/loader.js +82 -0
- package/dist/sinks/retry.d.ts +24 -0
- package/dist/sinks/retry.js +52 -0
- package/dist/sinks/schema.d.ts +9 -0
- package/dist/sinks/schema.js +9 -0
- package/dist/sinks/slack/format.d.ts +65 -0
- package/dist/sinks/slack/format.js +327 -0
- package/dist/sinks/slack/index.d.ts +27 -0
- package/dist/sinks/slack/index.js +78 -0
- package/dist/sinks/slack-sink.d.ts +27 -0
- package/dist/sinks/slack-sink.js +78 -0
- package/dist/sinks/types.d.ts +59 -0
- package/dist/sinks/types.js +44 -0
- package/dist/sinks/webhook/index.d.ts +19 -0
- package/dist/sinks/webhook/index.js +50 -0
- package/dist/sinks/webhook-sink.d.ts +19 -0
- package/dist/sinks/webhook-sink.js +50 -0
- package/dist/sources.d.ts +104 -0
- package/dist/sources.js +292 -0
- package/dist/webhook/budget.d.ts +42 -0
- package/dist/webhook/budget.js +60 -0
- package/dist/webhook/debounce.d.ts +67 -0
- package/dist/webhook/debounce.js +76 -0
- package/dist/webhook/dispatch.d.ts +45 -0
- package/dist/webhook/dispatch.js +84 -0
- package/dist/webhook/eval-request-handler.d.ts +87 -0
- package/dist/webhook/eval-request-handler.js +181 -0
- package/dist/webhook/handler.d.ts +88 -0
- package/dist/webhook/handler.js +203 -0
- package/dist/webhook/index.d.ts +17 -0
- package/dist/webhook/index.js +12 -0
- package/dist/webhook/types.d.ts +109 -0
- package/dist/webhook/types.js +10 -0
- package/package.json +72 -0
- package/tasks/.expanded.agentic.yaml +51 -0
- package/tasks/.expanded.yaml +66 -0
- package/tasks/frameworks.yaml +98 -0
- package/tasks/functions.yaml +51 -0
- package/tasks/groq.yaml +216 -0
- package/tasks/nextjs-live.yaml +62 -0
- package/tasks/studio-setup.yaml +111 -0
- package/tasks/visual-editing.yaml +120 -0
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shell delegation for the fetch-docs step.
|
|
3
|
+
*
|
|
4
|
+
* Isolates the execSync call so it can be replaced when the pipeline
|
|
5
|
+
* fully migrates to the DocFetcher port.
|
|
6
|
+
*/
|
|
7
|
+
import { execSync } from "child_process";
|
|
8
|
+
/**
 * Run `pnpm fetch-docs` via shell.
 *
 * Returns a result object instead of throwing so the step can
 * handle the failure uniformly.
 *
 * The command is executed through a shell, so `source` is validated before
 * it is interpolated: values containing whitespace or shell metacharacters
 * are rejected with `{ ok: false, error }` and no process is spawned.
 *
 * @param {string} rootDir - Working directory in which the command runs.
 * @param {string} [source] - Optional value forwarded as `--source <value>`.
 *   Falsy values omit the flag entirely.
 * @returns {{ ok: true } | { ok: false, error: string }} `ok: true` when the
 *   command exits successfully; otherwise `ok: false` with the failure message.
 */
export function runFetchDocsShell(rootDir, source) {
    // Conservative allow-list: word characters plus the punctuation commonly
    // found in names, paths and URLs. Anything else could be interpreted by
    // the shell (command separators, substitutions, redirects, quoting).
    const safeSourcePattern = /^[\w.@:/+=,-]+$/;
    let sourceArg = "";
    if (source) {
        const sourceText = String(source);
        if (!safeSourcePattern.test(sourceText)) {
            return {
                ok: false,
                error: `Invalid source ${JSON.stringify(sourceText)}: only letters, digits and . _ @ : / + = , - are allowed`,
            };
        }
        sourceArg = ` --source ${sourceText}`;
    }
    try {
        execSync(`pnpm fetch-docs${sourceArg}`, {
            cwd: rootDir,
            env: process.env,
            // Stream the child's output straight to the parent's terminal.
            stdio: "inherit",
        });
        return { ok: true };
    }
    catch (err) {
        return {
            ok: false,
            error: err instanceof Error ? err.message : String(err),
        };
    }
}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pipeline step: Fetch documentation from Sanity CMS.
|
|
3
|
+
*
|
|
4
|
+
* Uses ctx.docFetcher (the DocFetcher port) for all fetching. The adapter
|
|
5
|
+
* handles GROQ queries, perspective diffing, document overlays, and URL
|
|
6
|
+
* fetching. This step orchestrates the call and writes metadata files.
|
|
7
|
+
*/
|
|
8
|
+
import type { AppContext, PipelineStep, StepResult, ValidationIssue } from "../../_vendor/ailf-core/index.d.ts";
|
|
9
|
+
export declare class FetchDocsStep implements PipelineStep {
    /** Step identifier; always the literal string "fetch-docs". */
    readonly name: "fetch-docs";
    /** Reports validation issues for this step as a list. */
    check(): ValidationIssue[];
    /** Runs the step against the given application context and resolves to its result. */
    execute(ctx: AppContext): Promise<StepResult>;
    /**
     * Returns a list of strings derived from the context.
     * NOTE(review): presumably the paths used as cache inputs — confirm against the implementation.
     */
    cacheInputs(ctx: AppContext): string[];
}
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pipeline step: Fetch documentation from Sanity CMS.
|
|
3
|
+
*
|
|
4
|
+
* Uses ctx.docFetcher (the DocFetcher port) for all fetching. The adapter
|
|
5
|
+
* handles GROQ queries, perspective diffing, document overlays, and URL
|
|
6
|
+
* fetching. This step orchestrates the call and writes metadata files.
|
|
7
|
+
*/
|
|
8
|
+
import { mkdirSync, writeFileSync } from "fs";
|
|
9
|
+
import { join } from "path";
|
|
10
|
+
import { getStepInputPaths } from "../../pipeline/cache.js";
|
|
11
|
+
import { checkCanonicalContextsExist } from "../../pipeline/checks.js";
|
|
12
|
+
import { loadSource } from "../../sources.js";
|
|
13
|
+
import { configToSourceOverrides } from "../config-to-source-overrides.js";
|
|
14
|
+
export class FetchDocsStep {
    name = "fetch-docs";

    /**
     * Static validation hook. This step reports no issues.
     *
     * @returns {Array} Always an empty list.
     */
    check() {
        return [];
    }

    /**
     * Fetch canonical documentation for every task that declares canonical docs.
     *
     * Flow: honor `config.skipFetch`, load tasks, keep those with at least one
     * canonical doc, resolve and log the source, require `ctx.docFetcher`, run
     * the fetch, write any returned metadata, then verify that canonical
     * context files exist for the fetched tasks.
     *
     * @param {object} ctx - Application context; reads `config`, `taskSource`
     *   and `docFetcher`.
     * @returns {Promise<object>} A step result whose `status` is "skipped",
     *   "failed" (with `error` and `durationMs`) or "success" (with `summary`
     *   and `durationMs`).
     */
    async execute(ctx) {
        const { config } = ctx;
        if (config.skipFetch) {
            return { status: "skipped", reason: "--skip-fetch" };
        }

        const startedAt = Date.now();
        // Every failure path returns the same shape; build it in one place.
        const failure = (error) => ({
            durationMs: Date.now() - startedAt,
            error,
            status: "failed",
        });

        // Precondition: at least one task has canonical doc mappings
        const allTasks = await ctx.taskSource.loadTasks(buildFilter(ctx));
        const tasksWithDocs = allTasks.filter(({ canonicalDocs }) => canonicalDocs.length > 0);
        if (tasksWithDocs.length === 0) {
            return failure("No tasks with canonical_docs found. Add canonical_docs to your task definitions.");
        }

        // Resolve the source a single time, applying config-derived overrides
        const resolvedSource = loadSource(config.source, configToSourceOverrides(config));

        // Report which source is about to be used
        console.log(`  Source: ${resolvedSource.name}`);
        console.log(`  Base URL: ${resolvedSource.baseUrl}`);
        if (resolvedSource.perspective) {
            console.log(`  Perspective: ${resolvedSource.perspective}`);
        }
        if (resolvedSource.documentIds?.length > 0) {
            console.log(`  Documents: ${resolvedSource.documentIds.length} document ID(s)`);
        }
        if (resolvedSource.urls.length > 0) {
            console.log(`  URLs: ${resolvedSource.urls.length} direct URL(s)`);
        }

        // Precondition: docFetcher must be available
        const { docFetcher } = ctx;
        if (!docFetcher) {
            return failure("DocFetcher port not available. Ensure composition root wires ctx.docFetcher.");
        }

        // Run the fetch through the DocFetcher port
        try {
            const { metadata } = await docFetcher.fetch(tasksWithDocs, resolvedSource);
            // Persist metadata so later pipeline stages can read it
            if (metadata) {
                writeMetadataFiles(config.rootDir, metadata);
            }
        }
        catch (err) {
            const detail = err instanceof Error ? err.message : String(err);
            return failure(`fetch-docs failed: ${detail}`);
        }

        // Postcondition: canonical context files exist for all tasks
        const taskIds = tasksWithDocs.map(({ id }) => id);
        const errorMessages = checkCanonicalContextsExist(config.rootDir, taskIds)
            .filter(({ severity }) => severity === "error")
            .map(({ message }) => message);
        if (errorMessages.length > 0) {
            return failure(`Postcondition failed: ${errorMessages.join("; ")}`);
        }

        return {
            durationMs: Date.now() - startedAt,
            status: "success",
            summary: `Fetched canonical contexts for ${taskIds.length} tasks`,
        };
    }

    /**
     * Inputs that feed this step's cache key.
     *
     * @param {object} ctx - Application context; reads `config.rootDir`.
     * @returns {*} Whatever `getStepInputPaths` yields for the "fetch-docs" step.
     */
    cacheInputs(ctx) {
        return getStepInputPaths(ctx.config.rootDir, "fetch-docs");
    }
}
|
|
94
|
+
// ---------------------------------------------------------------------------
|
|
95
|
+
// Helpers
|
|
96
|
+
// ---------------------------------------------------------------------------
|
|
97
|
+
/**
 * Build the task filter from `ctx.config.areas` and `ctx.config.tasks`.
 *
 * @param ctx Context whose `config` may carry `areas` and/or `tasks`.
 * @returns `undefined` when neither selection is set; otherwise an object
 *   containing `areas` and/or `taskIds`, each present only when its
 *   corresponding config value is truthy.
 */
function buildFilter(ctx) {
    const selectedAreas = ctx.config.areas;
    const selectedTasks = ctx.config.tasks;
    if (!(selectedAreas || selectedTasks)) {
        return undefined;
    }
    const filter = {};
    if (selectedAreas) {
        filter.areas = selectedAreas;
    }
    if (selectedTasks) {
        filter.taskIds = selectedTasks;
    }
    return filter;
}
|
|
106
|
+
/**
 * Persist the metadata returned by DocFetcher as pretty-printed JSON files
 * inside `<rootDir>/contexts/` (the directory is created when missing).
 *
 * Each metadata field is optional and maps to one file:
 * `manifest` → document-manifest.json, `releaseImpact` → release-impact.json,
 * `documentOverlay` → document-overlay.json, `urlFetch` → url-fetch.json.
 * Downstream pipeline steps (scoring, comparison, reporting) consume these
 * files for traceability and impact analysis.
 *
 * @param rootDir Directory under which `contexts/` lives.
 * @param metadata Object whose truthy fields are written out.
 */
function writeMetadataFiles(rootDir, metadata) {
    const targetDir = join(rootDir, "contexts");
    mkdirSync(targetDir, { recursive: true });

    // Serialize one payload into the contexts directory.
    const dump = (fileName, payload) => {
        writeFileSync(join(targetDir, fileName), JSON.stringify(payload, null, 2));
    };

    const { manifest, releaseImpact, documentOverlay, urlFetch } = metadata;

    if (manifest) {
        dump("document-manifest.json", manifest);
        console.log(` 📋 Document manifest: ${manifest.length} docs → contexts/document-manifest.json`);
    }
    if (releaseImpact) {
        dump("release-impact.json", releaseImpact);
        console.log(" 📄 Release impact written to contexts/release-impact.json");
    }
    if (documentOverlay) {
        dump("document-overlay.json", documentOverlay);
        console.log(" 📄 Document overlay written to contexts/document-overlay.json");
    }
    if (urlFetch) {
        dump("url-fetch.json", urlFetch);
        console.log(" 📄 URL fetch metadata written to contexts/url-fetch.json");
    }
}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pipeline step: Gap analysis (failure modes + remediation plan).
|
|
3
|
+
*
|
|
4
|
+
* Wraps the inline gap-analysis logic from pipeline-action.ts behind
|
|
5
|
+
* the PipelineStep interface. This includes document manifest enrichment
|
|
6
|
+
* and low-scoring judgment extraction.
|
|
7
|
+
*
|
|
8
|
+
* This is an optional step — failure doesn't stop the pipeline.
|
|
9
|
+
*/
|
|
10
|
+
import type { AppContext, PipelineStep, StepResult, ValidationIssue } from "../../_vendor/ailf-core/index.d.ts";
|
|
11
|
+
/** Optional pipeline step that produces the failure-mode and gap-analysis reports. */
export declare class GapAnalysisStep implements PipelineStep {
|
|
12
|
+
/** Step identifier. */
readonly name = "gap-analysis";
|
|
13
|
+
/** Marks the step as optional: its failure doesn't stop the pipeline. */
readonly optional = true;
|
|
14
|
+
/** Reports a warning when results/latest/grader-judgments.json is missing under the root dir. */
check(ctx: AppContext): ValidationIssue[];
|
|
15
|
+
/** Runs the analysis; resolves to a skipped, failed or success result. */
execute(ctx: AppContext): Promise<StepResult>;
|
|
16
|
+
}
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pipeline step: Gap analysis (failure modes + remediation plan).
|
|
3
|
+
*
|
|
4
|
+
* Wraps the inline gap-analysis logic from pipeline-action.ts behind
|
|
5
|
+
* the PipelineStep interface. This includes document manifest enrichment
|
|
6
|
+
* and low-scoring judgment extraction.
|
|
7
|
+
*
|
|
8
|
+
* This is an optional step — failure doesn't stop the pipeline.
|
|
9
|
+
*/
|
|
10
|
+
import { existsSync, readFileSync, writeFileSync } from "fs";
|
|
11
|
+
import { join, resolve } from "path";
|
|
12
|
+
/**
 * Optional pipeline step: failure-mode classification and gap analysis.
 *
 * Reads `grader-judgments.json` and `score-summary.json` from
 * `<rootDir>/results/latest`, writes `failure-modes.json` and
 * `gap-analysis.json` next to them, then rewrites `score-summary.json`
 * enriched with document references and the lowest-scoring judgments.
 */
export class GapAnalysisStep {
    name = "gap-analysis";
    optional = true;

    /** Warn (never error) when the grader judgments file is missing. */
    check(ctx) {
        const judgmentsFile = resolve(ctx.config.rootDir, "results", "latest", "grader-judgments.json");
        if (existsSync(judgmentsFile)) {
            return [];
        }
        const issue = {
            message: "No grader-judgments.json — run a full evaluation first",
            severity: "warning",
            source: "gap-analysis",
        };
        return [issue];
    }

    /**
     * Run the analysis.
     *
     * @param ctx Application context; only `ctx.config.rootDir` is read.
     * @returns "skipped" when an input file is missing, "failed" with the
     *   error message when anything throws, otherwise "success" with a summary.
     */
    async execute(ctx) {
        const rootDir = ctx.config.rootDir;
        const startedAt = Date.now();
        const latestDir = resolve(rootDir, "results", "latest");
        const judgmentsFile = resolve(latestDir, "grader-judgments.json");
        const summaryFile = resolve(latestDir, "score-summary.json");

        if (!existsSync(judgmentsFile)) {
            return {
                status: "skipped",
                reason: "No grader-judgments.json — run a full evaluation first",
            };
        }
        if (!existsSync(summaryFile)) {
            return { status: "skipped", reason: "No score-summary.json" };
        }

        const pretty = (value) => JSON.stringify(value, null, 2);

        try {
            const { buildFailureModeReport, formatFailureModesConsole } = await import("../../pipeline/failure-modes.js");
            const { buildGapAnalysisReport, formatGapAnalysisConsole } = await import("../../pipeline/gap-analysis.js");

            const judgments = JSON.parse(readFileSync(judgmentsFile, "utf-8"));
            const scoreSummary = JSON.parse(readFileSync(summaryFile, "utf-8"));

            const failureReport = buildFailureModeReport(judgments, scoreSummary.scores);
            console.log(formatFailureModesConsole(failureReport));
            const gapReport = buildGapAnalysisReport(failureReport, scoreSummary.scores);
            console.log(formatGapAnalysisConsole(gapReport));

            // Both reports are written before the enrichment below, so they
            // survive even if enrichment throws.
            writeFileSync(join(latestDir, "failure-modes.json"), pretty(failureReport));
            writeFileSync(join(latestDir, "gap-analysis.json"), pretty(gapReport));

            // ── Document manifest + enrichment ─────────────────────────
            const { resolveMappings } = await import("../../pipeline/resolve-mappings.js");
            const mappings = resolveMappings(rootDir);
            const manifestFile = resolve(rootDir, "contexts", "document-manifest.json");
            let manifestEntries = [];
            if (existsSync(manifestFile)) {
                manifestEntries = JSON.parse(readFileSync(manifestFile, "utf-8"));
            }
            const entryBySlug = new Map();
            for (const entry of manifestEntries) {
                entryBySlug.set(entry.slug, entry);
            }

            // Turn slugs into document references; slugs that are absent from
            // the manifest (or whose entry has an empty _id) are dropped.
            const toDocRefs = (slugs) => {
                const refs = [];
                for (const slug of slugs) {
                    const entry = entryBySlug.get(slug);
                    if (entry && entry._id !== "") {
                        refs.push({
                            documentId: entry._id,
                            revision: entry._rev,
                            slug: entry.slug,
                            title: entry.title,
                        });
                    }
                }
                return refs;
            };

            const refsByTaskDescription = new Map();
            const refsByArea = new Map();
            for (const [area, areaData] of Object.entries(mappings.feature_areas)) {
                const slugsInArea = new Set();
                for (const task of areaData.tasks) {
                    const slugsInTask = task.canonical_docs.map((doc) => doc.slug);
                    refsByTaskDescription.set(task.description, toDocRefs(slugsInTask));
                    slugsInTask.forEach((slug) => slugsInArea.add(slug));
                }
                refsByArea.set(area, toDocRefs([...slugsInArea]));
            }

            const documentManifest = toDocRefs([...entryBySlug.keys()]);
            const enrichedScores = scoreSummary.scores.map((score) => ({
                ...score,
                documents: refsByArea.get(score.feature),
            }));

            // ── Low-scoring judgments ────────────────────────────────────
            const LOW_SCORE_THRESHOLD = 70;
            const MAX_STORED_JUDGMENTS = 50;
            // Scores in (0, 1] are rescaled to 0-100; anything else is kept as is.
            const toPercent = (score) => (score > 0 && score <= 1 ? Math.round(score * 100) : score);
            const lowScoringJudgments = judgments
                .map((judgment) => ({ ...judgment, score: toPercent(judgment.score) }))
                .filter((judgment) => judgment.score < LOW_SCORE_THRESHOLD)
                .sort((left, right) => left.score - right.score)
                .slice(0, MAX_STORED_JUDGMENTS)
                .map((judgment) => {
                    // Strip a trailing "(gold)" / "(baseline)" marker to get the lookup key.
                    const description = judgment.taskId.replace(/\s*\((gold|baseline)\)\s*$/, "");
                    const canonicalDocs = refsByTaskDescription.get(description);
                    if (!canonicalDocs) {
                        return judgment;
                    }
                    return { ...judgment, canonicalDocs };
                });

            const enrichedSummary = {
                ...scoreSummary,
                documentManifest,
                failureModes: failureReport,
                lowScoringJudgments,
                recommendations: gapReport,
                scores: enrichedScores,
            };
            writeFileSync(summaryFile, pretty(enrichedSummary));

            const classifiedPct = failureReport.classificationRate.toFixed(0);
            const actionableGaps = gapReport.gaps.length;
            return {
                durationMs: Date.now() - startedAt,
                status: "success",
                summary: `${failureReport.totalJudgments} judgments analyzed (${classifiedPct}% classified), ${actionableGaps} actionable gaps identified`,
            };
        }
        catch (err) {
            const message = err instanceof Error ? err.message : String(err);
            return {
                durationMs: Date.now() - startedAt,
                error: message,
                status: "failed",
            };
        }
    }
}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pipeline step: Generate Promptfoo configuration files.
|
|
3
|
+
*
|
|
4
|
+
* Calls generateConfigs() from pipeline/generate-configs.ts with typed options
|
|
5
|
+
* derived from AppContext. No env bridge needed — source is resolved and
|
|
6
|
+
* passed directly.
|
|
7
|
+
*/
|
|
8
|
+
import type { AppContext, PipelineStep, StepResult, ValidationIssue } from "../../_vendor/ailf-core/index.d.ts";
|
|
9
|
+
/** Pipeline step that generates the Promptfoo configuration files. */
export declare class GenerateConfigsStep implements PipelineStep {
|
|
10
|
+
/** Step identifier. */
readonly name = "generate-configs";
|
|
11
|
+
/** Returns the error-severity issues found when validating the models YAML. */
check(ctx: AppContext): ValidationIssue[];
|
|
12
|
+
/** Loads tasks, generates the configs and verifies they exist; resolves to the step result. */
execute(ctx: AppContext): Promise<StepResult>;
|
|
13
|
+
/** Input paths for this step, used for caching. */
cacheInputs(ctx: AppContext): string[];
|
|
14
|
+
}
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pipeline step: Generate Promptfoo configuration files.
|
|
3
|
+
*
|
|
4
|
+
* Calls generateConfigs() from pipeline/generate-configs.ts with typed options
|
|
5
|
+
* derived from AppContext. No env bridge needed — source is resolved and
|
|
6
|
+
* passed directly.
|
|
7
|
+
*/
|
|
8
|
+
import { getStepInputPaths } from "../../pipeline/cache.js";
|
|
9
|
+
import { checkGeneratedConfigsExist } from "../../pipeline/checks.js";
|
|
10
|
+
import { generateConfigs } from "../../pipeline/generate-configs.js";
|
|
11
|
+
import { validateModelsYaml } from "../../pipeline/validate.js";
|
|
12
|
+
import { loadSource } from "../../sources.js";
|
|
13
|
+
import { configToSourceOverrides } from "../config-to-source-overrides.js";
|
|
14
|
+
/**
 * Pipeline step: generate Promptfoo configuration files.
 *
 * Resolves the documentation source (when one is configured), loads tasks
 * through `ctx.taskSource`, and hands both to generateConfigs(). Every
 * failure — including source resolution — is reported as a
 * `{ status: "failed", error, durationMs }` result rather than a rejection.
 */
export class GenerateConfigsStep {
    name = "generate-configs";

    /** Return only the error-severity issues from models YAML validation. */
    check(ctx) {
        const issues = validateModelsYaml(ctx.config.rootDir);
        return issues.filter((i) => i.severity === "error");
    }

    /**
     * Generate the config files.
     *
     * @param ctx Application context. Reads `ctx.config` (source, areas,
     *   tasks, allowedOrigins, rootDir, searchMode) and `ctx.taskSource`.
     * @returns A result object with `status` "failed" or "success".
     */
    async execute(ctx) {
        const start = Date.now();
        const describe = (err) => (err instanceof Error ? err.message : String(err));
        const fail = (error) => ({
            durationMs: Date.now() - start,
            error,
            status: "failed",
        });

        // One definition of the filter, used for both task loading and config
        // generation. A fresh object is built per call so neither consumer
        // can observe mutations made by the other.
        const { areas, tasks: taskIds } = ctx.config;
        const makeFilter = () => (areas || taskIds ? { areas, taskIds } : undefined);

        // Resolve source once with typed overrides. Wrapped so an invalid
        // source yields a failed result like every other error path.
        let resolvedSource;
        try {
            const overrides = configToSourceOverrides(ctx.config);
            resolvedSource = ctx.config.source
                ? loadSource(ctx.config.source, overrides)
                : undefined;
        }
        catch (err) {
            return fail(`Source resolution failed: ${describe(err)}`);
        }

        // Load tasks via the TaskSource port — this picks up Content Lake,
        // repo-based, and YAML tasks depending on which adapter is wired.
        let tasks;
        try {
            tasks = await ctx.taskSource.loadTasks(makeFilter());
        }
        catch (err) {
            return fail(`TaskSource.loadTasks failed: ${describe(err)}`);
        }

        try {
            generateConfigs({
                allowedOrigins: ctx.config.allowedOrigins,
                filter: makeFilter(),
                resolvedSource,
                rootDir: ctx.config.rootDir,
                searchMode: ctx.config.searchMode,
                source: ctx.config.source,
                tasks,
            });
        }
        catch (err) {
            return fail(`generate-configs failed: ${describe(err)}`);
        }

        // Postcondition: config files exist.
        const configIssues = checkGeneratedConfigsExist(ctx.config.rootDir);
        const configErrors = configIssues.filter((i) => i.severity === "error");
        if (configErrors.length > 0) {
            return fail(`Postcondition failed: ${configErrors.map((e) => e.message).join("; ")}`);
        }

        return {
            durationMs: Date.now() - start,
            status: "success",
            summary: "Generated promptfoo config files",
        };
    }

    /** Input paths for this step, as reported by getStepInputPaths. */
    cacheInputs(ctx) {
        return getStepInputPaths(ctx.config.rootDir, "generate-configs");
    }
}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pipeline step: Grader consistency analysis.
|
|
3
|
+
*
|
|
4
|
+
* Calls pipeline/grader-consistency-runner.ts directly with typed options.
|
|
5
|
+
* No env bridge needed — all parameters are passed directly.
|
|
6
|
+
*/
|
|
7
|
+
import type { AppContext, PipelineStep, StepResult, ValidationIssue } from "../../_vendor/ailf-core/index.d.ts";
|
|
8
|
+
/** Optional pipeline step that runs the grader consistency analysis. */
export declare class GraderConsistencyStep implements PipelineStep {
|
|
9
|
+
/** Step identifier. */
readonly name = "grader-consistency";
|
|
10
|
+
/** Marks the step as optional. */
readonly optional = true;
|
|
11
|
+
/** Takes no context; the implementation always returns an empty list. */
check(): ValidationIssue[];
|
|
12
|
+
/** Runs the analysis against the results file for the configured mode; resolves to the step result. */
execute(ctx: AppContext): Promise<StepResult>;
|
|
13
|
+
}
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pipeline step: Grader consistency analysis.
|
|
3
|
+
*
|
|
4
|
+
* Calls pipeline/grader-consistency-runner.ts directly with typed options.
|
|
5
|
+
* No env bridge needed — all parameters are passed directly.
|
|
6
|
+
*/
|
|
7
|
+
import { existsSync } from "fs";
|
|
8
|
+
import { resolve } from "path";
|
|
9
|
+
import { checkResultsExist } from "../../pipeline/checks.js";
|
|
10
|
+
import { RESULTS_FILES } from "../../pipeline/eval-constants.js";
|
|
11
|
+
import { runGraderConsistency } from "../../pipeline/grader-consistency-runner.js";
|
|
12
|
+
/**
 * Optional pipeline step: grader consistency analysis.
 *
 * Picks the results file for the configured mode, checks that it exists,
 * runs runGraderConsistency() with the configured number of replications,
 * and finally checks that `results/latest/grader-consistency.json` was
 * produced under the root directory.
 */
export class GraderConsistencyStep {
    name = "grader-consistency";
    optional = true;

    /** No static preconditions; the results file is checked in execute(). */
    check() {
        return [];
    }

    /**
     * Run the analysis.
     *
     * @param ctx Application context. Reads `ctx.config.graderReplications`
     *   (defaults to 5 when null/undefined), `ctx.config.mode` and
     *   `ctx.config.rootDir`.
     * @returns A result object with `status` "failed" or "success".
     */
    async execute(ctx) {
        const startedAt = Date.now();
        const failed = (error) => ({
            durationMs: Date.now() - startedAt,
            error,
            status: "failed",
        });

        const replications = ctx.config.graderReplications ?? 5;

        // "full" mode is analysed using the "baseline" results file.
        let concreteMode = ctx.config.mode;
        if (concreteMode === "full") {
            concreteMode = "baseline";
        }
        const resultsFile = RESULTS_FILES[concreteMode];

        // Precondition: results file exists.
        const blocking = checkResultsExist(ctx.config.rootDir, resultsFile)
            .filter((issue) => issue.severity === "error");
        if (blocking.length > 0) {
            const details = blocking.map((issue) => issue.message).join("; ");
            return failed(`Results missing: ${details}. Run eval first.`);
        }

        try {
            await runGraderConsistency({
                replications,
                resultsPath: resolve(ctx.config.rootDir, resultsFile),
                rootDir: ctx.config.rootDir,
            });
        }
        catch (err) {
            const message = err instanceof Error ? err.message : String(err);
            return failed(`grader-consistency failed: ${message}`);
        }

        // Postcondition: output file exists.
        const reportFile = resolve(ctx.config.rootDir, "results", "latest", "grader-consistency.json");
        if (!existsSync(reportFile)) {
            return failed("grader-consistency.json was not created");
        }

        return {
            durationMs: Date.now() - startedAt,
            status: "success",
            summary: `Grader consistency analysis complete (${replications} replications)`,
        };
    }
}
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* PipelineStep implementations — wrappers around existing step functions.
|
|
3
|
+
*
|
|
4
|
+
* Each class delegates to the legacy step function, reading config from
|
|
5
|
+
* AppContext instead of positional parameters.
|
|
6
|
+
*/
|
|
7
|
+
export { CalculateScoresStep } from "./calculate-scores-step.js";
|
|
8
|
+
export { CompareStep } from "./compare-step.js";
|
|
9
|
+
export { DiscoveryReportStep } from "./discovery-report-step.js";
|
|
10
|
+
export { FetchDocsStep } from "./fetch-docs-step.js";
|
|
11
|
+
export { GapAnalysisStep } from "./gap-analysis-step.js";
|
|
12
|
+
export { GenerateConfigsStep } from "./generate-configs-step.js";
|
|
13
|
+
export { MirrorRepoTasksStep } from "./mirror-repo-tasks-step.js";
|
|
14
|
+
export { GraderConsistencyStep } from "./grader-consistency-step.js";
|
|
15
|
+
export { PublishReportStep } from "./publish-report-step.js";
|
|
16
|
+
export { ReadinessStep } from "./readiness-step.js";
|
|
17
|
+
export { ReportStep } from "./report-step.js";
|
|
18
|
+
export { RunEvalStep } from "./run-eval-step.js";
|
|
19
|
+
export { ValidateStep } from "./validate-step.js";
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* PipelineStep implementations — wrappers around existing step functions.
|
|
3
|
+
*
|
|
4
|
+
* Each class delegates to the legacy step function, reading config from
|
|
5
|
+
* AppContext instead of positional parameters.
|
|
6
|
+
*/
|
|
7
|
+
export { CalculateScoresStep } from "./calculate-scores-step.js";
|
|
8
|
+
export { CompareStep } from "./compare-step.js";
|
|
9
|
+
export { DiscoveryReportStep } from "./discovery-report-step.js";
|
|
10
|
+
export { FetchDocsStep } from "./fetch-docs-step.js";
|
|
11
|
+
export { GapAnalysisStep } from "./gap-analysis-step.js";
|
|
12
|
+
export { GenerateConfigsStep } from "./generate-configs-step.js";
|
|
13
|
+
export { MirrorRepoTasksStep } from "./mirror-repo-tasks-step.js";
|
|
14
|
+
export { GraderConsistencyStep } from "./grader-consistency-step.js";
|
|
15
|
+
export { PublishReportStep } from "./publish-report-step.js";
|
|
16
|
+
export { ReadinessStep } from "./readiness-step.js";
|
|
17
|
+
export { ReportStep } from "./report-step.js";
|
|
18
|
+
export { RunEvalStep } from "./run-eval-step.js";
|
|
19
|
+
export { ValidateStep } from "./validate-step.js";
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pipeline step: Mirror repo-based tasks to the Content Lake.
|
|
3
|
+
*
|
|
4
|
+
* When --repo-tasks-path is provided, this step upserts mirror documents
|
|
5
|
+
* in the Sanity Content Lake for all repo-sourced tasks. This makes
|
|
6
|
+
* repo tasks visible in Studio with full provenance tracking.
|
|
7
|
+
*
|
|
8
|
+
* The step is optional — mirror failure does not block the pipeline.
|
|
9
|
+
* It runs after validate and before fetch-docs so mirror documents
|
|
10
|
+
* exist before evaluation begins.
|
|
11
|
+
*
|
|
12
|
+
* @see packages/eval/src/pipeline/mirror-repo-tasks.ts
|
|
13
|
+
* @see docs/exec-plans/completed/tasks-as-content/phase-5-content-lake-mirroring.md
|
|
14
|
+
*/
|
|
15
|
+
import type { AppContext, PipelineStep, StepResult, ValidationIssue } from "../../_vendor/ailf-core/index.d.ts";
|
|
16
|
+
/** Optional pipeline step that mirrors repo-based tasks to the Content Lake. */
export declare class MirrorRepoTasksStep implements PipelineStep {
|
|
17
|
+
/** Step identifier. */
readonly name = "mirror-repo-tasks";
|
|
18
|
+
/** Marks the step as optional: mirror failure does not block the pipeline. */
readonly optional = true;
|
|
19
|
+
/** No hard preconditions; the implementation always returns an empty list. */
check(_ctx: AppContext): ValidationIssue[];
|
|
20
|
+
/** Mirrors the repo tasks; skipped when no repo tasks path or write token is available. */
execute(ctx: AppContext): Promise<StepResult>;
|
|
21
|
+
}
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pipeline step: Mirror repo-based tasks to the Content Lake.
|
|
3
|
+
*
|
|
4
|
+
* When --repo-tasks-path is provided, this step upserts mirror documents
|
|
5
|
+
* in the Sanity Content Lake for all repo-sourced tasks. This makes
|
|
6
|
+
* repo tasks visible in Studio with full provenance tracking.
|
|
7
|
+
*
|
|
8
|
+
* The step is optional — mirror failure does not block the pipeline.
|
|
9
|
+
* It runs after validate and before fetch-docs so mirror documents
|
|
10
|
+
* exist before evaluation begins.
|
|
11
|
+
*
|
|
12
|
+
* @see packages/eval/src/pipeline/mirror-repo-tasks.ts
|
|
13
|
+
* @see docs/exec-plans/completed/tasks-as-content/phase-5-content-lake-mirroring.md
|
|
14
|
+
*/
|
|
15
|
+
import { getSanityClient } from "../../sanity/client.js";
|
|
16
|
+
import { detectGitContext, mirrorRepoTasks, } from "../../pipeline/mirror-repo-tasks.js";
|
|
17
|
+
/**
 * Optional pipeline step: mirror repo-based tasks to the Content Lake.
 *
 * Skipped when `ctx.config.repoTasksPath` is unset or no write token is
 * available. Mirroring is best-effort: any error is logged as a warning and
 * reported as a "success" result whose summary states the failure, so the
 * pipeline is never blocked by this step.
 */
export class MirrorRepoTasksStep {
    name = "mirror-repo-tasks";
    optional = true;

    /** No hard preconditions; a missing repo tasks path is handled in execute(). */
    check(_ctx) {
        return [];
    }

    /**
     * Mirror the repo tasks.
     *
     * @param ctx Application context. Reads `ctx.config.repoTasksPath` and
     *   logs through `ctx.logger` (info/warn).
     * @returns "skipped" when there is nothing to do or no token; otherwise
     *   "success" with a summary (including the non-blocking failure case).
     */
    async execute(ctx) {
        const start = Date.now();
        // Skip if no repo tasks configured.
        if (!ctx.config.repoTasksPath) {
            return { status: "skipped", reason: "No --repo-tasks-path provided" };
        }
        // Need a write token for mirroring. `||` (not `??`) so that an
        // empty-string AILF_REPORT_SANITY_API_TOKEN — what CI typically
        // exports for an unset secret — still falls back to SANITY_API_TOKEN.
        const token = process.env.AILF_REPORT_SANITY_API_TOKEN || process.env.SANITY_API_TOKEN;
        if (!token) {
            return {
                status: "skipped",
                reason: "No write token available for Content Lake mirroring",
            };
        }
        try {
            // The task source wired on the context also yields Content Lake
            // tasks; only repo tasks are wanted here, so load them from a
            // fresh RepoTaskSource instance.
            const { RepoTaskSource } = await import("../../adapters/task-sources/repo-task-source.js");
            const repoSource = new RepoTaskSource(ctx.config.repoTasksPath);
            const repoTasks = await repoSource.loadTasks();
            if (repoTasks.length === 0) {
                return {
                    durationMs: Date.now() - start,
                    status: "success",
                    summary: "No repo tasks to mirror",
                };
            }
            // Detect git context (from env vars or git CLI).
            const git = await detectGitContext(ctx.config.repoTasksPath);
            ctx.logger.info(` Mirroring ${repoTasks.length} repo task(s) from ${git.repo}@${git.branch}`);
            // Create a client with write access.
            const client = getSanityClient({ token });
            // Run the mirror.
            const result = await mirrorRepoTasks({
                client,
                tasks: repoTasks,
                git,
            });
            // Log results.
            if (result.areasCreated.length > 0) {
                ctx.logger.info(` Auto-created feature areas: ${result.areasCreated.join(", ")}`);
            }
            if (result.unresolvedSlugs.length > 0) {
                ctx.logger.warn(` Unresolved canonical doc slugs: ${result.unresolvedSlugs.join(", ")}`);
            }
            for (const err of result.errors) {
                ctx.logger.warn(` Mirror error: ${err}`);
            }
            const errorSuffix = result.errors.length > 0
                ? ` (${result.errors.length} error(s))`
                : "";
            return {
                durationMs: Date.now() - start,
                status: "success",
                summary: `Mirrored ${result.upserted} task(s), skipped ${result.skipped} unchanged${errorSuffix}`,
            };
        }
        catch (err) {
            // Deliberately non-blocking: warn and report success.
            const msg = err instanceof Error ? err.message : String(err);
            ctx.logger.warn(`Mirror step failed (non-blocking): ${msg}`);
            return {
                durationMs: Date.now() - start,
                status: "success",
                summary: `Mirror failed (non-blocking): ${msg}`,
            };
        }
    }
}
|