@sanity/ailf 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +89 -0
- package/bin/ailf.js +64 -0
- package/canonical/grader-references/README.md +88 -0
- package/canonical/grader-references/groq.yaml +234 -0
- package/canonical/grader-references/studio-setup.yaml +275 -0
- package/canonical/reference-solutions/.gitkeep +1 -0
- package/canonical/reference-solutions/frameworks/nuxt.ts +119 -0
- package/canonical/reference-solutions/frameworks/remix.tsx +100 -0
- package/canonical/reference-solutions/functions/publish-webhook.ts +60 -0
- package/canonical/reference-solutions/groq/advanced-filtering.ts +379 -0
- package/canonical/reference-solutions/groq/blog-queries.ts +137 -0
- package/canonical/reference-solutions/groq/joins-references.ts +300 -0
- package/canonical/reference-solutions/nextjs/app-router-integration.tsx +128 -0
- package/canonical/reference-solutions/studio-setup/blog-schema.ts +143 -0
- package/canonical/reference-solutions/studio-setup/custom-tool.tsx +78 -0
- package/canonical/reference-solutions/visual-editing/live-preview.tsx +137 -0
- package/canonical/reference-solutions/visual-editing/presentation-nextjs.tsx +130 -0
- package/config/airbyte/ai_literacy_framework.connector.yaml +639 -0
- package/config/bigquery/README.md +74 -0
- package/config/bigquery/views/area_scores.sql +87 -0
- package/config/bigquery/views/reports.sql +49 -0
- package/config/features.yaml +116 -0
- package/config/models.yaml +115 -0
- package/config/prompts.yaml +75 -0
- package/config/rubrics.yaml +62 -0
- package/config/schedules.yaml +43 -0
- package/config/sinks.yaml +54 -0
- package/config/sources.yaml +51 -0
- package/config/thresholds.yaml +49 -0
- package/dist/_vendor/ailf-core/examples/index.d.ts +190 -0
- package/dist/_vendor/ailf-core/examples/index.js +285 -0
- package/dist/_vendor/ailf-core/index.d.ts +17 -0
- package/dist/_vendor/ailf-core/index.js +17 -0
- package/dist/_vendor/ailf-core/ports/cache-store.d.ts +72 -0
- package/dist/_vendor/ailf-core/ports/cache-store.js +17 -0
- package/dist/_vendor/ailf-core/ports/config-source.d.ts +33 -0
- package/dist/_vendor/ailf-core/ports/config-source.js +15 -0
- package/dist/_vendor/ailf-core/ports/context.d.ts +172 -0
- package/dist/_vendor/ailf-core/ports/context.js +14 -0
- package/dist/_vendor/ailf-core/ports/doc-fetcher.d.ts +131 -0
- package/dist/_vendor/ailf-core/ports/doc-fetcher.js +12 -0
- package/dist/_vendor/ailf-core/ports/eval-runner.d.ts +24 -0
- package/dist/_vendor/ailf-core/ports/eval-runner.js +8 -0
- package/dist/_vendor/ailf-core/ports/index.d.ts +15 -0
- package/dist/_vendor/ailf-core/ports/index.js +7 -0
- package/dist/_vendor/ailf-core/ports/logger.d.ts +36 -0
- package/dist/_vendor/ailf-core/ports/logger.js +11 -0
- package/dist/_vendor/ailf-core/ports/pipeline-step.d.ts +46 -0
- package/dist/_vendor/ailf-core/ports/pipeline-step.js +8 -0
- package/dist/_vendor/ailf-core/ports/task-source.d.ts +159 -0
- package/dist/_vendor/ailf-core/ports/task-source.js +72 -0
- package/dist/_vendor/ailf-core/schemas/callback-payload.d.ts +24 -0
- package/dist/_vendor/ailf-core/schemas/callback-payload.js +29 -0
- package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +55 -0
- package/dist/_vendor/ailf-core/schemas/eval-config.js +78 -0
- package/dist/_vendor/ailf-core/schemas/index.d.ts +16 -0
- package/dist/_vendor/ailf-core/schemas/index.js +16 -0
- package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +125 -0
- package/dist/_vendor/ailf-core/schemas/pipeline-request.js +67 -0
- package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +531 -0
- package/dist/_vendor/ailf-core/schemas/pipeline.js +318 -0
- package/dist/_vendor/ailf-core/schemas/schedules.d.ts +68 -0
- package/dist/_vendor/ailf-core/schemas/schedules.js +74 -0
- package/dist/_vendor/ailf-core/schemas/sinks.d.ts +207 -0
- package/dist/_vendor/ailf-core/schemas/sinks.js +108 -0
- package/dist/_vendor/ailf-core/services/comparison-formatters.d.ts +18 -0
- package/dist/_vendor/ailf-core/services/comparison-formatters.js +189 -0
- package/dist/_vendor/ailf-core/services/config-helpers.d.ts +41 -0
- package/dist/_vendor/ailf-core/services/config-helpers.js +86 -0
- package/dist/_vendor/ailf-core/services/index.d.ts +12 -0
- package/dist/_vendor/ailf-core/services/index.js +12 -0
- package/dist/_vendor/ailf-core/services/scoring.d.ts +49 -0
- package/dist/_vendor/ailf-core/services/scoring.js +222 -0
- package/dist/_vendor/ailf-core/types/index.d.ts +1082 -0
- package/dist/_vendor/ailf-core/types/index.js +21 -0
- package/dist/_vendor/ailf-core/types/scoring-input.d.ts +54 -0
- package/dist/_vendor/ailf-core/types/scoring-input.js +9 -0
- package/dist/_vendor/ailf-shared/dimension-names.d.ts +21 -0
- package/dist/_vendor/ailf-shared/dimension-names.js +27 -0
- package/dist/_vendor/ailf-shared/document-ref.d.ts +29 -0
- package/dist/_vendor/ailf-shared/document-ref.js +1 -0
- package/dist/_vendor/ailf-shared/eval-modes.d.ts +12 -0
- package/dist/_vendor/ailf-shared/eval-modes.js +8 -0
- package/dist/_vendor/ailf-shared/index.d.ts +16 -0
- package/dist/_vendor/ailf-shared/index.js +16 -0
- package/dist/_vendor/ailf-shared/noise-threshold.d.ts +9 -0
- package/dist/_vendor/ailf-shared/noise-threshold.js +9 -0
- package/dist/_vendor/ailf-shared/score-grades.d.ts +17 -0
- package/dist/_vendor/ailf-shared/score-grades.js +23 -0
- package/dist/adapters/cache/content-lake-cache.d.ts +24 -0
- package/dist/adapters/cache/content-lake-cache.js +59 -0
- package/dist/adapters/cache/filesystem-cache.d.ts +18 -0
- package/dist/adapters/cache/filesystem-cache.js +54 -0
- package/dist/adapters/cache/index.d.ts +2 -0
- package/dist/adapters/cache/index.js +2 -0
- package/dist/adapters/config-sources/cli-config-adapter.d.ts +17 -0
- package/dist/adapters/config-sources/cli-config-adapter.js +23 -0
- package/dist/adapters/config-sources/file-config-adapter.d.ts +26 -0
- package/dist/adapters/config-sources/file-config-adapter.js +96 -0
- package/dist/adapters/config-sources/index.d.ts +2 -0
- package/dist/adapters/config-sources/index.js +2 -0
- package/dist/adapters/doc-fetchers/index.d.ts +1 -0
- package/dist/adapters/doc-fetchers/index.js +1 -0
- package/dist/adapters/doc-fetchers/sanity-doc-fetcher.d.ts +76 -0
- package/dist/adapters/doc-fetchers/sanity-doc-fetcher.js +620 -0
- package/dist/adapters/eval-runners/index.d.ts +1 -0
- package/dist/adapters/eval-runners/index.js +1 -0
- package/dist/adapters/eval-runners/promptfoo-eval-adapter.d.ts +14 -0
- package/dist/adapters/eval-runners/promptfoo-eval-adapter.js +63 -0
- package/dist/adapters/index.d.ts +12 -0
- package/dist/adapters/index.js +12 -0
- package/dist/adapters/loggers/console-logger.d.ts +22 -0
- package/dist/adapters/loggers/console-logger.js +54 -0
- package/dist/adapters/loggers/index.d.ts +9 -0
- package/dist/adapters/loggers/index.js +9 -0
- package/dist/adapters/loggers/json-logger.d.ts +18 -0
- package/dist/adapters/loggers/json-logger.js +33 -0
- package/dist/adapters/loggers/quiet-logger.d.ts +16 -0
- package/dist/adapters/loggers/quiet-logger.js +30 -0
- package/dist/adapters/task-sources/composite-task-source.d.ts +20 -0
- package/dist/adapters/task-sources/composite-task-source.js +59 -0
- package/dist/adapters/task-sources/content-lake-task-source.d.ts +20 -0
- package/dist/adapters/task-sources/content-lake-task-source.js +219 -0
- package/dist/adapters/task-sources/index.d.ts +7 -0
- package/dist/adapters/task-sources/index.js +7 -0
- package/dist/adapters/task-sources/repo-schemas.d.ts +245 -0
- package/dist/adapters/task-sources/repo-schemas.js +234 -0
- package/dist/adapters/task-sources/repo-task-source.d.ts +22 -0
- package/dist/adapters/task-sources/repo-task-source.js +104 -0
- package/dist/adapters/task-sources/repo-trigger.d.ts +52 -0
- package/dist/adapters/task-sources/repo-trigger.js +153 -0
- package/dist/adapters/task-sources/repo-validation.d.ts +49 -0
- package/dist/adapters/task-sources/repo-validation.js +164 -0
- package/dist/adapters/task-sources/yaml-task-source.d.ts +18 -0
- package/dist/adapters/task-sources/yaml-task-source.js +136 -0
- package/dist/agent-observer/agentic-provider.d.ts +132 -0
- package/dist/agent-observer/agentic-provider.js +983 -0
- package/dist/agent-observer/classifier.d.ts +62 -0
- package/dist/agent-observer/classifier.js +269 -0
- package/dist/agent-observer/index.d.ts +7 -0
- package/dist/agent-observer/index.js +4 -0
- package/dist/agent-observer/pricing.d.ts +35 -0
- package/dist/agent-observer/pricing.js +82 -0
- package/dist/agent-observer/provider.d.ts +77 -0
- package/dist/agent-observer/provider.js +151 -0
- package/dist/agent-observer/proxy.d.ts +91 -0
- package/dist/agent-observer/proxy.js +321 -0
- package/dist/agent-observer/test-imports.d.ts +7 -0
- package/dist/agent-observer/test-imports.js +185 -0
- package/dist/agent-observer/types.d.ts +137 -0
- package/dist/agent-observer/types.js +16 -0
- package/dist/assertions/source-isolation.d.ts +72 -0
- package/dist/assertions/source-isolation.js +117 -0
- package/dist/cli.d.ts +24 -0
- package/dist/cli.js +199 -0
- package/dist/commands/agent-report.d.ts +5 -0
- package/dist/commands/agent-report.js +69 -0
- package/dist/commands/baseline.d.ts +9 -0
- package/dist/commands/baseline.js +141 -0
- package/dist/commands/cache.d.ts +13 -0
- package/dist/commands/cache.js +135 -0
- package/dist/commands/calculate-scores.d.ts +8 -0
- package/dist/commands/calculate-scores.js +48 -0
- package/dist/commands/compare.d.ts +8 -0
- package/dist/commands/compare.js +120 -0
- package/dist/commands/completion.d.ts +18 -0
- package/dist/commands/completion.js +260 -0
- package/dist/commands/coverage-audit.d.ts +7 -0
- package/dist/commands/coverage-audit.js +40 -0
- package/dist/commands/discovery-report.d.ts +10 -0
- package/dist/commands/discovery-report.js +44 -0
- package/dist/commands/eval.d.ts +9 -0
- package/dist/commands/eval.js +35 -0
- package/dist/commands/explain-handler.d.ts +34 -0
- package/dist/commands/explain-handler.js +719 -0
- package/dist/commands/fetch-docs.d.ts +8 -0
- package/dist/commands/fetch-docs.js +128 -0
- package/dist/commands/generate-configs.d.ts +8 -0
- package/dist/commands/generate-configs.js +46 -0
- package/dist/commands/grader/index.d.ts +11 -0
- package/dist/commands/grader/index.js +118 -0
- package/dist/commands/init.d.ts +19 -0
- package/dist/commands/init.js +150 -0
- package/dist/commands/interactive.d.ts +12 -0
- package/dist/commands/interactive.js +238 -0
- package/dist/commands/lookup-doc.d.ts +15 -0
- package/dist/commands/lookup-doc.js +84 -0
- package/dist/commands/measure-retrieval.d.ts +5 -0
- package/dist/commands/measure-retrieval.js +65 -0
- package/dist/commands/pipeline-action.d.ts +71 -0
- package/dist/commands/pipeline-action.js +305 -0
- package/dist/commands/pipeline.d.ts +62 -0
- package/dist/commands/pipeline.js +53 -0
- package/dist/commands/pr-comment.d.ts +8 -0
- package/dist/commands/pr-comment.js +47 -0
- package/dist/commands/publish.d.ts +26 -0
- package/dist/commands/publish.js +253 -0
- package/dist/commands/readiness-report.d.ts +10 -0
- package/dist/commands/readiness-report.js +104 -0
- package/dist/commands/shared/options.d.ts +29 -0
- package/dist/commands/shared/options.js +57 -0
- package/dist/commands/update-quality-scores.d.ts +5 -0
- package/dist/commands/update-quality-scores.js +20 -0
- package/dist/commands/validate-tasks.d.ts +16 -0
- package/dist/commands/validate-tasks.js +93 -0
- package/dist/commands/validate.d.ts +9 -0
- package/dist/commands/validate.js +73 -0
- package/dist/commands/webhook-server.d.ts +5 -0
- package/dist/commands/webhook-server.js +30 -0
- package/dist/commands/weekly-digest.d.ts +10 -0
- package/dist/commands/weekly-digest.js +104 -0
- package/dist/composition-root.d.ts +26 -0
- package/dist/composition-root.js +107 -0
- package/dist/interpolate.d.ts +26 -0
- package/dist/interpolate.js +70 -0
- package/dist/job-store.d.ts +104 -0
- package/dist/job-store.js +188 -0
- package/dist/lib/agent-behavior-report.d.ts +8 -0
- package/dist/lib/agent-behavior-report.js +185 -0
- package/dist/lib/baseline.d.ts +19 -0
- package/dist/lib/baseline.js +153 -0
- package/dist/lib/calculate-scores.d.ts +23 -0
- package/dist/lib/calculate-scores.js +42 -0
- package/dist/lib/compare.d.ts +18 -0
- package/dist/lib/compare.js +170 -0
- package/dist/lib/coverage-audit.d.ts +4 -0
- package/dist/lib/coverage-audit.js +42 -0
- package/dist/lib/discovery-report.d.ts +13 -0
- package/dist/lib/discovery-report.js +57 -0
- package/dist/lib/fetch-docs.d.ts +30 -0
- package/dist/lib/fetch-docs.js +171 -0
- package/dist/lib/generate-configs.d.ts +25 -0
- package/dist/lib/generate-configs.js +42 -0
- package/dist/lib/grader-api.d.ts +21 -0
- package/dist/lib/grader-api.js +34 -0
- package/dist/lib/grader-compare.d.ts +19 -0
- package/dist/lib/grader-compare.js +91 -0
- package/dist/lib/grader-consistency.d.ts +27 -0
- package/dist/lib/grader-consistency.js +79 -0
- package/dist/lib/grader-sensitivity.d.ts +19 -0
- package/dist/lib/grader-sensitivity.js +75 -0
- package/dist/lib/grader-validate.d.ts +19 -0
- package/dist/lib/grader-validate.js +78 -0
- package/dist/lib/measure-retrieval.d.ts +14 -0
- package/dist/lib/measure-retrieval.js +71 -0
- package/dist/lib/pr-comment.d.ts +16 -0
- package/dist/lib/pr-comment.js +28 -0
- package/dist/lib/readiness-report.d.ts +13 -0
- package/dist/lib/readiness-report.js +108 -0
- package/dist/lib/webhook-server.d.ts +11 -0
- package/dist/lib/webhook-server.js +24 -0
- package/dist/lib/weekly-digest.d.ts +24 -0
- package/dist/lib/weekly-digest.js +148 -0
- package/dist/orchestration/build-app-context.d.ts +27 -0
- package/dist/orchestration/build-app-context.js +81 -0
- package/dist/orchestration/build-step-sequence.d.ts +15 -0
- package/dist/orchestration/build-step-sequence.js +84 -0
- package/dist/orchestration/config-to-source-overrides.d.ts +9 -0
- package/dist/orchestration/config-to-source-overrides.js +28 -0
- package/dist/orchestration/env-bridge.d.ts +21 -0
- package/dist/orchestration/env-bridge.js +66 -0
- package/dist/orchestration/index.d.ts +11 -0
- package/dist/orchestration/index.js +11 -0
- package/dist/orchestration/pipeline-orchestrator.d.ts +24 -0
- package/dist/orchestration/pipeline-orchestrator.js +153 -0
- package/dist/orchestration/step-runner.d.ts +20 -0
- package/dist/orchestration/step-runner.js +88 -0
- package/dist/orchestration/steps/calculate-scores-step.d.ts +13 -0
- package/dist/orchestration/steps/calculate-scores-step.js +95 -0
- package/dist/orchestration/steps/callback-step.d.ts +24 -0
- package/dist/orchestration/steps/callback-step.js +76 -0
- package/dist/orchestration/steps/compare-step.d.ts +14 -0
- package/dist/orchestration/steps/compare-step.js +92 -0
- package/dist/orchestration/steps/discovery-report-step.d.ts +13 -0
- package/dist/orchestration/steps/discovery-report-step.js +55 -0
- package/dist/orchestration/steps/fetch-docs-shell.d.ts +17 -0
- package/dist/orchestration/steps/fetch-docs-shell.js +30 -0
- package/dist/orchestration/steps/fetch-docs-step.d.ts +14 -0
- package/dist/orchestration/steps/fetch-docs-step.js +135 -0
- package/dist/orchestration/steps/gap-analysis-step.d.ts +16 -0
- package/dist/orchestration/steps/gap-analysis-step.js +136 -0
- package/dist/orchestration/steps/generate-configs-step.d.ts +14 -0
- package/dist/orchestration/steps/generate-configs-step.js +85 -0
- package/dist/orchestration/steps/grader-consistency-step.d.ts +13 -0
- package/dist/orchestration/steps/grader-consistency-step.js +64 -0
- package/dist/orchestration/steps/index.d.ts +19 -0
- package/dist/orchestration/steps/index.js +19 -0
- package/dist/orchestration/steps/mirror-repo-tasks-step.d.ts +21 -0
- package/dist/orchestration/steps/mirror-repo-tasks-step.js +94 -0
- package/dist/orchestration/steps/publish-report-step.d.ts +26 -0
- package/dist/orchestration/steps/publish-report-step.js +216 -0
- package/dist/orchestration/steps/readiness-step.d.ts +13 -0
- package/dist/orchestration/steps/readiness-step.js +91 -0
- package/dist/orchestration/steps/report-step.d.ts +12 -0
- package/dist/orchestration/steps/report-step.js +49 -0
- package/dist/orchestration/steps/run-eval-step.d.ts +17 -0
- package/dist/orchestration/steps/run-eval-step.js +195 -0
- package/dist/orchestration/steps/validate-step.d.ts +12 -0
- package/dist/orchestration/steps/validate-step.js +41 -0
- package/dist/pipeline/agent-behavior-report.d.ts +53 -0
- package/dist/pipeline/agent-behavior-report.js +132 -0
- package/dist/pipeline/attribution.d.ts +47 -0
- package/dist/pipeline/attribution.js +226 -0
- package/dist/pipeline/baseline.d.ts +37 -0
- package/dist/pipeline/baseline.js +141 -0
- package/dist/pipeline/cache.d.ts +101 -0
- package/dist/pipeline/cache.js +283 -0
- package/dist/pipeline/calculate-scores.d.ts +102 -0
- package/dist/pipeline/calculate-scores.js +1128 -0
- package/dist/pipeline/callback-delivery.d.ts +50 -0
- package/dist/pipeline/callback-delivery.js +89 -0
- package/dist/pipeline/checks.d.ts +39 -0
- package/dist/pipeline/checks.js +280 -0
- package/dist/pipeline/classify-url.d.ts +61 -0
- package/dist/pipeline/classify-url.js +93 -0
- package/dist/pipeline/compare.d.ts +31 -0
- package/dist/pipeline/compare.js +208 -0
- package/dist/pipeline/coverage-audit.d.ts +39 -0
- package/dist/pipeline/coverage-audit.js +165 -0
- package/dist/pipeline/degradations.d.ts +85 -0
- package/dist/pipeline/degradations.js +242 -0
- package/dist/pipeline/discovery-report.d.ts +55 -0
- package/dist/pipeline/discovery-report.js +178 -0
- package/dist/pipeline/eval-constants.d.ts +68 -0
- package/dist/pipeline/eval-constants.js +111 -0
- package/dist/pipeline/eval-fingerprint.d.ts +66 -0
- package/dist/pipeline/eval-fingerprint.js +175 -0
- package/dist/pipeline/expand-tasks.d.ts +220 -0
- package/dist/pipeline/expand-tasks.js +421 -0
- package/dist/pipeline/failure-modes.d.ts +46 -0
- package/dist/pipeline/failure-modes.js +348 -0
- package/dist/pipeline/fetch-url-content.d.ts +44 -0
- package/dist/pipeline/fetch-url-content.js +93 -0
- package/dist/pipeline/gap-analysis.d.ts +48 -0
- package/dist/pipeline/gap-analysis.js +231 -0
- package/dist/pipeline/generate-configs.d.ts +72 -0
- package/dist/pipeline/generate-configs.js +395 -0
- package/dist/pipeline/grader-api.d.ts +49 -0
- package/dist/pipeline/grader-api.js +200 -0
- package/dist/pipeline/grader-compare-runner.d.ts +44 -0
- package/dist/pipeline/grader-compare-runner.js +301 -0
- package/dist/pipeline/grader-comparison.d.ts +111 -0
- package/dist/pipeline/grader-comparison.js +161 -0
- package/dist/pipeline/grader-consistency-runner.d.ts +60 -0
- package/dist/pipeline/grader-consistency-runner.js +270 -0
- package/dist/pipeline/grader-consistency.d.ts +103 -0
- package/dist/pipeline/grader-consistency.js +146 -0
- package/dist/pipeline/grader-sensitivity-runner.d.ts +40 -0
- package/dist/pipeline/grader-sensitivity-runner.js +282 -0
- package/dist/pipeline/grader-sensitivity.d.ts +94 -0
- package/dist/pipeline/grader-sensitivity.js +144 -0
- package/dist/pipeline/grader-validate-runner.d.ts +38 -0
- package/dist/pipeline/grader-validate-runner.js +229 -0
- package/dist/pipeline/grader-validation.d.ts +107 -0
- package/dist/pipeline/grader-validation.js +169 -0
- package/dist/pipeline/map-request-to-config.d.ts +19 -0
- package/dist/pipeline/map-request-to-config.js +80 -0
- package/dist/pipeline/measure-retrieval.d.ts +59 -0
- package/dist/pipeline/measure-retrieval.js +111 -0
- package/dist/pipeline/mirror-repo-tasks.d.ts +86 -0
- package/dist/pipeline/mirror-repo-tasks.js +350 -0
- package/dist/pipeline/plan-format.d.ts +33 -0
- package/dist/pipeline/plan-format.js +202 -0
- package/dist/pipeline/plan.d.ts +169 -0
- package/dist/pipeline/plan.js +708 -0
- package/dist/pipeline/pr-comment.d.ts +19 -0
- package/dist/pipeline/pr-comment.js +502 -0
- package/dist/pipeline/probe.d.ts +52 -0
- package/dist/pipeline/probe.js +390 -0
- package/dist/pipeline/provenance.d.ts +47 -0
- package/dist/pipeline/provenance.js +146 -0
- package/dist/pipeline/readiness-report.d.ts +87 -0
- package/dist/pipeline/readiness-report.js +205 -0
- package/dist/pipeline/release-classification.d.ts +54 -0
- package/dist/pipeline/release-classification.js +238 -0
- package/dist/pipeline/release-report.d.ts +37 -0
- package/dist/pipeline/release-report.js +222 -0
- package/dist/pipeline/repo-eval-comment.d.ts +37 -0
- package/dist/pipeline/repo-eval-comment.js +165 -0
- package/dist/pipeline/repo-threshold-evaluator.d.ts +89 -0
- package/dist/pipeline/repo-threshold-evaluator.js +162 -0
- package/dist/pipeline/resolve-mappings.d.ts +35 -0
- package/dist/pipeline/resolve-mappings.js +72 -0
- package/dist/pipeline/retrieval-metrics.d.ts +39 -0
- package/dist/pipeline/retrieval-metrics.js +136 -0
- package/dist/pipeline/reverse-mapping.d.ts +67 -0
- package/dist/pipeline/reverse-mapping.js +88 -0
- package/dist/pipeline/schemas.d.ts +9 -0
- package/dist/pipeline/schemas.js +9 -0
- package/dist/pipeline/steps/calculate-scores-step.d.ts +11 -0
- package/dist/pipeline/steps/calculate-scores-step.js +89 -0
- package/dist/pipeline/steps/compare-step.d.ts +18 -0
- package/dist/pipeline/steps/compare-step.js +90 -0
- package/dist/pipeline/steps/eval-step.d.ts +53 -0
- package/dist/pipeline/steps/eval-step.js +347 -0
- package/dist/pipeline/steps/fetch-docs-step.d.ts +11 -0
- package/dist/pipeline/steps/fetch-docs-step.js +84 -0
- package/dist/pipeline/steps/generate-configs-step.d.ts +11 -0
- package/dist/pipeline/steps/generate-configs-step.js +98 -0
- package/dist/pipeline/steps/grader-consistency-step.d.ts +21 -0
- package/dist/pipeline/steps/grader-consistency-step.js +74 -0
- package/dist/pipeline/steps/publish-report-step.d.ts +57 -0
- package/dist/pipeline/steps/publish-report-step.js +243 -0
- package/dist/pipeline/steps/report-step.d.ts +13 -0
- package/dist/pipeline/steps/report-step.js +56 -0
- package/dist/pipeline/steps/update-scores-step.d.ts +11 -0
- package/dist/pipeline/steps/update-scores-step.js +42 -0
- package/dist/pipeline/targeted-loo.d.ts +88 -0
- package/dist/pipeline/targeted-loo.js +203 -0
- package/dist/pipeline/thresholds.d.ts +27 -0
- package/dist/pipeline/thresholds.js +245 -0
- package/dist/pipeline/types.d.ts +10 -0
- package/dist/pipeline/types.js +10 -0
- package/dist/pipeline/validate.d.ts +67 -0
- package/dist/pipeline/validate.js +406 -0
- package/dist/pipeline/webhook-server.d.ts +37 -0
- package/dist/pipeline/webhook-server.js +133 -0
- package/dist/report-store.d.ts +84 -0
- package/dist/report-store.js +208 -0
- package/dist/sanity/client.d.ts +38 -0
- package/dist/sanity/client.js +86 -0
- package/dist/sanity/portable-text.d.ts +11 -0
- package/dist/sanity/portable-text.js +211 -0
- package/dist/sanity/queries.d.ts +133 -0
- package/dist/sanity/queries.js +300 -0
- package/dist/schedules/digest.d.ts +116 -0
- package/dist/schedules/digest.js +156 -0
- package/dist/schedules/index.d.ts +12 -0
- package/dist/schedules/index.js +10 -0
- package/dist/schedules/loader.d.ts +31 -0
- package/dist/schedules/loader.js +73 -0
- package/dist/schedules/schema.d.ts +9 -0
- package/dist/schedules/schema.js +9 -0
- package/dist/scripts/agent-behavior-report.d.ts +19 -0
- package/dist/scripts/agent-behavior-report.js +315 -0
- package/dist/scripts/baseline.d.ts +43 -0
- package/dist/scripts/baseline.js +267 -0
- package/dist/scripts/calculate-scores.d.ts +166 -0
- package/dist/scripts/calculate-scores.js +1296 -0
- package/dist/scripts/compare.d.ts +22 -0
- package/dist/scripts/compare.js +334 -0
- package/dist/scripts/coverage-audit.d.ts +44 -0
- package/dist/scripts/coverage-audit.js +209 -0
- package/dist/scripts/debug-eval.d.ts +19 -0
- package/dist/scripts/debug-eval.js +73 -0
- package/dist/scripts/discovery-report.d.ts +58 -0
- package/dist/scripts/discovery-report.js +250 -0
- package/dist/scripts/fetch-docs.d.ts +35 -0
- package/dist/scripts/fetch-docs.js +472 -0
- package/dist/scripts/generate-configs.d.ts +66 -0
- package/dist/scripts/generate-configs.js +459 -0
- package/dist/scripts/grader-api.d.ts +27 -0
- package/dist/scripts/grader-api.js +206 -0
- package/dist/scripts/grader-compare.d.ts +22 -0
- package/dist/scripts/grader-compare.js +368 -0
- package/dist/scripts/grader-consistency.d.ts +20 -0
- package/dist/scripts/grader-consistency.js +313 -0
- package/dist/scripts/grader-sensitivity.d.ts +22 -0
- package/dist/scripts/grader-sensitivity.js +354 -0
- package/dist/scripts/grader-validate.d.ts +19 -0
- package/dist/scripts/grader-validate.js +267 -0
- package/dist/scripts/measure-retrieval.d.ts +10 -0
- package/dist/scripts/measure-retrieval.js +145 -0
- package/dist/scripts/migrate-tasks-to-content-lake.d.ts +24 -0
- package/dist/scripts/migrate-tasks-to-content-lake.js +327 -0
- package/dist/scripts/pipeline.d.ts +76 -0
- package/dist/scripts/pipeline.js +1031 -0
- package/dist/scripts/pr-comment.d.ts +10 -0
- package/dist/scripts/pr-comment.js +510 -0
- package/dist/scripts/readiness-report.d.ts +88 -0
- package/dist/scripts/readiness-report.js +342 -0
- package/dist/scripts/update-quality-scores.d.ts +15 -0
- package/dist/scripts/update-quality-scores.js +184 -0
- package/dist/scripts/validate-task-sources.d.ts +21 -0
- package/dist/scripts/validate-task-sources.js +210 -0
- package/dist/scripts/validate.d.ts +13 -0
- package/dist/scripts/validate.js +79 -0
- package/dist/scripts/webhook-server.d.ts +26 -0
- package/dist/scripts/webhook-server.js +147 -0
- package/dist/scripts/weekly-digest.d.ts +24 -0
- package/dist/scripts/weekly-digest.js +144 -0
- package/dist/sinks/bigquery/index.d.ts +131 -0
- package/dist/sinks/bigquery/index.js +222 -0
- package/dist/sinks/format-slack.d.ts +64 -0
- package/dist/sinks/format-slack.js +306 -0
- package/dist/sinks/index.d.ts +23 -0
- package/dist/sinks/index.js +18 -0
- package/dist/sinks/loader.d.ts +18 -0
- package/dist/sinks/loader.js +82 -0
- package/dist/sinks/retry.d.ts +24 -0
- package/dist/sinks/retry.js +52 -0
- package/dist/sinks/schema.d.ts +9 -0
- package/dist/sinks/schema.js +9 -0
- package/dist/sinks/slack/format.d.ts +65 -0
- package/dist/sinks/slack/format.js +327 -0
- package/dist/sinks/slack/index.d.ts +27 -0
- package/dist/sinks/slack/index.js +78 -0
- package/dist/sinks/slack-sink.d.ts +27 -0
- package/dist/sinks/slack-sink.js +78 -0
- package/dist/sinks/types.d.ts +59 -0
- package/dist/sinks/types.js +44 -0
- package/dist/sinks/webhook/index.d.ts +19 -0
- package/dist/sinks/webhook/index.js +50 -0
- package/dist/sinks/webhook-sink.d.ts +19 -0
- package/dist/sinks/webhook-sink.js +50 -0
- package/dist/sources.d.ts +104 -0
- package/dist/sources.js +292 -0
- package/dist/webhook/budget.d.ts +42 -0
- package/dist/webhook/budget.js +60 -0
- package/dist/webhook/debounce.d.ts +67 -0
- package/dist/webhook/debounce.js +76 -0
- package/dist/webhook/dispatch.d.ts +45 -0
- package/dist/webhook/dispatch.js +84 -0
- package/dist/webhook/eval-request-handler.d.ts +87 -0
- package/dist/webhook/eval-request-handler.js +181 -0
- package/dist/webhook/handler.d.ts +88 -0
- package/dist/webhook/handler.js +203 -0
- package/dist/webhook/index.d.ts +17 -0
- package/dist/webhook/index.js +12 -0
- package/dist/webhook/types.d.ts +109 -0
- package/dist/webhook/types.js +10 -0
- package/package.json +72 -0
- package/tasks/.expanded.agentic.yaml +51 -0
- package/tasks/.expanded.yaml +66 -0
- package/tasks/frameworks.yaml +98 -0
- package/tasks/functions.yaml +51 -0
- package/tasks/groq.yaml +216 -0
- package/tasks/nextjs-live.yaml +62 -0
- package/tasks/studio-setup.yaml +111 -0
- package/tasks/visual-editing.yaml +120 -0
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pipeline orchestrator — sequences steps and short-circuits on failure.
|
|
3
|
+
*
|
|
4
|
+
* This replaces the 1,672-line executePipeline() in pipeline-action.ts
|
|
5
|
+
* with declarative step sequencing. Each step is run through the
|
|
6
|
+
* StepRunner which provides uniform timing, precondition checking,
|
|
7
|
+
* error handling, and logging.
|
|
8
|
+
*
|
|
9
|
+
* When a jobId is present in the config (API-triggered evaluations),
|
|
10
|
+
* the orchestrator emits progress updates to the Content Lake after
|
|
11
|
+
* each step completes. This enables the GET /v1/jobs/:jobId polling
|
|
12
|
+
* endpoint to show real-time progress.
|
|
13
|
+
*/
|
|
14
|
+
import { runStep } from "./step-runner.js";
|
|
15
|
+
// ---------------------------------------------------------------------------
|
|
16
|
+
// Job progress reporter
|
|
17
|
+
// ---------------------------------------------------------------------------
|
|
18
|
+
/**
|
|
19
|
+
* Report job progress to the Content Lake via the report store's
|
|
20
|
+
* underlying Sanity client. Best-effort — failures are logged and
|
|
21
|
+
* never block the pipeline.
|
|
22
|
+
*/
|
|
23
|
+
async function reportJobProgress(ctx, stepName, completedSteps, totalSteps, status, errorInfo) {
|
|
24
|
+
const jobId = ctx.config.jobId;
|
|
25
|
+
if (!jobId)
|
|
26
|
+
return;
|
|
27
|
+
// Use the report store's write capability to patch the job document.
|
|
28
|
+
// The report store exposes a Sanity client — we access it through
|
|
29
|
+
// a best-effort PATCH via the same client infrastructure.
|
|
30
|
+
try {
|
|
31
|
+
// Dynamic import to avoid circular deps — the job store is a
|
|
32
|
+
// lightweight module that only needs a Sanity token.
|
|
33
|
+
const { JobStore } = await import("../job-store.js");
|
|
34
|
+
const store = new JobStore({
|
|
35
|
+
token: process.env.AILF_REPORT_SANITY_API_TOKEN ??
|
|
36
|
+
process.env.SANITY_API_TOKEN ??
|
|
37
|
+
undefined,
|
|
38
|
+
});
|
|
39
|
+
const update = {
|
|
40
|
+
status,
|
|
41
|
+
progress: {
|
|
42
|
+
currentStep: stepName,
|
|
43
|
+
completedSteps,
|
|
44
|
+
totalSteps,
|
|
45
|
+
},
|
|
46
|
+
};
|
|
47
|
+
if (status === "running" && completedSteps === 1) {
|
|
48
|
+
update.startedAt = new Date().toISOString();
|
|
49
|
+
}
|
|
50
|
+
if (errorInfo) {
|
|
51
|
+
update.error = errorInfo;
|
|
52
|
+
update.completedAt = new Date().toISOString();
|
|
53
|
+
}
|
|
54
|
+
await store.updateJob(jobId, update);
|
|
55
|
+
}
|
|
56
|
+
catch {
|
|
57
|
+
// Best effort — progress reporting should never block the pipeline
|
|
58
|
+
ctx.logger.warn(`Failed to report job progress for step "${stepName}" — continuing`);
|
|
59
|
+
}
|
|
60
|
+
}
|
|
61
|
+
// ---------------------------------------------------------------------------
|
|
62
|
+
// Orchestrator
|
|
63
|
+
// ---------------------------------------------------------------------------
|
|
64
|
+
/**
 * Execute pipeline steps in order and gather their results.
 *
 * Every step is executed through runStep(). A failed step that is not
 * flagged `optional` aborts the run immediately and the function returns
 * `success: false`; a failed optional step is recorded and the loop moves on.
 * A failed step named "validate" additionally flips `validation.valid`.
 *
 * When `ctx.config.jobId` is truthy, progress is pushed through
 * reportJobProgress() before the first step, before and after every step,
 * and on abort. After the last step the job is marked "completed" with
 * JobStore.updateJob(); if that final write fails, a warning is logged and
 * the run still counts as successful.
 *
 * @param ctx - Application context; `ctx.config.jobId` and `ctx.logger` are read here.
 * @param steps - Steps to run, in execution order.
 * @returns `{ durationMs, steps, success, validation }`, where `steps` maps
 *   the name of every executed step to its result.
 */
export async function orchestratePipeline(ctx, steps) {
    const stepResults = {};
    const sharedState = {};
    const validation = { issues: [], valid: true };
    const startedAt = Date.now();
    const tracksJob = Boolean(ctx.config.jobId);
    // Thin wrapper so every progress call carries the same context and total.
    const report = (stepName, completed, status, errorInfo) => reportJobProgress(ctx, stepName, completed, steps.length, status, errorInfo);
    ctx.logger.section("ai-literacy-framework — Evaluation Pipeline");
    // Announce the run before any step has started.
    if (tracksJob) {
        await report(steps[0]?.name ?? "init", 0, "running");
    }
    for (const [index, step] of steps.entries()) {
        ctx.logger.section(step.name);
        // Mark the step as the one currently in flight.
        if (tracksJob) {
            await report(step.name, index, "running");
        }
        const outcome = await runStep(step, ctx, sharedState);
        stepResults[step.name] = outcome;
        if (outcome.status === "failed") {
            // The validate step feeds the validation summary of the run.
            if (step.name === "validate") {
                validation.valid = false;
            }
            // Only required steps stop the pipeline.
            if (!step.optional) {
                ctx.logger.error(`Pipeline aborted: ${step.name} failed`);
                if (tracksJob) {
                    await report(step.name, index + 1, "failed", {
                        message: outcome.error,
                        step: step.name,
                    });
                }
                return {
                    durationMs: Date.now() - startedAt,
                    steps: stepResults,
                    success: false,
                    validation,
                };
            }
        }
        // Step is done (successfully, skipped, or as a tolerated optional failure).
        if (tracksJob) {
            await report(step.name, index + 1, "running");
        }
    }
    const durationMs = Date.now() - startedAt;
    ctx.logger.section("Pipeline Complete");
    ctx.logger.info(`All steps completed in ${durationMs}ms`);
    // Final job update; reportId is attached when a step stored one in the shared state.
    if (tracksJob) {
        try {
            const { JobStore } = await import("../job-store.js");
            const token = process.env.AILF_REPORT_SANITY_API_TOKEN ??
                process.env.SANITY_API_TOKEN ??
                undefined;
            const store = new JobStore({ token });
            const reportRef = sharedState.reportId
                ? { reportId: sharedState.reportId }
                : {};
            await store.updateJob(ctx.config.jobId, {
                status: "completed",
                completedAt: new Date().toISOString(),
                progress: {
                    currentStep: "complete",
                    completedSteps: steps.length,
                    totalSteps: steps.length,
                },
                ...reportRef,
            });
        }
        catch {
            // Best effort: the pipeline result does not depend on job bookkeeping.
            ctx.logger.warn("Failed to report job completion — continuing");
        }
    }
    return {
        durationMs,
        steps: stepResults,
        success: true,
        validation,
    };
}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Uniform step execution with timing, logging, precondition checking,
|
|
3
|
+
* and cache integration.
|
|
4
|
+
*
|
|
5
|
+
* The StepRunner is the single place where cache lookup/record happens.
|
|
6
|
+
* Individual steps declare their cache inputs via cacheInputs(); the
|
|
7
|
+
* runner handles the rest.
|
|
8
|
+
*/
|
|
9
|
+
import type { AppContext, PipelineState, PipelineStep, StepResult } from "../_vendor/ailf-core/index.d.ts";
|
|
10
|
+
/**
|
|
11
|
+
* Execute a single pipeline step with uniform middleware.
|
|
12
|
+
*
|
|
13
|
+
* Order of operations:
|
|
14
|
+
* 1. Precondition check → fail fast on errors
|
|
15
|
+
* 2. Cache lookup (if ctx.cache + step.cacheInputs + !noCache)
|
|
16
|
+
* 3. Execute step logic
|
|
17
|
+
* 4. Cache record (on success)
|
|
18
|
+
* 5. Log result
|
|
19
|
+
*/
|
|
20
|
+
/**
 * Execute one pipeline step and resolve with its result.
 *
 * @param step - The step to run.
 * @param ctx - Application context handed to the step.
 * @param state - Optional pipeline state passed on to the step's execution.
 * @returns A promise for the step's result.
 */
export declare function runStep(step: PipelineStep, ctx: AppContext, state?: PipelineState): Promise<StepResult>;
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Uniform step execution with timing, logging, precondition checking,
|
|
3
|
+
* and cache integration.
|
|
4
|
+
*
|
|
5
|
+
* The StepRunner is the single place where cache lookup/record happens.
|
|
6
|
+
* Individual steps declare their cache inputs via cacheInputs(); the
|
|
7
|
+
* runner handles the rest.
|
|
8
|
+
*/
|
|
9
|
+
/**
 * Run one pipeline step through the shared middleware and return its result.
 *
 * Sequence:
 *   1. `step.check(ctx)` — any issue with severity "error" fails the step
 *      without executing it. A check that throws is converted into a failed
 *      result as well, so callers never see a rejected promise from it.
 *   2. Cache lookup — only when `ctx.cache` exists, `ctx.config.noCache` is
 *      falsy and the step implements `cacheInputs()`. A hit short-circuits
 *      with a "Skipped (cached)" success result.
 *   3. `step.execute(ctx, state)`. A thrown error becomes a failed result.
 *   4. On success the outcome is recorded in the cache under the key
 *      computed in (2).
 *   5. Every result is handed to `ctx.logger.step()` before it is returned.
 *
 * Errors during cache lookup or cache record are deliberately non-fatal.
 *
 * @param step - Step to run; must provide `name`, `check()` and `execute()`,
 *   and may provide `cacheInputs()`.
 * @param ctx - Application context (`logger`, `config`, optional `cache`).
 * @param state - Mutable state shared between steps; defaults to a fresh object.
 * @returns The step result. Results with status "skipped" are passed through
 *   untouched; all others carry a `durationMs` measured by this runner.
 */
export async function runStep(step, ctx, state = {}) {
    const start = Date.now();
    const messageOf = (err) => (err instanceof Error ? err.message : String(err));
    // Build, log and return a failed result in one place.
    const fail = (error) => {
        const result = {
            durationMs: Date.now() - start,
            error,
            status: "failed",
        };
        ctx.logger.step(step.name, result);
        return result;
    };
    // 1. Precondition check
    let errors;
    try {
        errors = step.check(ctx).filter((i) => i.severity === "error");
    }
    catch (err) {
        // A throwing check must fail the step like any other error, otherwise
        // the rejection escapes the caller's failure handling.
        return fail(`Precondition check threw: ${messageOf(err)}`);
    }
    if (errors.length > 0) {
        return fail(`Precondition failed: ${errors.map((e) => e.message).join("; ")}`);
    }
    // 2. Cache lookup
    const canCache = ctx.cache && !ctx.config.noCache && typeof step.cacheInputs === "function";
    let cacheKey;
    if (canCache) {
        try {
            const inputs = step.cacheInputs(ctx);
            const key = await ctx.cache.computeKey(inputs);
            cacheKey = key;
            const cached = await ctx.cache.lookup(step.name, key);
            if (cached.hit) {
                const result = {
                    durationMs: Date.now() - start,
                    status: "success",
                    summary: `Skipped (cached) — ${cached.entry.summary}`,
                };
                ctx.logger.step(step.name, result);
                return result;
            }
        }
        catch {
            // Cache lookup failure is non-fatal — proceed to execute
        }
    }
    // 3. Execute
    try {
        const result = await step.execute(ctx, state);
        // Ensure timing is consistent (step may or may not track its own duration)
        const timed = result.status === "skipped"
            ? result
            : { ...result, durationMs: Date.now() - start };
        ctx.logger.step(step.name, timed);
        // 4. Cache record (on success)
        if (canCache && cacheKey && timed.status === "success") {
            try {
                await ctx.cache.record(step.name, cacheKey, {
                    durationMs: timed.durationMs,
                    outputPaths: [],
                    summary: timed.summary ?? step.name,
                });
            }
            catch {
                // Cache record failure is non-fatal
            }
        }
        return timed;
    }
    catch (err) {
        return fail(messageOf(err));
    }
}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pipeline step: Calculate AI Literacy Scores from eval results.
|
|
3
|
+
*
|
|
4
|
+
* Calls calculateAndWriteScores() from pipeline/calculate-scores.ts with
|
|
5
|
+
* typed options derived from AppContext. No env bridge needed.
|
|
6
|
+
*/
|
|
7
|
+
import type { AppContext, PipelineStep, StepResult, ValidationIssue } from "../../_vendor/ailf-core/index.d.ts";
|
|
8
|
+
/** Pipeline step that calculates AI Literacy Scores from evaluation results. */
export declare class CalculateScoresStep implements PipelineStep {
    /** Step identifier. */
    readonly name = "calculate-scores";
    /** Precondition issues for this step. */
    check(): ValidationIssue[];
    /**
     * Calculate the scores and resolve with the step result.
     *
     * @param ctx - Application context supplying the run configuration.
     */
    execute(ctx: AppContext): Promise<StepResult>;
    /**
     * Inputs the step runner uses to compute this step's cache key.
     *
     * @param ctx - Application context supplying the run configuration.
     */
    cacheInputs(ctx: AppContext): string[];
}
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pipeline step: Calculate AI Literacy Scores from eval results.
|
|
3
|
+
*
|
|
4
|
+
* Calls calculateAndWriteScores() from pipeline/calculate-scores.ts with
|
|
5
|
+
* typed options derived from AppContext. No env bridge needed.
|
|
6
|
+
*/
|
|
7
|
+
import { join } from "path";
|
|
8
|
+
import { getStepInputPaths } from "../../pipeline/cache.js";
|
|
9
|
+
import { calculateAndWriteScores } from "../../pipeline/calculate-scores.js";
|
|
10
|
+
import { checkResultsExist, checkScoreSummaryValid, } from "../../pipeline/checks.js";
|
|
11
|
+
import { RESULTS_FILES } from "../../pipeline/eval-constants.js";
|
|
12
|
+
import { loadSource } from "../../sources.js";
|
|
13
|
+
import { configToSourceOverrides } from "../config-to-source-overrides.js";
|
|
14
|
+
/**
 * Pipeline step that turns evaluation results into AI Literacy Scores.
 *
 * The work itself is delegated to calculateAndWriteScores(), called with
 * options read from `ctx.config`. It is guarded by a results-file
 * precondition and a score-summary postcondition.
 */
export class CalculateScoresStep {
    name = "calculate-scores";
    /** No up-front preconditions; the results file is verified inside execute(). */
    check() {
        return [];
    }
    /**
     * Verify the results file, calculate the scores, then validate the summary.
     *
     * @param ctx - Application context; `mode`, `rootDir`, `source`,
     *   `searchMode` and `allowedOrigins` are read from `ctx.config`.
     * @returns A "success" result, or a "failed" result describing which
     *   stage (results check, calculation, postcondition) went wrong.
     */
    async execute(ctx) {
        const begin = Date.now();
        const failure = (error) => ({
            durationMs: Date.now() - begin,
            error,
            status: "failed",
        });
        const errorMessages = (issues) => issues
            .filter((issue) => issue.severity === "error")
            .map((issue) => issue.message);
        // "full" runs are scored from the baseline results file.
        const primaryMode = ctx.config.mode === "full"
            ? "baseline"
            : ctx.config.mode;
        const resultsFile = RESULTS_FILES[primaryMode];
        // Precondition: the results file must exist.
        const missingResults = errorMessages(checkResultsExist(ctx.config.rootDir, resultsFile));
        if (missingResults.length > 0) {
            return failure(`Results missing: ${missingResults.join("; ")}`);
        }
        // Resolve the source once; scoring can proceed without its metadata.
        const overrides = configToSourceOverrides(ctx.config);
        let resolvedSource;
        try {
            resolvedSource = loadSource(ctx.config.source, overrides);
        }
        catch {
            // Non-fatal — resolvedSource stays undefined.
        }
        // The baseline results path is left for the callee to resolve.
        const resultsPath = primaryMode === "baseline"
            ? undefined
            : join(ctx.config.rootDir, resultsFile);
        try {
            calculateAndWriteScores({
                allowedOrigins: ctx.config.allowedOrigins,
                mode: ctx.config.mode,
                resolvedSource,
                resultsPath,
                rootDir: ctx.config.rootDir,
                searchMode: ctx.config.searchMode,
                source: ctx.config.source,
            });
        }
        catch (err) {
            const hasStatus = typeof err === "object" && err !== null && "status" in err;
            const exitCode = hasStatus ? err.status : undefined;
            if (exitCode === undefined) {
                const detail = err instanceof Error ? err.message : String(err);
                return failure(`calculate-scores failed: ${detail}`);
            }
            if (exitCode !== 1) {
                return failure(`calculate-scores failed with exit code ${exitCode}`);
            }
            // A status of exactly 1 is not treated as a failure here; the
            // postcondition below decides.
            // NOTE(review): presumably a carried-over exit-code convention — confirm.
        }
        // Postcondition: the score summary must exist and be valid.
        const invalidSummary = errorMessages(checkScoreSummaryValid(ctx.config.rootDir));
        if (invalidSummary.length > 0) {
            return failure(`Postcondition failed: ${invalidSummary.join("; ")}`);
        }
        return {
            durationMs: Date.now() - begin,
            status: "success",
            summary: "Scores calculated and summary written",
        };
    }
    /** Cache inputs are the input paths registered for "calculate-scores". */
    cacheInputs(ctx) {
        return getStepInputPaths(ctx.config.rootDir, "calculate-scores");
    }
}
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pipeline step: Deliver results to a callback URL.
|
|
3
|
+
*
|
|
4
|
+
* After the pipeline completes and the report is published, this step
|
|
5
|
+
* POSTs the results to the caller's callback URL. Used for API-triggered
|
|
6
|
+
* evaluations where the caller wants push-based result delivery.
|
|
7
|
+
*
|
|
8
|
+
* This step is always optional — callback failure never blocks the pipeline.
|
|
9
|
+
* The result is already in the Content Lake (system of record).
|
|
10
|
+
*
|
|
11
|
+
* @see packages/eval/src/pipeline/callback-delivery.ts
|
|
12
|
+
* @see docs/design-docs/api-service-gateway.md
|
|
13
|
+
*/
|
|
14
|
+
import type { AppContext, PipelineState, PipelineStep, StepResult, ValidationIssue } from "../../_vendor/ailf-core/index.d.ts";
|
|
15
|
+
import { type CallbackConfig } from "../../pipeline/callback-delivery.js";
|
|
16
|
+
/** Optional pipeline step that delivers the run's results to a callback URL. */
export declare class CallbackStep implements PipelineStep {
    /** Callback configuration supplied at construction time. */
    private readonly callback;
    /** Job identifier forwarded in the callback payload, if any. */
    private readonly jobId?;
    /** Step identifier. */
    readonly name = "callback-delivery";
    /** Always optional: a failed delivery never blocks the pipeline. */
    readonly optional = true;
    /**
     * @param callback - Where and how to deliver the callback.
     * @param jobId - Optional job identifier to include in the payload.
     */
    constructor(callback: CallbackConfig, jobId?: string | undefined);
    /** Precondition issues for this step. */
    check(): ValidationIssue[];
    /**
     * Deliver the callback and resolve with the step result.
     *
     * @param ctx - Application context supplying configuration and logger.
     * @param state - Pipeline state shared between steps.
     */
    execute(ctx: AppContext, state: PipelineState): Promise<StepResult>;
}
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pipeline step: Deliver results to a callback URL.
|
|
3
|
+
*
|
|
4
|
+
* After the pipeline completes and the report is published, this step
|
|
5
|
+
* POSTs the results to the caller's callback URL. Used for API-triggered
|
|
6
|
+
* evaluations where the caller wants push-based result delivery.
|
|
7
|
+
*
|
|
8
|
+
* This step is always optional — callback failure never blocks the pipeline.
|
|
9
|
+
* The result is already in the Content Lake (system of record).
|
|
10
|
+
*
|
|
11
|
+
* @see packages/eval/src/pipeline/callback-delivery.ts
|
|
12
|
+
* @see docs/design-docs/api-service-gateway.md
|
|
13
|
+
*/
|
|
14
|
+
import { readFileSync } from "fs";
|
|
15
|
+
import { resolve } from "path";
|
|
16
|
+
import { deliverCallback, } from "../../pipeline/callback-delivery.js";
|
|
17
|
+
/**
 * Optional pipeline step that POSTs the score summary to a caller-supplied
 * callback via deliverCallback().
 */
export class CallbackStep {
    callback;
    jobId;
    name = "callback-delivery";
    optional = true;
    /**
     * @param callback - Callback configuration; `url` is required by check().
     * @param jobId - Optional job identifier included in the payload.
     */
    constructor(callback, jobId) {
        this.callback = callback;
        this.jobId = jobId;
    }
    /** Reports an error-severity issue when no callback URL is configured. */
    check() {
        if (this.callback.url) {
            return [];
        }
        return [
            {
                message: "Callback URL is required",
                severity: "error",
                source: "callback-delivery",
            },
        ];
    }
    /**
     * Read `results/latest/score-summary.json` and deliver it to the callback.
     *
     * @param ctx - Application context; `ctx.config.rootDir` locates the summary.
     * @param state - Pipeline state; `state.reportId` is forwarded in the payload.
     * @returns "success" with the attempt count, or "failed" when the summary
     *   cannot be read or delivery does not succeed.
     */
    async execute(ctx, state) {
        const begin = Date.now();
        const failure = (error) => ({
            durationMs: Date.now() - begin,
            error,
            status: "failed",
        });
        // Load the score summary that forms the payload body.
        let summary;
        try {
            const raw = readFileSync(resolve(ctx.config.rootDir, "results", "latest", "score-summary.json"), "utf-8");
            summary = JSON.parse(raw);
        }
        catch (err) {
            const detail = err instanceof Error ? err.message : String(err);
            return failure(`Failed to read score-summary.json for callback: ${detail}`);
        }
        // reportId comes from the shared pipeline state (set by an earlier step).
        ctx.logger.info(`Delivering results to ${this.callback.url}`);
        const payload = {
            deliveredAt: new Date().toISOString(),
            jobId: this.jobId,
            reportId: state.reportId,
            summary,
        };
        const delivery = await deliverCallback(this.callback, payload);
        if (!delivery.ok) {
            // Not critical: warn, and let the optional-step handling absorb the failure.
            ctx.logger.warn(`Callback delivery failed after ${delivery.attempts} attempts: ${delivery.error}`);
            return failure(`Callback delivery failed: ${delivery.error}`);
        }
        const plural = delivery.attempts === 1 ? "" : "s";
        return {
            durationMs: Date.now() - begin,
            status: "success",
            summary: `Callback delivered to ${this.callback.url} (${delivery.attempts} attempt${plural})`,
        };
    }
}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pipeline step: Compare against baseline scores.
|
|
3
|
+
*
|
|
4
|
+
* This step is already pure (no execSync, no env vars) — the logic is
|
|
5
|
+
* inlined directly from the former pipeline/steps/compare-step.ts.
|
|
6
|
+
* This is an optional step — failure doesn't stop the pipeline.
|
|
7
|
+
*/
|
|
8
|
+
import type { AppContext, PipelineStep, StepResult, ValidationIssue } from "../../_vendor/ailf-core/index.d.ts";
|
|
9
|
+
/** Optional pipeline step that compares the current scores against a baseline. */
export declare class CompareStep implements PipelineStep {
    /** Step identifier. */
    readonly name = "compare";
    /** Optional: a failure of this step does not stop the pipeline. */
    readonly optional = true;
    /** Precondition issues for this step. */
    check(): ValidationIssue[];
    /**
     * Run the comparison and resolve with the step result.
     *
     * @param ctx - Application context supplying the run configuration.
     */
    execute(ctx: AppContext): Promise<StepResult>;
}
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pipeline step: Compare against baseline scores.
|
|
3
|
+
*
|
|
4
|
+
* This step is already pure (no execSync, no env vars) — the logic is
|
|
5
|
+
* inlined directly from the former pipeline/steps/compare-step.ts.
|
|
6
|
+
* This is an optional step — failure doesn't stop the pipeline.
|
|
7
|
+
*/
|
|
8
|
+
import { existsSync, readFileSync, readdirSync, writeFileSync } from "fs";
|
|
9
|
+
import { join, resolve } from "path";
|
|
10
|
+
import { compare } from "../../pipeline/compare.js";
|
|
11
|
+
/**
 * Optional pipeline step that compares the current score summary with a
 * baseline and writes `results/latest/comparison-report.json`.
 */
export class CompareStep {
    name = "compare";
    optional = true;
    /** This step has no preconditions; missing inputs are handled in execute(). */
    check() {
        return [];
    }
    /**
     * Compare `results/latest/score-summary.json` against a baseline.
     *
     * The baseline is `ctx.config.compareBaseline` when set; otherwise the
     * `.json` file in `results/baselines` whose name sorts last.
     *
     * @param ctx - Application context; `rootDir`, `compareBaseline` and
     *   `compareThreshold` are read from `ctx.config`.
     * @returns "failed" when the score summary or the chosen baseline file is
     *   missing, "skipped" when no baseline is available, otherwise
     *   "success" with a one-line summary of the deltas.
     */
    async execute(ctx) {
        const start = Date.now();
        const { rootDir } = ctx.config;
        const scoreSummaryPath = resolve(rootDir, "results", "latest", "score-summary.json");
        if (!existsSync(scoreSummaryPath)) {
            return {
                durationMs: Date.now() - start,
                error: "score-summary.json not found. Run calculate-scores first.",
                status: "failed",
            };
        }
        // Load experiment (current run)
        const experiment = JSON.parse(readFileSync(scoreSummaryPath, "utf-8"));
        // Resolve baseline
        let resolvedBaselinePath;
        if (ctx.config.compareBaseline) {
            resolvedBaselinePath = resolve(ctx.config.compareBaseline);
        }
        else {
            const baselinesDir = resolve(rootDir, "results", "baselines");
            if (!existsSync(baselinesDir)) {
                return {
                    reason: "No baselines directory found. Run 'pnpm baseline:save' first.",
                    status: "skipped",
                };
            }
            // Descending name order, so the first entry is the one that sorts last.
            const files = readdirSync(baselinesDir)
                .filter((f) => f.endsWith(".json"))
                .sort()
                .reverse();
            if (files.length === 0) {
                return {
                    reason: "No baseline files found. Run 'pnpm baseline:save' first.",
                    status: "skipped",
                };
            }
            resolvedBaselinePath = join(baselinesDir, files[0]);
        }
        if (!existsSync(resolvedBaselinePath)) {
            return {
                durationMs: Date.now() - start,
                error: `Baseline file not found: ${resolvedBaselinePath}`,
                status: "failed",
            };
        }
        const baseline = JSON.parse(readFileSync(resolvedBaselinePath, "utf-8"));
        // Run comparison. An explicit threshold of 0 is a legitimate value, so
        // test for null/undefined rather than truthiness.
        const options = ctx.config.compareThreshold != null
            ? { noiseThreshold: ctx.config.compareThreshold }
            : undefined;
        const report = compare(baseline, experiment, options);
        // Write report
        const reportPath = resolve(rootDir, "results", "latest", "comparison-report.json");
        writeFileSync(reportPath, JSON.stringify(report, null, 2));
        // Build summary
        const improved = report.improved.length;
        const regressed = report.regressed.length;
        const unchanged = report.unchanged.length;
        const overallDelta = report.deltas.overall;
        const deltaStr = overallDelta > 0
            ? `+${Math.round(overallDelta)}`
            : String(Math.round(overallDelta));
        const parts = [`Overall: ${deltaStr}`];
        if (improved > 0)
            parts.push(`${improved} improved`);
        if (regressed > 0)
            parts.push(`${regressed} regressed`);
        if (unchanged > 0)
            parts.push(`${unchanged} unchanged`);
        return {
            durationMs: Date.now() - start,
            status: "success",
            summary: parts.join(", "),
        };
    }
}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pipeline step: Discovery report (agent discoverability analysis).
|
|
3
|
+
*
|
|
4
|
+
* Calls pure functions from pipeline/discovery-report.ts directly.
|
|
5
|
+
* Optional step — failure doesn't stop the pipeline.
|
|
6
|
+
*/
|
|
7
|
+
import type { AppContext, PipelineStep, StepResult, ValidationIssue } from "../../_vendor/ailf-core/index.d.ts";
|
|
8
|
+
/** Optional pipeline step that produces the agent discoverability report. */
export declare class DiscoveryReportStep implements PipelineStep {
    /** Step identifier. */
    readonly name = "discovery-report";
    /** Optional: a failure of this step does not stop the pipeline. */
    readonly optional = true;
    /** Precondition issues for this step. */
    check(): ValidationIssue[];
    /**
     * Generate the discovery report and resolve with the step result.
     *
     * @param ctx - Application context supplying the run configuration.
     */
    execute(ctx: AppContext): Promise<StepResult>;
}
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pipeline step: Discovery report (agent discoverability analysis).
|
|
3
|
+
*
|
|
4
|
+
* Calls pure functions from pipeline/discovery-report.ts directly.
|
|
5
|
+
* Optional step — failure doesn't stop the pipeline.
|
|
6
|
+
*/
|
|
7
|
+
import { existsSync, readFileSync, writeFileSync } from "fs";
|
|
8
|
+
import { resolve } from "path";
|
|
9
|
+
import { formatDiscoveryMarkdown, generateDiscoveryReport, } from "../../pipeline/discovery-report.js";
|
|
10
|
+
/**
 * Optional pipeline step that builds the discovery report from the score
 * summary, writes it to `results/latest/discovery-report.md`, and prints it.
 */
export class DiscoveryReportStep {
    name = "discovery-report";
    optional = true;
    /** This step has no preconditions; missing inputs are handled in execute(). */
    check() {
        return [];
    }
    /**
     * Generate, persist and print the discovery report.
     *
     * @param ctx - Application context; `rootDir` and `areas` are read from `ctx.config`.
     * @returns "failed" when the score summary is missing or anything throws,
     *   "skipped" when the summary has no `retrievalMetrics`, otherwise
     *   "success" with the headline numbers of the report.
     */
    async execute(ctx) {
        const root = ctx.config.rootDir;
        const begin = Date.now();
        const failure = (error) => ({
            durationMs: Date.now() - begin,
            error,
            status: "failed",
        });
        const plural = (count) => (count === 1 ? "" : "s");
        try {
            const latestDir = ["results", "latest"];
            const summaryFile = resolve(root, ...latestDir, "score-summary.json");
            if (!existsSync(summaryFile)) {
                return failure("score-summary.json not found");
            }
            const scoreSummary = JSON.parse(readFileSync(summaryFile, "utf-8"));
            // Without retrieval metrics there is nothing to report on.
            if (!scoreSummary.retrievalMetrics) {
                return {
                    status: "skipped",
                    reason: "No retrieval metrics in score summary — run an agentic evaluation first",
                };
            }
            const report = generateDiscoveryReport(scoreSummary, ctx.config.areas);
            const markdown = formatDiscoveryMarkdown(report);
            writeFileSync(resolve(root, ...latestDir, "discovery-report.md"), markdown);
            console.log(markdown);
            const invisibleCount = report.invisibleDocs.length;
            const avgF1 = report.overall.avgF1.toFixed(2);
            const recommendationCount = report.recommendations.length;
            return {
                durationMs: Date.now() - begin,
                status: "success",
                summary: `F1=${avgF1}, ${invisibleCount} invisible doc${plural(invisibleCount)}, ${recommendationCount} recommendation${plural(recommendationCount)}`,
            };
        }
        catch (err) {
            return failure(err instanceof Error ? err.message : String(err));
        }
    }
}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shell delegation for the fetch-docs step.
|
|
3
|
+
*
|
|
4
|
+
* Isolates the execSync call so it can be replaced when the pipeline
|
|
5
|
+
* fully migrates to the DocFetcher port.
|
|
6
|
+
*/
|
|
7
|
+
/** Outcome of the shell-delegated fetch-docs call, returned instead of throwing. */
export interface ShellResult {
    /** Success flag of the shell run. */
    ok: boolean;
    /** Optional error description — presumably only set when `ok` is false; confirm against the implementation. */
    error?: string;
}
|
|
11
|
+
/**
|
|
12
|
+
* Run `pnpm fetch-docs` via shell.
|
|
13
|
+
*
|
|
14
|
+
* Returns a result object instead of throwing so the step can
|
|
15
|
+
* handle the failure uniformly.
|
|
16
|
+
*/
|
|
17
|
+
/**
 * Run `pnpm fetch-docs` via shell and report the outcome as a ShellResult.
 *
 * @param rootDir - Project root directory (presumably the working directory for the command — confirm against the implementation).
 * @param source - Optional source selector (presumably forwarded to the command — confirm against the implementation).
 * @returns A result object; failures are reported through it rather than thrown.
 */
export declare function runFetchDocsShell(rootDir: string, source?: string): ShellResult;
|