@sanity/ailf 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +89 -0
- package/bin/ailf.js +64 -0
- package/canonical/grader-references/README.md +88 -0
- package/canonical/grader-references/groq.yaml +234 -0
- package/canonical/grader-references/studio-setup.yaml +275 -0
- package/canonical/reference-solutions/.gitkeep +1 -0
- package/canonical/reference-solutions/frameworks/nuxt.ts +119 -0
- package/canonical/reference-solutions/frameworks/remix.tsx +100 -0
- package/canonical/reference-solutions/functions/publish-webhook.ts +60 -0
- package/canonical/reference-solutions/groq/advanced-filtering.ts +379 -0
- package/canonical/reference-solutions/groq/blog-queries.ts +137 -0
- package/canonical/reference-solutions/groq/joins-references.ts +300 -0
- package/canonical/reference-solutions/nextjs/app-router-integration.tsx +128 -0
- package/canonical/reference-solutions/studio-setup/blog-schema.ts +143 -0
- package/canonical/reference-solutions/studio-setup/custom-tool.tsx +78 -0
- package/canonical/reference-solutions/visual-editing/live-preview.tsx +137 -0
- package/canonical/reference-solutions/visual-editing/presentation-nextjs.tsx +130 -0
- package/config/airbyte/ai_literacy_framework.connector.yaml +639 -0
- package/config/bigquery/README.md +74 -0
- package/config/bigquery/views/area_scores.sql +87 -0
- package/config/bigquery/views/reports.sql +49 -0
- package/config/features.yaml +116 -0
- package/config/models.yaml +115 -0
- package/config/prompts.yaml +75 -0
- package/config/rubrics.yaml +62 -0
- package/config/schedules.yaml +43 -0
- package/config/sinks.yaml +54 -0
- package/config/sources.yaml +51 -0
- package/config/thresholds.yaml +49 -0
- package/dist/_vendor/ailf-core/examples/index.d.ts +190 -0
- package/dist/_vendor/ailf-core/examples/index.js +285 -0
- package/dist/_vendor/ailf-core/index.d.ts +17 -0
- package/dist/_vendor/ailf-core/index.js +17 -0
- package/dist/_vendor/ailf-core/ports/cache-store.d.ts +72 -0
- package/dist/_vendor/ailf-core/ports/cache-store.js +17 -0
- package/dist/_vendor/ailf-core/ports/config-source.d.ts +33 -0
- package/dist/_vendor/ailf-core/ports/config-source.js +15 -0
- package/dist/_vendor/ailf-core/ports/context.d.ts +172 -0
- package/dist/_vendor/ailf-core/ports/context.js +14 -0
- package/dist/_vendor/ailf-core/ports/doc-fetcher.d.ts +131 -0
- package/dist/_vendor/ailf-core/ports/doc-fetcher.js +12 -0
- package/dist/_vendor/ailf-core/ports/eval-runner.d.ts +24 -0
- package/dist/_vendor/ailf-core/ports/eval-runner.js +8 -0
- package/dist/_vendor/ailf-core/ports/index.d.ts +15 -0
- package/dist/_vendor/ailf-core/ports/index.js +7 -0
- package/dist/_vendor/ailf-core/ports/logger.d.ts +36 -0
- package/dist/_vendor/ailf-core/ports/logger.js +11 -0
- package/dist/_vendor/ailf-core/ports/pipeline-step.d.ts +46 -0
- package/dist/_vendor/ailf-core/ports/pipeline-step.js +8 -0
- package/dist/_vendor/ailf-core/ports/task-source.d.ts +159 -0
- package/dist/_vendor/ailf-core/ports/task-source.js +72 -0
- package/dist/_vendor/ailf-core/schemas/callback-payload.d.ts +24 -0
- package/dist/_vendor/ailf-core/schemas/callback-payload.js +29 -0
- package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +55 -0
- package/dist/_vendor/ailf-core/schemas/eval-config.js +78 -0
- package/dist/_vendor/ailf-core/schemas/index.d.ts +16 -0
- package/dist/_vendor/ailf-core/schemas/index.js +16 -0
- package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +125 -0
- package/dist/_vendor/ailf-core/schemas/pipeline-request.js +67 -0
- package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +531 -0
- package/dist/_vendor/ailf-core/schemas/pipeline.js +318 -0
- package/dist/_vendor/ailf-core/schemas/schedules.d.ts +68 -0
- package/dist/_vendor/ailf-core/schemas/schedules.js +74 -0
- package/dist/_vendor/ailf-core/schemas/sinks.d.ts +207 -0
- package/dist/_vendor/ailf-core/schemas/sinks.js +108 -0
- package/dist/_vendor/ailf-core/services/comparison-formatters.d.ts +18 -0
- package/dist/_vendor/ailf-core/services/comparison-formatters.js +189 -0
- package/dist/_vendor/ailf-core/services/config-helpers.d.ts +41 -0
- package/dist/_vendor/ailf-core/services/config-helpers.js +86 -0
- package/dist/_vendor/ailf-core/services/index.d.ts +12 -0
- package/dist/_vendor/ailf-core/services/index.js +12 -0
- package/dist/_vendor/ailf-core/services/scoring.d.ts +49 -0
- package/dist/_vendor/ailf-core/services/scoring.js +222 -0
- package/dist/_vendor/ailf-core/types/index.d.ts +1082 -0
- package/dist/_vendor/ailf-core/types/index.js +21 -0
- package/dist/_vendor/ailf-core/types/scoring-input.d.ts +54 -0
- package/dist/_vendor/ailf-core/types/scoring-input.js +9 -0
- package/dist/_vendor/ailf-shared/dimension-names.d.ts +21 -0
- package/dist/_vendor/ailf-shared/dimension-names.js +27 -0
- package/dist/_vendor/ailf-shared/document-ref.d.ts +29 -0
- package/dist/_vendor/ailf-shared/document-ref.js +1 -0
- package/dist/_vendor/ailf-shared/eval-modes.d.ts +12 -0
- package/dist/_vendor/ailf-shared/eval-modes.js +8 -0
- package/dist/_vendor/ailf-shared/index.d.ts +16 -0
- package/dist/_vendor/ailf-shared/index.js +16 -0
- package/dist/_vendor/ailf-shared/noise-threshold.d.ts +9 -0
- package/dist/_vendor/ailf-shared/noise-threshold.js +9 -0
- package/dist/_vendor/ailf-shared/score-grades.d.ts +17 -0
- package/dist/_vendor/ailf-shared/score-grades.js +23 -0
- package/dist/adapters/cache/content-lake-cache.d.ts +24 -0
- package/dist/adapters/cache/content-lake-cache.js +59 -0
- package/dist/adapters/cache/filesystem-cache.d.ts +18 -0
- package/dist/adapters/cache/filesystem-cache.js +54 -0
- package/dist/adapters/cache/index.d.ts +2 -0
- package/dist/adapters/cache/index.js +2 -0
- package/dist/adapters/config-sources/cli-config-adapter.d.ts +17 -0
- package/dist/adapters/config-sources/cli-config-adapter.js +23 -0
- package/dist/adapters/config-sources/file-config-adapter.d.ts +26 -0
- package/dist/adapters/config-sources/file-config-adapter.js +96 -0
- package/dist/adapters/config-sources/index.d.ts +2 -0
- package/dist/adapters/config-sources/index.js +2 -0
- package/dist/adapters/doc-fetchers/index.d.ts +1 -0
- package/dist/adapters/doc-fetchers/index.js +1 -0
- package/dist/adapters/doc-fetchers/sanity-doc-fetcher.d.ts +76 -0
- package/dist/adapters/doc-fetchers/sanity-doc-fetcher.js +620 -0
- package/dist/adapters/eval-runners/index.d.ts +1 -0
- package/dist/adapters/eval-runners/index.js +1 -0
- package/dist/adapters/eval-runners/promptfoo-eval-adapter.d.ts +14 -0
- package/dist/adapters/eval-runners/promptfoo-eval-adapter.js +63 -0
- package/dist/adapters/index.d.ts +12 -0
- package/dist/adapters/index.js +12 -0
- package/dist/adapters/loggers/console-logger.d.ts +22 -0
- package/dist/adapters/loggers/console-logger.js +54 -0
- package/dist/adapters/loggers/index.d.ts +9 -0
- package/dist/adapters/loggers/index.js +9 -0
- package/dist/adapters/loggers/json-logger.d.ts +18 -0
- package/dist/adapters/loggers/json-logger.js +33 -0
- package/dist/adapters/loggers/quiet-logger.d.ts +16 -0
- package/dist/adapters/loggers/quiet-logger.js +30 -0
- package/dist/adapters/task-sources/composite-task-source.d.ts +20 -0
- package/dist/adapters/task-sources/composite-task-source.js +59 -0
- package/dist/adapters/task-sources/content-lake-task-source.d.ts +20 -0
- package/dist/adapters/task-sources/content-lake-task-source.js +219 -0
- package/dist/adapters/task-sources/index.d.ts +7 -0
- package/dist/adapters/task-sources/index.js +7 -0
- package/dist/adapters/task-sources/repo-schemas.d.ts +245 -0
- package/dist/adapters/task-sources/repo-schemas.js +234 -0
- package/dist/adapters/task-sources/repo-task-source.d.ts +22 -0
- package/dist/adapters/task-sources/repo-task-source.js +104 -0
- package/dist/adapters/task-sources/repo-trigger.d.ts +52 -0
- package/dist/adapters/task-sources/repo-trigger.js +153 -0
- package/dist/adapters/task-sources/repo-validation.d.ts +49 -0
- package/dist/adapters/task-sources/repo-validation.js +164 -0
- package/dist/adapters/task-sources/yaml-task-source.d.ts +18 -0
- package/dist/adapters/task-sources/yaml-task-source.js +136 -0
- package/dist/agent-observer/agentic-provider.d.ts +132 -0
- package/dist/agent-observer/agentic-provider.js +983 -0
- package/dist/agent-observer/classifier.d.ts +62 -0
- package/dist/agent-observer/classifier.js +269 -0
- package/dist/agent-observer/index.d.ts +7 -0
- package/dist/agent-observer/index.js +4 -0
- package/dist/agent-observer/pricing.d.ts +35 -0
- package/dist/agent-observer/pricing.js +82 -0
- package/dist/agent-observer/provider.d.ts +77 -0
- package/dist/agent-observer/provider.js +151 -0
- package/dist/agent-observer/proxy.d.ts +91 -0
- package/dist/agent-observer/proxy.js +321 -0
- package/dist/agent-observer/test-imports.d.ts +7 -0
- package/dist/agent-observer/test-imports.js +185 -0
- package/dist/agent-observer/types.d.ts +137 -0
- package/dist/agent-observer/types.js +16 -0
- package/dist/assertions/source-isolation.d.ts +72 -0
- package/dist/assertions/source-isolation.js +117 -0
- package/dist/cli.d.ts +24 -0
- package/dist/cli.js +199 -0
- package/dist/commands/agent-report.d.ts +5 -0
- package/dist/commands/agent-report.js +69 -0
- package/dist/commands/baseline.d.ts +9 -0
- package/dist/commands/baseline.js +141 -0
- package/dist/commands/cache.d.ts +13 -0
- package/dist/commands/cache.js +135 -0
- package/dist/commands/calculate-scores.d.ts +8 -0
- package/dist/commands/calculate-scores.js +48 -0
- package/dist/commands/compare.d.ts +8 -0
- package/dist/commands/compare.js +120 -0
- package/dist/commands/completion.d.ts +18 -0
- package/dist/commands/completion.js +260 -0
- package/dist/commands/coverage-audit.d.ts +7 -0
- package/dist/commands/coverage-audit.js +40 -0
- package/dist/commands/discovery-report.d.ts +10 -0
- package/dist/commands/discovery-report.js +44 -0
- package/dist/commands/eval.d.ts +9 -0
- package/dist/commands/eval.js +35 -0
- package/dist/commands/explain-handler.d.ts +34 -0
- package/dist/commands/explain-handler.js +719 -0
- package/dist/commands/fetch-docs.d.ts +8 -0
- package/dist/commands/fetch-docs.js +128 -0
- package/dist/commands/generate-configs.d.ts +8 -0
- package/dist/commands/generate-configs.js +46 -0
- package/dist/commands/grader/index.d.ts +11 -0
- package/dist/commands/grader/index.js +118 -0
- package/dist/commands/init.d.ts +19 -0
- package/dist/commands/init.js +150 -0
- package/dist/commands/interactive.d.ts +12 -0
- package/dist/commands/interactive.js +238 -0
- package/dist/commands/lookup-doc.d.ts +15 -0
- package/dist/commands/lookup-doc.js +84 -0
- package/dist/commands/measure-retrieval.d.ts +5 -0
- package/dist/commands/measure-retrieval.js +65 -0
- package/dist/commands/pipeline-action.d.ts +71 -0
- package/dist/commands/pipeline-action.js +305 -0
- package/dist/commands/pipeline.d.ts +62 -0
- package/dist/commands/pipeline.js +53 -0
- package/dist/commands/pr-comment.d.ts +8 -0
- package/dist/commands/pr-comment.js +47 -0
- package/dist/commands/publish.d.ts +26 -0
- package/dist/commands/publish.js +253 -0
- package/dist/commands/readiness-report.d.ts +10 -0
- package/dist/commands/readiness-report.js +104 -0
- package/dist/commands/shared/options.d.ts +29 -0
- package/dist/commands/shared/options.js +57 -0
- package/dist/commands/update-quality-scores.d.ts +5 -0
- package/dist/commands/update-quality-scores.js +20 -0
- package/dist/commands/validate-tasks.d.ts +16 -0
- package/dist/commands/validate-tasks.js +93 -0
- package/dist/commands/validate.d.ts +9 -0
- package/dist/commands/validate.js +73 -0
- package/dist/commands/webhook-server.d.ts +5 -0
- package/dist/commands/webhook-server.js +30 -0
- package/dist/commands/weekly-digest.d.ts +10 -0
- package/dist/commands/weekly-digest.js +104 -0
- package/dist/composition-root.d.ts +26 -0
- package/dist/composition-root.js +107 -0
- package/dist/interpolate.d.ts +26 -0
- package/dist/interpolate.js +70 -0
- package/dist/job-store.d.ts +104 -0
- package/dist/job-store.js +188 -0
- package/dist/lib/agent-behavior-report.d.ts +8 -0
- package/dist/lib/agent-behavior-report.js +185 -0
- package/dist/lib/baseline.d.ts +19 -0
- package/dist/lib/baseline.js +153 -0
- package/dist/lib/calculate-scores.d.ts +23 -0
- package/dist/lib/calculate-scores.js +42 -0
- package/dist/lib/compare.d.ts +18 -0
- package/dist/lib/compare.js +170 -0
- package/dist/lib/coverage-audit.d.ts +4 -0
- package/dist/lib/coverage-audit.js +42 -0
- package/dist/lib/discovery-report.d.ts +13 -0
- package/dist/lib/discovery-report.js +57 -0
- package/dist/lib/fetch-docs.d.ts +30 -0
- package/dist/lib/fetch-docs.js +171 -0
- package/dist/lib/generate-configs.d.ts +25 -0
- package/dist/lib/generate-configs.js +42 -0
- package/dist/lib/grader-api.d.ts +21 -0
- package/dist/lib/grader-api.js +34 -0
- package/dist/lib/grader-compare.d.ts +19 -0
- package/dist/lib/grader-compare.js +91 -0
- package/dist/lib/grader-consistency.d.ts +27 -0
- package/dist/lib/grader-consistency.js +79 -0
- package/dist/lib/grader-sensitivity.d.ts +19 -0
- package/dist/lib/grader-sensitivity.js +75 -0
- package/dist/lib/grader-validate.d.ts +19 -0
- package/dist/lib/grader-validate.js +78 -0
- package/dist/lib/measure-retrieval.d.ts +14 -0
- package/dist/lib/measure-retrieval.js +71 -0
- package/dist/lib/pr-comment.d.ts +16 -0
- package/dist/lib/pr-comment.js +28 -0
- package/dist/lib/readiness-report.d.ts +13 -0
- package/dist/lib/readiness-report.js +108 -0
- package/dist/lib/webhook-server.d.ts +11 -0
- package/dist/lib/webhook-server.js +24 -0
- package/dist/lib/weekly-digest.d.ts +24 -0
- package/dist/lib/weekly-digest.js +148 -0
- package/dist/orchestration/build-app-context.d.ts +27 -0
- package/dist/orchestration/build-app-context.js +81 -0
- package/dist/orchestration/build-step-sequence.d.ts +15 -0
- package/dist/orchestration/build-step-sequence.js +84 -0
- package/dist/orchestration/config-to-source-overrides.d.ts +9 -0
- package/dist/orchestration/config-to-source-overrides.js +28 -0
- package/dist/orchestration/env-bridge.d.ts +21 -0
- package/dist/orchestration/env-bridge.js +66 -0
- package/dist/orchestration/index.d.ts +11 -0
- package/dist/orchestration/index.js +11 -0
- package/dist/orchestration/pipeline-orchestrator.d.ts +24 -0
- package/dist/orchestration/pipeline-orchestrator.js +153 -0
- package/dist/orchestration/step-runner.d.ts +20 -0
- package/dist/orchestration/step-runner.js +88 -0
- package/dist/orchestration/steps/calculate-scores-step.d.ts +13 -0
- package/dist/orchestration/steps/calculate-scores-step.js +95 -0
- package/dist/orchestration/steps/callback-step.d.ts +24 -0
- package/dist/orchestration/steps/callback-step.js +76 -0
- package/dist/orchestration/steps/compare-step.d.ts +14 -0
- package/dist/orchestration/steps/compare-step.js +92 -0
- package/dist/orchestration/steps/discovery-report-step.d.ts +13 -0
- package/dist/orchestration/steps/discovery-report-step.js +55 -0
- package/dist/orchestration/steps/fetch-docs-shell.d.ts +17 -0
- package/dist/orchestration/steps/fetch-docs-shell.js +30 -0
- package/dist/orchestration/steps/fetch-docs-step.d.ts +14 -0
- package/dist/orchestration/steps/fetch-docs-step.js +135 -0
- package/dist/orchestration/steps/gap-analysis-step.d.ts +16 -0
- package/dist/orchestration/steps/gap-analysis-step.js +136 -0
- package/dist/orchestration/steps/generate-configs-step.d.ts +14 -0
- package/dist/orchestration/steps/generate-configs-step.js +85 -0
- package/dist/orchestration/steps/grader-consistency-step.d.ts +13 -0
- package/dist/orchestration/steps/grader-consistency-step.js +64 -0
- package/dist/orchestration/steps/index.d.ts +19 -0
- package/dist/orchestration/steps/index.js +19 -0
- package/dist/orchestration/steps/mirror-repo-tasks-step.d.ts +21 -0
- package/dist/orchestration/steps/mirror-repo-tasks-step.js +94 -0
- package/dist/orchestration/steps/publish-report-step.d.ts +26 -0
- package/dist/orchestration/steps/publish-report-step.js +216 -0
- package/dist/orchestration/steps/readiness-step.d.ts +13 -0
- package/dist/orchestration/steps/readiness-step.js +91 -0
- package/dist/orchestration/steps/report-step.d.ts +12 -0
- package/dist/orchestration/steps/report-step.js +49 -0
- package/dist/orchestration/steps/run-eval-step.d.ts +17 -0
- package/dist/orchestration/steps/run-eval-step.js +195 -0
- package/dist/orchestration/steps/validate-step.d.ts +12 -0
- package/dist/orchestration/steps/validate-step.js +41 -0
- package/dist/pipeline/agent-behavior-report.d.ts +53 -0
- package/dist/pipeline/agent-behavior-report.js +132 -0
- package/dist/pipeline/attribution.d.ts +47 -0
- package/dist/pipeline/attribution.js +226 -0
- package/dist/pipeline/baseline.d.ts +37 -0
- package/dist/pipeline/baseline.js +141 -0
- package/dist/pipeline/cache.d.ts +101 -0
- package/dist/pipeline/cache.js +283 -0
- package/dist/pipeline/calculate-scores.d.ts +102 -0
- package/dist/pipeline/calculate-scores.js +1128 -0
- package/dist/pipeline/callback-delivery.d.ts +50 -0
- package/dist/pipeline/callback-delivery.js +89 -0
- package/dist/pipeline/checks.d.ts +39 -0
- package/dist/pipeline/checks.js +280 -0
- package/dist/pipeline/classify-url.d.ts +61 -0
- package/dist/pipeline/classify-url.js +93 -0
- package/dist/pipeline/compare.d.ts +31 -0
- package/dist/pipeline/compare.js +208 -0
- package/dist/pipeline/coverage-audit.d.ts +39 -0
- package/dist/pipeline/coverage-audit.js +165 -0
- package/dist/pipeline/degradations.d.ts +85 -0
- package/dist/pipeline/degradations.js +242 -0
- package/dist/pipeline/discovery-report.d.ts +55 -0
- package/dist/pipeline/discovery-report.js +178 -0
- package/dist/pipeline/eval-constants.d.ts +68 -0
- package/dist/pipeline/eval-constants.js +111 -0
- package/dist/pipeline/eval-fingerprint.d.ts +66 -0
- package/dist/pipeline/eval-fingerprint.js +175 -0
- package/dist/pipeline/expand-tasks.d.ts +220 -0
- package/dist/pipeline/expand-tasks.js +421 -0
- package/dist/pipeline/failure-modes.d.ts +46 -0
- package/dist/pipeline/failure-modes.js +348 -0
- package/dist/pipeline/fetch-url-content.d.ts +44 -0
- package/dist/pipeline/fetch-url-content.js +93 -0
- package/dist/pipeline/gap-analysis.d.ts +48 -0
- package/dist/pipeline/gap-analysis.js +231 -0
- package/dist/pipeline/generate-configs.d.ts +72 -0
- package/dist/pipeline/generate-configs.js +395 -0
- package/dist/pipeline/grader-api.d.ts +49 -0
- package/dist/pipeline/grader-api.js +200 -0
- package/dist/pipeline/grader-compare-runner.d.ts +44 -0
- package/dist/pipeline/grader-compare-runner.js +301 -0
- package/dist/pipeline/grader-comparison.d.ts +111 -0
- package/dist/pipeline/grader-comparison.js +161 -0
- package/dist/pipeline/grader-consistency-runner.d.ts +60 -0
- package/dist/pipeline/grader-consistency-runner.js +270 -0
- package/dist/pipeline/grader-consistency.d.ts +103 -0
- package/dist/pipeline/grader-consistency.js +146 -0
- package/dist/pipeline/grader-sensitivity-runner.d.ts +40 -0
- package/dist/pipeline/grader-sensitivity-runner.js +282 -0
- package/dist/pipeline/grader-sensitivity.d.ts +94 -0
- package/dist/pipeline/grader-sensitivity.js +144 -0
- package/dist/pipeline/grader-validate-runner.d.ts +38 -0
- package/dist/pipeline/grader-validate-runner.js +229 -0
- package/dist/pipeline/grader-validation.d.ts +107 -0
- package/dist/pipeline/grader-validation.js +169 -0
- package/dist/pipeline/map-request-to-config.d.ts +19 -0
- package/dist/pipeline/map-request-to-config.js +80 -0
- package/dist/pipeline/measure-retrieval.d.ts +59 -0
- package/dist/pipeline/measure-retrieval.js +111 -0
- package/dist/pipeline/mirror-repo-tasks.d.ts +86 -0
- package/dist/pipeline/mirror-repo-tasks.js +350 -0
- package/dist/pipeline/plan-format.d.ts +33 -0
- package/dist/pipeline/plan-format.js +202 -0
- package/dist/pipeline/plan.d.ts +169 -0
- package/dist/pipeline/plan.js +708 -0
- package/dist/pipeline/pr-comment.d.ts +19 -0
- package/dist/pipeline/pr-comment.js +502 -0
- package/dist/pipeline/probe.d.ts +52 -0
- package/dist/pipeline/probe.js +390 -0
- package/dist/pipeline/provenance.d.ts +47 -0
- package/dist/pipeline/provenance.js +146 -0
- package/dist/pipeline/readiness-report.d.ts +87 -0
- package/dist/pipeline/readiness-report.js +205 -0
- package/dist/pipeline/release-classification.d.ts +54 -0
- package/dist/pipeline/release-classification.js +238 -0
- package/dist/pipeline/release-report.d.ts +37 -0
- package/dist/pipeline/release-report.js +222 -0
- package/dist/pipeline/repo-eval-comment.d.ts +37 -0
- package/dist/pipeline/repo-eval-comment.js +165 -0
- package/dist/pipeline/repo-threshold-evaluator.d.ts +89 -0
- package/dist/pipeline/repo-threshold-evaluator.js +162 -0
- package/dist/pipeline/resolve-mappings.d.ts +35 -0
- package/dist/pipeline/resolve-mappings.js +72 -0
- package/dist/pipeline/retrieval-metrics.d.ts +39 -0
- package/dist/pipeline/retrieval-metrics.js +136 -0
- package/dist/pipeline/reverse-mapping.d.ts +67 -0
- package/dist/pipeline/reverse-mapping.js +88 -0
- package/dist/pipeline/schemas.d.ts +9 -0
- package/dist/pipeline/schemas.js +9 -0
- package/dist/pipeline/steps/calculate-scores-step.d.ts +11 -0
- package/dist/pipeline/steps/calculate-scores-step.js +89 -0
- package/dist/pipeline/steps/compare-step.d.ts +18 -0
- package/dist/pipeline/steps/compare-step.js +90 -0
- package/dist/pipeline/steps/eval-step.d.ts +53 -0
- package/dist/pipeline/steps/eval-step.js +347 -0
- package/dist/pipeline/steps/fetch-docs-step.d.ts +11 -0
- package/dist/pipeline/steps/fetch-docs-step.js +84 -0
- package/dist/pipeline/steps/generate-configs-step.d.ts +11 -0
- package/dist/pipeline/steps/generate-configs-step.js +98 -0
- package/dist/pipeline/steps/grader-consistency-step.d.ts +21 -0
- package/dist/pipeline/steps/grader-consistency-step.js +74 -0
- package/dist/pipeline/steps/publish-report-step.d.ts +57 -0
- package/dist/pipeline/steps/publish-report-step.js +243 -0
- package/dist/pipeline/steps/report-step.d.ts +13 -0
- package/dist/pipeline/steps/report-step.js +56 -0
- package/dist/pipeline/steps/update-scores-step.d.ts +11 -0
- package/dist/pipeline/steps/update-scores-step.js +42 -0
- package/dist/pipeline/targeted-loo.d.ts +88 -0
- package/dist/pipeline/targeted-loo.js +203 -0
- package/dist/pipeline/thresholds.d.ts +27 -0
- package/dist/pipeline/thresholds.js +245 -0
- package/dist/pipeline/types.d.ts +10 -0
- package/dist/pipeline/types.js +10 -0
- package/dist/pipeline/validate.d.ts +67 -0
- package/dist/pipeline/validate.js +406 -0
- package/dist/pipeline/webhook-server.d.ts +37 -0
- package/dist/pipeline/webhook-server.js +133 -0
- package/dist/report-store.d.ts +84 -0
- package/dist/report-store.js +208 -0
- package/dist/sanity/client.d.ts +38 -0
- package/dist/sanity/client.js +86 -0
- package/dist/sanity/portable-text.d.ts +11 -0
- package/dist/sanity/portable-text.js +211 -0
- package/dist/sanity/queries.d.ts +133 -0
- package/dist/sanity/queries.js +300 -0
- package/dist/schedules/digest.d.ts +116 -0
- package/dist/schedules/digest.js +156 -0
- package/dist/schedules/index.d.ts +12 -0
- package/dist/schedules/index.js +10 -0
- package/dist/schedules/loader.d.ts +31 -0
- package/dist/schedules/loader.js +73 -0
- package/dist/schedules/schema.d.ts +9 -0
- package/dist/schedules/schema.js +9 -0
- package/dist/scripts/agent-behavior-report.d.ts +19 -0
- package/dist/scripts/agent-behavior-report.js +315 -0
- package/dist/scripts/baseline.d.ts +43 -0
- package/dist/scripts/baseline.js +267 -0
- package/dist/scripts/calculate-scores.d.ts +166 -0
- package/dist/scripts/calculate-scores.js +1296 -0
- package/dist/scripts/compare.d.ts +22 -0
- package/dist/scripts/compare.js +334 -0
- package/dist/scripts/coverage-audit.d.ts +44 -0
- package/dist/scripts/coverage-audit.js +209 -0
- package/dist/scripts/debug-eval.d.ts +19 -0
- package/dist/scripts/debug-eval.js +73 -0
- package/dist/scripts/discovery-report.d.ts +58 -0
- package/dist/scripts/discovery-report.js +250 -0
- package/dist/scripts/fetch-docs.d.ts +35 -0
- package/dist/scripts/fetch-docs.js +472 -0
- package/dist/scripts/generate-configs.d.ts +66 -0
- package/dist/scripts/generate-configs.js +459 -0
- package/dist/scripts/grader-api.d.ts +27 -0
- package/dist/scripts/grader-api.js +206 -0
- package/dist/scripts/grader-compare.d.ts +22 -0
- package/dist/scripts/grader-compare.js +368 -0
- package/dist/scripts/grader-consistency.d.ts +20 -0
- package/dist/scripts/grader-consistency.js +313 -0
- package/dist/scripts/grader-sensitivity.d.ts +22 -0
- package/dist/scripts/grader-sensitivity.js +354 -0
- package/dist/scripts/grader-validate.d.ts +19 -0
- package/dist/scripts/grader-validate.js +267 -0
- package/dist/scripts/measure-retrieval.d.ts +10 -0
- package/dist/scripts/measure-retrieval.js +145 -0
- package/dist/scripts/migrate-tasks-to-content-lake.d.ts +24 -0
- package/dist/scripts/migrate-tasks-to-content-lake.js +327 -0
- package/dist/scripts/pipeline.d.ts +76 -0
- package/dist/scripts/pipeline.js +1031 -0
- package/dist/scripts/pr-comment.d.ts +10 -0
- package/dist/scripts/pr-comment.js +510 -0
- package/dist/scripts/readiness-report.d.ts +88 -0
- package/dist/scripts/readiness-report.js +342 -0
- package/dist/scripts/update-quality-scores.d.ts +15 -0
- package/dist/scripts/update-quality-scores.js +184 -0
- package/dist/scripts/validate-task-sources.d.ts +21 -0
- package/dist/scripts/validate-task-sources.js +210 -0
- package/dist/scripts/validate.d.ts +13 -0
- package/dist/scripts/validate.js +79 -0
- package/dist/scripts/webhook-server.d.ts +26 -0
- package/dist/scripts/webhook-server.js +147 -0
- package/dist/scripts/weekly-digest.d.ts +24 -0
- package/dist/scripts/weekly-digest.js +144 -0
- package/dist/sinks/bigquery/index.d.ts +131 -0
- package/dist/sinks/bigquery/index.js +222 -0
- package/dist/sinks/format-slack.d.ts +64 -0
- package/dist/sinks/format-slack.js +306 -0
- package/dist/sinks/index.d.ts +23 -0
- package/dist/sinks/index.js +18 -0
- package/dist/sinks/loader.d.ts +18 -0
- package/dist/sinks/loader.js +82 -0
- package/dist/sinks/retry.d.ts +24 -0
- package/dist/sinks/retry.js +52 -0
- package/dist/sinks/schema.d.ts +9 -0
- package/dist/sinks/schema.js +9 -0
- package/dist/sinks/slack/format.d.ts +65 -0
- package/dist/sinks/slack/format.js +327 -0
- package/dist/sinks/slack/index.d.ts +27 -0
- package/dist/sinks/slack/index.js +78 -0
- package/dist/sinks/slack-sink.d.ts +27 -0
- package/dist/sinks/slack-sink.js +78 -0
- package/dist/sinks/types.d.ts +59 -0
- package/dist/sinks/types.js +44 -0
- package/dist/sinks/webhook/index.d.ts +19 -0
- package/dist/sinks/webhook/index.js +50 -0
- package/dist/sinks/webhook-sink.d.ts +19 -0
- package/dist/sinks/webhook-sink.js +50 -0
- package/dist/sources.d.ts +104 -0
- package/dist/sources.js +292 -0
- package/dist/webhook/budget.d.ts +42 -0
- package/dist/webhook/budget.js +60 -0
- package/dist/webhook/debounce.d.ts +67 -0
- package/dist/webhook/debounce.js +76 -0
- package/dist/webhook/dispatch.d.ts +45 -0
- package/dist/webhook/dispatch.js +84 -0
- package/dist/webhook/eval-request-handler.d.ts +87 -0
- package/dist/webhook/eval-request-handler.js +181 -0
- package/dist/webhook/handler.d.ts +88 -0
- package/dist/webhook/handler.js +203 -0
- package/dist/webhook/index.d.ts +17 -0
- package/dist/webhook/index.js +12 -0
- package/dist/webhook/types.d.ts +109 -0
- package/dist/webhook/types.js +10 -0
- package/package.json +72 -0
- package/tasks/.expanded.agentic.yaml +51 -0
- package/tasks/.expanded.yaml +66 -0
- package/tasks/frameworks.yaml +98 -0
- package/tasks/functions.yaml +51 -0
- package/tasks/groq.yaml +216 -0
- package/tasks/nextjs-live.yaml +62 -0
- package/tasks/studio-setup.yaml +111 -0
- package/tasks/visual-editing.yaml +120 -0
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* measure-retrieval.ts
|
|
3
|
+
*
|
|
4
|
+
* Pure retrieval quality measurement functions.
|
|
5
|
+
*
|
|
6
|
+
* Evaluates retrieval quality by comparing what Sanity's text search
|
|
7
|
+
* returns against the manually-annotated canonical documents for each
|
|
8
|
+
* evaluation task. Produces Recall@K and NDCG@K metrics.
|
|
9
|
+
*
|
|
10
|
+
* This answers: "Can a retriever find the docs an LLM actually needs?"
|
|
11
|
+
*
|
|
12
|
+
* Migrated from lib/measure-retrieval.ts — no process.argv/process.env,
|
|
13
|
+
* accepts rootDir and retriever function as parameters.
|
|
14
|
+
*/
|
|
15
|
+
import { resolveMappings } from "./resolve-mappings.js";
|
|
16
|
+
// ---------------------------------------------------------------------------
|
|
17
|
+
// Metrics (pure, exported for testing)
|
|
18
|
+
// ---------------------------------------------------------------------------
|
|
19
|
+
/**
 * Recall@K: the fraction of distinct canonical documents that appear among
 * the first `k` retrieved results.
 *
 * @param canonical - Identifiers of the documents considered relevant.
 * @param retrieved - Ranked retrieval results, best match first.
 * @param k - How many of the top results to consider.
 * @returns A value in [0, 1]; 0 when `canonical` is empty.
 */
export function calculateRecall(canonical, retrieved, k) {
    // Score against the distinct canonical set: an identifier listed twice
    // must not count as two hits (or two misses).
    const relevant = new Set(canonical);
    if (relevant.size === 0) {
        return 0;
    }
    const topK = new Set(retrieved.slice(0, k));
    let hits = 0;
    for (const doc of relevant) {
        if (topK.has(doc)) {
            hits += 1;
        }
    }
    return hits / relevant.size;
}
|
|
24
|
+
/**
 * NDCG@K with binary relevance: the discounted cumulative gain of the first
 * `k` retrieved results divided by the gain of an ideal ranking.
 *
 * Each canonical document is credited at most once (at its best rank), and
 * the ideal ranking is sized by the number of distinct canonical documents,
 * so the result always stays within [0, 1].
 *
 * @param canonical - Identifiers of the documents considered relevant.
 * @param retrieved - Ranked retrieval results, best match first.
 * @param k - How many of the top results to consider.
 * @returns A value in [0, 1]; 0 when there is nothing relevant or `k` admits no results.
 */
export function calculateNDCG(canonical, retrieved, k) {
    const relevant = new Set(canonical);
    // Relevant docs that have already earned gain; a repeat of the same doc
    // further down the list must not be rewarded again.
    const credited = new Set();
    // Discounted Cumulative Gain
    let dcg = 0;
    const depth = Math.min(k, retrieved.length);
    for (let rank = 0; rank < depth; rank++) {
        const doc = retrieved[rank];
        if (relevant.has(doc) && !credited.has(doc)) {
            credited.add(doc);
            dcg += 1 / Math.log2(rank + 2); // +2 because log2(1) = 0
        }
    }
    // Ideal DCG: every distinct relevant doc ranked first, up to k positions.
    let idcg = 0;
    const idealDepth = Math.min(k, relevant.size);
    for (let rank = 0; rank < idealDepth; rank++) {
        idcg += 1 / Math.log2(rank + 2);
    }
    return idcg === 0 ? 0 : dcg / idcg;
}
|
|
40
|
+
/**
 * Measure retrieval quality for every task in the resolved mappings.
 *
 * Each task's description is used as the search query: `retriever` is awaited
 * with that description and a limit of 10, and the returned list is scored
 * against the slugs of the task's canonical docs. Tasks are processed one at
 * a time, in mapping order.
 *
 * @param options - `rootDir` (handed to `resolveMappings`), `retriever`
 *   (called as `retriever(query, 10)`), and an optional
 *   `onProgress(area, taskId, result)` callback invoked after each task.
 * @returns A full RetrievalSummary with per-task (`results`), per-area
 *   (`by_area`), and overall (`overall`) metrics.
 */
export async function measureRetrieval(options) {
    const { rootDir, retriever, onProgress } = options;
    const { feature_areas: featureAreas } = resolveMappings(rootDir);
    // Arithmetic mean of one metric over a list of per-task rows; an empty
    // list yields 0 rather than NaN.
    const mean = (rows, metric) => rows.reduce((sum, row) => sum + row[metric], 0) / (rows.length || 1);

    // Phase 1: score every task.
    const results = [];
    for (const area of Object.keys(featureAreas)) {
        for (const task of featureAreas[area].tasks) {
            const canonicalSlugs = task.canonical_docs.map(({ slug }) => slug);
            // Use the task description as a search query
            const retrieved = await retriever(task.description, 10);
            const result = {
                canonical_docs: canonicalSlugs,
                feature_area: area,
                ndcg_at_10: calculateNDCG(canonicalSlugs, retrieved, 10),
                recall_at_5: calculateRecall(canonicalSlugs, retrieved, 5),
                recall_at_10: calculateRecall(canonicalSlugs, retrieved, 10),
                retrieved_docs: retrieved,
                task_id: task.id,
            };
            results.push(result);
            if (onProgress) {
                onProgress(area, task.id, result);
            }
        }
    }

    // Phase 2: aggregate by feature area, leaving out areas with no tasks.
    const byArea = {};
    for (const area of Object.keys(featureAreas)) {
        const rows = results.filter((row) => row.feature_area === area);
        if (rows.length > 0) {
            byArea[area] = {
                avg_ndcg_at_10: mean(rows, "ndcg_at_10"),
                avg_recall_at_5: mean(rows, "recall_at_5"),
                avg_recall_at_10: mean(rows, "recall_at_10"),
                task_count: rows.length,
            };
        }
    }

    // Phase 3: overall averages across all tasks.
    const overall = {
        avg_ndcg_at_10: mean(results, "ndcg_at_10"),
        avg_recall_at_5: mean(results, "recall_at_5"),
        avg_recall_at_10: mean(results, "recall_at_10"),
    };
    return { by_area: byArea, overall, results };
}
|
|
89
|
+
/**
 * Render a retrieval summary as plain text for the console.
 *
 * The output is a banner, a pipe-delimited table with one row per entry in
 * `summary.by_area` (metrics shown as percentages with one decimal), a blank
 * line, and a single "Overall" line built from `summary.overall`.
 *
 * @param summary - Object with `by_area` (area name → averaged stats plus
 *   `task_count`) and `overall` (averaged stats).
 * @returns The report as one newline-joined string.
 */
export function formatRetrievalTable(summary) {
    // Fraction → percentage text with one decimal place.
    const pct = (fraction) => (fraction * 100).toFixed(1);
    const banner = "=".repeat(70);

    const rows = Object.entries(summary.by_area).map(([area, stats]) => [
        `| ${area.padEnd(19)} | ${pct(stats.avg_recall_at_5).padStart(7)}% | `,
        `${pct(stats.avg_recall_at_10).padStart(8)}% | `,
        `${pct(stats.avg_ndcg_at_10).padStart(6)}% | `,
        `${stats.task_count.toString().padStart(5)} |`,
    ].join(""));

    const { overall } = summary;
    const overallLine = [
        `Overall: Recall@5=${pct(overall.avg_recall_at_5)}% `,
        `Recall@10=${pct(overall.avg_recall_at_10)}% `,
        `NDCG@10=${pct(overall.avg_ndcg_at_10)}%`,
    ].join("");

    return [
        banner,
        "RETRIEVAL QUALITY SUMMARY",
        banner,
        "",
        "| Feature Area        | Recall@5 | Recall@10 | NDCG@10 | Tasks |",
        "|---------------------|----------|-----------|---------|-------|",
        ...rows,
        "",
        overallLine,
    ].join("\n");
}
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* pipeline/mirror-repo-tasks.ts
|
|
3
|
+
*
|
|
4
|
+
* Mirrors repo-based TaskDefinitions into the Sanity Content Lake as
|
|
5
|
+
* ailf.task documents with origin provenance. This makes the Content Lake
|
|
6
|
+
* the universal registry — every task (native or repo-sourced) is visible
|
|
7
|
+
* in Studio.
|
|
8
|
+
*
|
|
9
|
+
* The mirror is idempotent: deterministic document IDs + content hashing
|
|
10
|
+
* means unchanged tasks are skipped. Changed tasks are upserted via
|
|
11
|
+
* createOrReplace.
|
|
12
|
+
*
|
|
13
|
+
* @see docs/exec-plans/completed/tasks-as-content/phase-5-content-lake-mirroring.md
|
|
14
|
+
*/
|
|
15
|
+
import type { SanityClient } from "@sanity/client";
|
|
16
|
+
import { type TaskDefinition } from "../_vendor/ailf-core/index.d.ts";
|
|
17
|
+
export interface MirrorOptions {
    /** Task definitions to mirror; the caller has already loaded them from the repo. */
    tasks: TaskDefinition[];
    /** Git context recorded as origin provenance on each mirrored task. */
    git: GitContext;
    /** Sanity client; must have write access. */
    client: SanityClient;
    /** When true, only log what would be done and write nothing. */
    dryRun?: boolean;
}
|
|
27
|
+
export interface GitContext {
    /** Owner part of the repository, e.g. "sanity-io". */
    owner: string;
    /** Name part of the repository, e.g. "visual-editing". */
    name: string;
    /** Full "owner/name" identifier, e.g. "sanity-io/visual-editing". */
    repo: string;
    /** Branch currently checked out. */
    branch: string;
    /** SHA of the HEAD commit. */
    commitSha: string;
}
|
|
39
|
+
export interface MirrorResult {
    /** Number of tasks that were processed. */
    total: number;
    /** Number of tasks that were created or updated. */
    upserted: number;
    /** Number of tasks skipped because they were unchanged. */
    skipped: number;
    /** Feature areas that were auto-created during the run. */
    areasCreated: string[];
    /** Canonical doc slugs that could not be resolved. */
    unresolvedSlugs: string[];
    /** Non-fatal error messages; the mirror keeps going after recording one. */
    errors: string[];
}
|
|
53
|
+
/**
 * Mirror repo-sourced tasks into the Content Lake.
 *
 * Per task, the mirror:
 * 1. derives a deterministic document ID,
 * 2. hashes the task definition,
 * 3. skips the task when a mirror document with the same hash already exists,
 * 4. resolves canonical doc slugs to Sanity references,
 * 5. auto-creates any feature area that is missing,
 * 6. upserts the ailf.task document together with its origin block.
 *
 * @param options - Client, tasks, git context and optional dry-run flag.
 * @returns Counters and diagnostics describing what the run did.
 */
export declare function mirrorRepoTasks(options: MirrorOptions): Promise<MirrorResult>;
|
|
65
|
+
/**
 * Work out the git context for the current run.
 *
 * GitHub Actions environment variables are used when present; otherwise the
 * git CLI is queried.
 *
 * @param repoTasksPath - Directory the git CLI fallback is run in.
 */
export declare function detectGitContext(repoTasksPath: string): Promise<GitContext>;
|
|
70
|
+
/**
 * Build the deterministic document ID of a mirrored task.
 *
 * Format: ailf.task.mirror.<owner>.<repo>.<taskId>
 *
 * Each of owner/repo/taskId is lowercased, and every character other than
 * an ASCII letter, a digit or a hyphen (dots included) is replaced with a
 * hyphen to avoid Sanity document ID issues.
 */
export declare function mirrorDocId(owner: string, repo: string, taskId: string): string;
|
|
79
|
+
/**
 * Hash a TaskDefinition's content so changes can be detected.
 *
 * Every field that affects the mirror document goes into the hash. Runtime
 * metadata such as referenceSolution (a filesystem path) is left out because
 * it is not mirrored.
 */
export declare function computeTaskHash(task: TaskDefinition): string;
|
|
@@ -0,0 +1,350 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* pipeline/mirror-repo-tasks.ts
|
|
3
|
+
*
|
|
4
|
+
* Mirrors repo-based TaskDefinitions into the Sanity Content Lake as
|
|
5
|
+
* ailf.task documents with origin provenance. This makes the Content Lake
|
|
6
|
+
* the universal registry — every task (native or repo-sourced) is visible
|
|
7
|
+
* in Studio.
|
|
8
|
+
*
|
|
9
|
+
* The mirror is idempotent: deterministic document IDs + content hashing
|
|
10
|
+
* means unchanged tasks are skipped. Changed tasks are upserted via
|
|
11
|
+
* createOrReplace.
|
|
12
|
+
*
|
|
13
|
+
* @see docs/exec-plans/completed/tasks-as-content/phase-5-content-lake-mirroring.md
|
|
14
|
+
*/
|
|
15
|
+
import { createHash } from "crypto";
|
|
16
|
+
import { isSlugRef, } from "../_vendor/ailf-core/index.js";
|
|
17
|
+
// ---------------------------------------------------------------------------
|
|
18
|
+
// Public API
|
|
19
|
+
// ---------------------------------------------------------------------------
|
|
20
|
+
/**
 * Mirror repo-sourced tasks into the Content Lake.
 *
 * Up-front (once per run): canonical doc slugs are resolved in one batch,
 * missing feature areas are ensured, and the content hashes of existing
 * mirror documents are fetched. Then, per task:
 * 1. derive the deterministic document ID,
 * 2. hash the task definition,
 * 3. skip the task when the stored hash matches,
 * 4. build the ailf.task document (resolved references + origin block),
 * 5. upsert it with createOrReplace — or only log it in dry-run mode.
 *
 * A failure while mirroring one task is recorded in `errors` and the loop
 * moves on to the next task.
 *
 * @param options - `{ client, tasks, git, dryRun = false }`.
 * @returns Counters and diagnostics for the run.
 */
export async function mirrorRepoTasks(options) {
    const { client, tasks, git, dryRun = false } = options;
    const result = {
        total: tasks.length,
        upserted: 0,
        skipped: 0,
        areasCreated: [],
        unresolvedSlugs: [],
        errors: [],
    };
    if (tasks.length === 0) {
        return result;
    }

    // Resolve every distinct slug ref in one query. Non-slug ref types are
    // stored without a resolved article reference for now.
    const uniqueSlugs = Array.from(new Set(tasks.flatMap((task) => task.canonicalDocs.filter(isSlugRef).map((ref) => ref.slug))));
    const slugToDocId = await batchResolveDocSlugs(client, uniqueSlugs);
    result.unresolvedSlugs.push(...uniqueSlugs.filter((slug) => !slugToDocId.has(slug)));

    // Make sure each referenced feature area has a document.
    const uniqueAreas = Array.from(new Set(tasks.map((task) => task.featureArea)));
    result.areasCreated = await ensureFeatureAreas(client, uniqueAreas, dryRun);

    // Stored hashes of already-mirrored documents drive change detection.
    const mirrorIds = tasks.map((task) => mirrorDocId(git.owner, git.name, task.id));
    const storedHashes = await fetchExistingHashes(client, mirrorIds);

    for (const [index, task] of tasks.entries()) {
        try {
            const docId = mirrorIds[index];
            const contentHash = computeTaskHash(task);
            if (storedHashes.get(docId) === contentHash) {
                // Unchanged since the last sync.
                result.skipped += 1;
                continue;
            }
            const doc = buildMirrorDocument(task, { contentHash, docId, git, slugToDocId });
            if (dryRun) {
                console.log(` [dry-run] Would upsert: ${docId}`);
            }
            else {
                await client.createOrReplace(doc);
            }
            result.upserted += 1;
        }
        catch (err) {
            const reason = err instanceof Error ? err.message : String(err);
            result.errors.push(`Failed to mirror "${task.id}": ${reason}`);
        }
    }
    return result;
}
|
|
93
|
+
// ---------------------------------------------------------------------------
|
|
94
|
+
// Detect git context from environment or CLI
|
|
95
|
+
// ---------------------------------------------------------------------------
|
|
96
|
+
/**
 * Detect git context from GitHub Actions environment variables,
 * falling back to git CLI commands.
 *
 * Environment path (taken when GITHUB_REPOSITORY is set): owner/name come
 * from splitting GITHUB_REPOSITORY on "/", the branch is GITHUB_HEAD_REF or
 * else GITHUB_REF with a leading "refs/heads/" or "refs/tags/" removed, and
 * the commit is GITHUB_SHA. Any piece that ends up empty becomes "unknown".
 *
 * CLI path: runs `git remote get-url origin`, `git rev-parse --abbrev-ref
 * HEAD` and `git rev-parse HEAD` in `repoTasksPath` and takes owner/name
 * from the last two path segments of the remote URL. If any git command
 * fails, every field is "unknown".
 *
 * @param repoTasksPath - Working directory for the git CLI fallback.
 * @returns The detected `{ repo, owner, name, branch, commitSha }`.
 */
export async function detectGitContext(repoTasksPath) {
    // GitHub Actions provides these env vars
    const ghRepo = process.env.GITHUB_REPOSITORY; // "owner/name"
    const ghRef = process.env.GITHUB_REF ?? "";
    const ghSha = process.env.GITHUB_SHA ?? "";
    const ghHeadRef = process.env.GITHUB_HEAD_REF ?? "";
    if (ghRepo) {
        const [owner, name] = ghRepo.split("/");
        // Only strip the ref-type prefix at the very start, so a branch whose
        // own name contains "refs/tags/" is left intact.
        const branch = ghHeadRef || ghRef.replace(/^refs\/(?:heads|tags)\//, "");
        return {
            repo: ghRepo,
            // `||` rather than `??`: a malformed value can yield "" segments.
            owner: owner || "unknown",
            name: name || "unknown",
            branch: branch || "unknown",
            commitSha: ghSha || "unknown",
        };
    }
    // Fallback: try git CLI
    const { execSync } = await import("child_process");
    try {
        const runGit = (args) => execSync(`git ${args}`, {
            encoding: "utf-8",
            cwd: repoTasksPath,
        }).trim();
        const remote = runGit("remote get-url origin");
        const branch = runGit("rev-parse --abbrev-ref HEAD");
        const commitSha = runGit("rev-parse HEAD");
        // Take the last two path segments of the remote URL, e.g.
        //   https://github.com/owner/name.git
        //   git@github.com:owner/name.git
        // The owner segment excludes ":" so scp-style remotes do not drag the
        // host in, and the name segment allows dots (e.g. "next.js") while an
        // optional ".git" suffix and trailing slash are dropped.
        const match = remote.match(/([^/:]+)\/([^/]+?)(?:\.git)?\/?$/);
        const owner = match?.[1] ?? "unknown";
        const name = match?.[2] ?? "unknown";
        return {
            repo: `${owner}/${name}`,
            owner,
            name,
            branch,
            commitSha,
        };
    }
    catch {
        // Best-effort: not a git checkout, no "origin" remote, or git missing.
        return {
            repo: "unknown/unknown",
            owner: "unknown",
            name: "unknown",
            branch: "unknown",
            commitSha: "unknown",
        };
    }
}
|
|
155
|
+
// ---------------------------------------------------------------------------
|
|
156
|
+
// Document ID scheme
|
|
157
|
+
// ---------------------------------------------------------------------------
|
|
158
|
+
/**
 * Build the deterministic document ID of a mirrored task.
 *
 * Format: ailf.task.mirror.<owner>.<repo>.<taskId>
 *
 * Each of owner/repo/taskId is sanitized on its own: every character that
 * is not an ASCII letter, a digit or a hyphen (dots included) is replaced
 * with a hyphen, then the segment is lowercased. This avoids Sanity
 * document ID issues.
 */
export function mirrorDocId(owner, repo, taskId) {
    const segments = [owner, repo, taskId].map((part) => part.replace(/[^a-z0-9-]/gi, "-").toLowerCase());
    return ["ailf.task.mirror", ...segments].join(".");
}
|
|
170
|
+
// ---------------------------------------------------------------------------
|
|
171
|
+
// Content hashing
|
|
172
|
+
// ---------------------------------------------------------------------------
|
|
173
|
+
/**
 * Hash a TaskDefinition's content so changes can be detected.
 *
 * The hash covers id, description, featureArea, taskPrompt, canonicalDocs,
 * docCoverage, assertions and baseline — serialized as JSON in that key
 * order. Runtime metadata such as referenceSolution (a filesystem path) is
 * left out because it is not mirrored.
 *
 * @returns The first 16 hex characters of the SHA-256 digest.
 */
export function computeTaskHash(task) {
    const { id, description, featureArea, taskPrompt, canonicalDocs, docCoverage, assertions, baseline, } = task;
    // Key order is part of the hash input; keep it stable.
    const serialized = JSON.stringify({
        id,
        description,
        featureArea,
        taskPrompt,
        canonicalDocs,
        docCoverage,
        assertions,
        baseline,
    });
    const digest = createHash("sha256").update(serialized).digest("hex");
    return digest.slice(0, 16);
}
|
|
193
|
+
// ---------------------------------------------------------------------------
|
|
194
|
+
// Batch slug resolution
|
|
195
|
+
// ---------------------------------------------------------------------------
|
|
196
|
+
/**
 * Look up the Sanity document IDs for a list of article slugs with a single
 * query.
 *
 * @param client - Object exposing `fetch(query, params)`.
 * @param slugs - Slugs to resolve; an empty list returns an empty map
 *   without querying.
 * @returns Map of slug → document `_id`. Slugs with no matching article are
 *   simply absent from the map.
 */
async function batchResolveDocSlugs(client, slugs) {
    const slugToId = new Map();
    if (slugs.length === 0) {
        return slugToId;
    }
    const rows = await client.fetch(`*[_type == "article" && slug.current in $slugs] {
    _id,
    "slug": slug.current
  }`, { slugs });
    for (const { slug, _id } of rows) {
        slugToId.set(slug, _id);
    }
    return slugToId;
}
|
|
215
|
+
// ---------------------------------------------------------------------------
|
|
216
|
+
// Feature area auto-creation
|
|
217
|
+
// ---------------------------------------------------------------------------
|
|
218
|
+
/**
 * Make sure an ailf.featureArea document exists for each given area.
 *
 * Existing areas are found by matching `areaId.current`. Each missing area
 * gets a document with ID `ailf.featureArea.<area>` and a description
 * derived from the area id (first letter uppercased, hyphens after it turned
 * into spaces); all of them are written in one transaction. In dry-run mode
 * the missing areas are only logged.
 *
 * @param client - Object exposing `fetch` and `transaction`.
 * @param areas - Area ids referenced by the tasks.
 * @param dryRun - When true, nothing is written.
 * @returns The area ids that were (or, in dry-run, would be) created.
 */
async function ensureFeatureAreas(client, areas, dryRun) {
    if (areas.length === 0) {
        return [];
    }
    // Ask which of the requested areas are already present.
    const present = new Set(await client.fetch(`*[_type == "ailf.featureArea" && areaId.current in $areas].areaId.current`, { areas }));
    const missing = areas.filter((area) => !present.has(area));
    if (missing.length === 0) {
        return [];
    }
    if (dryRun) {
        missing.forEach((area) => {
            console.log(` [dry-run] Would create feature area: ${area}`);
        });
        return missing;
    }
    const tx = client.transaction();
    missing.forEach((area) => {
        const label = area.charAt(0).toUpperCase() + area.slice(1).replace(/-/g, " ");
        tx.createOrReplace({
            _id: `ailf.featureArea.${area}`,
            _type: "ailf.featureArea",
            areaId: { _type: "slug", current: area },
            description: label,
        });
    });
    await tx.commit();
    return missing;
}
|
|
250
|
+
// ---------------------------------------------------------------------------
|
|
251
|
+
// Fetch existing content hashes
|
|
252
|
+
// ---------------------------------------------------------------------------
|
|
253
|
+
/**
 * Load the stored content hashes of existing mirror documents.
 *
 * The hash is read from `origin.contentHash` on each document.
 *
 * @param client - Object exposing `fetch(query, params)`.
 * @param docIds - Mirror document IDs to look up; an empty list returns an
 *   empty map without querying.
 * @returns Map of document `_id` → stored hash. Documents that are missing
 *   or have no (truthy) hash are absent from the map.
 */
async function fetchExistingHashes(client, docIds) {
    if (docIds.length === 0) {
        return new Map();
    }
    const rows = await client.fetch(`*[_id in $ids] { _id, "hash": origin.contentHash }`, { ids: docIds });
    return new Map(rows
        .filter((row) => row.hash)
        .map((row) => [row._id, row.hash]));
}
|
|
271
|
+
// ---------------------------------------------------------------------------
|
|
272
|
+
// Build mirror document
|
|
273
|
+
// ---------------------------------------------------------------------------
|
|
274
|
+
/**
 * Assemble the ailf.task mirror document for one task.
 *
 * @param task - The repo task definition being mirrored.
 * @param opts - `{ contentHash, docId, git, slugToDocId }`: the task's
 *   content hash, the target document ID, the git context for the origin
 *   block, and the slug → document ID map used to resolve canonical docs.
 * @returns A plain document object with `_id`, `_type`, converted
 *   assertions and canonical docs, a feature-area reference, an origin
 *   block and — when the task has one — a baseline block.
 */
function buildMirrorDocument(task, opts) {
    const { contentHash, docId, git, slugToDocId } = opts;

    // Canonical docs: only slug refs can be resolved to article references
    // today. Other ref types (path, id, perspective) keep their reason only.
    const canonicalDocs = task.canonicalDocs.map((ref, index) => {
        const item = { _key: `cd${index}` };
        const targetId = isSlugRef(ref) ? slugToDocId.get(ref.slug) : undefined;
        if (targetId) {
            item.doc = { _ref: targetId, _type: "reference" };
        }
        item.reason = ref.reason ?? "";
        return item;
    });

    // Assertions: copy over only the fields that are actually present.
    const assertArray = task.assertions.map((assertion, index) => {
        const hasRubricTemplate = assertion.type === "llm-rubric" && "template" in assertion;
        return {
            _key: `a${index}`,
            type: assertion.type,
            ...(hasRubricTemplate ? { template: assertion.template } : {}),
            ...(hasRubricTemplate &&
                "criteria" in assertion &&
                Array.isArray(assertion.criteria)
                ? { criteria: assertion.criteria }
                : {}),
            // Non-string values are stored as their JSON text.
            ...("value" in assertion && assertion.value !== undefined
                ? {
                    value: typeof assertion.value === "string"
                        ? assertion.value
                        : JSON.stringify(assertion.value),
                }
                : {}),
            ...("threshold" in assertion && assertion.threshold !== undefined
                ? { threshold: assertion.threshold }
                : {}),
            ...(assertion.weight !== undefined ? { weight: assertion.weight } : {}),
        };
    });

    const mirrorDoc = {
        _id: docId,
        _type: "ailf.task",
        assert: assertArray,
        canonicalDocs,
        description: task.description,
        docCoverage: task.docCoverage,
        featureArea: {
            _ref: `ailf.featureArea.${task.featureArea}`,
            _type: "reference",
        },
        id: { _type: "slug", current: task.id },
        origin: {
            branch: git.branch,
            commitSha: git.commitSha,
            contentHash,
            lastSyncedAt: new Date().toISOString(),
            // Best-effort source file path, derived from the feature area.
            path: `.ailf/tasks/${task.featureArea}.yaml`,
            repo: git.repo,
            repoName: git.name,
            repoOwner: git.owner,
            type: "repo",
        },
        taskPrompt: task.taskPrompt,
    };

    // The baseline block is added only when the task defines one, and only
    // with the sub-fields that are set.
    if (task.baseline) {
        const baseline = {};
        if (task.baseline.enabled !== undefined) {
            baseline.enabled = task.baseline.enabled;
        }
        if (task.baseline.rubric) {
            baseline.rubric = task.baseline.rubric;
        }
        mirrorDoc.baseline = baseline;
    }
    return mirrorDoc;
}
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* pipeline/plan-format.ts
|
|
3
|
+
*
|
|
4
|
+
* Formatters for rendering an ExecutionPlan to console output or JSON.
|
|
5
|
+
*
|
|
6
|
+
* The console formatter produces a rich, human-readable preview with
|
|
7
|
+
* emoji markers, alignment, and color-coding (via unicode markers).
|
|
8
|
+
* The JSON formatter produces machine-readable output for CI/CD.
|
|
9
|
+
*
|
|
10
|
+
* @see docs/exec-plans/active/execution-preview.md
|
|
11
|
+
*/
|
|
12
|
+
import type { ExecutionPlan } from "./plan.js";
|
|
13
|
+
/**
 * Render an execution plan as a rich, human-readable console string.
 *
 * The output is split into sections, marked with emoji, covering:
 * - the command summary (mode, source, flags)
 * - the step plan (steps that will run, are cached, or are skipped)
 * - the test/task summary
 * - the model list
 * - the cost estimate
 * - file I/O
 * - comparison context
 * - warnings and errors
 */
export declare function formatPlanConsole(plan: ExecutionPlan): string;
|
|
27
|
+
/**
 * Render an execution plan as indented JSON.
 *
 * Intended for CI/CD integration, approval gates, and any other
 * programmatic inspection of the plan.
 */
export declare function formatPlanJson(plan: ExecutionPlan): string;
|