@sanity/ailf 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +89 -0
- package/bin/ailf.js +64 -0
- package/canonical/grader-references/README.md +88 -0
- package/canonical/grader-references/groq.yaml +234 -0
- package/canonical/grader-references/studio-setup.yaml +275 -0
- package/canonical/reference-solutions/.gitkeep +1 -0
- package/canonical/reference-solutions/frameworks/nuxt.ts +119 -0
- package/canonical/reference-solutions/frameworks/remix.tsx +100 -0
- package/canonical/reference-solutions/functions/publish-webhook.ts +60 -0
- package/canonical/reference-solutions/groq/advanced-filtering.ts +379 -0
- package/canonical/reference-solutions/groq/blog-queries.ts +137 -0
- package/canonical/reference-solutions/groq/joins-references.ts +300 -0
- package/canonical/reference-solutions/nextjs/app-router-integration.tsx +128 -0
- package/canonical/reference-solutions/studio-setup/blog-schema.ts +143 -0
- package/canonical/reference-solutions/studio-setup/custom-tool.tsx +78 -0
- package/canonical/reference-solutions/visual-editing/live-preview.tsx +137 -0
- package/canonical/reference-solutions/visual-editing/presentation-nextjs.tsx +130 -0
- package/config/airbyte/ai_literacy_framework.connector.yaml +639 -0
- package/config/bigquery/README.md +74 -0
- package/config/bigquery/views/area_scores.sql +87 -0
- package/config/bigquery/views/reports.sql +49 -0
- package/config/features.yaml +116 -0
- package/config/models.yaml +115 -0
- package/config/prompts.yaml +75 -0
- package/config/rubrics.yaml +62 -0
- package/config/schedules.yaml +43 -0
- package/config/sinks.yaml +54 -0
- package/config/sources.yaml +51 -0
- package/config/thresholds.yaml +49 -0
- package/dist/_vendor/ailf-core/examples/index.d.ts +190 -0
- package/dist/_vendor/ailf-core/examples/index.js +285 -0
- package/dist/_vendor/ailf-core/index.d.ts +17 -0
- package/dist/_vendor/ailf-core/index.js +17 -0
- package/dist/_vendor/ailf-core/ports/cache-store.d.ts +72 -0
- package/dist/_vendor/ailf-core/ports/cache-store.js +17 -0
- package/dist/_vendor/ailf-core/ports/config-source.d.ts +33 -0
- package/dist/_vendor/ailf-core/ports/config-source.js +15 -0
- package/dist/_vendor/ailf-core/ports/context.d.ts +172 -0
- package/dist/_vendor/ailf-core/ports/context.js +14 -0
- package/dist/_vendor/ailf-core/ports/doc-fetcher.d.ts +131 -0
- package/dist/_vendor/ailf-core/ports/doc-fetcher.js +12 -0
- package/dist/_vendor/ailf-core/ports/eval-runner.d.ts +24 -0
- package/dist/_vendor/ailf-core/ports/eval-runner.js +8 -0
- package/dist/_vendor/ailf-core/ports/index.d.ts +15 -0
- package/dist/_vendor/ailf-core/ports/index.js +7 -0
- package/dist/_vendor/ailf-core/ports/logger.d.ts +36 -0
- package/dist/_vendor/ailf-core/ports/logger.js +11 -0
- package/dist/_vendor/ailf-core/ports/pipeline-step.d.ts +46 -0
- package/dist/_vendor/ailf-core/ports/pipeline-step.js +8 -0
- package/dist/_vendor/ailf-core/ports/task-source.d.ts +159 -0
- package/dist/_vendor/ailf-core/ports/task-source.js +72 -0
- package/dist/_vendor/ailf-core/schemas/callback-payload.d.ts +24 -0
- package/dist/_vendor/ailf-core/schemas/callback-payload.js +29 -0
- package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +55 -0
- package/dist/_vendor/ailf-core/schemas/eval-config.js +78 -0
- package/dist/_vendor/ailf-core/schemas/index.d.ts +16 -0
- package/dist/_vendor/ailf-core/schemas/index.js +16 -0
- package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +125 -0
- package/dist/_vendor/ailf-core/schemas/pipeline-request.js +67 -0
- package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +531 -0
- package/dist/_vendor/ailf-core/schemas/pipeline.js +318 -0
- package/dist/_vendor/ailf-core/schemas/schedules.d.ts +68 -0
- package/dist/_vendor/ailf-core/schemas/schedules.js +74 -0
- package/dist/_vendor/ailf-core/schemas/sinks.d.ts +207 -0
- package/dist/_vendor/ailf-core/schemas/sinks.js +108 -0
- package/dist/_vendor/ailf-core/services/comparison-formatters.d.ts +18 -0
- package/dist/_vendor/ailf-core/services/comparison-formatters.js +189 -0
- package/dist/_vendor/ailf-core/services/config-helpers.d.ts +41 -0
- package/dist/_vendor/ailf-core/services/config-helpers.js +86 -0
- package/dist/_vendor/ailf-core/services/index.d.ts +12 -0
- package/dist/_vendor/ailf-core/services/index.js +12 -0
- package/dist/_vendor/ailf-core/services/scoring.d.ts +49 -0
- package/dist/_vendor/ailf-core/services/scoring.js +222 -0
- package/dist/_vendor/ailf-core/types/index.d.ts +1082 -0
- package/dist/_vendor/ailf-core/types/index.js +21 -0
- package/dist/_vendor/ailf-core/types/scoring-input.d.ts +54 -0
- package/dist/_vendor/ailf-core/types/scoring-input.js +9 -0
- package/dist/_vendor/ailf-shared/dimension-names.d.ts +21 -0
- package/dist/_vendor/ailf-shared/dimension-names.js +27 -0
- package/dist/_vendor/ailf-shared/document-ref.d.ts +29 -0
- package/dist/_vendor/ailf-shared/document-ref.js +1 -0
- package/dist/_vendor/ailf-shared/eval-modes.d.ts +12 -0
- package/dist/_vendor/ailf-shared/eval-modes.js +8 -0
- package/dist/_vendor/ailf-shared/index.d.ts +16 -0
- package/dist/_vendor/ailf-shared/index.js +16 -0
- package/dist/_vendor/ailf-shared/noise-threshold.d.ts +9 -0
- package/dist/_vendor/ailf-shared/noise-threshold.js +9 -0
- package/dist/_vendor/ailf-shared/score-grades.d.ts +17 -0
- package/dist/_vendor/ailf-shared/score-grades.js +23 -0
- package/dist/adapters/cache/content-lake-cache.d.ts +24 -0
- package/dist/adapters/cache/content-lake-cache.js +59 -0
- package/dist/adapters/cache/filesystem-cache.d.ts +18 -0
- package/dist/adapters/cache/filesystem-cache.js +54 -0
- package/dist/adapters/cache/index.d.ts +2 -0
- package/dist/adapters/cache/index.js +2 -0
- package/dist/adapters/config-sources/cli-config-adapter.d.ts +17 -0
- package/dist/adapters/config-sources/cli-config-adapter.js +23 -0
- package/dist/adapters/config-sources/file-config-adapter.d.ts +26 -0
- package/dist/adapters/config-sources/file-config-adapter.js +96 -0
- package/dist/adapters/config-sources/index.d.ts +2 -0
- package/dist/adapters/config-sources/index.js +2 -0
- package/dist/adapters/doc-fetchers/index.d.ts +1 -0
- package/dist/adapters/doc-fetchers/index.js +1 -0
- package/dist/adapters/doc-fetchers/sanity-doc-fetcher.d.ts +76 -0
- package/dist/adapters/doc-fetchers/sanity-doc-fetcher.js +620 -0
- package/dist/adapters/eval-runners/index.d.ts +1 -0
- package/dist/adapters/eval-runners/index.js +1 -0
- package/dist/adapters/eval-runners/promptfoo-eval-adapter.d.ts +14 -0
- package/dist/adapters/eval-runners/promptfoo-eval-adapter.js +63 -0
- package/dist/adapters/index.d.ts +12 -0
- package/dist/adapters/index.js +12 -0
- package/dist/adapters/loggers/console-logger.d.ts +22 -0
- package/dist/adapters/loggers/console-logger.js +54 -0
- package/dist/adapters/loggers/index.d.ts +9 -0
- package/dist/adapters/loggers/index.js +9 -0
- package/dist/adapters/loggers/json-logger.d.ts +18 -0
- package/dist/adapters/loggers/json-logger.js +33 -0
- package/dist/adapters/loggers/quiet-logger.d.ts +16 -0
- package/dist/adapters/loggers/quiet-logger.js +30 -0
- package/dist/adapters/task-sources/composite-task-source.d.ts +20 -0
- package/dist/adapters/task-sources/composite-task-source.js +59 -0
- package/dist/adapters/task-sources/content-lake-task-source.d.ts +20 -0
- package/dist/adapters/task-sources/content-lake-task-source.js +219 -0
- package/dist/adapters/task-sources/index.d.ts +7 -0
- package/dist/adapters/task-sources/index.js +7 -0
- package/dist/adapters/task-sources/repo-schemas.d.ts +245 -0
- package/dist/adapters/task-sources/repo-schemas.js +234 -0
- package/dist/adapters/task-sources/repo-task-source.d.ts +22 -0
- package/dist/adapters/task-sources/repo-task-source.js +104 -0
- package/dist/adapters/task-sources/repo-trigger.d.ts +52 -0
- package/dist/adapters/task-sources/repo-trigger.js +153 -0
- package/dist/adapters/task-sources/repo-validation.d.ts +49 -0
- package/dist/adapters/task-sources/repo-validation.js +164 -0
- package/dist/adapters/task-sources/yaml-task-source.d.ts +18 -0
- package/dist/adapters/task-sources/yaml-task-source.js +136 -0
- package/dist/agent-observer/agentic-provider.d.ts +132 -0
- package/dist/agent-observer/agentic-provider.js +983 -0
- package/dist/agent-observer/classifier.d.ts +62 -0
- package/dist/agent-observer/classifier.js +269 -0
- package/dist/agent-observer/index.d.ts +7 -0
- package/dist/agent-observer/index.js +4 -0
- package/dist/agent-observer/pricing.d.ts +35 -0
- package/dist/agent-observer/pricing.js +82 -0
- package/dist/agent-observer/provider.d.ts +77 -0
- package/dist/agent-observer/provider.js +151 -0
- package/dist/agent-observer/proxy.d.ts +91 -0
- package/dist/agent-observer/proxy.js +321 -0
- package/dist/agent-observer/test-imports.d.ts +7 -0
- package/dist/agent-observer/test-imports.js +185 -0
- package/dist/agent-observer/types.d.ts +137 -0
- package/dist/agent-observer/types.js +16 -0
- package/dist/assertions/source-isolation.d.ts +72 -0
- package/dist/assertions/source-isolation.js +117 -0
- package/dist/cli.d.ts +24 -0
- package/dist/cli.js +199 -0
- package/dist/commands/agent-report.d.ts +5 -0
- package/dist/commands/agent-report.js +69 -0
- package/dist/commands/baseline.d.ts +9 -0
- package/dist/commands/baseline.js +141 -0
- package/dist/commands/cache.d.ts +13 -0
- package/dist/commands/cache.js +135 -0
- package/dist/commands/calculate-scores.d.ts +8 -0
- package/dist/commands/calculate-scores.js +48 -0
- package/dist/commands/compare.d.ts +8 -0
- package/dist/commands/compare.js +120 -0
- package/dist/commands/completion.d.ts +18 -0
- package/dist/commands/completion.js +260 -0
- package/dist/commands/coverage-audit.d.ts +7 -0
- package/dist/commands/coverage-audit.js +40 -0
- package/dist/commands/discovery-report.d.ts +10 -0
- package/dist/commands/discovery-report.js +44 -0
- package/dist/commands/eval.d.ts +9 -0
- package/dist/commands/eval.js +35 -0
- package/dist/commands/explain-handler.d.ts +34 -0
- package/dist/commands/explain-handler.js +719 -0
- package/dist/commands/fetch-docs.d.ts +8 -0
- package/dist/commands/fetch-docs.js +128 -0
- package/dist/commands/generate-configs.d.ts +8 -0
- package/dist/commands/generate-configs.js +46 -0
- package/dist/commands/grader/index.d.ts +11 -0
- package/dist/commands/grader/index.js +118 -0
- package/dist/commands/init.d.ts +19 -0
- package/dist/commands/init.js +150 -0
- package/dist/commands/interactive.d.ts +12 -0
- package/dist/commands/interactive.js +238 -0
- package/dist/commands/lookup-doc.d.ts +15 -0
- package/dist/commands/lookup-doc.js +84 -0
- package/dist/commands/measure-retrieval.d.ts +5 -0
- package/dist/commands/measure-retrieval.js +65 -0
- package/dist/commands/pipeline-action.d.ts +71 -0
- package/dist/commands/pipeline-action.js +305 -0
- package/dist/commands/pipeline.d.ts +62 -0
- package/dist/commands/pipeline.js +53 -0
- package/dist/commands/pr-comment.d.ts +8 -0
- package/dist/commands/pr-comment.js +47 -0
- package/dist/commands/publish.d.ts +26 -0
- package/dist/commands/publish.js +253 -0
- package/dist/commands/readiness-report.d.ts +10 -0
- package/dist/commands/readiness-report.js +104 -0
- package/dist/commands/shared/options.d.ts +29 -0
- package/dist/commands/shared/options.js +57 -0
- package/dist/commands/update-quality-scores.d.ts +5 -0
- package/dist/commands/update-quality-scores.js +20 -0
- package/dist/commands/validate-tasks.d.ts +16 -0
- package/dist/commands/validate-tasks.js +93 -0
- package/dist/commands/validate.d.ts +9 -0
- package/dist/commands/validate.js +73 -0
- package/dist/commands/webhook-server.d.ts +5 -0
- package/dist/commands/webhook-server.js +30 -0
- package/dist/commands/weekly-digest.d.ts +10 -0
- package/dist/commands/weekly-digest.js +104 -0
- package/dist/composition-root.d.ts +26 -0
- package/dist/composition-root.js +107 -0
- package/dist/interpolate.d.ts +26 -0
- package/dist/interpolate.js +70 -0
- package/dist/job-store.d.ts +104 -0
- package/dist/job-store.js +188 -0
- package/dist/lib/agent-behavior-report.d.ts +8 -0
- package/dist/lib/agent-behavior-report.js +185 -0
- package/dist/lib/baseline.d.ts +19 -0
- package/dist/lib/baseline.js +153 -0
- package/dist/lib/calculate-scores.d.ts +23 -0
- package/dist/lib/calculate-scores.js +42 -0
- package/dist/lib/compare.d.ts +18 -0
- package/dist/lib/compare.js +170 -0
- package/dist/lib/coverage-audit.d.ts +4 -0
- package/dist/lib/coverage-audit.js +42 -0
- package/dist/lib/discovery-report.d.ts +13 -0
- package/dist/lib/discovery-report.js +57 -0
- package/dist/lib/fetch-docs.d.ts +30 -0
- package/dist/lib/fetch-docs.js +171 -0
- package/dist/lib/generate-configs.d.ts +25 -0
- package/dist/lib/generate-configs.js +42 -0
- package/dist/lib/grader-api.d.ts +21 -0
- package/dist/lib/grader-api.js +34 -0
- package/dist/lib/grader-compare.d.ts +19 -0
- package/dist/lib/grader-compare.js +91 -0
- package/dist/lib/grader-consistency.d.ts +27 -0
- package/dist/lib/grader-consistency.js +79 -0
- package/dist/lib/grader-sensitivity.d.ts +19 -0
- package/dist/lib/grader-sensitivity.js +75 -0
- package/dist/lib/grader-validate.d.ts +19 -0
- package/dist/lib/grader-validate.js +78 -0
- package/dist/lib/measure-retrieval.d.ts +14 -0
- package/dist/lib/measure-retrieval.js +71 -0
- package/dist/lib/pr-comment.d.ts +16 -0
- package/dist/lib/pr-comment.js +28 -0
- package/dist/lib/readiness-report.d.ts +13 -0
- package/dist/lib/readiness-report.js +108 -0
- package/dist/lib/webhook-server.d.ts +11 -0
- package/dist/lib/webhook-server.js +24 -0
- package/dist/lib/weekly-digest.d.ts +24 -0
- package/dist/lib/weekly-digest.js +148 -0
- package/dist/orchestration/build-app-context.d.ts +27 -0
- package/dist/orchestration/build-app-context.js +81 -0
- package/dist/orchestration/build-step-sequence.d.ts +15 -0
- package/dist/orchestration/build-step-sequence.js +84 -0
- package/dist/orchestration/config-to-source-overrides.d.ts +9 -0
- package/dist/orchestration/config-to-source-overrides.js +28 -0
- package/dist/orchestration/env-bridge.d.ts +21 -0
- package/dist/orchestration/env-bridge.js +66 -0
- package/dist/orchestration/index.d.ts +11 -0
- package/dist/orchestration/index.js +11 -0
- package/dist/orchestration/pipeline-orchestrator.d.ts +24 -0
- package/dist/orchestration/pipeline-orchestrator.js +153 -0
- package/dist/orchestration/step-runner.d.ts +20 -0
- package/dist/orchestration/step-runner.js +88 -0
- package/dist/orchestration/steps/calculate-scores-step.d.ts +13 -0
- package/dist/orchestration/steps/calculate-scores-step.js +95 -0
- package/dist/orchestration/steps/callback-step.d.ts +24 -0
- package/dist/orchestration/steps/callback-step.js +76 -0
- package/dist/orchestration/steps/compare-step.d.ts +14 -0
- package/dist/orchestration/steps/compare-step.js +92 -0
- package/dist/orchestration/steps/discovery-report-step.d.ts +13 -0
- package/dist/orchestration/steps/discovery-report-step.js +55 -0
- package/dist/orchestration/steps/fetch-docs-shell.d.ts +17 -0
- package/dist/orchestration/steps/fetch-docs-shell.js +30 -0
- package/dist/orchestration/steps/fetch-docs-step.d.ts +14 -0
- package/dist/orchestration/steps/fetch-docs-step.js +135 -0
- package/dist/orchestration/steps/gap-analysis-step.d.ts +16 -0
- package/dist/orchestration/steps/gap-analysis-step.js +136 -0
- package/dist/orchestration/steps/generate-configs-step.d.ts +14 -0
- package/dist/orchestration/steps/generate-configs-step.js +85 -0
- package/dist/orchestration/steps/grader-consistency-step.d.ts +13 -0
- package/dist/orchestration/steps/grader-consistency-step.js +64 -0
- package/dist/orchestration/steps/index.d.ts +19 -0
- package/dist/orchestration/steps/index.js +19 -0
- package/dist/orchestration/steps/mirror-repo-tasks-step.d.ts +21 -0
- package/dist/orchestration/steps/mirror-repo-tasks-step.js +94 -0
- package/dist/orchestration/steps/publish-report-step.d.ts +26 -0
- package/dist/orchestration/steps/publish-report-step.js +216 -0
- package/dist/orchestration/steps/readiness-step.d.ts +13 -0
- package/dist/orchestration/steps/readiness-step.js +91 -0
- package/dist/orchestration/steps/report-step.d.ts +12 -0
- package/dist/orchestration/steps/report-step.js +49 -0
- package/dist/orchestration/steps/run-eval-step.d.ts +17 -0
- package/dist/orchestration/steps/run-eval-step.js +195 -0
- package/dist/orchestration/steps/validate-step.d.ts +12 -0
- package/dist/orchestration/steps/validate-step.js +41 -0
- package/dist/pipeline/agent-behavior-report.d.ts +53 -0
- package/dist/pipeline/agent-behavior-report.js +132 -0
- package/dist/pipeline/attribution.d.ts +47 -0
- package/dist/pipeline/attribution.js +226 -0
- package/dist/pipeline/baseline.d.ts +37 -0
- package/dist/pipeline/baseline.js +141 -0
- package/dist/pipeline/cache.d.ts +101 -0
- package/dist/pipeline/cache.js +283 -0
- package/dist/pipeline/calculate-scores.d.ts +102 -0
- package/dist/pipeline/calculate-scores.js +1128 -0
- package/dist/pipeline/callback-delivery.d.ts +50 -0
- package/dist/pipeline/callback-delivery.js +89 -0
- package/dist/pipeline/checks.d.ts +39 -0
- package/dist/pipeline/checks.js +280 -0
- package/dist/pipeline/classify-url.d.ts +61 -0
- package/dist/pipeline/classify-url.js +93 -0
- package/dist/pipeline/compare.d.ts +31 -0
- package/dist/pipeline/compare.js +208 -0
- package/dist/pipeline/coverage-audit.d.ts +39 -0
- package/dist/pipeline/coverage-audit.js +165 -0
- package/dist/pipeline/degradations.d.ts +85 -0
- package/dist/pipeline/degradations.js +242 -0
- package/dist/pipeline/discovery-report.d.ts +55 -0
- package/dist/pipeline/discovery-report.js +178 -0
- package/dist/pipeline/eval-constants.d.ts +68 -0
- package/dist/pipeline/eval-constants.js +111 -0
- package/dist/pipeline/eval-fingerprint.d.ts +66 -0
- package/dist/pipeline/eval-fingerprint.js +175 -0
- package/dist/pipeline/expand-tasks.d.ts +220 -0
- package/dist/pipeline/expand-tasks.js +421 -0
- package/dist/pipeline/failure-modes.d.ts +46 -0
- package/dist/pipeline/failure-modes.js +348 -0
- package/dist/pipeline/fetch-url-content.d.ts +44 -0
- package/dist/pipeline/fetch-url-content.js +93 -0
- package/dist/pipeline/gap-analysis.d.ts +48 -0
- package/dist/pipeline/gap-analysis.js +231 -0
- package/dist/pipeline/generate-configs.d.ts +72 -0
- package/dist/pipeline/generate-configs.js +395 -0
- package/dist/pipeline/grader-api.d.ts +49 -0
- package/dist/pipeline/grader-api.js +200 -0
- package/dist/pipeline/grader-compare-runner.d.ts +44 -0
- package/dist/pipeline/grader-compare-runner.js +301 -0
- package/dist/pipeline/grader-comparison.d.ts +111 -0
- package/dist/pipeline/grader-comparison.js +161 -0
- package/dist/pipeline/grader-consistency-runner.d.ts +60 -0
- package/dist/pipeline/grader-consistency-runner.js +270 -0
- package/dist/pipeline/grader-consistency.d.ts +103 -0
- package/dist/pipeline/grader-consistency.js +146 -0
- package/dist/pipeline/grader-sensitivity-runner.d.ts +40 -0
- package/dist/pipeline/grader-sensitivity-runner.js +282 -0
- package/dist/pipeline/grader-sensitivity.d.ts +94 -0
- package/dist/pipeline/grader-sensitivity.js +144 -0
- package/dist/pipeline/grader-validate-runner.d.ts +38 -0
- package/dist/pipeline/grader-validate-runner.js +229 -0
- package/dist/pipeline/grader-validation.d.ts +107 -0
- package/dist/pipeline/grader-validation.js +169 -0
- package/dist/pipeline/map-request-to-config.d.ts +19 -0
- package/dist/pipeline/map-request-to-config.js +80 -0
- package/dist/pipeline/measure-retrieval.d.ts +59 -0
- package/dist/pipeline/measure-retrieval.js +111 -0
- package/dist/pipeline/mirror-repo-tasks.d.ts +86 -0
- package/dist/pipeline/mirror-repo-tasks.js +350 -0
- package/dist/pipeline/plan-format.d.ts +33 -0
- package/dist/pipeline/plan-format.js +202 -0
- package/dist/pipeline/plan.d.ts +169 -0
- package/dist/pipeline/plan.js +708 -0
- package/dist/pipeline/pr-comment.d.ts +19 -0
- package/dist/pipeline/pr-comment.js +502 -0
- package/dist/pipeline/probe.d.ts +52 -0
- package/dist/pipeline/probe.js +390 -0
- package/dist/pipeline/provenance.d.ts +47 -0
- package/dist/pipeline/provenance.js +146 -0
- package/dist/pipeline/readiness-report.d.ts +87 -0
- package/dist/pipeline/readiness-report.js +205 -0
- package/dist/pipeline/release-classification.d.ts +54 -0
- package/dist/pipeline/release-classification.js +238 -0
- package/dist/pipeline/release-report.d.ts +37 -0
- package/dist/pipeline/release-report.js +222 -0
- package/dist/pipeline/repo-eval-comment.d.ts +37 -0
- package/dist/pipeline/repo-eval-comment.js +165 -0
- package/dist/pipeline/repo-threshold-evaluator.d.ts +89 -0
- package/dist/pipeline/repo-threshold-evaluator.js +162 -0
- package/dist/pipeline/resolve-mappings.d.ts +35 -0
- package/dist/pipeline/resolve-mappings.js +72 -0
- package/dist/pipeline/retrieval-metrics.d.ts +39 -0
- package/dist/pipeline/retrieval-metrics.js +136 -0
- package/dist/pipeline/reverse-mapping.d.ts +67 -0
- package/dist/pipeline/reverse-mapping.js +88 -0
- package/dist/pipeline/schemas.d.ts +9 -0
- package/dist/pipeline/schemas.js +9 -0
- package/dist/pipeline/steps/calculate-scores-step.d.ts +11 -0
- package/dist/pipeline/steps/calculate-scores-step.js +89 -0
- package/dist/pipeline/steps/compare-step.d.ts +18 -0
- package/dist/pipeline/steps/compare-step.js +90 -0
- package/dist/pipeline/steps/eval-step.d.ts +53 -0
- package/dist/pipeline/steps/eval-step.js +347 -0
- package/dist/pipeline/steps/fetch-docs-step.d.ts +11 -0
- package/dist/pipeline/steps/fetch-docs-step.js +84 -0
- package/dist/pipeline/steps/generate-configs-step.d.ts +11 -0
- package/dist/pipeline/steps/generate-configs-step.js +98 -0
- package/dist/pipeline/steps/grader-consistency-step.d.ts +21 -0
- package/dist/pipeline/steps/grader-consistency-step.js +74 -0
- package/dist/pipeline/steps/publish-report-step.d.ts +57 -0
- package/dist/pipeline/steps/publish-report-step.js +243 -0
- package/dist/pipeline/steps/report-step.d.ts +13 -0
- package/dist/pipeline/steps/report-step.js +56 -0
- package/dist/pipeline/steps/update-scores-step.d.ts +11 -0
- package/dist/pipeline/steps/update-scores-step.js +42 -0
- package/dist/pipeline/targeted-loo.d.ts +88 -0
- package/dist/pipeline/targeted-loo.js +203 -0
- package/dist/pipeline/thresholds.d.ts +27 -0
- package/dist/pipeline/thresholds.js +245 -0
- package/dist/pipeline/types.d.ts +10 -0
- package/dist/pipeline/types.js +10 -0
- package/dist/pipeline/validate.d.ts +67 -0
- package/dist/pipeline/validate.js +406 -0
- package/dist/pipeline/webhook-server.d.ts +37 -0
- package/dist/pipeline/webhook-server.js +133 -0
- package/dist/report-store.d.ts +84 -0
- package/dist/report-store.js +208 -0
- package/dist/sanity/client.d.ts +38 -0
- package/dist/sanity/client.js +86 -0
- package/dist/sanity/portable-text.d.ts +11 -0
- package/dist/sanity/portable-text.js +211 -0
- package/dist/sanity/queries.d.ts +133 -0
- package/dist/sanity/queries.js +300 -0
- package/dist/schedules/digest.d.ts +116 -0
- package/dist/schedules/digest.js +156 -0
- package/dist/schedules/index.d.ts +12 -0
- package/dist/schedules/index.js +10 -0
- package/dist/schedules/loader.d.ts +31 -0
- package/dist/schedules/loader.js +73 -0
- package/dist/schedules/schema.d.ts +9 -0
- package/dist/schedules/schema.js +9 -0
- package/dist/scripts/agent-behavior-report.d.ts +19 -0
- package/dist/scripts/agent-behavior-report.js +315 -0
- package/dist/scripts/baseline.d.ts +43 -0
- package/dist/scripts/baseline.js +267 -0
- package/dist/scripts/calculate-scores.d.ts +166 -0
- package/dist/scripts/calculate-scores.js +1296 -0
- package/dist/scripts/compare.d.ts +22 -0
- package/dist/scripts/compare.js +334 -0
- package/dist/scripts/coverage-audit.d.ts +44 -0
- package/dist/scripts/coverage-audit.js +209 -0
- package/dist/scripts/debug-eval.d.ts +19 -0
- package/dist/scripts/debug-eval.js +73 -0
- package/dist/scripts/discovery-report.d.ts +58 -0
- package/dist/scripts/discovery-report.js +250 -0
- package/dist/scripts/fetch-docs.d.ts +35 -0
- package/dist/scripts/fetch-docs.js +472 -0
- package/dist/scripts/generate-configs.d.ts +66 -0
- package/dist/scripts/generate-configs.js +459 -0
- package/dist/scripts/grader-api.d.ts +27 -0
- package/dist/scripts/grader-api.js +206 -0
- package/dist/scripts/grader-compare.d.ts +22 -0
- package/dist/scripts/grader-compare.js +368 -0
- package/dist/scripts/grader-consistency.d.ts +20 -0
- package/dist/scripts/grader-consistency.js +313 -0
- package/dist/scripts/grader-sensitivity.d.ts +22 -0
- package/dist/scripts/grader-sensitivity.js +354 -0
- package/dist/scripts/grader-validate.d.ts +19 -0
- package/dist/scripts/grader-validate.js +267 -0
- package/dist/scripts/measure-retrieval.d.ts +10 -0
- package/dist/scripts/measure-retrieval.js +145 -0
- package/dist/scripts/migrate-tasks-to-content-lake.d.ts +24 -0
- package/dist/scripts/migrate-tasks-to-content-lake.js +327 -0
- package/dist/scripts/pipeline.d.ts +76 -0
- package/dist/scripts/pipeline.js +1031 -0
- package/dist/scripts/pr-comment.d.ts +10 -0
- package/dist/scripts/pr-comment.js +510 -0
- package/dist/scripts/readiness-report.d.ts +88 -0
- package/dist/scripts/readiness-report.js +342 -0
- package/dist/scripts/update-quality-scores.d.ts +15 -0
- package/dist/scripts/update-quality-scores.js +184 -0
- package/dist/scripts/validate-task-sources.d.ts +21 -0
- package/dist/scripts/validate-task-sources.js +210 -0
- package/dist/scripts/validate.d.ts +13 -0
- package/dist/scripts/validate.js +79 -0
- package/dist/scripts/webhook-server.d.ts +26 -0
- package/dist/scripts/webhook-server.js +147 -0
- package/dist/scripts/weekly-digest.d.ts +24 -0
- package/dist/scripts/weekly-digest.js +144 -0
- package/dist/sinks/bigquery/index.d.ts +131 -0
- package/dist/sinks/bigquery/index.js +222 -0
- package/dist/sinks/format-slack.d.ts +64 -0
- package/dist/sinks/format-slack.js +306 -0
- package/dist/sinks/index.d.ts +23 -0
- package/dist/sinks/index.js +18 -0
- package/dist/sinks/loader.d.ts +18 -0
- package/dist/sinks/loader.js +82 -0
- package/dist/sinks/retry.d.ts +24 -0
- package/dist/sinks/retry.js +52 -0
- package/dist/sinks/schema.d.ts +9 -0
- package/dist/sinks/schema.js +9 -0
- package/dist/sinks/slack/format.d.ts +65 -0
- package/dist/sinks/slack/format.js +327 -0
- package/dist/sinks/slack/index.d.ts +27 -0
- package/dist/sinks/slack/index.js +78 -0
- package/dist/sinks/slack-sink.d.ts +27 -0
- package/dist/sinks/slack-sink.js +78 -0
- package/dist/sinks/types.d.ts +59 -0
- package/dist/sinks/types.js +44 -0
- package/dist/sinks/webhook/index.d.ts +19 -0
- package/dist/sinks/webhook/index.js +50 -0
- package/dist/sinks/webhook-sink.d.ts +19 -0
- package/dist/sinks/webhook-sink.js +50 -0
- package/dist/sources.d.ts +104 -0
- package/dist/sources.js +292 -0
- package/dist/webhook/budget.d.ts +42 -0
- package/dist/webhook/budget.js +60 -0
- package/dist/webhook/debounce.d.ts +67 -0
- package/dist/webhook/debounce.js +76 -0
- package/dist/webhook/dispatch.d.ts +45 -0
- package/dist/webhook/dispatch.js +84 -0
- package/dist/webhook/eval-request-handler.d.ts +87 -0
- package/dist/webhook/eval-request-handler.js +181 -0
- package/dist/webhook/handler.d.ts +88 -0
- package/dist/webhook/handler.js +203 -0
- package/dist/webhook/index.d.ts +17 -0
- package/dist/webhook/index.js +12 -0
- package/dist/webhook/types.d.ts +109 -0
- package/dist/webhook/types.js +10 -0
- package/package.json +72 -0
- package/tasks/.expanded.agentic.yaml +51 -0
- package/tasks/.expanded.yaml +66 -0
- package/tasks/frameworks.yaml +98 -0
- package/tasks/functions.yaml +51 -0
- package/tasks/groq.yaml +216 -0
- package/tasks/nextjs-live.yaml +62 -0
- package/tasks/studio-setup.yaml +111 -0
- package/tasks/visual-editing.yaml +120 -0
|
@@ -0,0 +1,238 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Interactive mode — guided wizard for the evaluation pipeline.
|
|
3
|
+
*
|
|
4
|
+
* When `ailf` is run with no arguments (or `ailf interactive`), this module
|
|
5
|
+
* prompts the user through mode selection, area scoping, debug options,
|
|
6
|
+
* and common flags — then builds and executes the equivalent `ailf <subcommand>`
|
|
7
|
+
* command.
|
|
8
|
+
*
|
|
9
|
+
* Uses @inquirer/prompts for a clean, modern terminal UI.
|
|
10
|
+
*/
|
|
11
|
+
import { Command } from "commander";
|
|
12
|
+
/**
 * Builds the `interactive` subcommand.
 *
 * The action runs the wizard, echoes the equivalent `ailf …` invocation, and
 * then executes it in a child process whose stdio is inherited from this one.
 * If the wizard returns a falsy result, nothing is executed.
 *
 * @returns A commander `Command` named "interactive".
 */
export function createInteractiveCommand() {
    return new Command("interactive")
        .description("Guided wizard for common evaluation workflows")
        .action(async () => {
        const result = await runInteractiveWizard();
        if (!result)
            return;
        console.log();
        console.log(` ▸ ailf ${result.command} ${result.args.join(" ")}`);
        console.log();
        // Re-run the CLI entry script that is currently executing, with the
        // synthesized argv, in a child process. Using the running entry point
        // (process.argv[1]) instead of a hard-coded source path means this
        // does not depend on the working directory or on files that exist
        // only in a development checkout.
        const entryScript = process.argv[1];
        if (!entryScript) {
            console.error("Cannot determine the CLI entry script to re-run.");
            process.exit(1);
        }
        const { execFileSync } = await import("child_process");
        // Arguments are passed as an array (no shell), so values containing
        // spaces or shell metacharacters reach the child unchanged.
        // process.execArgv forwards any Node flags (e.g. loader hooks) that
        // this process was started with.
        const childArgv = [
            ...process.execArgv,
            entryScript,
            result.command,
            ...result.args,
        ];
        try {
            execFileSync(process.execPath, childArgv, { stdio: "inherit" });
        }
        catch (err) {
            // `status` is null when the child was killed by a signal or could
            // not be spawned; treat anything that is not a numeric exit code
            // as a generic failure so the parent never reports success.
            const status = err !== null && typeof err === "object" && "status" in err
                ? err.status
                : undefined;
            process.exit(typeof status === "number" ? status : 1);
        }
    });
}
|
|
38
|
+
// ---------------------------------------------------------------------------
|
|
39
|
+
// Wizard steps
|
|
40
|
+
// ---------------------------------------------------------------------------
|
|
41
|
+
async function runInteractiveWizard() {
|
|
42
|
+
const { confirm, input, select } = await import("@inquirer/prompts");
|
|
43
|
+
console.log();
|
|
44
|
+
console.log(" 🧙 AI Literacy Framework — Interactive Mode");
|
|
45
|
+
console.log(" ────────────────────────────────────────────");
|
|
46
|
+
console.log();
|
|
47
|
+
// Step 1: Choose workflow
|
|
48
|
+
const workflow = await select({
|
|
49
|
+
choices: [
|
|
50
|
+
{
|
|
51
|
+
description: "Full evaluation pipeline (fetch → eval → score → report)",
|
|
52
|
+
name: "Run pipeline",
|
|
53
|
+
value: "pipeline",
|
|
54
|
+
},
|
|
55
|
+
{
|
|
56
|
+
description: "Compare current scores against a saved baseline",
|
|
57
|
+
name: "Compare scores",
|
|
58
|
+
value: "compare",
|
|
59
|
+
},
|
|
60
|
+
{
|
|
61
|
+
description: "Check YAML files, mappings, and reference solutions",
|
|
62
|
+
name: "Validate config",
|
|
63
|
+
value: "validate",
|
|
64
|
+
},
|
|
65
|
+
{
|
|
66
|
+
description: "Save, compare, or list historical score snapshots",
|
|
67
|
+
name: "Manage baselines",
|
|
68
|
+
value: "baseline",
|
|
69
|
+
},
|
|
70
|
+
{
|
|
71
|
+
description: "Weekly evaluation trends and area summaries",
|
|
72
|
+
name: "Weekly digest",
|
|
73
|
+
value: "weekly-digest",
|
|
74
|
+
},
|
|
75
|
+
{
|
|
76
|
+
description: "Measure grader reliability and discrimination power",
|
|
77
|
+
name: "Grader tools",
|
|
78
|
+
value: "grader",
|
|
79
|
+
},
|
|
80
|
+
],
|
|
81
|
+
message: "What would you like to do?",
|
|
82
|
+
});
|
|
83
|
+
if (workflow === "compare") {
|
|
84
|
+
return { args: [], command: "compare" };
|
|
85
|
+
}
|
|
86
|
+
if (workflow === "validate") {
|
|
87
|
+
return { args: [], command: "validate" };
|
|
88
|
+
}
|
|
89
|
+
if (workflow === "weekly-digest") {
|
|
90
|
+
const dryRun = await confirm({
|
|
91
|
+
default: true,
|
|
92
|
+
message: "Dry run? (preview to stdout, don't send to Slack)",
|
|
93
|
+
});
|
|
94
|
+
return { args: dryRun ? ["--dry-run"] : [], command: "weekly-digest" };
|
|
95
|
+
}
|
|
96
|
+
if (workflow === "baseline") {
|
|
97
|
+
const subcommand = await select({
|
|
98
|
+
choices: [
|
|
99
|
+
{ name: "Save current scores", value: "save" },
|
|
100
|
+
{ name: "Compare against latest", value: "compare" },
|
|
101
|
+
{ name: "List saved baselines", value: "history" },
|
|
102
|
+
],
|
|
103
|
+
message: "Baseline operation:",
|
|
104
|
+
});
|
|
105
|
+
return { args: [subcommand], command: "baseline" };
|
|
106
|
+
}
|
|
107
|
+
if (workflow === "grader") {
|
|
108
|
+
const subcommand = await select({
|
|
109
|
+
choices: [
|
|
110
|
+
{
|
|
111
|
+
description: "Measure grading variance across replications",
|
|
112
|
+
name: "Consistency analysis",
|
|
113
|
+
value: "consistency",
|
|
114
|
+
},
|
|
115
|
+
{
|
|
116
|
+
description: "Test grader discrimination across quality levels",
|
|
117
|
+
name: "Sensitivity test",
|
|
118
|
+
value: "sensitivity",
|
|
119
|
+
},
|
|
120
|
+
{
|
|
121
|
+
description: "Compare two grader models head-to-head",
|
|
122
|
+
name: "Compare graders",
|
|
123
|
+
value: "compare",
|
|
124
|
+
},
|
|
125
|
+
{
|
|
126
|
+
description: "Validate against human reference grades",
|
|
127
|
+
name: "Validate grader",
|
|
128
|
+
value: "validate",
|
|
129
|
+
},
|
|
130
|
+
],
|
|
131
|
+
message: "Grader tool:",
|
|
132
|
+
});
|
|
133
|
+
return { args: [subcommand], command: "grader" };
|
|
134
|
+
}
|
|
135
|
+
// --- Pipeline-specific wizard continues below ---
|
|
136
|
+
const args = [];
|
|
137
|
+
// Step 2: Evaluation mode
|
|
138
|
+
const mode = await select({
|
|
139
|
+
choices: [
|
|
140
|
+
{
|
|
141
|
+
description: "Evaluate with pre-fetched documentation context",
|
|
142
|
+
name: "Baseline (with docs vs without docs)",
|
|
143
|
+
value: "baseline",
|
|
144
|
+
},
|
|
145
|
+
{
|
|
146
|
+
description: "Baseline + record HTTP request patterns",
|
|
147
|
+
name: "Observed (instrumented)",
|
|
148
|
+
value: "observed",
|
|
149
|
+
},
|
|
150
|
+
{
|
|
151
|
+
description: "Agent searches for docs itself via web tools",
|
|
152
|
+
name: "Agentic (agent-driven retrieval)",
|
|
153
|
+
value: "agentic",
|
|
154
|
+
},
|
|
155
|
+
],
|
|
156
|
+
message: "Evaluation mode:",
|
|
157
|
+
});
|
|
158
|
+
if (mode !== "baseline") {
|
|
159
|
+
args.push("--mode", mode);
|
|
160
|
+
}
|
|
161
|
+
// Step 3: Area scoping
|
|
162
|
+
const scopeByArea = await confirm({
|
|
163
|
+
default: false,
|
|
164
|
+
message: "Scope to specific feature areas?",
|
|
165
|
+
});
|
|
166
|
+
if (scopeByArea) {
|
|
167
|
+
const areas = await input({
|
|
168
|
+
message: "Feature areas (comma-separated, e.g. groq,frameworks):",
|
|
169
|
+
});
|
|
170
|
+
if (areas.trim()) {
|
|
171
|
+
args.push("--area", areas.trim());
|
|
172
|
+
}
|
|
173
|
+
}
|
|
174
|
+
// Step 4: Debug mode
|
|
175
|
+
const debug = await confirm({
|
|
176
|
+
default: false,
|
|
177
|
+
message: "Enable debug mode? (run a subset of tests for fast feedback)",
|
|
178
|
+
});
|
|
179
|
+
if (debug) {
|
|
180
|
+
args.push("--debug");
|
|
181
|
+
const debugStyle = await select({
|
|
182
|
+
choices: [
|
|
183
|
+
{ name: "First 2 tests (default)", value: "default" },
|
|
184
|
+
{ name: "First N tests", value: "first-n" },
|
|
185
|
+
{ name: "Random sample", value: "sample" },
|
|
186
|
+
{ name: "Filter by pattern", value: "pattern" },
|
|
187
|
+
],
|
|
188
|
+
message: "Debug scope:",
|
|
189
|
+
});
|
|
190
|
+
if (debugStyle === "first-n") {
|
|
191
|
+
const n = await input({ default: "5", message: "Number of tests:" });
|
|
192
|
+
args.push("--debug-n", n);
|
|
193
|
+
}
|
|
194
|
+
else if (debugStyle === "sample") {
|
|
195
|
+
const n = await input({
|
|
196
|
+
default: "3",
|
|
197
|
+
message: "Sample size:",
|
|
198
|
+
});
|
|
199
|
+
args.push("--debug-sample", n);
|
|
200
|
+
}
|
|
201
|
+
else if (debugStyle === "pattern") {
|
|
202
|
+
const pattern = await input({
|
|
203
|
+
message: "Description regex (e.g. Blog, webhook):",
|
|
204
|
+
});
|
|
205
|
+
if (pattern.trim()) {
|
|
206
|
+
args.push("--debug-pattern", pattern.trim());
|
|
207
|
+
}
|
|
208
|
+
}
|
|
209
|
+
}
|
|
210
|
+
// Step 5: Common pipeline options
|
|
211
|
+
const dryRun = await confirm({
|
|
212
|
+
default: false,
|
|
213
|
+
message: "Dry run? (validate config only, no API calls)",
|
|
214
|
+
});
|
|
215
|
+
if (dryRun) {
|
|
216
|
+
args.push("--dry-run");
|
|
217
|
+
}
|
|
218
|
+
if (!dryRun) {
|
|
219
|
+
const compareAfter = await confirm({
|
|
220
|
+
default: false,
|
|
221
|
+
message: "Compare against baseline after evaluation?",
|
|
222
|
+
});
|
|
223
|
+
if (compareAfter) {
|
|
224
|
+
args.push("--compare");
|
|
225
|
+
}
|
|
226
|
+
}
|
|
227
|
+
// Step 6: Preview execution plan before running
|
|
228
|
+
if (!dryRun) {
|
|
229
|
+
const preview = await confirm({
|
|
230
|
+
default: true,
|
|
231
|
+
message: "Preview execution plan before running? (--explain --yes)",
|
|
232
|
+
});
|
|
233
|
+
if (preview) {
|
|
234
|
+
args.push("--explain", "--yes");
|
|
235
|
+
}
|
|
236
|
+
}
|
|
237
|
+
return { args, command: "pipeline" };
|
|
238
|
+
}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* lookup-doc command — search Sanity for documentation articles by keyword.
|
|
3
|
+
*
|
|
4
|
+
* Helps external contributors find the correct `slug` for canonicalDocs
|
|
5
|
+
* references without needing to browse the CMS or guess from URLs.
|
|
6
|
+
*
|
|
7
|
+
* Usage:
|
|
8
|
+
* ailf lookup-doc webhooks
|
|
9
|
+
* ailf lookup-doc "visual editing"
|
|
10
|
+
* ailf lookup-doc groq --limit 20
|
|
11
|
+
*
|
|
12
|
+
* @see docs/design-docs/canonical-doc-resolution.md
|
|
13
|
+
*/
|
|
14
|
+
import { Command } from "commander";
|
|
15
|
+
/**
 * Factory for the `lookup-doc` CLI command.
 *
 * @returns A commander `Command` instance for `lookup-doc`.
 */
export declare function createLookupDocCommand(): Command;
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* lookup-doc command — search Sanity for documentation articles by keyword.
|
|
3
|
+
*
|
|
4
|
+
* Helps external contributors find the correct `slug` for canonicalDocs
|
|
5
|
+
* references without needing to browse the CMS or guess from URLs.
|
|
6
|
+
*
|
|
7
|
+
* Usage:
|
|
8
|
+
* ailf lookup-doc webhooks
|
|
9
|
+
* ailf lookup-doc "visual editing"
|
|
10
|
+
* ailf lookup-doc groq --limit 20
|
|
11
|
+
*
|
|
12
|
+
* @see docs/design-docs/canonical-doc-resolution.md
|
|
13
|
+
*/
|
|
14
|
+
import { Command } from "commander";
|
|
15
|
+
/**
 * Builds the `lookup-doc` CLI command.
 *
 * When run, the command queries published `article` documents whose title or
 * slug matches the keyword, prints each slug with its title and section, and
 * ends with a ready-to-paste `canonicalDocs` snippet for the first result.
 *
 * @returns A configured commander `Command`; nothing is fetched until the
 *   command's action is invoked.
 */
export function createLookupDocCommand() {
    // commander invokes option parsers as (value, previous). Passing `parseInt`
    // directly makes the previous value the radix, so `--limit 5 --limit 20`
    // would be parsed in base 5. Always parse in base 10 instead.
    const parseLimit = (value) => Number.parseInt(value, 10);
    return new Command("lookup-doc")
        .description("Search Sanity docs by keyword — find slugs for canonicalDocs references")
        .argument("<keyword>", "Search keyword (matches title and slug)")
        .option("-l, --limit <n>", "Maximum results to show", parseLimit, 10)
        .option("-s, --source <name>", "Documentation source (from sources.yaml)")
        .action(async (keyword, opts) => {
            // Reject NaN / zero / negative limits before any network work: they
            // would otherwise reach the `[0...$limit]` slice as a bad parameter.
            if (!Number.isInteger(opts.limit) || opts.limit < 1) {
                console.error("❌ --limit must be a positive integer");
                process.exit(1);
            }
            const { getSanityClient } = await import("../sanity/client.js");
            const { loadSource } = await import("../sources.js");
            // Resolve source if provided; only fields the source defines override
            // the client's defaults.
            let clientOverrides;
            if (opts.source) {
                try {
                    const source = loadSource(opts.source);
                    clientOverrides = {};
                    if (source.dataset)
                        clientOverrides.dataset = source.dataset;
                    if (source.projectId)
                        clientOverrides.projectId = source.projectId;
                }
                catch (err) {
                    const msg = err instanceof Error ? err.message : String(err);
                    console.error(`❌ Failed to load source "${opts.source}": ${msg}`);
                    process.exit(1);
                }
            }
            const client = getSanityClient(clientOverrides);
            console.log(`\nSearching for "${keyword}"...\n`);
            // The keyword is passed as a query parameter rather than spliced into
            // the query text, so quotes and backslashes in user input cannot
            // alter the query. Articles without a slug are excluded because the
            // output below is built around the slug.
            const query = `*[_type == "article"
  && !(_id in path("drafts.**"))
  && defined(slug.current)
  && (
    title match $pattern
    || slug.current match $pattern
  )
] | order(title asc) [0...$limit] {
  title,
  "slug": slug.current,
  "section": primarySection->title,
  "sectionSlug": primarySection->slug.current
}`;
            const results = await client.fetch(query, {
                limit: opts.limit,
                pattern: `*${keyword}*`,
            });
            if (results.length === 0) {
                console.log(` No articles found matching "${keyword}".\n\n` +
                    " Tips:\n" +
                    " - Try a shorter or more general keyword\n" +
                    " - Use partial matches (e.g., 'webhook' instead of 'webhooks')\n");
                // Nothing more to print; return so the process ends normally
                // with exit code 0, the same way the success path does.
                return;
            }
            console.log(` Found ${results.length} article${results.length === 1 ? "" : "s"}:\n`);
            // Find longest slug for alignment (capped so one long slug does not
            // push every title far to the right).
            const maxSlugLen = Math.min(40, Math.max(...results.map((r) => r.slug.length)));
            for (const doc of results) {
                const section = doc.section ?? "Unknown";
                const paddedSlug = doc.slug.padEnd(maxSlugLen);
                console.log(` slug: ${paddedSlug} │ ${doc.title}`);
                console.log(` ${"".padEnd(maxSlugLen + 6)} │ Section: ${section}\n`);
            }
            console.log(" Usage in .ailf/tasks/*.yaml:\n");
            console.log(" canonicalDocs:");
            console.log(` - slug: ${results[0].slug}`);
            console.log(` reason: "${results[0].title}"`);
            if (results[0].sectionSlug) {
                console.log(`\n Or use path: ${results[0].sectionSlug}/${results[0].slug}`);
            }
            console.log();
        });
}
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* measure-retrieval command — evaluate Sanity text search retrieval quality.
|
|
3
|
+
*/
|
|
4
|
+
import { mkdirSync, writeFileSync } from "fs";
|
|
5
|
+
import { dirname, join, resolve } from "path";
|
|
6
|
+
import { fileURLToPath } from "url";
|
|
7
|
+
import { Command } from "commander";
|
|
8
|
+
import { getSanityClient } from "../sanity/client.js";
|
|
9
|
+
import { formatRetrievalTable, measureRetrieval, } from "../pipeline/measure-retrieval.js";
|
|
10
|
+
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
11
|
+
const ROOT = resolve(__dirname, "..", "..");
|
|
12
|
+
// ---------------------------------------------------------------------------
|
|
13
|
+
// Sanity text search retriever
|
|
14
|
+
// ---------------------------------------------------------------------------
|
|
15
|
+
/**
 * Runs a scored GROQ text search over published `article` documents and
 * returns the slugs of the best matches, highest `_score` first.
 *
 * Title matches are boosted by 3 and body-text matches by 1.
 *
 * @param query - Search text, bound to the `$query` parameter.
 * @param k - Maximum number of results to return (defaults to 10).
 * @returns The `slug.current` value of each hit, in ranked order.
 */
async function retrieveDocsForQuery(query, k = 10) {
    const rankedArticlesGroq = `
*[_type == "article" && !(_id in path("drafts.**"))]
| score(
boost(title match $query, 3),
boost(pt::text(content) match $query, 1)
)
| order(_score desc)
[0...$k] {
"slug": slug.current,
_score
}
`;
    const params = { k, query };
    const hits = await getSanityClient().fetch(rankedArticlesGroq, params);
    return hits.map((hit) => hit.slug);
}
|
|
31
|
+
// ---------------------------------------------------------------------------
|
|
32
|
+
// Command factory
|
|
33
|
+
// ---------------------------------------------------------------------------
|
|
34
|
+
/**
 * Builds the `measure-retrieval` CLI command.
 *
 * When run, the command calls `measureRetrieval` with the Sanity text-search
 * retriever, prints per-task Recall@5 / Recall@10 / NDCG@10 as progress, prints
 * the summary table, and writes the summary as JSON to
 * `results/latest/retrieval-results.json` under ROOT.
 *
 * On failure the action sets `process.exitCode = 1` and logs the error instead
 * of rethrowing.
 *
 * @returns A configured commander `Command`.
 */
export function createMeasureRetrievalCommand() {
    return new Command("measure-retrieval")
        .description("Measure retrieval quality against canonical document annotations")
        .action(async () => {
            try {
                console.log("=== Sanity AI Literacy — Retrieval Quality Measurement ===\n");
                const summary = await measureRetrieval({
                    onProgress: (_area, taskId, result) => {
                        // Metrics are multiplied by 100 and shown as percentages.
                        console.log(` ${taskId}:`);
                        console.log(` Recall@5: ${(result.recall_at_5 * 100).toFixed(1)}%`);
                        console.log(` Recall@10: ${(result.recall_at_10 * 100).toFixed(1)}%`);
                        console.log(` NDCG@10: ${(result.ndcg_at_10 * 100).toFixed(1)}%`);
                    },
                    retriever: retrieveDocsForQuery,
                    rootDir: ROOT,
                });
                // Print summary
                console.log();
                console.log(formatRetrievalTable(summary));
                // Persist results
                const outDir = join(ROOT, "results", "latest");
                mkdirSync(outDir, { recursive: true });
                writeFileSync(join(outDir, "retrieval-results.json"), JSON.stringify(summary, null, 2));
                console.log("\nResults written to results/latest/retrieval-results.json");
            }
            catch (err) {
                process.exitCode = 1;
                // Always report something: a thrown non-Error value (string,
                // plain object, ...) previously failed with no output at all.
                console.error(err instanceof Error ? err.message : String(err));
            }
        });
}
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pipeline action — resolves CLI options and orchestrates pipeline steps.
|
|
3
|
+
*
|
|
4
|
+
* This file is the thin CLI-to-orchestrator bridge. The bulk of execution
|
|
5
|
+
* logic lives in packages/eval/src/orchestration/.
|
|
6
|
+
*
|
|
7
|
+
* Responsibilities:
|
|
8
|
+
* - Resolve CLI flags into typed ResolvedOptions
|
|
9
|
+
* - Delegate to the PipelineOrchestrator for step execution
|
|
10
|
+
*
|
|
11
|
+
* @see packages/eval/src/orchestration/ for the step-based pipeline
|
|
12
|
+
*/
|
|
13
|
+
import { type ImpactSummary } from "../pipeline/reverse-mapping.js";
|
|
14
|
+
import type { DebugOptions, EvalMode } from "../pipeline/types.js";
|
|
15
|
+
import type { PipelineCliOptions } from "./pipeline.js";
|
|
16
|
+
/**
 * Typed result of resolving the pipeline command's CLI flags.
 *
 * Members are grouped into always-present values and optional values; each
 * group is listed alphabetically.
 */
export interface ResolvedOptions {
    // Always present.
    allowedOriginArgs: string[];
    compareEnabled: boolean;
    discoveryReportEnabled: boolean;
    dryRun: boolean;
    gapAnalysisEnabled: boolean;
    headerArgs: string[];
    mode: EvalMode;
    noCache: boolean;
    noRemoteCache: boolean;
    publishEnabled: boolean;
    readinessEnabled: boolean;
    sanityDocumentArgs: string[];
    searchMode: string;
    skipEval: boolean;
    skipFetch: boolean;
    urlArgs: string[];

    // Optional.
    areaOption?: string;
    beforeOption?: string;
    changedDocsOption?: string;
    compareBaseline?: string;
    compareThreshold?: number;
    concurrency?: number;
    datasetOverride?: string;
    debug?: DebugOptions;
    graderReplications?: number;
    impactSummary?: ImpactSummary;
    outputPath?: string;
    perspectiveOverride?: string;
    projectIdOverride?: string;
    promptfooUrl?: string;
    publishTag?: string;
    reportDataset?: string;
    reportProjectId?: string;
    repoTasksPath?: string;
    source?: string;
    studioOriginOverride?: string;
    taskOption?: string;
    taskSourceType?: "content-lake" | "yaml";
}
|
|
56
|
+
/**
 * Pure option resolution — computes ResolvedOptions from CLI flags without
 * any side effects. Safe to call from --explain without mutating process.env.
 *
 * Exported so the plan builder can call it independently.
 *
 * @param opts - CLI flags for the pipeline command, as a PipelineCliOptions.
 * @returns The typed ResolvedOptions derived from `opts`.
 */
|
|
62
|
+
export declare function computeResolvedOptions(opts: PipelineCliOptions): ResolvedOptions;
|
|
63
|
+
/**
 * Execute the evaluation pipeline.
 *
 * 1. Resolve CLI options into typed ResolvedOptions
 * 2. Build AppContext (composition root wires adapters)
 * 3. Build step sequence from context
 * 4. Delegate to the PipelineOrchestrator
 *
 * @param cliOpts - CLI flags for the pipeline command, as a PipelineCliOptions.
 * @returns A promise that carries no value; presumably it settles once the
 *   orchestrator has finished — confirm against the implementation.
 */
|
|
71
|
+
export declare function executePipeline(cliOpts: PipelineCliOptions): Promise<void>;
|