@sanity/ailf 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +89 -0
- package/bin/ailf.js +64 -0
- package/canonical/grader-references/README.md +88 -0
- package/canonical/grader-references/groq.yaml +234 -0
- package/canonical/grader-references/studio-setup.yaml +275 -0
- package/canonical/reference-solutions/.gitkeep +1 -0
- package/canonical/reference-solutions/frameworks/nuxt.ts +119 -0
- package/canonical/reference-solutions/frameworks/remix.tsx +100 -0
- package/canonical/reference-solutions/functions/publish-webhook.ts +60 -0
- package/canonical/reference-solutions/groq/advanced-filtering.ts +379 -0
- package/canonical/reference-solutions/groq/blog-queries.ts +137 -0
- package/canonical/reference-solutions/groq/joins-references.ts +300 -0
- package/canonical/reference-solutions/nextjs/app-router-integration.tsx +128 -0
- package/canonical/reference-solutions/studio-setup/blog-schema.ts +143 -0
- package/canonical/reference-solutions/studio-setup/custom-tool.tsx +78 -0
- package/canonical/reference-solutions/visual-editing/live-preview.tsx +137 -0
- package/canonical/reference-solutions/visual-editing/presentation-nextjs.tsx +130 -0
- package/config/airbyte/ai_literacy_framework.connector.yaml +639 -0
- package/config/bigquery/README.md +74 -0
- package/config/bigquery/views/area_scores.sql +87 -0
- package/config/bigquery/views/reports.sql +49 -0
- package/config/features.yaml +116 -0
- package/config/models.yaml +115 -0
- package/config/prompts.yaml +75 -0
- package/config/rubrics.yaml +62 -0
- package/config/schedules.yaml +43 -0
- package/config/sinks.yaml +54 -0
- package/config/sources.yaml +51 -0
- package/config/thresholds.yaml +49 -0
- package/dist/_vendor/ailf-core/examples/index.d.ts +190 -0
- package/dist/_vendor/ailf-core/examples/index.js +285 -0
- package/dist/_vendor/ailf-core/index.d.ts +17 -0
- package/dist/_vendor/ailf-core/index.js +17 -0
- package/dist/_vendor/ailf-core/ports/cache-store.d.ts +72 -0
- package/dist/_vendor/ailf-core/ports/cache-store.js +17 -0
- package/dist/_vendor/ailf-core/ports/config-source.d.ts +33 -0
- package/dist/_vendor/ailf-core/ports/config-source.js +15 -0
- package/dist/_vendor/ailf-core/ports/context.d.ts +172 -0
- package/dist/_vendor/ailf-core/ports/context.js +14 -0
- package/dist/_vendor/ailf-core/ports/doc-fetcher.d.ts +131 -0
- package/dist/_vendor/ailf-core/ports/doc-fetcher.js +12 -0
- package/dist/_vendor/ailf-core/ports/eval-runner.d.ts +24 -0
- package/dist/_vendor/ailf-core/ports/eval-runner.js +8 -0
- package/dist/_vendor/ailf-core/ports/index.d.ts +15 -0
- package/dist/_vendor/ailf-core/ports/index.js +7 -0
- package/dist/_vendor/ailf-core/ports/logger.d.ts +36 -0
- package/dist/_vendor/ailf-core/ports/logger.js +11 -0
- package/dist/_vendor/ailf-core/ports/pipeline-step.d.ts +46 -0
- package/dist/_vendor/ailf-core/ports/pipeline-step.js +8 -0
- package/dist/_vendor/ailf-core/ports/task-source.d.ts +159 -0
- package/dist/_vendor/ailf-core/ports/task-source.js +72 -0
- package/dist/_vendor/ailf-core/schemas/callback-payload.d.ts +24 -0
- package/dist/_vendor/ailf-core/schemas/callback-payload.js +29 -0
- package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +55 -0
- package/dist/_vendor/ailf-core/schemas/eval-config.js +78 -0
- package/dist/_vendor/ailf-core/schemas/index.d.ts +16 -0
- package/dist/_vendor/ailf-core/schemas/index.js +16 -0
- package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +125 -0
- package/dist/_vendor/ailf-core/schemas/pipeline-request.js +67 -0
- package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +531 -0
- package/dist/_vendor/ailf-core/schemas/pipeline.js +318 -0
- package/dist/_vendor/ailf-core/schemas/schedules.d.ts +68 -0
- package/dist/_vendor/ailf-core/schemas/schedules.js +74 -0
- package/dist/_vendor/ailf-core/schemas/sinks.d.ts +207 -0
- package/dist/_vendor/ailf-core/schemas/sinks.js +108 -0
- package/dist/_vendor/ailf-core/services/comparison-formatters.d.ts +18 -0
- package/dist/_vendor/ailf-core/services/comparison-formatters.js +189 -0
- package/dist/_vendor/ailf-core/services/config-helpers.d.ts +41 -0
- package/dist/_vendor/ailf-core/services/config-helpers.js +86 -0
- package/dist/_vendor/ailf-core/services/index.d.ts +12 -0
- package/dist/_vendor/ailf-core/services/index.js +12 -0
- package/dist/_vendor/ailf-core/services/scoring.d.ts +49 -0
- package/dist/_vendor/ailf-core/services/scoring.js +222 -0
- package/dist/_vendor/ailf-core/types/index.d.ts +1082 -0
- package/dist/_vendor/ailf-core/types/index.js +21 -0
- package/dist/_vendor/ailf-core/types/scoring-input.d.ts +54 -0
- package/dist/_vendor/ailf-core/types/scoring-input.js +9 -0
- package/dist/_vendor/ailf-shared/dimension-names.d.ts +21 -0
- package/dist/_vendor/ailf-shared/dimension-names.js +27 -0
- package/dist/_vendor/ailf-shared/document-ref.d.ts +29 -0
- package/dist/_vendor/ailf-shared/document-ref.js +1 -0
- package/dist/_vendor/ailf-shared/eval-modes.d.ts +12 -0
- package/dist/_vendor/ailf-shared/eval-modes.js +8 -0
- package/dist/_vendor/ailf-shared/index.d.ts +16 -0
- package/dist/_vendor/ailf-shared/index.js +16 -0
- package/dist/_vendor/ailf-shared/noise-threshold.d.ts +9 -0
- package/dist/_vendor/ailf-shared/noise-threshold.js +9 -0
- package/dist/_vendor/ailf-shared/score-grades.d.ts +17 -0
- package/dist/_vendor/ailf-shared/score-grades.js +23 -0
- package/dist/adapters/cache/content-lake-cache.d.ts +24 -0
- package/dist/adapters/cache/content-lake-cache.js +59 -0
- package/dist/adapters/cache/filesystem-cache.d.ts +18 -0
- package/dist/adapters/cache/filesystem-cache.js +54 -0
- package/dist/adapters/cache/index.d.ts +2 -0
- package/dist/adapters/cache/index.js +2 -0
- package/dist/adapters/config-sources/cli-config-adapter.d.ts +17 -0
- package/dist/adapters/config-sources/cli-config-adapter.js +23 -0
- package/dist/adapters/config-sources/file-config-adapter.d.ts +26 -0
- package/dist/adapters/config-sources/file-config-adapter.js +96 -0
- package/dist/adapters/config-sources/index.d.ts +2 -0
- package/dist/adapters/config-sources/index.js +2 -0
- package/dist/adapters/doc-fetchers/index.d.ts +1 -0
- package/dist/adapters/doc-fetchers/index.js +1 -0
- package/dist/adapters/doc-fetchers/sanity-doc-fetcher.d.ts +76 -0
- package/dist/adapters/doc-fetchers/sanity-doc-fetcher.js +620 -0
- package/dist/adapters/eval-runners/index.d.ts +1 -0
- package/dist/adapters/eval-runners/index.js +1 -0
- package/dist/adapters/eval-runners/promptfoo-eval-adapter.d.ts +14 -0
- package/dist/adapters/eval-runners/promptfoo-eval-adapter.js +63 -0
- package/dist/adapters/index.d.ts +12 -0
- package/dist/adapters/index.js +12 -0
- package/dist/adapters/loggers/console-logger.d.ts +22 -0
- package/dist/adapters/loggers/console-logger.js +54 -0
- package/dist/adapters/loggers/index.d.ts +9 -0
- package/dist/adapters/loggers/index.js +9 -0
- package/dist/adapters/loggers/json-logger.d.ts +18 -0
- package/dist/adapters/loggers/json-logger.js +33 -0
- package/dist/adapters/loggers/quiet-logger.d.ts +16 -0
- package/dist/adapters/loggers/quiet-logger.js +30 -0
- package/dist/adapters/task-sources/composite-task-source.d.ts +20 -0
- package/dist/adapters/task-sources/composite-task-source.js +59 -0
- package/dist/adapters/task-sources/content-lake-task-source.d.ts +20 -0
- package/dist/adapters/task-sources/content-lake-task-source.js +219 -0
- package/dist/adapters/task-sources/index.d.ts +7 -0
- package/dist/adapters/task-sources/index.js +7 -0
- package/dist/adapters/task-sources/repo-schemas.d.ts +245 -0
- package/dist/adapters/task-sources/repo-schemas.js +234 -0
- package/dist/adapters/task-sources/repo-task-source.d.ts +22 -0
- package/dist/adapters/task-sources/repo-task-source.js +104 -0
- package/dist/adapters/task-sources/repo-trigger.d.ts +52 -0
- package/dist/adapters/task-sources/repo-trigger.js +153 -0
- package/dist/adapters/task-sources/repo-validation.d.ts +49 -0
- package/dist/adapters/task-sources/repo-validation.js +164 -0
- package/dist/adapters/task-sources/yaml-task-source.d.ts +18 -0
- package/dist/adapters/task-sources/yaml-task-source.js +136 -0
- package/dist/agent-observer/agentic-provider.d.ts +132 -0
- package/dist/agent-observer/agentic-provider.js +983 -0
- package/dist/agent-observer/classifier.d.ts +62 -0
- package/dist/agent-observer/classifier.js +269 -0
- package/dist/agent-observer/index.d.ts +7 -0
- package/dist/agent-observer/index.js +4 -0
- package/dist/agent-observer/pricing.d.ts +35 -0
- package/dist/agent-observer/pricing.js +82 -0
- package/dist/agent-observer/provider.d.ts +77 -0
- package/dist/agent-observer/provider.js +151 -0
- package/dist/agent-observer/proxy.d.ts +91 -0
- package/dist/agent-observer/proxy.js +321 -0
- package/dist/agent-observer/test-imports.d.ts +7 -0
- package/dist/agent-observer/test-imports.js +185 -0
- package/dist/agent-observer/types.d.ts +137 -0
- package/dist/agent-observer/types.js +16 -0
- package/dist/assertions/source-isolation.d.ts +72 -0
- package/dist/assertions/source-isolation.js +117 -0
- package/dist/cli.d.ts +24 -0
- package/dist/cli.js +199 -0
- package/dist/commands/agent-report.d.ts +5 -0
- package/dist/commands/agent-report.js +69 -0
- package/dist/commands/baseline.d.ts +9 -0
- package/dist/commands/baseline.js +141 -0
- package/dist/commands/cache.d.ts +13 -0
- package/dist/commands/cache.js +135 -0
- package/dist/commands/calculate-scores.d.ts +8 -0
- package/dist/commands/calculate-scores.js +48 -0
- package/dist/commands/compare.d.ts +8 -0
- package/dist/commands/compare.js +120 -0
- package/dist/commands/completion.d.ts +18 -0
- package/dist/commands/completion.js +260 -0
- package/dist/commands/coverage-audit.d.ts +7 -0
- package/dist/commands/coverage-audit.js +40 -0
- package/dist/commands/discovery-report.d.ts +10 -0
- package/dist/commands/discovery-report.js +44 -0
- package/dist/commands/eval.d.ts +9 -0
- package/dist/commands/eval.js +35 -0
- package/dist/commands/explain-handler.d.ts +34 -0
- package/dist/commands/explain-handler.js +719 -0
- package/dist/commands/fetch-docs.d.ts +8 -0
- package/dist/commands/fetch-docs.js +128 -0
- package/dist/commands/generate-configs.d.ts +8 -0
- package/dist/commands/generate-configs.js +46 -0
- package/dist/commands/grader/index.d.ts +11 -0
- package/dist/commands/grader/index.js +118 -0
- package/dist/commands/init.d.ts +19 -0
- package/dist/commands/init.js +150 -0
- package/dist/commands/interactive.d.ts +12 -0
- package/dist/commands/interactive.js +238 -0
- package/dist/commands/lookup-doc.d.ts +15 -0
- package/dist/commands/lookup-doc.js +84 -0
- package/dist/commands/measure-retrieval.d.ts +5 -0
- package/dist/commands/measure-retrieval.js +65 -0
- package/dist/commands/pipeline-action.d.ts +71 -0
- package/dist/commands/pipeline-action.js +305 -0
- package/dist/commands/pipeline.d.ts +62 -0
- package/dist/commands/pipeline.js +53 -0
- package/dist/commands/pr-comment.d.ts +8 -0
- package/dist/commands/pr-comment.js +47 -0
- package/dist/commands/publish.d.ts +26 -0
- package/dist/commands/publish.js +253 -0
- package/dist/commands/readiness-report.d.ts +10 -0
- package/dist/commands/readiness-report.js +104 -0
- package/dist/commands/shared/options.d.ts +29 -0
- package/dist/commands/shared/options.js +57 -0
- package/dist/commands/update-quality-scores.d.ts +5 -0
- package/dist/commands/update-quality-scores.js +20 -0
- package/dist/commands/validate-tasks.d.ts +16 -0
- package/dist/commands/validate-tasks.js +93 -0
- package/dist/commands/validate.d.ts +9 -0
- package/dist/commands/validate.js +73 -0
- package/dist/commands/webhook-server.d.ts +5 -0
- package/dist/commands/webhook-server.js +30 -0
- package/dist/commands/weekly-digest.d.ts +10 -0
- package/dist/commands/weekly-digest.js +104 -0
- package/dist/composition-root.d.ts +26 -0
- package/dist/composition-root.js +107 -0
- package/dist/interpolate.d.ts +26 -0
- package/dist/interpolate.js +70 -0
- package/dist/job-store.d.ts +104 -0
- package/dist/job-store.js +188 -0
- package/dist/lib/agent-behavior-report.d.ts +8 -0
- package/dist/lib/agent-behavior-report.js +185 -0
- package/dist/lib/baseline.d.ts +19 -0
- package/dist/lib/baseline.js +153 -0
- package/dist/lib/calculate-scores.d.ts +23 -0
- package/dist/lib/calculate-scores.js +42 -0
- package/dist/lib/compare.d.ts +18 -0
- package/dist/lib/compare.js +170 -0
- package/dist/lib/coverage-audit.d.ts +4 -0
- package/dist/lib/coverage-audit.js +42 -0
- package/dist/lib/discovery-report.d.ts +13 -0
- package/dist/lib/discovery-report.js +57 -0
- package/dist/lib/fetch-docs.d.ts +30 -0
- package/dist/lib/fetch-docs.js +171 -0
- package/dist/lib/generate-configs.d.ts +25 -0
- package/dist/lib/generate-configs.js +42 -0
- package/dist/lib/grader-api.d.ts +21 -0
- package/dist/lib/grader-api.js +34 -0
- package/dist/lib/grader-compare.d.ts +19 -0
- package/dist/lib/grader-compare.js +91 -0
- package/dist/lib/grader-consistency.d.ts +27 -0
- package/dist/lib/grader-consistency.js +79 -0
- package/dist/lib/grader-sensitivity.d.ts +19 -0
- package/dist/lib/grader-sensitivity.js +75 -0
- package/dist/lib/grader-validate.d.ts +19 -0
- package/dist/lib/grader-validate.js +78 -0
- package/dist/lib/measure-retrieval.d.ts +14 -0
- package/dist/lib/measure-retrieval.js +71 -0
- package/dist/lib/pr-comment.d.ts +16 -0
- package/dist/lib/pr-comment.js +28 -0
- package/dist/lib/readiness-report.d.ts +13 -0
- package/dist/lib/readiness-report.js +108 -0
- package/dist/lib/webhook-server.d.ts +11 -0
- package/dist/lib/webhook-server.js +24 -0
- package/dist/lib/weekly-digest.d.ts +24 -0
- package/dist/lib/weekly-digest.js +148 -0
- package/dist/orchestration/build-app-context.d.ts +27 -0
- package/dist/orchestration/build-app-context.js +81 -0
- package/dist/orchestration/build-step-sequence.d.ts +15 -0
- package/dist/orchestration/build-step-sequence.js +84 -0
- package/dist/orchestration/config-to-source-overrides.d.ts +9 -0
- package/dist/orchestration/config-to-source-overrides.js +28 -0
- package/dist/orchestration/env-bridge.d.ts +21 -0
- package/dist/orchestration/env-bridge.js +66 -0
- package/dist/orchestration/index.d.ts +11 -0
- package/dist/orchestration/index.js +11 -0
- package/dist/orchestration/pipeline-orchestrator.d.ts +24 -0
- package/dist/orchestration/pipeline-orchestrator.js +153 -0
- package/dist/orchestration/step-runner.d.ts +20 -0
- package/dist/orchestration/step-runner.js +88 -0
- package/dist/orchestration/steps/calculate-scores-step.d.ts +13 -0
- package/dist/orchestration/steps/calculate-scores-step.js +95 -0
- package/dist/orchestration/steps/callback-step.d.ts +24 -0
- package/dist/orchestration/steps/callback-step.js +76 -0
- package/dist/orchestration/steps/compare-step.d.ts +14 -0
- package/dist/orchestration/steps/compare-step.js +92 -0
- package/dist/orchestration/steps/discovery-report-step.d.ts +13 -0
- package/dist/orchestration/steps/discovery-report-step.js +55 -0
- package/dist/orchestration/steps/fetch-docs-shell.d.ts +17 -0
- package/dist/orchestration/steps/fetch-docs-shell.js +30 -0
- package/dist/orchestration/steps/fetch-docs-step.d.ts +14 -0
- package/dist/orchestration/steps/fetch-docs-step.js +135 -0
- package/dist/orchestration/steps/gap-analysis-step.d.ts +16 -0
- package/dist/orchestration/steps/gap-analysis-step.js +136 -0
- package/dist/orchestration/steps/generate-configs-step.d.ts +14 -0
- package/dist/orchestration/steps/generate-configs-step.js +85 -0
- package/dist/orchestration/steps/grader-consistency-step.d.ts +13 -0
- package/dist/orchestration/steps/grader-consistency-step.js +64 -0
- package/dist/orchestration/steps/index.d.ts +19 -0
- package/dist/orchestration/steps/index.js +19 -0
- package/dist/orchestration/steps/mirror-repo-tasks-step.d.ts +21 -0
- package/dist/orchestration/steps/mirror-repo-tasks-step.js +94 -0
- package/dist/orchestration/steps/publish-report-step.d.ts +26 -0
- package/dist/orchestration/steps/publish-report-step.js +216 -0
- package/dist/orchestration/steps/readiness-step.d.ts +13 -0
- package/dist/orchestration/steps/readiness-step.js +91 -0
- package/dist/orchestration/steps/report-step.d.ts +12 -0
- package/dist/orchestration/steps/report-step.js +49 -0
- package/dist/orchestration/steps/run-eval-step.d.ts +17 -0
- package/dist/orchestration/steps/run-eval-step.js +195 -0
- package/dist/orchestration/steps/validate-step.d.ts +12 -0
- package/dist/orchestration/steps/validate-step.js +41 -0
- package/dist/pipeline/agent-behavior-report.d.ts +53 -0
- package/dist/pipeline/agent-behavior-report.js +132 -0
- package/dist/pipeline/attribution.d.ts +47 -0
- package/dist/pipeline/attribution.js +226 -0
- package/dist/pipeline/baseline.d.ts +37 -0
- package/dist/pipeline/baseline.js +141 -0
- package/dist/pipeline/cache.d.ts +101 -0
- package/dist/pipeline/cache.js +283 -0
- package/dist/pipeline/calculate-scores.d.ts +102 -0
- package/dist/pipeline/calculate-scores.js +1128 -0
- package/dist/pipeline/callback-delivery.d.ts +50 -0
- package/dist/pipeline/callback-delivery.js +89 -0
- package/dist/pipeline/checks.d.ts +39 -0
- package/dist/pipeline/checks.js +280 -0
- package/dist/pipeline/classify-url.d.ts +61 -0
- package/dist/pipeline/classify-url.js +93 -0
- package/dist/pipeline/compare.d.ts +31 -0
- package/dist/pipeline/compare.js +208 -0
- package/dist/pipeline/coverage-audit.d.ts +39 -0
- package/dist/pipeline/coverage-audit.js +165 -0
- package/dist/pipeline/degradations.d.ts +85 -0
- package/dist/pipeline/degradations.js +242 -0
- package/dist/pipeline/discovery-report.d.ts +55 -0
- package/dist/pipeline/discovery-report.js +178 -0
- package/dist/pipeline/eval-constants.d.ts +68 -0
- package/dist/pipeline/eval-constants.js +111 -0
- package/dist/pipeline/eval-fingerprint.d.ts +66 -0
- package/dist/pipeline/eval-fingerprint.js +175 -0
- package/dist/pipeline/expand-tasks.d.ts +220 -0
- package/dist/pipeline/expand-tasks.js +421 -0
- package/dist/pipeline/failure-modes.d.ts +46 -0
- package/dist/pipeline/failure-modes.js +348 -0
- package/dist/pipeline/fetch-url-content.d.ts +44 -0
- package/dist/pipeline/fetch-url-content.js +93 -0
- package/dist/pipeline/gap-analysis.d.ts +48 -0
- package/dist/pipeline/gap-analysis.js +231 -0
- package/dist/pipeline/generate-configs.d.ts +72 -0
- package/dist/pipeline/generate-configs.js +395 -0
- package/dist/pipeline/grader-api.d.ts +49 -0
- package/dist/pipeline/grader-api.js +200 -0
- package/dist/pipeline/grader-compare-runner.d.ts +44 -0
- package/dist/pipeline/grader-compare-runner.js +301 -0
- package/dist/pipeline/grader-comparison.d.ts +111 -0
- package/dist/pipeline/grader-comparison.js +161 -0
- package/dist/pipeline/grader-consistency-runner.d.ts +60 -0
- package/dist/pipeline/grader-consistency-runner.js +270 -0
- package/dist/pipeline/grader-consistency.d.ts +103 -0
- package/dist/pipeline/grader-consistency.js +146 -0
- package/dist/pipeline/grader-sensitivity-runner.d.ts +40 -0
- package/dist/pipeline/grader-sensitivity-runner.js +282 -0
- package/dist/pipeline/grader-sensitivity.d.ts +94 -0
- package/dist/pipeline/grader-sensitivity.js +144 -0
- package/dist/pipeline/grader-validate-runner.d.ts +38 -0
- package/dist/pipeline/grader-validate-runner.js +229 -0
- package/dist/pipeline/grader-validation.d.ts +107 -0
- package/dist/pipeline/grader-validation.js +169 -0
- package/dist/pipeline/map-request-to-config.d.ts +19 -0
- package/dist/pipeline/map-request-to-config.js +80 -0
- package/dist/pipeline/measure-retrieval.d.ts +59 -0
- package/dist/pipeline/measure-retrieval.js +111 -0
- package/dist/pipeline/mirror-repo-tasks.d.ts +86 -0
- package/dist/pipeline/mirror-repo-tasks.js +350 -0
- package/dist/pipeline/plan-format.d.ts +33 -0
- package/dist/pipeline/plan-format.js +202 -0
- package/dist/pipeline/plan.d.ts +169 -0
- package/dist/pipeline/plan.js +708 -0
- package/dist/pipeline/pr-comment.d.ts +19 -0
- package/dist/pipeline/pr-comment.js +502 -0
- package/dist/pipeline/probe.d.ts +52 -0
- package/dist/pipeline/probe.js +390 -0
- package/dist/pipeline/provenance.d.ts +47 -0
- package/dist/pipeline/provenance.js +146 -0
- package/dist/pipeline/readiness-report.d.ts +87 -0
- package/dist/pipeline/readiness-report.js +205 -0
- package/dist/pipeline/release-classification.d.ts +54 -0
- package/dist/pipeline/release-classification.js +238 -0
- package/dist/pipeline/release-report.d.ts +37 -0
- package/dist/pipeline/release-report.js +222 -0
- package/dist/pipeline/repo-eval-comment.d.ts +37 -0
- package/dist/pipeline/repo-eval-comment.js +165 -0
- package/dist/pipeline/repo-threshold-evaluator.d.ts +89 -0
- package/dist/pipeline/repo-threshold-evaluator.js +162 -0
- package/dist/pipeline/resolve-mappings.d.ts +35 -0
- package/dist/pipeline/resolve-mappings.js +72 -0
- package/dist/pipeline/retrieval-metrics.d.ts +39 -0
- package/dist/pipeline/retrieval-metrics.js +136 -0
- package/dist/pipeline/reverse-mapping.d.ts +67 -0
- package/dist/pipeline/reverse-mapping.js +88 -0
- package/dist/pipeline/schemas.d.ts +9 -0
- package/dist/pipeline/schemas.js +9 -0
- package/dist/pipeline/steps/calculate-scores-step.d.ts +11 -0
- package/dist/pipeline/steps/calculate-scores-step.js +89 -0
- package/dist/pipeline/steps/compare-step.d.ts +18 -0
- package/dist/pipeline/steps/compare-step.js +90 -0
- package/dist/pipeline/steps/eval-step.d.ts +53 -0
- package/dist/pipeline/steps/eval-step.js +347 -0
- package/dist/pipeline/steps/fetch-docs-step.d.ts +11 -0
- package/dist/pipeline/steps/fetch-docs-step.js +84 -0
- package/dist/pipeline/steps/generate-configs-step.d.ts +11 -0
- package/dist/pipeline/steps/generate-configs-step.js +98 -0
- package/dist/pipeline/steps/grader-consistency-step.d.ts +21 -0
- package/dist/pipeline/steps/grader-consistency-step.js +74 -0
- package/dist/pipeline/steps/publish-report-step.d.ts +57 -0
- package/dist/pipeline/steps/publish-report-step.js +243 -0
- package/dist/pipeline/steps/report-step.d.ts +13 -0
- package/dist/pipeline/steps/report-step.js +56 -0
- package/dist/pipeline/steps/update-scores-step.d.ts +11 -0
- package/dist/pipeline/steps/update-scores-step.js +42 -0
- package/dist/pipeline/targeted-loo.d.ts +88 -0
- package/dist/pipeline/targeted-loo.js +203 -0
- package/dist/pipeline/thresholds.d.ts +27 -0
- package/dist/pipeline/thresholds.js +245 -0
- package/dist/pipeline/types.d.ts +10 -0
- package/dist/pipeline/types.js +10 -0
- package/dist/pipeline/validate.d.ts +67 -0
- package/dist/pipeline/validate.js +406 -0
- package/dist/pipeline/webhook-server.d.ts +37 -0
- package/dist/pipeline/webhook-server.js +133 -0
- package/dist/report-store.d.ts +84 -0
- package/dist/report-store.js +208 -0
- package/dist/sanity/client.d.ts +38 -0
- package/dist/sanity/client.js +86 -0
- package/dist/sanity/portable-text.d.ts +11 -0
- package/dist/sanity/portable-text.js +211 -0
- package/dist/sanity/queries.d.ts +133 -0
- package/dist/sanity/queries.js +300 -0
- package/dist/schedules/digest.d.ts +116 -0
- package/dist/schedules/digest.js +156 -0
- package/dist/schedules/index.d.ts +12 -0
- package/dist/schedules/index.js +10 -0
- package/dist/schedules/loader.d.ts +31 -0
- package/dist/schedules/loader.js +73 -0
- package/dist/schedules/schema.d.ts +9 -0
- package/dist/schedules/schema.js +9 -0
- package/dist/scripts/agent-behavior-report.d.ts +19 -0
- package/dist/scripts/agent-behavior-report.js +315 -0
- package/dist/scripts/baseline.d.ts +43 -0
- package/dist/scripts/baseline.js +267 -0
- package/dist/scripts/calculate-scores.d.ts +166 -0
- package/dist/scripts/calculate-scores.js +1296 -0
- package/dist/scripts/compare.d.ts +22 -0
- package/dist/scripts/compare.js +334 -0
- package/dist/scripts/coverage-audit.d.ts +44 -0
- package/dist/scripts/coverage-audit.js +209 -0
- package/dist/scripts/debug-eval.d.ts +19 -0
- package/dist/scripts/debug-eval.js +73 -0
- package/dist/scripts/discovery-report.d.ts +58 -0
- package/dist/scripts/discovery-report.js +250 -0
- package/dist/scripts/fetch-docs.d.ts +35 -0
- package/dist/scripts/fetch-docs.js +472 -0
- package/dist/scripts/generate-configs.d.ts +66 -0
- package/dist/scripts/generate-configs.js +459 -0
- package/dist/scripts/grader-api.d.ts +27 -0
- package/dist/scripts/grader-api.js +206 -0
- package/dist/scripts/grader-compare.d.ts +22 -0
- package/dist/scripts/grader-compare.js +368 -0
- package/dist/scripts/grader-consistency.d.ts +20 -0
- package/dist/scripts/grader-consistency.js +313 -0
- package/dist/scripts/grader-sensitivity.d.ts +22 -0
- package/dist/scripts/grader-sensitivity.js +354 -0
- package/dist/scripts/grader-validate.d.ts +19 -0
- package/dist/scripts/grader-validate.js +267 -0
- package/dist/scripts/measure-retrieval.d.ts +10 -0
- package/dist/scripts/measure-retrieval.js +145 -0
- package/dist/scripts/migrate-tasks-to-content-lake.d.ts +24 -0
- package/dist/scripts/migrate-tasks-to-content-lake.js +327 -0
- package/dist/scripts/pipeline.d.ts +76 -0
- package/dist/scripts/pipeline.js +1031 -0
- package/dist/scripts/pr-comment.d.ts +10 -0
- package/dist/scripts/pr-comment.js +510 -0
- package/dist/scripts/readiness-report.d.ts +88 -0
- package/dist/scripts/readiness-report.js +342 -0
- package/dist/scripts/update-quality-scores.d.ts +15 -0
- package/dist/scripts/update-quality-scores.js +184 -0
- package/dist/scripts/validate-task-sources.d.ts +21 -0
- package/dist/scripts/validate-task-sources.js +210 -0
- package/dist/scripts/validate.d.ts +13 -0
- package/dist/scripts/validate.js +79 -0
- package/dist/scripts/webhook-server.d.ts +26 -0
- package/dist/scripts/webhook-server.js +147 -0
- package/dist/scripts/weekly-digest.d.ts +24 -0
- package/dist/scripts/weekly-digest.js +144 -0
- package/dist/sinks/bigquery/index.d.ts +131 -0
- package/dist/sinks/bigquery/index.js +222 -0
- package/dist/sinks/format-slack.d.ts +64 -0
- package/dist/sinks/format-slack.js +306 -0
- package/dist/sinks/index.d.ts +23 -0
- package/dist/sinks/index.js +18 -0
- package/dist/sinks/loader.d.ts +18 -0
- package/dist/sinks/loader.js +82 -0
- package/dist/sinks/retry.d.ts +24 -0
- package/dist/sinks/retry.js +52 -0
- package/dist/sinks/schema.d.ts +9 -0
- package/dist/sinks/schema.js +9 -0
- package/dist/sinks/slack/format.d.ts +65 -0
- package/dist/sinks/slack/format.js +327 -0
- package/dist/sinks/slack/index.d.ts +27 -0
- package/dist/sinks/slack/index.js +78 -0
- package/dist/sinks/slack-sink.d.ts +27 -0
- package/dist/sinks/slack-sink.js +78 -0
- package/dist/sinks/types.d.ts +59 -0
- package/dist/sinks/types.js +44 -0
- package/dist/sinks/webhook/index.d.ts +19 -0
- package/dist/sinks/webhook/index.js +50 -0
- package/dist/sinks/webhook-sink.d.ts +19 -0
- package/dist/sinks/webhook-sink.js +50 -0
- package/dist/sources.d.ts +104 -0
- package/dist/sources.js +292 -0
- package/dist/webhook/budget.d.ts +42 -0
- package/dist/webhook/budget.js +60 -0
- package/dist/webhook/debounce.d.ts +67 -0
- package/dist/webhook/debounce.js +76 -0
- package/dist/webhook/dispatch.d.ts +45 -0
- package/dist/webhook/dispatch.js +84 -0
- package/dist/webhook/eval-request-handler.d.ts +87 -0
- package/dist/webhook/eval-request-handler.js +181 -0
- package/dist/webhook/handler.d.ts +88 -0
- package/dist/webhook/handler.js +203 -0
- package/dist/webhook/index.d.ts +17 -0
- package/dist/webhook/index.js +12 -0
- package/dist/webhook/types.d.ts +109 -0
- package/dist/webhook/types.js +10 -0
- package/package.json +72 -0
- package/tasks/.expanded.agentic.yaml +51 -0
- package/tasks/.expanded.yaml +66 -0
- package/tasks/frameworks.yaml +98 -0
- package/tasks/functions.yaml +51 -0
- package/tasks/groq.yaml +216 -0
- package/tasks/nextjs-live.yaml +62 -0
- package/tasks/studio-setup.yaml +111 -0
- package/tasks/visual-editing.yaml +120 -0
|
@@ -0,0 +1,390 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* pipeline/probe.ts
|
|
3
|
+
*
|
|
4
|
+
* Tier B probes for new features without evaluation tasks.
|
|
5
|
+
*
|
|
6
|
+
* Phase 4b of the Scenario Matrix implementation.
|
|
7
|
+
*
|
|
8
|
+
* When a content release adds documents for a genuinely new feature (no
|
|
9
|
+
* evaluation tasks exist), a probe provides a directional "usability"
|
|
10
|
+
* signal by analyzing the document content and, optionally, evaluating
|
|
11
|
+
* a generic implementation prompt.
|
|
12
|
+
*
|
|
13
|
+
* Probes are NOT scored evaluations — they answer "are these docs usable?"
|
|
14
|
+
* not "are these docs good enough?" The output is always labeled as
|
|
15
|
+
* directional and never displayed on the same scale as scored evaluations.
|
|
16
|
+
*
|
|
17
|
+
* @see docs/exec-plans/completed/scenario-matrix-implementation/phase-4-content-release-integration.md
|
|
18
|
+
*/
|
|
19
|
+
// ---------------------------------------------------------------------------
|
|
20
|
+
// Constants
|
|
21
|
+
// ---------------------------------------------------------------------------
|
|
22
|
+
/**
 * Generic prompt used for Tier B probes.
 *
 * Asks the model to write a TypeScript implementation using only what the
 * supplied documentation describes, and to say so explicitly when the
 * documentation is insufficient. The text ends with a `{{docs}}` marker;
 * nothing in this constant performs the substitution
 * (NOTE(review): presumably the caller replaces it with the doc text — confirm).
 */
export const PROBE_PROMPT = [
    "Given the following documentation about a Sanity feature, write a TypeScript",
    "implementation that demonstrates the feature's core functionality.",
    "",
    "Use only APIs and patterns described in the documentation. If the documentation",
    "is insufficient to implement something, say so explicitly rather than guessing.",
    "",
    "Documentation:",
    "{{docs}}",
].join("\n");
|
|
31
|
+
// ---------------------------------------------------------------------------
|
|
32
|
+
// Public API
|
|
33
|
+
// ---------------------------------------------------------------------------
|
|
34
|
+
/**
 * Compare a model's probe response with the documentation it was given.
 *
 * API names are extracted from both texts with `extractApiNames`. Each name
 * found in the model output is then sorted into one of two buckets using a
 * case-insensitive lookup against the names found in the documentation:
 *
 * - `apiNamesExtracted` — names that also appear in the documentation
 * - `hallucinatedApis`  — names that do not appear in the documentation
 *
 * Both buckets keep the order (and original casing) of the output names.
 * The model output is supplied by the caller; this function itself does not
 * invoke a model.
 *
 * @param documentContent - The concatenated documentation text
 * @param modelOutput - The model's response to the probe prompt
 * @param documentSlugs - The slugs of the documents that were probed
 * @returns An object with `apiNamesExtracted`, `documentSlugs`,
 *   `hallucinatedApis`, `producedCode`, `rawOutput` (the unmodified
 *   `modelOutput`), `suggestedTasks` and `usability`
 */
export function analyzeProbeOutput(documentContent, modelOutput, documentSlugs) {
    const documentedNames = extractApiNames(documentContent);
    const usedNames = extractApiNames(modelOutput);

    // Lower-cased lookup so matching ignores differences in casing.
    const documentedLookup = new Set();
    documentedNames.forEach((name) => {
        documentedLookup.add(name.toLowerCase());
    });

    // Single pass over the output names: documented ones on one side,
    // undocumented (hallucinated) ones on the other.
    const apiNamesExtracted = [];
    const hallucinatedApis = [];
    usedNames.forEach((name) => {
        if (documentedLookup.has(name.toLowerCase())) {
            apiNamesExtracted.push(name);
        }
        else {
            hallucinatedApis.push(name);
        }
    });

    const producedCode = hasCodeOutput(modelOutput);
    const suggestedTasks = generateTaskSuggestions(documentContent, documentSlugs);
    const usability = classifyUsability(
        producedCode,
        apiNamesExtracted.length,
        hallucinatedApis.length,
        documentedNames.length,
    );

    return {
        apiNamesExtracted,
        documentSlugs,
        hallucinatedApis,
        producedCode,
        rawOutput: modelOutput,
        suggestedTasks,
        usability,
    };
}
|
|
70
|
+
/**
 * Render a probe result as plain text for console output.
 *
 * The output starts with a fixed header followed by the document slugs,
 * the usability classification (prefixed by `usabilityIcon`) and whether
 * code was produced. Up to three optional sections follow — matched APIs,
 * hallucinated APIs and numbered task suggestions — each emitted only when
 * its list is non-empty and each terminated by a blank line.
 *
 * @param probe - Probe result; reads `documentSlugs`, `usability`,
 *   `producedCode`, `apiNamesExtracted`, `hallucinatedApis` and
 *   `suggestedTasks`
 * @returns The report lines joined with "\n"
 */
export function formatProbeConsole(probe) {
    const report = [
        "🔍 TIER B PROBE RESULT",
        "",
        ` Documents: ${probe.documentSlugs.join(", ")}`,
        ` Usability: ${usabilityIcon(probe.usability)} ${probe.usability}`,
        ` Produced code: ${probe.producedCode ? "yes" : "no"}`,
        "",
    ];

    // Appends "heading, rows..., blank line" — skipped entirely for an empty list.
    const appendSection = (heading, rows) => {
        if (rows.length > 0) {
            report.push(heading);
            rows.forEach((row) => {
                report.push(row);
            });
            report.push("");
        }
    };

    appendSection(" APIs correctly extracted from docs:", Array.from(probe.apiNamesExtracted, (api) => ` ✅ ${api}`));
    appendSection(" Hallucinated APIs (not in docs):", Array.from(probe.hallucinatedApis, (api) => ` ❌ ${api}`));
    // Task suggestions are numbered starting from 1.
    appendSection(" Suggested evaluation tasks:", Array.from(probe.suggestedTasks, (task, position) => ` ${position + 1}. ${task}`));

    return report.join("\n");
}
|
|
104
|
+
// ---------------------------------------------------------------------------
|
|
105
|
+
// Formatting
|
|
106
|
+
// ---------------------------------------------------------------------------
|
|
107
|
+
/**
 * Render a probe result as a markdown fragment.
 *
 * The heading uses a feature name inferred from the document slugs. The
 * findings list and the collapsible task list are only emitted when they
 * have entries.
 *
 * @param probe - Probe result as returned by analyzeProbeOutput()
 * @returns Newline-joined markdown
 */
export function formatProbeMarkdown(probe) {
  const {
    apiNamesExtracted,
    documentSlugs,
    hallucinatedApis,
    suggestedTasks,
    usability,
  } = probe;

  const md = [
    `#### 🔍 New Feature Probe: ${inferFeatureName(documentSlugs)}`,
    "",
    "**Status:** No evaluation tasks exist for this feature. " +
      "**Assessment:** Directional only — not a scored evaluation.",
    "",
    "**Documents Analyzed:**",
    ...documentSlugs.map((slug) => `- \`${slug}\` (new)`),
    "",
    `**Usability:** ${usabilityIcon(usability)} ${usability}`,
    "",
  ];

  const hasFindings = apiNamesExtracted.length > 0 || hallucinatedApis.length > 0;
  if (hasFindings) {
    md.push(
      "**Probe Findings:**",
      ...apiNamesExtracted.map((api) => `- ✅ Correctly used \`${api}\``),
      ...hallucinatedApis.map((api) => `- ❌ Hallucinated \`${api}\` (not in docs)`),
      "",
    );
  }

  if (suggestedTasks.length > 0) {
    md.push(
      "<details>",
      "<summary>Suggested evaluation tasks</summary>",
      "",
      ...suggestedTasks.map((task, idx) => `${idx + 1}. ${task}`),
      "",
      "</details>",
      "",
    );
  }

  return md.join("\n");
}
|
|
144
|
+
/**
 * Propose evaluation-task ideas from heuristics over the documentation text.
 *
 * Looks at configuration keywords, names found in fenced code blocks, and
 * CRUD verbs. Each signal that fires contributes one suggestion. An
 * integration suggestion is appended only when at least one other
 * suggestion was produced, so the result is empty when nothing is detected.
 *
 * @param documentContent - The documentation text to inspect
 * @param documentSlugs - Slugs used to derive the feature name
 * @returns Suggested task descriptions, possibly empty
 */
export function generateTaskSuggestions(documentContent, documentSlugs) {
  const featureName = inferFeatureName(documentSlugs);

  // Gather the raw signals first.
  const functionNames = extractFunctionNamesFromCode(extractCodeBlocks(documentContent));
  const configPatterns = detectConfigPatterns(documentContent);
  const crudOperations = detectCrudOperations(documentContent);

  const suggestions = [];

  if (configPatterns.length > 0) {
    const patternList = configPatterns.join(", ");
    suggestions.push(`Configure ${featureName} in a Sanity Studio project (${patternList})`);
  }

  if (functionNames.length > 0) {
    // Only the first three names (alphabetical) are mentioned.
    const topFns = functionNames.slice(0, 3).join(", ");
    suggestions.push(`Implement core ${featureName} functionality using ${topFns}`);
  }

  if (crudOperations.length > 0) {
    const opList = crudOperations.join("/");
    suggestions.push(`Perform ${opList} operations with ${featureName}`);
  }

  // The integration task rides along with other suggestions; it is never the only one.
  if (suggestions.length > 0) {
    suggestions.push(`Integrate ${featureName} with an existing Next.js application`);
  }

  return suggestions;
}
|
|
175
|
+
// ---------------------------------------------------------------------------
|
|
176
|
+
// Internal helpers
|
|
177
|
+
// ---------------------------------------------------------------------------
|
|
178
|
+
/**
 * Classify overall usability from probe metrics.
 *
 * - No code produced: "not-usable".
 * - Hallucination rate below 0.2 and coverage above 0.5: "usable".
 * - Hallucination rate above 0.5, or coverage below 0.2 when the docs list
 *   more than two APIs: "not-usable".
 * - Anything else: "partially-usable".
 *
 * @param producedCode - Whether the output contained code
 * @param correctApis - Count of output APIs that also appear in the docs
 * @param hallucinatedApis - Count of output APIs absent from the docs
 * @param totalDocApis - Count of APIs found in the docs
 * @returns "usable" | "partially-usable" | "not-usable"
 */
function classifyUsability(producedCode, correctApis, hallucinatedApis, totalDocApis) {
  if (!producedCode) {
    return "not-usable";
  }

  // Both rates default to 0 when their denominator is 0.
  const referencedApis = correctApis + hallucinatedApis;
  let hallucinationRate = 0;
  if (referencedApis > 0) {
    hallucinationRate = hallucinatedApis / referencedApis;
  }
  let coverageRate = 0;
  if (totalDocApis > 0) {
    coverageRate = correctApis / totalDocApis;
  }

  const clearlyGood = hallucinationRate < 0.2 && coverageRate > 0.5;
  if (clearlyGood) {
    return "usable";
  }

  const mostlyInvented = hallucinationRate > 0.5;
  const barelyCovered = coverageRate < 0.2 && totalDocApis > 2;
  return mostlyInvented || barelyCovered ? "not-usable" : "partially-usable";
}
|
|
195
|
+
/**
 * Detect configuration-related keywords in documentation text.
 *
 * Matching is a case-insensitive substring search, so each label fires on
 * any occurrence of its keywords, including inside longer words.
 *
 * @param text - Documentation text
 * @returns Matching labels, in the fixed order
 *          "sanity.config", "plugin setup", "environment config"
 */
function detectConfigPatterns(text) {
  const haystack = text.toLowerCase();
  const detectors = [
    { pattern: /sanity\.config|defineconfig/i, label: "sanity.config" },
    { pattern: /plugin|definePlugin/i, label: "plugin setup" },
    { pattern: /env|environment|api.?key/i, label: "environment config" },
  ];
  return detectors
    .filter((detector) => detector.pattern.test(haystack))
    .map((detector) => detector.label);
}
|
|
207
|
+
/**
 * Detect CRUD verbs in text.
 *
 * Matching is case-insensitive and whole-word only, so "created" does not
 * count as "create".
 *
 * @param text - Text to scan
 * @returns Subset of ["create", "read", "update", "delete"], in that order
 */
function detectCrudOperations(text) {
  const haystack = text.toLowerCase();
  const verbs = [
    { pattern: /\bcreate\b/, op: "create" },
    { pattern: /\bread\b|\bfetch\b|\bquery\b|\bget\b/, op: "read" },
    { pattern: /\bupdate\b|\bpatch\b|\bmutate\b/, op: "update" },
    { pattern: /\bdelete\b|\bremove\b/, op: "delete" },
  ];
  const detected = [];
  for (const verb of verbs) {
    if (verb.pattern.test(haystack)) {
      detected.push(verb.op);
    }
  }
  return detected;
}
|
|
221
|
+
/**
 * Extract API-like names from text.
 *
 * Two sources are scanned:
 * - call sites: an identifier of at least three characters followed by `(`,
 *   skipping names that isCommonKeyword() rejects;
 * - named imports: each specifier inside `import { ... }`, skipping aliased
 *   specifiers (those containing " as ").
 *
 * @param text - Documentation or model output
 * @returns De-duplicated names in default sort order
 */
function extractApiNames(text) {
  const found = new Set();

  // Call sites: capture group 1 is the identifier before the parenthesis.
  for (const [, callee] of text.matchAll(/\b([a-zA-Z_]\w{2,})\s*\(/g)) {
    if (!isCommonKeyword(callee)) {
      found.add(callee);
    }
  }

  // Named imports: capture group 1 is the comma-separated specifier list.
  for (const [, specifierList] of text.matchAll(/import\s*\{([^}]+)\}/g)) {
    for (const rawSpecifier of specifierList.split(",")) {
      const specifier = rawSpecifier.trim();
      if (specifier.length > 0 && !specifier.includes(" as ")) {
        found.add(specifier);
      }
    }
  }

  return Array.from(found).sort();
}
|
|
252
|
+
/**
 * Extract JavaScript/TypeScript code blocks from markdown content.
 *
 * Every fenced block is matched as a whole, whatever its language tag, and
 * then kept only when the tag is empty or one of the JS/TS family. Matching
 * all fences first keeps opening and closing fences paired: a block in
 * another language (e.g. bash) is consumed and dropped, so its closing fence
 * cannot be mistaken for the start of a new block.
 *
 * @param text - Markdown text
 * @returns Bodies of the accepted blocks, in document order, without fences
 */
function extractCodeBlocks(text) {
  const acceptedLanguages = new Set(["", "typescript", "ts", "javascript", "js", "tsx", "jsx"]);
  // Group 1: the info string after the opening fence. Group 2: the body.
  const fencePattern = /```([^\n`]*)\r?\n([\s\S]*?)```/g;
  const blocks = [];
  for (const [, infoString, body] of text.matchAll(fencePattern)) {
    // The language is the first word of the info string (e.g. `ts title="x"`).
    const language = infoString.trim().split(/\s+/)[0].toLowerCase();
    if (acceptedLanguages.has(language)) {
      blocks.push(body);
    }
  }
  return blocks;
}
|
|
262
|
+
/**
 * Collect declared names and called method names from code blocks.
 *
 * Per block, two kinds of names are gathered:
 * - the identifier following `function`, `const`, `let` or `var` (so plain
 *   variable names are included), when longer than two characters and not
 *   rejected by isCommonKeyword();
 * - the method name in `.method(` calls, when longer than two characters.
 *
 * @param codeBlocks - Code block bodies
 * @returns De-duplicated names in default sort order
 */
function extractFunctionNamesFromCode(codeBlocks) {
  const collected = new Set();

  for (const source of codeBlocks) {
    // Declarations: capture group 1 is the declared identifier.
    for (const [, declared] of source.matchAll(/(?:function|const|let|var)\s+(\w+)/g)) {
      if (declared.length > 2 && !isCommonKeyword(declared)) {
        collected.add(declared);
      }
    }

    // Method calls (obj.method(): capture group 1 is the method name.
    for (const [, methodName] of source.matchAll(/\.([a-zA-Z_]\w+)\s*\(/g)) {
      if (methodName.length > 2) {
        collected.add(methodName);
      }
    }
  }

  return Array.from(collected).sort();
}
|
|
289
|
+
/**
 * Detect whether the model output contains code.
 *
 * Returns true when the text contains a markdown code fence, a named-import
 * statement, or something shaped like a declaration. Declarations must look
 * like syntax (`const x =`, `let y: T`, `const { a }`, `function name(`),
 * so ordinary prose such as "this function is" or "let me explain" is not
 * mistaken for code.
 *
 * @param output - Raw model response text
 * @returns True if the output appears to contain code
 */
function hasCodeOutput(output) {
  // Code fence.
  if (/```/.test(output)) {
    return true;
  }
  // Named import.
  if (/import\s+\{/.test(output)) {
    return true;
  }
  // Variable declaration: identifier followed by `=`, `:` or `;`, or the
  // start of a destructuring pattern.
  if (/\b(?:const|let|var)\s+(?:[\w$]+\s*[=:;]|[{[])/.test(output)) {
    return true;
  }
  // Named function (or generator) declaration, optionally generic.
  if (/\bfunction\b\s*\*?\s*[\w$]+\s*[(<]/.test(output)) {
    return true;
  }
  return false;
}
|
|
300
|
+
/**
 * Infer a human-readable feature name from document slugs.
 *
 * Only the first slug is used: it is split on hyphens, empty segments (from
 * leading, trailing or doubled hyphens) are dropped, and the first two
 * remaining segments are capitalized and joined with a space.
 *
 * @param slugs - Document slugs
 * @returns The derived name, or "Unknown Feature" when there are no slugs or
 *          the first slug has no usable segments
 */
function inferFeatureName(slugs) {
  if (slugs.length === 0) {
    return "Unknown Feature";
  }
  const words = slugs[0]
    .split("-")
    .filter((segment) => segment.length > 0)
    .slice(0, 2);
  if (words.length === 0) {
    return "Unknown Feature";
  }
  return words
    .map((word) => word.charAt(0).toUpperCase() + word.slice(1))
    .join(" ");
}
|
|
315
|
+
/**
 * Names that should never be reported as APIs: JavaScript keywords and
 * literals, plus a few ubiquitous globals and test helpers. Built once at
 * module load rather than on every lookup.
 */
const COMMON_KEYWORD_SET = new Set([
  // Keywords and literals
  "async", "await", "break", "case", "catch", "class", "const", "continue",
  "debugger", "default", "delete", "do", "else", "enum", "export", "extends",
  "false", "finally", "for", "from", "function", "get", "if", "import", "in",
  "instanceof", "let", "new", "null", "of", "return", "set", "static", "super",
  "switch", "this", "throw", "true", "try", "typeof", "var", "void", "while",
  "with", "yield",
  // Common global functions
  "console", "log", "error", "warn", "require", "module", "exports",
  "describe", "it", "test", "expect", "assert",
  "JSON", "Array", "Object", "Map", "Set", "Promise", "String", "Number",
  "Boolean", "Error",
]);

/**
 * Check if a name is a common JavaScript keyword or built-in.
 *
 * The comparison is case-sensitive: "JSON" is filtered, "json" is not.
 *
 * @param name - Identifier to test
 * @returns True when the name is in the filter list
 */
function isCommonKeyword(name) {
  return COMMON_KEYWORD_SET.has(name);
}
|
|
380
|
+
/**
 * Get the icon for a usability classification.
 *
 * @param usability - "usable", "partially-usable" or "not-usable"
 * @returns The matching emoji. Unrecognized values get "❔" so that the
 *          text "undefined" is never interpolated into a report.
 */
function usabilityIcon(usability) {
  switch (usability) {
    case "not-usable":
      return "❌";
    case "partially-usable":
      return "🟡";
    case "usable":
      return "✅";
    default:
      return "❔";
  }
}
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* pipeline/provenance.ts
|
|
3
|
+
*
|
|
4
|
+
* Builds ReportProvenance from data available during a pipeline run.
|
|
5
|
+
*
|
|
6
|
+
* Provenance captures what produced an evaluation report: which models,
|
|
7
|
+
* which source, which mode, what triggered it, git metadata, etc.
|
|
8
|
+
* Most of this data already flows through the pipeline — this module
|
|
9
|
+
* just captures what would otherwise be ephemeral.
|
|
10
|
+
*
|
|
11
|
+
* @see docs/design-docs/report-store/domain-model.md
|
|
12
|
+
* @see docs/design-docs/report-store/architecture.md — Provenance collection
|
|
13
|
+
*/
|
|
14
|
+
import type { ResolvedSourceConfig } from "../sources.js";
|
|
15
|
+
import type { EvalMode, PromptfooUrlEntry, ReportProvenance } from "./types.js";
|
|
16
|
+
/**
 * Everything buildProvenance() needs from the caller.
 *
 * Model, git and trigger details are not part of this input; they are
 * gathered by the implementation itself.
 */
export interface ProvenanceInput {
  /** Feature areas that were evaluated. */
  areas: string[];

  /** SHA-256 hash of the doc context files (from the cache system). */
  contextHash?: string;

  /** Evaluation fingerprint for cross-environment cache lookup. */
  evalFingerprint?: string;

  /** Evaluation mode. */
  mode: EvalMode;

  /** @deprecated Use `promptfooUrls`. Kept for backward compatibility. */
  promptfooUrl?: string;

  /** Per-mode Promptfoo share URLs. */
  promptfooUrls?: PromptfooUrlEntry[];

  /** Path to the package root (used to locate models.yaml). */
  rootDir: string;

  /** Sanity document IDs targeted. */
  sanityDocumentIds?: string[];

  /** Resolved documentation source. */
  source: ResolvedSourceConfig;

  /** Specific task IDs evaluated (if the run was scoped). */
  taskIds?: string[];
}
|
|
38
|
+
/**
 * Build a ReportProvenance object from pipeline context.
 *
 * Sources combined into the result:
 * - pipeline options (mode, source, areas, tasks)
 * - config/models.yaml (model list, grader)
 * - environment variables (CI metadata, trigger detection)
 * - optional metadata (context hash, Promptfoo URLs)
 *
 * @param input - Pipeline context for the current run
 * @returns The assembled provenance record
 */
export declare function buildProvenance(input: ProvenanceInput): ReportProvenance;
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* pipeline/provenance.ts
|
|
3
|
+
*
|
|
4
|
+
* Builds ReportProvenance from data available during a pipeline run.
|
|
5
|
+
*
|
|
6
|
+
* Provenance captures what produced an evaluation report: which models,
|
|
7
|
+
* which source, which mode, what triggered it, git metadata, etc.
|
|
8
|
+
* Most of this data already flows through the pipeline — this module
|
|
9
|
+
* just captures what would otherwise be ephemeral.
|
|
10
|
+
*
|
|
11
|
+
* @see docs/design-docs/report-store/domain-model.md
|
|
12
|
+
* @see docs/design-docs/report-store/architecture.md — Provenance collection
|
|
13
|
+
*/
|
|
14
|
+
import { readFileSync } from "fs";
|
|
15
|
+
import { resolve } from "path";
|
|
16
|
+
import { load } from "js-yaml";
|
|
17
|
+
/**
 * Build a ReportProvenance object from pipeline context.
 *
 * Caller-supplied fields are copied through. The model list and grader id
 * come from config/models.yaml under `input.rootDir`, and git and trigger
 * details are derived from environment variables.
 *
 * @param input - Pipeline context (areas, mode, source, optional metadata)
 * @returns The assembled provenance record
 */
export function buildProvenance(input) {
  const { grader, models: configuredModels } = loadModelsConfig(input.rootDir);
  const { baseUrl, dataset, name, perspective, projectId } = input.source;

  const git = detectGitMetadata();
  // Only id and label are recorded for each model.
  const models = configuredModels.map((entry) => ({ id: entry.id, label: entry.label }));

  return {
    areas: input.areas,
    contextHash: input.contextHash,
    evalFingerprint: input.evalFingerprint,
    git,
    graderModel: grader.id,
    mode: input.mode,
    models,
    promptfooUrl: input.promptfooUrl,
    promptfooUrls: input.promptfooUrls,
    source: { baseUrl, dataset, name, perspective, projectId },
    targetDocuments: input.sanityDocumentIds,
    taskIds: input.taskIds,
    trigger: detectTrigger(),
  };
}
|
|
50
|
+
// ---------------------------------------------------------------------------
|
|
51
|
+
// Git metadata
|
|
52
|
+
// ---------------------------------------------------------------------------
|
|
53
|
+
/**
 * Read git metadata from GitHub Actions environment variables.
 *
 * - `branch`: `refs/heads/<name>` becomes `<name>`, `refs/pull/<n>/...`
 *   becomes `pr-<n>`, and any other ref is passed through unchanged.
 * - `prNumber`: parsed from `refs/pull/<digits>/...`, otherwise undefined.
 * - `sha`: GITHUB_SHA, or "unknown" when unset.
 *
 * @returns `{ branch, prNumber, repo, sha }`, or undefined when
 *          GITHUB_REPOSITORY is not set (treated as "not in CI")
 */
function detectGitMetadata() {
  const { GITHUB_REPOSITORY: repo, GITHUB_SHA, GITHUB_REF } = process.env;
  if (!repo) {
    return undefined;
  }

  const sha = GITHUB_SHA ?? "unknown";
  const ref = GITHUB_REF ?? "";

  const headsPrefix = "refs/heads/";
  let branch = ref;
  if (ref.startsWith(headsPrefix)) {
    branch = ref.slice(headsPrefix.length);
  } else if (ref.startsWith("refs/pull/")) {
    // The third path segment is the PR number (refs/pull/123/merge).
    branch = `pr-${ref.split("/")[2]}`;
  }

  let prNumber;
  const prMatch = /^refs\/pull\/(\d+)\//.exec(ref);
  if (prMatch) {
    prNumber = parseInt(prMatch[1], 10);
  }

  return { branch, prNumber, repo, sha };
}
|
|
74
|
+
// ---------------------------------------------------------------------------
|
|
75
|
+
// Trigger detection
|
|
76
|
+
// ---------------------------------------------------------------------------
|
|
77
|
+
/**
 * Infer what triggered this evaluation from environment variables.
 *
 * Checked in order; the first match wins:
 * 1. AILF_TRIGGER_TYPE === "scheduled": explicit override
 * 2. AILF_TRIGGER_TYPE === "webhook": explicit override
 * 3. GITHUB_EVENT_NAME === "schedule": cron-triggered
 * 4. GITHUB_EVENT_NAME === "repository_dispatch": cross-repo trigger
 * 5. GITHUB_ACTIONS === "true": CI-triggered
 * 6. Otherwise: manual
 *
 * Any other AILF_TRIGGER_TYPE value is ignored and detection falls through
 * to the GitHub checks.
 *
 * @returns A trigger descriptor whose `type` is "scheduled", "webhook",
 *          "cross-repo", "ci" or "manual"
 */
function detectTrigger() {
  const env = process.env;

  // Explicit overrides take precedence over anything GitHub reports.
  switch (env.AILF_TRIGGER_TYPE) {
    case "scheduled":
      return {
        schedule: env.AILF_SCHEDULE ?? "unknown",
        type: "scheduled",
      };
    case "webhook":
      return {
        documentId: env.AILF_WEBHOOK_DOCUMENT_ID,
        source: env.AILF_WEBHOOK_SOURCE ?? "unknown",
        type: "webhook",
      };
    default:
      break;
  }

  // GitHub Actions context.
  switch (env.GITHUB_EVENT_NAME) {
    case "schedule":
      // NOTE(review): GITHUB_SCHEDULE does not appear to be a variable that
      // GitHub sets by default; presumably the workflow exports it. Confirm.
      return {
        schedule: env.GITHUB_SCHEDULE ?? "unknown",
        type: "scheduled",
      };
    case "repository_dispatch":
      // NOTE(review): callerRepo is read from GITHUB_REPOSITORY_OWNER_ID,
      // which looks like an owner id rather than a repository name. Confirm
      // this is the intended value.
      return {
        callerRef: env.GITHUB_REF,
        callerRepo: env.GITHUB_REPOSITORY_OWNER_ID ?? "unknown",
        type: "cross-repo",
      };
    default:
      break;
  }

  if (env.GITHUB_ACTIONS === "true") {
    return {
      runId: env.GITHUB_RUN_ID ?? "unknown",
      type: "ci",
      workflow: env.GITHUB_WORKFLOW ?? "unknown",
    };
  }

  return { type: "manual" };
}
|
|
126
|
+
// ---------------------------------------------------------------------------
|
|
127
|
+
// Model config loading
|
|
128
|
+
// ---------------------------------------------------------------------------
|
|
129
|
+
/**
 * Load config/models.yaml to extract the model list and grader info.
 *
 * Never throws. A minimal config is returned when the file cannot be read
 * or parsed, or when its top level is not a mapping (for example an empty
 * file). When the file is a mapping but `grader` is not an object or
 * `models` is not an array, only that field is replaced with its fallback,
 * so callers can always read `grader.id` and iterate `models`.
 *
 * @param rootDir - Package root containing the `config` directory
 * @returns The parsed config with `grader` and `models` guaranteed present
 */
function loadModelsConfig(rootDir) {
  const fallback = {
    defaults: {},
    grader: { id: "unknown" },
    models: [],
  };

  let parsed;
  try {
    const content = readFileSync(resolve(rootDir, "config", "models.yaml"), "utf-8");
    parsed = load(content);
  } catch {
    console.warn(" ⚠️ Could not read config/models.yaml for provenance");
    return fallback;
  }

  // YAML can legally parse to undefined, a scalar or a list; none is usable here.
  if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
    console.warn(" ⚠️ config/models.yaml is not a mapping; using fallback provenance config");
    return fallback;
  }

  const hasGrader = parsed.grader !== null && typeof parsed.grader === "object";
  return {
    ...parsed,
    grader: hasGrader ? parsed.grader : fallback.grader,
    models: Array.isArray(parsed.models) ? parsed.models : fallback.models,
  };
}
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* pipeline/readiness-report.ts
|
|
3
|
+
*
|
|
4
|
+
* Pure computation module for launch readiness reports.
|
|
5
|
+
*
|
|
6
|
+
* Combines threshold evaluation, ceiling decomposition, and gap analysis
|
|
7
|
+
* into a single actionable readiness checklist for a given feature area.
|
|
8
|
+
*
|
|
9
|
+
* This module has NO side effects — no file I/O, no process.argv, no env vars.
|
|
10
|
+
* Callers (command handlers, orchestration steps) handle I/O and pass data in.
|
|
11
|
+
*
|
|
12
|
+
* Exports pure functions:
|
|
13
|
+
* - generateReadinessReport() — builds the structured report
|
|
14
|
+
* - formatReadinessMarkdown() — renders the report as markdown
|
|
15
|
+
*
|
|
16
|
+
* @see docs/exec-plans/completed/scenario-matrix-implementation/phase-5-readiness-thresholds.md
|
|
17
|
+
* @see docs/exec-plans/active/eliminate-lib-layer.md
|
|
18
|
+
*/
|
|
19
|
+
import type { ThresholdConfig } from "./schemas.js";
|
|
20
|
+
import type { GapAnalysisReport, GapEstimate, ScoreSummary, ThresholdEvaluation, ThresholdViolation } from "./types.js";
|
|
21
|
+
/**
 * Readiness check for a single scoring dimension: the score it achieved,
 * the threshold it was held to, and whether it passed.
 */
export interface DimensionCheck {
  /** Dimension display name. */
  dimension: string;

  /** Whether the dimension meets its threshold. */
  pass: boolean;

  /** Actual score. */
  score: number;

  /** Required threshold. */
  threshold: number;
}
|
|
32
|
+
/**
 * One historical baseline, used for trend tracking.
 */
export interface HistoryEntry {
  /** The area's composite score at this point in time. */
  score: number;

  /** Optional tag (e.g., "pre-groq-rewrite"). */
  tag?: string;

  /** When this baseline was captured. */
  timestamp: string;
}
|
|
41
|
+
/**
 * Structured readiness report for one feature area, as returned by
 * generateReadinessReport().
 */
export interface ReadinessReport {
  /** The feature area being evaluated. */
  area: string;

  /** Ceiling decomposition data for this area. */
  ceiling: {
    ceilingScore: number;
    docLift: number;
    docQualityGap: number;
    floorScore: number;
  };

  /** Per-dimension breakdown with threshold comparison. */
  dimensions: DimensionCheck[];

  /** Gap analysis entries for this area (empty if no gap data). */
  gaps: GapEstimate[];

  /** Historical scores (empty unless --history was used). */
  history: HistoryEntry[];

  /** Whether the area passes all thresholds. */
  pass: boolean;

  /** The area's composite score. */
  score: number;

  /** The composite threshold for this area. */
  threshold: number;

  /** Threshold evaluation result. */
  thresholdEvaluation: ThresholdEvaluation;

  /** Violations specific to this area. */
  violations: ThresholdViolation[];
}
|
|
69
|
+
/**
 * Render a readiness report as markdown.
 *
 * Pure function: the structured report goes in, a markdown string comes out.
 *
 * @param report - Report produced by generateReadinessReport()
 * @returns The markdown rendering
 */
export declare function formatReadinessMarkdown(report: ReadinessReport): string;
|
|
75
|
+
/**
 * Generate a structured readiness report for a given feature area.
 *
 * Pure function: every input arrives through `opts` and the result is
 * returned as data, with no I/O.
 *
 * @param opts - The target area, its score summary and threshold config,
 *               plus optional gap analysis and history
 * @returns The structured readiness report
 */
export declare function generateReadinessReport(opts: {
  area: string;
  gapAnalysis?: GapAnalysisReport;
  history?: HistoryEntry[];
  scoreSummary: ScoreSummary;
  thresholdConfig: ThresholdConfig;
}): ReadinessReport;
|