@sanity/ailf 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +89 -0
- package/bin/ailf.js +64 -0
- package/canonical/grader-references/README.md +88 -0
- package/canonical/grader-references/groq.yaml +234 -0
- package/canonical/grader-references/studio-setup.yaml +275 -0
- package/canonical/reference-solutions/.gitkeep +1 -0
- package/canonical/reference-solutions/frameworks/nuxt.ts +119 -0
- package/canonical/reference-solutions/frameworks/remix.tsx +100 -0
- package/canonical/reference-solutions/functions/publish-webhook.ts +60 -0
- package/canonical/reference-solutions/groq/advanced-filtering.ts +379 -0
- package/canonical/reference-solutions/groq/blog-queries.ts +137 -0
- package/canonical/reference-solutions/groq/joins-references.ts +300 -0
- package/canonical/reference-solutions/nextjs/app-router-integration.tsx +128 -0
- package/canonical/reference-solutions/studio-setup/blog-schema.ts +143 -0
- package/canonical/reference-solutions/studio-setup/custom-tool.tsx +78 -0
- package/canonical/reference-solutions/visual-editing/live-preview.tsx +137 -0
- package/canonical/reference-solutions/visual-editing/presentation-nextjs.tsx +130 -0
- package/config/airbyte/ai_literacy_framework.connector.yaml +639 -0
- package/config/bigquery/README.md +74 -0
- package/config/bigquery/views/area_scores.sql +87 -0
- package/config/bigquery/views/reports.sql +49 -0
- package/config/features.yaml +116 -0
- package/config/models.yaml +115 -0
- package/config/prompts.yaml +75 -0
- package/config/rubrics.yaml +62 -0
- package/config/schedules.yaml +43 -0
- package/config/sinks.yaml +54 -0
- package/config/sources.yaml +51 -0
- package/config/thresholds.yaml +49 -0
- package/dist/_vendor/ailf-core/examples/index.d.ts +190 -0
- package/dist/_vendor/ailf-core/examples/index.js +285 -0
- package/dist/_vendor/ailf-core/index.d.ts +17 -0
- package/dist/_vendor/ailf-core/index.js +17 -0
- package/dist/_vendor/ailf-core/ports/cache-store.d.ts +72 -0
- package/dist/_vendor/ailf-core/ports/cache-store.js +17 -0
- package/dist/_vendor/ailf-core/ports/config-source.d.ts +33 -0
- package/dist/_vendor/ailf-core/ports/config-source.js +15 -0
- package/dist/_vendor/ailf-core/ports/context.d.ts +172 -0
- package/dist/_vendor/ailf-core/ports/context.js +14 -0
- package/dist/_vendor/ailf-core/ports/doc-fetcher.d.ts +131 -0
- package/dist/_vendor/ailf-core/ports/doc-fetcher.js +12 -0
- package/dist/_vendor/ailf-core/ports/eval-runner.d.ts +24 -0
- package/dist/_vendor/ailf-core/ports/eval-runner.js +8 -0
- package/dist/_vendor/ailf-core/ports/index.d.ts +15 -0
- package/dist/_vendor/ailf-core/ports/index.js +7 -0
- package/dist/_vendor/ailf-core/ports/logger.d.ts +36 -0
- package/dist/_vendor/ailf-core/ports/logger.js +11 -0
- package/dist/_vendor/ailf-core/ports/pipeline-step.d.ts +46 -0
- package/dist/_vendor/ailf-core/ports/pipeline-step.js +8 -0
- package/dist/_vendor/ailf-core/ports/task-source.d.ts +159 -0
- package/dist/_vendor/ailf-core/ports/task-source.js +72 -0
- package/dist/_vendor/ailf-core/schemas/callback-payload.d.ts +24 -0
- package/dist/_vendor/ailf-core/schemas/callback-payload.js +29 -0
- package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +55 -0
- package/dist/_vendor/ailf-core/schemas/eval-config.js +78 -0
- package/dist/_vendor/ailf-core/schemas/index.d.ts +16 -0
- package/dist/_vendor/ailf-core/schemas/index.js +16 -0
- package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +125 -0
- package/dist/_vendor/ailf-core/schemas/pipeline-request.js +67 -0
- package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +531 -0
- package/dist/_vendor/ailf-core/schemas/pipeline.js +318 -0
- package/dist/_vendor/ailf-core/schemas/schedules.d.ts +68 -0
- package/dist/_vendor/ailf-core/schemas/schedules.js +74 -0
- package/dist/_vendor/ailf-core/schemas/sinks.d.ts +207 -0
- package/dist/_vendor/ailf-core/schemas/sinks.js +108 -0
- package/dist/_vendor/ailf-core/services/comparison-formatters.d.ts +18 -0
- package/dist/_vendor/ailf-core/services/comparison-formatters.js +189 -0
- package/dist/_vendor/ailf-core/services/config-helpers.d.ts +41 -0
- package/dist/_vendor/ailf-core/services/config-helpers.js +86 -0
- package/dist/_vendor/ailf-core/services/index.d.ts +12 -0
- package/dist/_vendor/ailf-core/services/index.js +12 -0
- package/dist/_vendor/ailf-core/services/scoring.d.ts +49 -0
- package/dist/_vendor/ailf-core/services/scoring.js +222 -0
- package/dist/_vendor/ailf-core/types/index.d.ts +1082 -0
- package/dist/_vendor/ailf-core/types/index.js +21 -0
- package/dist/_vendor/ailf-core/types/scoring-input.d.ts +54 -0
- package/dist/_vendor/ailf-core/types/scoring-input.js +9 -0
- package/dist/_vendor/ailf-shared/dimension-names.d.ts +21 -0
- package/dist/_vendor/ailf-shared/dimension-names.js +27 -0
- package/dist/_vendor/ailf-shared/document-ref.d.ts +29 -0
- package/dist/_vendor/ailf-shared/document-ref.js +1 -0
- package/dist/_vendor/ailf-shared/eval-modes.d.ts +12 -0
- package/dist/_vendor/ailf-shared/eval-modes.js +8 -0
- package/dist/_vendor/ailf-shared/index.d.ts +16 -0
- package/dist/_vendor/ailf-shared/index.js +16 -0
- package/dist/_vendor/ailf-shared/noise-threshold.d.ts +9 -0
- package/dist/_vendor/ailf-shared/noise-threshold.js +9 -0
- package/dist/_vendor/ailf-shared/score-grades.d.ts +17 -0
- package/dist/_vendor/ailf-shared/score-grades.js +23 -0
- package/dist/adapters/cache/content-lake-cache.d.ts +24 -0
- package/dist/adapters/cache/content-lake-cache.js +59 -0
- package/dist/adapters/cache/filesystem-cache.d.ts +18 -0
- package/dist/adapters/cache/filesystem-cache.js +54 -0
- package/dist/adapters/cache/index.d.ts +2 -0
- package/dist/adapters/cache/index.js +2 -0
- package/dist/adapters/config-sources/cli-config-adapter.d.ts +17 -0
- package/dist/adapters/config-sources/cli-config-adapter.js +23 -0
- package/dist/adapters/config-sources/file-config-adapter.d.ts +26 -0
- package/dist/adapters/config-sources/file-config-adapter.js +96 -0
- package/dist/adapters/config-sources/index.d.ts +2 -0
- package/dist/adapters/config-sources/index.js +2 -0
- package/dist/adapters/doc-fetchers/index.d.ts +1 -0
- package/dist/adapters/doc-fetchers/index.js +1 -0
- package/dist/adapters/doc-fetchers/sanity-doc-fetcher.d.ts +76 -0
- package/dist/adapters/doc-fetchers/sanity-doc-fetcher.js +620 -0
- package/dist/adapters/eval-runners/index.d.ts +1 -0
- package/dist/adapters/eval-runners/index.js +1 -0
- package/dist/adapters/eval-runners/promptfoo-eval-adapter.d.ts +14 -0
- package/dist/adapters/eval-runners/promptfoo-eval-adapter.js +63 -0
- package/dist/adapters/index.d.ts +12 -0
- package/dist/adapters/index.js +12 -0
- package/dist/adapters/loggers/console-logger.d.ts +22 -0
- package/dist/adapters/loggers/console-logger.js +54 -0
- package/dist/adapters/loggers/index.d.ts +9 -0
- package/dist/adapters/loggers/index.js +9 -0
- package/dist/adapters/loggers/json-logger.d.ts +18 -0
- package/dist/adapters/loggers/json-logger.js +33 -0
- package/dist/adapters/loggers/quiet-logger.d.ts +16 -0
- package/dist/adapters/loggers/quiet-logger.js +30 -0
- package/dist/adapters/task-sources/composite-task-source.d.ts +20 -0
- package/dist/adapters/task-sources/composite-task-source.js +59 -0
- package/dist/adapters/task-sources/content-lake-task-source.d.ts +20 -0
- package/dist/adapters/task-sources/content-lake-task-source.js +219 -0
- package/dist/adapters/task-sources/index.d.ts +7 -0
- package/dist/adapters/task-sources/index.js +7 -0
- package/dist/adapters/task-sources/repo-schemas.d.ts +245 -0
- package/dist/adapters/task-sources/repo-schemas.js +234 -0
- package/dist/adapters/task-sources/repo-task-source.d.ts +22 -0
- package/dist/adapters/task-sources/repo-task-source.js +104 -0
- package/dist/adapters/task-sources/repo-trigger.d.ts +52 -0
- package/dist/adapters/task-sources/repo-trigger.js +153 -0
- package/dist/adapters/task-sources/repo-validation.d.ts +49 -0
- package/dist/adapters/task-sources/repo-validation.js +164 -0
- package/dist/adapters/task-sources/yaml-task-source.d.ts +18 -0
- package/dist/adapters/task-sources/yaml-task-source.js +136 -0
- package/dist/agent-observer/agentic-provider.d.ts +132 -0
- package/dist/agent-observer/agentic-provider.js +983 -0
- package/dist/agent-observer/classifier.d.ts +62 -0
- package/dist/agent-observer/classifier.js +269 -0
- package/dist/agent-observer/index.d.ts +7 -0
- package/dist/agent-observer/index.js +4 -0
- package/dist/agent-observer/pricing.d.ts +35 -0
- package/dist/agent-observer/pricing.js +82 -0
- package/dist/agent-observer/provider.d.ts +77 -0
- package/dist/agent-observer/provider.js +151 -0
- package/dist/agent-observer/proxy.d.ts +91 -0
- package/dist/agent-observer/proxy.js +321 -0
- package/dist/agent-observer/test-imports.d.ts +7 -0
- package/dist/agent-observer/test-imports.js +185 -0
- package/dist/agent-observer/types.d.ts +137 -0
- package/dist/agent-observer/types.js +16 -0
- package/dist/assertions/source-isolation.d.ts +72 -0
- package/dist/assertions/source-isolation.js +117 -0
- package/dist/cli.d.ts +24 -0
- package/dist/cli.js +199 -0
- package/dist/commands/agent-report.d.ts +5 -0
- package/dist/commands/agent-report.js +69 -0
- package/dist/commands/baseline.d.ts +9 -0
- package/dist/commands/baseline.js +141 -0
- package/dist/commands/cache.d.ts +13 -0
- package/dist/commands/cache.js +135 -0
- package/dist/commands/calculate-scores.d.ts +8 -0
- package/dist/commands/calculate-scores.js +48 -0
- package/dist/commands/compare.d.ts +8 -0
- package/dist/commands/compare.js +120 -0
- package/dist/commands/completion.d.ts +18 -0
- package/dist/commands/completion.js +260 -0
- package/dist/commands/coverage-audit.d.ts +7 -0
- package/dist/commands/coverage-audit.js +40 -0
- package/dist/commands/discovery-report.d.ts +10 -0
- package/dist/commands/discovery-report.js +44 -0
- package/dist/commands/eval.d.ts +9 -0
- package/dist/commands/eval.js +35 -0
- package/dist/commands/explain-handler.d.ts +34 -0
- package/dist/commands/explain-handler.js +719 -0
- package/dist/commands/fetch-docs.d.ts +8 -0
- package/dist/commands/fetch-docs.js +128 -0
- package/dist/commands/generate-configs.d.ts +8 -0
- package/dist/commands/generate-configs.js +46 -0
- package/dist/commands/grader/index.d.ts +11 -0
- package/dist/commands/grader/index.js +118 -0
- package/dist/commands/init.d.ts +19 -0
- package/dist/commands/init.js +150 -0
- package/dist/commands/interactive.d.ts +12 -0
- package/dist/commands/interactive.js +238 -0
- package/dist/commands/lookup-doc.d.ts +15 -0
- package/dist/commands/lookup-doc.js +84 -0
- package/dist/commands/measure-retrieval.d.ts +5 -0
- package/dist/commands/measure-retrieval.js +65 -0
- package/dist/commands/pipeline-action.d.ts +71 -0
- package/dist/commands/pipeline-action.js +305 -0
- package/dist/commands/pipeline.d.ts +62 -0
- package/dist/commands/pipeline.js +53 -0
- package/dist/commands/pr-comment.d.ts +8 -0
- package/dist/commands/pr-comment.js +47 -0
- package/dist/commands/publish.d.ts +26 -0
- package/dist/commands/publish.js +253 -0
- package/dist/commands/readiness-report.d.ts +10 -0
- package/dist/commands/readiness-report.js +104 -0
- package/dist/commands/shared/options.d.ts +29 -0
- package/dist/commands/shared/options.js +57 -0
- package/dist/commands/update-quality-scores.d.ts +5 -0
- package/dist/commands/update-quality-scores.js +20 -0
- package/dist/commands/validate-tasks.d.ts +16 -0
- package/dist/commands/validate-tasks.js +93 -0
- package/dist/commands/validate.d.ts +9 -0
- package/dist/commands/validate.js +73 -0
- package/dist/commands/webhook-server.d.ts +5 -0
- package/dist/commands/webhook-server.js +30 -0
- package/dist/commands/weekly-digest.d.ts +10 -0
- package/dist/commands/weekly-digest.js +104 -0
- package/dist/composition-root.d.ts +26 -0
- package/dist/composition-root.js +107 -0
- package/dist/interpolate.d.ts +26 -0
- package/dist/interpolate.js +70 -0
- package/dist/job-store.d.ts +104 -0
- package/dist/job-store.js +188 -0
- package/dist/lib/agent-behavior-report.d.ts +8 -0
- package/dist/lib/agent-behavior-report.js +185 -0
- package/dist/lib/baseline.d.ts +19 -0
- package/dist/lib/baseline.js +153 -0
- package/dist/lib/calculate-scores.d.ts +23 -0
- package/dist/lib/calculate-scores.js +42 -0
- package/dist/lib/compare.d.ts +18 -0
- package/dist/lib/compare.js +170 -0
- package/dist/lib/coverage-audit.d.ts +4 -0
- package/dist/lib/coverage-audit.js +42 -0
- package/dist/lib/discovery-report.d.ts +13 -0
- package/dist/lib/discovery-report.js +57 -0
- package/dist/lib/fetch-docs.d.ts +30 -0
- package/dist/lib/fetch-docs.js +171 -0
- package/dist/lib/generate-configs.d.ts +25 -0
- package/dist/lib/generate-configs.js +42 -0
- package/dist/lib/grader-api.d.ts +21 -0
- package/dist/lib/grader-api.js +34 -0
- package/dist/lib/grader-compare.d.ts +19 -0
- package/dist/lib/grader-compare.js +91 -0
- package/dist/lib/grader-consistency.d.ts +27 -0
- package/dist/lib/grader-consistency.js +79 -0
- package/dist/lib/grader-sensitivity.d.ts +19 -0
- package/dist/lib/grader-sensitivity.js +75 -0
- package/dist/lib/grader-validate.d.ts +19 -0
- package/dist/lib/grader-validate.js +78 -0
- package/dist/lib/measure-retrieval.d.ts +14 -0
- package/dist/lib/measure-retrieval.js +71 -0
- package/dist/lib/pr-comment.d.ts +16 -0
- package/dist/lib/pr-comment.js +28 -0
- package/dist/lib/readiness-report.d.ts +13 -0
- package/dist/lib/readiness-report.js +108 -0
- package/dist/lib/webhook-server.d.ts +11 -0
- package/dist/lib/webhook-server.js +24 -0
- package/dist/lib/weekly-digest.d.ts +24 -0
- package/dist/lib/weekly-digest.js +148 -0
- package/dist/orchestration/build-app-context.d.ts +27 -0
- package/dist/orchestration/build-app-context.js +81 -0
- package/dist/orchestration/build-step-sequence.d.ts +15 -0
- package/dist/orchestration/build-step-sequence.js +84 -0
- package/dist/orchestration/config-to-source-overrides.d.ts +9 -0
- package/dist/orchestration/config-to-source-overrides.js +28 -0
- package/dist/orchestration/env-bridge.d.ts +21 -0
- package/dist/orchestration/env-bridge.js +66 -0
- package/dist/orchestration/index.d.ts +11 -0
- package/dist/orchestration/index.js +11 -0
- package/dist/orchestration/pipeline-orchestrator.d.ts +24 -0
- package/dist/orchestration/pipeline-orchestrator.js +153 -0
- package/dist/orchestration/step-runner.d.ts +20 -0
- package/dist/orchestration/step-runner.js +88 -0
- package/dist/orchestration/steps/calculate-scores-step.d.ts +13 -0
- package/dist/orchestration/steps/calculate-scores-step.js +95 -0
- package/dist/orchestration/steps/callback-step.d.ts +24 -0
- package/dist/orchestration/steps/callback-step.js +76 -0
- package/dist/orchestration/steps/compare-step.d.ts +14 -0
- package/dist/orchestration/steps/compare-step.js +92 -0
- package/dist/orchestration/steps/discovery-report-step.d.ts +13 -0
- package/dist/orchestration/steps/discovery-report-step.js +55 -0
- package/dist/orchestration/steps/fetch-docs-shell.d.ts +17 -0
- package/dist/orchestration/steps/fetch-docs-shell.js +30 -0
- package/dist/orchestration/steps/fetch-docs-step.d.ts +14 -0
- package/dist/orchestration/steps/fetch-docs-step.js +135 -0
- package/dist/orchestration/steps/gap-analysis-step.d.ts +16 -0
- package/dist/orchestration/steps/gap-analysis-step.js +136 -0
- package/dist/orchestration/steps/generate-configs-step.d.ts +14 -0
- package/dist/orchestration/steps/generate-configs-step.js +85 -0
- package/dist/orchestration/steps/grader-consistency-step.d.ts +13 -0
- package/dist/orchestration/steps/grader-consistency-step.js +64 -0
- package/dist/orchestration/steps/index.d.ts +19 -0
- package/dist/orchestration/steps/index.js +19 -0
- package/dist/orchestration/steps/mirror-repo-tasks-step.d.ts +21 -0
- package/dist/orchestration/steps/mirror-repo-tasks-step.js +94 -0
- package/dist/orchestration/steps/publish-report-step.d.ts +26 -0
- package/dist/orchestration/steps/publish-report-step.js +216 -0
- package/dist/orchestration/steps/readiness-step.d.ts +13 -0
- package/dist/orchestration/steps/readiness-step.js +91 -0
- package/dist/orchestration/steps/report-step.d.ts +12 -0
- package/dist/orchestration/steps/report-step.js +49 -0
- package/dist/orchestration/steps/run-eval-step.d.ts +17 -0
- package/dist/orchestration/steps/run-eval-step.js +195 -0
- package/dist/orchestration/steps/validate-step.d.ts +12 -0
- package/dist/orchestration/steps/validate-step.js +41 -0
- package/dist/pipeline/agent-behavior-report.d.ts +53 -0
- package/dist/pipeline/agent-behavior-report.js +132 -0
- package/dist/pipeline/attribution.d.ts +47 -0
- package/dist/pipeline/attribution.js +226 -0
- package/dist/pipeline/baseline.d.ts +37 -0
- package/dist/pipeline/baseline.js +141 -0
- package/dist/pipeline/cache.d.ts +101 -0
- package/dist/pipeline/cache.js +283 -0
- package/dist/pipeline/calculate-scores.d.ts +102 -0
- package/dist/pipeline/calculate-scores.js +1128 -0
- package/dist/pipeline/callback-delivery.d.ts +50 -0
- package/dist/pipeline/callback-delivery.js +89 -0
- package/dist/pipeline/checks.d.ts +39 -0
- package/dist/pipeline/checks.js +280 -0
- package/dist/pipeline/classify-url.d.ts +61 -0
- package/dist/pipeline/classify-url.js +93 -0
- package/dist/pipeline/compare.d.ts +31 -0
- package/dist/pipeline/compare.js +208 -0
- package/dist/pipeline/coverage-audit.d.ts +39 -0
- package/dist/pipeline/coverage-audit.js +165 -0
- package/dist/pipeline/degradations.d.ts +85 -0
- package/dist/pipeline/degradations.js +242 -0
- package/dist/pipeline/discovery-report.d.ts +55 -0
- package/dist/pipeline/discovery-report.js +178 -0
- package/dist/pipeline/eval-constants.d.ts +68 -0
- package/dist/pipeline/eval-constants.js +111 -0
- package/dist/pipeline/eval-fingerprint.d.ts +66 -0
- package/dist/pipeline/eval-fingerprint.js +175 -0
- package/dist/pipeline/expand-tasks.d.ts +220 -0
- package/dist/pipeline/expand-tasks.js +421 -0
- package/dist/pipeline/failure-modes.d.ts +46 -0
- package/dist/pipeline/failure-modes.js +348 -0
- package/dist/pipeline/fetch-url-content.d.ts +44 -0
- package/dist/pipeline/fetch-url-content.js +93 -0
- package/dist/pipeline/gap-analysis.d.ts +48 -0
- package/dist/pipeline/gap-analysis.js +231 -0
- package/dist/pipeline/generate-configs.d.ts +72 -0
- package/dist/pipeline/generate-configs.js +395 -0
- package/dist/pipeline/grader-api.d.ts +49 -0
- package/dist/pipeline/grader-api.js +200 -0
- package/dist/pipeline/grader-compare-runner.d.ts +44 -0
- package/dist/pipeline/grader-compare-runner.js +301 -0
- package/dist/pipeline/grader-comparison.d.ts +111 -0
- package/dist/pipeline/grader-comparison.js +161 -0
- package/dist/pipeline/grader-consistency-runner.d.ts +60 -0
- package/dist/pipeline/grader-consistency-runner.js +270 -0
- package/dist/pipeline/grader-consistency.d.ts +103 -0
- package/dist/pipeline/grader-consistency.js +146 -0
- package/dist/pipeline/grader-sensitivity-runner.d.ts +40 -0
- package/dist/pipeline/grader-sensitivity-runner.js +282 -0
- package/dist/pipeline/grader-sensitivity.d.ts +94 -0
- package/dist/pipeline/grader-sensitivity.js +144 -0
- package/dist/pipeline/grader-validate-runner.d.ts +38 -0
- package/dist/pipeline/grader-validate-runner.js +229 -0
- package/dist/pipeline/grader-validation.d.ts +107 -0
- package/dist/pipeline/grader-validation.js +169 -0
- package/dist/pipeline/map-request-to-config.d.ts +19 -0
- package/dist/pipeline/map-request-to-config.js +80 -0
- package/dist/pipeline/measure-retrieval.d.ts +59 -0
- package/dist/pipeline/measure-retrieval.js +111 -0
- package/dist/pipeline/mirror-repo-tasks.d.ts +86 -0
- package/dist/pipeline/mirror-repo-tasks.js +350 -0
- package/dist/pipeline/plan-format.d.ts +33 -0
- package/dist/pipeline/plan-format.js +202 -0
- package/dist/pipeline/plan.d.ts +169 -0
- package/dist/pipeline/plan.js +708 -0
- package/dist/pipeline/pr-comment.d.ts +19 -0
- package/dist/pipeline/pr-comment.js +502 -0
- package/dist/pipeline/probe.d.ts +52 -0
- package/dist/pipeline/probe.js +390 -0
- package/dist/pipeline/provenance.d.ts +47 -0
- package/dist/pipeline/provenance.js +146 -0
- package/dist/pipeline/readiness-report.d.ts +87 -0
- package/dist/pipeline/readiness-report.js +205 -0
- package/dist/pipeline/release-classification.d.ts +54 -0
- package/dist/pipeline/release-classification.js +238 -0
- package/dist/pipeline/release-report.d.ts +37 -0
- package/dist/pipeline/release-report.js +222 -0
- package/dist/pipeline/repo-eval-comment.d.ts +37 -0
- package/dist/pipeline/repo-eval-comment.js +165 -0
- package/dist/pipeline/repo-threshold-evaluator.d.ts +89 -0
- package/dist/pipeline/repo-threshold-evaluator.js +162 -0
- package/dist/pipeline/resolve-mappings.d.ts +35 -0
- package/dist/pipeline/resolve-mappings.js +72 -0
- package/dist/pipeline/retrieval-metrics.d.ts +39 -0
- package/dist/pipeline/retrieval-metrics.js +136 -0
- package/dist/pipeline/reverse-mapping.d.ts +67 -0
- package/dist/pipeline/reverse-mapping.js +88 -0
- package/dist/pipeline/schemas.d.ts +9 -0
- package/dist/pipeline/schemas.js +9 -0
- package/dist/pipeline/steps/calculate-scores-step.d.ts +11 -0
- package/dist/pipeline/steps/calculate-scores-step.js +89 -0
- package/dist/pipeline/steps/compare-step.d.ts +18 -0
- package/dist/pipeline/steps/compare-step.js +90 -0
- package/dist/pipeline/steps/eval-step.d.ts +53 -0
- package/dist/pipeline/steps/eval-step.js +347 -0
- package/dist/pipeline/steps/fetch-docs-step.d.ts +11 -0
- package/dist/pipeline/steps/fetch-docs-step.js +84 -0
- package/dist/pipeline/steps/generate-configs-step.d.ts +11 -0
- package/dist/pipeline/steps/generate-configs-step.js +98 -0
- package/dist/pipeline/steps/grader-consistency-step.d.ts +21 -0
- package/dist/pipeline/steps/grader-consistency-step.js +74 -0
- package/dist/pipeline/steps/publish-report-step.d.ts +57 -0
- package/dist/pipeline/steps/publish-report-step.js +243 -0
- package/dist/pipeline/steps/report-step.d.ts +13 -0
- package/dist/pipeline/steps/report-step.js +56 -0
- package/dist/pipeline/steps/update-scores-step.d.ts +11 -0
- package/dist/pipeline/steps/update-scores-step.js +42 -0
- package/dist/pipeline/targeted-loo.d.ts +88 -0
- package/dist/pipeline/targeted-loo.js +203 -0
- package/dist/pipeline/thresholds.d.ts +27 -0
- package/dist/pipeline/thresholds.js +245 -0
- package/dist/pipeline/types.d.ts +10 -0
- package/dist/pipeline/types.js +10 -0
- package/dist/pipeline/validate.d.ts +67 -0
- package/dist/pipeline/validate.js +406 -0
- package/dist/pipeline/webhook-server.d.ts +37 -0
- package/dist/pipeline/webhook-server.js +133 -0
- package/dist/report-store.d.ts +84 -0
- package/dist/report-store.js +208 -0
- package/dist/sanity/client.d.ts +38 -0
- package/dist/sanity/client.js +86 -0
- package/dist/sanity/portable-text.d.ts +11 -0
- package/dist/sanity/portable-text.js +211 -0
- package/dist/sanity/queries.d.ts +133 -0
- package/dist/sanity/queries.js +300 -0
- package/dist/schedules/digest.d.ts +116 -0
- package/dist/schedules/digest.js +156 -0
- package/dist/schedules/index.d.ts +12 -0
- package/dist/schedules/index.js +10 -0
- package/dist/schedules/loader.d.ts +31 -0
- package/dist/schedules/loader.js +73 -0
- package/dist/schedules/schema.d.ts +9 -0
- package/dist/schedules/schema.js +9 -0
- package/dist/scripts/agent-behavior-report.d.ts +19 -0
- package/dist/scripts/agent-behavior-report.js +315 -0
- package/dist/scripts/baseline.d.ts +43 -0
- package/dist/scripts/baseline.js +267 -0
- package/dist/scripts/calculate-scores.d.ts +166 -0
- package/dist/scripts/calculate-scores.js +1296 -0
- package/dist/scripts/compare.d.ts +22 -0
- package/dist/scripts/compare.js +334 -0
- package/dist/scripts/coverage-audit.d.ts +44 -0
- package/dist/scripts/coverage-audit.js +209 -0
- package/dist/scripts/debug-eval.d.ts +19 -0
- package/dist/scripts/debug-eval.js +73 -0
- package/dist/scripts/discovery-report.d.ts +58 -0
- package/dist/scripts/discovery-report.js +250 -0
- package/dist/scripts/fetch-docs.d.ts +35 -0
- package/dist/scripts/fetch-docs.js +472 -0
- package/dist/scripts/generate-configs.d.ts +66 -0
- package/dist/scripts/generate-configs.js +459 -0
- package/dist/scripts/grader-api.d.ts +27 -0
- package/dist/scripts/grader-api.js +206 -0
- package/dist/scripts/grader-compare.d.ts +22 -0
- package/dist/scripts/grader-compare.js +368 -0
- package/dist/scripts/grader-consistency.d.ts +20 -0
- package/dist/scripts/grader-consistency.js +313 -0
- package/dist/scripts/grader-sensitivity.d.ts +22 -0
- package/dist/scripts/grader-sensitivity.js +354 -0
- package/dist/scripts/grader-validate.d.ts +19 -0
- package/dist/scripts/grader-validate.js +267 -0
- package/dist/scripts/measure-retrieval.d.ts +10 -0
- package/dist/scripts/measure-retrieval.js +145 -0
- package/dist/scripts/migrate-tasks-to-content-lake.d.ts +24 -0
- package/dist/scripts/migrate-tasks-to-content-lake.js +327 -0
- package/dist/scripts/pipeline.d.ts +76 -0
- package/dist/scripts/pipeline.js +1031 -0
- package/dist/scripts/pr-comment.d.ts +10 -0
- package/dist/scripts/pr-comment.js +510 -0
- package/dist/scripts/readiness-report.d.ts +88 -0
- package/dist/scripts/readiness-report.js +342 -0
- package/dist/scripts/update-quality-scores.d.ts +15 -0
- package/dist/scripts/update-quality-scores.js +184 -0
- package/dist/scripts/validate-task-sources.d.ts +21 -0
- package/dist/scripts/validate-task-sources.js +210 -0
- package/dist/scripts/validate.d.ts +13 -0
- package/dist/scripts/validate.js +79 -0
- package/dist/scripts/webhook-server.d.ts +26 -0
- package/dist/scripts/webhook-server.js +147 -0
- package/dist/scripts/weekly-digest.d.ts +24 -0
- package/dist/scripts/weekly-digest.js +144 -0
- package/dist/sinks/bigquery/index.d.ts +131 -0
- package/dist/sinks/bigquery/index.js +222 -0
- package/dist/sinks/format-slack.d.ts +64 -0
- package/dist/sinks/format-slack.js +306 -0
- package/dist/sinks/index.d.ts +23 -0
- package/dist/sinks/index.js +18 -0
- package/dist/sinks/loader.d.ts +18 -0
- package/dist/sinks/loader.js +82 -0
- package/dist/sinks/retry.d.ts +24 -0
- package/dist/sinks/retry.js +52 -0
- package/dist/sinks/schema.d.ts +9 -0
- package/dist/sinks/schema.js +9 -0
- package/dist/sinks/slack/format.d.ts +65 -0
- package/dist/sinks/slack/format.js +327 -0
- package/dist/sinks/slack/index.d.ts +27 -0
- package/dist/sinks/slack/index.js +78 -0
- package/dist/sinks/slack-sink.d.ts +27 -0
- package/dist/sinks/slack-sink.js +78 -0
- package/dist/sinks/types.d.ts +59 -0
- package/dist/sinks/types.js +44 -0
- package/dist/sinks/webhook/index.d.ts +19 -0
- package/dist/sinks/webhook/index.js +50 -0
- package/dist/sinks/webhook-sink.d.ts +19 -0
- package/dist/sinks/webhook-sink.js +50 -0
- package/dist/sources.d.ts +104 -0
- package/dist/sources.js +292 -0
- package/dist/webhook/budget.d.ts +42 -0
- package/dist/webhook/budget.js +60 -0
- package/dist/webhook/debounce.d.ts +67 -0
- package/dist/webhook/debounce.js +76 -0
- package/dist/webhook/dispatch.d.ts +45 -0
- package/dist/webhook/dispatch.js +84 -0
- package/dist/webhook/eval-request-handler.d.ts +87 -0
- package/dist/webhook/eval-request-handler.js +181 -0
- package/dist/webhook/handler.d.ts +88 -0
- package/dist/webhook/handler.js +203 -0
- package/dist/webhook/index.d.ts +17 -0
- package/dist/webhook/index.js +12 -0
- package/dist/webhook/types.d.ts +109 -0
- package/dist/webhook/types.js +10 -0
- package/package.json +72 -0
- package/tasks/.expanded.agentic.yaml +51 -0
- package/tasks/.expanded.yaml +66 -0
- package/tasks/frameworks.yaml +98 -0
- package/tasks/functions.yaml +51 -0
- package/tasks/groq.yaml +216 -0
- package/tasks/nextjs-live.yaml +62 -0
- package/tasks/studio-setup.yaml +111 -0
- package/tasks/visual-editing.yaml +120 -0
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* lib/grader-compare.ts — DEPRECATED re-export shim.
|
|
3
|
+
*
|
|
4
|
+
* The real implementation has moved to pipeline/grader-compare-runner.ts.
|
|
5
|
+
* The pure analysis functions live in pipeline/grader-comparison.ts.
|
|
6
|
+
* This shim preserves backward compatibility for direct CLI invocation.
|
|
7
|
+
*
|
|
8
|
+
* TODO: Update all importers to use pipeline/ modules, then delete this file.
|
|
9
|
+
*
|
|
10
|
+
* @deprecated Import from ../pipeline/grader-compare-runner.js instead.
|
|
11
|
+
*/
|
|
12
|
+
// oxlint-disable-next-line import/no-unassigned-import -- side-effect: loads .env into process.env
|
|
13
|
+
import "dotenv/config";
|
|
14
|
+
import { dirname, resolve } from "path";
|
|
15
|
+
import { fileURLToPath } from "url";
|
|
16
|
+
import { load } from "js-yaml";
|
|
17
|
+
import { existsSync, readFileSync } from "fs";
|
|
18
|
+
// Re-export from pipeline modules
|
|
19
|
+
export { formatComparisonReport, runGraderCompare, } from "../pipeline/grader-compare-runner.js";
|
|
20
|
+
export { compareGraders, } from "../pipeline/grader-comparison.js";
|
|
21
|
+
import { runGraderCompare } from "../pipeline/grader-compare-runner.js";
|
|
22
|
+
// ES modules have no built-in __dirname, so derive this module's directory from import.meta.url.
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
23
|
+
// ROOT is two directory levels above this module's directory. It is used to locate config/models.yaml and is passed to runGraderCompare() as rootDir.
const ROOT = resolve(__dirname, "..", "..");
|
|
24
|
+
/**
 * Parse the command-line arguments of a direct grader-compare invocation.
 *
 * Recognized flags (each accepts `--flag value` or `--flag=value`):
 *   --candidate  Grader candidate id; may be repeated. The label is the text
 *                after the last ":" of the id, or the whole id when nothing
 *                follows the last ":".
 *   --format     Report format (default: "table").
 *   --output     Output path (default: undefined).
 *   --results    Results file path
 *                (default: "results/latest/eval-results.json").
 *
 * When no --candidate value is given, candidates are read from the
 * "grader-candidates" list of <ROOT>/config/models.yaml; when that file does
 * not exist the candidate list is empty.
 *
 * @returns {{candidates: Array<{id: string, label: string}>, format: string,
 *   outputPath: (string|undefined), resultsPath: string}}
 */
function parseCliArgs() {
    const args = process.argv.slice(2);
    // A token starting with "--" is another flag, so it must never be
    // consumed as the value of the flag in front of it.
    const isFlag = (token) => token.startsWith("--");
    // `split(":").pop()` always yields a string, so `??` could never fall
    // back; `||` makes the fallback effective for ids that end in ":".
    const labelFromId = (id) => id.split(":").pop() || id;
    // Collect every value given for `--name`, in command-line order.
    function getAllOptions(name) {
        const flag = `--${name}`;
        const inlinePrefix = `${flag}=`;
        const values = [];
        for (let i = 0; i < args.length; i++) {
            const token = args[i];
            if (token.startsWith(inlinePrefix)) {
                values.push(token.slice(inlinePrefix.length));
            }
            else if (token === flag && i + 1 < args.length && !isFlag(args[i + 1])) {
                values.push(args[i + 1]);
            }
        }
        return values;
    }
    // First value given for `--name`, or undefined when there is none.
    function getOption(name) {
        return getAllOptions(name)[0];
    }
    const candidateArgs = getAllOptions("candidate");
    let candidates = [];
    if (candidateArgs.length > 0) {
        candidates = candidateArgs.map((id) => ({ id, label: labelFromId(id) }));
    }
    else {
        // No explicit candidates: fall back to the packaged model config.
        const modelsPath = resolve(ROOT, "config", "models.yaml");
        if (existsSync(modelsPath)) {
            const data = load(readFileSync(modelsPath, "utf-8"));
            const configCandidates = data?.["grader-candidates"] ?? [];
            candidates = configCandidates.map((c) => ({
                id: c.id,
                // An explicit label in the config wins over the derived one.
                label: c.label ?? labelFromId(c.id),
            }));
        }
    }
    return {
        candidates,
        format: getOption("format") ?? "table",
        outputPath: getOption("output"),
        resultsPath: getOption("results") ?? "results/latest/eval-results.json",
    };
}
|
|
70
|
+
/**
 * Deprecated CLI entry point, kept for backward compatibility.
 *
 * Reads the options from the command line via parseCliArgs() and hands them,
 * together with ROOT as `rootDir`, to runGraderCompare().
 *
 * @deprecated Use runGraderCompare() from pipeline/grader-compare-runner.js instead.
 * @returns {Promise<void>} Settles once runGraderCompare() has settled.
 */
export async function main() {
    const cli = parseCliArgs();
    const runnerOptions = {
        candidates: cli.candidates,
        format: cli.format,
        outputPath: cli.outputPath,
        resultsPath: cli.resultsPath,
        rootDir: ROOT,
    };
    await runGraderCompare(runnerOptions);
}
|
|
84
|
+
// Run main() only when the script path Node was started with (process.argv[1])
// ends with this file's source (.ts) or compiled (.js) name; a plain import of
// this module with a different entry script does not trigger it.
// NOTE(review): this is a suffix match, so any entry script whose path ends in
// "grader-compare.js" also satisfies it — confirm no other entry point with
// that name imports this shim.
if (["grader-compare.ts", "grader-compare.js"].some((suffix) => process.argv[1]?.endsWith(suffix))) {
    main().catch((err) => {
        // Report the failure and exit with status 1.
        console.error("❌ Fatal error:", err);
        process.exit(1);
    });
}
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* lib/grader-consistency.ts — DEPRECATED re-export shim.
|
|
3
|
+
*
|
|
4
|
+
* The real implementation has moved to pipeline/grader-consistency-runner.ts.
|
|
5
|
+
* The pure analysis functions live in pipeline/grader-consistency.ts.
|
|
6
|
+
* This shim preserves backward compatibility for direct CLI invocation.
|
|
7
|
+
*
|
|
8
|
+
* TODO: Update all importers to use pipeline/ modules, then delete this file.
|
|
9
|
+
*
|
|
10
|
+
* @deprecated Import from ../pipeline/grader-consistency-runner.js instead.
|
|
11
|
+
*/
|
|
12
|
+
import "dotenv/config";
|
|
13
|
+
export { extractGradingJudgments, formatConsistencyReport, runGraderConsistency, type GraderConsistencyRunnerOptions, } from "../pipeline/grader-consistency-runner.js";
|
|
14
|
+
export { analyzeConsistency, type GraderConsistency, type ReplicatedGrading, } from "../pipeline/grader-consistency.js";
|
|
15
|
+
/** Optional overrides accepted by the legacy main() function. */
export interface GraderConsistencyOptions {
    /** Path to eval-results.json */
    resultsPath?: string;
    /** Number of additional grading replications (default: 5) */
    replications?: number;
}
|
|
22
|
+
/**
 * Legacy main() entry point: a wrapper around runGraderConsistency() that
 * adds CLI argument parsing.
 *
 * @param options Optional replications / resultsPath overrides.
 * @deprecated Use runGraderConsistency() from pipeline/grader-consistency-runner.js instead.
 */
export declare function main(options?: GraderConsistencyOptions): Promise<void>;
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* lib/grader-consistency.ts — DEPRECATED re-export shim.
|
|
3
|
+
*
|
|
4
|
+
* The real implementation has moved to pipeline/grader-consistency-runner.ts.
|
|
5
|
+
* The pure analysis functions live in pipeline/grader-consistency.ts.
|
|
6
|
+
* This shim preserves backward compatibility for direct CLI invocation.
|
|
7
|
+
*
|
|
8
|
+
* TODO: Update all importers to use pipeline/ modules, then delete this file.
|
|
9
|
+
*
|
|
10
|
+
* @deprecated Import from ../pipeline/grader-consistency-runner.js instead.
|
|
11
|
+
*/
|
|
12
|
+
// oxlint-disable-next-line import/no-unassigned-import -- side-effect: loads .env into process.env
|
|
13
|
+
import "dotenv/config";
|
|
14
|
+
import { dirname, join, resolve } from "path";
|
|
15
|
+
import { fileURLToPath } from "url";
|
|
16
|
+
// Re-export from pipeline modules
|
|
17
|
+
export { extractGradingJudgments, formatConsistencyReport, runGraderConsistency, } from "../pipeline/grader-consistency-runner.js";
|
|
18
|
+
export { analyzeConsistency, } from "../pipeline/grader-consistency.js";
|
|
19
|
+
import { runGraderConsistency } from "../pipeline/grader-consistency-runner.js";
|
|
20
|
+
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
21
|
+
const ROOT = resolve(__dirname, "..", "..");
|
|
22
|
+
// ---------------------------------------------------------------------------
|
|
23
|
+
// CLI argument parsing (used when called from CLI)
|
|
24
|
+
// ---------------------------------------------------------------------------
|
|
25
|
+
/**
 * Parse the grader-consistency command line (everything after the script
 * path in process.argv).
 *
 * When a help flag is present the usage text is printed and the process
 * exits with status 0.
 *
 * @returns {{ replications: number, resultsPath: string }} The replication
 *   count (default 5) and the results file path (default
 *   <ROOT>/results/latest/eval-results.json).
 * @throws {Error} When --replications is not a non-negative integer.
 */
function parseCliArgs() {
    const args = process.argv.slice(2);
    const getOption = (name) => {
        const idx = args.indexOf(`--${name}`);
        return idx !== -1 && idx + 1 < args.length ? args[idx + 1] : undefined;
    };
    // The usage text documents "-h", but the old check only matched "--h",
    // so the short flag was silently ignored. "--h" stays accepted for
    // backward compatibility.
    const showHelp = ["--help", "-h", "--h"].some((flag) => args.includes(flag));
    if (showHelp) {
        console.log(`
Usage: pnpm grader-consistency [options]

Measure grader consistency by re-grading existing eval responses N times.

Options:
  --replications <n>   Number of additional grading replications (default: 5)
  --results <path>     Path to eval-results.json (default: results/latest/eval-results.json)
  --help, -h           Show this help
`);
        process.exit(0);
    }
    const replicationsStr = getOption("replications") ?? "5";
    const replications = Number.parseInt(replicationsStr, 10);
    // NaN is not nullish, so it would slip past the `?? 5` defaults in main()
    // and reach the runner; reject it here with a clear message instead.
    if (Number.isNaN(replications) || replications < 0) {
        throw new Error(`Invalid --replications value "${replicationsStr}": expected a non-negative integer.`);
    }
    return {
        replications,
        resultsPath: getOption("results") ??
            join(ROOT, "results", "latest", "eval-results.json"),
    };
}
|
|
55
|
+
/**
 * Legacy main() entry point — wraps runGraderConsistency() with CLI arg parsing.
 *
 * When a truthy `options` object is passed, the command line is not parsed
 * and the settings come from `options`; otherwise they come from
 * parseCliArgs(). Missing values fall back to 5 replications and
 * <ROOT>/results/latest/eval-results.json.
 *
 * @param {{ replications?: number, resultsPath?: string }} [options]
 * @deprecated Use runGraderConsistency() from pipeline/grader-consistency-runner.js instead.
 * @returns {Promise<void>} Settles after runGraderConsistency() has been awaited.
 */
export async function main(options) {
    const settings = options ? options : parseCliArgs();
    await runGraderConsistency({
        replications: settings.replications ?? 5,
        resultsPath: settings.resultsPath ??
            join(ROOT, "results", "latest", "eval-results.json"),
        rootDir: ROOT,
    });
}
|
|
72
|
+
// Run main() only when the executed script path ends with this module's
// file name (source .ts or compiled .js); importing the module does nothing.
if (["grader-consistency.ts", "grader-consistency.js"].some((suffix) => process.argv[1]?.endsWith(suffix))) {
    main().catch((failure) => {
        console.error("❌ Fatal error:", failure);
        process.exit(1);
    });
}
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* lib/grader-sensitivity.ts — DEPRECATED re-export shim.
|
|
3
|
+
*
|
|
4
|
+
* The real implementation has moved to pipeline/grader-sensitivity-runner.ts.
|
|
5
|
+
* The pure analysis functions live in pipeline/grader-sensitivity.ts.
|
|
6
|
+
* This shim preserves backward compatibility for direct CLI invocation.
|
|
7
|
+
*
|
|
8
|
+
* TODO: Update all importers to use pipeline/ modules, then delete this file.
|
|
9
|
+
*
|
|
10
|
+
* @deprecated Import from ../pipeline/grader-sensitivity-runner.js instead.
|
|
11
|
+
*/
|
|
12
|
+
import "dotenv/config";
|
|
13
|
+
export { formatSensitivityReport, runGraderSensitivity, type GraderSensitivityRunnerOptions, } from "../pipeline/grader-sensitivity-runner.js";
|
|
14
|
+
export { analyzeSensitivity, type GraderSensitivityResult, type SensitivityPair, } from "../pipeline/grader-sensitivity.js";
|
|
15
|
+
/**
 * Legacy CLI entry point for the grader sensitivity run.
 *
 * @deprecated Use runGraderSensitivity() from pipeline/grader-sensitivity-runner.js instead.
 */
export declare function main(): Promise<void>;
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* lib/grader-sensitivity.ts — DEPRECATED re-export shim.
|
|
3
|
+
*
|
|
4
|
+
* The real implementation has moved to pipeline/grader-sensitivity-runner.ts.
|
|
5
|
+
* The pure analysis functions live in pipeline/grader-sensitivity.ts.
|
|
6
|
+
* This shim preserves backward compatibility for direct CLI invocation.
|
|
7
|
+
*
|
|
8
|
+
* TODO: Update all importers to use pipeline/ modules, then delete this file.
|
|
9
|
+
*
|
|
10
|
+
* @deprecated Import from ../pipeline/grader-sensitivity-runner.js instead.
|
|
11
|
+
*/
|
|
12
|
+
// oxlint-disable-next-line import/no-unassigned-import -- side-effect: loads .env into process.env
|
|
13
|
+
import "dotenv/config";
|
|
14
|
+
import { dirname, resolve } from "path";
|
|
15
|
+
import { fileURLToPath } from "url";
|
|
16
|
+
// Re-export from pipeline modules
|
|
17
|
+
export { formatSensitivityReport, runGraderSensitivity, } from "../pipeline/grader-sensitivity-runner.js";
|
|
18
|
+
export { analyzeSensitivity, } from "../pipeline/grader-sensitivity.js";
|
|
19
|
+
import { runGraderSensitivity } from "../pipeline/grader-sensitivity-runner.js";
|
|
20
|
+
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
21
|
+
const ROOT = resolve(__dirname, "..", "..");
|
|
22
|
+
// ---------------------------------------------------------------------------
|
|
23
|
+
// CLI argument parsing
|
|
24
|
+
// ---------------------------------------------------------------------------
|
|
25
|
+
/**
 * Parse the grader-sensitivity command line (everything after the script
 * path in process.argv).
 *
 * When a help flag is present the usage text is printed and the process
 * exits with status 0.
 *
 * @returns {{ areaFilter: string | undefined, format: string, outputPath: string | undefined }}
 *   The --area value, the --format value (default "table") and the
 *   --output value.
 */
function parseCliArgs() {
    const args = process.argv.slice(2);
    const getOption = (name) => {
        const idx = args.indexOf(`--${name}`);
        return idx !== -1 && idx + 1 < args.length ? args[idx + 1] : undefined;
    };
    // The usage text documents "-h", but the old check only matched "--h",
    // so the short flag was silently ignored. "--h" stays accepted for
    // backward compatibility.
    const showHelp = ["--help", "-h", "--h"].some((flag) => args.includes(flag));
    if (showHelp) {
        console.log(`
Usage: pnpm grader-sensitivity [options]

Test grader discrimination power using programmatic code degradation.

Options:
  --area <name>     Test only reference solutions in this area (e.g., groq)
  --format <fmt>    Output format: table (default) or json
  --output <path>   Write JSON report to file
  --help, -h        Show this help
`);
        process.exit(0);
    }
    return {
        areaFilter: getOption("area"),
        format: getOption("format") ?? "table",
        outputPath: getOption("output"),
    };
}
|
|
55
|
+
/**
 * Legacy main() entry point.
 *
 * Reads the options produced by parseCliArgs() and forwards areaFilter,
 * format and outputPath, plus ROOT as rootDir, to runGraderSensitivity().
 *
 * @deprecated Use runGraderSensitivity() from pipeline/grader-sensitivity-runner.js instead.
 * @returns {Promise<void>} Settles after runGraderSensitivity() has been awaited.
 */
export async function main() {
    const cli = parseCliArgs();
    await runGraderSensitivity({
        areaFilter: cli.areaFilter,
        format: cli.format,
        outputPath: cli.outputPath,
        rootDir: ROOT,
    });
}
|
|
68
|
+
// Run main() only when the executed script path ends with this module's
// file name (source .ts or compiled .js); importing the module does nothing.
if (["grader-sensitivity.ts", "grader-sensitivity.js"].some((suffix) => process.argv[1]?.endsWith(suffix))) {
    main().catch((failure) => {
        console.error("❌ Fatal error:", failure);
        process.exit(1);
    });
}
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* lib/grader-validate.ts — DEPRECATED re-export shim.
|
|
3
|
+
*
|
|
4
|
+
* The real implementation has moved to pipeline/grader-validate-runner.ts.
|
|
5
|
+
* The pure analysis functions live in pipeline/grader-validation.ts.
|
|
6
|
+
* This shim preserves backward compatibility for direct CLI invocation.
|
|
7
|
+
*
|
|
8
|
+
* TODO: Update all importers to use pipeline/ modules, then delete this file.
|
|
9
|
+
*
|
|
10
|
+
* @deprecated Import from ../pipeline/grader-validate-runner.js instead.
|
|
11
|
+
*/
|
|
12
|
+
import "dotenv/config";
|
|
13
|
+
export { formatValidationReport, runGraderValidate, type GraderValidateRunnerOptions, } from "../pipeline/grader-validate-runner.js";
|
|
14
|
+
export { classifyCorrelation, validateGrader, type GraderValidation, type HumanReferenceGrade, } from "../pipeline/grader-validation.js";
|
|
15
|
+
/**
 * Legacy CLI entry point for grader validation.
 *
 * @deprecated Use runGraderValidate() from pipeline/grader-validate-runner.js instead.
 */
export declare function main(): Promise<void>;
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* lib/grader-validate.ts — DEPRECATED re-export shim.
|
|
3
|
+
*
|
|
4
|
+
* The real implementation has moved to pipeline/grader-validate-runner.ts.
|
|
5
|
+
* The pure analysis functions live in pipeline/grader-validation.ts.
|
|
6
|
+
* This shim preserves backward compatibility for direct CLI invocation.
|
|
7
|
+
*
|
|
8
|
+
* TODO: Update all importers to use pipeline/ modules, then delete this file.
|
|
9
|
+
*
|
|
10
|
+
* @deprecated Import from ../pipeline/grader-validate-runner.js instead.
|
|
11
|
+
*/
|
|
12
|
+
// oxlint-disable-next-line import/no-unassigned-import -- side-effect: loads .env into process.env
|
|
13
|
+
import "dotenv/config";
|
|
14
|
+
import { dirname, resolve } from "path";
|
|
15
|
+
import { fileURLToPath } from "url";
|
|
16
|
+
// Re-export from pipeline modules
|
|
17
|
+
export { formatValidationReport, runGraderValidate, } from "../pipeline/grader-validate-runner.js";
|
|
18
|
+
export { classifyCorrelation, validateGrader, } from "../pipeline/grader-validation.js";
|
|
19
|
+
import { runGraderValidate } from "../pipeline/grader-validate-runner.js";
|
|
20
|
+
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
21
|
+
const ROOT = resolve(__dirname, "..", "..");
|
|
22
|
+
// ---------------------------------------------------------------------------
|
|
23
|
+
// CLI argument parsing
|
|
24
|
+
// ---------------------------------------------------------------------------
|
|
25
|
+
/**
 * Parse the grader-validate command line (everything after the script path
 * in process.argv).
 *
 * When a help flag is present the usage text is printed and the process
 * exits with status 0.
 *
 * @returns {{ graderOverride: string | undefined, maeThreshold: number }}
 *   The --grader value and the --threshold value parsed as a float
 *   (default 10).
 * @throws {Error} When --threshold is given but is not a number.
 */
function parseCliArgs() {
    const args = process.argv.slice(2);
    const getOption = (name) => {
        const idx = args.indexOf(`--${name}`);
        return idx !== -1 && idx + 1 < args.length ? args[idx + 1] : undefined;
    };
    // The usage text documents "-h", but the old check only matched "--h",
    // so the short flag was silently ignored. "--h" stays accepted for
    // backward compatibility.
    const showHelp = ["--help", "-h", "--h"].some((flag) => args.includes(flag));
    if (showHelp) {
        console.log(`
Usage: pnpm grader-validate [options]

Validate grader accuracy against human reference grades.

Options:
  --grader <model>   Grader model to validate (default: from config/models.yaml)
  --threshold <n>    MAE threshold for pass/fail (default: 10)
  --help, -h         Show this help
`);
        process.exit(0);
    }
    const thresholdStr = getOption("threshold");
    const maeThreshold = thresholdStr ? Number.parseFloat(thresholdStr) : 10;
    // A NaN threshold would be handed to the runner unchanged; fail early
    // with a message that names the offending flag.
    if (Number.isNaN(maeThreshold)) {
        throw new Error(`Invalid --threshold value "${thresholdStr}": expected a number.`);
    }
    return {
        graderOverride: getOption("grader"),
        maeThreshold,
    };
}
|
|
54
|
+
/**
 * Legacy main() entry point.
 *
 * Passes the parsed --grader and --threshold values, plus ROOT as rootDir,
 * to runGraderValidate(). If the result's passesThreshold is falsy, the
 * failure is logged and the process exits with status 1.
 *
 * @deprecated Use runGraderValidate() from pipeline/grader-validate-runner.js instead.
 * @returns {Promise<void>}
 */
export async function main() {
    const cli = parseCliArgs();
    const outcome = await runGraderValidate({
        graderModel: cli.graderOverride,
        maeThreshold: cli.maeThreshold,
        rootDir: ROOT,
    });
    if (outcome.passesThreshold) {
        return;
    }
    // Threshold not met: report it and signal failure through the exit code.
    console.error(`\n ❌ VALIDATION FAILED: MAE ${outcome.overallMae} exceeds threshold ${cli.maeThreshold}`);
    process.exit(1);
}
|
|
71
|
+
// Run main() only when the executed script path ends with this module's
// file name (source .ts or compiled .js); importing the module does nothing.
if (["grader-validate.ts", "grader-validate.js"].some((suffix) => process.argv[1]?.endsWith(suffix))) {
    main().catch((failure) => {
        console.error("❌ Fatal error:", failure);
        process.exit(1);
    });
}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* lib/measure-retrieval.ts — DEPRECATED re-export shim.
|
|
3
|
+
*
|
|
4
|
+
* The real implementation has moved to pipeline/measure-retrieval.ts.
|
|
5
|
+
* This shim preserves backward compatibility for direct CLI invocation.
|
|
6
|
+
*
|
|
7
|
+
* TODO: Update all importers to use pipeline/measure-retrieval.ts, then delete this file.
|
|
8
|
+
*
|
|
9
|
+
* @deprecated Import from ../pipeline/measure-retrieval.js instead.
|
|
10
|
+
*/
|
|
11
|
+
import "dotenv/config";
|
|
12
|
+
export { calculateNDCG, calculateRecall, formatRetrievalTable, measureRetrieval, type MeasureRetrievalOptions, type RetrievalResult, type RetrievalSummary, type RetrieverFn, } from "../pipeline/measure-retrieval.js";
|
|
13
|
+
/**
 * Legacy CLI entry point for the retrieval measurement.
 *
 * @deprecated Use measureRetrieval() from pipeline/measure-retrieval.js instead.
 */
export declare function main(): Promise<void>;
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* lib/measure-retrieval.ts — DEPRECATED re-export shim.
|
|
3
|
+
*
|
|
4
|
+
* The real implementation has moved to pipeline/measure-retrieval.ts.
|
|
5
|
+
* This shim preserves backward compatibility for direct CLI invocation.
|
|
6
|
+
*
|
|
7
|
+
* TODO: Update all importers to use pipeline/measure-retrieval.ts, then delete this file.
|
|
8
|
+
*
|
|
9
|
+
* @deprecated Import from ../pipeline/measure-retrieval.js instead.
|
|
10
|
+
*/
|
|
11
|
+
// oxlint-disable-next-line import/no-unassigned-import -- side-effect: loads .env into process.env
|
|
12
|
+
import "dotenv/config";
|
|
13
|
+
import { writeFileSync, mkdirSync } from "fs";
import { join, dirname } from "path";
import { fileURLToPath } from "url";
import { getSanityClient } from "../sanity/client.js";
import { formatRetrievalTable, measureRetrieval, } from "../pipeline/measure-retrieval.js";
|
|
17
|
+
// Re-export pipeline types and functions
|
|
18
|
+
export { calculateNDCG, calculateRecall, formatRetrievalTable, measureRetrieval, } from "../pipeline/measure-retrieval.js";
|
|
19
|
+
// ---------------------------------------------------------------------------
|
|
20
|
+
// Sanity text search retriever (side-effecting — uses Sanity client)
|
|
21
|
+
// ---------------------------------------------------------------------------
|
|
22
|
+
/**
 * Fetch the slugs of the top-scoring "article" documents for a text query.
 *
 * The GROQ query excludes ids under the drafts path, scores title matches
 * with a boost of 3 and body-text matches with a boost of 1, orders by
 * score descending and keeps the first `k` results.
 *
 * @param {string} query Search text bound to the `$query` parameter.
 * @param {number} [k=10] Number of results to keep (`$k`).
 * @returns {Promise<Array>} The `slug` field of each result, in ranked order.
 */
async function retrieveDocsForQuery(query, k = 10) {
    const sanity = getSanityClient();
    const groq = `
    *[_type == "article" && !(_id in path("drafts.**"))]
    | score(
      boost(title match $query, 3),
      boost(pt::text(content) match $query, 1)
    )
    | order(_score desc)
    [0...$k] {
      "slug": slug.current,
      _score
    }
  `;
    const hits = await sanity.fetch(groq, { k, query });
    return hits.map((hit) => hit.slug);
}
|
|
38
|
+
// ---------------------------------------------------------------------------
|
|
39
|
+
// Legacy main() entry point
|
|
40
|
+
// ---------------------------------------------------------------------------
|
|
41
|
+
/**
 * Legacy CLI entry point: runs measureRetrieval() with the Sanity text-search
 * retriever, prints per-task metrics and the summary table, and writes the
 * summary to <ROOT>/results/latest/retrieval-results.json.
 *
 * @deprecated Use measureRetrieval() from pipeline/measure-retrieval.js instead.
 * @returns {Promise<void>}
 */
export async function main() {
    console.log("=== Sanity AI Literacy — Retrieval Quality Measurement ===\n");
    // fileURLToPath() decodes percent-escapes and handles Windows drive
    // letters; URL#pathname does neither, so a checkout under a path with
    // spaces (or on Windows) resolved to a directory that does not exist.
    const ROOT = join(dirname(fileURLToPath(import.meta.url)), "..", "..");
    const summary = await measureRetrieval({
        onProgress: (area, taskId, result) => {
            console.log(` ${taskId}:`);
            console.log(` Recall@5: ${(result.recall_at_5 * 100).toFixed(1)}%`);
            console.log(` Recall@10: ${(result.recall_at_10 * 100).toFixed(1)}%`);
            console.log(` NDCG@10: ${(result.ndcg_at_10 * 100).toFixed(1)}%`);
        },
        retriever: retrieveDocsForQuery,
        rootDir: ROOT,
    });
    // Print summary
    console.log();
    console.log(formatRetrievalTable(summary));
    // Persist results
    const outDir = join(ROOT, "results", "latest");
    mkdirSync(outDir, { recursive: true });
    writeFileSync(join(outDir, "retrieval-results.json"), JSON.stringify(summary, null, 2));
    console.log("\nResults written to results/latest/retrieval-results.json");
}
|
|
64
|
+
// Run main() only when the executed script path ends with this module's
// file name (source .ts or compiled .js), not when the module is imported.
if (["measure-retrieval.ts", "measure-retrieval.js"].some((suffix) => process.argv[1]?.endsWith(suffix))) {
    main().catch((failure) => {
        console.error("Fatal error:", failure);
        process.exit(1);
    });
}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* lib/pr-comment.ts — DEPRECATED re-export shim.
|
|
3
|
+
*
|
|
4
|
+
* The real implementation has moved to pipeline/pr-comment.ts.
|
|
5
|
+
*
|
|
6
|
+
* @deprecated Import from ../pipeline/pr-comment.js instead.
|
|
7
|
+
*/
|
|
8
|
+
export { generatePrComment, type PrCommentOptions, } from "../pipeline/pr-comment.js";
|
|
9
|
+
import type { PrCommentOptions } from "../pipeline/pr-comment.js";
|
|
10
|
+
/**
 * Legacy entry point for PR comment generation.
 *
 * @param options PrCommentOptions with `rootDir` made optional.
 * @deprecated Use generatePrComment() from pipeline/pr-comment.ts instead.
 */
export declare function main(options?: Omit<PrCommentOptions, "rootDir"> & {
    rootDir?: string;
}): void;
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* lib/pr-comment.ts — DEPRECATED re-export shim.
|
|
3
|
+
*
|
|
4
|
+
* The real implementation has moved to pipeline/pr-comment.ts.
|
|
5
|
+
*
|
|
6
|
+
* @deprecated Import from ../pipeline/pr-comment.js instead.
|
|
7
|
+
*/
|
|
8
|
+
import { dirname, resolve } from "path";
|
|
9
|
+
import { fileURLToPath } from "url";
|
|
10
|
+
export { generatePrComment, } from "../pipeline/pr-comment.js";
|
|
11
|
+
import { generatePrComment } from "../pipeline/pr-comment.js";
|
|
12
|
+
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
13
|
+
const ROOT = resolve(__dirname, "..", "..");
|
|
14
|
+
/**
 * Legacy main() entry point.
 *
 * Calls generatePrComment() with the caller's outputPath and promptfooUrl;
 * rootDir falls back to ROOT when it is not supplied.
 *
 * @param {{ rootDir?: string, outputPath?: string, promptfooUrl?: string }} [options]
 * @deprecated Use generatePrComment() from pipeline/pr-comment.ts instead.
 */
export function main(options) {
    const rootDir = options?.rootDir ?? ROOT;
    const outputPath = options?.outputPath;
    const promptfooUrl = options?.promptfooUrl;
    generatePrComment({ rootDir, outputPath, promptfooUrl });
}
|
|
25
|
+
// Run main() only when the executed script path ends with this module's
// file name (source .ts or compiled .js).
if (["pr-comment.ts", "pr-comment.js"].some((suffix) => process.argv[1]?.endsWith(suffix))) {
    main();
}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* lib/readiness-report.ts — DEPRECATED re-export shim.
|
|
3
|
+
*
|
|
4
|
+
* The real implementation has moved to pipeline/readiness-report.ts.
|
|
5
|
+
*
|
|
6
|
+
* @deprecated Import from ../pipeline/readiness-report.js instead.
|
|
7
|
+
*/
|
|
8
|
+
export { formatReadinessMarkdown, generateReadinessReport, type DimensionCheck, type HistoryEntry, type ReadinessReport, } from "../pipeline/readiness-report.js";
|
|
9
|
+
/**
 * Legacy CLI entry point for the readiness report.
 *
 * @deprecated Use generateReadinessReport() + formatReadinessMarkdown() directly.
 */
export declare function main(): void;
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* lib/readiness-report.ts — DEPRECATED re-export shim.
|
|
3
|
+
*
|
|
4
|
+
* The real implementation has moved to pipeline/readiness-report.ts.
|
|
5
|
+
*
|
|
6
|
+
* @deprecated Import from ../pipeline/readiness-report.js instead.
|
|
7
|
+
*/
|
|
8
|
+
import { existsSync, readFileSync, readdirSync, writeFileSync } from "node:fs";
|
|
9
|
+
import { dirname, join, resolve } from "node:path";
|
|
10
|
+
import { fileURLToPath } from "node:url";
|
|
11
|
+
import { load } from "js-yaml";
|
|
12
|
+
import { ThresholdConfigSchema, } from "../pipeline/schemas.js";
|
|
13
|
+
export { formatReadinessMarkdown, generateReadinessReport, } from "../pipeline/readiness-report.js";
|
|
14
|
+
import { generateReadinessReport, formatReadinessMarkdown, } from "../pipeline/readiness-report.js";
|
|
15
|
+
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
16
|
+
const ROOT = resolve(__dirname, "..", "..");
|
|
17
|
+
const SCORE_SUMMARY_PATH = join(ROOT, "results", "latest", "score-summary.json");
|
|
18
|
+
const GAP_ANALYSIS_PATH = join(ROOT, "results", "latest", "gap-analysis.json");
|
|
19
|
+
const THRESHOLDS_PATH = join(ROOT, "config", "thresholds.yaml");
|
|
20
|
+
const BASELINES_DIR = join(ROOT, "results", "baselines");
|
|
21
|
+
/**
 * Legacy main() entry point.
 *
 * Parses `--area <area>`, `--history` and `--output <file>` from
 * process.argv, loads the score summary, the threshold config and (when the
 * file exists) the gap analysis, optionally collects per-area history from
 * the baselines directory, then renders the readiness report as markdown.
 * The markdown goes to the --output file or, without one, to stdout.
 *
 * Exits with status 1 when --area is missing or when the report does not pass.
 *
 * @deprecated Use generateReadinessReport() + formatReadinessMarkdown() directly.
 * @throws {Error} When the score summary or threshold config file is missing.
 */
export function main() {
    const args = process.argv.slice(2);
    let area;
    let output;
    let history = false;
    let cursor = 0;
    while (cursor < args.length) {
        const token = args[cursor];
        // Value-taking flags are only honoured when a following argument exists.
        const hasValue = cursor + 1 < args.length;
        if (token === "--history") {
            history = true;
        }
        else if (hasValue && token === "--area") {
            cursor += 1;
            area = args[cursor];
        }
        else if (hasValue && token === "--output") {
            cursor += 1;
            output = args[cursor];
        }
        cursor += 1;
    }
    if (!area) {
        console.error("Usage: readiness-report --area <area> [--history] [--output <file>]");
        process.exit(1);
    }
    // Required inputs: score summary and threshold config.
    if (!existsSync(SCORE_SUMMARY_PATH)) {
        throw new Error(`Score summary not found at ${SCORE_SUMMARY_PATH}. Run \`pnpm pipeline\` first.`);
    }
    const scoreSummary = JSON.parse(readFileSync(SCORE_SUMMARY_PATH, "utf-8"));
    if (!existsSync(THRESHOLDS_PATH)) {
        throw new Error(`Threshold config not found at ${THRESHOLDS_PATH}.`);
    }
    const thresholdConfig = ThresholdConfigSchema.parse(load(readFileSync(THRESHOLDS_PATH, "utf-8")));
    // Optional input: gap analysis stays undefined when the file is absent.
    const gapAnalysis = existsSync(GAP_ANALYSIS_PATH)
        ? JSON.parse(readFileSync(GAP_ANALYSIS_PATH, "utf-8"))
        : undefined;
    const historyEntries = [];
    if (history && existsSync(BASELINES_DIR)) {
        const baselineFiles = readdirSync(BASELINES_DIR).filter((name) => name.endsWith(".json"));
        baselineFiles.sort();
        for (const fileName of baselineFiles) {
            try {
                const baseline = JSON.parse(readFileSync(join(BASELINES_DIR, fileName), "utf-8"));
                const areaScore = baseline.scores?.find((entry) => entry.feature === area);
                if (areaScore) {
                    const stem = fileName.replace(/\.json$/, "");
                    const segments = stem.split("_");
                    // Everything after the fourth "_"-separated segment is treated as the tag.
                    const tag = segments.length > 4 ? segments.slice(4).join("_") : undefined;
                    historyEntries.push({
                        score: areaScore.totalScore,
                        tag,
                        timestamp: baseline.timestamp ?? stem,
                    });
                }
            }
            catch {
                // Skip malformed baseline files
            }
        }
    }
    const report = generateReadinessReport({
        area,
        gapAnalysis,
        history: historyEntries,
        scoreSummary,
        thresholdConfig,
    });
    const markdown = formatReadinessMarkdown(report);
    if (output) {
        writeFileSync(output, markdown, "utf-8");
        console.error(`✅ Readiness report written to ${output}`);
    }
    else {
        console.log(markdown);
    }
    if (!report.pass) {
        process.exit(1);
    }
}
|
|
105
|
+
// Run main() only when the executed script path ends with this module's
// file name (source .ts or compiled .js).
if (["readiness-report.ts", "readiness-report.js"].some((suffix) => process.argv[1]?.endsWith(suffix))) {
    main();
}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* lib/webhook-server.ts — DEPRECATED re-export shim.
|
|
3
|
+
*
|
|
4
|
+
* The real implementation has moved to pipeline/webhook-server.ts.
|
|
5
|
+
* This shim preserves backward compatibility for direct CLI invocation.
|
|
6
|
+
*
|
|
7
|
+
* TODO: Update all importers to use pipeline/webhook-server.ts, then delete this file.
|
|
8
|
+
*
|
|
9
|
+
* @deprecated Import from ../pipeline/webhook-server.js instead.
|
|
10
|
+
*/
|
|
11
|
+
export { startWebhookServer, type WebhookServerOptions, } from "../pipeline/webhook-server.js";
|