@sanity/ailf 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +89 -0
- package/bin/ailf.js +64 -0
- package/canonical/grader-references/README.md +88 -0
- package/canonical/grader-references/groq.yaml +234 -0
- package/canonical/grader-references/studio-setup.yaml +275 -0
- package/canonical/reference-solutions/.gitkeep +1 -0
- package/canonical/reference-solutions/frameworks/nuxt.ts +119 -0
- package/canonical/reference-solutions/frameworks/remix.tsx +100 -0
- package/canonical/reference-solutions/functions/publish-webhook.ts +60 -0
- package/canonical/reference-solutions/groq/advanced-filtering.ts +379 -0
- package/canonical/reference-solutions/groq/blog-queries.ts +137 -0
- package/canonical/reference-solutions/groq/joins-references.ts +300 -0
- package/canonical/reference-solutions/nextjs/app-router-integration.tsx +128 -0
- package/canonical/reference-solutions/studio-setup/blog-schema.ts +143 -0
- package/canonical/reference-solutions/studio-setup/custom-tool.tsx +78 -0
- package/canonical/reference-solutions/visual-editing/live-preview.tsx +137 -0
- package/canonical/reference-solutions/visual-editing/presentation-nextjs.tsx +130 -0
- package/config/airbyte/ai_literacy_framework.connector.yaml +639 -0
- package/config/bigquery/README.md +74 -0
- package/config/bigquery/views/area_scores.sql +87 -0
- package/config/bigquery/views/reports.sql +49 -0
- package/config/features.yaml +116 -0
- package/config/models.yaml +115 -0
- package/config/prompts.yaml +75 -0
- package/config/rubrics.yaml +62 -0
- package/config/schedules.yaml +43 -0
- package/config/sinks.yaml +54 -0
- package/config/sources.yaml +51 -0
- package/config/thresholds.yaml +49 -0
- package/dist/_vendor/ailf-core/examples/index.d.ts +190 -0
- package/dist/_vendor/ailf-core/examples/index.js +285 -0
- package/dist/_vendor/ailf-core/index.d.ts +17 -0
- package/dist/_vendor/ailf-core/index.js +17 -0
- package/dist/_vendor/ailf-core/ports/cache-store.d.ts +72 -0
- package/dist/_vendor/ailf-core/ports/cache-store.js +17 -0
- package/dist/_vendor/ailf-core/ports/config-source.d.ts +33 -0
- package/dist/_vendor/ailf-core/ports/config-source.js +15 -0
- package/dist/_vendor/ailf-core/ports/context.d.ts +172 -0
- package/dist/_vendor/ailf-core/ports/context.js +14 -0
- package/dist/_vendor/ailf-core/ports/doc-fetcher.d.ts +131 -0
- package/dist/_vendor/ailf-core/ports/doc-fetcher.js +12 -0
- package/dist/_vendor/ailf-core/ports/eval-runner.d.ts +24 -0
- package/dist/_vendor/ailf-core/ports/eval-runner.js +8 -0
- package/dist/_vendor/ailf-core/ports/index.d.ts +15 -0
- package/dist/_vendor/ailf-core/ports/index.js +7 -0
- package/dist/_vendor/ailf-core/ports/logger.d.ts +36 -0
- package/dist/_vendor/ailf-core/ports/logger.js +11 -0
- package/dist/_vendor/ailf-core/ports/pipeline-step.d.ts +46 -0
- package/dist/_vendor/ailf-core/ports/pipeline-step.js +8 -0
- package/dist/_vendor/ailf-core/ports/task-source.d.ts +159 -0
- package/dist/_vendor/ailf-core/ports/task-source.js +72 -0
- package/dist/_vendor/ailf-core/schemas/callback-payload.d.ts +24 -0
- package/dist/_vendor/ailf-core/schemas/callback-payload.js +29 -0
- package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +55 -0
- package/dist/_vendor/ailf-core/schemas/eval-config.js +78 -0
- package/dist/_vendor/ailf-core/schemas/index.d.ts +16 -0
- package/dist/_vendor/ailf-core/schemas/index.js +16 -0
- package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +125 -0
- package/dist/_vendor/ailf-core/schemas/pipeline-request.js +67 -0
- package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +531 -0
- package/dist/_vendor/ailf-core/schemas/pipeline.js +318 -0
- package/dist/_vendor/ailf-core/schemas/schedules.d.ts +68 -0
- package/dist/_vendor/ailf-core/schemas/schedules.js +74 -0
- package/dist/_vendor/ailf-core/schemas/sinks.d.ts +207 -0
- package/dist/_vendor/ailf-core/schemas/sinks.js +108 -0
- package/dist/_vendor/ailf-core/services/comparison-formatters.d.ts +18 -0
- package/dist/_vendor/ailf-core/services/comparison-formatters.js +189 -0
- package/dist/_vendor/ailf-core/services/config-helpers.d.ts +41 -0
- package/dist/_vendor/ailf-core/services/config-helpers.js +86 -0
- package/dist/_vendor/ailf-core/services/index.d.ts +12 -0
- package/dist/_vendor/ailf-core/services/index.js +12 -0
- package/dist/_vendor/ailf-core/services/scoring.d.ts +49 -0
- package/dist/_vendor/ailf-core/services/scoring.js +222 -0
- package/dist/_vendor/ailf-core/types/index.d.ts +1082 -0
- package/dist/_vendor/ailf-core/types/index.js +21 -0
- package/dist/_vendor/ailf-core/types/scoring-input.d.ts +54 -0
- package/dist/_vendor/ailf-core/types/scoring-input.js +9 -0
- package/dist/_vendor/ailf-shared/dimension-names.d.ts +21 -0
- package/dist/_vendor/ailf-shared/dimension-names.js +27 -0
- package/dist/_vendor/ailf-shared/document-ref.d.ts +29 -0
- package/dist/_vendor/ailf-shared/document-ref.js +1 -0
- package/dist/_vendor/ailf-shared/eval-modes.d.ts +12 -0
- package/dist/_vendor/ailf-shared/eval-modes.js +8 -0
- package/dist/_vendor/ailf-shared/index.d.ts +16 -0
- package/dist/_vendor/ailf-shared/index.js +16 -0
- package/dist/_vendor/ailf-shared/noise-threshold.d.ts +9 -0
- package/dist/_vendor/ailf-shared/noise-threshold.js +9 -0
- package/dist/_vendor/ailf-shared/score-grades.d.ts +17 -0
- package/dist/_vendor/ailf-shared/score-grades.js +23 -0
- package/dist/adapters/cache/content-lake-cache.d.ts +24 -0
- package/dist/adapters/cache/content-lake-cache.js +59 -0
- package/dist/adapters/cache/filesystem-cache.d.ts +18 -0
- package/dist/adapters/cache/filesystem-cache.js +54 -0
- package/dist/adapters/cache/index.d.ts +2 -0
- package/dist/adapters/cache/index.js +2 -0
- package/dist/adapters/config-sources/cli-config-adapter.d.ts +17 -0
- package/dist/adapters/config-sources/cli-config-adapter.js +23 -0
- package/dist/adapters/config-sources/file-config-adapter.d.ts +26 -0
- package/dist/adapters/config-sources/file-config-adapter.js +96 -0
- package/dist/adapters/config-sources/index.d.ts +2 -0
- package/dist/adapters/config-sources/index.js +2 -0
- package/dist/adapters/doc-fetchers/index.d.ts +1 -0
- package/dist/adapters/doc-fetchers/index.js +1 -0
- package/dist/adapters/doc-fetchers/sanity-doc-fetcher.d.ts +76 -0
- package/dist/adapters/doc-fetchers/sanity-doc-fetcher.js +620 -0
- package/dist/adapters/eval-runners/index.d.ts +1 -0
- package/dist/adapters/eval-runners/index.js +1 -0
- package/dist/adapters/eval-runners/promptfoo-eval-adapter.d.ts +14 -0
- package/dist/adapters/eval-runners/promptfoo-eval-adapter.js +63 -0
- package/dist/adapters/index.d.ts +12 -0
- package/dist/adapters/index.js +12 -0
- package/dist/adapters/loggers/console-logger.d.ts +22 -0
- package/dist/adapters/loggers/console-logger.js +54 -0
- package/dist/adapters/loggers/index.d.ts +9 -0
- package/dist/adapters/loggers/index.js +9 -0
- package/dist/adapters/loggers/json-logger.d.ts +18 -0
- package/dist/adapters/loggers/json-logger.js +33 -0
- package/dist/adapters/loggers/quiet-logger.d.ts +16 -0
- package/dist/adapters/loggers/quiet-logger.js +30 -0
- package/dist/adapters/task-sources/composite-task-source.d.ts +20 -0
- package/dist/adapters/task-sources/composite-task-source.js +59 -0
- package/dist/adapters/task-sources/content-lake-task-source.d.ts +20 -0
- package/dist/adapters/task-sources/content-lake-task-source.js +219 -0
- package/dist/adapters/task-sources/index.d.ts +7 -0
- package/dist/adapters/task-sources/index.js +7 -0
- package/dist/adapters/task-sources/repo-schemas.d.ts +245 -0
- package/dist/adapters/task-sources/repo-schemas.js +234 -0
- package/dist/adapters/task-sources/repo-task-source.d.ts +22 -0
- package/dist/adapters/task-sources/repo-task-source.js +104 -0
- package/dist/adapters/task-sources/repo-trigger.d.ts +52 -0
- package/dist/adapters/task-sources/repo-trigger.js +153 -0
- package/dist/adapters/task-sources/repo-validation.d.ts +49 -0
- package/dist/adapters/task-sources/repo-validation.js +164 -0
- package/dist/adapters/task-sources/yaml-task-source.d.ts +18 -0
- package/dist/adapters/task-sources/yaml-task-source.js +136 -0
- package/dist/agent-observer/agentic-provider.d.ts +132 -0
- package/dist/agent-observer/agentic-provider.js +983 -0
- package/dist/agent-observer/classifier.d.ts +62 -0
- package/dist/agent-observer/classifier.js +269 -0
- package/dist/agent-observer/index.d.ts +7 -0
- package/dist/agent-observer/index.js +4 -0
- package/dist/agent-observer/pricing.d.ts +35 -0
- package/dist/agent-observer/pricing.js +82 -0
- package/dist/agent-observer/provider.d.ts +77 -0
- package/dist/agent-observer/provider.js +151 -0
- package/dist/agent-observer/proxy.d.ts +91 -0
- package/dist/agent-observer/proxy.js +321 -0
- package/dist/agent-observer/test-imports.d.ts +7 -0
- package/dist/agent-observer/test-imports.js +185 -0
- package/dist/agent-observer/types.d.ts +137 -0
- package/dist/agent-observer/types.js +16 -0
- package/dist/assertions/source-isolation.d.ts +72 -0
- package/dist/assertions/source-isolation.js +117 -0
- package/dist/cli.d.ts +24 -0
- package/dist/cli.js +199 -0
- package/dist/commands/agent-report.d.ts +5 -0
- package/dist/commands/agent-report.js +69 -0
- package/dist/commands/baseline.d.ts +9 -0
- package/dist/commands/baseline.js +141 -0
- package/dist/commands/cache.d.ts +13 -0
- package/dist/commands/cache.js +135 -0
- package/dist/commands/calculate-scores.d.ts +8 -0
- package/dist/commands/calculate-scores.js +48 -0
- package/dist/commands/compare.d.ts +8 -0
- package/dist/commands/compare.js +120 -0
- package/dist/commands/completion.d.ts +18 -0
- package/dist/commands/completion.js +260 -0
- package/dist/commands/coverage-audit.d.ts +7 -0
- package/dist/commands/coverage-audit.js +40 -0
- package/dist/commands/discovery-report.d.ts +10 -0
- package/dist/commands/discovery-report.js +44 -0
- package/dist/commands/eval.d.ts +9 -0
- package/dist/commands/eval.js +35 -0
- package/dist/commands/explain-handler.d.ts +34 -0
- package/dist/commands/explain-handler.js +719 -0
- package/dist/commands/fetch-docs.d.ts +8 -0
- package/dist/commands/fetch-docs.js +128 -0
- package/dist/commands/generate-configs.d.ts +8 -0
- package/dist/commands/generate-configs.js +46 -0
- package/dist/commands/grader/index.d.ts +11 -0
- package/dist/commands/grader/index.js +118 -0
- package/dist/commands/init.d.ts +19 -0
- package/dist/commands/init.js +150 -0
- package/dist/commands/interactive.d.ts +12 -0
- package/dist/commands/interactive.js +238 -0
- package/dist/commands/lookup-doc.d.ts +15 -0
- package/dist/commands/lookup-doc.js +84 -0
- package/dist/commands/measure-retrieval.d.ts +5 -0
- package/dist/commands/measure-retrieval.js +65 -0
- package/dist/commands/pipeline-action.d.ts +71 -0
- package/dist/commands/pipeline-action.js +305 -0
- package/dist/commands/pipeline.d.ts +62 -0
- package/dist/commands/pipeline.js +53 -0
- package/dist/commands/pr-comment.d.ts +8 -0
- package/dist/commands/pr-comment.js +47 -0
- package/dist/commands/publish.d.ts +26 -0
- package/dist/commands/publish.js +253 -0
- package/dist/commands/readiness-report.d.ts +10 -0
- package/dist/commands/readiness-report.js +104 -0
- package/dist/commands/shared/options.d.ts +29 -0
- package/dist/commands/shared/options.js +57 -0
- package/dist/commands/update-quality-scores.d.ts +5 -0
- package/dist/commands/update-quality-scores.js +20 -0
- package/dist/commands/validate-tasks.d.ts +16 -0
- package/dist/commands/validate-tasks.js +93 -0
- package/dist/commands/validate.d.ts +9 -0
- package/dist/commands/validate.js +73 -0
- package/dist/commands/webhook-server.d.ts +5 -0
- package/dist/commands/webhook-server.js +30 -0
- package/dist/commands/weekly-digest.d.ts +10 -0
- package/dist/commands/weekly-digest.js +104 -0
- package/dist/composition-root.d.ts +26 -0
- package/dist/composition-root.js +107 -0
- package/dist/interpolate.d.ts +26 -0
- package/dist/interpolate.js +70 -0
- package/dist/job-store.d.ts +104 -0
- package/dist/job-store.js +188 -0
- package/dist/lib/agent-behavior-report.d.ts +8 -0
- package/dist/lib/agent-behavior-report.js +185 -0
- package/dist/lib/baseline.d.ts +19 -0
- package/dist/lib/baseline.js +153 -0
- package/dist/lib/calculate-scores.d.ts +23 -0
- package/dist/lib/calculate-scores.js +42 -0
- package/dist/lib/compare.d.ts +18 -0
- package/dist/lib/compare.js +170 -0
- package/dist/lib/coverage-audit.d.ts +4 -0
- package/dist/lib/coverage-audit.js +42 -0
- package/dist/lib/discovery-report.d.ts +13 -0
- package/dist/lib/discovery-report.js +57 -0
- package/dist/lib/fetch-docs.d.ts +30 -0
- package/dist/lib/fetch-docs.js +171 -0
- package/dist/lib/generate-configs.d.ts +25 -0
- package/dist/lib/generate-configs.js +42 -0
- package/dist/lib/grader-api.d.ts +21 -0
- package/dist/lib/grader-api.js +34 -0
- package/dist/lib/grader-compare.d.ts +19 -0
- package/dist/lib/grader-compare.js +91 -0
- package/dist/lib/grader-consistency.d.ts +27 -0
- package/dist/lib/grader-consistency.js +79 -0
- package/dist/lib/grader-sensitivity.d.ts +19 -0
- package/dist/lib/grader-sensitivity.js +75 -0
- package/dist/lib/grader-validate.d.ts +19 -0
- package/dist/lib/grader-validate.js +78 -0
- package/dist/lib/measure-retrieval.d.ts +14 -0
- package/dist/lib/measure-retrieval.js +71 -0
- package/dist/lib/pr-comment.d.ts +16 -0
- package/dist/lib/pr-comment.js +28 -0
- package/dist/lib/readiness-report.d.ts +13 -0
- package/dist/lib/readiness-report.js +108 -0
- package/dist/lib/webhook-server.d.ts +11 -0
- package/dist/lib/webhook-server.js +24 -0
- package/dist/lib/weekly-digest.d.ts +24 -0
- package/dist/lib/weekly-digest.js +148 -0
- package/dist/orchestration/build-app-context.d.ts +27 -0
- package/dist/orchestration/build-app-context.js +81 -0
- package/dist/orchestration/build-step-sequence.d.ts +15 -0
- package/dist/orchestration/build-step-sequence.js +84 -0
- package/dist/orchestration/config-to-source-overrides.d.ts +9 -0
- package/dist/orchestration/config-to-source-overrides.js +28 -0
- package/dist/orchestration/env-bridge.d.ts +21 -0
- package/dist/orchestration/env-bridge.js +66 -0
- package/dist/orchestration/index.d.ts +11 -0
- package/dist/orchestration/index.js +11 -0
- package/dist/orchestration/pipeline-orchestrator.d.ts +24 -0
- package/dist/orchestration/pipeline-orchestrator.js +153 -0
- package/dist/orchestration/step-runner.d.ts +20 -0
- package/dist/orchestration/step-runner.js +88 -0
- package/dist/orchestration/steps/calculate-scores-step.d.ts +13 -0
- package/dist/orchestration/steps/calculate-scores-step.js +95 -0
- package/dist/orchestration/steps/callback-step.d.ts +24 -0
- package/dist/orchestration/steps/callback-step.js +76 -0
- package/dist/orchestration/steps/compare-step.d.ts +14 -0
- package/dist/orchestration/steps/compare-step.js +92 -0
- package/dist/orchestration/steps/discovery-report-step.d.ts +13 -0
- package/dist/orchestration/steps/discovery-report-step.js +55 -0
- package/dist/orchestration/steps/fetch-docs-shell.d.ts +17 -0
- package/dist/orchestration/steps/fetch-docs-shell.js +30 -0
- package/dist/orchestration/steps/fetch-docs-step.d.ts +14 -0
- package/dist/orchestration/steps/fetch-docs-step.js +135 -0
- package/dist/orchestration/steps/gap-analysis-step.d.ts +16 -0
- package/dist/orchestration/steps/gap-analysis-step.js +136 -0
- package/dist/orchestration/steps/generate-configs-step.d.ts +14 -0
- package/dist/orchestration/steps/generate-configs-step.js +85 -0
- package/dist/orchestration/steps/grader-consistency-step.d.ts +13 -0
- package/dist/orchestration/steps/grader-consistency-step.js +64 -0
- package/dist/orchestration/steps/index.d.ts +19 -0
- package/dist/orchestration/steps/index.js +19 -0
- package/dist/orchestration/steps/mirror-repo-tasks-step.d.ts +21 -0
- package/dist/orchestration/steps/mirror-repo-tasks-step.js +94 -0
- package/dist/orchestration/steps/publish-report-step.d.ts +26 -0
- package/dist/orchestration/steps/publish-report-step.js +216 -0
- package/dist/orchestration/steps/readiness-step.d.ts +13 -0
- package/dist/orchestration/steps/readiness-step.js +91 -0
- package/dist/orchestration/steps/report-step.d.ts +12 -0
- package/dist/orchestration/steps/report-step.js +49 -0
- package/dist/orchestration/steps/run-eval-step.d.ts +17 -0
- package/dist/orchestration/steps/run-eval-step.js +195 -0
- package/dist/orchestration/steps/validate-step.d.ts +12 -0
- package/dist/orchestration/steps/validate-step.js +41 -0
- package/dist/pipeline/agent-behavior-report.d.ts +53 -0
- package/dist/pipeline/agent-behavior-report.js +132 -0
- package/dist/pipeline/attribution.d.ts +47 -0
- package/dist/pipeline/attribution.js +226 -0
- package/dist/pipeline/baseline.d.ts +37 -0
- package/dist/pipeline/baseline.js +141 -0
- package/dist/pipeline/cache.d.ts +101 -0
- package/dist/pipeline/cache.js +283 -0
- package/dist/pipeline/calculate-scores.d.ts +102 -0
- package/dist/pipeline/calculate-scores.js +1128 -0
- package/dist/pipeline/callback-delivery.d.ts +50 -0
- package/dist/pipeline/callback-delivery.js +89 -0
- package/dist/pipeline/checks.d.ts +39 -0
- package/dist/pipeline/checks.js +280 -0
- package/dist/pipeline/classify-url.d.ts +61 -0
- package/dist/pipeline/classify-url.js +93 -0
- package/dist/pipeline/compare.d.ts +31 -0
- package/dist/pipeline/compare.js +208 -0
- package/dist/pipeline/coverage-audit.d.ts +39 -0
- package/dist/pipeline/coverage-audit.js +165 -0
- package/dist/pipeline/degradations.d.ts +85 -0
- package/dist/pipeline/degradations.js +242 -0
- package/dist/pipeline/discovery-report.d.ts +55 -0
- package/dist/pipeline/discovery-report.js +178 -0
- package/dist/pipeline/eval-constants.d.ts +68 -0
- package/dist/pipeline/eval-constants.js +111 -0
- package/dist/pipeline/eval-fingerprint.d.ts +66 -0
- package/dist/pipeline/eval-fingerprint.js +175 -0
- package/dist/pipeline/expand-tasks.d.ts +220 -0
- package/dist/pipeline/expand-tasks.js +421 -0
- package/dist/pipeline/failure-modes.d.ts +46 -0
- package/dist/pipeline/failure-modes.js +348 -0
- package/dist/pipeline/fetch-url-content.d.ts +44 -0
- package/dist/pipeline/fetch-url-content.js +93 -0
- package/dist/pipeline/gap-analysis.d.ts +48 -0
- package/dist/pipeline/gap-analysis.js +231 -0
- package/dist/pipeline/generate-configs.d.ts +72 -0
- package/dist/pipeline/generate-configs.js +395 -0
- package/dist/pipeline/grader-api.d.ts +49 -0
- package/dist/pipeline/grader-api.js +200 -0
- package/dist/pipeline/grader-compare-runner.d.ts +44 -0
- package/dist/pipeline/grader-compare-runner.js +301 -0
- package/dist/pipeline/grader-comparison.d.ts +111 -0
- package/dist/pipeline/grader-comparison.js +161 -0
- package/dist/pipeline/grader-consistency-runner.d.ts +60 -0
- package/dist/pipeline/grader-consistency-runner.js +270 -0
- package/dist/pipeline/grader-consistency.d.ts +103 -0
- package/dist/pipeline/grader-consistency.js +146 -0
- package/dist/pipeline/grader-sensitivity-runner.d.ts +40 -0
- package/dist/pipeline/grader-sensitivity-runner.js +282 -0
- package/dist/pipeline/grader-sensitivity.d.ts +94 -0
- package/dist/pipeline/grader-sensitivity.js +144 -0
- package/dist/pipeline/grader-validate-runner.d.ts +38 -0
- package/dist/pipeline/grader-validate-runner.js +229 -0
- package/dist/pipeline/grader-validation.d.ts +107 -0
- package/dist/pipeline/grader-validation.js +169 -0
- package/dist/pipeline/map-request-to-config.d.ts +19 -0
- package/dist/pipeline/map-request-to-config.js +80 -0
- package/dist/pipeline/measure-retrieval.d.ts +59 -0
- package/dist/pipeline/measure-retrieval.js +111 -0
- package/dist/pipeline/mirror-repo-tasks.d.ts +86 -0
- package/dist/pipeline/mirror-repo-tasks.js +350 -0
- package/dist/pipeline/plan-format.d.ts +33 -0
- package/dist/pipeline/plan-format.js +202 -0
- package/dist/pipeline/plan.d.ts +169 -0
- package/dist/pipeline/plan.js +708 -0
- package/dist/pipeline/pr-comment.d.ts +19 -0
- package/dist/pipeline/pr-comment.js +502 -0
- package/dist/pipeline/probe.d.ts +52 -0
- package/dist/pipeline/probe.js +390 -0
- package/dist/pipeline/provenance.d.ts +47 -0
- package/dist/pipeline/provenance.js +146 -0
- package/dist/pipeline/readiness-report.d.ts +87 -0
- package/dist/pipeline/readiness-report.js +205 -0
- package/dist/pipeline/release-classification.d.ts +54 -0
- package/dist/pipeline/release-classification.js +238 -0
- package/dist/pipeline/release-report.d.ts +37 -0
- package/dist/pipeline/release-report.js +222 -0
- package/dist/pipeline/repo-eval-comment.d.ts +37 -0
- package/dist/pipeline/repo-eval-comment.js +165 -0
- package/dist/pipeline/repo-threshold-evaluator.d.ts +89 -0
- package/dist/pipeline/repo-threshold-evaluator.js +162 -0
- package/dist/pipeline/resolve-mappings.d.ts +35 -0
- package/dist/pipeline/resolve-mappings.js +72 -0
- package/dist/pipeline/retrieval-metrics.d.ts +39 -0
- package/dist/pipeline/retrieval-metrics.js +136 -0
- package/dist/pipeline/reverse-mapping.d.ts +67 -0
- package/dist/pipeline/reverse-mapping.js +88 -0
- package/dist/pipeline/schemas.d.ts +9 -0
- package/dist/pipeline/schemas.js +9 -0
- package/dist/pipeline/steps/calculate-scores-step.d.ts +11 -0
- package/dist/pipeline/steps/calculate-scores-step.js +89 -0
- package/dist/pipeline/steps/compare-step.d.ts +18 -0
- package/dist/pipeline/steps/compare-step.js +90 -0
- package/dist/pipeline/steps/eval-step.d.ts +53 -0
- package/dist/pipeline/steps/eval-step.js +347 -0
- package/dist/pipeline/steps/fetch-docs-step.d.ts +11 -0
- package/dist/pipeline/steps/fetch-docs-step.js +84 -0
- package/dist/pipeline/steps/generate-configs-step.d.ts +11 -0
- package/dist/pipeline/steps/generate-configs-step.js +98 -0
- package/dist/pipeline/steps/grader-consistency-step.d.ts +21 -0
- package/dist/pipeline/steps/grader-consistency-step.js +74 -0
- package/dist/pipeline/steps/publish-report-step.d.ts +57 -0
- package/dist/pipeline/steps/publish-report-step.js +243 -0
- package/dist/pipeline/steps/report-step.d.ts +13 -0
- package/dist/pipeline/steps/report-step.js +56 -0
- package/dist/pipeline/steps/update-scores-step.d.ts +11 -0
- package/dist/pipeline/steps/update-scores-step.js +42 -0
- package/dist/pipeline/targeted-loo.d.ts +88 -0
- package/dist/pipeline/targeted-loo.js +203 -0
- package/dist/pipeline/thresholds.d.ts +27 -0
- package/dist/pipeline/thresholds.js +245 -0
- package/dist/pipeline/types.d.ts +10 -0
- package/dist/pipeline/types.js +10 -0
- package/dist/pipeline/validate.d.ts +67 -0
- package/dist/pipeline/validate.js +406 -0
- package/dist/pipeline/webhook-server.d.ts +37 -0
- package/dist/pipeline/webhook-server.js +133 -0
- package/dist/report-store.d.ts +84 -0
- package/dist/report-store.js +208 -0
- package/dist/sanity/client.d.ts +38 -0
- package/dist/sanity/client.js +86 -0
- package/dist/sanity/portable-text.d.ts +11 -0
- package/dist/sanity/portable-text.js +211 -0
- package/dist/sanity/queries.d.ts +133 -0
- package/dist/sanity/queries.js +300 -0
- package/dist/schedules/digest.d.ts +116 -0
- package/dist/schedules/digest.js +156 -0
- package/dist/schedules/index.d.ts +12 -0
- package/dist/schedules/index.js +10 -0
- package/dist/schedules/loader.d.ts +31 -0
- package/dist/schedules/loader.js +73 -0
- package/dist/schedules/schema.d.ts +9 -0
- package/dist/schedules/schema.js +9 -0
- package/dist/scripts/agent-behavior-report.d.ts +19 -0
- package/dist/scripts/agent-behavior-report.js +315 -0
- package/dist/scripts/baseline.d.ts +43 -0
- package/dist/scripts/baseline.js +267 -0
- package/dist/scripts/calculate-scores.d.ts +166 -0
- package/dist/scripts/calculate-scores.js +1296 -0
- package/dist/scripts/compare.d.ts +22 -0
- package/dist/scripts/compare.js +334 -0
- package/dist/scripts/coverage-audit.d.ts +44 -0
- package/dist/scripts/coverage-audit.js +209 -0
- package/dist/scripts/debug-eval.d.ts +19 -0
- package/dist/scripts/debug-eval.js +73 -0
- package/dist/scripts/discovery-report.d.ts +58 -0
- package/dist/scripts/discovery-report.js +250 -0
- package/dist/scripts/fetch-docs.d.ts +35 -0
- package/dist/scripts/fetch-docs.js +472 -0
- package/dist/scripts/generate-configs.d.ts +66 -0
- package/dist/scripts/generate-configs.js +459 -0
- package/dist/scripts/grader-api.d.ts +27 -0
- package/dist/scripts/grader-api.js +206 -0
- package/dist/scripts/grader-compare.d.ts +22 -0
- package/dist/scripts/grader-compare.js +368 -0
- package/dist/scripts/grader-consistency.d.ts +20 -0
- package/dist/scripts/grader-consistency.js +313 -0
- package/dist/scripts/grader-sensitivity.d.ts +22 -0
- package/dist/scripts/grader-sensitivity.js +354 -0
- package/dist/scripts/grader-validate.d.ts +19 -0
- package/dist/scripts/grader-validate.js +267 -0
- package/dist/scripts/measure-retrieval.d.ts +10 -0
- package/dist/scripts/measure-retrieval.js +145 -0
- package/dist/scripts/migrate-tasks-to-content-lake.d.ts +24 -0
- package/dist/scripts/migrate-tasks-to-content-lake.js +327 -0
- package/dist/scripts/pipeline.d.ts +76 -0
- package/dist/scripts/pipeline.js +1031 -0
- package/dist/scripts/pr-comment.d.ts +10 -0
- package/dist/scripts/pr-comment.js +510 -0
- package/dist/scripts/readiness-report.d.ts +88 -0
- package/dist/scripts/readiness-report.js +342 -0
- package/dist/scripts/update-quality-scores.d.ts +15 -0
- package/dist/scripts/update-quality-scores.js +184 -0
- package/dist/scripts/validate-task-sources.d.ts +21 -0
- package/dist/scripts/validate-task-sources.js +210 -0
- package/dist/scripts/validate.d.ts +13 -0
- package/dist/scripts/validate.js +79 -0
- package/dist/scripts/webhook-server.d.ts +26 -0
- package/dist/scripts/webhook-server.js +147 -0
- package/dist/scripts/weekly-digest.d.ts +24 -0
- package/dist/scripts/weekly-digest.js +144 -0
- package/dist/sinks/bigquery/index.d.ts +131 -0
- package/dist/sinks/bigquery/index.js +222 -0
- package/dist/sinks/format-slack.d.ts +64 -0
- package/dist/sinks/format-slack.js +306 -0
- package/dist/sinks/index.d.ts +23 -0
- package/dist/sinks/index.js +18 -0
- package/dist/sinks/loader.d.ts +18 -0
- package/dist/sinks/loader.js +82 -0
- package/dist/sinks/retry.d.ts +24 -0
- package/dist/sinks/retry.js +52 -0
- package/dist/sinks/schema.d.ts +9 -0
- package/dist/sinks/schema.js +9 -0
- package/dist/sinks/slack/format.d.ts +65 -0
- package/dist/sinks/slack/format.js +327 -0
- package/dist/sinks/slack/index.d.ts +27 -0
- package/dist/sinks/slack/index.js +78 -0
- package/dist/sinks/slack-sink.d.ts +27 -0
- package/dist/sinks/slack-sink.js +78 -0
- package/dist/sinks/types.d.ts +59 -0
- package/dist/sinks/types.js +44 -0
- package/dist/sinks/webhook/index.d.ts +19 -0
- package/dist/sinks/webhook/index.js +50 -0
- package/dist/sinks/webhook-sink.d.ts +19 -0
- package/dist/sinks/webhook-sink.js +50 -0
- package/dist/sources.d.ts +104 -0
- package/dist/sources.js +292 -0
- package/dist/webhook/budget.d.ts +42 -0
- package/dist/webhook/budget.js +60 -0
- package/dist/webhook/debounce.d.ts +67 -0
- package/dist/webhook/debounce.js +76 -0
- package/dist/webhook/dispatch.d.ts +45 -0
- package/dist/webhook/dispatch.js +84 -0
- package/dist/webhook/eval-request-handler.d.ts +87 -0
- package/dist/webhook/eval-request-handler.js +181 -0
- package/dist/webhook/handler.d.ts +88 -0
- package/dist/webhook/handler.js +203 -0
- package/dist/webhook/index.d.ts +17 -0
- package/dist/webhook/index.js +12 -0
- package/dist/webhook/types.d.ts +109 -0
- package/dist/webhook/types.js +10 -0
- package/package.json +72 -0
- package/tasks/.expanded.agentic.yaml +51 -0
- package/tasks/.expanded.yaml +66 -0
- package/tasks/frameworks.yaml +98 -0
- package/tasks/functions.yaml +51 -0
- package/tasks/groq.yaml +216 -0
- package/tasks/nextjs-live.yaml +62 -0
- package/tasks/studio-setup.yaml +111 -0
- package/tasks/visual-editing.yaml +120 -0
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* pipeline/cache.ts
|
|
3
|
+
*
|
|
4
|
+
* Content-aware caching for pipeline steps. Each step's cache key is derived
|
|
5
|
+
* from a hash of its inputs (config files, context files, task files, etc.).
|
|
6
|
+
* When inputs haven't changed between runs, cached outputs are reused.
|
|
7
|
+
*
|
|
8
|
+
* Cache storage: `results/cache/` directory with one JSON manifest per step.
|
|
9
|
+
* Each manifest holds the step's most recent entry, pairing its input content hash with the output metadata.
|
|
10
|
+
*
|
|
11
|
+
* Cache invalidation triggers:
|
|
12
|
+
* - Content change: any input file's content changes → hash changes → miss
|
|
13
|
+
* - Config change: config/models.yaml, config/sources.yaml, tasks/*.yaml changes → miss
|
|
14
|
+
* - Manual bypass: --no-cache flag skips all cache lookups
|
|
15
|
+
* - Cache clear: delete results/cache/ to start fresh
|
|
16
|
+
*/
|
|
17
|
+
/** A single cache entry — one per step, identified by the hash of that step's inputs */
|
|
18
|
+
export interface CacheEntry {
|
|
19
|
+
/** Duration of the original execution in milliseconds */
|
|
20
|
+
durationMs: number;
|
|
21
|
+
/** SHA-256 hash of all input content */
|
|
22
|
+
inputHash: string;
|
|
23
|
+
/** Output file paths that were produced (relative to package root) */
|
|
24
|
+
outputPaths: string[];
|
|
25
|
+
/** Human-readable summary of what was cached */
|
|
26
|
+
summary: string;
|
|
27
|
+
/** When this entry was created */
|
|
28
|
+
timestamp: string;
|
|
29
|
+
}
|
|
30
|
+
/** Result of a cache lookup */
|
|
31
|
+
export type CacheLookupResult = {
|
|
32
|
+
hit: false;
|
|
33
|
+
currentHash: string;
|
|
34
|
+
} | {
|
|
35
|
+
hit: true;
|
|
36
|
+
entry: CacheEntry;
|
|
37
|
+
};
|
|
38
|
+
/** The cache manifest for a single pipeline step */
|
|
39
|
+
export interface CacheManifest {
|
|
40
|
+
/** The most recent cache entry */
|
|
41
|
+
entry: CacheEntry | null;
|
|
42
|
+
/** Step name (e.g., "fetch-docs", "generate-configs") */
|
|
43
|
+
step: string;
|
|
44
|
+
}
|
|
45
|
+
/** Stats collected across all pipeline steps for reporting */
|
|
46
|
+
export interface CacheStats {
|
|
47
|
+
/** Steps where cache was hit (skipped execution) */
|
|
48
|
+
hits: number;
|
|
49
|
+
/** Steps where cache was missed (executed normally) */
|
|
50
|
+
misses: number;
|
|
51
|
+
/** Steps that were skipped for other reasons (--skip-fetch, etc.) */
|
|
52
|
+
skipped: number;
|
|
53
|
+
/** Per-step detail */
|
|
54
|
+
steps: Record<string, "disabled" | "hit" | "miss" | "skipped">;
|
|
55
|
+
/** Total steps that participated in caching */
|
|
56
|
+
total: number;
|
|
57
|
+
}
|
|
58
|
+
/** Create an empty CacheStats object */
|
|
59
|
+
export declare function createCacheStats(): CacheStats;
|
|
60
|
+
/** Format cache stats as a human-readable summary line */
|
|
61
|
+
export declare function formatCacheStats(stats: CacheStats): string;
|
|
62
|
+
/**
|
|
63
|
+
* Collect all file paths that serve as inputs for a given pipeline step.
|
|
64
|
+
* This is the core of cache key computation — if any of these files change,
|
|
65
|
+
* the step must re-execute.
|
|
66
|
+
*/
|
|
67
|
+
export declare function getStepInputPaths(rootDir: string, step: string): string[];
|
|
68
|
+
/**
|
|
69
|
+
* Compute a SHA-256 hash of the concatenated content of multiple files.
|
|
70
|
+
* Files are sorted by path for deterministic ordering. Missing files
|
|
71
|
+
* contribute a sentinel value so the hash changes if a file is deleted.
|
|
72
|
+
*
|
|
73
|
+
* Optional `context` strings are included in the hash so that non-file
|
|
74
|
+
* state (e.g., filter flags, environment variables) can also participate
|
|
75
|
+
* in cache key computation.
|
|
76
|
+
*/
|
|
77
|
+
export declare function hashFiles(paths: string[], context?: string[]): string;
|
|
78
|
+
/**
 * Look up the cache for a pipeline step.
 *
 * Computes the current input hash and compares it against the stored manifest.
 * Returns a hit if the hashes match AND all expected output files still exist.
 *
 * Optional `context` strings are included in the hash so that non-file
 * state (e.g., area/task filter flags) participates in cache key computation.
 *
 * @param rootDir - Directory that inputs, recorded outputs and the cache
 *   manifest are resolved against.
 * @param step - Pipeline step name.
 * @param context - Extra strings mixed into the input hash.
 * @returns On a hit, `hit: true` together with the stored `entry`; on a miss,
 *   `hit: false` together with the freshly computed `currentHash`.
 */
export declare function lookupCache(rootDir: string, step: string, context?: string[]): CacheLookupResult;
|
|
88
|
+
/**
 * Read the cache manifest for a step.
 * Returns null if no manifest exists or it's corrupt.
 *
 * @param rootDir - Directory that contains `results/cache/`.
 * @param step - Pipeline step name; also the manifest's file name stem.
 * @returns The parsed manifest, or null when the file is absent, cannot be
 *   parsed as JSON, or records a different step name.
 */
export declare function readManifest(rootDir: string, step: string): CacheManifest | null;
|
|
93
|
+
/**
 * Record a cache entry after a successful step execution.
 *
 * The entry is stamped with the current time and written as the step's
 * manifest, replacing any previous one.
 *
 * @param rootDir - Directory that contains `results/cache/`.
 * @param step - Pipeline step name.
 * @param inputHash - Hash of the step's inputs; later lookups compare
 *   against it.
 * @param summary - Summary text stored with the entry.
 * @param durationMs - Duration stored with the entry.
 * @param outputPaths - Output files of the step; a later lookup only hits
 *   while all of them (resolved against `rootDir`) still exist.
 */
export declare function recordCache(rootDir: string, step: string, inputHash: string, summary: string, durationMs: number, outputPaths: string[]): void;
|
|
97
|
+
/**
 * Write a cache manifest for a step.
 * Creates the cache directory if it doesn't exist.
 *
 * @param rootDir - Directory that contains (or will contain) `results/cache/`.
 * @param step - Pipeline step name; stored in the manifest and used as the
 *   file name stem (`<step>.json`).
 * @param entry - Cache entry to store.
 */
export declare function writeManifest(rootDir: string, step: string, entry: CacheEntry): void;
|
|
@@ -0,0 +1,283 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* pipeline/cache.ts
|
|
3
|
+
*
|
|
4
|
+
* Content-aware caching for pipeline steps. Each step's cache key is derived
|
|
5
|
+
* from a hash of its inputs (config files, context files, task files, etc.).
|
|
6
|
+
* When inputs haven't changed between runs, cached outputs are reused.
|
|
7
|
+
*
|
|
8
|
+
* Cache storage: `results/cache/` directory with one JSON manifest per step.
|
|
9
|
+
* Each manifest maps a content hash to the step's output metadata.
|
|
10
|
+
*
|
|
11
|
+
* Cache invalidation triggers:
|
|
12
|
+
* - Content change: any input file's content changes → hash changes → miss
|
|
13
|
+
* - Config change: config/models.yaml, config/sources.yaml, tasks/*.yaml changes → miss
|
|
14
|
+
* - Manual bypass: --no-cache flag skips all cache lookups
|
|
15
|
+
* - Cache clear: delete results/cache/ to start fresh
|
|
16
|
+
*/
|
|
17
|
+
import { createHash } from "crypto";
|
|
18
|
+
import { existsSync, mkdirSync, readFileSync, readdirSync, statSync, writeFileSync, } from "fs";
|
|
19
|
+
import { join, resolve } from "path";
|
|
20
|
+
// ---------------------------------------------------------------------------
|
|
21
|
+
// Constants
|
|
22
|
+
// ---------------------------------------------------------------------------
|
|
23
|
+
const CACHE_DIR_NAME = "cache";
|
|
24
|
+
/**
 * Cache format version. It is mixed into every input hash, so changing it
 * makes all previously recorded hashes stop matching.
 */
const CACHE_VERSION = 1;
|
|
25
|
+
// ---------------------------------------------------------------------------
|
|
26
|
+
// Cache stats (creation and formatting)
|
|
27
|
+
// ---------------------------------------------------------------------------
|
|
28
|
+
/**
 * Build a fresh, zeroed CacheStats record.
 *
 * Every call produces a new object with its own `steps` map, so callers can
 * mutate the result without affecting other stats objects.
 */
export function createCacheStats() {
    const steps = {};
    return { hits: 0, misses: 0, skipped: 0, steps, total: 0 };
}
|
|
38
|
+
/**
 * Format cache stats as a human-readable summary line.
 *
 * Hits are reported as "cached", misses as "evaluated" and skips as
 * "skipped"; a counter that is zero is omitted. When every counter is zero
 * only the total is returned, without a dangling leading space.
 *
 * @param stats - Object with numeric `hits`, `misses`, `skipped` and `total`.
 * @returns e.g. `2 cached, 1 evaluated (3 total steps)` or `(0 total steps)`.
 */
export function formatCacheStats(stats) {
    const { hits, misses, skipped, total } = stats;
    const parts = [];
    if (hits > 0)
        parts.push(`${hits} cached`);
    // A cache miss means the step was actually executed.
    if (misses > 0)
        parts.push(`${misses} evaluated`);
    if (skipped > 0)
        parts.push(`${skipped} skipped`);
    const totals = `(${total} total steps)`;
    return parts.length > 0 ? `${parts.join(", ")} ${totals}` : totals;
}
|
|
51
|
+
// ---------------------------------------------------------------------------
|
|
52
|
+
// Cache store
|
|
53
|
+
// ---------------------------------------------------------------------------
|
|
54
|
+
/**
 * List the direct children of `dir` whose names satisfy `keep`, as paths
 * joined onto `dir`. Returns an empty array when `dir` does not exist.
 */
function listMatchingEntries(dir, keep) {
    if (!existsSync(dir))
        return [];
    return readdirSync(dir)
        .filter((name) => keep(name))
        .map((name) => join(dir, name));
}
/**
 * Collect every file beneath `dir`, descending into real subdirectories.
 * Symbolic links are followed only when they point at a file, so a link
 * cycle cannot cause unbounded recursion. Returns an empty array when `dir`
 * does not exist.
 */
function listFilesRecursive(dir) {
    if (!existsSync(dir))
        return [];
    const files = [];
    for (const entry of readdirSync(dir, { withFileTypes: true })) {
        const entryPath = join(dir, entry.name);
        if (entry.isDirectory()) {
            files.push(...listFilesRecursive(entryPath));
        }
        else if (entry.isFile() ||
            (entry.isSymbolicLink() && statSync(entryPath).isFile())) {
            files.push(entryPath);
        }
    }
    return files;
}
/**
 * Collect all file paths that serve as inputs for a given pipeline step.
 * This is the core of cache key computation — if any of these files change,
 * the step must re-execute.
 *
 * Reference solutions are collected recursively, because they are organised
 * in per-topic subdirectories; a flat listing would miss them and leave the
 * eval cache stale after a solution is edited.
 *
 * @param rootDir - Directory that every input location is resolved against.
 * @param step - Pipeline step name.
 * @returns Resolved input paths; an empty array for an unrecognized step.
 */
export function getStepInputPaths(rootDir, step) {
    const r = (rel) => resolve(rootDir, rel);
    const isYaml = (name) => name.endsWith(".yaml") || name.endsWith(".yml");
    const isMarkdown = (name) => name.endsWith(".md");
    switch (step) {
        case "calculate-scores": {
            // Inputs: the eval result files. Only files that currently exist are
            // listed, so a result file appearing or disappearing changes the key.
            return [
                r("results/latest/eval-results.json"),
                r("results/latest/eval-results-agentic.json"),
                r("results/latest/eval-results-observed.json"),
            ].filter((p) => existsSync(p));
        }
        case "eval":
        case "eval-baseline":
        case "eval-agentic":
        case "eval-observed": {
            // Per-mode cache keys: each mode includes only its own config (and
            // expanded YAML) so changes to one mode's inputs don't invalidate
            // another mode's cache entry.
            //
            // The legacy "eval" key includes all configs for backward compat.
            const isLegacy = step === "eval";
            const paths = [r("config/models.yaml")];
            if (isLegacy || step === "eval-baseline") {
                paths.push(r("promptfooconfig.yaml"), r("tasks/.expanded.yaml"));
            }
            if (isLegacy || step === "eval-agentic") {
                paths.push(r("promptfooconfig.agentic.yaml"), r("tasks/.expanded.agentic.yaml"));
            }
            if (isLegacy || step === "eval-observed") {
                paths.push(r("promptfooconfig.observed.yaml"));
            }
            // Context files (shared across modes), then canonical contexts.
            const contextsDir = r("contexts");
            paths.push(...listMatchingEntries(contextsDir, isMarkdown));
            paths.push(...listMatchingEntries(join(contextsDir, "canonical"), isMarkdown));
            // Task files (contain assertions and test definitions).
            // Generated .expanded*.yaml files are excluded here — the relevant
            // ones are already listed explicitly above per mode.
            paths.push(...listMatchingEntries(r("tasks"), (name) => isYaml(name) && !name.startsWith(".expanded")));
            // Reference solutions (used by grader assertions), including those
            // in subdirectories.
            paths.push(...listFilesRecursive(r("canonical/reference-solutions")));
            return paths;
        }
        case "fetch-docs": {
            // Inputs: config/sources.yaml, config/models.yaml, and all task files
            // (which contain inline mappings and define feature areas).
            return [
                r("config/sources.yaml"),
                r("config/models.yaml"),
                ...listMatchingEntries(r("tasks"), isYaml),
            ];
        }
        case "generate-configs": {
            // Inputs: config/models.yaml, config/sources.yaml, all task files.
            // NOTE(review): unlike the eval steps this does not exclude
            // .expanded*.yaml — confirm generated files are meant to be inputs.
            return [
                r("config/models.yaml"),
                r("config/sources.yaml"),
                ...listMatchingEntries(r("tasks"), isYaml),
            ];
        }
        case "report": {
            // Inputs: score summary
            return [r("results/latest/score-summary.json")];
        }
        default:
            return [];
    }
}
|
|
165
|
+
/**
 * Produce a SHA-256 hex digest over a set of files plus optional context.
 *
 * The digest covers, in order: the cache version, each `context` string, and
 * then — for every path in sorted order — the path itself followed by the
 * file's bytes (or a sentinel when the file does not exist) and a separator.
 * Sorting makes the result independent of the order `paths` was given in,
 * and the sentinel makes deleting a listed file change the digest.
 *
 * @param paths - Files to hash; the array is left untouched.
 * @param context - Non-file state (filter flags, env vars, ...) to mix in.
 * @returns The digest as a hex string.
 */
export function hashFiles(paths, context) {
    const hasher = createHash("sha256");
    // Version first, so a format change invalidates every stored hash.
    hasher.update(`cache-version:${CACHE_VERSION}\n`);
    // Non-file context (filter flags, env vars, etc.).
    for (const item of context || []) {
        hasher.update(`context:${item}\n`);
    }
    const ordered = paths.slice().sort();
    ordered.forEach((filePath) => {
        hasher.update(`path:${filePath}\n`);
        hasher.update(existsSync(filePath) ? readFileSync(filePath) : "__missing__\n");
        hasher.update("\n---\n");
    });
    return hasher.digest("hex");
}
|
|
198
|
+
/**
 * Decide whether a pipeline step's cached outputs can be reused.
 *
 * The step's current input hash (including any `context` strings) is
 * compared with the hash stored in the step's manifest. A hit additionally
 * requires every recorded output path to still exist under `rootDir`, which
 * guards against outputs having been deleted by hand.
 *
 * @param rootDir - Directory inputs, outputs and the manifest resolve against.
 * @param step - Pipeline step name.
 * @param context - Non-file state (e.g. area/task filter flags) to hash in.
 * @returns `{ entry, hit: true }` on a hit, `{ currentHash, hit: false }`
 *   otherwise.
 */
export function lookupCache(rootDir, step, context) {
    const currentHash = hashFiles(getStepInputPaths(rootDir, step), context);
    const stored = readManifest(rootDir, step)?.entry;
    const outputStillThere = (p) => existsSync(resolve(rootDir, p));
    const reusable = Boolean(stored) &&
        stored.inputHash === currentHash &&
        stored.outputPaths.every(outputStillThere);
    if (reusable) {
        return { entry: stored, hit: true };
    }
    return { currentHash, hit: false };
}
|
|
225
|
+
/**
 * Read the cache manifest for a step.
 *
 * Returns null if no manifest exists or it's corrupt. "Corrupt" covers a
 * file that cannot be read or parsed as JSON, a parsed value that is not an
 * object, a manifest recorded for a different step, and a manifest whose
 * `entry` is present but lacks a string `inputHash` or a string-array
 * `outputPaths` — the fields a cache lookup dereferences. Rejecting those
 * here turns a damaged manifest into a cache miss instead of a crash.
 *
 * @param rootDir - Directory that contains `results/cache/`.
 * @param step - Pipeline step name.
 * @returns The parsed manifest, or null as described above.
 */
export function readManifest(rootDir, step) {
    const manifestPath = getManifestPath(rootDir, step);
    if (!existsSync(manifestPath))
        return null;
    let parsed;
    try {
        parsed = JSON.parse(readFileSync(manifestPath, "utf-8"));
    }
    catch {
        // Unreadable or not valid JSON: treat as "no usable manifest".
        return null;
    }
    if (parsed === null || typeof parsed !== "object" || parsed.step !== step)
        return null;
    const { entry } = parsed;
    // A manifest without an entry is passed through unchanged.
    if (entry === undefined || entry === null)
        return parsed;
    const entryIsUsable = typeof entry === "object" &&
        typeof entry.inputHash === "string" &&
        Array.isArray(entry.outputPaths) &&
        entry.outputPaths.every((p) => typeof p === "string");
    return entryIsUsable ? parsed : null;
}
|
|
244
|
+
// ---------------------------------------------------------------------------
|
|
245
|
+
// Cache operations
|
|
246
|
+
// ---------------------------------------------------------------------------
|
|
247
|
+
/**
 * Persist a cache entry for a step that has just completed successfully.
 *
 * The entry bundles the given values with an ISO-8601 timestamp taken at
 * call time and is written as the step's manifest.
 *
 * @param rootDir - Directory that contains `results/cache/`.
 * @param step - Pipeline step name.
 * @param inputHash - Hash of the step's inputs.
 * @param summary - Summary text to store.
 * @param durationMs - Duration to store.
 * @param outputPaths - Output files produced by the step.
 */
export function recordCache(rootDir, step, inputHash, summary, durationMs, outputPaths) {
    const timestamp = new Date().toISOString();
    writeManifest(rootDir, step, { durationMs, inputHash, outputPaths, summary, timestamp });
}
|
|
260
|
+
/**
 * Store `entry` as the manifest of `step`, overwriting any previous one.
 *
 * The cache directory is created (including missing parents) when it is not
 * there yet. The manifest is pretty-printed JSON holding the entry and the
 * step name.
 *
 * @param rootDir - Directory that contains (or will contain) `results/cache/`.
 * @param step - Pipeline step name.
 * @param entry - Cache entry to store.
 */
export function writeManifest(rootDir, step, entry) {
    const dir = getCacheDir(rootDir);
    const dirMissing = !existsSync(dir);
    if (dirMissing) {
        mkdirSync(dir, { recursive: true });
    }
    const target = getManifestPath(rootDir, step);
    const serialized = JSON.stringify({ entry, step }, null, 2);
    writeFileSync(target, serialized);
}
|
|
273
|
+
// ---------------------------------------------------------------------------
|
|
274
|
+
// Cache paths
|
|
275
|
+
// ---------------------------------------------------------------------------
|
|
276
|
+
/**
 * Resolve the directory that holds the cache manifests:
 * `<rootDir>/results/<CACHE_DIR_NAME>`.
 */
function getCacheDir(rootDir) {
    const segments = ["results", CACHE_DIR_NAME];
    return resolve(rootDir, ...segments);
}
|
|
280
|
+
/**
 * Build the manifest file path for a step: `<cache dir>/<step>.json`.
 */
function getManifestPath(rootDir, step) {
    const cacheDir = getCacheDir(rootDir);
    const fileName = `${step}.json`;
    return join(cacheDir, fileName);
}
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
import { type ResolvedSourceConfig } from "../sources.js";
|
|
2
|
+
import { type ActualScoreEntry, type ComponentResult } from "../_vendor/ailf-core/index.d.ts";
|
|
3
|
+
import type { GraderJudgment, PerModelEntry } from "./types.js";
|
|
4
|
+
export { classifyRubric, detectFeatureArea, extractUrlMetadata, mergeScores, parseRubricScore, } from "../_vendor/ailf-core/index.d.ts";
|
|
5
|
+
export type { ActualScoreEntry, ComponentResult, TestResult, UrlMetadata, } from "../_vendor/ailf-core/index.d.ts";
|
|
6
|
+
/**
 * The `results` member of a raw Promptfoo file: the per-test results plus
 * run-level statistics.
 *
 * NOTE(review): presumably mirrors Promptfoo's own output JSON — confirm
 * against the Promptfoo version in use.
 */
export interface PromptfooResultsWrapper {
    /** One element per test result. */
    results: RawTestResult[];
    /** Run-level counters. */
    stats: {
        successes: number;
        failures: number;
        /** Token counts; may be absent. */
        tokenUsage?: {
            /** Token counts attributed to assertions; may be absent. */
            assertions?: {
                completion: number;
                prompt: number;
                total: number;
            };
            completion: number;
            prompt: number;
            total: number;
        };
    };
}
|
|
23
|
+
/**
 * Top-level shape of a Promptfoo results file as read from disk.
 *
 * Only `results` is required; any other top-level key is accepted and typed
 * as `unknown`.
 */
export interface RawPromptfooFile {
    /** Any additional top-level keys; narrow before use. */
    [key: string]: unknown;
    /** Optional run configuration; only the default-test provider options are typed. */
    config?: {
        defaultTest?: {
            options?: {
                provider?: string;
                // NOTE(review): presumably the grader model used for rubric assertions — confirm.
                rubricProvider?: string;
            };
        };
    };
    /** Per-test results and run statistics. */
    results: PromptfooResultsWrapper;
}
|
|
35
|
+
/**
 * A single test result entry from a Promptfoo results file.
 */
export interface RawTestResult {
    cost?: number;
    error?: string;
    /** Grading outcome; `null` when there is none (e.g. presumably when the test errored — confirm). */
    gradingResult: null | {
        /** One result per assertion component. */
        componentResults: ComponentResult[];
        pass: boolean;
    };
    metadata?: Record<string, unknown>;
    /** Provider that produced the response; both identifiers are optional. */
    provider?: {
        id?: string;
        label?: string;
    };
    response: {
        output: string;
    };
    /** Originating test case, when present; may carry its own `vars`. */
    testCase?: {
        description?: string;
        vars?: Record<string, string>;
    };
    /** Variables for this result; always present, unlike `testCase.vars`. */
    vars: Record<string, string>;
}
|
|
56
|
+
/**
 * Calculate scores grouped by model. Each model gets its own FeatureScore[]
 * and model-level aggregates.
 *
 * Uses the provider.id from Promptfoo results to identify models.
 * Falls back to provider.label, then "unknown" if neither is available.
 *
 * @param resultsPath - Path to the results file to read.
 * @param weights - Numeric weights keyed by name.
 *   NOTE(review): presumably the scoring-dimension weights — confirm.
 * @returns Array with one entry per model, or null if only one model was used
 *   (per-model breakdown is redundant when there's only one model).
 */
export declare function calculateScoresPerModel(resultsPath: string, weights: Record<string, number>): null | PerModelEntry[];
|
|
67
|
+
/**
 * Extract grader judgments (reason text + scores) from evaluation results.
 *
 * This preserves the grader's natural language reasoning for downstream
 * analysis (failure mode classification, gap analysis). Each llm-rubric
 * assertion produces one GraderJudgment entry.
 *
 * Phase 3a prerequisite: structured judgment data for failure mode extraction.
 *
 * @param resultsPath - Path to the results file to read.
 * @returns One GraderJudgment per llm-rubric assertion found.
 */
export declare function extractGraderJudgments(resultsPath: string): GraderJudgment[];
|
|
77
|
+
/**
 * Score agentic evaluation results. In agentic mode, all test entries are
 * gold-only (no baseline entries — the .expanded.agentic.yaml fix ensures this).
 * The model retrieves docs via tools, so all results map to "actual" scores.
 *
 * @param resultsPath - Path to the agentic results file to read.
 * @param weights - Numeric weights keyed by name.
 *   NOTE(review): presumably the scoring-dimension weights — confirm.
 * @returns A record keyed by feature area with the composite actual score.
 */
export declare function scoreAgenticResults(resultsPath: string, weights: Record<string, number>): Record<string, ActualScoreEntry>;
|
|
85
|
+
/**
 * Options for the calculate-scores main() function.
 *
 * `rootDir` is the only required field; everything else is optional.
 */
export interface CalculateScoresOptions {
    /** Allowed origins for source isolation reporting */
    allowedOrigins?: string[];
    /** Evaluation mode (controls which result files are read) */
    mode?: string;
    /** Pre-resolved source config (skips loadSource() call) */
    resolvedSource?: ResolvedSourceConfig;
    /** Path to baseline results file (default: results/latest/eval-results.json) */
    resultsPath?: string;
    /** Root directory of the eval package (required) */
    rootDir: string;
    /** Search mode for source verification metadata */
    searchMode?: string;
    /** Documentation source name */
    source?: string;
}
|
|
102
|
+
/**
 * Run score calculation with the given options.
 *
 * Nothing is returned to the caller.
 * NOTE(review): the name suggests the computed scores are written to disk
 * under `options.rootDir` — confirm the output location in the implementation.
 *
 * @param options - Run options; `rootDir` is required.
 */
export declare function calculateAndWriteScores(options: CalculateScoresOptions): void;
|