@sanity/ailf 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +89 -0
- package/bin/ailf.js +64 -0
- package/canonical/grader-references/README.md +88 -0
- package/canonical/grader-references/groq.yaml +234 -0
- package/canonical/grader-references/studio-setup.yaml +275 -0
- package/canonical/reference-solutions/.gitkeep +1 -0
- package/canonical/reference-solutions/frameworks/nuxt.ts +119 -0
- package/canonical/reference-solutions/frameworks/remix.tsx +100 -0
- package/canonical/reference-solutions/functions/publish-webhook.ts +60 -0
- package/canonical/reference-solutions/groq/advanced-filtering.ts +379 -0
- package/canonical/reference-solutions/groq/blog-queries.ts +137 -0
- package/canonical/reference-solutions/groq/joins-references.ts +300 -0
- package/canonical/reference-solutions/nextjs/app-router-integration.tsx +128 -0
- package/canonical/reference-solutions/studio-setup/blog-schema.ts +143 -0
- package/canonical/reference-solutions/studio-setup/custom-tool.tsx +78 -0
- package/canonical/reference-solutions/visual-editing/live-preview.tsx +137 -0
- package/canonical/reference-solutions/visual-editing/presentation-nextjs.tsx +130 -0
- package/config/airbyte/ai_literacy_framework.connector.yaml +639 -0
- package/config/bigquery/README.md +74 -0
- package/config/bigquery/views/area_scores.sql +87 -0
- package/config/bigquery/views/reports.sql +49 -0
- package/config/features.yaml +116 -0
- package/config/models.yaml +115 -0
- package/config/prompts.yaml +75 -0
- package/config/rubrics.yaml +62 -0
- package/config/schedules.yaml +43 -0
- package/config/sinks.yaml +54 -0
- package/config/sources.yaml +51 -0
- package/config/thresholds.yaml +49 -0
- package/dist/_vendor/ailf-core/examples/index.d.ts +190 -0
- package/dist/_vendor/ailf-core/examples/index.js +285 -0
- package/dist/_vendor/ailf-core/index.d.ts +17 -0
- package/dist/_vendor/ailf-core/index.js +17 -0
- package/dist/_vendor/ailf-core/ports/cache-store.d.ts +72 -0
- package/dist/_vendor/ailf-core/ports/cache-store.js +17 -0
- package/dist/_vendor/ailf-core/ports/config-source.d.ts +33 -0
- package/dist/_vendor/ailf-core/ports/config-source.js +15 -0
- package/dist/_vendor/ailf-core/ports/context.d.ts +172 -0
- package/dist/_vendor/ailf-core/ports/context.js +14 -0
- package/dist/_vendor/ailf-core/ports/doc-fetcher.d.ts +131 -0
- package/dist/_vendor/ailf-core/ports/doc-fetcher.js +12 -0
- package/dist/_vendor/ailf-core/ports/eval-runner.d.ts +24 -0
- package/dist/_vendor/ailf-core/ports/eval-runner.js +8 -0
- package/dist/_vendor/ailf-core/ports/index.d.ts +15 -0
- package/dist/_vendor/ailf-core/ports/index.js +7 -0
- package/dist/_vendor/ailf-core/ports/logger.d.ts +36 -0
- package/dist/_vendor/ailf-core/ports/logger.js +11 -0
- package/dist/_vendor/ailf-core/ports/pipeline-step.d.ts +46 -0
- package/dist/_vendor/ailf-core/ports/pipeline-step.js +8 -0
- package/dist/_vendor/ailf-core/ports/task-source.d.ts +159 -0
- package/dist/_vendor/ailf-core/ports/task-source.js +72 -0
- package/dist/_vendor/ailf-core/schemas/callback-payload.d.ts +24 -0
- package/dist/_vendor/ailf-core/schemas/callback-payload.js +29 -0
- package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +55 -0
- package/dist/_vendor/ailf-core/schemas/eval-config.js +78 -0
- package/dist/_vendor/ailf-core/schemas/index.d.ts +16 -0
- package/dist/_vendor/ailf-core/schemas/index.js +16 -0
- package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +125 -0
- package/dist/_vendor/ailf-core/schemas/pipeline-request.js +67 -0
- package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +531 -0
- package/dist/_vendor/ailf-core/schemas/pipeline.js +318 -0
- package/dist/_vendor/ailf-core/schemas/schedules.d.ts +68 -0
- package/dist/_vendor/ailf-core/schemas/schedules.js +74 -0
- package/dist/_vendor/ailf-core/schemas/sinks.d.ts +207 -0
- package/dist/_vendor/ailf-core/schemas/sinks.js +108 -0
- package/dist/_vendor/ailf-core/services/comparison-formatters.d.ts +18 -0
- package/dist/_vendor/ailf-core/services/comparison-formatters.js +189 -0
- package/dist/_vendor/ailf-core/services/config-helpers.d.ts +41 -0
- package/dist/_vendor/ailf-core/services/config-helpers.js +86 -0
- package/dist/_vendor/ailf-core/services/index.d.ts +12 -0
- package/dist/_vendor/ailf-core/services/index.js +12 -0
- package/dist/_vendor/ailf-core/services/scoring.d.ts +49 -0
- package/dist/_vendor/ailf-core/services/scoring.js +222 -0
- package/dist/_vendor/ailf-core/types/index.d.ts +1082 -0
- package/dist/_vendor/ailf-core/types/index.js +21 -0
- package/dist/_vendor/ailf-core/types/scoring-input.d.ts +54 -0
- package/dist/_vendor/ailf-core/types/scoring-input.js +9 -0
- package/dist/_vendor/ailf-shared/dimension-names.d.ts +21 -0
- package/dist/_vendor/ailf-shared/dimension-names.js +27 -0
- package/dist/_vendor/ailf-shared/document-ref.d.ts +29 -0
- package/dist/_vendor/ailf-shared/document-ref.js +1 -0
- package/dist/_vendor/ailf-shared/eval-modes.d.ts +12 -0
- package/dist/_vendor/ailf-shared/eval-modes.js +8 -0
- package/dist/_vendor/ailf-shared/index.d.ts +16 -0
- package/dist/_vendor/ailf-shared/index.js +16 -0
- package/dist/_vendor/ailf-shared/noise-threshold.d.ts +9 -0
- package/dist/_vendor/ailf-shared/noise-threshold.js +9 -0
- package/dist/_vendor/ailf-shared/score-grades.d.ts +17 -0
- package/dist/_vendor/ailf-shared/score-grades.js +23 -0
- package/dist/adapters/cache/content-lake-cache.d.ts +24 -0
- package/dist/adapters/cache/content-lake-cache.js +59 -0
- package/dist/adapters/cache/filesystem-cache.d.ts +18 -0
- package/dist/adapters/cache/filesystem-cache.js +54 -0
- package/dist/adapters/cache/index.d.ts +2 -0
- package/dist/adapters/cache/index.js +2 -0
- package/dist/adapters/config-sources/cli-config-adapter.d.ts +17 -0
- package/dist/adapters/config-sources/cli-config-adapter.js +23 -0
- package/dist/adapters/config-sources/file-config-adapter.d.ts +26 -0
- package/dist/adapters/config-sources/file-config-adapter.js +96 -0
- package/dist/adapters/config-sources/index.d.ts +2 -0
- package/dist/adapters/config-sources/index.js +2 -0
- package/dist/adapters/doc-fetchers/index.d.ts +1 -0
- package/dist/adapters/doc-fetchers/index.js +1 -0
- package/dist/adapters/doc-fetchers/sanity-doc-fetcher.d.ts +76 -0
- package/dist/adapters/doc-fetchers/sanity-doc-fetcher.js +620 -0
- package/dist/adapters/eval-runners/index.d.ts +1 -0
- package/dist/adapters/eval-runners/index.js +1 -0
- package/dist/adapters/eval-runners/promptfoo-eval-adapter.d.ts +14 -0
- package/dist/adapters/eval-runners/promptfoo-eval-adapter.js +63 -0
- package/dist/adapters/index.d.ts +12 -0
- package/dist/adapters/index.js +12 -0
- package/dist/adapters/loggers/console-logger.d.ts +22 -0
- package/dist/adapters/loggers/console-logger.js +54 -0
- package/dist/adapters/loggers/index.d.ts +9 -0
- package/dist/adapters/loggers/index.js +9 -0
- package/dist/adapters/loggers/json-logger.d.ts +18 -0
- package/dist/adapters/loggers/json-logger.js +33 -0
- package/dist/adapters/loggers/quiet-logger.d.ts +16 -0
- package/dist/adapters/loggers/quiet-logger.js +30 -0
- package/dist/adapters/task-sources/composite-task-source.d.ts +20 -0
- package/dist/adapters/task-sources/composite-task-source.js +59 -0
- package/dist/adapters/task-sources/content-lake-task-source.d.ts +20 -0
- package/dist/adapters/task-sources/content-lake-task-source.js +219 -0
- package/dist/adapters/task-sources/index.d.ts +7 -0
- package/dist/adapters/task-sources/index.js +7 -0
- package/dist/adapters/task-sources/repo-schemas.d.ts +245 -0
- package/dist/adapters/task-sources/repo-schemas.js +234 -0
- package/dist/adapters/task-sources/repo-task-source.d.ts +22 -0
- package/dist/adapters/task-sources/repo-task-source.js +104 -0
- package/dist/adapters/task-sources/repo-trigger.d.ts +52 -0
- package/dist/adapters/task-sources/repo-trigger.js +153 -0
- package/dist/adapters/task-sources/repo-validation.d.ts +49 -0
- package/dist/adapters/task-sources/repo-validation.js +164 -0
- package/dist/adapters/task-sources/yaml-task-source.d.ts +18 -0
- package/dist/adapters/task-sources/yaml-task-source.js +136 -0
- package/dist/agent-observer/agentic-provider.d.ts +132 -0
- package/dist/agent-observer/agentic-provider.js +983 -0
- package/dist/agent-observer/classifier.d.ts +62 -0
- package/dist/agent-observer/classifier.js +269 -0
- package/dist/agent-observer/index.d.ts +7 -0
- package/dist/agent-observer/index.js +4 -0
- package/dist/agent-observer/pricing.d.ts +35 -0
- package/dist/agent-observer/pricing.js +82 -0
- package/dist/agent-observer/provider.d.ts +77 -0
- package/dist/agent-observer/provider.js +151 -0
- package/dist/agent-observer/proxy.d.ts +91 -0
- package/dist/agent-observer/proxy.js +321 -0
- package/dist/agent-observer/test-imports.d.ts +7 -0
- package/dist/agent-observer/test-imports.js +185 -0
- package/dist/agent-observer/types.d.ts +137 -0
- package/dist/agent-observer/types.js +16 -0
- package/dist/assertions/source-isolation.d.ts +72 -0
- package/dist/assertions/source-isolation.js +117 -0
- package/dist/cli.d.ts +24 -0
- package/dist/cli.js +199 -0
- package/dist/commands/agent-report.d.ts +5 -0
- package/dist/commands/agent-report.js +69 -0
- package/dist/commands/baseline.d.ts +9 -0
- package/dist/commands/baseline.js +141 -0
- package/dist/commands/cache.d.ts +13 -0
- package/dist/commands/cache.js +135 -0
- package/dist/commands/calculate-scores.d.ts +8 -0
- package/dist/commands/calculate-scores.js +48 -0
- package/dist/commands/compare.d.ts +8 -0
- package/dist/commands/compare.js +120 -0
- package/dist/commands/completion.d.ts +18 -0
- package/dist/commands/completion.js +260 -0
- package/dist/commands/coverage-audit.d.ts +7 -0
- package/dist/commands/coverage-audit.js +40 -0
- package/dist/commands/discovery-report.d.ts +10 -0
- package/dist/commands/discovery-report.js +44 -0
- package/dist/commands/eval.d.ts +9 -0
- package/dist/commands/eval.js +35 -0
- package/dist/commands/explain-handler.d.ts +34 -0
- package/dist/commands/explain-handler.js +719 -0
- package/dist/commands/fetch-docs.d.ts +8 -0
- package/dist/commands/fetch-docs.js +128 -0
- package/dist/commands/generate-configs.d.ts +8 -0
- package/dist/commands/generate-configs.js +46 -0
- package/dist/commands/grader/index.d.ts +11 -0
- package/dist/commands/grader/index.js +118 -0
- package/dist/commands/init.d.ts +19 -0
- package/dist/commands/init.js +150 -0
- package/dist/commands/interactive.d.ts +12 -0
- package/dist/commands/interactive.js +238 -0
- package/dist/commands/lookup-doc.d.ts +15 -0
- package/dist/commands/lookup-doc.js +84 -0
- package/dist/commands/measure-retrieval.d.ts +5 -0
- package/dist/commands/measure-retrieval.js +65 -0
- package/dist/commands/pipeline-action.d.ts +71 -0
- package/dist/commands/pipeline-action.js +305 -0
- package/dist/commands/pipeline.d.ts +62 -0
- package/dist/commands/pipeline.js +53 -0
- package/dist/commands/pr-comment.d.ts +8 -0
- package/dist/commands/pr-comment.js +47 -0
- package/dist/commands/publish.d.ts +26 -0
- package/dist/commands/publish.js +253 -0
- package/dist/commands/readiness-report.d.ts +10 -0
- package/dist/commands/readiness-report.js +104 -0
- package/dist/commands/shared/options.d.ts +29 -0
- package/dist/commands/shared/options.js +57 -0
- package/dist/commands/update-quality-scores.d.ts +5 -0
- package/dist/commands/update-quality-scores.js +20 -0
- package/dist/commands/validate-tasks.d.ts +16 -0
- package/dist/commands/validate-tasks.js +93 -0
- package/dist/commands/validate.d.ts +9 -0
- package/dist/commands/validate.js +73 -0
- package/dist/commands/webhook-server.d.ts +5 -0
- package/dist/commands/webhook-server.js +30 -0
- package/dist/commands/weekly-digest.d.ts +10 -0
- package/dist/commands/weekly-digest.js +104 -0
- package/dist/composition-root.d.ts +26 -0
- package/dist/composition-root.js +107 -0
- package/dist/interpolate.d.ts +26 -0
- package/dist/interpolate.js +70 -0
- package/dist/job-store.d.ts +104 -0
- package/dist/job-store.js +188 -0
- package/dist/lib/agent-behavior-report.d.ts +8 -0
- package/dist/lib/agent-behavior-report.js +185 -0
- package/dist/lib/baseline.d.ts +19 -0
- package/dist/lib/baseline.js +153 -0
- package/dist/lib/calculate-scores.d.ts +23 -0
- package/dist/lib/calculate-scores.js +42 -0
- package/dist/lib/compare.d.ts +18 -0
- package/dist/lib/compare.js +170 -0
- package/dist/lib/coverage-audit.d.ts +4 -0
- package/dist/lib/coverage-audit.js +42 -0
- package/dist/lib/discovery-report.d.ts +13 -0
- package/dist/lib/discovery-report.js +57 -0
- package/dist/lib/fetch-docs.d.ts +30 -0
- package/dist/lib/fetch-docs.js +171 -0
- package/dist/lib/generate-configs.d.ts +25 -0
- package/dist/lib/generate-configs.js +42 -0
- package/dist/lib/grader-api.d.ts +21 -0
- package/dist/lib/grader-api.js +34 -0
- package/dist/lib/grader-compare.d.ts +19 -0
- package/dist/lib/grader-compare.js +91 -0
- package/dist/lib/grader-consistency.d.ts +27 -0
- package/dist/lib/grader-consistency.js +79 -0
- package/dist/lib/grader-sensitivity.d.ts +19 -0
- package/dist/lib/grader-sensitivity.js +75 -0
- package/dist/lib/grader-validate.d.ts +19 -0
- package/dist/lib/grader-validate.js +78 -0
- package/dist/lib/measure-retrieval.d.ts +14 -0
- package/dist/lib/measure-retrieval.js +71 -0
- package/dist/lib/pr-comment.d.ts +16 -0
- package/dist/lib/pr-comment.js +28 -0
- package/dist/lib/readiness-report.d.ts +13 -0
- package/dist/lib/readiness-report.js +108 -0
- package/dist/lib/webhook-server.d.ts +11 -0
- package/dist/lib/webhook-server.js +24 -0
- package/dist/lib/weekly-digest.d.ts +24 -0
- package/dist/lib/weekly-digest.js +148 -0
- package/dist/orchestration/build-app-context.d.ts +27 -0
- package/dist/orchestration/build-app-context.js +81 -0
- package/dist/orchestration/build-step-sequence.d.ts +15 -0
- package/dist/orchestration/build-step-sequence.js +84 -0
- package/dist/orchestration/config-to-source-overrides.d.ts +9 -0
- package/dist/orchestration/config-to-source-overrides.js +28 -0
- package/dist/orchestration/env-bridge.d.ts +21 -0
- package/dist/orchestration/env-bridge.js +66 -0
- package/dist/orchestration/index.d.ts +11 -0
- package/dist/orchestration/index.js +11 -0
- package/dist/orchestration/pipeline-orchestrator.d.ts +24 -0
- package/dist/orchestration/pipeline-orchestrator.js +153 -0
- package/dist/orchestration/step-runner.d.ts +20 -0
- package/dist/orchestration/step-runner.js +88 -0
- package/dist/orchestration/steps/calculate-scores-step.d.ts +13 -0
- package/dist/orchestration/steps/calculate-scores-step.js +95 -0
- package/dist/orchestration/steps/callback-step.d.ts +24 -0
- package/dist/orchestration/steps/callback-step.js +76 -0
- package/dist/orchestration/steps/compare-step.d.ts +14 -0
- package/dist/orchestration/steps/compare-step.js +92 -0
- package/dist/orchestration/steps/discovery-report-step.d.ts +13 -0
- package/dist/orchestration/steps/discovery-report-step.js +55 -0
- package/dist/orchestration/steps/fetch-docs-shell.d.ts +17 -0
- package/dist/orchestration/steps/fetch-docs-shell.js +30 -0
- package/dist/orchestration/steps/fetch-docs-step.d.ts +14 -0
- package/dist/orchestration/steps/fetch-docs-step.js +135 -0
- package/dist/orchestration/steps/gap-analysis-step.d.ts +16 -0
- package/dist/orchestration/steps/gap-analysis-step.js +136 -0
- package/dist/orchestration/steps/generate-configs-step.d.ts +14 -0
- package/dist/orchestration/steps/generate-configs-step.js +85 -0
- package/dist/orchestration/steps/grader-consistency-step.d.ts +13 -0
- package/dist/orchestration/steps/grader-consistency-step.js +64 -0
- package/dist/orchestration/steps/index.d.ts +19 -0
- package/dist/orchestration/steps/index.js +19 -0
- package/dist/orchestration/steps/mirror-repo-tasks-step.d.ts +21 -0
- package/dist/orchestration/steps/mirror-repo-tasks-step.js +94 -0
- package/dist/orchestration/steps/publish-report-step.d.ts +26 -0
- package/dist/orchestration/steps/publish-report-step.js +216 -0
- package/dist/orchestration/steps/readiness-step.d.ts +13 -0
- package/dist/orchestration/steps/readiness-step.js +91 -0
- package/dist/orchestration/steps/report-step.d.ts +12 -0
- package/dist/orchestration/steps/report-step.js +49 -0
- package/dist/orchestration/steps/run-eval-step.d.ts +17 -0
- package/dist/orchestration/steps/run-eval-step.js +195 -0
- package/dist/orchestration/steps/validate-step.d.ts +12 -0
- package/dist/orchestration/steps/validate-step.js +41 -0
- package/dist/pipeline/agent-behavior-report.d.ts +53 -0
- package/dist/pipeline/agent-behavior-report.js +132 -0
- package/dist/pipeline/attribution.d.ts +47 -0
- package/dist/pipeline/attribution.js +226 -0
- package/dist/pipeline/baseline.d.ts +37 -0
- package/dist/pipeline/baseline.js +141 -0
- package/dist/pipeline/cache.d.ts +101 -0
- package/dist/pipeline/cache.js +283 -0
- package/dist/pipeline/calculate-scores.d.ts +102 -0
- package/dist/pipeline/calculate-scores.js +1128 -0
- package/dist/pipeline/callback-delivery.d.ts +50 -0
- package/dist/pipeline/callback-delivery.js +89 -0
- package/dist/pipeline/checks.d.ts +39 -0
- package/dist/pipeline/checks.js +280 -0
- package/dist/pipeline/classify-url.d.ts +61 -0
- package/dist/pipeline/classify-url.js +93 -0
- package/dist/pipeline/compare.d.ts +31 -0
- package/dist/pipeline/compare.js +208 -0
- package/dist/pipeline/coverage-audit.d.ts +39 -0
- package/dist/pipeline/coverage-audit.js +165 -0
- package/dist/pipeline/degradations.d.ts +85 -0
- package/dist/pipeline/degradations.js +242 -0
- package/dist/pipeline/discovery-report.d.ts +55 -0
- package/dist/pipeline/discovery-report.js +178 -0
- package/dist/pipeline/eval-constants.d.ts +68 -0
- package/dist/pipeline/eval-constants.js +111 -0
- package/dist/pipeline/eval-fingerprint.d.ts +66 -0
- package/dist/pipeline/eval-fingerprint.js +175 -0
- package/dist/pipeline/expand-tasks.d.ts +220 -0
- package/dist/pipeline/expand-tasks.js +421 -0
- package/dist/pipeline/failure-modes.d.ts +46 -0
- package/dist/pipeline/failure-modes.js +348 -0
- package/dist/pipeline/fetch-url-content.d.ts +44 -0
- package/dist/pipeline/fetch-url-content.js +93 -0
- package/dist/pipeline/gap-analysis.d.ts +48 -0
- package/dist/pipeline/gap-analysis.js +231 -0
- package/dist/pipeline/generate-configs.d.ts +72 -0
- package/dist/pipeline/generate-configs.js +395 -0
- package/dist/pipeline/grader-api.d.ts +49 -0
- package/dist/pipeline/grader-api.js +200 -0
- package/dist/pipeline/grader-compare-runner.d.ts +44 -0
- package/dist/pipeline/grader-compare-runner.js +301 -0
- package/dist/pipeline/grader-comparison.d.ts +111 -0
- package/dist/pipeline/grader-comparison.js +161 -0
- package/dist/pipeline/grader-consistency-runner.d.ts +60 -0
- package/dist/pipeline/grader-consistency-runner.js +270 -0
- package/dist/pipeline/grader-consistency.d.ts +103 -0
- package/dist/pipeline/grader-consistency.js +146 -0
- package/dist/pipeline/grader-sensitivity-runner.d.ts +40 -0
- package/dist/pipeline/grader-sensitivity-runner.js +282 -0
- package/dist/pipeline/grader-sensitivity.d.ts +94 -0
- package/dist/pipeline/grader-sensitivity.js +144 -0
- package/dist/pipeline/grader-validate-runner.d.ts +38 -0
- package/dist/pipeline/grader-validate-runner.js +229 -0
- package/dist/pipeline/grader-validation.d.ts +107 -0
- package/dist/pipeline/grader-validation.js +169 -0
- package/dist/pipeline/map-request-to-config.d.ts +19 -0
- package/dist/pipeline/map-request-to-config.js +80 -0
- package/dist/pipeline/measure-retrieval.d.ts +59 -0
- package/dist/pipeline/measure-retrieval.js +111 -0
- package/dist/pipeline/mirror-repo-tasks.d.ts +86 -0
- package/dist/pipeline/mirror-repo-tasks.js +350 -0
- package/dist/pipeline/plan-format.d.ts +33 -0
- package/dist/pipeline/plan-format.js +202 -0
- package/dist/pipeline/plan.d.ts +169 -0
- package/dist/pipeline/plan.js +708 -0
- package/dist/pipeline/pr-comment.d.ts +19 -0
- package/dist/pipeline/pr-comment.js +502 -0
- package/dist/pipeline/probe.d.ts +52 -0
- package/dist/pipeline/probe.js +390 -0
- package/dist/pipeline/provenance.d.ts +47 -0
- package/dist/pipeline/provenance.js +146 -0
- package/dist/pipeline/readiness-report.d.ts +87 -0
- package/dist/pipeline/readiness-report.js +205 -0
- package/dist/pipeline/release-classification.d.ts +54 -0
- package/dist/pipeline/release-classification.js +238 -0
- package/dist/pipeline/release-report.d.ts +37 -0
- package/dist/pipeline/release-report.js +222 -0
- package/dist/pipeline/repo-eval-comment.d.ts +37 -0
- package/dist/pipeline/repo-eval-comment.js +165 -0
- package/dist/pipeline/repo-threshold-evaluator.d.ts +89 -0
- package/dist/pipeline/repo-threshold-evaluator.js +162 -0
- package/dist/pipeline/resolve-mappings.d.ts +35 -0
- package/dist/pipeline/resolve-mappings.js +72 -0
- package/dist/pipeline/retrieval-metrics.d.ts +39 -0
- package/dist/pipeline/retrieval-metrics.js +136 -0
- package/dist/pipeline/reverse-mapping.d.ts +67 -0
- package/dist/pipeline/reverse-mapping.js +88 -0
- package/dist/pipeline/schemas.d.ts +9 -0
- package/dist/pipeline/schemas.js +9 -0
- package/dist/pipeline/steps/calculate-scores-step.d.ts +11 -0
- package/dist/pipeline/steps/calculate-scores-step.js +89 -0
- package/dist/pipeline/steps/compare-step.d.ts +18 -0
- package/dist/pipeline/steps/compare-step.js +90 -0
- package/dist/pipeline/steps/eval-step.d.ts +53 -0
- package/dist/pipeline/steps/eval-step.js +347 -0
- package/dist/pipeline/steps/fetch-docs-step.d.ts +11 -0
- package/dist/pipeline/steps/fetch-docs-step.js +84 -0
- package/dist/pipeline/steps/generate-configs-step.d.ts +11 -0
- package/dist/pipeline/steps/generate-configs-step.js +98 -0
- package/dist/pipeline/steps/grader-consistency-step.d.ts +21 -0
- package/dist/pipeline/steps/grader-consistency-step.js +74 -0
- package/dist/pipeline/steps/publish-report-step.d.ts +57 -0
- package/dist/pipeline/steps/publish-report-step.js +243 -0
- package/dist/pipeline/steps/report-step.d.ts +13 -0
- package/dist/pipeline/steps/report-step.js +56 -0
- package/dist/pipeline/steps/update-scores-step.d.ts +11 -0
- package/dist/pipeline/steps/update-scores-step.js +42 -0
- package/dist/pipeline/targeted-loo.d.ts +88 -0
- package/dist/pipeline/targeted-loo.js +203 -0
- package/dist/pipeline/thresholds.d.ts +27 -0
- package/dist/pipeline/thresholds.js +245 -0
- package/dist/pipeline/types.d.ts +10 -0
- package/dist/pipeline/types.js +10 -0
- package/dist/pipeline/validate.d.ts +67 -0
- package/dist/pipeline/validate.js +406 -0
- package/dist/pipeline/webhook-server.d.ts +37 -0
- package/dist/pipeline/webhook-server.js +133 -0
- package/dist/report-store.d.ts +84 -0
- package/dist/report-store.js +208 -0
- package/dist/sanity/client.d.ts +38 -0
- package/dist/sanity/client.js +86 -0
- package/dist/sanity/portable-text.d.ts +11 -0
- package/dist/sanity/portable-text.js +211 -0
- package/dist/sanity/queries.d.ts +133 -0
- package/dist/sanity/queries.js +300 -0
- package/dist/schedules/digest.d.ts +116 -0
- package/dist/schedules/digest.js +156 -0
- package/dist/schedules/index.d.ts +12 -0
- package/dist/schedules/index.js +10 -0
- package/dist/schedules/loader.d.ts +31 -0
- package/dist/schedules/loader.js +73 -0
- package/dist/schedules/schema.d.ts +9 -0
- package/dist/schedules/schema.js +9 -0
- package/dist/scripts/agent-behavior-report.d.ts +19 -0
- package/dist/scripts/agent-behavior-report.js +315 -0
- package/dist/scripts/baseline.d.ts +43 -0
- package/dist/scripts/baseline.js +267 -0
- package/dist/scripts/calculate-scores.d.ts +166 -0
- package/dist/scripts/calculate-scores.js +1296 -0
- package/dist/scripts/compare.d.ts +22 -0
- package/dist/scripts/compare.js +334 -0
- package/dist/scripts/coverage-audit.d.ts +44 -0
- package/dist/scripts/coverage-audit.js +209 -0
- package/dist/scripts/debug-eval.d.ts +19 -0
- package/dist/scripts/debug-eval.js +73 -0
- package/dist/scripts/discovery-report.d.ts +58 -0
- package/dist/scripts/discovery-report.js +250 -0
- package/dist/scripts/fetch-docs.d.ts +35 -0
- package/dist/scripts/fetch-docs.js +472 -0
- package/dist/scripts/generate-configs.d.ts +66 -0
- package/dist/scripts/generate-configs.js +459 -0
- package/dist/scripts/grader-api.d.ts +27 -0
- package/dist/scripts/grader-api.js +206 -0
- package/dist/scripts/grader-compare.d.ts +22 -0
- package/dist/scripts/grader-compare.js +368 -0
- package/dist/scripts/grader-consistency.d.ts +20 -0
- package/dist/scripts/grader-consistency.js +313 -0
- package/dist/scripts/grader-sensitivity.d.ts +22 -0
- package/dist/scripts/grader-sensitivity.js +354 -0
- package/dist/scripts/grader-validate.d.ts +19 -0
- package/dist/scripts/grader-validate.js +267 -0
- package/dist/scripts/measure-retrieval.d.ts +10 -0
- package/dist/scripts/measure-retrieval.js +145 -0
- package/dist/scripts/migrate-tasks-to-content-lake.d.ts +24 -0
- package/dist/scripts/migrate-tasks-to-content-lake.js +327 -0
- package/dist/scripts/pipeline.d.ts +76 -0
- package/dist/scripts/pipeline.js +1031 -0
- package/dist/scripts/pr-comment.d.ts +10 -0
- package/dist/scripts/pr-comment.js +510 -0
- package/dist/scripts/readiness-report.d.ts +88 -0
- package/dist/scripts/readiness-report.js +342 -0
- package/dist/scripts/update-quality-scores.d.ts +15 -0
- package/dist/scripts/update-quality-scores.js +184 -0
- package/dist/scripts/validate-task-sources.d.ts +21 -0
- package/dist/scripts/validate-task-sources.js +210 -0
- package/dist/scripts/validate.d.ts +13 -0
- package/dist/scripts/validate.js +79 -0
- package/dist/scripts/webhook-server.d.ts +26 -0
- package/dist/scripts/webhook-server.js +147 -0
- package/dist/scripts/weekly-digest.d.ts +24 -0
- package/dist/scripts/weekly-digest.js +144 -0
- package/dist/sinks/bigquery/index.d.ts +131 -0
- package/dist/sinks/bigquery/index.js +222 -0
- package/dist/sinks/format-slack.d.ts +64 -0
- package/dist/sinks/format-slack.js +306 -0
- package/dist/sinks/index.d.ts +23 -0
- package/dist/sinks/index.js +18 -0
- package/dist/sinks/loader.d.ts +18 -0
- package/dist/sinks/loader.js +82 -0
- package/dist/sinks/retry.d.ts +24 -0
- package/dist/sinks/retry.js +52 -0
- package/dist/sinks/schema.d.ts +9 -0
- package/dist/sinks/schema.js +9 -0
- package/dist/sinks/slack/format.d.ts +65 -0
- package/dist/sinks/slack/format.js +327 -0
- package/dist/sinks/slack/index.d.ts +27 -0
- package/dist/sinks/slack/index.js +78 -0
- package/dist/sinks/slack-sink.d.ts +27 -0
- package/dist/sinks/slack-sink.js +78 -0
- package/dist/sinks/types.d.ts +59 -0
- package/dist/sinks/types.js +44 -0
- package/dist/sinks/webhook/index.d.ts +19 -0
- package/dist/sinks/webhook/index.js +50 -0
- package/dist/sinks/webhook-sink.d.ts +19 -0
- package/dist/sinks/webhook-sink.js +50 -0
- package/dist/sources.d.ts +104 -0
- package/dist/sources.js +292 -0
- package/dist/webhook/budget.d.ts +42 -0
- package/dist/webhook/budget.js +60 -0
- package/dist/webhook/debounce.d.ts +67 -0
- package/dist/webhook/debounce.js +76 -0
- package/dist/webhook/dispatch.d.ts +45 -0
- package/dist/webhook/dispatch.js +84 -0
- package/dist/webhook/eval-request-handler.d.ts +87 -0
- package/dist/webhook/eval-request-handler.js +181 -0
- package/dist/webhook/handler.d.ts +88 -0
- package/dist/webhook/handler.js +203 -0
- package/dist/webhook/index.d.ts +17 -0
- package/dist/webhook/index.js +12 -0
- package/dist/webhook/types.d.ts +109 -0
- package/dist/webhook/types.js +10 -0
- package/package.json +72 -0
- package/tasks/.expanded.agentic.yaml +51 -0
- package/tasks/.expanded.yaml +66 -0
- package/tasks/frameworks.yaml +98 -0
- package/tasks/functions.yaml +51 -0
- package/tasks/groq.yaml +216 -0
- package/tasks/nextjs-live.yaml +62 -0
- package/tasks/studio-setup.yaml +111 -0
- package/tasks/visual-editing.yaml +120 -0
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* schedules/digest.ts
|
|
3
|
+
*
|
|
4
|
+
* Weekly digest engine — aggregates evaluation reports from a time window
|
|
5
|
+
* and computes trend summaries for notification delivery.
|
|
6
|
+
*
|
|
7
|
+
* The digest answers: "How did our AI literacy scores trend this week?"
|
|
8
|
+
*
|
|
9
|
+
* Data flow:
|
|
10
|
+
* 1. Query Sanity for all reports in the lookback window
|
|
11
|
+
* 2. Group by feature area
|
|
12
|
+
* 3. Compute trend direction and magnitude per area
|
|
13
|
+
* 4. Identify regressions, improvements, and stable areas
|
|
14
|
+
* 5. Return a structured DigestSummary for formatting
|
|
15
|
+
*
|
|
16
|
+
* @see docs/design-docs/report-store/notifications.md
|
|
17
|
+
* @see docs/design-docs/report-store/implementation.md — Phase 5
|
|
18
|
+
*/
|
|
19
|
+
import { NOISE_THRESHOLD } from "../_vendor/ailf-shared/index.js";
|
|
20
|
+
import { getSanityClient } from "../sanity/client.js";
|
|
21
|
+
// ---------------------------------------------------------------------------
|
|
22
|
+
// Constants
|
|
23
|
+
// ---------------------------------------------------------------------------
|
|
24
|
+
// Document `_type` value for stored evaluation reports.
// NOTE(review): presumably bound to the query's `$type` parameter at the
// fetch site — the binding is not visible in this part of the file.
const REPORT_TYPE = "ailf.report";
// ---------------------------------------------------------------------------
// GROQ query
// ---------------------------------------------------------------------------
/**
 * GROQ query that feeds the digest.
 *
 * Selects every document whose `_type` equals `$type` and whose
 * `completedAt` is at or after `$since`, ordered oldest → newest by
 * `completedAt`, and projects only the fields the digest reads:
 * report identity/timing, provenance (mode, source name), the overall
 * average score, and one `{ feature, totalScore }` entry per area score.
 *
 * Parameters: `$type`, `$since`.
 */
const DIGEST_QUERY = `*[_type == $type && completedAt >= $since] | order(completedAt asc) {
  "reportId": reportId,
  "completedAt": completedAt,
  "mode": provenance.mode,
  "sourceName": provenance.source.name,
  "overallScore": summary.overall.avgScore,
  "durationMs": durationMs,
  "tag": tag,
  "scores": summary.scores[] {
    "feature": feature,
    "totalScore": totalScore
  }
}`;
|
|
41
|
+
/**
 * Build a digest summary from a chronologically-ordered list of reports.
 *
 * Pure function — no I/O, fully testable.
 *
 * For every feature area that has at least one numeric score, the trend is
 * classified from the difference between the last and the first recorded
 * `totalScore` for that area (in report order). The overall trend is
 * classified the same way from the first and last report's `overallScore`.
 *
 * @param reports - Non-empty array of reports, oldest first. Each report is
 *   read for `overallScore` and `scores` (entries with `feature` and
 *   `totalScore`); a null/undefined `scores` is treated as "no area scores".
 * @param lookbackStart - Start of the window; emitted via `toISOString()`.
 * @param lookbackEnd - End of the window; emitted via `toISOString()`.
 * @param noiseThreshold - Delta magnitude that must be exceeded for a change
 *   to count as improving/regressing (forwarded to `classifyTrend`).
 * @returns An object with `areaTrends` (sorted by area name), the `improved`,
 *   `regressed` and `stable` area-name lists, the ISO window bounds,
 *   `overallDelta`, `overallLatest`, `overallTrend` and `reportCount`.
 * @throws TypeError if `reports` is not an array or is empty — there is no
 *   first/last report to compare.
 */
export function buildDigestSummary(reports, lookbackStart, lookbackEnd, noiseThreshold = NOISE_THRESHOLD) {
    // Fail with a clear message instead of the opaque "cannot read properties
    // of undefined" that indexing reports[0] would raise (same error type).
    if (!Array.isArray(reports) || reports.length === 0) {
        throw new TypeError("buildDigestSummary requires at least one report");
    }
    // Group every area's scores, in report order, in a single pass.
    const scoresByArea = new Map();
    for (const report of reports) {
        const seenInReport = new Set();
        // A report without `summary.scores` comes back from the query with a
        // null `scores` field, so fall back to an empty list.
        for (const score of report.scores ?? []) {
            const area = score.feature;
            // Only the first entry for an area within one report counts.
            if (seenInReport.has(area)) {
                continue;
            }
            seenInReport.add(area);
            let series = scoresByArea.get(area);
            if (series === undefined) {
                series = [];
                scoresByArea.set(area, series);
            }
            // Skip missing/non-numeric scores so they cannot turn the delta
            // into NaN and silently classify the area as "stable".
            if (typeof score.totalScore === "number") {
                series.push(score.totalScore);
            }
        }
    }
    // Build per-area trends, alphabetically by area name.
    const areaTrends = [];
    for (const area of [...scoresByArea.keys()].sort()) {
        const series = scoresByArea.get(area);
        if (series.length === 0) {
            continue;
        }
        const firstScore = series[0];
        const lastScore = series[series.length - 1];
        const scoreDelta = lastScore - firstScore;
        areaTrends.push({
            area,
            firstScore,
            lastScore,
            reportCount: series.length,
            scoreDelta,
            trend: classifyTrend(scoreDelta, noiseThreshold),
        });
    }
    // Overall trend from report-level overall scores (first vs. last report).
    const lastOverall = reports[reports.length - 1].overallScore;
    const overallDelta = lastOverall - reports[0].overallScore;
    const overallTrend = classifyTrend(overallDelta, noiseThreshold);
    // Names of the areas whose trend matches the given label.
    const areasWithTrend = (label) => areaTrends.filter((t) => t.trend === label).map((t) => t.area);
    return {
        areaTrends,
        improved: areasWithTrend("improving"),
        lookbackEnd: lookbackEnd.toISOString(),
        lookbackStart: lookbackStart.toISOString(),
        overallDelta,
        overallLatest: lastOverall,
        overallTrend,
        regressed: areasWithTrend("regressing"),
        reportCount: reports.length,
        stable: areasWithTrend("stable"),
    };
}
|
|
106
|
+
// ---------------------------------------------------------------------------
|
|
107
|
+
// Trend computation
|
|
108
|
+
// ---------------------------------------------------------------------------
|
|
109
|
+
/**
|
|
110
|
+
* Classify a score delta into a trend direction.
|
|
111
|
+
*/
|
|
112
|
+
export function classifyTrend(delta, threshold = NOISE_THRESHOLD) {
|
|
113
|
+
if (delta > threshold)
|
|
114
|
+
return "improving";
|
|
115
|
+
if (delta < -threshold)
|
|
116
|
+
return "regressing";
|
|
117
|
+
return "stable";
|
|
118
|
+
}
|
|
119
|
+
// ---------------------------------------------------------------------------
|
|
120
|
+
// Digest generation (queries the Content Lake)
|
|
121
|
+
// ---------------------------------------------------------------------------
|
|
122
|
+
/**
 * Generate a digest summary from recent evaluation reports.
 *
 * Fetches every report newer than the lookback window start with
 * DIGEST_QUERY and hands the result to buildDigestSummary.
 *
 * Options read here: `lookbackDays` (default 7), `noiseThreshold`
 * (default NOISE_THRESHOLD), `client` (used as-is when provided), and
 * `dataset` / `projectId` / `token`, which are forwarded to getSanityClient
 * only when truthy and only when no `client` was supplied.
 *
 * Note that the client is created before the try block, so a failure in
 * getSanityClient propagates to the caller rather than yielding null.
 *
 * @returns DigestSummary, or null when the query returns no reports or when
 *          fetching/summarizing throws (a warning is logged in that case)
 */
export async function generateDigest(options = {}) {
    const days = options.lookbackDays ?? 7;
    const noise = options.noiseThreshold ?? NOISE_THRESHOLD;
    let sanity = options.client;
    if (sanity === null || sanity === undefined) {
        // Forward only the overrides that were actually provided.
        const overrides = Object.fromEntries([
            ["dataset", options.dataset],
            ["projectId", options.projectId],
            ["token", options.token],
        ].filter(([, value]) => value));
        sanity = getSanityClient(overrides);
    }
    const windowEnd = new Date();
    const windowStart = new Date(windowEnd.getTime() - days * 24 * 60 * 60 * 1000);
    try {
        const params = { since: windowStart.toISOString(), type: REPORT_TYPE };
        const found = await sanity.fetch(DIGEST_QUERY, params);
        const hasReports = Boolean(found) && found.length !== 0;
        if (hasReports) {
            return buildDigestSummary(found, windowStart, windowEnd, noise);
        }
        console.log(" ℹ️ No reports found in the lookback window");
        return null;
    }
    catch (failure) {
        const reason = failure instanceof Error ? failure.message : String(failure);
        console.warn(" ⚠️ Failed to fetch digest reports:", reason);
        return null;
    }
}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* schedules/index.ts
|
|
3
|
+
*
|
|
4
|
+
* Barrel exports for the scheduled evaluation module.
|
|
5
|
+
*
|
|
6
|
+
* @see docs/design-docs/report-store/implementation.md — Phase 5
|
|
7
|
+
*/
|
|
8
|
+
export { buildDigestSummary, classifyTrend, generateDigest } from "./digest.js";
|
|
9
|
+
export type { AreaTrend, DigestOptions, DigestReportEntry, DigestSummary, TrendDirection, } from "./digest.js";
|
|
10
|
+
export { findSchedule, getDigestConfig, getEnabledSchedules, loadSchedules, } from "./loader.js";
|
|
11
|
+
export type { DigestConfig, ScheduleEntry, SchedulesFile } from "./schema.js";
|
|
12
|
+
export { DigestConfigSchema, ScheduleEntrySchema, SchedulesFileSchema, } from "./schema.js";
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* schedules/index.ts
|
|
3
|
+
*
|
|
4
|
+
* Barrel exports for the scheduled evaluation module.
|
|
5
|
+
*
|
|
6
|
+
* @see docs/design-docs/report-store/implementation.md — Phase 5
|
|
7
|
+
*/
|
|
8
|
+
export { buildDigestSummary, classifyTrend, generateDigest } from "./digest.js";
|
|
9
|
+
export { findSchedule, getDigestConfig, getEnabledSchedules, loadSchedules, } from "./loader.js";
|
|
10
|
+
export { DigestConfigSchema, ScheduleEntrySchema, SchedulesFileSchema, } from "./schema.js";
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* schedules/loader.ts
|
|
3
|
+
*
|
|
4
|
+
* Loads and validates config/schedules.yaml.
|
|
5
|
+
*
|
|
6
|
+
* The schedule config drives both the GitHub Actions cron workflow
|
|
7
|
+
* (which reads enabled schedules to build a matrix) and the digest
|
|
8
|
+
* script (which reads the digest section for lookback and delivery).
|
|
9
|
+
*
|
|
10
|
+
* @see docs/design-docs/report-store/implementation.md — Phase 5
|
|
11
|
+
*/
|
|
12
|
+
import { type DigestConfig, type ScheduleEntry, type SchedulesFile } from "./schema.js";
|
|
13
|
+
/**
 * Look up a single schedule entry by its `name`.
 *
 * @param name - Schedule name to match exactly
 * @param configPath - Optional override for the schedules config location
 * @returns The matching entry, or undefined when none matches or the config
 *          could not be loaded
 */
export declare function findSchedule(name: string, configPath?: string): ScheduleEntry | undefined;
/**
 * Return the `digest` section of the config when it is enabled.
 *
 * @param configPath - Optional override for the schedules config location
 * @returns The digest configuration, or null when the config could not be
 *          loaded, has no digest section, or the digest is not enabled
 */
export declare function getDigestConfig(configPath?: string): DigestConfig | null;
/**
 * List every schedule entry whose `enabled` flag is set.
 *
 * @param configPath - Optional override for the schedules config location
 * @returns The enabled entries; an empty array when the config could not be loaded
 */
export declare function getEnabledSchedules(configPath?: string): ScheduleEntry[];
/**
 * Read, interpolate and validate the schedule configuration file.
 *
 * @param configPath - Override the default config file location
 * @returns The parsed and validated schedules file, or null when the file
 *          does not exist or fails to parse/validate
 */
export declare function loadSchedules(configPath?: string): SchedulesFile | null;
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* schedules/loader.ts
|
|
3
|
+
*
|
|
4
|
+
* Loads and validates config/schedules.yaml.
|
|
5
|
+
*
|
|
6
|
+
* The schedule config drives both the GitHub Actions cron workflow
|
|
7
|
+
* (which reads enabled schedules to build a matrix) and the digest
|
|
8
|
+
* script (which reads the digest section for lookback and delivery).
|
|
9
|
+
*
|
|
10
|
+
* @see docs/design-docs/report-store/implementation.md — Phase 5
|
|
11
|
+
*/
|
|
12
|
+
import { existsSync, readFileSync } from "fs";
|
|
13
|
+
import { dirname, resolve } from "path";
|
|
14
|
+
import { fileURLToPath } from "url";
|
|
15
|
+
import { load } from "js-yaml";
|
|
16
|
+
import { interpolate } from "../interpolate.js";
|
|
17
|
+
import { SchedulesFileSchema, } from "./schema.js";
|
|
18
|
+
// ---------------------------------------------------------------------------
|
|
19
|
+
// Paths
|
|
20
|
+
// ---------------------------------------------------------------------------
|
|
21
|
+
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
22
|
+
const DEFAULT_CONFIG_PATH = resolve(__dirname, "..", "..", "config", "schedules.yaml");
|
|
23
|
+
// ---------------------------------------------------------------------------
|
|
24
|
+
// Public API
|
|
25
|
+
// ---------------------------------------------------------------------------
|
|
26
|
+
/**
 * Look up a schedule entry by its exact `name`.
 *
 * @param name - Schedule name to search for
 * @param configPath - Optional config location, passed through to loadSchedules
 * @returns The first matching entry, or undefined when nothing matches or
 *          the config could not be loaded
 */
export function findSchedule(name, configPath) {
    return loadSchedules(configPath)?.schedules.find((entry) => entry.name === name);
}
|
|
35
|
+
/**
 * Return the digest section of the schedules config, but only when enabled.
 *
 * @param configPath - Optional config location, passed through to loadSchedules
 * @returns The `digest` object when its `enabled` flag is truthy; null when
 *          the config is unavailable, has no digest section, or it is disabled
 */
export function getDigestConfig(configPath) {
    const loaded = loadSchedules(configPath);
    const isEnabled = Boolean(loaded?.digest?.enabled);
    return isEnabled ? loaded.digest : null;
}
|
|
44
|
+
/**
 * Collect the schedule entries whose `enabled` flag is truthy.
 *
 * @param configPath - Optional config location, passed through to loadSchedules
 * @returns The enabled entries in config order; an empty array when the
 *          config could not be loaded
 */
export function getEnabledSchedules(configPath) {
    const loaded = loadSchedules(configPath);
    return loaded ? loaded.schedules.filter(({ enabled }) => enabled) : [];
}
|
|
53
|
+
/**
 * Load and validate the schedule configuration.
 *
 * Pipeline: read the file as UTF-8, parse it as YAML, run the result through
 * interpolate(), then validate it with SchedulesFileSchema. Any failure along
 * the way is logged with console.warn and reported as null rather than thrown.
 *
 * @param configPath - Override the default config file location
 * @returns Parsed and validated schedules file, or null if not found/invalid
 */
export function loadSchedules(configPath = DEFAULT_CONFIG_PATH) {
    if (existsSync(configPath)) {
        try {
            const yamlText = readFileSync(configPath, "utf-8");
            return SchedulesFileSchema.parse(interpolate(load(yamlText)));
        }
        catch (failure) {
            console.warn("Failed to load schedules config:", failure instanceof Error ? failure.message : failure);
        }
    }
    // Reached when the file is missing or when loading/validation failed.
    return null;
}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
/**
 * schedules/schema.ts — Re-export barrel
 *
 * All schedule Zod schemas now live in @sanity/ailf-core. This file
 * re-exports them for backward compatibility.
 *
 * The specifier names the runtime module (`index.js`), exactly as the emitted
 * schedules/schema.js does; TypeScript resolves the sibling declaration file
 * from it. A specifier ending in `.d.ts` is not a valid module path for a
 * value re-export.
 *
 * @see packages/core/src/schemas/schedules.ts (canonical source)
 */
export * from "../_vendor/ailf-core/index.js";
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* schedules/schema.ts — Re-export barrel
|
|
3
|
+
*
|
|
4
|
+
* All schedule Zod schemas now live in @sanity/ailf-core. This file
|
|
5
|
+
* re-exports them for backward compatibility.
|
|
6
|
+
*
|
|
7
|
+
* @see packages/core/src/schemas/schedules.ts (canonical source)
|
|
8
|
+
*/
|
|
9
|
+
export * from "../_vendor/ailf-core/index.js";
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* agent-behavior-report.ts
|
|
3
|
+
*
|
|
4
|
+
* Standalone script that reads Promptfoo evaluation results containing
|
|
5
|
+
* agent behavior observation data and generates a detailed report.
|
|
6
|
+
*
|
|
7
|
+
* This provides deeper analysis than the summary included in the main
|
|
8
|
+
* calculate-scores report, including:
|
|
9
|
+
*
|
|
10
|
+
* - Per-task behavior breakdown (which specific pages each task visited)
|
|
11
|
+
* - Canonical doc coverage (did the agent find the "right" docs?)
|
|
12
|
+
* - Request timeline and latency analysis
|
|
13
|
+
* - Search strategy analysis
|
|
14
|
+
* - Cross-task navigation pattern detection
|
|
15
|
+
*
|
|
16
|
+
* Usage:
|
|
17
|
+
* tsx src/scripts/agent-behavior-report.ts [results-path]
|
|
18
|
+
*/
|
|
19
|
+
import "dotenv/config";
|
|
@@ -0,0 +1,315 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* agent-behavior-report.ts
|
|
3
|
+
*
|
|
4
|
+
* Standalone script that reads Promptfoo evaluation results containing
|
|
5
|
+
* agent behavior observation data and generates a detailed report.
|
|
6
|
+
*
|
|
7
|
+
* This provides deeper analysis than the summary included in the main
|
|
8
|
+
* calculate-scores report, including:
|
|
9
|
+
*
|
|
10
|
+
* - Per-task behavior breakdown (which specific pages each task visited)
|
|
11
|
+
* - Canonical doc coverage (did the agent find the "right" docs?)
|
|
12
|
+
* - Request timeline and latency analysis
|
|
13
|
+
* - Search strategy analysis
|
|
14
|
+
* - Cross-task navigation pattern detection
|
|
15
|
+
*
|
|
16
|
+
* Usage:
|
|
17
|
+
* tsx src/scripts/agent-behavior-report.ts [results-path]
|
|
18
|
+
*/
|
|
19
|
+
// oxlint-disable-next-line import/no-unassigned-import -- side-effect: loads .env into process.env
|
|
20
|
+
import "dotenv/config";
|
|
21
|
+
import { readFileSync, writeFileSync, mkdirSync, existsSync } from "fs";
import { join, dirname } from "path";
import { fileURLToPath } from "url";
|
|
23
|
+
// Canonical doc mapping: feature area -> slug fragments of the docs a
// well-informed agent *should* visit for tasks in that area. Keys are the
// feature areas produced by detectFeatureArea(); a canonical entry counts as
// covered when any visited doc slug contains it as a substring.
const CANONICAL_DOC_MAP = {
    frameworks: [
        "remix",
        "nuxt",
        "svelte",
        "astro",
        "gatsby",
        "client-libraries",
    ],
    functions: [
        "functions",
        "webhooks",
        "groq-powered-webhooks",
        "event-driven",
        "automations",
    ],
    "nextjs-live": [
        "next-js",
        "live-content-api",
        "content-source-maps",
        "app-router",
        "groq",
        "client-libraries",
    ],
    "studio-setup": [
        "studio",
        "schema-types",
        "structure-builder",
        "configuration",
        "plugins",
    ],
    "visual-editing": [
        "visual-editing",
        "presentation",
        "preview",
        "overlays",
        "loaders",
    ],
};
// Keyword rules for detectFeatureArea(), checked in order: the first rule
// with a keyword contained in the description decides the area.
const FEATURE_AREA_KEYWORDS = [
    { area: "studio-setup", keywords: ["studio"] },
    { area: "visual-editing", keywords: ["visual", "presentation", "live preview"] },
    { area: "functions", keywords: ["function", "webhook"] },
    { area: "nextjs-live", keywords: ["next", "app router"] },
    { area: "frameworks", keywords: ["remix", "nuxt", "svelte"] },
];
/**
 * Pull the list of test results out of a parsed results file.
 *
 * Supports both the flat shape ({ results: TestResult[] }) and the full
 * Promptfoo envelope shape ({ results: { results: TestResult[] } }).
 * Anything else yields an empty list instead of throwing.
 */
function extractResultList(json) {
    if (Array.isArray(json?.results))
        return json.results;
    if (Array.isArray(json?.results?.results))
        return json.results.results;
    return [];
}
/** De-duplicated union of the array stored under `key` in each task's behavior (missing arrays count as empty). */
function uniqueValues(tasks, key) {
    return [...new Set(tasks.flatMap((task) => task.behavior[key] ?? []))];
}
/** Mean of the number stored under `key` in each task's behavior (missing values count as 0); `tasks` must be non-empty. */
function averageOf(tasks, key) {
    const total = tasks.reduce((sum, task) => sum + (task.behavior[key] ?? 0), 0);
    return total / tasks.length;
}
/**
 * Read a results file and aggregate the agent behavior data it contains.
 *
 * Only results carrying `metadata.agentBehaviorSummary` are considered. Each
 * becomes a task with its behavior summary, description, detected feature
 * area and a `hasDocs` flag (true when `vars.docs` is a non-blank string).
 * Tasks are then grouped per feature area and summarized.
 *
 * The file content is external input, so optional fields are tolerated:
 * a missing `description` is treated as "" (feature area "other"), a missing
 * `vars` means `hasDocs` is false, and missing behavior arrays/counters count
 * as empty/zero.
 *
 * @param resultsPath - Path of the JSON results file to read
 * @returns `{ features, hasData, tasks }`; `features` is sorted by feature
 *          name and `hasData` is false when no result carried behavior data
 * @throws If the file cannot be read or is not valid JSON
 */
function analyzeResults(resultsPath) {
    const json = JSON.parse(readFileSync(resultsPath, "utf-8"));
    const tasks = [];
    for (const result of extractResultList(json)) {
        const behavior = result?.metadata?.agentBehaviorSummary;
        if (!behavior)
            continue;
        const description = typeof result.description === "string" ? result.description : "";
        const docs = result.vars?.docs;
        tasks.push({
            behavior,
            description,
            feature: detectFeatureArea(description),
            hasDocs: typeof docs === "string" && docs.trim().length > 0,
        });
    }
    // Group by feature
    const byFeature = new Map();
    for (const task of tasks) {
        const group = byFeature.get(task.feature);
        if (group) {
            group.push(task);
        }
        else {
            byFeature.set(task.feature, [task]);
        }
    }
    const features = [...byFeature]
        .map(([feature, featureTasks]) => {
        const allDocSlugs = uniqueValues(featureTasks, "docSlugsVisited");
        // Areas without a canonical list (e.g. "other") get 0 coverage.
        const canonicalSlugs = CANONICAL_DOC_MAP[feature] ?? [];
        const matchedCanonical = canonicalSlugs.filter((slug) => allDocSlugs.some((visited) => typeof visited === "string" && visited.includes(slug)));
        return {
            allDocSlugs,
            allExternalDomains: uniqueValues(featureTasks, "externalDomains"),
            allSearchQueries: uniqueValues(featureTasks, "uniqueSearchQueries"),
            avgDocPages: averageOf(featureTasks, "docPagesVisited"),
            avgNetworkMs: averageOf(featureTasks, "totalNetworkMs"),
            avgSearches: averageOf(featureTasks, "searchesPerformed"),
            canonicalCoverage: canonicalSlugs.length > 0
                ? matchedCanonical.length / canonicalSlugs.length
                : 0,
            canonicalSlugs,
            feature,
            tasks: featureTasks,
        };
    })
        .sort((a, b) => a.feature.localeCompare(b.feature));
    return { features, hasData: tasks.length > 0, tasks };
}
/**
 * Map a task description to a feature area by case-insensitive keyword match.
 *
 * Rules are evaluated in the order of FEATURE_AREA_KEYWORDS, so a
 * description mentioning both "studio" and "next" resolves to "studio-setup".
 *
 * @param description - Task description; a non-string value is treated as ""
 * @returns One of the CANONICAL_DOC_MAP keys, or "other" when nothing matches
 */
function detectFeatureArea(description) {
    const desc = (typeof description === "string" ? description : "").toLowerCase();
    for (const rule of FEATURE_AREA_KEYWORDS) {
        if (rule.keywords.some((keyword) => desc.includes(keyword))) {
            return rule.area;
        }
    }
    return "other";
}
|
|
148
|
+
// ---------------------------------------------------------------------------
|
|
149
|
+
// Entry point
|
|
150
|
+
// ---------------------------------------------------------------------------
|
|
151
|
+
/**
 * Script entry point.
 *
 * Resolves the results file (first CLI argument, otherwise
 * results/latest/eval-results.json under the package root), analyzes it,
 * prints the report to stdout and writes the same data to
 * results/latest/agent-behavior-report.json.
 *
 * Exits with code 1 when the results file does not exist and with code 0
 * (after a hint) when the file contains no agent behavior data.
 */
function main() {
    // fileURLToPath decodes percent-escapes (e.g. spaces in directory names)
    // and yields a proper drive-letter path on Windows; URL#pathname does neither.
    const ROOT = join(dirname(fileURLToPath(import.meta.url)), "..", "..");
    const resultsPath = process.argv[2] || join(ROOT, "results", "latest", "eval-results.json");
    if (!existsSync(resultsPath)) {
        console.error(`Results file not found: ${resultsPath}`);
        console.error("Run an evaluation first: pnpm eval:observed");
        process.exit(1);
    }
    console.log(`Reading results from: ${resultsPath}`);
    console.log();
    const analysis = analyzeResults(resultsPath);
    if (!analysis.hasData) {
        console.log("No agent behavior data found in the results.");
        console.log("Make sure you ran the evaluation with the observed config:");
        console.log(" pnpm eval:observed");
        process.exit(0);
    }
    printReport(analysis);
    // Persist detailed report as JSON
    const outDir = join(ROOT, "results", "latest");
    mkdirSync(outDir, { recursive: true });
    const reportData = {
        // Per-feature aggregates; the task list is replaced by its length.
        features: analysis.features.map((f) => ({
            avgDocPages: f.avgDocPages,
            avgNetworkMs: f.avgNetworkMs,
            avgSearches: f.avgSearches,
            canonicalCoverage: f.canonicalCoverage,
            canonicalSlugs: f.canonicalSlugs,
            docSlugsVisited: f.allDocSlugs,
            externalDomains: f.allExternalDomains,
            feature: f.feature,
            searchQueries: f.allSearchQueries,
            taskCount: f.tasks.length,
        })),
        tasks: analysis.tasks.map((t) => ({
            behavior: t.behavior,
            description: t.description,
            feature: t.feature,
            hasDocs: t.hasDocs,
        })),
        timestamp: new Date().toISOString(),
        totalTasks: analysis.tasks.length,
    };
    writeFileSync(join(outDir, "agent-behavior-report.json"), JSON.stringify(reportData, null, 2));
    console.log("Agent behavior report written to results/latest/agent-behavior-report.json");
}
|
|
197
|
+
// ---------------------------------------------------------------------------
|
|
198
|
+
// Report output
|
|
199
|
+
// ---------------------------------------------------------------------------
|
|
200
|
+
/**
 * Print the agent behavior report to stdout.
 *
 * Sections, in order: overview table per feature area, canonical
 * documentation coverage, search strategy (only when any query exists),
 * per-task detail, external domains (only when any exist) and overall
 * statistics.
 *
 * The overview header and separator are derived from the same column widths
 * as the data rows, so the table stays aligned. Percentages over the task
 * count print 0% when there are no tasks, rather than NaN%.
 *
 * @param analysis - Result of analyzeResults(): `{ features, tasks }`
 */
function printReport(analysis) {
    const heavyRule = "=".repeat(80);
    const lightRule = "-".repeat(80);
    // Whole-number percentage; a zero denominator yields "0" rather than "NaN".
    const percentOf = (part, whole) => (whole > 0 ? (part / whole) * 100 : 0).toFixed(0);
    const coveragePercent = (feature) => (feature.canonicalCoverage * 100).toFixed(0);
    console.log(heavyRule);
    console.log(" AGENT BEHAVIOR OBSERVATION REPORT");
    console.log(heavyRule);
    console.log();
    // ---- Overview table ----
    console.log("OVERVIEW BY FEATURE AREA");
    console.log(lightRule);
    // Column label plus content width; rows pad their cells to the same widths.
    const columns = [
        { label: "Feature Area", width: 19 },
        { label: "Tasks", width: 5 },
        { label: "Avg Docs", width: 8 },
        { label: "Avg Search", width: 10 },
        { label: "Avg Net(ms)", width: 11 },
        { label: "Canon%", width: 6 },
    ];
    console.log(`| ${columns.map((c) => c.label.padEnd(c.width)).join(" | ")} |`);
    console.log(`|${columns.map((c) => "-".repeat(c.width + 2)).join("|")}|`);
    for (const f of analysis.features) {
        const cells = [
            f.feature.padEnd(19),
            f.tasks.length.toString().padStart(5),
            f.avgDocPages.toFixed(1).padStart(8),
            f.avgSearches.toFixed(1).padStart(10),
            Math.round(f.avgNetworkMs).toString().padStart(11),
            `${coveragePercent(f).padStart(5)}%`,
        ];
        console.log(`| ${cells.join(" | ")} |`);
    }
    console.log();
    // ---- Canonical coverage breakdown ----
    console.log("CANONICAL DOCUMENTATION COVERAGE");
    console.log(lightRule);
    console.log();
    for (const f of analysis.features) {
        console.log(` ${f.feature} (${coveragePercent(f)}% canonical coverage):`);
        if (f.canonicalSlugs.length === 0) {
            console.log(" (no canonical docs defined)");
        }
        else {
            for (const slug of f.canonicalSlugs) {
                // Substring match: a visited slug covers a canonical entry it contains.
                const found = f.allDocSlugs.some((visited) => visited.includes(slug));
                console.log(` ${found ? "[x]" : "[ ]"} ${slug}`);
            }
        }
        const nonCanonical = f.allDocSlugs.filter((slug) => !f.canonicalSlugs.some((c) => slug.includes(c)));
        if (nonCanonical.length > 0) {
            console.log(" Additional docs visited:");
            for (const slug of nonCanonical) {
                console.log(` + ${slug}`);
            }
        }
        console.log();
    }
    // ---- Search strategy ----
    const anySearches = analysis.features.some((f) => f.allSearchQueries.length > 0);
    if (anySearches) {
        console.log("SEARCH STRATEGY");
        console.log(lightRule);
        console.log();
        for (const f of analysis.features) {
            if (f.allSearchQueries.length === 0)
                continue;
            console.log(` ${f.feature}:`);
            for (const q of f.allSearchQueries) {
                console.log(` -> "${q}"`);
            }
        }
        console.log();
    }
    // ---- Per-task detail ----
    console.log("PER-TASK DETAIL");
    console.log(lightRule);
    console.log();
    for (const f of analysis.features) {
        console.log(` ## ${f.feature}`);
        console.log();
        for (const t of f.tasks) {
            // Tasks that were given docs are the "gold" variant.
            const variant = t.hasDocs ? "[gold]" : "[baseline]";
            console.log(` ${variant} ${t.description}`);
            console.log(` Requests: ${t.behavior.totalRequests} | ` +
                `Doc pages: ${t.behavior.docPagesVisited} | ` +
                `Searches: ${t.behavior.searchesPerformed} | ` +
                `External: ${t.behavior.externalRequestCount}`);
            if (t.behavior.docSlugsVisited.length > 0) {
                console.log(` Docs: ${t.behavior.docSlugsVisited.join(", ")}`);
            }
            if (t.behavior.uniqueSearchQueries.length > 0) {
                console.log(` Queries: ${t.behavior.uniqueSearchQueries.map((q) => `"${q}"`).join(", ")}`);
            }
            console.log();
        }
    }
    // ---- External domains ----
    const allDomains = [
        ...new Set(analysis.features.flatMap((f) => f.allExternalDomains)),
    ];
    if (allDomains.length > 0) {
        console.log("EXTERNAL DOMAINS");
        console.log(lightRule);
        console.log();
        for (const d of allDomains) {
            console.log(` - ${d}`);
        }
        console.log();
    }
    // ---- Summary stats ----
    console.log("OVERALL STATISTICS");
    console.log(lightRule);
    console.log();
    const totalTasks = analysis.tasks.length;
    const tasksUsingDocs = analysis.tasks.filter((t) => t.behavior.usedDocs).length;
    const tasksUsingSearch = analysis.tasks.filter((t) => t.behavior.usedSearch).length;
    // Unweighted mean over feature areas; `|| 1` avoids dividing by zero.
    const avgCanonical = analysis.features.reduce((s, f) => s + f.canonicalCoverage, 0) /
        (analysis.features.length || 1);
    console.log(` Total tasks observed: ${totalTasks}`);
    console.log(` Tasks that used docs: ${tasksUsingDocs}/${totalTasks} (${percentOf(tasksUsingDocs, totalTasks)}%)`);
    console.log(` Tasks that used search: ${tasksUsingSearch}/${totalTasks} (${percentOf(tasksUsingSearch, totalTasks)}%)`);
    console.log(` Avg canonical coverage: ${(avgCanonical * 100).toFixed(1)}%`);
    console.log();
}
|
|
315
|
+
main();
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Baseline.ts
|
|
3
|
+
*
|
|
4
|
+
* Manages historical baseline snapshots of evaluation scores.
|
|
5
|
+
* Allows saving, comparing, and listing score baselines over time.
|
|
6
|
+
*
|
|
7
|
+
* Usage:
|
|
8
|
+
* pnpm baseline:save # save current scores as baseline
|
|
9
|
+
* pnpm baseline:save --tag "pre-groq" # save with a descriptive tag
|
|
10
|
+
* pnpm baseline:compare # compare current vs latest baseline
|
|
11
|
+
* pnpm baseline:history # list all saved baselines
|
|
12
|
+
*/
|
|
13
|
+
/** Summary of one saved baseline, as returned by listBaselines(). */
interface BaselineMetadata {
    filename: string;
    timestamp: string;
    areaCount: number;
    avgScore: number;
    /** Optional label; presumably the value passed via `--tag` when saving — confirm in the implementation. */
    tag?: string;
    graderCost?: number;
    totalCost?: number;
}
/** Per-feature entry of a comparison: `baseline`, `current` and their `delta`, plus optional cost figures. */
interface ScoreComparison {
    feature: string;
    baseline: number;
    current: number;
    delta: number;
    costBaseline?: number;
    costCurrent?: number;
    costDelta?: number;
}
/** Outcome of compareBaseline(); `comparisons` and `overallDelta` are optional. */
interface CompareResult {
    success: boolean;
    message: string;
    comparisons?: ScoreComparison[];
    overallDelta?: number;
}
/**
 * Compare scores against a saved baseline.
 *
 * @param baselineFile - Optional baseline to compare against
 *        (NOTE(review): the file header suggests the latest baseline is used
 *        when omitted — confirm in the implementation)
 */
export declare function compareBaseline(baselineFile?: string): CompareResult;
/** List the saved baselines. */
export declare function listBaselines(): BaselineMetadata[];
/**
 * Save a baseline snapshot.
 *
 * @param tag - Optional descriptive tag for the snapshot
 * @returns A success flag together with a message
 */
export declare function saveBaseline(tag?: string): {
    success: boolean;
    message: string;
};
export {};
|