@sanity/ailf 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +89 -0
- package/bin/ailf.js +64 -0
- package/canonical/grader-references/README.md +88 -0
- package/canonical/grader-references/groq.yaml +234 -0
- package/canonical/grader-references/studio-setup.yaml +275 -0
- package/canonical/reference-solutions/.gitkeep +1 -0
- package/canonical/reference-solutions/frameworks/nuxt.ts +119 -0
- package/canonical/reference-solutions/frameworks/remix.tsx +100 -0
- package/canonical/reference-solutions/functions/publish-webhook.ts +60 -0
- package/canonical/reference-solutions/groq/advanced-filtering.ts +379 -0
- package/canonical/reference-solutions/groq/blog-queries.ts +137 -0
- package/canonical/reference-solutions/groq/joins-references.ts +300 -0
- package/canonical/reference-solutions/nextjs/app-router-integration.tsx +128 -0
- package/canonical/reference-solutions/studio-setup/blog-schema.ts +143 -0
- package/canonical/reference-solutions/studio-setup/custom-tool.tsx +78 -0
- package/canonical/reference-solutions/visual-editing/live-preview.tsx +137 -0
- package/canonical/reference-solutions/visual-editing/presentation-nextjs.tsx +130 -0
- package/config/airbyte/ai_literacy_framework.connector.yaml +639 -0
- package/config/bigquery/README.md +74 -0
- package/config/bigquery/views/area_scores.sql +87 -0
- package/config/bigquery/views/reports.sql +49 -0
- package/config/features.yaml +116 -0
- package/config/models.yaml +115 -0
- package/config/prompts.yaml +75 -0
- package/config/rubrics.yaml +62 -0
- package/config/schedules.yaml +43 -0
- package/config/sinks.yaml +54 -0
- package/config/sources.yaml +51 -0
- package/config/thresholds.yaml +49 -0
- package/dist/_vendor/ailf-core/examples/index.d.ts +190 -0
- package/dist/_vendor/ailf-core/examples/index.js +285 -0
- package/dist/_vendor/ailf-core/index.d.ts +17 -0
- package/dist/_vendor/ailf-core/index.js +17 -0
- package/dist/_vendor/ailf-core/ports/cache-store.d.ts +72 -0
- package/dist/_vendor/ailf-core/ports/cache-store.js +17 -0
- package/dist/_vendor/ailf-core/ports/config-source.d.ts +33 -0
- package/dist/_vendor/ailf-core/ports/config-source.js +15 -0
- package/dist/_vendor/ailf-core/ports/context.d.ts +172 -0
- package/dist/_vendor/ailf-core/ports/context.js +14 -0
- package/dist/_vendor/ailf-core/ports/doc-fetcher.d.ts +131 -0
- package/dist/_vendor/ailf-core/ports/doc-fetcher.js +12 -0
- package/dist/_vendor/ailf-core/ports/eval-runner.d.ts +24 -0
- package/dist/_vendor/ailf-core/ports/eval-runner.js +8 -0
- package/dist/_vendor/ailf-core/ports/index.d.ts +15 -0
- package/dist/_vendor/ailf-core/ports/index.js +7 -0
- package/dist/_vendor/ailf-core/ports/logger.d.ts +36 -0
- package/dist/_vendor/ailf-core/ports/logger.js +11 -0
- package/dist/_vendor/ailf-core/ports/pipeline-step.d.ts +46 -0
- package/dist/_vendor/ailf-core/ports/pipeline-step.js +8 -0
- package/dist/_vendor/ailf-core/ports/task-source.d.ts +159 -0
- package/dist/_vendor/ailf-core/ports/task-source.js +72 -0
- package/dist/_vendor/ailf-core/schemas/callback-payload.d.ts +24 -0
- package/dist/_vendor/ailf-core/schemas/callback-payload.js +29 -0
- package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +55 -0
- package/dist/_vendor/ailf-core/schemas/eval-config.js +78 -0
- package/dist/_vendor/ailf-core/schemas/index.d.ts +16 -0
- package/dist/_vendor/ailf-core/schemas/index.js +16 -0
- package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +125 -0
- package/dist/_vendor/ailf-core/schemas/pipeline-request.js +67 -0
- package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +531 -0
- package/dist/_vendor/ailf-core/schemas/pipeline.js +318 -0
- package/dist/_vendor/ailf-core/schemas/schedules.d.ts +68 -0
- package/dist/_vendor/ailf-core/schemas/schedules.js +74 -0
- package/dist/_vendor/ailf-core/schemas/sinks.d.ts +207 -0
- package/dist/_vendor/ailf-core/schemas/sinks.js +108 -0
- package/dist/_vendor/ailf-core/services/comparison-formatters.d.ts +18 -0
- package/dist/_vendor/ailf-core/services/comparison-formatters.js +189 -0
- package/dist/_vendor/ailf-core/services/config-helpers.d.ts +41 -0
- package/dist/_vendor/ailf-core/services/config-helpers.js +86 -0
- package/dist/_vendor/ailf-core/services/index.d.ts +12 -0
- package/dist/_vendor/ailf-core/services/index.js +12 -0
- package/dist/_vendor/ailf-core/services/scoring.d.ts +49 -0
- package/dist/_vendor/ailf-core/services/scoring.js +222 -0
- package/dist/_vendor/ailf-core/types/index.d.ts +1082 -0
- package/dist/_vendor/ailf-core/types/index.js +21 -0
- package/dist/_vendor/ailf-core/types/scoring-input.d.ts +54 -0
- package/dist/_vendor/ailf-core/types/scoring-input.js +9 -0
- package/dist/_vendor/ailf-shared/dimension-names.d.ts +21 -0
- package/dist/_vendor/ailf-shared/dimension-names.js +27 -0
- package/dist/_vendor/ailf-shared/document-ref.d.ts +29 -0
- package/dist/_vendor/ailf-shared/document-ref.js +1 -0
- package/dist/_vendor/ailf-shared/eval-modes.d.ts +12 -0
- package/dist/_vendor/ailf-shared/eval-modes.js +8 -0
- package/dist/_vendor/ailf-shared/index.d.ts +16 -0
- package/dist/_vendor/ailf-shared/index.js +16 -0
- package/dist/_vendor/ailf-shared/noise-threshold.d.ts +9 -0
- package/dist/_vendor/ailf-shared/noise-threshold.js +9 -0
- package/dist/_vendor/ailf-shared/score-grades.d.ts +17 -0
- package/dist/_vendor/ailf-shared/score-grades.js +23 -0
- package/dist/adapters/cache/content-lake-cache.d.ts +24 -0
- package/dist/adapters/cache/content-lake-cache.js +59 -0
- package/dist/adapters/cache/filesystem-cache.d.ts +18 -0
- package/dist/adapters/cache/filesystem-cache.js +54 -0
- package/dist/adapters/cache/index.d.ts +2 -0
- package/dist/adapters/cache/index.js +2 -0
- package/dist/adapters/config-sources/cli-config-adapter.d.ts +17 -0
- package/dist/adapters/config-sources/cli-config-adapter.js +23 -0
- package/dist/adapters/config-sources/file-config-adapter.d.ts +26 -0
- package/dist/adapters/config-sources/file-config-adapter.js +96 -0
- package/dist/adapters/config-sources/index.d.ts +2 -0
- package/dist/adapters/config-sources/index.js +2 -0
- package/dist/adapters/doc-fetchers/index.d.ts +1 -0
- package/dist/adapters/doc-fetchers/index.js +1 -0
- package/dist/adapters/doc-fetchers/sanity-doc-fetcher.d.ts +76 -0
- package/dist/adapters/doc-fetchers/sanity-doc-fetcher.js +620 -0
- package/dist/adapters/eval-runners/index.d.ts +1 -0
- package/dist/adapters/eval-runners/index.js +1 -0
- package/dist/adapters/eval-runners/promptfoo-eval-adapter.d.ts +14 -0
- package/dist/adapters/eval-runners/promptfoo-eval-adapter.js +63 -0
- package/dist/adapters/index.d.ts +12 -0
- package/dist/adapters/index.js +12 -0
- package/dist/adapters/loggers/console-logger.d.ts +22 -0
- package/dist/adapters/loggers/console-logger.js +54 -0
- package/dist/adapters/loggers/index.d.ts +9 -0
- package/dist/adapters/loggers/index.js +9 -0
- package/dist/adapters/loggers/json-logger.d.ts +18 -0
- package/dist/adapters/loggers/json-logger.js +33 -0
- package/dist/adapters/loggers/quiet-logger.d.ts +16 -0
- package/dist/adapters/loggers/quiet-logger.js +30 -0
- package/dist/adapters/task-sources/composite-task-source.d.ts +20 -0
- package/dist/adapters/task-sources/composite-task-source.js +59 -0
- package/dist/adapters/task-sources/content-lake-task-source.d.ts +20 -0
- package/dist/adapters/task-sources/content-lake-task-source.js +219 -0
- package/dist/adapters/task-sources/index.d.ts +7 -0
- package/dist/adapters/task-sources/index.js +7 -0
- package/dist/adapters/task-sources/repo-schemas.d.ts +245 -0
- package/dist/adapters/task-sources/repo-schemas.js +234 -0
- package/dist/adapters/task-sources/repo-task-source.d.ts +22 -0
- package/dist/adapters/task-sources/repo-task-source.js +104 -0
- package/dist/adapters/task-sources/repo-trigger.d.ts +52 -0
- package/dist/adapters/task-sources/repo-trigger.js +153 -0
- package/dist/adapters/task-sources/repo-validation.d.ts +49 -0
- package/dist/adapters/task-sources/repo-validation.js +164 -0
- package/dist/adapters/task-sources/yaml-task-source.d.ts +18 -0
- package/dist/adapters/task-sources/yaml-task-source.js +136 -0
- package/dist/agent-observer/agentic-provider.d.ts +132 -0
- package/dist/agent-observer/agentic-provider.js +983 -0
- package/dist/agent-observer/classifier.d.ts +62 -0
- package/dist/agent-observer/classifier.js +269 -0
- package/dist/agent-observer/index.d.ts +7 -0
- package/dist/agent-observer/index.js +4 -0
- package/dist/agent-observer/pricing.d.ts +35 -0
- package/dist/agent-observer/pricing.js +82 -0
- package/dist/agent-observer/provider.d.ts +77 -0
- package/dist/agent-observer/provider.js +151 -0
- package/dist/agent-observer/proxy.d.ts +91 -0
- package/dist/agent-observer/proxy.js +321 -0
- package/dist/agent-observer/test-imports.d.ts +7 -0
- package/dist/agent-observer/test-imports.js +185 -0
- package/dist/agent-observer/types.d.ts +137 -0
- package/dist/agent-observer/types.js +16 -0
- package/dist/assertions/source-isolation.d.ts +72 -0
- package/dist/assertions/source-isolation.js +117 -0
- package/dist/cli.d.ts +24 -0
- package/dist/cli.js +199 -0
- package/dist/commands/agent-report.d.ts +5 -0
- package/dist/commands/agent-report.js +69 -0
- package/dist/commands/baseline.d.ts +9 -0
- package/dist/commands/baseline.js +141 -0
- package/dist/commands/cache.d.ts +13 -0
- package/dist/commands/cache.js +135 -0
- package/dist/commands/calculate-scores.d.ts +8 -0
- package/dist/commands/calculate-scores.js +48 -0
- package/dist/commands/compare.d.ts +8 -0
- package/dist/commands/compare.js +120 -0
- package/dist/commands/completion.d.ts +18 -0
- package/dist/commands/completion.js +260 -0
- package/dist/commands/coverage-audit.d.ts +7 -0
- package/dist/commands/coverage-audit.js +40 -0
- package/dist/commands/discovery-report.d.ts +10 -0
- package/dist/commands/discovery-report.js +44 -0
- package/dist/commands/eval.d.ts +9 -0
- package/dist/commands/eval.js +35 -0
- package/dist/commands/explain-handler.d.ts +34 -0
- package/dist/commands/explain-handler.js +719 -0
- package/dist/commands/fetch-docs.d.ts +8 -0
- package/dist/commands/fetch-docs.js +128 -0
- package/dist/commands/generate-configs.d.ts +8 -0
- package/dist/commands/generate-configs.js +46 -0
- package/dist/commands/grader/index.d.ts +11 -0
- package/dist/commands/grader/index.js +118 -0
- package/dist/commands/init.d.ts +19 -0
- package/dist/commands/init.js +150 -0
- package/dist/commands/interactive.d.ts +12 -0
- package/dist/commands/interactive.js +238 -0
- package/dist/commands/lookup-doc.d.ts +15 -0
- package/dist/commands/lookup-doc.js +84 -0
- package/dist/commands/measure-retrieval.d.ts +5 -0
- package/dist/commands/measure-retrieval.js +65 -0
- package/dist/commands/pipeline-action.d.ts +71 -0
- package/dist/commands/pipeline-action.js +305 -0
- package/dist/commands/pipeline.d.ts +62 -0
- package/dist/commands/pipeline.js +53 -0
- package/dist/commands/pr-comment.d.ts +8 -0
- package/dist/commands/pr-comment.js +47 -0
- package/dist/commands/publish.d.ts +26 -0
- package/dist/commands/publish.js +253 -0
- package/dist/commands/readiness-report.d.ts +10 -0
- package/dist/commands/readiness-report.js +104 -0
- package/dist/commands/shared/options.d.ts +29 -0
- package/dist/commands/shared/options.js +57 -0
- package/dist/commands/update-quality-scores.d.ts +5 -0
- package/dist/commands/update-quality-scores.js +20 -0
- package/dist/commands/validate-tasks.d.ts +16 -0
- package/dist/commands/validate-tasks.js +93 -0
- package/dist/commands/validate.d.ts +9 -0
- package/dist/commands/validate.js +73 -0
- package/dist/commands/webhook-server.d.ts +5 -0
- package/dist/commands/webhook-server.js +30 -0
- package/dist/commands/weekly-digest.d.ts +10 -0
- package/dist/commands/weekly-digest.js +104 -0
- package/dist/composition-root.d.ts +26 -0
- package/dist/composition-root.js +107 -0
- package/dist/interpolate.d.ts +26 -0
- package/dist/interpolate.js +70 -0
- package/dist/job-store.d.ts +104 -0
- package/dist/job-store.js +188 -0
- package/dist/lib/agent-behavior-report.d.ts +8 -0
- package/dist/lib/agent-behavior-report.js +185 -0
- package/dist/lib/baseline.d.ts +19 -0
- package/dist/lib/baseline.js +153 -0
- package/dist/lib/calculate-scores.d.ts +23 -0
- package/dist/lib/calculate-scores.js +42 -0
- package/dist/lib/compare.d.ts +18 -0
- package/dist/lib/compare.js +170 -0
- package/dist/lib/coverage-audit.d.ts +4 -0
- package/dist/lib/coverage-audit.js +42 -0
- package/dist/lib/discovery-report.d.ts +13 -0
- package/dist/lib/discovery-report.js +57 -0
- package/dist/lib/fetch-docs.d.ts +30 -0
- package/dist/lib/fetch-docs.js +171 -0
- package/dist/lib/generate-configs.d.ts +25 -0
- package/dist/lib/generate-configs.js +42 -0
- package/dist/lib/grader-api.d.ts +21 -0
- package/dist/lib/grader-api.js +34 -0
- package/dist/lib/grader-compare.d.ts +19 -0
- package/dist/lib/grader-compare.js +91 -0
- package/dist/lib/grader-consistency.d.ts +27 -0
- package/dist/lib/grader-consistency.js +79 -0
- package/dist/lib/grader-sensitivity.d.ts +19 -0
- package/dist/lib/grader-sensitivity.js +75 -0
- package/dist/lib/grader-validate.d.ts +19 -0
- package/dist/lib/grader-validate.js +78 -0
- package/dist/lib/measure-retrieval.d.ts +14 -0
- package/dist/lib/measure-retrieval.js +71 -0
- package/dist/lib/pr-comment.d.ts +16 -0
- package/dist/lib/pr-comment.js +28 -0
- package/dist/lib/readiness-report.d.ts +13 -0
- package/dist/lib/readiness-report.js +108 -0
- package/dist/lib/webhook-server.d.ts +11 -0
- package/dist/lib/webhook-server.js +24 -0
- package/dist/lib/weekly-digest.d.ts +24 -0
- package/dist/lib/weekly-digest.js +148 -0
- package/dist/orchestration/build-app-context.d.ts +27 -0
- package/dist/orchestration/build-app-context.js +81 -0
- package/dist/orchestration/build-step-sequence.d.ts +15 -0
- package/dist/orchestration/build-step-sequence.js +84 -0
- package/dist/orchestration/config-to-source-overrides.d.ts +9 -0
- package/dist/orchestration/config-to-source-overrides.js +28 -0
- package/dist/orchestration/env-bridge.d.ts +21 -0
- package/dist/orchestration/env-bridge.js +66 -0
- package/dist/orchestration/index.d.ts +11 -0
- package/dist/orchestration/index.js +11 -0
- package/dist/orchestration/pipeline-orchestrator.d.ts +24 -0
- package/dist/orchestration/pipeline-orchestrator.js +153 -0
- package/dist/orchestration/step-runner.d.ts +20 -0
- package/dist/orchestration/step-runner.js +88 -0
- package/dist/orchestration/steps/calculate-scores-step.d.ts +13 -0
- package/dist/orchestration/steps/calculate-scores-step.js +95 -0
- package/dist/orchestration/steps/callback-step.d.ts +24 -0
- package/dist/orchestration/steps/callback-step.js +76 -0
- package/dist/orchestration/steps/compare-step.d.ts +14 -0
- package/dist/orchestration/steps/compare-step.js +92 -0
- package/dist/orchestration/steps/discovery-report-step.d.ts +13 -0
- package/dist/orchestration/steps/discovery-report-step.js +55 -0
- package/dist/orchestration/steps/fetch-docs-shell.d.ts +17 -0
- package/dist/orchestration/steps/fetch-docs-shell.js +30 -0
- package/dist/orchestration/steps/fetch-docs-step.d.ts +14 -0
- package/dist/orchestration/steps/fetch-docs-step.js +135 -0
- package/dist/orchestration/steps/gap-analysis-step.d.ts +16 -0
- package/dist/orchestration/steps/gap-analysis-step.js +136 -0
- package/dist/orchestration/steps/generate-configs-step.d.ts +14 -0
- package/dist/orchestration/steps/generate-configs-step.js +85 -0
- package/dist/orchestration/steps/grader-consistency-step.d.ts +13 -0
- package/dist/orchestration/steps/grader-consistency-step.js +64 -0
- package/dist/orchestration/steps/index.d.ts +19 -0
- package/dist/orchestration/steps/index.js +19 -0
- package/dist/orchestration/steps/mirror-repo-tasks-step.d.ts +21 -0
- package/dist/orchestration/steps/mirror-repo-tasks-step.js +94 -0
- package/dist/orchestration/steps/publish-report-step.d.ts +26 -0
- package/dist/orchestration/steps/publish-report-step.js +216 -0
- package/dist/orchestration/steps/readiness-step.d.ts +13 -0
- package/dist/orchestration/steps/readiness-step.js +91 -0
- package/dist/orchestration/steps/report-step.d.ts +12 -0
- package/dist/orchestration/steps/report-step.js +49 -0
- package/dist/orchestration/steps/run-eval-step.d.ts +17 -0
- package/dist/orchestration/steps/run-eval-step.js +195 -0
- package/dist/orchestration/steps/validate-step.d.ts +12 -0
- package/dist/orchestration/steps/validate-step.js +41 -0
- package/dist/pipeline/agent-behavior-report.d.ts +53 -0
- package/dist/pipeline/agent-behavior-report.js +132 -0
- package/dist/pipeline/attribution.d.ts +47 -0
- package/dist/pipeline/attribution.js +226 -0
- package/dist/pipeline/baseline.d.ts +37 -0
- package/dist/pipeline/baseline.js +141 -0
- package/dist/pipeline/cache.d.ts +101 -0
- package/dist/pipeline/cache.js +283 -0
- package/dist/pipeline/calculate-scores.d.ts +102 -0
- package/dist/pipeline/calculate-scores.js +1128 -0
- package/dist/pipeline/callback-delivery.d.ts +50 -0
- package/dist/pipeline/callback-delivery.js +89 -0
- package/dist/pipeline/checks.d.ts +39 -0
- package/dist/pipeline/checks.js +280 -0
- package/dist/pipeline/classify-url.d.ts +61 -0
- package/dist/pipeline/classify-url.js +93 -0
- package/dist/pipeline/compare.d.ts +31 -0
- package/dist/pipeline/compare.js +208 -0
- package/dist/pipeline/coverage-audit.d.ts +39 -0
- package/dist/pipeline/coverage-audit.js +165 -0
- package/dist/pipeline/degradations.d.ts +85 -0
- package/dist/pipeline/degradations.js +242 -0
- package/dist/pipeline/discovery-report.d.ts +55 -0
- package/dist/pipeline/discovery-report.js +178 -0
- package/dist/pipeline/eval-constants.d.ts +68 -0
- package/dist/pipeline/eval-constants.js +111 -0
- package/dist/pipeline/eval-fingerprint.d.ts +66 -0
- package/dist/pipeline/eval-fingerprint.js +175 -0
- package/dist/pipeline/expand-tasks.d.ts +220 -0
- package/dist/pipeline/expand-tasks.js +421 -0
- package/dist/pipeline/failure-modes.d.ts +46 -0
- package/dist/pipeline/failure-modes.js +348 -0
- package/dist/pipeline/fetch-url-content.d.ts +44 -0
- package/dist/pipeline/fetch-url-content.js +93 -0
- package/dist/pipeline/gap-analysis.d.ts +48 -0
- package/dist/pipeline/gap-analysis.js +231 -0
- package/dist/pipeline/generate-configs.d.ts +72 -0
- package/dist/pipeline/generate-configs.js +395 -0
- package/dist/pipeline/grader-api.d.ts +49 -0
- package/dist/pipeline/grader-api.js +200 -0
- package/dist/pipeline/grader-compare-runner.d.ts +44 -0
- package/dist/pipeline/grader-compare-runner.js +301 -0
- package/dist/pipeline/grader-comparison.d.ts +111 -0
- package/dist/pipeline/grader-comparison.js +161 -0
- package/dist/pipeline/grader-consistency-runner.d.ts +60 -0
- package/dist/pipeline/grader-consistency-runner.js +270 -0
- package/dist/pipeline/grader-consistency.d.ts +103 -0
- package/dist/pipeline/grader-consistency.js +146 -0
- package/dist/pipeline/grader-sensitivity-runner.d.ts +40 -0
- package/dist/pipeline/grader-sensitivity-runner.js +282 -0
- package/dist/pipeline/grader-sensitivity.d.ts +94 -0
- package/dist/pipeline/grader-sensitivity.js +144 -0
- package/dist/pipeline/grader-validate-runner.d.ts +38 -0
- package/dist/pipeline/grader-validate-runner.js +229 -0
- package/dist/pipeline/grader-validation.d.ts +107 -0
- package/dist/pipeline/grader-validation.js +169 -0
- package/dist/pipeline/map-request-to-config.d.ts +19 -0
- package/dist/pipeline/map-request-to-config.js +80 -0
- package/dist/pipeline/measure-retrieval.d.ts +59 -0
- package/dist/pipeline/measure-retrieval.js +111 -0
- package/dist/pipeline/mirror-repo-tasks.d.ts +86 -0
- package/dist/pipeline/mirror-repo-tasks.js +350 -0
- package/dist/pipeline/plan-format.d.ts +33 -0
- package/dist/pipeline/plan-format.js +202 -0
- package/dist/pipeline/plan.d.ts +169 -0
- package/dist/pipeline/plan.js +708 -0
- package/dist/pipeline/pr-comment.d.ts +19 -0
- package/dist/pipeline/pr-comment.js +502 -0
- package/dist/pipeline/probe.d.ts +52 -0
- package/dist/pipeline/probe.js +390 -0
- package/dist/pipeline/provenance.d.ts +47 -0
- package/dist/pipeline/provenance.js +146 -0
- package/dist/pipeline/readiness-report.d.ts +87 -0
- package/dist/pipeline/readiness-report.js +205 -0
- package/dist/pipeline/release-classification.d.ts +54 -0
- package/dist/pipeline/release-classification.js +238 -0
- package/dist/pipeline/release-report.d.ts +37 -0
- package/dist/pipeline/release-report.js +222 -0
- package/dist/pipeline/repo-eval-comment.d.ts +37 -0
- package/dist/pipeline/repo-eval-comment.js +165 -0
- package/dist/pipeline/repo-threshold-evaluator.d.ts +89 -0
- package/dist/pipeline/repo-threshold-evaluator.js +162 -0
- package/dist/pipeline/resolve-mappings.d.ts +35 -0
- package/dist/pipeline/resolve-mappings.js +72 -0
- package/dist/pipeline/retrieval-metrics.d.ts +39 -0
- package/dist/pipeline/retrieval-metrics.js +136 -0
- package/dist/pipeline/reverse-mapping.d.ts +67 -0
- package/dist/pipeline/reverse-mapping.js +88 -0
- package/dist/pipeline/schemas.d.ts +9 -0
- package/dist/pipeline/schemas.js +9 -0
- package/dist/pipeline/steps/calculate-scores-step.d.ts +11 -0
- package/dist/pipeline/steps/calculate-scores-step.js +89 -0
- package/dist/pipeline/steps/compare-step.d.ts +18 -0
- package/dist/pipeline/steps/compare-step.js +90 -0
- package/dist/pipeline/steps/eval-step.d.ts +53 -0
- package/dist/pipeline/steps/eval-step.js +347 -0
- package/dist/pipeline/steps/fetch-docs-step.d.ts +11 -0
- package/dist/pipeline/steps/fetch-docs-step.js +84 -0
- package/dist/pipeline/steps/generate-configs-step.d.ts +11 -0
- package/dist/pipeline/steps/generate-configs-step.js +98 -0
- package/dist/pipeline/steps/grader-consistency-step.d.ts +21 -0
- package/dist/pipeline/steps/grader-consistency-step.js +74 -0
- package/dist/pipeline/steps/publish-report-step.d.ts +57 -0
- package/dist/pipeline/steps/publish-report-step.js +243 -0
- package/dist/pipeline/steps/report-step.d.ts +13 -0
- package/dist/pipeline/steps/report-step.js +56 -0
- package/dist/pipeline/steps/update-scores-step.d.ts +11 -0
- package/dist/pipeline/steps/update-scores-step.js +42 -0
- package/dist/pipeline/targeted-loo.d.ts +88 -0
- package/dist/pipeline/targeted-loo.js +203 -0
- package/dist/pipeline/thresholds.d.ts +27 -0
- package/dist/pipeline/thresholds.js +245 -0
- package/dist/pipeline/types.d.ts +10 -0
- package/dist/pipeline/types.js +10 -0
- package/dist/pipeline/validate.d.ts +67 -0
- package/dist/pipeline/validate.js +406 -0
- package/dist/pipeline/webhook-server.d.ts +37 -0
- package/dist/pipeline/webhook-server.js +133 -0
- package/dist/report-store.d.ts +84 -0
- package/dist/report-store.js +208 -0
- package/dist/sanity/client.d.ts +38 -0
- package/dist/sanity/client.js +86 -0
- package/dist/sanity/portable-text.d.ts +11 -0
- package/dist/sanity/portable-text.js +211 -0
- package/dist/sanity/queries.d.ts +133 -0
- package/dist/sanity/queries.js +300 -0
- package/dist/schedules/digest.d.ts +116 -0
- package/dist/schedules/digest.js +156 -0
- package/dist/schedules/index.d.ts +12 -0
- package/dist/schedules/index.js +10 -0
- package/dist/schedules/loader.d.ts +31 -0
- package/dist/schedules/loader.js +73 -0
- package/dist/schedules/schema.d.ts +9 -0
- package/dist/schedules/schema.js +9 -0
- package/dist/scripts/agent-behavior-report.d.ts +19 -0
- package/dist/scripts/agent-behavior-report.js +315 -0
- package/dist/scripts/baseline.d.ts +43 -0
- package/dist/scripts/baseline.js +267 -0
- package/dist/scripts/calculate-scores.d.ts +166 -0
- package/dist/scripts/calculate-scores.js +1296 -0
- package/dist/scripts/compare.d.ts +22 -0
- package/dist/scripts/compare.js +334 -0
- package/dist/scripts/coverage-audit.d.ts +44 -0
- package/dist/scripts/coverage-audit.js +209 -0
- package/dist/scripts/debug-eval.d.ts +19 -0
- package/dist/scripts/debug-eval.js +73 -0
- package/dist/scripts/discovery-report.d.ts +58 -0
- package/dist/scripts/discovery-report.js +250 -0
- package/dist/scripts/fetch-docs.d.ts +35 -0
- package/dist/scripts/fetch-docs.js +472 -0
- package/dist/scripts/generate-configs.d.ts +66 -0
- package/dist/scripts/generate-configs.js +459 -0
- package/dist/scripts/grader-api.d.ts +27 -0
- package/dist/scripts/grader-api.js +206 -0
- package/dist/scripts/grader-compare.d.ts +22 -0
- package/dist/scripts/grader-compare.js +368 -0
- package/dist/scripts/grader-consistency.d.ts +20 -0
- package/dist/scripts/grader-consistency.js +313 -0
- package/dist/scripts/grader-sensitivity.d.ts +22 -0
- package/dist/scripts/grader-sensitivity.js +354 -0
- package/dist/scripts/grader-validate.d.ts +19 -0
- package/dist/scripts/grader-validate.js +267 -0
- package/dist/scripts/measure-retrieval.d.ts +10 -0
- package/dist/scripts/measure-retrieval.js +145 -0
- package/dist/scripts/migrate-tasks-to-content-lake.d.ts +24 -0
- package/dist/scripts/migrate-tasks-to-content-lake.js +327 -0
- package/dist/scripts/pipeline.d.ts +76 -0
- package/dist/scripts/pipeline.js +1031 -0
- package/dist/scripts/pr-comment.d.ts +10 -0
- package/dist/scripts/pr-comment.js +510 -0
- package/dist/scripts/readiness-report.d.ts +88 -0
- package/dist/scripts/readiness-report.js +342 -0
- package/dist/scripts/update-quality-scores.d.ts +15 -0
- package/dist/scripts/update-quality-scores.js +184 -0
- package/dist/scripts/validate-task-sources.d.ts +21 -0
- package/dist/scripts/validate-task-sources.js +210 -0
- package/dist/scripts/validate.d.ts +13 -0
- package/dist/scripts/validate.js +79 -0
- package/dist/scripts/webhook-server.d.ts +26 -0
- package/dist/scripts/webhook-server.js +147 -0
- package/dist/scripts/weekly-digest.d.ts +24 -0
- package/dist/scripts/weekly-digest.js +144 -0
- package/dist/sinks/bigquery/index.d.ts +131 -0
- package/dist/sinks/bigquery/index.js +222 -0
- package/dist/sinks/format-slack.d.ts +64 -0
- package/dist/sinks/format-slack.js +306 -0
- package/dist/sinks/index.d.ts +23 -0
- package/dist/sinks/index.js +18 -0
- package/dist/sinks/loader.d.ts +18 -0
- package/dist/sinks/loader.js +82 -0
- package/dist/sinks/retry.d.ts +24 -0
- package/dist/sinks/retry.js +52 -0
- package/dist/sinks/schema.d.ts +9 -0
- package/dist/sinks/schema.js +9 -0
- package/dist/sinks/slack/format.d.ts +65 -0
- package/dist/sinks/slack/format.js +327 -0
- package/dist/sinks/slack/index.d.ts +27 -0
- package/dist/sinks/slack/index.js +78 -0
- package/dist/sinks/slack-sink.d.ts +27 -0
- package/dist/sinks/slack-sink.js +78 -0
- package/dist/sinks/types.d.ts +59 -0
- package/dist/sinks/types.js +44 -0
- package/dist/sinks/webhook/index.d.ts +19 -0
- package/dist/sinks/webhook/index.js +50 -0
- package/dist/sinks/webhook-sink.d.ts +19 -0
- package/dist/sinks/webhook-sink.js +50 -0
- package/dist/sources.d.ts +104 -0
- package/dist/sources.js +292 -0
- package/dist/webhook/budget.d.ts +42 -0
- package/dist/webhook/budget.js +60 -0
- package/dist/webhook/debounce.d.ts +67 -0
- package/dist/webhook/debounce.js +76 -0
- package/dist/webhook/dispatch.d.ts +45 -0
- package/dist/webhook/dispatch.js +84 -0
- package/dist/webhook/eval-request-handler.d.ts +87 -0
- package/dist/webhook/eval-request-handler.js +181 -0
- package/dist/webhook/handler.d.ts +88 -0
- package/dist/webhook/handler.js +203 -0
- package/dist/webhook/index.d.ts +17 -0
- package/dist/webhook/index.js +12 -0
- package/dist/webhook/types.d.ts +109 -0
- package/dist/webhook/types.js +10 -0
- package/package.json +72 -0
- package/tasks/.expanded.agentic.yaml +51 -0
- package/tasks/.expanded.yaml +66 -0
- package/tasks/frameworks.yaml +98 -0
- package/tasks/functions.yaml +51 -0
- package/tasks/groq.yaml +216 -0
- package/tasks/nextjs-live.yaml +62 -0
- package/tasks/studio-setup.yaml +111 -0
- package/tasks/visual-editing.yaml +120 -0
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* fetch-docs command — pull documentation from Sanity CMS.
|
|
3
|
+
*
|
|
4
|
+
* Uses the composition root to wire adapters, then delegates to
|
|
5
|
+
* ctx.docFetcher (SanityDocFetcher) — the same code path as the pipeline.
|
|
6
|
+
*/
|
|
7
|
+
import { Command } from "commander";
|
|
8
|
+
/**
 * Build the `fetch-docs` CLI command.
 *
 * @returns The configured commander `Command` for `fetch-docs`.
 */
export declare function createFetchDocsCommand(): Command;
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* fetch-docs command — pull documentation from Sanity CMS.
|
|
3
|
+
*
|
|
4
|
+
* Uses the composition root to wire adapters, then delegates to
|
|
5
|
+
* ctx.docFetcher (SanityDocFetcher) — the same code path as the pipeline.
|
|
6
|
+
*/
|
|
7
|
+
import { mkdirSync, writeFileSync } from "fs";
|
|
8
|
+
import { dirname, join, resolve } from "path";
|
|
9
|
+
import { fileURLToPath } from "url";
|
|
10
|
+
import { Command } from "commander";
|
|
11
|
+
import { createAppContext } from "../composition-root.js";
|
|
12
|
+
import { loadSource } from "../sources.js";
|
|
13
|
+
import { configToSourceOverrides } from "../orchestration/config-to-source-overrides.js";
|
|
14
|
+
import { addSanitySourceOptions } from "./shared/options.js";
|
|
15
|
+
// Directory of this module file, derived from import.meta.url.
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
16
|
+
// Two directory levels above this module; passed as `rootDir` to the
// composition root and used as the base directory for metadata output.
const ROOT = resolve(__dirname, "..", "..");
|
|
17
|
+
/**
 * Build the `fetch-docs` CLI command.
 *
 * Registers the command's own flags (`--source`, `--include-feature-areas`,
 * `--include-corpus`), attaches the action handler that runs
 * executeFetchDocs, and then passes the command through
 * addSanitySourceOptions before returning it.
 *
 * The action handler never rethrows: on failure it sets
 * `process.exitCode = 1` and writes a message to stderr.
 *
 * @returns {Command} The configured commander command.
 */
export function createFetchDocsCommand() {
    const cmd = new Command("fetch-docs")
        .description("Fetch documentation contexts from Sanity CMS")
        .option("-s, --source <name>", "Documentation source name (from sources.yaml)")
        .option("--include-feature-areas", "Generate feature-area context files", false)
        .option("--include-corpus", "Generate full corpus context file", false)
        .action(async (opts) => {
            try {
                await executeFetchDocs(opts);
            }
            catch (err) {
                process.exitCode = 1;
                // Always report the failure. Thrown values that are not Error
                // instances (strings, plain objects, ...) used to produce a
                // non-zero exit code with no output at all.
                console.error(err instanceof Error ? err.message : String(err));
            }
        });
    addSanitySourceOptions(cmd);
    return cmd;
}
|
|
36
|
+
// ---------------------------------------------------------------------------
|
|
37
|
+
// Implementation
|
|
38
|
+
// ---------------------------------------------------------------------------
|
|
39
|
+
/**
 * Run the fetch-docs workflow for a set of parsed CLI options.
 *
 * Steps, in order:
 *   1. Create an app context through the composition root.
 *   2. Resolve the documentation source and print a summary of it.
 *   3. If `opts.includeFeatureAreas` is set, fetch feature-area contexts.
 *   4. Load tasks, keep those with at least one canonical doc, fetch their
 *      contexts, write any returned metadata, and print per-task stats.
 *   5. If `opts.includeCorpus` is set, fetch the full corpus.
 *
 * @param {object} opts - Parsed command options; reads `source`,
 *   `includeFeatureAreas` and `includeCorpus`.
 * @returns {Promise<void>}
 */
async function executeFetchDocs(opts) {
    console.log("=== ai-literacy-framework — Documentation Fetcher ===\n");

    // Minimal configuration handed to the composition root.
    const appConfig = {
        rootDir: ROOT,
        mode: "baseline",
        skipFetch: false,
        skipEval: true,
        compareEnabled: false,
        gapAnalysisEnabled: false,
        readinessEnabled: false,
        discoveryReportEnabled: false,
        publishEnabled: false,
        noCache: true,
        noRemoteCache: true,
        searchMode: "open",
        source: opts.source,
    };
    const ctx = createAppContext(appConfig);

    // Resolve the source from the context's config plus derived overrides.
    const sourceOverrides = configToSourceOverrides(ctx.config);
    const source = loadSource(ctx.config.source, sourceOverrides);

    // Summarise the resolved source; optional fields print only when present.
    console.log(` Source: ${source.name}`);
    console.log(` Base URL: ${source.baseUrl}`);
    if (source.dataset) {
        console.log(` Dataset: ${source.dataset}`);
    }
    if (source.perspective) {
        console.log(` Perspective: ${source.perspective}`);
    }
    if (source.documentIds && source.documentIds.length > 0) {
        console.log(` Documents: ${source.documentIds.length} document ID(s)`);
    }
    if (source.urls.length > 0) {
        console.log(` URLs: ${source.urls.length} direct URL(s)`);
    }
    console.log();

    // fetchFeatureAreaContexts / fetchFullCorpus are called directly on
    // ctx.docFetcher. NOTE(review): presumably this is always the concrete
    // SanityDocFetcher wired by the composition root — confirm.
    const docFetcher = ctx.docFetcher;

    // Feature-area contexts (opt-in)
    if (opts.includeFeatureAreas) {
        await docFetcher.fetchFeatureAreaContexts(source);
    }

    // Canonical contexts: only tasks that list at least one canonical doc.
    const allTasks = await ctx.taskSource.loadTasks();
    const tasksWithDocs = allTasks.filter((task) => task.canonicalDocs.length > 0);
    if (tasksWithDocs.length > 0) {
        console.log("\nGenerating canonical (gold-retrieval) contexts...\n");
        const fetched = await docFetcher.fetch(tasksWithDocs, source);

        // Write metadata files when the fetch returned any metadata.
        if (fetched.metadata) {
            writeMetadataFiles(ROOT, fetched.metadata);
        }

        console.log(`\n Canonical contexts: ${fetched.contexts.length} tasks`);
        for (const entry of fetched.contexts) {
            const approxTokens = entry.tokenCount ?? 0;
            console.log(` ${entry.taskId}: ${entry.slugs.length} doc(s), ~${approxTokens} tokens`);
        }
    }

    // Full corpus (opt-in)
    if (opts.includeCorpus) {
        await docFetcher.fetchFullCorpus(source);
    }

    console.log("\nDone!");
}
|
|
102
|
+
// ---------------------------------------------------------------------------
|
|
103
|
+
// Helpers
|
|
104
|
+
// ---------------------------------------------------------------------------
|
|
105
|
+
/**
 * Write the fetch metadata that is present to `<rootDir>/contexts/` as
 * pretty-printed JSON, logging one line per file written.
 *
 * The `contexts` directory is created if it does not exist. Each metadata
 * section is written only when its value is truthy.
 *
 * @param rootDir  Directory that contains (or will contain) `contexts/`.
 * @param metadata Object with optional `manifest`, `releaseImpact`,
 *                 `documentOverlay` and `urlFetch` entries.
 */
function writeMetadataFiles(rootDir, metadata) {
    const outDir = join(rootDir, "contexts");
    mkdirSync(outDir, { recursive: true });
    // [metadata key, output file name, log-line builder], in write order.
    const sections = [
        [
            "manifest",
            "document-manifest.json",
            (value) => ` 📋 Document manifest: ${value.length} docs → contexts/document-manifest.json`,
        ],
        [
            "releaseImpact",
            "release-impact.json",
            () => " 📄 Release impact written to contexts/release-impact.json",
        ],
        [
            "documentOverlay",
            "document-overlay.json",
            () => " 📄 Document overlay written to contexts/document-overlay.json",
        ],
        [
            "urlFetch",
            "url-fetch.json",
            () => " 📄 URL fetch metadata written to contexts/url-fetch.json",
        ],
    ];
    for (const [key, fileName, describe] of sections) {
        const value = metadata[key];
        if (!value) {
            continue;
        }
        writeFileSync(join(outDir, fileName), JSON.stringify(value, null, 2));
        console.log(describe(value));
    }
}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* generate-configs command — generate promptfoo config files from models.yaml.
|
|
3
|
+
*
|
|
4
|
+
* Uses the composition root to wire adapters, then calls generateConfigs()
|
|
5
|
+
* directly — the same code path as the pipeline.
|
|
6
|
+
*/
|
|
7
|
+
import { Command } from "commander";
|
|
8
|
+
/** Returns the commander `Command` for `generate-configs`. */
export declare function createGenerateConfigsCommand(): Command;
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* generate-configs command — generate promptfoo config files from models.yaml.
|
|
3
|
+
*
|
|
4
|
+
* Uses the composition root to wire adapters, then calls generateConfigs()
|
|
5
|
+
* directly — the same code path as the pipeline.
|
|
6
|
+
*/
|
|
7
|
+
import { dirname, resolve } from "path";
|
|
8
|
+
import { fileURLToPath } from "url";
|
|
9
|
+
import { Command } from "commander";
|
|
10
|
+
import { createAppContext } from "../composition-root.js";
|
|
11
|
+
import { generateConfigs } from "../pipeline/generate-configs.js";
|
|
12
|
+
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
13
|
+
const ROOT = resolve(__dirname, "..", "..");
|
|
14
|
+
/**
 * Build the `generate-configs` CLI command.
 *
 * The action creates an app context with fetching, evaluation, reporting,
 * publishing and caching disabled, then calls `generateConfigs()` with the
 * context's root directory and the optional `--source` name.
 *
 * Failures never propagate out of the action: the process exit code is set
 * to 1 and the failure is printed to stderr.
 *
 * @returns {Command} The configured commander command.
 */
export function createGenerateConfigsCommand() {
    return new Command("generate-configs")
        .description("Generate promptfoo config files from config/models.yaml")
        .option("-s, --source <name>", "Documentation source name")
        .action(async (opts) => {
        try {
            const ctx = createAppContext({
                rootDir: ROOT,
                mode: "baseline",
                skipFetch: true,
                skipEval: true,
                compareEnabled: false,
                gapAnalysisEnabled: false,
                readinessEnabled: false,
                discoveryReportEnabled: false,
                publishEnabled: false,
                noCache: true,
                noRemoteCache: true,
                searchMode: "open",
                source: opts.source,
            });
            // Awaited so that a rejected promise from generateConfigs() lands in
            // the catch below instead of becoming an unhandled rejection.
            // Awaiting a plain (synchronous) return value is harmless.
            await generateConfigs({
                rootDir: ctx.config.rootDir,
                source: opts.source,
            });
        }
        catch (err) {
            process.exitCode = 1;
            // Also report non-Error throwables, so a failure is never silent.
            console.error(err instanceof Error ? err.message : String(err));
        }
    });
}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* grader subcommand group — tools for measuring grader reliability.
|
|
3
|
+
*
|
|
4
|
+
* Exposes four subcommands:
|
|
5
|
+
* ailf grader consistency — measure grading variance (Phase 1)
|
|
6
|
+
* ailf grader compare — inter-grader comparison (Phase 3)
|
|
7
|
+
* ailf grader sensitivity — discrimination power testing (Phase 4)
|
|
8
|
+
* ailf grader validate — accuracy against human references (Phase 2)
|
|
9
|
+
*/
|
|
10
|
+
import { Command } from "commander";
|
|
11
|
+
/** Returns the commander `Command` for the `grader` subcommand group. */
export declare function createGraderCommand(): Command;
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* grader subcommand group — tools for measuring grader reliability.
|
|
3
|
+
*
|
|
4
|
+
* Exposes four subcommands:
|
|
5
|
+
* ailf grader consistency — measure grading variance (Phase 1)
|
|
6
|
+
* ailf grader compare — inter-grader comparison (Phase 3)
|
|
7
|
+
* ailf grader sensitivity — discrimination power testing (Phase 4)
|
|
8
|
+
* ailf grader validate — accuracy against human references (Phase 2)
|
|
9
|
+
*/
|
|
10
|
+
import { dirname, join, resolve } from "path";
|
|
11
|
+
import { fileURLToPath } from "url";
|
|
12
|
+
import { Command } from "commander";
|
|
13
|
+
import { runGraderCompare } from "../../pipeline/grader-compare-runner.js";
|
|
14
|
+
import { runGraderConsistency } from "../../pipeline/grader-consistency-runner.js";
|
|
15
|
+
import { runGraderSensitivity } from "../../pipeline/grader-sensitivity-runner.js";
|
|
16
|
+
import { runGraderValidate } from "../../pipeline/grader-validate-runner.js";
|
|
17
|
+
import { collect } from "../shared/options.js";
|
|
18
|
+
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
19
|
+
const ROOT = resolve(__dirname, "..", "..", "..");
|
|
20
|
+
/**
 * Option processor that parses a CLI value as a base-10 integer.
 *
 * Commander calls option processors with (value, previousValue). Passing the
 * global `parseInt` directly therefore uses the previous/default value as the
 * radix, e.g. `parseInt("10", 5) === 5`. This wrapper ignores the second
 * argument and always parses in base 10.
 */
function parseBase10Int(value) {
    return Number.parseInt(value, 10);
}
/** Set a failing exit code and print the failure, including non-Error throwables. */
function reportGraderFailure(err) {
    process.exitCode = 1;
    console.error(err instanceof Error ? err.message : String(err));
}
/**
 * Build the `grader` command group with its four subcommands:
 * `consistency`, `compare`, `sensitivity` and `validate`.
 *
 * Each subcommand forwards its parsed options to the matching runner.
 * Failures set `process.exitCode = 1` and are printed to stderr; a
 * `validate` run that misses the MAE threshold exits the process with code 1.
 *
 * @returns {Command} The configured commander command group.
 */
export function createGraderCommand() {
    const cmd = new Command("grader").description("Grader reliability tools");
    // ── consistency ──────────────────────────────────────────────────────
    cmd
        .command("consistency")
        .description("Measure grader consistency by re-grading existing responses N times")
        .option("-r, --replications <n>", "Number of additional grading replications", parseBase10Int, 5)
        .option("--results <path>", "Path to eval-results.json")
        .action(async (opts) => {
        try {
            // A non-numeric --replications parses to NaN; reject it here rather
            // than handing a meaningless count to the runner.
            if (!Number.isInteger(opts.replications) || opts.replications < 0) {
                throw new Error("--replications must be a non-negative integer");
            }
            await runGraderConsistency({
                replications: opts.replications,
                resultsPath: opts.results ??
                    join(ROOT, "results", "latest", "eval-results.json"),
                rootDir: ROOT,
            });
        }
        catch (err) {
            reportGraderFailure(err);
        }
    });
    // ── compare ──────────────────────────────────────────────────────────
    cmd
        .command("compare")
        .description("Compare multiple grader models on the same responses")
        .option("-c, --candidate <model>", "Candidate grader model ID (repeatable)", collect, [])
        .option("--results <path>", "Path to eval results")
        .option("-f, --format <fmt>", "Output format: table or json", "table")
        .option("-o, --output <path>", "Write JSON report to file")
        .action(async (opts) => {
        try {
            // Label each candidate with the part of its ID after the last ":".
            const candidates = opts.candidate.map((id) => ({
                id,
                label: id.split(":").pop() ?? id,
            }));
            await runGraderCompare({
                candidates,
                format: opts.format,
                outputPath: opts.output,
                resultsPath: opts.results,
                rootDir: ROOT,
            });
        }
        catch (err) {
            reportGraderFailure(err);
        }
    });
    // ── sensitivity ──────────────────────────────────────────────────────
    cmd
        .command("sensitivity")
        .description("Test grader discrimination power using programmatic code degradation")
        .option("-a, --area <name>", "Test only reference solutions in this area")
        .option("-f, --format <fmt>", "Output format: table or json", "table")
        .option("-o, --output <path>", "Write JSON report to file")
        .action(async (opts) => {
        try {
            await runGraderSensitivity({
                areaFilter: opts.area,
                format: opts.format,
                outputPath: opts.output,
                rootDir: ROOT,
            });
        }
        catch (err) {
            reportGraderFailure(err);
        }
    });
    // ── validate ─────────────────────────────────────────────────────────
    cmd
        .command("validate")
        .description("Validate grader accuracy against human reference grades")
        .option("-g, --grader <model>", "Grader model to validate")
        // parseFloat ignores the extra previous-value argument, so it is safe here.
        .option("-t, --threshold <n>", "MAE threshold for pass/fail", parseFloat, 10)
        .action(async (opts) => {
        try {
            if (Number.isNaN(opts.threshold)) {
                throw new Error("--threshold must be a number");
            }
            const result = await runGraderValidate({
                graderModel: opts.grader,
                maeThreshold: opts.threshold,
                rootDir: ROOT,
            });
            if (!result.passesThreshold) {
                console.error(`\n ❌ VALIDATION FAILED: MAE ${result.overallMae} exceeds threshold ${opts.threshold}`);
                process.exit(1);
            }
        }
        catch (err) {
            reportGraderFailure(err);
        }
    });
    return cmd;
}
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* commands/init.ts — Initialize a directory for AI Literacy Framework.
|
|
3
|
+
*
|
|
4
|
+
* Creates the .ailf/ directory structure with example configuration and
|
|
5
|
+
* task files. The generated files are ready-to-edit starting points —
|
|
6
|
+
* not live evaluation tasks.
|
|
7
|
+
*
|
|
8
|
+
* YAML output (default) preserves the inline comments from the source
|
|
9
|
+
* YAML files in packages/core/examples/. JSON output is a plain
|
|
10
|
+
* serialization of the parsed data — no comments.
|
|
11
|
+
*
|
|
12
|
+
* Usage:
|
|
13
|
+
* ailf init # YAML output (default)
|
|
14
|
+
* ailf init --output-format json # JSON output
|
|
15
|
+
* ailf init --force # overwrite existing files
|
|
16
|
+
* ailf init --path ./my-dir # target a specific directory
|
|
17
|
+
*/
|
|
18
|
+
import { Command } from "commander";
|
|
19
|
+
/** Returns the commander `Command` for `ailf init`. */
export declare function createInitCommand(): Command;
|
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* commands/init.ts — Initialize a directory for AI Literacy Framework.
|
|
3
|
+
*
|
|
4
|
+
* Creates the .ailf/ directory structure with example configuration and
|
|
5
|
+
* task files. The generated files are ready-to-edit starting points —
|
|
6
|
+
* not live evaluation tasks.
|
|
7
|
+
*
|
|
8
|
+
* YAML output (default) preserves the inline comments from the source
|
|
9
|
+
* YAML files in packages/core/examples/. JSON output is a plain
|
|
10
|
+
* serialization of the parsed data — no comments.
|
|
11
|
+
*
|
|
12
|
+
* Usage:
|
|
13
|
+
* ailf init # YAML output (default)
|
|
14
|
+
* ailf init --output-format json # JSON output
|
|
15
|
+
* ailf init --force # overwrite existing files
|
|
16
|
+
* ailf init --path ./my-dir # target a specific directory
|
|
17
|
+
*/
|
|
18
|
+
import { Command } from "commander";
|
|
19
|
+
import { existsSync, mkdirSync, writeFileSync } from "fs";
|
|
20
|
+
import { resolve, relative } from "path";
|
|
21
|
+
import { ailfConfigData, ailfConfigYaml, taskYamlFiles, TASK_FILE_NAMES, allTaskData, } from "../_vendor/ailf-core/index.js";
|
|
22
|
+
// ---------------------------------------------------------------------------
|
|
23
|
+
// Command factory
|
|
24
|
+
// ---------------------------------------------------------------------------
|
|
25
|
+
/**
 * Build the `init` CLI command.
 *
 * Options: `--output-format <fmt>` (default "yaml"), `--force`
 * (default false) and `--path <dir>` (default "."). The action hands the
 * parsed options to `runInit()`.
 *
 * @returns {Command} The configured commander command.
 */
export function createInitCommand() {
    const initCommand = new Command("init");
    initCommand.description("Initialize a directory for AI Literacy Framework evaluation");
    initCommand.option("--output-format <fmt>", 'Output format for generated files: "yaml" (default) or "json"', "yaml");
    initCommand.option("--force", "Overwrite existing files", false);
    initCommand.option("--path <dir>", "Target directory (default: current directory)", ".");
    initCommand.action(async (opts) => {
        await runInit(opts);
    });
    return initCommand;
}
|
|
35
|
+
// ---------------------------------------------------------------------------
|
|
36
|
+
// Helpers
|
|
37
|
+
// ---------------------------------------------------------------------------
|
|
38
|
+
/**
 * Write `content` to `filePath` as UTF-8 unless something already exists at
 * that path. An existing path is overwritten only when `force` is truthy.
 *
 * @returns {boolean} true when the file was written, false when it was skipped.
 */
function writeIfNew(filePath, content, force) {
    const shouldWrite = Boolean(force) || !existsSync(filePath);
    if (shouldWrite) {
        writeFileSync(filePath, content, "utf-8");
    }
    return shouldWrite;
}
|
|
49
|
+
/**
 * Relative path from `from` to `to` for display, prefixed with `./`.
 *
 * A path that climbs out of `from` (it is `..` or starts with a `..`
 * segment) is returned unchanged, because it is already explicitly relative.
 *
 * @param {string} from Base directory.
 * @param {string} to   Path to display relative to `from`.
 * @returns {string} The display path.
 */
function rel(from, to) {
    const r = relative(from, to);
    // Test for a leading ".." segment, not just a leading ".": dot-directories
    // such as ".ailf" start with "." too and still need the "./" prefix.
    // Both separators are checked since path.relative() is platform-specific.
    const climbsOut = r === ".." || r.startsWith("../") || r.startsWith("..\\");
    return climbsOut ? r : `./${r}`;
}
|
|
54
|
+
// ---------------------------------------------------------------------------
|
|
55
|
+
// Init logic
|
|
56
|
+
// ---------------------------------------------------------------------------
|
|
57
|
+
/**
 * Scaffold `.ailf/` inside the target directory.
 *
 * Creates `.ailf/tasks/`, then writes the example config, the example task
 * files and a `.gitignore`. Existing files are kept unless `opts.force` is
 * set. Finishes by printing what was created or skipped and the next steps.
 *
 * `opts.outputFormat === "json"` selects JSON output; any other value
 * produces YAML. `opts.path` is resolved against `AILF_CALLER_CWD` when that
 * environment variable is set, otherwise against the current working
 * directory.
 *
 * @param opts Parsed CLI options; reads `outputFormat`, `force` and `path`.
 */
async function runInit(opts) {
    const wantsJson = opts.outputFormat === "json";
    const ext = wantsJson ? ".json" : ".yaml";
    const force = opts.force;
    // Resolve the target from the caller's actual working directory.
    const baseDir = process.env.AILF_CALLER_CWD ?? process.cwd();
    const targetDir = resolve(baseDir, opts.path);
    const ailfDir = resolve(targetDir, ".ailf");
    const tasksDir = resolve(ailfDir, "tasks");
    // Display form of an absolute path, relative to the target directory.
    const shown = (absPath) => rel(targetDir, absPath);
    console.log();
    console.log(" 🚀 Initializing AI Literacy Framework");
    console.log();
    // 1. Directories (tasksDir is nested in ailfDir, so one recursive mkdir).
    mkdirSync(tasksDir, { recursive: true });
    console.log(` ✓ Created ${shown(ailfDir)}/`);
    console.log(` ✓ Created ${shown(tasksDir)}/`);
    const written = [];
    const skipped = [];
    // Write one file (respecting --force) and record whether it was written.
    const emit = (absPath, content) => {
        const bucket = writeIfNew(absPath, content, force) ? written : skipped;
        bucket.push(shown(absPath));
    };
    // 2. Config: YAML is passed through verbatim (keeps comments), JSON is
    //    serialized from the parsed data.
    const configPath = resolve(ailfDir, `config${ext}`);
    const configContent = wantsJson
        ? JSON.stringify(ailfConfigData, null, 2) + "\n"
        : ailfConfigYaml;
    emit(configPath, configContent);
    // 3. Example tasks.
    if (wantsJson) {
        // One JSON file per task, each holding a single-element array.
        const taskList = Array.isArray(allTaskData) ? allTaskData : [allTaskData];
        for (const task of taskList) {
            const taskPath = resolve(tasksDir, `${task.id}.json`);
            emit(taskPath, JSON.stringify([task], null, 2) + "\n");
        }
    }
    else {
        // One commented YAML file per task, written as-is.
        for (const stem of TASK_FILE_NAMES) {
            emit(resolve(tasksDir, `${stem}.yaml`), taskYamlFiles[stem]);
        }
    }
    // 4. .gitignore inside .ailf/ keeps generated output out of version control.
    emit(resolve(ailfDir, ".gitignore"), `# AILF generated files\nresults/\ncontexts/\n`);
    // 5. Summary
    console.log();
    for (const file of written) {
        console.log(` ✓ Created ${file}`);
    }
    if (skipped.length > 0) {
        console.log();
        for (const file of skipped) {
            console.log(` ⊘ Skipped ${file} (already exists, use --force to overwrite)`);
        }
    }
    console.log();
    console.log(" Next steps:");
    console.log();
    console.log(` 1. Edit ${shown(resolve(ailfDir, `config${ext}`))} with your Sanity project settings`);
    console.log(` 2. Customize the example tasks in ${shown(tasksDir)}/`);
    console.log(" 3. Run: ailf pipeline --repo-tasks-path .ailf/tasks/");
    console.log();
}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Interactive mode — guided wizard for the evaluation pipeline.
|
|
3
|
+
*
|
|
4
|
+
* When `ailf` is run with no arguments (or `ailf interactive`), this module
|
|
5
|
+
* prompts the user through mode selection, area scoping, debug options,
|
|
6
|
+
* and common flags — then builds and executes the equivalent `ailf pipeline`
|
|
7
|
+
* command.
|
|
8
|
+
*
|
|
9
|
+
* Uses @inquirer/prompts for a clean, modern terminal UI.
|
|
10
|
+
*/
|
|
11
|
+
import { Command } from "commander";
|
|
12
|
+
/** Returns the commander `Command` for interactive mode. */
export declare function createInteractiveCommand(): Command;
|