@sanity/ailf 2.0.0 → 2.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/canonical/grader-references/README.md +2 -2
- package/canonical/reference-solutions/content-lake/mutations.ts +160 -0
- package/canonical/reference-solutions/content-lake/realtime.ts +187 -0
- package/canonical/reference-solutions/image-handling/asset-pipeline.tsx +166 -0
- package/canonical/reference-solutions/portable-text/custom-blocks.ts +204 -0
- package/canonical/reference-solutions/portable-text/rendering.tsx +163 -0
- package/config/features.ts +1 -1
- package/config/models.ts +28 -23
- package/config/sources.ts +1 -1
- package/config/thresholds.ts +1 -1
- package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.d.ts +10 -0
- package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.js +185 -0
- package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.d.ts +6 -0
- package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.js +42 -0
- package/dist/_vendor/ailf-core/artifact-capture/noop-collector.d.ts +14 -0
- package/dist/_vendor/ailf-core/artifact-capture/noop-collector.js +25 -0
- package/dist/_vendor/ailf-core/config-helpers.d.ts +6 -0
- package/dist/_vendor/ailf-core/config-helpers.js +29 -0
- package/dist/_vendor/ailf-core/examples/index.d.ts +164 -94
- package/dist/_vendor/ailf-core/examples/index.js +208 -114
- package/dist/_vendor/ailf-core/index.d.ts +1 -0
- package/dist/_vendor/ailf-core/index.js +1 -0
- package/dist/_vendor/ailf-core/ports/artifact-collector.d.ts +94 -0
- package/dist/_vendor/ailf-core/ports/artifact-collector.js +13 -0
- package/dist/_vendor/ailf-core/ports/capture-comparator.d.ts +138 -0
- package/dist/_vendor/ailf-core/ports/capture-comparator.js +10 -0
- package/dist/_vendor/ailf-core/ports/context.d.ts +20 -1
- package/dist/_vendor/ailf-core/ports/eval-runner.d.ts +6 -0
- package/dist/_vendor/ailf-core/ports/index.d.ts +2 -0
- package/dist/_vendor/ailf-core/ports/pipeline-step.d.ts +11 -0
- package/dist/_vendor/ailf-core/ports/task-source.d.ts +3 -3
- package/dist/_vendor/ailf-core/ports/task-source.js +3 -3
- package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +6 -1
- package/dist/_vendor/ailf-core/schemas/pipeline-request.js +14 -2
- package/dist/_vendor/ailf-core/services/config-helpers.d.ts +16 -1
- package/dist/_vendor/ailf-core/services/config-helpers.js +21 -0
- package/dist/_vendor/ailf-core/services/index.d.ts +1 -1
- package/dist/_vendor/ailf-core/services/index.js +1 -1
- package/dist/_vendor/ailf-core/services/scoring.js +9 -0
- package/dist/_vendor/ailf-core/types/generalized-task.d.ts +12 -1
- package/dist/_vendor/ailf-core/types/generalized-task.js +1 -1
- package/dist/_vendor/ailf-core/types/index.d.ts +47 -4
- package/dist/_vendor/ailf-core/types/plugin-registry.d.ts +27 -0
- package/dist/_vendor/ailf-shared/eval-modes.d.ts +15 -0
- package/dist/_vendor/ailf-shared/eval-modes.js +18 -0
- package/dist/_vendor/ailf-tasks/cli.d.ts +8 -0
- package/dist/_vendor/ailf-tasks/cli.js +61 -0
- package/dist/_vendor/ailf-tasks/index.d.ts +13 -0
- package/dist/_vendor/ailf-tasks/index.js +16 -0
- package/dist/_vendor/ailf-tasks/parser.d.ts +27 -0
- package/dist/_vendor/ailf-tasks/parser.js +73 -0
- package/dist/_vendor/ailf-tasks/schemas.d.ts +198 -0
- package/dist/_vendor/ailf-tasks/schemas.js +180 -0
- package/dist/_vendor/ailf-tasks/validation.d.ts +47 -0
- package/dist/_vendor/ailf-tasks/validation.js +162 -0
- package/dist/adapters/api-client/remediation.js +2 -2
- package/dist/adapters/config-sources/file-config-adapter.js +6 -1
- package/dist/adapters/eval-runners/promptfoo-eval-adapter.js +8 -2
- package/dist/adapters/index.d.ts +0 -1
- package/dist/adapters/index.js +0 -1
- package/dist/adapters/task-sources/composite-task-source.d.ts +1 -1
- package/dist/adapters/task-sources/composite-task-source.js +1 -1
- package/dist/adapters/task-sources/content-lake-task-source.d.ts +4 -6
- package/dist/adapters/task-sources/content-lake-task-source.js +4 -6
- package/dist/adapters/task-sources/index.d.ts +1 -2
- package/dist/adapters/task-sources/index.js +1 -2
- package/dist/adapters/task-sources/repo-schemas.d.ts +1 -1
- package/dist/adapters/task-sources/repo-schemas.js +2 -2
- package/dist/adapters/task-sources/repo-task-source.js +1 -1
- package/dist/adapters/task-sources/repo-trigger.d.ts +1 -1
- package/dist/adapters/task-sources/repo-trigger.js +1 -1
- package/dist/adapters/task-sources/task-file-loader.d.ts +9 -6
- package/dist/adapters/task-sources/task-file-loader.js +20 -6
- package/dist/agent-observer/test-imports.d.ts +7 -0
- package/dist/agent-observer/test-imports.js +185 -0
- package/dist/artifact-capture/comparator.d.ts +22 -0
- package/dist/artifact-capture/comparator.js +493 -0
- package/dist/artifact-capture/filesystem-collector.d.ts +42 -0
- package/dist/artifact-capture/filesystem-collector.js +237 -0
- package/dist/artifact-capture/redact-artifact.d.ts +20 -0
- package/dist/artifact-capture/redact-artifact.js +115 -0
- package/dist/assertions/source-isolation.d.ts +1 -1
- package/dist/assertions/source-isolation.js +1 -1
- package/dist/cli.js +4 -0
- package/dist/commands/calculate-scores.js +1 -0
- package/dist/commands/capture-compare.d.ts +15 -0
- package/dist/commands/capture-compare.js +253 -0
- package/dist/commands/capture-list.d.ts +12 -0
- package/dist/commands/capture-list.js +147 -0
- package/dist/commands/capture.d.ts +9 -0
- package/dist/commands/capture.js +16 -0
- package/dist/commands/chronic-failures.d.ts +8 -0
- package/dist/commands/chronic-failures.js +33 -0
- package/dist/commands/explain-handler.d.ts +1 -1
- package/dist/commands/explain-handler.js +37 -8
- package/dist/commands/fetch-docs.js +1 -0
- package/dist/commands/generate-configs.d.ts +3 -3
- package/dist/commands/generate-configs.js +20 -8
- package/dist/commands/init.d.ts +2 -3
- package/dist/commands/init.js +56 -170
- package/dist/commands/pipeline-action.d.ts +7 -1
- package/dist/commands/pipeline-action.js +43 -19
- package/dist/commands/pipeline.d.ts +6 -1
- package/dist/commands/pipeline.js +7 -2
- package/dist/commands/pr-comment.js +1 -0
- package/dist/commands/publish.js +1 -0
- package/dist/commands/shared/help.js +2 -2
- package/dist/commands/update-quality-scores.d.ts +5 -0
- package/dist/commands/update-quality-scores.js +20 -0
- package/dist/composition-root.d.ts +2 -3
- package/dist/composition-root.js +27 -14
- package/dist/config/features.ts +23 -0
- package/dist/config/models.ts +100 -0
- package/dist/config/prompts.ts +16 -0
- package/dist/config/rubrics.ts +225 -0
- package/dist/config/schedules.ts +47 -0
- package/dist/config/sinks.ts +37 -0
- package/dist/config/sources.ts +21 -0
- package/dist/config/thresholds.ts +61 -0
- package/dist/lib/agent-behavior-report.d.ts +8 -0
- package/dist/lib/agent-behavior-report.js +185 -0
- package/dist/lib/baseline.d.ts +19 -0
- package/dist/lib/baseline.js +153 -0
- package/dist/lib/calculate-scores.d.ts +23 -0
- package/dist/lib/calculate-scores.js +42 -0
- package/dist/lib/compare.d.ts +18 -0
- package/dist/lib/compare.js +170 -0
- package/dist/lib/coverage-audit.d.ts +4 -0
- package/dist/lib/coverage-audit.js +42 -0
- package/dist/lib/discovery-report.d.ts +13 -0
- package/dist/lib/discovery-report.js +57 -0
- package/dist/lib/fetch-docs.d.ts +30 -0
- package/dist/lib/fetch-docs.js +171 -0
- package/dist/lib/generate-configs.d.ts +25 -0
- package/dist/lib/generate-configs.js +42 -0
- package/dist/lib/grader-api.d.ts +21 -0
- package/dist/lib/grader-api.js +34 -0
- package/dist/lib/grader-compare.d.ts +19 -0
- package/dist/lib/grader-compare.js +91 -0
- package/dist/lib/grader-consistency.d.ts +27 -0
- package/dist/lib/grader-consistency.js +79 -0
- package/dist/lib/grader-sensitivity.d.ts +19 -0
- package/dist/lib/grader-sensitivity.js +75 -0
- package/dist/lib/grader-validate.d.ts +19 -0
- package/dist/lib/grader-validate.js +78 -0
- package/dist/lib/measure-retrieval.d.ts +14 -0
- package/dist/lib/measure-retrieval.js +71 -0
- package/dist/lib/pr-comment.d.ts +16 -0
- package/dist/lib/pr-comment.js +28 -0
- package/dist/lib/readiness-report.d.ts +13 -0
- package/dist/lib/readiness-report.js +108 -0
- package/dist/lib/webhook-server.d.ts +11 -0
- package/dist/lib/webhook-server.js +24 -0
- package/dist/lib/weekly-digest.d.ts +24 -0
- package/dist/lib/weekly-digest.js +148 -0
- package/dist/orchestration/build-app-context.js +13 -0
- package/dist/orchestration/cache-context.d.ts +23 -0
- package/dist/orchestration/cache-context.js +43 -0
- package/dist/orchestration/env-bridge.d.ts +21 -0
- package/dist/orchestration/env-bridge.js +66 -0
- package/dist/orchestration/load-pipeline-tasks.d.ts +34 -0
- package/dist/orchestration/load-pipeline-tasks.js +52 -0
- package/dist/orchestration/pipeline-orchestrator.js +75 -5
- package/dist/orchestration/step-runner.js +5 -1
- package/dist/orchestration/steps/calculate-scores-step.d.ts +1 -0
- package/dist/orchestration/steps/calculate-scores-step.js +13 -0
- package/dist/orchestration/steps/callback-step.js +10 -1
- package/dist/orchestration/steps/compare-step.js +6 -3
- package/dist/orchestration/steps/discovery-report-step.js +6 -2
- package/dist/orchestration/steps/fetch-docs-shell.d.ts +17 -0
- package/dist/orchestration/steps/fetch-docs-shell.js +30 -0
- package/dist/orchestration/steps/fetch-docs-step.d.ts +1 -0
- package/dist/orchestration/steps/fetch-docs-step.js +30 -16
- package/dist/orchestration/steps/gap-analysis-step.js +13 -2
- package/dist/orchestration/steps/generate-configs-step.d.ts +1 -0
- package/dist/orchestration/steps/generate-configs-step.js +50 -15
- package/dist/orchestration/steps/mirror-repo-tasks-step.d.ts +1 -1
- package/dist/orchestration/steps/mirror-repo-tasks-step.js +1 -1
- package/dist/orchestration/steps/publish-report-step.js +19 -0
- package/dist/orchestration/steps/readiness-step.js +8 -3
- package/dist/orchestration/steps/report-step.js +17 -4
- package/dist/orchestration/steps/run-eval-step.d.ts +1 -0
- package/dist/orchestration/steps/run-eval-step.js +52 -32
- package/dist/pipeline/agent-behavior-report.js +6 -0
- package/dist/pipeline/attribution.d.ts +1 -1
- package/dist/pipeline/attribution.js +1 -1
- package/dist/pipeline/cache.js +29 -15
- package/dist/pipeline/calculate-scores.d.ts +2 -0
- package/dist/pipeline/calculate-scores.js +70 -33
- package/dist/pipeline/checks.d.ts +8 -3
- package/dist/pipeline/checks.js +23 -3
- package/dist/pipeline/chronic-failures.d.ts +55 -0
- package/dist/pipeline/chronic-failures.js +110 -0
- package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +33 -0
- package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.js +2 -3
- package/dist/pipeline/compiler/__tests__/task-bridge.test.d.ts +9 -0
- package/dist/pipeline/compiler/__tests__/task-bridge.test.js +339 -0
- package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.d.ts +10 -0
- package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.js +509 -0
- package/dist/pipeline/compiler/assertion-mapper.d.ts +1 -1
- package/dist/pipeline/compiler/assertion-mapper.js +1 -1
- package/dist/pipeline/compiler/compiler-to-yaml.d.ts +2 -7
- package/dist/pipeline/compiler/compiler-to-yaml.js +2 -7
- package/dist/pipeline/compiler/config-loader.d.ts +14 -0
- package/dist/pipeline/compiler/config-loader.js +42 -2
- package/dist/pipeline/compiler/fixture-resolver.d.ts +1 -1
- package/dist/pipeline/compiler/fixture-resolver.js +1 -1
- package/dist/pipeline/compiler/ignore-fields.d.ts +1 -1
- package/dist/pipeline/compiler/ignore-fields.js +1 -1
- package/dist/pipeline/compiler/index.d.ts +2 -5
- package/dist/pipeline/compiler/index.js +2 -5
- package/dist/pipeline/compiler/literacy-bridge.d.ts +1 -1
- package/dist/pipeline/compiler/literacy-bridge.js +1 -1
- package/dist/pipeline/compiler/mode-bases/agent-harness.d.ts +1 -1
- package/dist/pipeline/compiler/mode-bases/agent-harness.js +1 -1
- package/dist/pipeline/compiler/mode-bases/knowledge-probe.d.ts +1 -1
- package/dist/pipeline/compiler/mode-bases/knowledge-probe.js +1 -1
- package/dist/pipeline/compiler/mode-bases/literacy.d.ts +13 -2
- package/dist/pipeline/compiler/mode-bases/literacy.js +55 -1
- package/dist/pipeline/compiler/mode-bases/mcp-server.d.ts +1 -1
- package/dist/pipeline/compiler/mode-bases/mcp-server.js +1 -1
- package/dist/pipeline/compiler/mode-handlers/agent-harness/index.d.ts +1 -1
- package/dist/pipeline/compiler/mode-handlers/agent-harness/index.js +1 -1
- package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.d.ts +70 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.js +485 -0
- package/dist/pipeline/compiler/mode-handlers/index.d.ts +2 -2
- package/dist/pipeline/compiler/mode-handlers/index.js +2 -2
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.d.ts +1 -1
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.js +1 -1
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.d.ts +76 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.js +245 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/index.d.ts +1 -1
- package/dist/pipeline/compiler/mode-handlers/literacy/index.js +1 -1
- package/dist/pipeline/compiler/mode-handlers/literacy-handler.d.ts +89 -0
- package/dist/pipeline/compiler/mode-handlers/literacy-handler.js +379 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-assertions.d.ts +50 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-assertions.js +334 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.d.ts +1 -1
- package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.js +1 -1
- package/dist/pipeline/compiler/mode-handlers/mcp-server/index.d.ts +1 -1
- package/dist/pipeline/compiler/mode-handlers/mcp-server/index.js +1 -1
- package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.d.ts +69 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.js +307 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.js +22 -5
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.js +6 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.d.ts +10 -5
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.js +314 -7
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.d.ts +10 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.d.ts +65 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.js +368 -0
- package/dist/pipeline/compiler/presets/sanity-literacy.d.ts +1 -1
- package/dist/pipeline/compiler/presets/sanity-literacy.js +1 -1
- package/dist/pipeline/compiler/promptfoo-compiler.d.ts +1 -4
- package/dist/pipeline/compiler/promptfoo-compiler.js +3 -12
- package/dist/pipeline/compiler/provider-assembler.js +13 -7
- package/dist/pipeline/compiler/sandbox/docker-sandbox.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/docker-sandbox.js +1 -1
- package/dist/pipeline/compiler/sandbox/fixture-provisioner.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/fixture-provisioner.js +1 -1
- package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.js +1 -1
- package/dist/pipeline/compiler/sandbox/index.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/index.js +1 -1
- package/dist/pipeline/compiler/sandbox/sandbox-selector.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/sandbox-selector.js +1 -1
- package/dist/pipeline/compiler/sandbox/sandbox-strategy.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/sandbox-strategy.js +1 -1
- package/dist/pipeline/compiler/sandbox/tempdir-sandbox.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/tempdir-sandbox.js +1 -1
- package/dist/pipeline/compiler/scoring-bridge.d.ts +1 -1
- package/dist/pipeline/compiler/scoring-bridge.js +1 -1
- package/dist/pipeline/compiler/task-bridge.d.ts +41 -0
- package/dist/pipeline/compiler/task-bridge.js +92 -0
- package/dist/pipeline/compiler/task-graph-builder.d.ts +1 -4
- package/dist/pipeline/compiler/task-graph-builder.js +1 -4
- package/dist/pipeline/compiler/telemetry/index.d.ts +1 -1
- package/dist/pipeline/compiler/telemetry/index.js +1 -1
- package/dist/pipeline/compiler/variable-resolver.d.ts +1 -1
- package/dist/pipeline/compiler/variable-resolver.js +1 -1
- package/dist/pipeline/coverage-audit.d.ts +1 -1
- package/dist/pipeline/coverage-audit.js +1 -1
- package/dist/pipeline/degradations.d.ts +1 -1
- package/dist/pipeline/degradations.js +1 -1
- package/dist/pipeline/failure-modes.d.ts +1 -1
- package/dist/pipeline/failure-modes.js +13 -1
- package/dist/pipeline/gap-analysis.d.ts +1 -1
- package/dist/pipeline/gap-analysis.js +3 -1
- package/dist/pipeline/generate-configs.d.ts +2 -2
- package/dist/pipeline/generate-configs.js +15 -8
- package/dist/pipeline/grader-compare-runner.d.ts +1 -1
- package/dist/pipeline/grader-compare-runner.js +7 -1
- package/dist/pipeline/grader-comparison.d.ts +1 -1
- package/dist/pipeline/grader-comparison.js +1 -1
- package/dist/pipeline/grader-consistency-runner.d.ts +1 -1
- package/dist/pipeline/grader-consistency-runner.js +7 -1
- package/dist/pipeline/grader-consistency.d.ts +1 -1
- package/dist/pipeline/grader-consistency.js +1 -1
- package/dist/pipeline/grader-sensitivity-runner.d.ts +1 -1
- package/dist/pipeline/grader-sensitivity-runner.js +1 -1
- package/dist/pipeline/grader-sensitivity.d.ts +1 -1
- package/dist/pipeline/grader-sensitivity.js +1 -1
- package/dist/pipeline/grader-validate-runner.d.ts +1 -1
- package/dist/pipeline/grader-validate-runner.js +2 -2
- package/dist/pipeline/grader-validation.d.ts +1 -1
- package/dist/pipeline/grader-validation.js +1 -1
- package/dist/pipeline/map-request-to-config.js +15 -2
- package/dist/pipeline/mirror-repo-tasks.d.ts +1 -1
- package/dist/pipeline/mirror-repo-tasks.js +1 -1
- package/dist/pipeline/plan-format.d.ts +1 -1
- package/dist/pipeline/plan-format.js +1 -1
- package/dist/pipeline/plan.d.ts +1 -1
- package/dist/pipeline/plan.js +67 -29
- package/dist/pipeline/probe.d.ts +1 -1
- package/dist/pipeline/probe.js +1 -1
- package/dist/pipeline/readiness-report.d.ts +2 -2
- package/dist/pipeline/readiness-report.js +2 -2
- package/dist/pipeline/release-classification.d.ts +1 -1
- package/dist/pipeline/release-classification.js +1 -1
- package/dist/pipeline/release-report.d.ts +1 -1
- package/dist/pipeline/release-report.js +1 -1
- package/dist/pipeline/repo-eval-comment.d.ts +1 -1
- package/dist/pipeline/repo-eval-comment.js +1 -1
- package/dist/pipeline/repo-threshold-evaluator.d.ts +1 -1
- package/dist/pipeline/repo-threshold-evaluator.js +1 -1
- package/dist/pipeline/resolve-mappings.d.ts +6 -6
- package/dist/pipeline/resolve-mappings.js +44 -44
- package/dist/pipeline/retrieval-metrics.d.ts +3 -3
- package/dist/pipeline/retrieval-metrics.js +28 -20
- package/dist/pipeline/steps/calculate-scores-step.d.ts +11 -0
- package/dist/pipeline/steps/calculate-scores-step.js +89 -0
- package/dist/pipeline/steps/compare-step.d.ts +18 -0
- package/dist/pipeline/steps/compare-step.js +90 -0
- package/dist/pipeline/steps/eval-step.d.ts +53 -0
- package/dist/pipeline/steps/eval-step.js +347 -0
- package/dist/pipeline/steps/fetch-docs-step.d.ts +11 -0
- package/dist/pipeline/steps/fetch-docs-step.js +84 -0
- package/dist/pipeline/steps/generate-configs-step.d.ts +11 -0
- package/dist/pipeline/steps/generate-configs-step.js +98 -0
- package/dist/pipeline/steps/grader-consistency-step.d.ts +21 -0
- package/dist/pipeline/steps/grader-consistency-step.js +74 -0
- package/dist/pipeline/steps/publish-report-step.d.ts +57 -0
- package/dist/pipeline/steps/publish-report-step.js +243 -0
- package/dist/pipeline/steps/report-step.d.ts +13 -0
- package/dist/pipeline/steps/report-step.js +56 -0
- package/dist/pipeline/steps/update-scores-step.d.ts +11 -0
- package/dist/pipeline/steps/update-scores-step.js +42 -0
- package/dist/pipeline/targeted-loo.d.ts +1 -1
- package/dist/pipeline/targeted-loo.js +1 -1
- package/dist/pipeline/thresholds.d.ts +1 -1
- package/dist/pipeline/thresholds.js +1 -1
- package/dist/pipeline/validate.js +13 -0
- package/dist/report-store.d.ts +17 -0
- package/dist/report-store.js +24 -0
- package/dist/scripts/agent-behavior-report.d.ts +19 -0
- package/dist/scripts/agent-behavior-report.js +315 -0
- package/dist/scripts/baseline.d.ts +43 -0
- package/dist/scripts/baseline.js +267 -0
- package/dist/scripts/calculate-scores.d.ts +166 -0
- package/dist/scripts/calculate-scores.js +1296 -0
- package/dist/scripts/compare.d.ts +22 -0
- package/dist/scripts/compare.js +334 -0
- package/dist/scripts/coverage-audit.d.ts +44 -0
- package/dist/scripts/coverage-audit.js +209 -0
- package/dist/scripts/debug-eval.d.ts +19 -0
- package/dist/scripts/debug-eval.js +73 -0
- package/dist/scripts/discovery-report.d.ts +58 -0
- package/dist/scripts/discovery-report.js +250 -0
- package/dist/scripts/fetch-docs.d.ts +35 -0
- package/dist/scripts/fetch-docs.js +472 -0
- package/dist/scripts/generate-configs.d.ts +66 -0
- package/dist/scripts/generate-configs.js +459 -0
- package/dist/scripts/grader-api.d.ts +27 -0
- package/dist/scripts/grader-api.js +206 -0
- package/dist/scripts/grader-compare.d.ts +22 -0
- package/dist/scripts/grader-compare.js +368 -0
- package/dist/scripts/grader-consistency.d.ts +20 -0
- package/dist/scripts/grader-consistency.js +313 -0
- package/dist/scripts/grader-sensitivity.d.ts +22 -0
- package/dist/scripts/grader-sensitivity.js +354 -0
- package/dist/scripts/grader-validate.d.ts +19 -0
- package/dist/scripts/grader-validate.js +267 -0
- package/dist/scripts/measure-retrieval.d.ts +10 -0
- package/dist/scripts/measure-retrieval.js +145 -0
- package/dist/scripts/migrate-task-mode.d.ts +1 -1
- package/dist/scripts/migrate-task-mode.js +1 -1
- package/dist/scripts/migrate-tasks-to-content-lake.d.ts +1 -1
- package/dist/scripts/migrate-tasks-to-content-lake.js +1 -1
- package/dist/scripts/pipeline.d.ts +76 -0
- package/dist/scripts/pipeline.js +1031 -0
- package/dist/scripts/pr-comment.d.ts +10 -0
- package/dist/scripts/pr-comment.js +510 -0
- package/dist/scripts/readiness-report.d.ts +88 -0
- package/dist/scripts/readiness-report.js +342 -0
- package/dist/scripts/update-quality-scores.d.ts +15 -0
- package/dist/scripts/update-quality-scores.js +184 -0
- package/dist/scripts/validate-task-sources.d.ts +1 -1
- package/dist/scripts/validate-task-sources.js +1 -1
- package/dist/scripts/validate.d.ts +13 -0
- package/dist/scripts/validate.js +79 -0
- package/dist/scripts/webhook-server.d.ts +26 -0
- package/dist/scripts/webhook-server.js +147 -0
- package/dist/scripts/weekly-digest.d.ts +24 -0
- package/dist/scripts/weekly-digest.js +144 -0
- package/dist/sinks/format-slack.d.ts +64 -0
- package/dist/sinks/format-slack.js +306 -0
- package/dist/sinks/slack-sink.d.ts +27 -0
- package/dist/sinks/slack-sink.js +78 -0
- package/dist/sinks/types.d.ts +1 -1
- package/dist/sinks/types.js +1 -1
- package/dist/sinks/webhook-sink.d.ts +19 -0
- package/dist/sinks/webhook-sink.js +50 -0
- package/dist/tasks/knowledge-probe/define-type-api.task.ts +66 -0
- package/dist/tasks/knowledge-probe/groq-projections.task.ts +62 -0
- package/dist/tasks/literacy/content-lake.task.ts +181 -0
- package/dist/tasks/literacy/frameworks.task.ts +129 -0
- package/dist/tasks/literacy/functions.task.ts +70 -0
- package/dist/tasks/literacy/groq.task.ts +259 -0
- package/dist/tasks/literacy/image-handling.task.ts +95 -0
- package/dist/tasks/literacy/nextjs-live.task.ts +76 -0
- package/dist/tasks/literacy/portable-text.task.ts +169 -0
- package/dist/tasks/literacy/studio-setup.task.ts +134 -0
- package/dist/tasks/literacy/visual-editing.task.ts +147 -0
- package/package.json +25 -25
- package/tasks/.expanded.agentic.yaml +280 -0
- package/tasks/.expanded.yaml +565 -0
- package/tasks/knowledge-probe/define-type-api.task.ts +11 -0
- package/tasks/knowledge-probe/groq-projections.task.ts +3 -0
- package/tasks/literacy/content-lake.task.ts +181 -0
- package/tasks/literacy/frameworks.task.ts +1 -0
- package/tasks/literacy/functions.task.ts +1 -0
- package/tasks/literacy/groq.task.ts +1 -0
- package/tasks/literacy/image-handling.task.ts +95 -0
- package/tasks/literacy/nextjs-live.task.ts +2 -1
- package/tasks/literacy/portable-text.task.ts +169 -0
- package/tasks/literacy/studio-setup.task.ts +5 -2
- package/tasks/literacy/visual-editing.task.ts +1 -0
- package/LICENSE +0 -21
- package/tasks/frameworks.yaml +0 -98
- package/tasks/functions.yaml +0 -51
- package/tasks/groq.yaml +0 -216
- package/tasks/nextjs-live.yaml +0 -62
- package/tasks/studio-setup.yaml +0 -111
- package/tasks/visual-editing.yaml +0 -120
|
@@ -0,0 +1,342 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* readiness-report.ts
|
|
3
|
+
*
|
|
4
|
+
* Launch readiness report generator — Phase 5b of the Scenario Matrix
|
|
5
|
+
* implementation. Combines threshold evaluation, ceiling decomposition,
|
|
6
|
+
* and gap analysis into a single actionable readiness checklist for a
|
|
7
|
+
* given feature area.
|
|
8
|
+
*
|
|
9
|
+
* Usage:
|
|
10
|
+
* pnpm readiness-report --area visual-editing
|
|
11
|
+
* pnpm readiness-report --area groq --history
|
|
12
|
+
* pnpm readiness-report --area groq --output readiness.md
|
|
13
|
+
*
|
|
14
|
+
* Exports pure functions for unit testing:
|
|
15
|
+
* - generateReadinessReport() — builds the structured report
|
|
16
|
+
* - formatReadinessMarkdown() — renders the report as markdown
|
|
17
|
+
*
|
|
18
|
+
* @see docs/exec-plans/completed/scenario-matrix-implementation/phase-5-readiness-thresholds.md
|
|
19
|
+
*/
|
|
20
|
+
import { existsSync, readFileSync, readdirSync, writeFileSync } from "node:fs";
|
|
21
|
+
import { dirname, join, resolve } from "node:path";
|
|
22
|
+
import { fileURLToPath } from "node:url";
|
|
23
|
+
import { load } from "js-yaml";
|
|
24
|
+
import { ThresholdConfigSchema, } from "../pipeline/schemas.js";
|
|
25
|
+
import { evaluateThresholds } from "../pipeline/thresholds.js";
|
|
26
|
+
// ES modules have no built-in __dirname; derive this module's directory from import.meta.url.
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
27
|
+
// Two directory levels above this module's directory; the four paths below are all joined onto it.
const ROOT = resolve(__dirname, "..", "..");
|
|
28
|
+
// <ROOT>/results/latest/score-summary.json — presumably the latest score summary this script reads (reader not visible here; confirm).
const SCORE_SUMMARY_PATH = join(ROOT, "results", "latest", "score-summary.json");
|
|
29
|
+
// <ROOT>/results/latest/gap-analysis.json — presumably the optional gap-analysis input (confirm against the loader).
const GAP_ANALYSIS_PATH = join(ROOT, "results", "latest", "gap-analysis.json");
|
|
30
|
+
// <ROOT>/config/thresholds.yaml — threshold configuration file location.
const THRESHOLDS_PATH = join(ROOT, "config", "thresholds.yaml");
|
|
31
|
+
// <ROOT>/results/baselines — directory of baseline results; presumably the source of the history rows (confirm).
const BASELINES_DIR = join(ROOT, "results", "baselines");
|
|
32
|
+
// ---------------------------------------------------------------------------
|
|
33
|
+
// Pure functions (exported for testing)
|
|
34
|
+
// ---------------------------------------------------------------------------
|
|
35
|
+
/**
 * Format a readiness report as markdown.
 *
 * Takes a structured report and returns a single markdown string with, in
 * order: a header with the overall verdict, the dimension checklist, the
 * ceiling analysis, and — only when the corresponding array is non-empty —
 * failing criteria with a recommendation, the gap analysis table, and the
 * historical progress table.
 *
 * The output depends only on `report`, with one exception: the final
 * "*current*" row of the historical progress table is stamped with today's
 * date taken from `new Date().toISOString()` (i.e. the UTC calendar date).
 *
 * @param {object} report - Structured readiness report. Fields read here:
 *   `area`, `pass`, `score`, `threshold`, `dimensions[]` (`dimension`,
 *   `score`, `threshold`, `pass`), `ceiling` (`ceilingScore`, `floorScore`,
 *   `docLift`, `docQualityGap`), `violations[]` (`description`), `gaps[]`
 *   (`failureMode`, `estimatedLift`, `confidence`, `remediation`) and
 *   `history[]` (`timestamp`, `score`, optional `tag`).
 * @returns {string} Markdown document; sections are separated by blank lines.
 */
export function formatReadinessMarkdown(report) {
    const { ceiling } = report;
    const lines = [];
    // Header
    const statusEmoji = report.pass ? "✅" : "❌";
    const statusLabel = report.pass ? "READY" : "NOT READY";
    lines.push(
        `## 🚀 Launch Readiness: ${formatAreaLabel(report.area)}`,
        "",
        `**Overall:** ${statusEmoji} ${statusLabel} (${fmt(report.score)}/100, threshold: ${report.threshold})`,
        "",
    );
    // Dimension Checklist
    lines.push(
        "### Dimension Checklist",
        "",
        "| Dimension | Score | Threshold | Status |",
        "|---|---|---|---|",
    );
    for (const dim of report.dimensions) {
        const status = dim.pass ? "✅ Meets threshold" : "❌ Below threshold";
        lines.push(`| ${dim.dimension} | ${fmt(dim.score)} | ${fmt(dim.threshold)} | ${status} |`);
    }
    lines.push("");
    // Ceiling Analysis
    lines.push(
        "### Ceiling Analysis",
        "",
        "| Metric | Value | Assessment |",
        "|---|---|---|",
    );
    const ceilingAssessment = ceiling.ceilingScore >= 60
        ? "✅ Docs enable reasonable performance"
        : "⚠️ Below 60 — docs need improvement";
    lines.push(`| Ceiling Score | ${fmt(ceiling.ceilingScore)} | ${ceilingAssessment} |`);
    const floorAssessment = ceiling.floorScore >= 30
        ? "Model has moderate baseline knowledge"
        : "Model has limited baseline knowledge";
    lines.push(`| Floor Score | ${fmt(ceiling.floorScore)} | ${floorAssessment} |`);
    // Negative lifts already carry their own "-"; only non-negative ones get "+".
    const liftSign = ceiling.docLift >= 0 ? "+" : "";
    let liftAssessment;
    if (ceiling.docLift < 0) {
        liftAssessment = "❌ Docs are hurting performance";
    }
    else if (ceiling.docLift >= 10) {
        liftAssessment = "✅ Docs add significant value";
    }
    else {
        liftAssessment = "⚠️ Docs add minimal value";
    }
    lines.push(`| Doc Lift | ${liftSign}${fmt(ceiling.docLift)} | ${liftAssessment} |`);
    const gapAssessment = ceiling.docQualityGap > 30
        ? "Room for improvement via documentation"
        : "✅ Docs are high quality";
    lines.push(`| Doc Quality Gap | ${fmt(ceiling.docQualityGap)} | ${gapAssessment} |`);
    lines.push("");
    // Failing Criteria (only shown when there are violations)
    const violationCount = report.violations.length;
    if (violationCount > 0) {
        lines.push("### Failing Criteria");
        report.violations.forEach((violation, index) => {
            lines.push(`${index + 1}. **${violation.description}**`);
        });
        const itemWord = violationCount === 1 ? "item" : "items";
        lines.push(
            "",
            "### Recommendation",
            `Fix the ${violationCount} ${itemWord} above and re-evaluate.`,
            "",
        );
    }
    // Gap Analysis (if available)
    if (report.gaps.length > 0) {
        lines.push(
            "### Gap Analysis",
            "",
            "| Failure Mode | Est. Lift | Confidence | Remediation |",
            "|---|---|---|---|",
        );
        for (const gap of report.gaps) {
            let confIcon = "🔴";
            if (gap.confidence === "high") {
                confIcon = "🟢";
            }
            else if (gap.confidence === "medium") {
                confIcon = "🟡";
            }
            // Derive the sign from the rendered text so a negative estimate is
            // shown as "-1.5" rather than "+-1.5" (matches the Doc Lift row).
            const liftText = gap.estimatedLift.toFixed(1);
            const gapSign = liftText.startsWith("-") ? "" : "+";
            lines.push(`| ${gap.failureMode} | ${gapSign}${liftText} | ${confIcon} ${gap.confidence} | ${gap.remediation} |`);
        }
        lines.push("");
    }
    // Historical Progress (if available)
    if (report.history.length > 0) {
        lines.push(
            "### Historical Progress",
            "",
            "| Date | Score | Tag |",
            "|---|---|---|",
        );
        for (const entry of report.history) {
            // Keep only the leading YYYY-MM-DD portion of the timestamp string.
            const date = entry.timestamp.slice(0, 10);
            const tag = entry.tag ?? "—";
            lines.push(`| ${date} | ${fmt(entry.score)} | ${tag} |`);
        }
        // Show current score as the last row, dated with today's UTC date.
        const today = new Date().toISOString().slice(0, 10);
        lines.push(`| ${today} | ${fmt(report.score)} | *current* |`);
        lines.push("");
    }
    return lines.join("\n");
}
|
|
129
|
+
/**
 * Build the structured readiness report for one feature area.
 *
 * Pure function: all data arrives through `opts` and the only outcome is the
 * returned report object (or a thrown error). No I/O happens here.
 *
 * @param {object} opts
 * @param {string} opts.area - Feature area, matched against `scoreSummary.scores[].feature`.
 * @param {object} [opts.gapAnalysis] - Optional gap analysis; only gaps whose `area` matches are kept.
 * @param {Array} [opts.history=[]] - Historical entries, passed through unchanged.
 * @param {object} opts.scoreSummary - Score summary holding a `scores` array.
 * @param {object} opts.thresholdConfig - Threshold config with `defaults` and optional per-area `areas`.
 * @returns {object} Report with area, ceiling, dimensions, gaps, history, pass,
 *   score, threshold, thresholdEvaluation and violations.
 * @throws {Error} When `area` has no entry in `scoreSummary.scores`.
 */
export function generateReadinessReport(opts) {
  const { area, gapAnalysis, history = [], scoreSummary, thresholdConfig } = opts;

  // Locate the score entry for the requested area; fail loudly if it is absent.
  const areaScore = scoreSummary.scores.find((entry) => entry.feature === area);
  if (!areaScore) {
    const known = scoreSummary.scores.map((entry) => entry.feature).join(", ");
    throw new Error(`Area "${area}" not found in score summary. Available areas: ${known}`);
  }

  // Thresholds are evaluated for the whole summary, then narrowed to this area.
  const thresholdEvaluation = evaluateThresholds(scoreSummary, thresholdConfig);
  const violations = thresholdEvaluation.violations.filter((violation) => violation.area === area);

  // Per-area overrides win over the configured defaults.
  const overrides = thresholdConfig.areas?.[area];
  const threshold = overrides?.composite ?? thresholdConfig.defaults.composite;
  const defaultDims = thresholdConfig.defaults.dimensions ?? {};
  const overrideDims = overrides?.dimensions ?? {};

  // [display label, config key, observed score] for every checked dimension.
  const dimensionSpecs = [
    ["Task Completion", "task-completion", areaScore.taskCompletion],
    ["Code Correctness", "code-correctness", areaScore.codeCorrectness],
    ["Doc Coverage", "doc-coverage", areaScore.docCoverage],
  ];
  const dimensions = dimensionSpecs.map(([dimension, key, score]) => {
    // A dimension without any configured threshold is checked against 0.
    const limit = overrideDims[key] ?? defaultDims[key] ?? 0;
    return { dimension, pass: score >= limit, score, threshold: limit };
  });

  return {
    area,
    // Ceiling decomposition copied straight from the area's score entry.
    ceiling: {
      ceilingScore: areaScore.ceilingScore,
      docLift: areaScore.docLift,
      docQualityGap: areaScore.docQualityGap,
      floorScore: areaScore.floorScore,
    },
    dimensions,
    gaps: gapAnalysis?.gaps.filter((gap) => gap.area === area) ?? [],
    history,
    // The area is ready only when it has no threshold violations.
    pass: violations.length === 0,
    score: areaScore.totalScore,
    threshold,
    thresholdEvaluation,
    violations,
  };
}
|
|
204
|
+
// ---------------------------------------------------------------------------
|
|
205
|
+
// Formatting helpers (private)
|
|
206
|
+
// ---------------------------------------------------------------------------
|
|
207
|
+
/**
 * Render a score for display, rounded to the nearest integer.
 *
 * @param {number} n - Raw score.
 * @returns {string} The rounded score as text.
 */
function fmt(n) {
  const rounded = Math.round(n);
  return `${rounded}`;
}
|
|
211
|
+
/**
 * Turn a kebab-case area name into a space-separated, capitalised label
 * (e.g. "content-lake" becomes "Content Lake").
 *
 * @param {string} area - Kebab-case area identifier.
 * @returns {string} Human-readable label.
 */
function formatAreaLabel(area) {
  const words = [];
  for (const word of area.split("-")) {
    words.push(`${word.charAt(0).toUpperCase()}${word.slice(1)}`);
  }
  return words.join(" ");
}
|
|
218
|
+
// ---------------------------------------------------------------------------
|
|
219
|
+
// I/O helpers (used by CLI, not exported for testing)
|
|
220
|
+
// ---------------------------------------------------------------------------
|
|
221
|
+
/**
 * Read the optional gap-analysis JSON file.
 *
 * @param {string} path - Location of the gap-analysis file.
 * @returns {object | undefined} Parsed JSON, or `undefined` when the file does not exist.
 * @throws {SyntaxError} When the file exists but is not valid JSON.
 */
function loadGapAnalysis(path) {
  return existsSync(path) ? JSON.parse(readFileSync(path, "utf-8")) : undefined;
}
|
|
226
|
+
/**
 * Derive an ISO-style timestamp from a baseline filename stem such as
 * "20260304_16_34_45_pre-groq". Stems that do not start with the
 * YYYYMMDD_HH_MM_SS pattern are returned unchanged.
 */
function baselineTimestampFromName(stem) {
  const match = /^(\d{4})(\d{2})(\d{2})_(\d{2})_(\d{2})_(\d{2})(?:_|$)/.exec(stem);
  if (!match) {
    return stem;
  }
  const [, year, month, day, hour, minute, second] = match;
  return `${year}-${month}-${day}T${hour}:${minute}:${second}`;
}

/**
 * Collect the historical scores of one feature area from the baseline files.
 *
 * Every `*.json` file in `baselinesDir` is read in filename order. Files that
 * cannot be read or parsed, and files without a score entry for `area`, are
 * skipped.
 *
 * @param {string} area - Feature area, matched against `scores[].feature`.
 * @param {string} baselinesDir - Directory holding the baseline JSON files.
 * @returns {Array<{score: number, tag: (string|undefined), timestamp: string}>}
 *   One entry per usable baseline; empty when the directory does not exist.
 */
function loadHistory(area, baselinesDir) {
  if (!existsSync(baselinesDir)) {
    return [];
  }
  const files = readdirSync(baselinesDir)
    .filter((f) => f.endsWith(".json"))
    .sort();
  const entries = [];
  for (const file of files) {
    try {
      const data = JSON.parse(readFileSync(join(baselinesDir, file), "utf-8"));
      const areaScore = data.scores?.find((s) => s.feature === area);
      if (!areaScore) {
        continue;
      }
      // Filenames look like "20260304_16_34_45_pre-groq.json": the first four
      // underscore-separated parts are the timestamp, anything after is the tag.
      const nameWithoutExt = file.replace(/\.json$/, "");
      const parts = nameWithoutExt.split("_");
      const tag = parts.length > 4 ? parts.slice(4).join("_") : undefined;
      entries.push({
        score: areaScore.totalScore,
        tag,
        // Consumers take the first 10 characters as the date, so the filename
        // fallback is converted to an ISO-style string instead of being used raw.
        timestamp: data.timestamp ?? baselineTimestampFromName(nameWithoutExt),
      });
    } catch {
      // Deliberately best-effort: a malformed baseline must not break the report.
    }
  }
  return entries;
}
|
|
257
|
+
/**
 * Read and parse the score summary JSON file.
 *
 * @param {string} path - Location of score-summary.json.
 * @returns {object} The parsed score summary.
 * @throws {Error} When the file does not exist.
 * @throws {SyntaxError} When the file is not valid JSON.
 */
function loadScoreSummary(path) {
  if (existsSync(path)) {
    return JSON.parse(readFileSync(path, "utf-8"));
  }
  throw new Error(`Score summary not found at ${path}. Run \`pnpm pipeline\` first.`);
}
|
|
263
|
+
function loadThresholdConfig(path) {
|
|
264
|
+
if (!existsSync(path)) {
|
|
265
|
+
throw new Error(`Threshold config not found at ${path}.`);
|
|
266
|
+
}
|
|
267
|
+
const raw = readFileSync(path, "utf-8");
|
|
268
|
+
const parsed = load(raw);
|
|
269
|
+
const result = ThresholdConfigSchema.safeParse(parsed);
|
|
270
|
+
if (!result.success) {
|
|
271
|
+
const messages = result.error.issues
|
|
272
|
+
.map((i) => ` ${i.path.join(".")}: ${i.message}`)
|
|
273
|
+
.join("\n");
|
|
274
|
+
throw new Error(`Invalid thresholds.yaml:\n${messages}`);
|
|
275
|
+
}
|
|
276
|
+
return result.data;
|
|
277
|
+
}
|
|
278
|
+
// ---------------------------------------------------------------------------
|
|
279
|
+
// CLI
|
|
280
|
+
// ---------------------------------------------------------------------------
|
|
281
|
+
/**
 * CLI entry point: load the inputs, build the readiness report for the
 * requested area, emit it as markdown, and exit with status 1 when the area
 * does not pass.
 */
function main() {
  const cli = parseArgs(process.argv);

  // Inputs are loaded in a fixed order so the first missing one is reported first.
  const scoreSummary = loadScoreSummary(SCORE_SUMMARY_PATH);
  const thresholdConfig = loadThresholdConfig(THRESHOLDS_PATH);
  const gapAnalysis = loadGapAnalysis(GAP_ANALYSIS_PATH);
  // Baselines are only read when --history was requested.
  const history = cli.history ? loadHistory(cli.area, BASELINES_DIR) : [];

  const report = generateReadinessReport({
    area: cli.area,
    gapAnalysis,
    history,
    scoreSummary,
    thresholdConfig,
  });
  const markdown = formatReadinessMarkdown(report);

  if (!cli.output) {
    console.log(markdown);
  } else {
    writeFileSync(cli.output, markdown, "utf-8");
    // The status line goes to stderr, not stdout.
    console.error(`✅ Readiness report written to ${cli.output}`);
  }

  // A failing report is signalled through the process exit status.
  if (report.pass) {
    return;
  }
  process.exit(1);
}
|
|
310
|
+
/**
 * Parse the readiness-report command line.
 *
 * Recognises `--area <area>`, `--history` and `--output <file>`; anything
 * else is ignored. A value-taking flag that is the last argument is ignored
 * too. When no area was supplied, usage help is printed to stderr and the
 * process exits with status 1.
 *
 * @param {string[]} argv - Full argument vector; the first two entries are skipped.
 * @returns {{area: string, history: boolean, output: (string|undefined)}}
 */
function parseArgs(argv) {
  const args = argv.slice(2);
  let area;
  let history = false;
  let output;

  let i = 0;
  while (i < args.length) {
    const hasValue = i + 1 < args.length;
    switch (args[i]) {
      case "--area":
        if (hasValue) {
          i += 1;
          area = args[i];
        }
        break;
      case "--history":
        history = true;
        break;
      case "--output":
        if (hasValue) {
          i += 1;
          output = args[i];
        }
        break;
      default:
        break;
    }
    i += 1;
  }

  if (!area) {
    const usage = [
      "Usage: readiness-report --area <area> [--history] [--output <file>]",
      "",
      "Options:",
      " --area <area> Feature area to check (required)",
      " --history Include historical progress from baselines",
      " --output <file> Write markdown to file instead of stdout",
    ];
    for (const line of usage) {
      console.error(line);
    }
    process.exit(1);
  }
  return { area, history, output };
}
|
|
338
|
+
// Run the CLI only when this module is the script being executed (either the
// TypeScript source or the compiled JavaScript), not when it is imported.
if ([".ts", ".js"].some((ext) => process.argv[1]?.endsWith(`readiness-report${ext}`))) {
  main();
}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* update-quality-scores.ts
|
|
3
|
+
*
|
|
4
|
+
* Reads score-summary.json and updates the feature area quality grades
|
|
5
|
+
* table in docs/QUALITY_SCORE.md. Designed to run automatically after
|
|
6
|
+
* each evaluation as the final pipeline step.
|
|
7
|
+
*
|
|
8
|
+
* Usage:
|
|
9
|
+
* pnpm update-quality-scores
|
|
10
|
+
* tsx src/scripts/update-quality-scores.ts
|
|
11
|
+
*/
|
|
12
|
+
export declare function updateQualityScores(): {
|
|
13
|
+
success: boolean;
|
|
14
|
+
message: string;
|
|
15
|
+
};
|
|
@@ -0,0 +1,184 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* update-quality-scores.ts
|
|
3
|
+
*
|
|
4
|
+
* Reads score-summary.json and updates the feature area quality grades
|
|
5
|
+
* table in docs/QUALITY_SCORE.md. Designed to run automatically after
|
|
6
|
+
* each evaluation as the final pipeline step.
|
|
7
|
+
*
|
|
8
|
+
* Usage:
|
|
9
|
+
* pnpm update-quality-scores
|
|
10
|
+
* tsx src/scripts/update-quality-scores.ts
|
|
11
|
+
*/
|
|
12
|
+
import { execSync } from "child_process";
|
|
13
|
+
import { existsSync, readFileSync, writeFileSync } from "fs";
|
|
14
|
+
import { dirname, join, resolve } from "path";
|
|
15
|
+
import { fileURLToPath } from "url";
|
|
16
|
+
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
17
|
+
const ROOT = resolve(__dirname, "..", "..");
|
|
18
|
+
const REPO_ROOT = resolve(ROOT, "..", "..");
|
|
19
|
+
const QUALITY_SCORE_PATH = join(REPO_ROOT, "docs", "QUALITY_SCORE.md");
|
|
20
|
+
const SCORE_SUMMARY_PATH = join(ROOT, "results", "latest", "score-summary.json");
|
|
21
|
+
// ---------------------------------------------------------------------------
|
|
22
|
+
// File update and table generation
|
|
23
|
+
// ---------------------------------------------------------------------------
|
|
24
|
+
/**
 * Regenerate the feature-area quality grades table in QUALITY_SCORE.md from
 * the latest score summary.
 *
 * Reads SCORE_SUMMARY_PATH, normalises the legacy `liftFromDocs` field to
 * `docLift`, replaces the existing table in QUALITY_SCORE_PATH with a freshly
 * generated one, writes the file back, and then tries to run Prettier on it.
 * A Prettier failure is ignored because formatting is only cosmetic.
 *
 * Missing files, unparsable JSON, an empty score list and a missing table
 * are all reported through the returned object rather than thrown.
 *
 * @returns {{message: string, success: boolean}} Outcome with a human-readable message.
 */
export function updateQualityScores() {
  // Read score summary
  if (!existsSync(SCORE_SUMMARY_PATH)) {
    return {
      message: `Score summary not found at ${SCORE_SUMMARY_PATH}. Run 'pnpm calculate-scores' first.`,
      success: false,
    };
  }
  let summary;
  try {
    const parsed = JSON.parse(readFileSync(SCORE_SUMMARY_PATH, "utf-8"));
    // Normalize legacy field names (liftFromDocs → docLift)
    summary = {
      ...parsed,
      scores: parsed.scores.map((s) => ({
        ...s,
        docLift: s.docLift ?? s.liftFromDocs ?? 0,
      })),
    };
  } catch (err) {
    return {
      message: `Failed to parse score summary: ${err instanceof Error ? err.message : String(err)}`,
      success: false,
    };
  }
  if (!summary.scores || summary.scores.length === 0) {
    return { message: "Score summary contains no scores.", success: false };
  }

  // Read QUALITY_SCORE.md
  if (!existsSync(QUALITY_SCORE_PATH)) {
    return {
      message: `QUALITY_SCORE.md not found at ${QUALITY_SCORE_PATH}.`,
      success: false,
    };
  }
  let markdown = readFileSync(QUALITY_SCORE_PATH, "utf-8");

  // Replace the feature area table. The final row may end at end-of-file
  // without a trailing newline, so each row accepts either terminator;
  // otherwise a stale last row would survive the replacement.
  const newTable = generateTable(summary.scores);
  const tablePattern = /\| Feature Area\s+\| Score\s+\| Grade\s+\| Doc Lift\s+\| Key gap[^|]*\|\n\| [-\s|]+\|\n(\|[^\n]+\|(?:\n|$))*/;
  const match = tablePattern.exec(markdown);
  if (!match) {
    return {
      message: "Could not find the feature area quality grades table in QUALITY_SCORE.md.",
      success: false,
    };
  }
  markdown =
    markdown.slice(0, match.index) +
    newTable +
    "\n" +
    markdown.slice(match.index + match[0].length);

  // Write back
  writeFileSync(QUALITY_SCORE_PATH, markdown);

  // Format with Prettier to ensure consistent table formatting
  // (emoji widths differ between padEnd and Prettier's table formatter).
  // The path is quoted because the command runs through a shell and would
  // otherwise be split at any space in the repository path.
  try {
    execSync(`npx prettier --write "${QUALITY_SCORE_PATH}"`, {
      cwd: REPO_ROOT,
      stdio: "pipe",
    });
  } catch {
    // Non-fatal — formatting is nice-to-have
  }

  // The averages are optional in the message: a summary without `overall`
  // must not throw after the file has already been updated.
  const overall = summary.overall;
  const stats = overall
    ? ` (avg: ${Math.round(overall.avgScore)}, lift: +${Math.round(overall.avgDocLift)})`
    : "";
  return {
    message: `Updated ${summary.scores.length} feature area scores in QUALITY_SCORE.md${stats}`,
    success: true,
  };
}
|
|
97
|
+
/**
 * Render the feature-area quality table as a markdown string.
 *
 * Rows are ordered by `totalScore`, highest first; the input array is not
 * mutated. Column widths for the feature and key-gap columns grow to fit
 * the data, the remaining columns use fixed widths.
 *
 * @param {Array<object>} scores - Per-area score entries (`feature`,
 *   `totalScore`, `docLift` plus whatever `keyGap` reads).
 * @returns {string} Header, separator and one row per area, joined by "\n"
 *   with no trailing newline.
 */
function generateTable(scores) {
  // Sort a copy by score descending so the caller's array stays untouched.
  const sorted = [...scores].sort((a, b) => b.totalScore - a.totalScore);

  const rows = sorted.map((s) => ({
    feature: s.feature,
    gap: keyGap(s, scores),
    grade: grade(s.totalScore),
    // Only non-negative lifts get the "+" prefix; a negative lift already
    // carries its own sign and would otherwise render as "+-5".
    lift: s.docLift < 0 ? String(s.docLift) : `+${s.docLift}`,
    score: String(s.totalScore),
  }));

  // Column widths: data-driven where content varies, with header-based minimums.
  const cols = {
    feature: Math.max(14, ...rows.map((r) => r.feature.length)),
    gap: Math.max(7, ...rows.map((r) => r.gap.length)),
    grade: 5,
    lift: 8,
    score: 5,
  };

  const fmtRow = (r) =>
    `| ${r.feature.padEnd(cols.feature)} | ${r.score.padEnd(cols.score)} | ${r.grade.padEnd(cols.grade)} | ${r.lift.padEnd(cols.lift)} | ${r.gap.padEnd(cols.gap)} |`;

  const header = fmtRow({
    feature: "Feature Area",
    gap: "Key gap",
    grade: "Grade",
    lift: "Doc Lift",
    score: "Score",
  });
  const sep = `| ${"-".repeat(cols.feature)} | ${"-".repeat(cols.score)} | ${"-".repeat(cols.grade)} | ${"-".repeat(cols.lift)} | ${"-".repeat(cols.gap)} |`;

  return [header, sep, ...rows.map(fmtRow)].join("\n");
}
|
|
127
|
+
// ---------------------------------------------------------------------------
|
|
128
|
+
// Grading
|
|
129
|
+
// ---------------------------------------------------------------------------
|
|
130
|
+
/**
 * Map a numeric score onto its letter grade with status emoji.
 *
 * Bands: 80 and above is A, 60 and above is B, 40 and above is C, anything
 * else is D.
 *
 * @param {number} score - Total score of a feature area.
 * @returns {string} Emoji plus grade letter.
 */
function grade(score) {
  // Ordered from the highest band down; the first band reached wins.
  const bands = [
    [80, "✅ A"],
    [60, "🟡 B"],
    [40, "🟠 C"],
  ];
  for (const [floor, label] of bands) {
    if (score >= floor) {
      return label;
    }
  }
  return "🔴 D";
}
|
|
139
|
+
// ---------------------------------------------------------------------------
|
|
140
|
+
// Key gap description
|
|
141
|
+
// ---------------------------------------------------------------------------
|
|
142
|
+
/**
 * Produce the short "Key gap" note for one feature area.
 *
 * The first matching rule wins:
 *   1. total score below 40: blanket "below critical" warning;
 *   2. highest total score of all areas: "Strong" note naming the weakest dimension;
 *   3. highest doc lift of all areas: "Highest doc lift" note naming the weakest dimension;
 *   4. weakest dimension scored exactly 0: "Zero … score" note;
 *   5. otherwise: the weakest dimension is named as holding the score back.
 *
 * @param {object} s - Score entry of the area being described.
 * @param {Array<object>} allScores - Score entries of every area, for comparison.
 * @returns {string} The note text.
 */
function keyGap(s, allScores) {
  if (s.totalScore < 40) {
    return "⚠️ Below critical — all dimensions underperform";
  }

  // Rank the dimensions by achieved fraction of their maximum (all 0–100);
  // the lowest fraction comes first.
  const ranked = [
    { max: 100, name: "task completion", score: s.taskCompletion },
    { max: 100, name: "code correctness", score: s.codeCorrectness },
    { max: 100, name: "doc coverage", score: s.docCoverage },
  ];
  ranked.sort((a, b) => a.score / a.max - b.score / b.max);
  const [weakest] = ranked;
  const weakestSummary = `${weakest.name} (${weakest.score}/${weakest.max})`;

  // Notable strengths take precedence over the weakness-only messages.
  const topScore = Math.max(...allScores.map((entry) => entry.totalScore));
  if (s.totalScore === topScore) {
    return `Strong — highest score; ${weakestSummary}`;
  }
  const topLift = Math.max(...allScores.map((entry) => entry.docLift));
  if (s.docLift === topLift) {
    return `Highest doc lift; ${weakestSummary}`;
  }

  if (weakest.score === 0) {
    return `Zero ${weakest.name} score; lowest doc lift`;
  }
  const capitalised = weakest.name[0].toUpperCase() + weakest.name.slice(1);
  return `${capitalised} (${weakest.score}/${weakest.max}) holds back total score`;
}
|
|
170
|
+
// ---------------------------------------------------------------------------
|
|
171
|
+
// Main (when run directly)
|
|
172
|
+
// ---------------------------------------------------------------------------
|
|
173
|
+
// Run the update only when this module is the script being executed (either
// the TypeScript source or the compiled JavaScript), not when it is imported.
if ([".ts", ".js"].some((ext) => process.argv[1]?.endsWith(`update-quality-scores${ext}`))) {
  console.log("=== Updating QUALITY_SCORE.md from score-summary.json ===\n");
  const { message, success } = updateQualityScores();
  if (!success) {
    // Failures go to stderr and are reflected in the exit status.
    console.error(` ❌ ${message}`);
    process.exit(1);
  }
  console.log(` ✅ ${message}`);
}
|
|
@@ -16,6 +16,6 @@
|
|
|
16
16
|
* - Migration script has been run (ailf.task documents exist in CL)
|
|
17
17
|
* - SANITY_API_TOKEN configured for Content Lake reads
|
|
18
18
|
*
|
|
19
|
-
* @see docs/exec-plans/tasks-as-content/phase-3-migration.md
|
|
19
|
+
* @see docs/archive/exec-plans/tasks-as-content/phase-3-migration.md
|
|
20
20
|
*/
|
|
21
21
|
export {};
|
|
@@ -16,7 +16,7 @@
|
|
|
16
16
|
* - Migration script has been run (ailf.task documents exist in CL)
|
|
17
17
|
* - SANITY_API_TOKEN configured for Content Lake reads
|
|
18
18
|
*
|
|
19
|
-
* @see docs/exec-plans/tasks-as-content/phase-3-migration.md
|
|
19
|
+
* @see docs/archive/exec-plans/tasks-as-content/phase-3-migration.md
|
|
20
20
|
*/
|
|
21
21
|
import { config as dotenvConfig } from "dotenv";
|
|
22
22
|
import { existsSync } from "fs";
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* validate.ts
|
|
3
|
+
*
|
|
4
|
+
* CLI script that validates pipeline configuration.
|
|
5
|
+
* Checks that all YAML files are consistent, all task-to-mapping
|
|
6
|
+
* cross-references are valid, and reference solutions exist.
|
|
7
|
+
*
|
|
8
|
+
* Usage:
|
|
9
|
+
* pnpm validate # validate everything
|
|
10
|
+
* pnpm validate --strict # treat warnings as errors
|
|
11
|
+
* pnpm validate --contexts # also check that context files exist
|
|
12
|
+
*/
|
|
13
|
+
export {};
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* validate.ts
|
|
3
|
+
*
|
|
4
|
+
* CLI script that validates pipeline configuration.
|
|
5
|
+
* Checks that all YAML files are consistent, all task-to-mapping
|
|
6
|
+
* cross-references are valid, and reference solutions exist.
|
|
7
|
+
*
|
|
8
|
+
* Usage:
|
|
9
|
+
* pnpm validate # validate everything
|
|
10
|
+
* pnpm validate --strict # treat warnings as errors
|
|
11
|
+
* pnpm validate --contexts # also check that context files exist
|
|
12
|
+
*/
|
|
13
|
+
import { dirname, resolve } from "path";
|
|
14
|
+
import { fileURLToPath } from "url";
|
|
15
|
+
import { checkContextsExist, checkEnvironment } from "../pipeline/checks.js";
|
|
16
|
+
import { validateConfiguration } from "../pipeline/validate.js";
|
|
17
|
+
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
18
|
+
const ROOT = resolve(__dirname, "..", "..");
|
|
19
|
+
// ---------------------------------------------------------------------------
|
|
20
|
+
// CLI argument parsing
|
|
21
|
+
// ---------------------------------------------------------------------------
|
|
22
|
+
// CLI flags: --strict makes any reported issue fail the run; --contexts adds
// the context-file existence check.
const args = process.argv.slice(2);
const strict = args.includes("--strict");
const checkCtx = args.includes("--contexts");
// ---------------------------------------------------------------------------
// Run validation
// ---------------------------------------------------------------------------
console.log("=== ai-literacy-framework — Configuration Validator ===\n");
const result = validateConfiguration(ROOT);
// Optionally check contexts
if (checkCtx) {
  // Dynamically import to get feature areas
  const { ALL_FEATURE_AREAS } = await import("../sanity/queries.js");
  const contextIssues = checkContextsExist(ROOT, ALL_FEATURE_AREAS);
  result.issues.push(...contextIssues);
  result.valid =
    result.valid && contextIssues.every((i) => i.severity !== "error");
}
// Check environment
const envIssues = checkEnvironment(ROOT);
result.issues.push(...envIssues);
// ---------------------------------------------------------------------------
// Report results
// ---------------------------------------------------------------------------
const errors = result.issues.filter((i) => i.severity === "error");
const warnings = result.issues.filter((i) => i.severity === "warning");
if (errors.length > 0) {
  console.log(`❌ ${errors.length} error(s):\n`);
  for (const issue of errors) {
    console.log(` ERROR [${issue.source}] ${issue.message}`);
    if (issue.path) {
      console.log(` at ${issue.path}`);
    }
  }
  console.log();
}
if (warnings.length > 0) {
  console.log(`⚠️ ${warnings.length} warning(s):\n`);
  for (const issue of warnings) {
    console.log(` WARN [${issue.source}] ${issue.message}`);
    if (issue.path) {
      console.log(` at ${issue.path}`);
    }
  }
  console.log();
}
// In strict mode every reported issue fails the run; otherwise only errors do.
const failed = strict ? result.issues.length > 0 : errors.length > 0;
// The closing line follows the same decision as the exit code, so a strict
// run that fails on warnings no longer announces a valid configuration.
if (errors.length === 0) {
  if (failed) {
    console.log(`❌ Strict mode: ${result.issues.length} issue(s) treated as errors.\n`);
  } else if (warnings.length === 0) {
    console.log("✅ All checks passed — configuration is valid.\n");
  } else {
    console.log(`✅ Configuration is valid (${warnings.length} warning(s)).\n`);
  }
}
process.exit(failed ? 1 : 0);
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* webhook-server.ts
|
|
3
|
+
*
|
|
4
|
+
* Local development server for testing the webhook handler.
|
|
5
|
+
*
|
|
6
|
+
* Starts an HTTP server that receives Sanity webhook payloads, processes
|
|
7
|
+
* them through the WebhookHandler, and logs results. Useful for local
|
|
8
|
+
* development and testing the full event-driven trigger flow.
|
|
9
|
+
*
|
|
10
|
+
* Usage:
|
|
11
|
+
* pnpm webhook-server # start on port 3333
|
|
12
|
+
* WEBHOOK_PORT=8080 pnpm webhook-server # custom port
|
|
13
|
+
*
|
|
14
|
+
* Test with curl:
|
|
15
|
+
* curl -X POST http://localhost:3333/webhook \
|
|
16
|
+
* -H "Content-Type: application/json" \
|
|
17
|
+
* -d '{"operation":"update","result":{"_id":"abc","_type":"article","slug":{"current":"groq-introduction"}}}'
|
|
18
|
+
*
|
|
19
|
+
* Endpoints:
|
|
20
|
+
* POST /webhook — handle a Sanity webhook payload
|
|
21
|
+
* GET /health — handler diagnostics (budget, pending, tracked slugs)
|
|
22
|
+
* GET /mappings — list all tracked document slugs and their areas
|
|
23
|
+
*
|
|
24
|
+
* @see docs/design-docs/report-store/visibility-workflows.md
|
|
25
|
+
*/
|
|
26
|
+
export {};
|