@sanity/ailf 2.0.0 → 2.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/canonical/grader-references/README.md +2 -2
- package/canonical/reference-solutions/content-lake/mutations.ts +160 -0
- package/canonical/reference-solutions/content-lake/realtime.ts +187 -0
- package/canonical/reference-solutions/image-handling/asset-pipeline.tsx +166 -0
- package/canonical/reference-solutions/portable-text/custom-blocks.ts +204 -0
- package/canonical/reference-solutions/portable-text/rendering.tsx +163 -0
- package/config/features.ts +1 -1
- package/config/models.ts +28 -23
- package/config/sources.ts +1 -1
- package/config/thresholds.ts +1 -1
- package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.d.ts +10 -0
- package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.js +185 -0
- package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.d.ts +6 -0
- package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.js +42 -0
- package/dist/_vendor/ailf-core/artifact-capture/noop-collector.d.ts +14 -0
- package/dist/_vendor/ailf-core/artifact-capture/noop-collector.js +25 -0
- package/dist/_vendor/ailf-core/config-helpers.d.ts +6 -0
- package/dist/_vendor/ailf-core/config-helpers.js +29 -0
- package/dist/_vendor/ailf-core/examples/index.d.ts +164 -94
- package/dist/_vendor/ailf-core/examples/index.js +208 -114
- package/dist/_vendor/ailf-core/index.d.ts +1 -0
- package/dist/_vendor/ailf-core/index.js +1 -0
- package/dist/_vendor/ailf-core/ports/artifact-collector.d.ts +94 -0
- package/dist/_vendor/ailf-core/ports/artifact-collector.js +13 -0
- package/dist/_vendor/ailf-core/ports/capture-comparator.d.ts +138 -0
- package/dist/_vendor/ailf-core/ports/capture-comparator.js +10 -0
- package/dist/_vendor/ailf-core/ports/context.d.ts +20 -1
- package/dist/_vendor/ailf-core/ports/eval-runner.d.ts +6 -0
- package/dist/_vendor/ailf-core/ports/index.d.ts +2 -0
- package/dist/_vendor/ailf-core/ports/pipeline-step.d.ts +11 -0
- package/dist/_vendor/ailf-core/ports/task-source.d.ts +3 -3
- package/dist/_vendor/ailf-core/ports/task-source.js +3 -3
- package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +6 -1
- package/dist/_vendor/ailf-core/schemas/pipeline-request.js +14 -2
- package/dist/_vendor/ailf-core/services/config-helpers.d.ts +16 -1
- package/dist/_vendor/ailf-core/services/config-helpers.js +21 -0
- package/dist/_vendor/ailf-core/services/index.d.ts +1 -1
- package/dist/_vendor/ailf-core/services/index.js +1 -1
- package/dist/_vendor/ailf-core/services/scoring.js +9 -0
- package/dist/_vendor/ailf-core/types/generalized-task.d.ts +12 -1
- package/dist/_vendor/ailf-core/types/generalized-task.js +1 -1
- package/dist/_vendor/ailf-core/types/index.d.ts +47 -4
- package/dist/_vendor/ailf-core/types/plugin-registry.d.ts +27 -0
- package/dist/_vendor/ailf-shared/eval-modes.d.ts +15 -0
- package/dist/_vendor/ailf-shared/eval-modes.js +18 -0
- package/dist/_vendor/ailf-tasks/cli.d.ts +8 -0
- package/dist/_vendor/ailf-tasks/cli.js +61 -0
- package/dist/_vendor/ailf-tasks/index.d.ts +13 -0
- package/dist/_vendor/ailf-tasks/index.js +16 -0
- package/dist/_vendor/ailf-tasks/parser.d.ts +27 -0
- package/dist/_vendor/ailf-tasks/parser.js +73 -0
- package/dist/_vendor/ailf-tasks/schemas.d.ts +198 -0
- package/dist/_vendor/ailf-tasks/schemas.js +180 -0
- package/dist/_vendor/ailf-tasks/validation.d.ts +47 -0
- package/dist/_vendor/ailf-tasks/validation.js +162 -0
- package/dist/adapters/api-client/remediation.js +2 -2
- package/dist/adapters/config-sources/file-config-adapter.js +6 -1
- package/dist/adapters/eval-runners/promptfoo-eval-adapter.js +8 -2
- package/dist/adapters/index.d.ts +0 -1
- package/dist/adapters/index.js +0 -1
- package/dist/adapters/task-sources/composite-task-source.d.ts +1 -1
- package/dist/adapters/task-sources/composite-task-source.js +1 -1
- package/dist/adapters/task-sources/content-lake-task-source.d.ts +4 -6
- package/dist/adapters/task-sources/content-lake-task-source.js +4 -6
- package/dist/adapters/task-sources/index.d.ts +1 -2
- package/dist/adapters/task-sources/index.js +1 -2
- package/dist/adapters/task-sources/repo-schemas.d.ts +1 -1
- package/dist/adapters/task-sources/repo-schemas.js +2 -2
- package/dist/adapters/task-sources/repo-task-source.js +1 -1
- package/dist/adapters/task-sources/repo-trigger.d.ts +1 -1
- package/dist/adapters/task-sources/repo-trigger.js +1 -1
- package/dist/adapters/task-sources/task-file-loader.d.ts +9 -6
- package/dist/adapters/task-sources/task-file-loader.js +20 -6
- package/dist/agent-observer/test-imports.d.ts +7 -0
- package/dist/agent-observer/test-imports.js +185 -0
- package/dist/artifact-capture/comparator.d.ts +22 -0
- package/dist/artifact-capture/comparator.js +493 -0
- package/dist/artifact-capture/filesystem-collector.d.ts +42 -0
- package/dist/artifact-capture/filesystem-collector.js +237 -0
- package/dist/artifact-capture/redact-artifact.d.ts +20 -0
- package/dist/artifact-capture/redact-artifact.js +115 -0
- package/dist/assertions/source-isolation.d.ts +1 -1
- package/dist/assertions/source-isolation.js +1 -1
- package/dist/cli.js +4 -0
- package/dist/commands/calculate-scores.js +1 -0
- package/dist/commands/capture-compare.d.ts +15 -0
- package/dist/commands/capture-compare.js +253 -0
- package/dist/commands/capture-list.d.ts +12 -0
- package/dist/commands/capture-list.js +147 -0
- package/dist/commands/capture.d.ts +9 -0
- package/dist/commands/capture.js +16 -0
- package/dist/commands/chronic-failures.d.ts +8 -0
- package/dist/commands/chronic-failures.js +33 -0
- package/dist/commands/explain-handler.d.ts +1 -1
- package/dist/commands/explain-handler.js +37 -8
- package/dist/commands/fetch-docs.js +1 -0
- package/dist/commands/generate-configs.d.ts +3 -3
- package/dist/commands/generate-configs.js +20 -8
- package/dist/commands/init.d.ts +2 -3
- package/dist/commands/init.js +56 -170
- package/dist/commands/pipeline-action.d.ts +7 -1
- package/dist/commands/pipeline-action.js +43 -19
- package/dist/commands/pipeline.d.ts +6 -1
- package/dist/commands/pipeline.js +7 -2
- package/dist/commands/pr-comment.js +1 -0
- package/dist/commands/publish.js +1 -0
- package/dist/commands/shared/help.js +2 -2
- package/dist/commands/update-quality-scores.d.ts +5 -0
- package/dist/commands/update-quality-scores.js +20 -0
- package/dist/composition-root.d.ts +2 -3
- package/dist/composition-root.js +27 -14
- package/dist/config/features.ts +23 -0
- package/dist/config/models.ts +100 -0
- package/dist/config/prompts.ts +16 -0
- package/dist/config/rubrics.ts +225 -0
- package/dist/config/schedules.ts +47 -0
- package/dist/config/sinks.ts +37 -0
- package/dist/config/sources.ts +21 -0
- package/dist/config/thresholds.ts +61 -0
- package/dist/lib/agent-behavior-report.d.ts +8 -0
- package/dist/lib/agent-behavior-report.js +185 -0
- package/dist/lib/baseline.d.ts +19 -0
- package/dist/lib/baseline.js +153 -0
- package/dist/lib/calculate-scores.d.ts +23 -0
- package/dist/lib/calculate-scores.js +42 -0
- package/dist/lib/compare.d.ts +18 -0
- package/dist/lib/compare.js +170 -0
- package/dist/lib/coverage-audit.d.ts +4 -0
- package/dist/lib/coverage-audit.js +42 -0
- package/dist/lib/discovery-report.d.ts +13 -0
- package/dist/lib/discovery-report.js +57 -0
- package/dist/lib/fetch-docs.d.ts +30 -0
- package/dist/lib/fetch-docs.js +171 -0
- package/dist/lib/generate-configs.d.ts +25 -0
- package/dist/lib/generate-configs.js +42 -0
- package/dist/lib/grader-api.d.ts +21 -0
- package/dist/lib/grader-api.js +34 -0
- package/dist/lib/grader-compare.d.ts +19 -0
- package/dist/lib/grader-compare.js +91 -0
- package/dist/lib/grader-consistency.d.ts +27 -0
- package/dist/lib/grader-consistency.js +79 -0
- package/dist/lib/grader-sensitivity.d.ts +19 -0
- package/dist/lib/grader-sensitivity.js +75 -0
- package/dist/lib/grader-validate.d.ts +19 -0
- package/dist/lib/grader-validate.js +78 -0
- package/dist/lib/measure-retrieval.d.ts +14 -0
- package/dist/lib/measure-retrieval.js +71 -0
- package/dist/lib/pr-comment.d.ts +16 -0
- package/dist/lib/pr-comment.js +28 -0
- package/dist/lib/readiness-report.d.ts +13 -0
- package/dist/lib/readiness-report.js +108 -0
- package/dist/lib/webhook-server.d.ts +11 -0
- package/dist/lib/webhook-server.js +24 -0
- package/dist/lib/weekly-digest.d.ts +24 -0
- package/dist/lib/weekly-digest.js +148 -0
- package/dist/orchestration/build-app-context.js +13 -0
- package/dist/orchestration/cache-context.d.ts +23 -0
- package/dist/orchestration/cache-context.js +43 -0
- package/dist/orchestration/env-bridge.d.ts +21 -0
- package/dist/orchestration/env-bridge.js +66 -0
- package/dist/orchestration/load-pipeline-tasks.d.ts +34 -0
- package/dist/orchestration/load-pipeline-tasks.js +52 -0
- package/dist/orchestration/pipeline-orchestrator.js +75 -5
- package/dist/orchestration/step-runner.js +5 -1
- package/dist/orchestration/steps/calculate-scores-step.d.ts +1 -0
- package/dist/orchestration/steps/calculate-scores-step.js +13 -0
- package/dist/orchestration/steps/callback-step.js +10 -1
- package/dist/orchestration/steps/compare-step.js +6 -3
- package/dist/orchestration/steps/discovery-report-step.js +6 -2
- package/dist/orchestration/steps/fetch-docs-shell.d.ts +17 -0
- package/dist/orchestration/steps/fetch-docs-shell.js +30 -0
- package/dist/orchestration/steps/fetch-docs-step.d.ts +1 -0
- package/dist/orchestration/steps/fetch-docs-step.js +30 -16
- package/dist/orchestration/steps/gap-analysis-step.js +13 -2
- package/dist/orchestration/steps/generate-configs-step.d.ts +1 -0
- package/dist/orchestration/steps/generate-configs-step.js +50 -15
- package/dist/orchestration/steps/mirror-repo-tasks-step.d.ts +1 -1
- package/dist/orchestration/steps/mirror-repo-tasks-step.js +1 -1
- package/dist/orchestration/steps/publish-report-step.js +19 -0
- package/dist/orchestration/steps/readiness-step.js +8 -3
- package/dist/orchestration/steps/report-step.js +17 -4
- package/dist/orchestration/steps/run-eval-step.d.ts +1 -0
- package/dist/orchestration/steps/run-eval-step.js +52 -32
- package/dist/pipeline/agent-behavior-report.js +6 -0
- package/dist/pipeline/attribution.d.ts +1 -1
- package/dist/pipeline/attribution.js +1 -1
- package/dist/pipeline/cache.js +29 -15
- package/dist/pipeline/calculate-scores.d.ts +2 -0
- package/dist/pipeline/calculate-scores.js +70 -33
- package/dist/pipeline/checks.d.ts +8 -3
- package/dist/pipeline/checks.js +23 -3
- package/dist/pipeline/chronic-failures.d.ts +55 -0
- package/dist/pipeline/chronic-failures.js +110 -0
- package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +33 -0
- package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.js +2 -3
- package/dist/pipeline/compiler/__tests__/task-bridge.test.d.ts +9 -0
- package/dist/pipeline/compiler/__tests__/task-bridge.test.js +339 -0
- package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.d.ts +10 -0
- package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.js +509 -0
- package/dist/pipeline/compiler/assertion-mapper.d.ts +1 -1
- package/dist/pipeline/compiler/assertion-mapper.js +1 -1
- package/dist/pipeline/compiler/compiler-to-yaml.d.ts +2 -7
- package/dist/pipeline/compiler/compiler-to-yaml.js +2 -7
- package/dist/pipeline/compiler/config-loader.d.ts +14 -0
- package/dist/pipeline/compiler/config-loader.js +42 -2
- package/dist/pipeline/compiler/fixture-resolver.d.ts +1 -1
- package/dist/pipeline/compiler/fixture-resolver.js +1 -1
- package/dist/pipeline/compiler/ignore-fields.d.ts +1 -1
- package/dist/pipeline/compiler/ignore-fields.js +1 -1
- package/dist/pipeline/compiler/index.d.ts +2 -5
- package/dist/pipeline/compiler/index.js +2 -5
- package/dist/pipeline/compiler/literacy-bridge.d.ts +1 -1
- package/dist/pipeline/compiler/literacy-bridge.js +1 -1
- package/dist/pipeline/compiler/mode-bases/agent-harness.d.ts +1 -1
- package/dist/pipeline/compiler/mode-bases/agent-harness.js +1 -1
- package/dist/pipeline/compiler/mode-bases/knowledge-probe.d.ts +1 -1
- package/dist/pipeline/compiler/mode-bases/knowledge-probe.js +1 -1
- package/dist/pipeline/compiler/mode-bases/literacy.d.ts +13 -2
- package/dist/pipeline/compiler/mode-bases/literacy.js +55 -1
- package/dist/pipeline/compiler/mode-bases/mcp-server.d.ts +1 -1
- package/dist/pipeline/compiler/mode-bases/mcp-server.js +1 -1
- package/dist/pipeline/compiler/mode-handlers/agent-harness/index.d.ts +1 -1
- package/dist/pipeline/compiler/mode-handlers/agent-harness/index.js +1 -1
- package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.d.ts +70 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.js +485 -0
- package/dist/pipeline/compiler/mode-handlers/index.d.ts +2 -2
- package/dist/pipeline/compiler/mode-handlers/index.js +2 -2
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.d.ts +1 -1
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.js +1 -1
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.d.ts +76 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.js +245 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/index.d.ts +1 -1
- package/dist/pipeline/compiler/mode-handlers/literacy/index.js +1 -1
- package/dist/pipeline/compiler/mode-handlers/literacy-handler.d.ts +89 -0
- package/dist/pipeline/compiler/mode-handlers/literacy-handler.js +379 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-assertions.d.ts +50 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-assertions.js +334 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.d.ts +1 -1
- package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.js +1 -1
- package/dist/pipeline/compiler/mode-handlers/mcp-server/index.d.ts +1 -1
- package/dist/pipeline/compiler/mode-handlers/mcp-server/index.js +1 -1
- package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.d.ts +69 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.js +307 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.js +22 -5
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.js +6 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.d.ts +10 -5
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.js +314 -7
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.d.ts +10 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.d.ts +65 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.js +368 -0
- package/dist/pipeline/compiler/presets/sanity-literacy.d.ts +1 -1
- package/dist/pipeline/compiler/presets/sanity-literacy.js +1 -1
- package/dist/pipeline/compiler/promptfoo-compiler.d.ts +1 -4
- package/dist/pipeline/compiler/promptfoo-compiler.js +3 -12
- package/dist/pipeline/compiler/provider-assembler.js +13 -7
- package/dist/pipeline/compiler/sandbox/docker-sandbox.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/docker-sandbox.js +1 -1
- package/dist/pipeline/compiler/sandbox/fixture-provisioner.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/fixture-provisioner.js +1 -1
- package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.js +1 -1
- package/dist/pipeline/compiler/sandbox/index.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/index.js +1 -1
- package/dist/pipeline/compiler/sandbox/sandbox-selector.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/sandbox-selector.js +1 -1
- package/dist/pipeline/compiler/sandbox/sandbox-strategy.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/sandbox-strategy.js +1 -1
- package/dist/pipeline/compiler/sandbox/tempdir-sandbox.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/tempdir-sandbox.js +1 -1
- package/dist/pipeline/compiler/scoring-bridge.d.ts +1 -1
- package/dist/pipeline/compiler/scoring-bridge.js +1 -1
- package/dist/pipeline/compiler/task-bridge.d.ts +41 -0
- package/dist/pipeline/compiler/task-bridge.js +92 -0
- package/dist/pipeline/compiler/task-graph-builder.d.ts +1 -4
- package/dist/pipeline/compiler/task-graph-builder.js +1 -4
- package/dist/pipeline/compiler/telemetry/index.d.ts +1 -1
- package/dist/pipeline/compiler/telemetry/index.js +1 -1
- package/dist/pipeline/compiler/variable-resolver.d.ts +1 -1
- package/dist/pipeline/compiler/variable-resolver.js +1 -1
- package/dist/pipeline/coverage-audit.d.ts +1 -1
- package/dist/pipeline/coverage-audit.js +1 -1
- package/dist/pipeline/degradations.d.ts +1 -1
- package/dist/pipeline/degradations.js +1 -1
- package/dist/pipeline/failure-modes.d.ts +1 -1
- package/dist/pipeline/failure-modes.js +13 -1
- package/dist/pipeline/gap-analysis.d.ts +1 -1
- package/dist/pipeline/gap-analysis.js +3 -1
- package/dist/pipeline/generate-configs.d.ts +2 -2
- package/dist/pipeline/generate-configs.js +15 -8
- package/dist/pipeline/grader-compare-runner.d.ts +1 -1
- package/dist/pipeline/grader-compare-runner.js +7 -1
- package/dist/pipeline/grader-comparison.d.ts +1 -1
- package/dist/pipeline/grader-comparison.js +1 -1
- package/dist/pipeline/grader-consistency-runner.d.ts +1 -1
- package/dist/pipeline/grader-consistency-runner.js +7 -1
- package/dist/pipeline/grader-consistency.d.ts +1 -1
- package/dist/pipeline/grader-consistency.js +1 -1
- package/dist/pipeline/grader-sensitivity-runner.d.ts +1 -1
- package/dist/pipeline/grader-sensitivity-runner.js +1 -1
- package/dist/pipeline/grader-sensitivity.d.ts +1 -1
- package/dist/pipeline/grader-sensitivity.js +1 -1
- package/dist/pipeline/grader-validate-runner.d.ts +1 -1
- package/dist/pipeline/grader-validate-runner.js +2 -2
- package/dist/pipeline/grader-validation.d.ts +1 -1
- package/dist/pipeline/grader-validation.js +1 -1
- package/dist/pipeline/map-request-to-config.js +15 -2
- package/dist/pipeline/mirror-repo-tasks.d.ts +1 -1
- package/dist/pipeline/mirror-repo-tasks.js +1 -1
- package/dist/pipeline/plan-format.d.ts +1 -1
- package/dist/pipeline/plan-format.js +1 -1
- package/dist/pipeline/plan.d.ts +1 -1
- package/dist/pipeline/plan.js +67 -29
- package/dist/pipeline/probe.d.ts +1 -1
- package/dist/pipeline/probe.js +1 -1
- package/dist/pipeline/readiness-report.d.ts +2 -2
- package/dist/pipeline/readiness-report.js +2 -2
- package/dist/pipeline/release-classification.d.ts +1 -1
- package/dist/pipeline/release-classification.js +1 -1
- package/dist/pipeline/release-report.d.ts +1 -1
- package/dist/pipeline/release-report.js +1 -1
- package/dist/pipeline/repo-eval-comment.d.ts +1 -1
- package/dist/pipeline/repo-eval-comment.js +1 -1
- package/dist/pipeline/repo-threshold-evaluator.d.ts +1 -1
- package/dist/pipeline/repo-threshold-evaluator.js +1 -1
- package/dist/pipeline/resolve-mappings.d.ts +6 -6
- package/dist/pipeline/resolve-mappings.js +44 -44
- package/dist/pipeline/retrieval-metrics.d.ts +3 -3
- package/dist/pipeline/retrieval-metrics.js +28 -20
- package/dist/pipeline/steps/calculate-scores-step.d.ts +11 -0
- package/dist/pipeline/steps/calculate-scores-step.js +89 -0
- package/dist/pipeline/steps/compare-step.d.ts +18 -0
- package/dist/pipeline/steps/compare-step.js +90 -0
- package/dist/pipeline/steps/eval-step.d.ts +53 -0
- package/dist/pipeline/steps/eval-step.js +347 -0
- package/dist/pipeline/steps/fetch-docs-step.d.ts +11 -0
- package/dist/pipeline/steps/fetch-docs-step.js +84 -0
- package/dist/pipeline/steps/generate-configs-step.d.ts +11 -0
- package/dist/pipeline/steps/generate-configs-step.js +98 -0
- package/dist/pipeline/steps/grader-consistency-step.d.ts +21 -0
- package/dist/pipeline/steps/grader-consistency-step.js +74 -0
- package/dist/pipeline/steps/publish-report-step.d.ts +57 -0
- package/dist/pipeline/steps/publish-report-step.js +243 -0
- package/dist/pipeline/steps/report-step.d.ts +13 -0
- package/dist/pipeline/steps/report-step.js +56 -0
- package/dist/pipeline/steps/update-scores-step.d.ts +11 -0
- package/dist/pipeline/steps/update-scores-step.js +42 -0
- package/dist/pipeline/targeted-loo.d.ts +1 -1
- package/dist/pipeline/targeted-loo.js +1 -1
- package/dist/pipeline/thresholds.d.ts +1 -1
- package/dist/pipeline/thresholds.js +1 -1
- package/dist/pipeline/validate.js +13 -0
- package/dist/report-store.d.ts +17 -0
- package/dist/report-store.js +24 -0
- package/dist/scripts/agent-behavior-report.d.ts +19 -0
- package/dist/scripts/agent-behavior-report.js +315 -0
- package/dist/scripts/baseline.d.ts +43 -0
- package/dist/scripts/baseline.js +267 -0
- package/dist/scripts/calculate-scores.d.ts +166 -0
- package/dist/scripts/calculate-scores.js +1296 -0
- package/dist/scripts/compare.d.ts +22 -0
- package/dist/scripts/compare.js +334 -0
- package/dist/scripts/coverage-audit.d.ts +44 -0
- package/dist/scripts/coverage-audit.js +209 -0
- package/dist/scripts/debug-eval.d.ts +19 -0
- package/dist/scripts/debug-eval.js +73 -0
- package/dist/scripts/discovery-report.d.ts +58 -0
- package/dist/scripts/discovery-report.js +250 -0
- package/dist/scripts/fetch-docs.d.ts +35 -0
- package/dist/scripts/fetch-docs.js +472 -0
- package/dist/scripts/generate-configs.d.ts +66 -0
- package/dist/scripts/generate-configs.js +459 -0
- package/dist/scripts/grader-api.d.ts +27 -0
- package/dist/scripts/grader-api.js +206 -0
- package/dist/scripts/grader-compare.d.ts +22 -0
- package/dist/scripts/grader-compare.js +368 -0
- package/dist/scripts/grader-consistency.d.ts +20 -0
- package/dist/scripts/grader-consistency.js +313 -0
- package/dist/scripts/grader-sensitivity.d.ts +22 -0
- package/dist/scripts/grader-sensitivity.js +354 -0
- package/dist/scripts/grader-validate.d.ts +19 -0
- package/dist/scripts/grader-validate.js +267 -0
- package/dist/scripts/measure-retrieval.d.ts +10 -0
- package/dist/scripts/measure-retrieval.js +145 -0
- package/dist/scripts/migrate-task-mode.d.ts +1 -1
- package/dist/scripts/migrate-task-mode.js +1 -1
- package/dist/scripts/migrate-tasks-to-content-lake.d.ts +1 -1
- package/dist/scripts/migrate-tasks-to-content-lake.js +1 -1
- package/dist/scripts/pipeline.d.ts +76 -0
- package/dist/scripts/pipeline.js +1031 -0
- package/dist/scripts/pr-comment.d.ts +10 -0
- package/dist/scripts/pr-comment.js +510 -0
- package/dist/scripts/readiness-report.d.ts +88 -0
- package/dist/scripts/readiness-report.js +342 -0
- package/dist/scripts/update-quality-scores.d.ts +15 -0
- package/dist/scripts/update-quality-scores.js +184 -0
- package/dist/scripts/validate-task-sources.d.ts +1 -1
- package/dist/scripts/validate-task-sources.js +1 -1
- package/dist/scripts/validate.d.ts +13 -0
- package/dist/scripts/validate.js +79 -0
- package/dist/scripts/webhook-server.d.ts +26 -0
- package/dist/scripts/webhook-server.js +147 -0
- package/dist/scripts/weekly-digest.d.ts +24 -0
- package/dist/scripts/weekly-digest.js +144 -0
- package/dist/sinks/format-slack.d.ts +64 -0
- package/dist/sinks/format-slack.js +306 -0
- package/dist/sinks/slack-sink.d.ts +27 -0
- package/dist/sinks/slack-sink.js +78 -0
- package/dist/sinks/types.d.ts +1 -1
- package/dist/sinks/types.js +1 -1
- package/dist/sinks/webhook-sink.d.ts +19 -0
- package/dist/sinks/webhook-sink.js +50 -0
- package/dist/tasks/knowledge-probe/define-type-api.task.ts +66 -0
- package/dist/tasks/knowledge-probe/groq-projections.task.ts +62 -0
- package/dist/tasks/literacy/content-lake.task.ts +181 -0
- package/dist/tasks/literacy/frameworks.task.ts +129 -0
- package/dist/tasks/literacy/functions.task.ts +70 -0
- package/dist/tasks/literacy/groq.task.ts +259 -0
- package/dist/tasks/literacy/image-handling.task.ts +95 -0
- package/dist/tasks/literacy/nextjs-live.task.ts +76 -0
- package/dist/tasks/literacy/portable-text.task.ts +169 -0
- package/dist/tasks/literacy/studio-setup.task.ts +134 -0
- package/dist/tasks/literacy/visual-editing.task.ts +147 -0
- package/package.json +25 -25
- package/tasks/.expanded.agentic.yaml +280 -0
- package/tasks/.expanded.yaml +565 -0
- package/tasks/knowledge-probe/define-type-api.task.ts +11 -0
- package/tasks/knowledge-probe/groq-projections.task.ts +3 -0
- package/tasks/literacy/content-lake.task.ts +181 -0
- package/tasks/literacy/frameworks.task.ts +1 -0
- package/tasks/literacy/functions.task.ts +1 -0
- package/tasks/literacy/groq.task.ts +1 -0
- package/tasks/literacy/image-handling.task.ts +95 -0
- package/tasks/literacy/nextjs-live.task.ts +2 -1
- package/tasks/literacy/portable-text.task.ts +169 -0
- package/tasks/literacy/studio-setup.task.ts +5 -2
- package/tasks/literacy/visual-editing.task.ts +1 -0
- package/LICENSE +0 -21
- package/tasks/frameworks.yaml +0 -98
- package/tasks/functions.yaml +0 -51
- package/tasks/groq.yaml +0 -216
- package/tasks/nextjs-live.yaml +0 -62
- package/tasks/studio-setup.yaml +0 -111
- package/tasks/visual-editing.yaml +0 -120
|
@@ -11,7 +11,7 @@
|
|
|
11
11
|
*
|
|
12
12
|
* This module has NO side effects — no file I/O, no API calls.
|
|
13
13
|
*
|
|
14
|
-
* @see docs/exec-plans/grader-reliability.md — Phase 2
|
|
14
|
+
* @see docs/archive/exec-plans/grader-reliability.md — Phase 2
|
|
15
15
|
*/
|
|
16
16
|
// ---------------------------------------------------------------------------
|
|
17
17
|
// Pure computation
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { resolve } from "node:path";
|
|
1
2
|
import { normalizeMode } from "./normalize-mode.js";
|
|
2
3
|
/**
|
|
3
4
|
* Map a PipelineRequest to a ResolvedConfig.
|
|
@@ -19,13 +20,20 @@ import { normalizeMode } from "./normalize-mode.js";
|
|
|
19
20
|
export function mapRequestToConfig(request, rootDir) {
|
|
20
21
|
// Normalize mode so downstream pipeline code only sees canonical names.
|
|
21
22
|
// The API may receive legacy names ("baseline", "full") from older clients.
|
|
22
|
-
const { mode, variant } = normalizeMode(request.mode ?? "full");
|
|
23
|
+
const { mode, variant: normalizedVariant } = normalizeMode(request.mode ?? "full");
|
|
24
|
+
// Explicit variant from request takes precedence over one derived from
|
|
25
|
+
// legacy mode normalization. This supports the canonical form:
|
|
26
|
+
// { mode: "literacy", variant: "baseline" }
|
|
27
|
+
// while preserving backward compatibility with:
|
|
28
|
+
// { mode: "baseline" } → normalizeMode → { mode: "literacy", variant: "baseline" }
|
|
29
|
+
const variant = request.variant ?? normalizedVariant;
|
|
23
30
|
// API-triggered evaluations (identified by jobId) default to publish: true.
|
|
24
31
|
// Without this, the job's reportId is always null and GET /v1/reports/:id
|
|
25
32
|
// has nothing to return.
|
|
26
33
|
const publishDefault = !!request.jobId;
|
|
27
34
|
return {
|
|
28
35
|
rootDir,
|
|
36
|
+
outputDir: resolve(rootDir, "results", "latest"),
|
|
29
37
|
mode,
|
|
30
38
|
variant,
|
|
31
39
|
debug: mapDebug(request.debug),
|
|
@@ -66,6 +74,10 @@ export function mapRequestToConfig(request, rootDir) {
|
|
|
66
74
|
callerGit: request.callerGit,
|
|
67
75
|
callback: request.callback,
|
|
68
76
|
jobId: request.jobId,
|
|
77
|
+
captureEnabled: false,
|
|
78
|
+
captureDir: undefined,
|
|
79
|
+
captureCompress: true,
|
|
80
|
+
captureExtras: true,
|
|
69
81
|
remote: false,
|
|
70
82
|
apiUrl: "https://ailf-api.sanity.build",
|
|
71
83
|
presets: request.presets,
|
|
@@ -84,12 +96,13 @@ function mapDebug(debug) {
|
|
|
84
96
|
};
|
|
85
97
|
}
|
|
86
98
|
function mapTaskSourceType(taskMode) {
|
|
87
|
-
if (taskMode === "content-lake"
|
|
99
|
+
if (taskMode === "content-lake")
|
|
88
100
|
return taskMode;
|
|
89
101
|
// "inline" means the caller sent inline tasks that will be materialized
|
|
90
102
|
// to a temp directory and loaded via --repo-tasks-path. Use "repo" to
|
|
91
103
|
// ensure ONLY those tasks are used (no Content Lake merge).
|
|
92
104
|
if (taskMode === "inline")
|
|
93
105
|
return "repo";
|
|
106
|
+
// "yaml" was removed — treat it as default (Content Lake)
|
|
94
107
|
return undefined;
|
|
95
108
|
}
|
|
@@ -10,7 +10,7 @@
|
|
|
10
10
|
* means unchanged tasks are skipped. Changed tasks are upserted via
|
|
11
11
|
* createOrReplace.
|
|
12
12
|
*
|
|
13
|
-
* @see docs/exec-plans/tasks-as-content/phase-5-content-lake-mirroring.md
|
|
13
|
+
* @see docs/archive/exec-plans/tasks-as-content/phase-5-content-lake-mirroring.md
|
|
14
14
|
*/
|
|
15
15
|
import type { SanityClient } from "@sanity/client";
|
|
16
16
|
import { type LiteracyTaskDefinition, type Logger } from "../_vendor/ailf-core/index.d.ts";
|
|
@@ -10,7 +10,7 @@
|
|
|
10
10
|
* means unchanged tasks are skipped. Changed tasks are upserted via
|
|
11
11
|
* createOrReplace.
|
|
12
12
|
*
|
|
13
|
-
* @see docs/exec-plans/tasks-as-content/phase-5-content-lake-mirroring.md
|
|
13
|
+
* @see docs/archive/exec-plans/tasks-as-content/phase-5-content-lake-mirroring.md
|
|
14
14
|
*/
|
|
15
15
|
import { createHash } from "crypto";
|
|
16
16
|
import { readFileSync } from "fs";
|
|
@@ -7,7 +7,7 @@
|
|
|
7
7
|
* emoji markers, alignment, and color-coding (via unicode markers).
|
|
8
8
|
* The JSON formatter produces machine-readable output for CI/CD.
|
|
9
9
|
*
|
|
10
|
-
* @see docs/exec-plans/execution-preview.md
|
|
10
|
+
* @see docs/archive/exec-plans/execution-preview.md
|
|
11
11
|
*/
|
|
12
12
|
import type { ExecutionPlan } from "./plan.js";
|
|
13
13
|
/**
|
|
@@ -7,7 +7,7 @@
|
|
|
7
7
|
* emoji markers, alignment, and color-coding (via unicode markers).
|
|
8
8
|
* The JSON formatter produces machine-readable output for CI/CD.
|
|
9
9
|
*
|
|
10
|
-
* @see docs/exec-plans/execution-preview.md
|
|
10
|
+
* @see docs/archive/exec-plans/execution-preview.md
|
|
11
11
|
*/
|
|
12
12
|
import { formatCost } from "../agent-observer/pricing.js";
|
|
13
13
|
// ---------------------------------------------------------------------------
|
package/dist/pipeline/plan.d.ts
CHANGED
|
@@ -7,7 +7,7 @@
|
|
|
7
7
|
* anything. Calls existing pure functions (task expansion, model loading,
|
|
8
8
|
* cache hashing, pricing) and composes them into an `ExecutionPlan`.
|
|
9
9
|
*
|
|
10
|
-
* @see docs/exec-plans/execution-preview.md
|
|
10
|
+
* @see docs/archive/exec-plans/execution-preview.md
|
|
11
11
|
*/
|
|
12
12
|
import type { DebugOptions, EvalMode } from "./types.js";
|
|
13
13
|
import { LiteracyVariant } from "./normalize-mode.js";
|
package/dist/pipeline/plan.js
CHANGED
|
@@ -7,16 +7,17 @@
|
|
|
7
7
|
* anything. Calls existing pure functions (task expansion, model loading,
|
|
8
8
|
* cache hashing, pricing) and composes them into an `ExecutionPlan`.
|
|
9
9
|
*
|
|
10
|
-
* @see docs/exec-plans/execution-preview.md
|
|
10
|
+
* @see docs/archive/exec-plans/execution-preview.md
|
|
11
11
|
*/
|
|
12
12
|
import { existsSync, readdirSync, statSync } from "fs";
|
|
13
13
|
import { resolve } from "path";
|
|
14
|
+
import { createLiteracyModeBase, modelMatchesLiteracyVariant, } from "./compiler/mode-bases/literacy.js";
|
|
14
15
|
import { lookupPricing } from "../agent-observer/pricing.js";
|
|
15
16
|
import { RepoTaskSource } from "../adapters/task-sources/repo-task-source.js";
|
|
16
17
|
import { loadAllTsTaskFiles } from "../adapters/task-sources/task-file-loader.js";
|
|
17
18
|
import { lookupCache } from "./cache.js";
|
|
18
19
|
import { compileLiteracyTasks } from "./compiler/literacy-bridge.js";
|
|
19
|
-
import { tryLoadConfigFile } from "./compiler/config-loader.js";
|
|
20
|
+
import { resolveVendoredSubdir, tryLoadConfigFile, } from "./compiler/config-loader.js";
|
|
20
21
|
import { LiteracyVariant } from "./normalize-mode.js";
|
|
21
22
|
import { validateConfiguration } from "./validate.js";
|
|
22
23
|
/**
|
|
@@ -44,33 +45,35 @@ function loadModelsFile(rootDir) {
|
|
|
44
45
|
const result = tryLoadConfigFile("models", rootDir);
|
|
45
46
|
return result?.data ?? null;
|
|
46
47
|
}
|
|
48
|
+
const _literacyBase = createLiteracyModeBase();
|
|
47
49
|
/**
|
|
48
|
-
*
|
|
50
|
+
* Check whether a model participates in a given eval mode + optional variant.
|
|
49
51
|
*
|
|
50
|
-
*
|
|
51
|
-
*
|
|
52
|
-
*
|
|
52
|
+
* For literacy mode, checks both mode enrollment and variant participation
|
|
53
|
+
* via the shared `modelMatchesLiteracyVariant` helper. For non-literacy
|
|
54
|
+
* modes, checks mode enrollment only.
|
|
53
55
|
*/
|
|
54
|
-
function
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
56
|
+
function modeMatchesModel(mode, model, variant) {
|
|
57
|
+
// Check basic mode enrollment
|
|
58
|
+
if (model.modes &&
|
|
59
|
+
model.modes.length > 0 &&
|
|
60
|
+
!model.modes.includes(mode)) {
|
|
61
|
+
return false;
|
|
62
|
+
}
|
|
63
|
+
// For literacy mode with a variant, check variant participation
|
|
64
|
+
if (mode === "literacy" && variant) {
|
|
58
65
|
switch (variant) {
|
|
59
66
|
case LiteracyVariant.AGENTIC:
|
|
60
|
-
return (
|
|
61
|
-
|
|
62
|
-
case LiteracyVariant.OBSERVED:
|
|
63
|
-
return modelModes.includes(LiteracyVariant.OBSERVED);
|
|
67
|
+
return (modelMatchesLiteracyVariant(model, "agentic-naive") ||
|
|
68
|
+
modelMatchesLiteracyVariant(model, "agentic-optimized"));
|
|
64
69
|
case LiteracyVariant.FULL:
|
|
65
|
-
return (
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
case LiteracyVariant.STANDARD:
|
|
70
|
+
return (modelMatchesLiteracyVariant(model, "baseline") ||
|
|
71
|
+
modelMatchesLiteracyVariant(model, "agentic-naive") ||
|
|
72
|
+
modelMatchesLiteracyVariant(model, "agentic-optimized"));
|
|
69
73
|
default:
|
|
70
|
-
return
|
|
74
|
+
return modelMatchesLiteracyVariant(model, variant);
|
|
71
75
|
}
|
|
72
76
|
}
|
|
73
|
-
// Non-literacy modes accept all models by default
|
|
74
77
|
return true;
|
|
75
78
|
}
|
|
76
79
|
// ---------------------------------------------------------------------------
|
|
@@ -139,8 +142,8 @@ export async function buildPipelinePlan(opts, rootDir) {
|
|
|
139
142
|
const modelsForCompile = loadModelsFile(rootDir);
|
|
140
143
|
const graderProvider = modelsForCompile?.grader?.id ?? "openai:chat:gpt-4o";
|
|
141
144
|
const modelEntries = (modelsForCompile?.models ?? []).map((m) => ({ id: m.id, label: m.label }));
|
|
142
|
-
// Load *.task.ts files from tasks/<mode>/
|
|
143
|
-
const modeTasksDir =
|
|
145
|
+
// Load *.task.ts files from tasks/<mode>/ (or dist/tasks/<mode>/ when vendored)
|
|
146
|
+
const modeTasksDir = resolveVendoredSubdir(rootDir, `tasks/${opts.mode}`);
|
|
144
147
|
if (existsSync(modeTasksDir)) {
|
|
145
148
|
const rawTasks = await loadAllTsTaskFiles(modeTasksDir);
|
|
146
149
|
if (rawTasks.length > 0) {
|
|
@@ -148,9 +151,16 @@ export async function buildPipelinePlan(opts, rootDir) {
|
|
|
148
151
|
const handlerModulePath = `./compiler/mode-handlers/${opts.mode}/index.js`;
|
|
149
152
|
const mod = await import(handlerModulePath);
|
|
150
153
|
const handler = mod.handler;
|
|
154
|
+
const skippedByMode = new Map();
|
|
151
155
|
for (const rawFile of rawTasks) {
|
|
152
156
|
for (const taskDef of rawFile.tasks) {
|
|
153
157
|
const task = taskDef;
|
|
158
|
+
// Filter to matching mode (skip tasks from other modes in same dir)
|
|
159
|
+
if ("mode" in task && task.mode !== opts.mode) {
|
|
160
|
+
const taskMode = task.mode ?? "unknown";
|
|
161
|
+
skippedByMode.set(taskMode, (skippedByMode.get(taskMode) ?? 0) + 1);
|
|
162
|
+
continue;
|
|
163
|
+
}
|
|
154
164
|
// Apply area/task/tag filter
|
|
155
165
|
if (filter) {
|
|
156
166
|
if (filter.areas?.length &&
|
|
@@ -192,6 +202,13 @@ export async function buildPipelinePlan(opts, rootDir) {
|
|
|
192
202
|
}
|
|
193
203
|
}
|
|
194
204
|
}
|
|
205
|
+
if (skippedByMode.size > 0) {
|
|
206
|
+
const summary = [...skippedByMode.entries()]
|
|
207
|
+
.map(([m, n]) => `${n} ${m}`)
|
|
208
|
+
.join(", ");
|
|
209
|
+
const total = [...skippedByMode.values()].reduce((a, b) => a + b, 0);
|
|
210
|
+
warnings.push(`Skipped ${total} task(s) with non-matching mode (${summary}). Current pipeline mode: ${opts.mode}. Run with --mode <mode> to include them.`);
|
|
211
|
+
}
|
|
195
212
|
}
|
|
196
213
|
}
|
|
197
214
|
}
|
|
@@ -203,13 +220,29 @@ export async function buildPipelinePlan(opts, rootDir) {
|
|
|
203
220
|
if (opts.repoTasksPath) {
|
|
204
221
|
try {
|
|
205
222
|
const repoSource = new RepoTaskSource(opts.repoTasksPath);
|
|
206
|
-
|
|
207
|
-
|
|
223
|
+
const allRepoTasks = await repoSource.loadTasks(filter);
|
|
224
|
+
// Filter to current mode tasks
|
|
225
|
+
const repoTasks = allRepoTasks.filter((t) => t.mode === opts.mode);
|
|
226
|
+
const skippedRepoTasks = allRepoTasks.length - repoTasks.length;
|
|
227
|
+
if (skippedRepoTasks > 0) {
|
|
228
|
+
const skippedModes = new Map();
|
|
229
|
+
for (const t of allRepoTasks) {
|
|
230
|
+
if (t.mode !== opts.mode) {
|
|
231
|
+
skippedModes.set(t.mode, (skippedModes.get(t.mode) ?? 0) + 1);
|
|
232
|
+
}
|
|
233
|
+
}
|
|
234
|
+
const summary = [...skippedModes.entries()]
|
|
235
|
+
.map(([m, n]) => `${n} ${m}`)
|
|
236
|
+
.join(", ");
|
|
237
|
+
warnings.push(`Skipped ${skippedRepoTasks} repo task(s) with non-matching mode (${summary}). Current pipeline mode: ${opts.mode}. Run with --mode <mode> to include them.`);
|
|
238
|
+
}
|
|
208
239
|
repoTaskCount = repoTasks.length;
|
|
209
|
-
if (repoTaskCount > 0) {
|
|
240
|
+
if (repoTaskCount > 0 && opts.mode === "literacy") {
|
|
241
|
+
// Literacy-specific compilation for repo tasks (detailed test expansion)
|
|
242
|
+
const literacyRepoTasks = repoTasks.filter((t) => t.mode === "literacy");
|
|
210
243
|
const modelsForCompile = loadModelsFile(rootDir);
|
|
211
244
|
const graderProvider = modelsForCompile?.grader?.id ?? "openai:chat:gpt-4o";
|
|
212
|
-
const compileResult = compileLiteracyTasks(
|
|
245
|
+
const compileResult = compileLiteracyTasks(literacyRepoTasks, {
|
|
213
246
|
rootDir,
|
|
214
247
|
evalMode: opts.variant === LiteracyVariant.AGENTIC
|
|
215
248
|
? LiteracyVariant.AGENTIC
|
|
@@ -231,6 +264,11 @@ export async function buildPipelinePlan(opts, rootDir) {
|
|
|
231
264
|
}
|
|
232
265
|
}
|
|
233
266
|
}
|
|
267
|
+
else if (repoTaskCount > 0) {
|
|
268
|
+
// Non-literacy modes: approximate 1 test per task (compilation not
|
|
269
|
+
// supported for non-literacy repo tasks in the explain preview yet)
|
|
270
|
+
totalTests += repoTaskCount;
|
|
271
|
+
}
|
|
234
272
|
}
|
|
235
273
|
catch {
|
|
236
274
|
warnings.push(`Failed to scan repo tasks at ${opts.repoTasksPath} — count may be underestimated`);
|
|
@@ -244,19 +282,19 @@ export async function buildPipelinePlan(opts, rootDir) {
|
|
|
244
282
|
const models = [];
|
|
245
283
|
let graderModelName = "";
|
|
246
284
|
if (modelsFile) {
|
|
247
|
-
const activeModels = modelsFile.models.filter((m) =>
|
|
285
|
+
const activeModels = modelsFile.models.filter((m) => modeMatchesModel(opts.mode, m, opts.variant));
|
|
248
286
|
// For agentic mode, each model appears twice (naive + optimized)
|
|
249
287
|
for (const m of activeModels) {
|
|
250
288
|
const modelName = extractModelName(m.id);
|
|
251
289
|
if (opts.variant === LiteracyVariant.AGENTIC) {
|
|
252
|
-
if (m
|
|
290
|
+
if (modelMatchesLiteracyVariant(m, "agentic-naive")) {
|
|
253
291
|
models.push({
|
|
254
292
|
id: m.id,
|
|
255
293
|
label: `${m.label} (Naive)`,
|
|
256
294
|
modelName,
|
|
257
295
|
});
|
|
258
296
|
}
|
|
259
|
-
if (m
|
|
297
|
+
if (modelMatchesLiteracyVariant(m, "agentic-optimized")) {
|
|
260
298
|
models.push({
|
|
261
299
|
id: m.id,
|
|
262
300
|
label: `${m.label} (Optimized)`,
|
package/dist/pipeline/probe.d.ts
CHANGED
|
@@ -14,7 +14,7 @@
|
|
|
14
14
|
* not "are these docs good enough?" The output is always labeled as
|
|
15
15
|
* directional and never displayed on the same scale as scored evaluations.
|
|
16
16
|
*
|
|
17
|
-
* @see docs/exec-plans/scenario-matrix-implementation/phase-4-content-release-integration.md
|
|
17
|
+
* @see docs/archive/exec-plans/scenario-matrix-implementation/phase-4-content-release-integration.md
|
|
18
18
|
*/
|
|
19
19
|
import type { ProbeResult } from "./types.js";
|
|
20
20
|
/** Generic probe prompt template */
|
package/dist/pipeline/probe.js
CHANGED
|
@@ -14,7 +14,7 @@
|
|
|
14
14
|
* not "are these docs good enough?" The output is always labeled as
|
|
15
15
|
* directional and never displayed on the same scale as scored evaluations.
|
|
16
16
|
*
|
|
17
|
-
* @see docs/exec-plans/scenario-matrix-implementation/phase-4-content-release-integration.md
|
|
17
|
+
* @see docs/archive/exec-plans/scenario-matrix-implementation/phase-4-content-release-integration.md
|
|
18
18
|
*/
|
|
19
19
|
// ---------------------------------------------------------------------------
|
|
20
20
|
// Constants
|
|
@@ -13,8 +13,8 @@
|
|
|
13
13
|
* - generateReadinessReport() — builds the structured report
|
|
14
14
|
* - formatReadinessMarkdown() — renders the report as markdown
|
|
15
15
|
*
|
|
16
|
-
* @see docs/exec-plans/scenario-matrix-implementation/phase-5-readiness-thresholds.md
|
|
17
|
-
* @see docs/exec-plans/eliminate-lib-layer.md
|
|
16
|
+
* @see docs/archive/exec-plans/scenario-matrix-implementation/phase-5-readiness-thresholds.md
|
|
17
|
+
* @see docs/archive/exec-plans/eliminate-lib-layer.md
|
|
18
18
|
*/
|
|
19
19
|
import type { ThresholdConfig } from "./schemas.js";
|
|
20
20
|
import type { GapAnalysisReport, GapEstimate, ScoreSummary, ThresholdEvaluation, ThresholdViolation } from "./types.js";
|
|
@@ -13,8 +13,8 @@
|
|
|
13
13
|
* - generateReadinessReport() — builds the structured report
|
|
14
14
|
* - formatReadinessMarkdown() — renders the report as markdown
|
|
15
15
|
*
|
|
16
|
-
* @see docs/exec-plans/scenario-matrix-implementation/phase-5-readiness-thresholds.md
|
|
17
|
-
* @see docs/exec-plans/eliminate-lib-layer.md
|
|
16
|
+
* @see docs/archive/exec-plans/scenario-matrix-implementation/phase-5-readiness-thresholds.md
|
|
17
|
+
* @see docs/archive/exec-plans/eliminate-lib-layer.md
|
|
18
18
|
*/
|
|
19
19
|
import { evaluateThresholds } from "./thresholds.js";
|
|
20
20
|
// ---------------------------------------------------------------------------
|
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
* - **not-applicable**: Updated, removed, or unchanged documents (these
|
|
16
16
|
* follow the standard before/after comparison flow from Phase 2).
|
|
17
17
|
*
|
|
18
|
-
* @see docs/exec-plans/scenario-matrix-implementation/phase-4-content-release-integration.md
|
|
18
|
+
* @see docs/archive/exec-plans/scenario-matrix-implementation/phase-4-content-release-integration.md
|
|
19
19
|
*/
|
|
20
20
|
import type { ClassifiedReleaseDocument, ProductFeature, ReleaseClassification } from "./types.js";
|
|
21
21
|
import type { ReverseMapping } from "./reverse-mapping.js";
|
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
* - **not-applicable**: Updated, removed, or unchanged documents (these
|
|
16
16
|
* follow the standard before/after comparison flow from Phase 2).
|
|
17
17
|
*
|
|
18
|
-
* @see docs/exec-plans/scenario-matrix-implementation/phase-4-content-release-integration.md
|
|
18
|
+
* @see docs/archive/exec-plans/scenario-matrix-implementation/phase-4-content-release-integration.md
|
|
19
19
|
*/
|
|
20
20
|
// ---------------------------------------------------------------------------
|
|
21
21
|
// Public API
|
|
@@ -10,7 +10,7 @@
|
|
|
10
10
|
* attribution (2c), and probe results (4b) into the document × area × task
|
|
11
11
|
* impact matrix specified by Scenario 2.4.
|
|
12
12
|
*
|
|
13
|
-
* @see docs/exec-plans/scenario-matrix-implementation/phase-4-content-release-integration.md
|
|
13
|
+
* @see docs/archive/exec-plans/scenario-matrix-implementation/phase-4-content-release-integration.md
|
|
14
14
|
*/
|
|
15
15
|
import type { AttributionReport, ComparisonReport, ProbeResult, ReleaseClassification, ReleaseImpactReport } from "./types.js";
|
|
16
16
|
/**
|
|
@@ -10,7 +10,7 @@
|
|
|
10
10
|
* attribution (2c), and probe results (4b) into the document × area × task
|
|
11
11
|
* impact matrix specified by Scenario 2.4.
|
|
12
12
|
*
|
|
13
|
-
* @see docs/exec-plans/scenario-matrix-implementation/phase-4-content-release-integration.md
|
|
13
|
+
* @see docs/archive/exec-plans/scenario-matrix-implementation/phase-4-content-release-integration.md
|
|
14
14
|
*/
|
|
15
15
|
// ---------------------------------------------------------------------------
|
|
16
16
|
// Public API
|
|
@@ -12,7 +12,7 @@
|
|
|
12
12
|
* - Clear "what does this mean?" context
|
|
13
13
|
* - skip-ailf bypass instructions
|
|
14
14
|
*
|
|
15
|
-
* @see docs/exec-plans/tasks-as-content/phase-6-pr-quality-gates.md
|
|
15
|
+
* @see docs/archive/exec-plans/tasks-as-content/phase-6-pr-quality-gates.md
|
|
16
16
|
* @see packages/eval/src/pipeline/repo-threshold-evaluator.ts
|
|
17
17
|
*/
|
|
18
18
|
import type { ComparisonReport, ScoreSummary } from "./types.js";
|
|
@@ -12,7 +12,7 @@
|
|
|
12
12
|
* - Clear "what does this mean?" context
|
|
13
13
|
* - skip-ailf bypass instructions
|
|
14
14
|
*
|
|
15
|
-
* @see docs/exec-plans/tasks-as-content/phase-6-pr-quality-gates.md
|
|
15
|
+
* @see docs/archive/exec-plans/tasks-as-content/phase-6-pr-quality-gates.md
|
|
16
16
|
* @see packages/eval/src/pipeline/repo-threshold-evaluator.ts
|
|
17
17
|
*/
|
|
18
18
|
// ---------------------------------------------------------------------------
|
|
@@ -10,7 +10,7 @@
|
|
|
10
10
|
* thresholds are per-area, defined by the AILF team, and drive
|
|
11
11
|
* readiness reports.
|
|
12
12
|
*
|
|
13
|
-
* @see docs/exec-plans/tasks-as-content/phase-6-pr-quality-gates.md
|
|
13
|
+
* @see docs/archive/exec-plans/tasks-as-content/phase-6-pr-quality-gates.md
|
|
14
14
|
* @see packages/eval/src/adapters/task-sources/repo-schemas.ts
|
|
15
15
|
*/
|
|
16
16
|
import type { ScoreSummary } from "./types.js";
|
|
@@ -10,7 +10,7 @@
|
|
|
10
10
|
* thresholds are per-area, defined by the AILF team, and drive
|
|
11
11
|
* readiness reports.
|
|
12
12
|
*
|
|
13
|
-
* @see docs/exec-plans/tasks-as-content/phase-6-pr-quality-gates.md
|
|
13
|
+
* @see docs/archive/exec-plans/tasks-as-content/phase-6-pr-quality-gates.md
|
|
14
14
|
* @see packages/eval/src/adapters/task-sources/repo-schemas.ts
|
|
15
15
|
*/
|
|
16
16
|
// ---------------------------------------------------------------------------
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* pipeline/resolve-mappings.ts
|
|
3
3
|
*
|
|
4
|
-
* Extracts canonical mappings from
|
|
5
|
-
* Each task
|
|
6
|
-
* directly — there is no separate mappings
|
|
4
|
+
* Extracts canonical mappings from task definitions (*.task.ts files).
|
|
5
|
+
* Each task contains context.docs and referenceSolution fields
|
|
6
|
+
* directly — there is no separate mappings file.
|
|
7
7
|
*
|
|
8
8
|
* The output shape matches what downstream consumers expect so
|
|
9
9
|
* fetch-docs, validate, and calculate-scores work without changes.
|
|
@@ -24,12 +24,12 @@ export interface ResolvedMappings {
|
|
|
24
24
|
}>;
|
|
25
25
|
}
|
|
26
26
|
/**
|
|
27
|
-
* Extract
|
|
28
|
-
* Only tasks with
|
|
27
|
+
* Extract canonical mappings from *.task.ts files in tasks/literacy/.
|
|
28
|
+
* Only tasks with context.docs and referenceSolution are included.
|
|
29
29
|
*/
|
|
30
30
|
export declare function extractInlineMappings(rootDir: string): ResolvedMappings;
|
|
31
31
|
/**
|
|
32
|
-
* Resolve canonical mappings from
|
|
32
|
+
* Resolve canonical mappings from task definitions.
|
|
33
33
|
* This is the single source of truth — there is no external mappings file.
|
|
34
34
|
*/
|
|
35
35
|
export declare function resolveMappings(rootDir: string): ResolvedMappings;
|
|
@@ -1,72 +1,72 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* pipeline/resolve-mappings.ts
|
|
3
3
|
*
|
|
4
|
-
* Extracts canonical mappings from
|
|
5
|
-
* Each task
|
|
6
|
-
* directly — there is no separate mappings
|
|
4
|
+
* Extracts canonical mappings from task definitions (*.task.ts files).
|
|
5
|
+
* Each task contains context.docs and referenceSolution fields
|
|
6
|
+
* directly — there is no separate mappings file.
|
|
7
7
|
*
|
|
8
8
|
* The output shape matches what downstream consumers expect so
|
|
9
9
|
* fetch-docs, validate, and calculate-scores work without changes.
|
|
10
10
|
*/
|
|
11
|
-
import { existsSync
|
|
12
|
-
import {
|
|
13
|
-
import {
|
|
11
|
+
import { existsSync } from "fs";
|
|
12
|
+
import { discoverTsTaskFiles, loadTsTaskFileSync, } from "../adapters/task-sources/task-file-loader.js";
|
|
13
|
+
import { resolveVendoredSubdir } from "./compiler/config-loader.js";
|
|
14
14
|
// ---------------------------------------------------------------------------
|
|
15
15
|
// Resolution
|
|
16
16
|
// ---------------------------------------------------------------------------
|
|
17
17
|
/**
|
|
18
|
-
* Extract
|
|
19
|
-
* Only tasks with
|
|
18
|
+
* Extract canonical mappings from *.task.ts files in tasks/literacy/.
|
|
19
|
+
* Only tasks with context.docs and referenceSolution are included.
|
|
20
20
|
*/
|
|
21
21
|
export function extractInlineMappings(rootDir) {
|
|
22
|
-
const tasksDir =
|
|
22
|
+
const tasksDir = resolveVendoredSubdir(rootDir, "tasks/literacy");
|
|
23
23
|
const result = { feature_areas: {} };
|
|
24
24
|
if (!existsSync(tasksDir))
|
|
25
25
|
return result;
|
|
26
|
-
const
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
const tasks = [];
|
|
37
|
-
for (const entry of parsed) {
|
|
38
|
-
if (!isInlineTaskWithMappings(entry))
|
|
26
|
+
const files = discoverTsTaskFiles(tasksDir);
|
|
27
|
+
for (const file of files) {
|
|
28
|
+
const loaded = loadTsTaskFileSync(file);
|
|
29
|
+
for (const task of loaded.tasks) {
|
|
30
|
+
const t = task;
|
|
31
|
+
const area = typeof t.area === "string" ? t.area : undefined;
|
|
32
|
+
const id = typeof t.id === "string" ? t.id : undefined;
|
|
33
|
+
const title = typeof t.title === "string" ? t.title : "";
|
|
34
|
+
const referenceSolution = typeof t.referenceSolution === "string" ? t.referenceSolution : "";
|
|
35
|
+
if (!area || !id)
|
|
39
36
|
continue;
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
37
|
+
// Extract docs from context.docs (GeneralizedDocRef[])
|
|
38
|
+
const context = t.context;
|
|
39
|
+
const docs = [];
|
|
40
|
+
if (context?.docs && Array.isArray(context.docs)) {
|
|
41
|
+
for (const doc of context.docs) {
|
|
42
|
+
const d = doc;
|
|
43
|
+
if (typeof d.slug === "string") {
|
|
44
|
+
docs.push({
|
|
45
|
+
slug: d.slug,
|
|
46
|
+
reason: typeof d.reason === "string" ? d.reason : "",
|
|
47
|
+
});
|
|
48
|
+
}
|
|
49
|
+
}
|
|
50
|
+
}
|
|
51
|
+
if (docs.length === 0 || !referenceSolution)
|
|
52
|
+
continue;
|
|
53
|
+
if (!result.feature_areas[area]) {
|
|
54
|
+
result.feature_areas[area] = { tasks: [] };
|
|
55
|
+
}
|
|
56
|
+
result.feature_areas[area].tasks.push({
|
|
57
|
+
canonical_docs: docs,
|
|
58
|
+
description: title,
|
|
59
|
+
id,
|
|
60
|
+
reference_solution: referenceSolution,
|
|
45
61
|
});
|
|
46
62
|
}
|
|
47
|
-
if (tasks.length > 0) {
|
|
48
|
-
result.feature_areas[featureArea] = { tasks };
|
|
49
|
-
}
|
|
50
63
|
}
|
|
51
64
|
return result;
|
|
52
65
|
}
|
|
53
66
|
/**
|
|
54
|
-
* Resolve canonical mappings from
|
|
67
|
+
* Resolve canonical mappings from task definitions.
|
|
55
68
|
* This is the single source of truth — there is no external mappings file.
|
|
56
69
|
*/
|
|
57
70
|
export function resolveMappings(rootDir) {
|
|
58
71
|
return extractInlineMappings(rootDir);
|
|
59
72
|
}
|
|
60
|
-
// ---------------------------------------------------------------------------
|
|
61
|
-
// Helpers
|
|
62
|
-
// ---------------------------------------------------------------------------
|
|
63
|
-
function isInlineTaskWithMappings(entry) {
|
|
64
|
-
if (typeof entry !== "object" || entry === null)
|
|
65
|
-
return false;
|
|
66
|
-
const e = entry;
|
|
67
|
-
return (typeof e.id === "string" &&
|
|
68
|
-
typeof e.description === "string" &&
|
|
69
|
-
Array.isArray(e.canonical_docs) &&
|
|
70
|
-
e.canonical_docs.length > 0 &&
|
|
71
|
-
typeof e.reference_solution === "string");
|
|
72
|
-
}
|
|
@@ -2,9 +2,9 @@
|
|
|
2
2
|
* pipeline/retrieval-metrics.ts
|
|
3
3
|
*
|
|
4
4
|
* Computes retrieval precision and recall by comparing agent-retrieved
|
|
5
|
-
* doc slugs against canonical_docs defined in task
|
|
5
|
+
* doc slugs against canonical_docs defined in task definitions.
|
|
6
6
|
*
|
|
7
|
-
* This is a pure computation module — no file I/O beyond reading task
|
|
7
|
+
* This is a pure computation module — no file I/O beyond reading task files.
|
|
8
8
|
*/
|
|
9
9
|
import type { RetrievalMetrics, TaskRetrievalMetrics } from "./types.js";
|
|
10
10
|
export interface AgenticBehaviorData {
|
|
@@ -30,7 +30,7 @@ export declare function computeRetrievalMetrics(rootDir: string, behaviors: Agen
|
|
|
30
30
|
*/
|
|
31
31
|
export declare function computeTaskMetrics(taskId: string, area: string, retrieved: string[], canonical: Set<string>): TaskRetrievalMetrics;
|
|
32
32
|
/**
|
|
33
|
-
* Load
|
|
33
|
+
* Load canonical docs from *.task.ts files in tasks/literacy/.
|
|
34
34
|
* Returns a map of taskId → { slugs: Set<string>, area: string }.
|
|
35
35
|
*/
|
|
36
36
|
export declare function loadCanonicalDocs(rootDir: string): Map<string, {
|