@sanity/ailf 1.0.0 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +0 -1
- package/canonical/grader-references/README.md +2 -2
- package/canonical/reference-solutions/content-lake/mutations.ts +160 -0
- package/canonical/reference-solutions/content-lake/realtime.ts +187 -0
- package/canonical/reference-solutions/image-handling/asset-pipeline.tsx +166 -0
- package/canonical/reference-solutions/portable-text/custom-blocks.ts +204 -0
- package/canonical/reference-solutions/portable-text/rendering.tsx +163 -0
- package/config/features.ts +1 -1
- package/config/models.ts +29 -12
- package/config/sources.ts +1 -1
- package/config/thresholds.ts +1 -1
- package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.d.ts +10 -0
- package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.js +185 -0
- package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.d.ts +6 -0
- package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.js +42 -0
- package/dist/_vendor/ailf-core/artifact-capture/noop-collector.d.ts +14 -0
- package/dist/_vendor/ailf-core/artifact-capture/noop-collector.js +25 -0
- package/dist/_vendor/ailf-core/config-helpers.d.ts +20 -17
- package/dist/_vendor/ailf-core/config-helpers.js +51 -2
- package/dist/_vendor/ailf-core/examples/index.d.ts +166 -80
- package/dist/_vendor/ailf-core/examples/index.js +213 -94
- package/dist/_vendor/ailf-core/index.d.ts +3 -2
- package/dist/_vendor/ailf-core/index.js +2 -1
- package/dist/_vendor/ailf-core/ports/artifact-collector.d.ts +94 -0
- package/dist/_vendor/ailf-core/ports/artifact-collector.js +13 -0
- package/dist/_vendor/ailf-core/ports/capture-comparator.d.ts +138 -0
- package/dist/_vendor/ailf-core/ports/capture-comparator.js +10 -0
- package/dist/_vendor/ailf-core/ports/context.d.ts +22 -1
- package/dist/_vendor/ailf-core/ports/eval-runner.d.ts +6 -0
- package/dist/_vendor/ailf-core/ports/index.d.ts +2 -0
- package/dist/_vendor/ailf-core/ports/pipeline-step.d.ts +11 -0
- package/dist/_vendor/ailf-core/ports/task-source.d.ts +3 -3
- package/dist/_vendor/ailf-core/ports/task-source.js +3 -3
- package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +1 -0
- package/dist/_vendor/ailf-core/schemas/eval-config.js +10 -0
- package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +7 -1
- package/dist/_vendor/ailf-core/schemas/pipeline-request.js +16 -2
- package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +0 -2
- package/dist/_vendor/ailf-core/schemas/pipeline.js +0 -1
- package/dist/_vendor/ailf-core/services/config-helpers.d.ts +16 -1
- package/dist/_vendor/ailf-core/services/config-helpers.js +21 -0
- package/dist/_vendor/ailf-core/services/index.d.ts +1 -1
- package/dist/_vendor/ailf-core/services/index.js +1 -1
- package/dist/_vendor/ailf-core/services/scoring.js +9 -0
- package/dist/_vendor/ailf-core/types/generalized-task.d.ts +25 -1
- package/dist/_vendor/ailf-core/types/generalized-task.js +1 -1
- package/dist/_vendor/ailf-core/types/index.d.ts +48 -7
- package/dist/_vendor/ailf-core/types/plugin-registry.d.ts +105 -23
- package/dist/_vendor/ailf-core/types/plugin-registry.js +73 -20
- package/dist/_vendor/ailf-shared/eval-modes.d.ts +15 -0
- package/dist/_vendor/ailf-shared/eval-modes.js +18 -0
- package/dist/adapters/api-client/remediation.js +2 -2
- package/dist/adapters/config-sources/file-config-adapter.js +7 -1
- package/dist/adapters/config-sources/ts-config-loader.js +21 -13
- package/dist/adapters/eval-runners/promptfoo-eval-adapter.js +8 -2
- package/dist/adapters/index.d.ts +0 -1
- package/dist/adapters/index.js +0 -1
- package/dist/adapters/task-sources/composite-task-source.d.ts +1 -1
- package/dist/adapters/task-sources/composite-task-source.js +1 -1
- package/dist/adapters/task-sources/content-lake-task-source.d.ts +4 -6
- package/dist/adapters/task-sources/content-lake-task-source.js +21 -26
- package/dist/adapters/task-sources/index.d.ts +3 -4
- package/dist/adapters/task-sources/index.js +3 -4
- package/dist/adapters/task-sources/repo-schemas.d.ts +219 -17
- package/dist/adapters/task-sources/repo-schemas.js +228 -20
- package/dist/adapters/task-sources/repo-task-source.d.ts +14 -10
- package/dist/adapters/task-sources/repo-task-source.js +81 -122
- package/dist/adapters/task-sources/repo-trigger.d.ts +1 -1
- package/dist/adapters/task-sources/repo-trigger.js +1 -1
- package/dist/adapters/task-sources/repo-validation.d.ts +36 -5
- package/dist/adapters/task-sources/repo-validation.js +126 -5
- package/dist/adapters/task-sources/task-file-loader.d.ts +10 -7
- package/dist/adapters/task-sources/task-file-loader.js +21 -7
- package/dist/agent-observer/test-imports.d.ts +7 -0
- package/dist/agent-observer/test-imports.js +185 -0
- package/dist/artifact-capture/comparator.d.ts +22 -0
- package/dist/artifact-capture/comparator.js +493 -0
- package/dist/artifact-capture/filesystem-collector.d.ts +42 -0
- package/dist/artifact-capture/filesystem-collector.js +237 -0
- package/dist/artifact-capture/redact-artifact.d.ts +20 -0
- package/dist/artifact-capture/redact-artifact.js +115 -0
- package/dist/assertions/source-isolation.d.ts +1 -1
- package/dist/assertions/source-isolation.js +1 -1
- package/dist/cli.js +4 -0
- package/dist/commands/calculate-scores.js +1 -0
- package/dist/commands/capture-compare.d.ts +15 -0
- package/dist/commands/capture-compare.js +253 -0
- package/dist/commands/capture-list.d.ts +12 -0
- package/dist/commands/capture-list.js +147 -0
- package/dist/commands/capture.d.ts +9 -0
- package/dist/commands/capture.js +16 -0
- package/dist/commands/chronic-failures.d.ts +8 -0
- package/dist/commands/chronic-failures.js +33 -0
- package/dist/commands/coverage-audit.js +3 -1
- package/dist/commands/explain-handler.d.ts +1 -1
- package/dist/commands/explain-handler.js +37 -8
- package/dist/commands/fetch-docs.js +1 -0
- package/dist/commands/generate-configs.d.ts +3 -3
- package/dist/commands/generate-configs.js +20 -8
- package/dist/commands/init.d.ts +5 -4
- package/dist/commands/init.js +190 -25
- package/dist/commands/pipeline-action.d.ts +7 -1
- package/dist/commands/pipeline-action.js +43 -19
- package/dist/commands/pipeline.d.ts +6 -1
- package/dist/commands/pipeline.js +7 -2
- package/dist/commands/pr-comment.js +1 -0
- package/dist/commands/publish.js +1 -0
- package/dist/commands/shared/help.js +2 -2
- package/dist/commands/update-quality-scores.d.ts +5 -0
- package/dist/commands/update-quality-scores.js +20 -0
- package/dist/commands/validate-tasks.d.ts +2 -2
- package/dist/commands/validate-tasks.js +26 -15
- package/dist/composition-root.d.ts +15 -4
- package/dist/composition-root.js +100 -55
- package/dist/config/features.ts +23 -0
- package/dist/config/models.ts +100 -0
- package/dist/config/prompts.ts +16 -0
- package/dist/config/rubrics.ts +225 -0
- package/dist/config/schedules.ts +47 -0
- package/dist/config/sinks.ts +37 -0
- package/dist/config/sources.ts +21 -0
- package/dist/config/thresholds.ts +61 -0
- package/dist/index.d.ts +41 -0
- package/dist/index.js +48 -0
- package/dist/lib/agent-behavior-report.d.ts +8 -0
- package/dist/lib/agent-behavior-report.js +185 -0
- package/dist/lib/baseline.d.ts +19 -0
- package/dist/lib/baseline.js +153 -0
- package/dist/lib/calculate-scores.d.ts +23 -0
- package/dist/lib/calculate-scores.js +42 -0
- package/dist/lib/compare.d.ts +18 -0
- package/dist/lib/compare.js +170 -0
- package/dist/lib/coverage-audit.d.ts +4 -0
- package/dist/lib/coverage-audit.js +42 -0
- package/dist/lib/discovery-report.d.ts +13 -0
- package/dist/lib/discovery-report.js +57 -0
- package/dist/lib/fetch-docs.d.ts +30 -0
- package/dist/lib/fetch-docs.js +171 -0
- package/dist/lib/generate-configs.d.ts +25 -0
- package/dist/lib/generate-configs.js +42 -0
- package/dist/lib/grader-api.d.ts +21 -0
- package/dist/lib/grader-api.js +34 -0
- package/dist/lib/grader-compare.d.ts +19 -0
- package/dist/lib/grader-compare.js +91 -0
- package/dist/lib/grader-consistency.d.ts +27 -0
- package/dist/lib/grader-consistency.js +79 -0
- package/dist/lib/grader-sensitivity.d.ts +19 -0
- package/dist/lib/grader-sensitivity.js +75 -0
- package/dist/lib/grader-validate.d.ts +19 -0
- package/dist/lib/grader-validate.js +78 -0
- package/dist/lib/measure-retrieval.d.ts +14 -0
- package/dist/lib/measure-retrieval.js +71 -0
- package/dist/lib/pr-comment.d.ts +16 -0
- package/dist/lib/pr-comment.js +28 -0
- package/dist/lib/readiness-report.d.ts +13 -0
- package/dist/lib/readiness-report.js +108 -0
- package/dist/lib/webhook-server.d.ts +11 -0
- package/dist/lib/webhook-server.js +24 -0
- package/dist/lib/weekly-digest.d.ts +24 -0
- package/dist/lib/weekly-digest.js +148 -0
- package/dist/orchestration/build-app-context.js +13 -0
- package/dist/orchestration/build-step-sequence.js +4 -2
- package/dist/orchestration/cache-context.d.ts +23 -0
- package/dist/orchestration/cache-context.js +43 -0
- package/dist/orchestration/env-bridge.d.ts +21 -0
- package/dist/orchestration/env-bridge.js +66 -0
- package/dist/orchestration/load-pipeline-tasks.d.ts +34 -0
- package/dist/orchestration/load-pipeline-tasks.js +52 -0
- package/dist/orchestration/pipeline-orchestrator.js +75 -5
- package/dist/orchestration/step-runner.js +5 -1
- package/dist/orchestration/steps/calculate-scores-step.d.ts +1 -0
- package/dist/orchestration/steps/calculate-scores-step.js +13 -0
- package/dist/orchestration/steps/callback-step.js +10 -1
- package/dist/orchestration/steps/compare-step.js +6 -3
- package/dist/orchestration/steps/discovery-report-step.js +6 -2
- package/dist/orchestration/steps/fetch-docs-shell.d.ts +17 -0
- package/dist/orchestration/steps/fetch-docs-shell.js +30 -0
- package/dist/orchestration/steps/fetch-docs-step.d.ts +1 -0
- package/dist/orchestration/steps/fetch-docs-step.js +32 -19
- package/dist/orchestration/steps/gap-analysis-step.js +13 -2
- package/dist/orchestration/steps/generate-configs-step.d.ts +1 -0
- package/dist/orchestration/steps/generate-configs-step.js +77 -26
- package/dist/orchestration/steps/mirror-repo-tasks-step.d.ts +1 -1
- package/dist/orchestration/steps/mirror-repo-tasks-step.js +1 -1
- package/dist/orchestration/steps/publish-report-step.js +19 -0
- package/dist/orchestration/steps/readiness-step.js +8 -3
- package/dist/orchestration/steps/report-step.js +17 -4
- package/dist/orchestration/steps/run-eval-step.d.ts +1 -0
- package/dist/orchestration/steps/run-eval-step.js +51 -31
- package/dist/pipeline/agent-behavior-report.js +6 -0
- package/dist/pipeline/attribution.d.ts +1 -1
- package/dist/pipeline/attribution.js +1 -1
- package/dist/pipeline/cache.js +29 -15
- package/dist/pipeline/calculate-scores.d.ts +2 -0
- package/dist/pipeline/calculate-scores.js +70 -33
- package/dist/pipeline/chronic-failures.d.ts +55 -0
- package/dist/pipeline/chronic-failures.js +110 -0
- package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js +1 -1
- package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.js +1 -1
- package/dist/pipeline/compiler/__tests__/literacy-handler.test.js +1 -1
- package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +132 -62
- package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.js +2 -3
- package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.js +33 -100
- package/dist/pipeline/compiler/__tests__/task-bridge.test.d.ts +9 -0
- package/dist/pipeline/compiler/__tests__/task-bridge.test.js +339 -0
- package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.d.ts +10 -0
- package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.js +509 -0
- package/dist/pipeline/compiler/assertion-mapper.d.ts +1 -1
- package/dist/pipeline/compiler/assertion-mapper.js +1 -1
- package/dist/pipeline/compiler/compiler-to-yaml.d.ts +2 -7
- package/dist/pipeline/compiler/compiler-to-yaml.js +2 -7
- package/dist/pipeline/compiler/config-loader.d.ts +14 -0
- package/dist/pipeline/compiler/config-loader.js +42 -2
- package/dist/pipeline/compiler/fixture-resolver.d.ts +1 -1
- package/dist/pipeline/compiler/fixture-resolver.js +1 -1
- package/dist/pipeline/compiler/ignore-fields.d.ts +1 -1
- package/dist/pipeline/compiler/ignore-fields.js +1 -1
- package/dist/pipeline/compiler/index.d.ts +2 -5
- package/dist/pipeline/compiler/index.js +2 -5
- package/dist/pipeline/compiler/literacy-bridge.d.ts +2 -2
- package/dist/pipeline/compiler/literacy-bridge.js +2 -2
- package/dist/pipeline/compiler/mode-bases/agent-harness.d.ts +10 -0
- package/dist/pipeline/compiler/mode-bases/agent-harness.js +21 -0
- package/dist/pipeline/compiler/mode-bases/index.d.ts +4 -0
- package/dist/pipeline/compiler/mode-bases/index.js +4 -0
- package/dist/pipeline/compiler/mode-bases/knowledge-probe.d.ts +10 -0
- package/dist/pipeline/compiler/mode-bases/knowledge-probe.js +22 -0
- package/dist/pipeline/compiler/mode-bases/literacy.d.ts +23 -0
- package/dist/pipeline/compiler/mode-bases/literacy.js +132 -0
- package/dist/pipeline/compiler/mode-bases/mcp-server.d.ts +10 -0
- package/dist/pipeline/compiler/mode-bases/mcp-server.js +70 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.d.ts +43 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.js +187 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.js +138 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/index.d.ts +16 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/index.js +43 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.js +29 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.d.ts +12 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.js +82 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.d.ts +4 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.js +19 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/types.d.ts +49 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.js +16 -0
- package/dist/pipeline/compiler/mode-handlers/index.d.ts +6 -7
- package/dist/pipeline/compiler/mode-handlers/index.js +6 -8
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.d.ts +16 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.js +61 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.d.ts +18 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.js +112 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.d.ts +26 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.js +49 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.js +28 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.d.ts +44 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.js +24 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/assertions.d.ts +18 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/assertions.js +118 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/compiler.d.ts +14 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/compiler.js +105 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/index.d.ts +11 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/index.js +38 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/prompts.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/prompts.js +74 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/types.d.ts +41 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/validation.d.ts +12 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/validation.js +28 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-assertions.js +63 -6
- package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.d.ts +42 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.js +334 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.js +100 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/index.d.ts +27 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/index.js +54 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.d.ts +8 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.js +28 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.d.ts +28 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.js +108 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/types.d.ts +37 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.js +43 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.d.ts +3 -1
- package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.js +65 -67
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.d.ts +33 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.js +191 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.js +101 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.js +172 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.js +323 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.d.ts +103 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.d.ts +65 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.js +368 -0
- package/dist/pipeline/compiler/preset-loader.d.ts +22 -0
- package/dist/pipeline/compiler/preset-loader.js +99 -0
- package/dist/pipeline/compiler/presets/sanity-literacy.d.ts +7 -10
- package/dist/pipeline/compiler/presets/sanity-literacy.js +11 -157
- package/dist/pipeline/compiler/promptfoo-compiler.d.ts +1 -4
- package/dist/pipeline/compiler/promptfoo-compiler.js +3 -12
- package/dist/pipeline/compiler/provider-assembler.js +13 -7
- package/dist/pipeline/compiler/sandbox/docker-sandbox.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/docker-sandbox.js +1 -1
- package/dist/pipeline/compiler/sandbox/fixture-provisioner.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/fixture-provisioner.js +1 -1
- package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.js +1 -1
- package/dist/pipeline/compiler/sandbox/index.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/index.js +1 -1
- package/dist/pipeline/compiler/sandbox/sandbox-selector.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/sandbox-selector.js +1 -1
- package/dist/pipeline/compiler/sandbox/sandbox-strategy.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/sandbox-strategy.js +1 -1
- package/dist/pipeline/compiler/sandbox/tempdir-sandbox.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/tempdir-sandbox.js +1 -1
- package/dist/pipeline/compiler/scoring-bridge.d.ts +1 -1
- package/dist/pipeline/compiler/scoring-bridge.js +1 -1
- package/dist/pipeline/compiler/task-bridge.d.ts +41 -0
- package/dist/pipeline/compiler/task-bridge.js +92 -0
- package/dist/pipeline/compiler/task-graph-builder.d.ts +1 -4
- package/dist/pipeline/compiler/task-graph-builder.js +1 -4
- package/dist/pipeline/compiler/telemetry/index.d.ts +1 -1
- package/dist/pipeline/compiler/telemetry/index.js +1 -1
- package/dist/pipeline/compiler/variable-resolver.d.ts +1 -1
- package/dist/pipeline/compiler/variable-resolver.js +1 -1
- package/dist/pipeline/coverage-audit.d.ts +1 -1
- package/dist/pipeline/coverage-audit.js +1 -1
- package/dist/pipeline/degradations.d.ts +1 -1
- package/dist/pipeline/degradations.js +1 -1
- package/dist/pipeline/expand-tasks.d.ts +2 -2
- package/dist/pipeline/expand-tasks.js +2 -2
- package/dist/pipeline/failure-modes.d.ts +1 -1
- package/dist/pipeline/failure-modes.js +13 -1
- package/dist/pipeline/gap-analysis.d.ts +1 -1
- package/dist/pipeline/gap-analysis.js +3 -1
- package/dist/pipeline/generate-configs.d.ts +2 -2
- package/dist/pipeline/generate-configs.js +16 -9
- package/dist/pipeline/grader-compare-runner.d.ts +1 -1
- package/dist/pipeline/grader-compare-runner.js +7 -1
- package/dist/pipeline/grader-comparison.d.ts +1 -1
- package/dist/pipeline/grader-comparison.js +1 -1
- package/dist/pipeline/grader-consistency-runner.d.ts +1 -1
- package/dist/pipeline/grader-consistency-runner.js +7 -1
- package/dist/pipeline/grader-consistency.d.ts +1 -1
- package/dist/pipeline/grader-consistency.js +1 -1
- package/dist/pipeline/grader-sensitivity-runner.d.ts +1 -1
- package/dist/pipeline/grader-sensitivity-runner.js +1 -1
- package/dist/pipeline/grader-sensitivity.d.ts +1 -1
- package/dist/pipeline/grader-sensitivity.js +1 -1
- package/dist/pipeline/grader-validate-runner.d.ts +1 -1
- package/dist/pipeline/grader-validate-runner.js +2 -2
- package/dist/pipeline/grader-validation.d.ts +1 -1
- package/dist/pipeline/grader-validation.js +1 -1
- package/dist/pipeline/map-request-to-config.js +16 -2
- package/dist/pipeline/mirror-repo-tasks.d.ts +8 -8
- package/dist/pipeline/mirror-repo-tasks.js +10 -10
- package/dist/pipeline/plan-format.d.ts +1 -1
- package/dist/pipeline/plan-format.js +1 -1
- package/dist/pipeline/plan.d.ts +1 -1
- package/dist/pipeline/plan.js +68 -30
- package/dist/pipeline/probe.d.ts +1 -1
- package/dist/pipeline/probe.js +1 -1
- package/dist/pipeline/readiness-report.d.ts +2 -2
- package/dist/pipeline/readiness-report.js +2 -2
- package/dist/pipeline/release-classification.d.ts +1 -1
- package/dist/pipeline/release-classification.js +1 -1
- package/dist/pipeline/release-report.d.ts +1 -1
- package/dist/pipeline/release-report.js +1 -1
- package/dist/pipeline/repo-eval-comment.d.ts +1 -1
- package/dist/pipeline/repo-eval-comment.js +1 -1
- package/dist/pipeline/repo-threshold-evaluator.d.ts +1 -1
- package/dist/pipeline/repo-threshold-evaluator.js +1 -1
- package/dist/pipeline/resolve-mappings.d.ts +6 -6
- package/dist/pipeline/resolve-mappings.js +44 -44
- package/dist/pipeline/retrieval-metrics.d.ts +3 -3
- package/dist/pipeline/retrieval-metrics.js +28 -20
- package/dist/pipeline/steps/calculate-scores-step.d.ts +11 -0
- package/dist/pipeline/steps/calculate-scores-step.js +89 -0
- package/dist/pipeline/steps/compare-step.d.ts +18 -0
- package/dist/pipeline/steps/compare-step.js +90 -0
- package/dist/pipeline/steps/eval-step.d.ts +53 -0
- package/dist/pipeline/steps/eval-step.js +347 -0
- package/dist/pipeline/steps/fetch-docs-step.d.ts +11 -0
- package/dist/pipeline/steps/fetch-docs-step.js +84 -0
- package/dist/pipeline/steps/generate-configs-step.d.ts +11 -0
- package/dist/pipeline/steps/generate-configs-step.js +98 -0
- package/dist/pipeline/steps/grader-consistency-step.d.ts +21 -0
- package/dist/pipeline/steps/grader-consistency-step.js +74 -0
- package/dist/pipeline/steps/publish-report-step.d.ts +57 -0
- package/dist/pipeline/steps/publish-report-step.js +243 -0
- package/dist/pipeline/steps/report-step.d.ts +13 -0
- package/dist/pipeline/steps/report-step.js +56 -0
- package/dist/pipeline/steps/update-scores-step.d.ts +11 -0
- package/dist/pipeline/steps/update-scores-step.js +42 -0
- package/dist/pipeline/targeted-loo.d.ts +1 -1
- package/dist/pipeline/targeted-loo.js +1 -1
- package/dist/pipeline/thresholds.d.ts +1 -1
- package/dist/pipeline/thresholds.js +1 -1
- package/dist/pipeline/validate.js +13 -0
- package/dist/report-store.d.ts +17 -0
- package/dist/report-store.js +24 -0
- package/dist/scripts/agent-behavior-report.d.ts +19 -0
- package/dist/scripts/agent-behavior-report.js +315 -0
- package/dist/scripts/baseline.d.ts +43 -0
- package/dist/scripts/baseline.js +267 -0
- package/dist/scripts/calculate-scores.d.ts +166 -0
- package/dist/scripts/calculate-scores.js +1296 -0
- package/dist/scripts/compare.d.ts +22 -0
- package/dist/scripts/compare.js +334 -0
- package/dist/scripts/coverage-audit.d.ts +44 -0
- package/dist/scripts/coverage-audit.js +209 -0
- package/dist/scripts/debug-eval.d.ts +19 -0
- package/dist/scripts/debug-eval.js +73 -0
- package/dist/scripts/discovery-report.d.ts +58 -0
- package/dist/scripts/discovery-report.js +250 -0
- package/dist/scripts/fetch-docs.d.ts +35 -0
- package/dist/scripts/fetch-docs.js +472 -0
- package/dist/scripts/generate-configs.d.ts +66 -0
- package/dist/scripts/generate-configs.js +459 -0
- package/dist/scripts/grader-api.d.ts +27 -0
- package/dist/scripts/grader-api.js +206 -0
- package/dist/scripts/grader-compare.d.ts +22 -0
- package/dist/scripts/grader-compare.js +368 -0
- package/dist/scripts/grader-consistency.d.ts +20 -0
- package/dist/scripts/grader-consistency.js +313 -0
- package/dist/scripts/grader-sensitivity.d.ts +22 -0
- package/dist/scripts/grader-sensitivity.js +354 -0
- package/dist/scripts/grader-validate.d.ts +19 -0
- package/dist/scripts/grader-validate.js +267 -0
- package/dist/scripts/measure-retrieval.d.ts +10 -0
- package/dist/scripts/measure-retrieval.js +145 -0
- package/dist/scripts/migrate-task-mode.d.ts +1 -1
- package/dist/scripts/migrate-task-mode.js +1 -1
- package/dist/scripts/migrate-tasks-to-content-lake.d.ts +1 -1
- package/dist/scripts/migrate-tasks-to-content-lake.js +1 -1
- package/dist/scripts/pipeline.d.ts +76 -0
- package/dist/scripts/pipeline.js +1031 -0
- package/dist/scripts/pr-comment.d.ts +10 -0
- package/dist/scripts/pr-comment.js +510 -0
- package/dist/scripts/readiness-report.d.ts +88 -0
- package/dist/scripts/readiness-report.js +342 -0
- package/dist/scripts/update-quality-scores.d.ts +15 -0
- package/dist/scripts/update-quality-scores.js +184 -0
- package/dist/scripts/validate-task-sources.d.ts +1 -1
- package/dist/scripts/validate-task-sources.js +1 -1
- package/dist/scripts/validate.d.ts +13 -0
- package/dist/scripts/validate.js +79 -0
- package/dist/scripts/webhook-server.d.ts +26 -0
- package/dist/scripts/webhook-server.js +147 -0
- package/dist/scripts/weekly-digest.d.ts +24 -0
- package/dist/scripts/weekly-digest.js +144 -0
- package/dist/sinks/format-slack.d.ts +64 -0
- package/dist/sinks/format-slack.js +306 -0
- package/dist/sinks/slack-sink.d.ts +27 -0
- package/dist/sinks/slack-sink.js +78 -0
- package/dist/sinks/types.d.ts +1 -1
- package/dist/sinks/types.js +1 -1
- package/dist/sinks/webhook-sink.d.ts +19 -0
- package/dist/sinks/webhook-sink.js +50 -0
- package/dist/tasks/knowledge-probe/define-type-api.task.ts +66 -0
- package/dist/tasks/knowledge-probe/groq-projections.task.ts +62 -0
- package/dist/tasks/literacy/content-lake.task.ts +181 -0
- package/dist/tasks/literacy/frameworks.task.ts +129 -0
- package/dist/tasks/literacy/functions.task.ts +70 -0
- package/dist/tasks/literacy/groq.task.ts +259 -0
- package/dist/tasks/literacy/image-handling.task.ts +95 -0
- package/dist/tasks/literacy/nextjs-live.task.ts +76 -0
- package/dist/tasks/literacy/portable-text.task.ts +169 -0
- package/dist/tasks/literacy/studio-setup.task.ts +134 -0
- package/dist/tasks/literacy/visual-editing.task.ts +147 -0
- package/package.json +32 -24
- package/tasks/.expanded.agentic.yaml +280 -0
- package/tasks/.expanded.yaml +565 -0
- package/tasks/knowledge-probe/define-type-api.task.ts +11 -0
- package/tasks/knowledge-probe/groq-projections.task.ts +3 -0
- package/tasks/literacy/content-lake.task.ts +181 -0
- package/tasks/literacy/frameworks.task.ts +1 -0
- package/tasks/literacy/functions.task.ts +1 -0
- package/tasks/literacy/groq.task.ts +1 -0
- package/tasks/literacy/image-handling.task.ts +95 -0
- package/tasks/literacy/nextjs-live.task.ts +2 -1
- package/tasks/literacy/portable-text.task.ts +169 -0
- package/tasks/literacy/studio-setup.task.ts +5 -2
- package/tasks/literacy/visual-editing.task.ts +1 -0
- package/LICENSE +0 -21
- package/tasks/frameworks.yaml +0 -98
- package/tasks/functions.yaml +0 -51
- package/tasks/groq.yaml +0 -216
- package/tasks/nextjs-live.yaml +0 -62
- package/tasks/studio-setup.yaml +0 -111
- package/tasks/visual-editing.yaml +0 -120
|
@@ -10,7 +10,7 @@
|
|
|
10
10
|
* means unchanged tasks are skipped. Changed tasks are upserted via
|
|
11
11
|
* createOrReplace.
|
|
12
12
|
*
|
|
13
|
-
* @see docs/exec-plans/tasks-as-content/phase-5-content-lake-mirroring.md
|
|
13
|
+
* @see docs/archive/exec-plans/tasks-as-content/phase-5-content-lake-mirroring.md
|
|
14
14
|
*/
|
|
15
15
|
import type { SanityClient } from "@sanity/client";
|
|
16
16
|
import { type LiteracyTaskDefinition, type Logger } from "../_vendor/ailf-core/index.d.ts";
|
|
@@ -58,7 +58,7 @@ export interface MirrorResult {
|
|
|
58
58
|
skipped: number;
|
|
59
59
|
/** Feature areas auto-created */
|
|
60
60
|
areasCreated: string[];
|
|
61
|
-
/**
|
|
61
|
+
/** Context doc slugs that failed to resolve */
|
|
62
62
|
unresolvedSlugs: string[];
|
|
63
63
|
/** Errors (non-fatal — mirror continues) */
|
|
64
64
|
errors: string[];
|
|
@@ -70,7 +70,7 @@ export interface MirrorResult {
|
|
|
70
70
|
* 1. Compute deterministic document ID
|
|
71
71
|
* 2. Compute content hash of the task definition
|
|
72
72
|
* 3. Check if mirror document exists with same hash → skip if unchanged
|
|
73
|
-
* 4. Resolve
|
|
73
|
+
* 4. Resolve context doc slugs → Sanity references
|
|
74
74
|
* 5. Auto-create feature areas if needed
|
|
75
75
|
* 6. Upsert the ailf.task document with origin block
|
|
76
76
|
*/
|
|
@@ -114,8 +114,8 @@ export declare function buildMirrorDocument(task: LiteracyTaskDefinition, opts:
|
|
|
114
114
|
_type: string;
|
|
115
115
|
ownership: string;
|
|
116
116
|
status: import("@sanity/ailf-core").TaskStatus;
|
|
117
|
-
|
|
118
|
-
|
|
117
|
+
assertions: Record<string, unknown>[];
|
|
118
|
+
contextDocs: ({
|
|
119
119
|
_key: string;
|
|
120
120
|
reason: string;
|
|
121
121
|
} | {
|
|
@@ -138,9 +138,9 @@ export declare function buildMirrorDocument(task: LiteracyTaskDefinition, opts:
|
|
|
138
138
|
_key: string;
|
|
139
139
|
reason: string;
|
|
140
140
|
})[];
|
|
141
|
-
|
|
141
|
+
title: string;
|
|
142
142
|
docCoverage: boolean;
|
|
143
|
-
|
|
143
|
+
area: {
|
|
144
144
|
_ref: string;
|
|
145
145
|
_type: string;
|
|
146
146
|
};
|
|
@@ -161,5 +161,5 @@ export declare function buildMirrorDocument(task: LiteracyTaskDefinition, opts:
|
|
|
161
161
|
author: GitAuthor;
|
|
162
162
|
lastEditor: GitAuthor;
|
|
163
163
|
};
|
|
164
|
-
|
|
164
|
+
promptText: string;
|
|
165
165
|
};
|
|
@@ -10,7 +10,7 @@
|
|
|
10
10
|
* means unchanged tasks are skipped. Changed tasks are upserted via
|
|
11
11
|
* createOrReplace.
|
|
12
12
|
*
|
|
13
|
-
* @see docs/exec-plans/tasks-as-content/phase-5-content-lake-mirroring.md
|
|
13
|
+
* @see docs/archive/exec-plans/tasks-as-content/phase-5-content-lake-mirroring.md
|
|
14
14
|
*/
|
|
15
15
|
import { createHash } from "crypto";
|
|
16
16
|
import { readFileSync } from "fs";
|
|
@@ -26,7 +26,7 @@ import { ConsoleLogger } from "../adapters/loggers/index.js";
|
|
|
26
26
|
* 1. Compute deterministic document ID
|
|
27
27
|
* 2. Compute content hash of the task definition
|
|
28
28
|
* 3. Check if mirror document exists with same hash → skip if unchanged
|
|
29
|
-
* 4. Resolve
|
|
29
|
+
* 4. Resolve context doc slugs → Sanity references
|
|
30
30
|
* 5. Auto-create feature areas if needed
|
|
31
31
|
* 6. Upsert the ailf.task document with origin block
|
|
32
32
|
*/
|
|
@@ -43,7 +43,7 @@ export async function mirrorRepoTasks(options) {
|
|
|
43
43
|
};
|
|
44
44
|
if (tasks.length === 0)
|
|
45
45
|
return result;
|
|
46
|
-
// Batch-resolve all
|
|
46
|
+
// Batch-resolve all context doc slugs (slug refs only — other ref types
|
|
47
47
|
// are stored without a resolved article reference for now)
|
|
48
48
|
const allSlugs = [
|
|
49
49
|
...new Set(tasks.flatMap((t) => (t.context?.docs ?? []).filter(isSlugRef).map((d) => d.slug))),
|
|
@@ -353,10 +353,10 @@ async function fetchExistingDocState(client, docIds) {
|
|
|
353
353
|
/** @internal Exported for testing — not part of the public API. */
|
|
354
354
|
export function buildMirrorDocument(task, opts) {
|
|
355
355
|
const { contentHash, docId, existingAuthor, git, slugToDocId } = opts;
|
|
356
|
-
// Build
|
|
356
|
+
// Build context docs with resolved references and correct refType.
|
|
357
357
|
// Each ref type gets the appropriate resolution fields set on the
|
|
358
358
|
// mirror document so Studio can display them correctly.
|
|
359
|
-
const
|
|
359
|
+
const contextDocs = (task.context?.docs ?? []).map((ref, i) => {
|
|
360
360
|
const base = { _key: `cd${i}`, reason: ref.reason ?? "" };
|
|
361
361
|
if (isSlugRef(ref)) {
|
|
362
362
|
const resolvedId = slugToDocId.get(ref.slug);
|
|
@@ -428,11 +428,11 @@ export function buildMirrorDocument(task, opts) {
|
|
|
428
428
|
_type: "ailf.task",
|
|
429
429
|
ownership: "repo",
|
|
430
430
|
status: task.status ?? "active",
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
431
|
+
assertions: assertArray,
|
|
432
|
+
contextDocs,
|
|
433
|
+
title: task.title,
|
|
434
434
|
docCoverage: task.docCoverage ?? false,
|
|
435
|
-
|
|
435
|
+
area: {
|
|
436
436
|
_ref: `ailf.featureArea.${area}`,
|
|
437
437
|
_type: "reference",
|
|
438
438
|
},
|
|
@@ -452,7 +452,7 @@ export function buildMirrorDocument(task, opts) {
|
|
|
452
452
|
author: existingAuthor ?? git.author,
|
|
453
453
|
lastEditor: git.author,
|
|
454
454
|
},
|
|
455
|
-
|
|
455
|
+
promptText: task.prompt?.text ?? "",
|
|
456
456
|
...(task.baseline
|
|
457
457
|
? {
|
|
458
458
|
baseline: {
|
|
@@ -7,7 +7,7 @@
|
|
|
7
7
|
* emoji markers, alignment, and color-coding (via unicode markers).
|
|
8
8
|
* The JSON formatter produces machine-readable output for CI/CD.
|
|
9
9
|
*
|
|
10
|
-
* @see docs/exec-plans/execution-preview.md
|
|
10
|
+
* @see docs/archive/exec-plans/execution-preview.md
|
|
11
11
|
*/
|
|
12
12
|
import type { ExecutionPlan } from "./plan.js";
|
|
13
13
|
/**
|
|
@@ -7,7 +7,7 @@
|
|
|
7
7
|
* emoji markers, alignment, and color-coding (via unicode markers).
|
|
8
8
|
* The JSON formatter produces machine-readable output for CI/CD.
|
|
9
9
|
*
|
|
10
|
-
* @see docs/exec-plans/execution-preview.md
|
|
10
|
+
* @see docs/archive/exec-plans/execution-preview.md
|
|
11
11
|
*/
|
|
12
12
|
import { formatCost } from "../agent-observer/pricing.js";
|
|
13
13
|
// ---------------------------------------------------------------------------
|
package/dist/pipeline/plan.d.ts
CHANGED
|
@@ -7,7 +7,7 @@
|
|
|
7
7
|
* anything. Calls existing pure functions (task expansion, model loading,
|
|
8
8
|
* cache hashing, pricing) and composes them into an `ExecutionPlan`.
|
|
9
9
|
*
|
|
10
|
-
* @see docs/exec-plans/execution-preview.md
|
|
10
|
+
* @see docs/archive/exec-plans/execution-preview.md
|
|
11
11
|
*/
|
|
12
12
|
import type { DebugOptions, EvalMode } from "./types.js";
|
|
13
13
|
import { LiteracyVariant } from "./normalize-mode.js";
|
package/dist/pipeline/plan.js
CHANGED
|
@@ -7,16 +7,17 @@
|
|
|
7
7
|
* anything. Calls existing pure functions (task expansion, model loading,
|
|
8
8
|
* cache hashing, pricing) and composes them into an `ExecutionPlan`.
|
|
9
9
|
*
|
|
10
|
-
* @see docs/exec-plans/execution-preview.md
|
|
10
|
+
* @see docs/archive/exec-plans/execution-preview.md
|
|
11
11
|
*/
|
|
12
12
|
import { existsSync, readdirSync, statSync } from "fs";
|
|
13
13
|
import { resolve } from "path";
|
|
14
|
+
import { createLiteracyModeBase, modelMatchesLiteracyVariant, } from "./compiler/mode-bases/literacy.js";
|
|
14
15
|
import { lookupPricing } from "../agent-observer/pricing.js";
|
|
15
16
|
import { RepoTaskSource } from "../adapters/task-sources/repo-task-source.js";
|
|
16
17
|
import { loadAllTsTaskFiles } from "../adapters/task-sources/task-file-loader.js";
|
|
17
18
|
import { lookupCache } from "./cache.js";
|
|
18
19
|
import { compileLiteracyTasks } from "./compiler/literacy-bridge.js";
|
|
19
|
-
import { tryLoadConfigFile } from "./compiler/config-loader.js";
|
|
20
|
+
import { resolveVendoredSubdir, tryLoadConfigFile, } from "./compiler/config-loader.js";
|
|
20
21
|
import { LiteracyVariant } from "./normalize-mode.js";
|
|
21
22
|
import { validateConfiguration } from "./validate.js";
|
|
22
23
|
/**
|
|
@@ -44,33 +45,35 @@ function loadModelsFile(rootDir) {
|
|
|
44
45
|
const result = tryLoadConfigFile("models", rootDir);
|
|
45
46
|
return result?.data ?? null;
|
|
46
47
|
}
|
|
48
|
+
const _literacyBase = createLiteracyModeBase();
|
|
47
49
|
/**
|
|
48
|
-
*
|
|
50
|
+
* Check whether a model participates in a given eval mode + optional variant.
|
|
49
51
|
*
|
|
50
|
-
*
|
|
51
|
-
*
|
|
52
|
-
*
|
|
52
|
+
* For literacy mode, checks both mode enrollment and variant participation
|
|
53
|
+
* via the shared `modelMatchesLiteracyVariant` helper. For non-literacy
|
|
54
|
+
* modes, checks mode enrollment only.
|
|
53
55
|
*/
|
|
54
|
-
function
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
56
|
+
function modeMatchesModel(mode, model, variant) {
|
|
57
|
+
// Check basic mode enrollment
|
|
58
|
+
if (model.modes &&
|
|
59
|
+
model.modes.length > 0 &&
|
|
60
|
+
!model.modes.includes(mode)) {
|
|
61
|
+
return false;
|
|
62
|
+
}
|
|
63
|
+
// For literacy mode with a variant, check variant participation
|
|
64
|
+
if (mode === "literacy" && variant) {
|
|
58
65
|
switch (variant) {
|
|
59
66
|
case LiteracyVariant.AGENTIC:
|
|
60
|
-
return (
|
|
61
|
-
|
|
62
|
-
case LiteracyVariant.OBSERVED:
|
|
63
|
-
return modelModes.includes(LiteracyVariant.OBSERVED);
|
|
67
|
+
return (modelMatchesLiteracyVariant(model, "agentic-naive") ||
|
|
68
|
+
modelMatchesLiteracyVariant(model, "agentic-optimized"));
|
|
64
69
|
case LiteracyVariant.FULL:
|
|
65
|
-
return (
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
case LiteracyVariant.STANDARD:
|
|
70
|
+
return (modelMatchesLiteracyVariant(model, "baseline") ||
|
|
71
|
+
modelMatchesLiteracyVariant(model, "agentic-naive") ||
|
|
72
|
+
modelMatchesLiteracyVariant(model, "agentic-optimized"));
|
|
69
73
|
default:
|
|
70
|
-
return
|
|
74
|
+
return modelMatchesLiteracyVariant(model, variant);
|
|
71
75
|
}
|
|
72
76
|
}
|
|
73
|
-
// Non-literacy modes accept all models by default
|
|
74
77
|
return true;
|
|
75
78
|
}
|
|
76
79
|
// ---------------------------------------------------------------------------
|
|
@@ -139,18 +142,25 @@ export async function buildPipelinePlan(opts, rootDir) {
|
|
|
139
142
|
const modelsForCompile = loadModelsFile(rootDir);
|
|
140
143
|
const graderProvider = modelsForCompile?.grader?.id ?? "openai:chat:gpt-4o";
|
|
141
144
|
const modelEntries = (modelsForCompile?.models ?? []).map((m) => ({ id: m.id, label: m.label }));
|
|
142
|
-
// Load *.task.ts files from tasks/<mode>/
|
|
143
|
-
const modeTasksDir =
|
|
145
|
+
// Load *.task.ts files from tasks/<mode>/ (or dist/tasks/<mode>/ when vendored)
|
|
146
|
+
const modeTasksDir = resolveVendoredSubdir(rootDir, `tasks/${opts.mode}`);
|
|
144
147
|
if (existsSync(modeTasksDir)) {
|
|
145
148
|
const rawTasks = await loadAllTsTaskFiles(modeTasksDir);
|
|
146
149
|
if (rawTasks.length > 0) {
|
|
147
150
|
// Dynamic import of the handler module
|
|
148
|
-
const handlerModulePath = `./compiler/mode-handlers/${opts.mode}
|
|
151
|
+
const handlerModulePath = `./compiler/mode-handlers/${opts.mode}/index.js`;
|
|
149
152
|
const mod = await import(handlerModulePath);
|
|
150
153
|
const handler = mod.handler;
|
|
154
|
+
const skippedByMode = new Map();
|
|
151
155
|
for (const rawFile of rawTasks) {
|
|
152
156
|
for (const taskDef of rawFile.tasks) {
|
|
153
157
|
const task = taskDef;
|
|
158
|
+
// Filter to matching mode (skip tasks from other modes in same dir)
|
|
159
|
+
if ("mode" in task && task.mode !== opts.mode) {
|
|
160
|
+
const taskMode = task.mode ?? "unknown";
|
|
161
|
+
skippedByMode.set(taskMode, (skippedByMode.get(taskMode) ?? 0) + 1);
|
|
162
|
+
continue;
|
|
163
|
+
}
|
|
154
164
|
// Apply area/task/tag filter
|
|
155
165
|
if (filter) {
|
|
156
166
|
if (filter.areas?.length &&
|
|
@@ -192,6 +202,13 @@ export async function buildPipelinePlan(opts, rootDir) {
|
|
|
192
202
|
}
|
|
193
203
|
}
|
|
194
204
|
}
|
|
205
|
+
if (skippedByMode.size > 0) {
|
|
206
|
+
const summary = [...skippedByMode.entries()]
|
|
207
|
+
.map(([m, n]) => `${n} ${m}`)
|
|
208
|
+
.join(", ");
|
|
209
|
+
const total = [...skippedByMode.values()].reduce((a, b) => a + b, 0);
|
|
210
|
+
warnings.push(`Skipped ${total} task(s) with non-matching mode (${summary}). Current pipeline mode: ${opts.mode}. Run with --mode <mode> to include them.`);
|
|
211
|
+
}
|
|
195
212
|
}
|
|
196
213
|
}
|
|
197
214
|
}
|
|
@@ -203,13 +220,29 @@ export async function buildPipelinePlan(opts, rootDir) {
|
|
|
203
220
|
if (opts.repoTasksPath) {
|
|
204
221
|
try {
|
|
205
222
|
const repoSource = new RepoTaskSource(opts.repoTasksPath);
|
|
206
|
-
|
|
207
|
-
|
|
223
|
+
const allRepoTasks = await repoSource.loadTasks(filter);
|
|
224
|
+
// Filter to current mode tasks
|
|
225
|
+
const repoTasks = allRepoTasks.filter((t) => t.mode === opts.mode);
|
|
226
|
+
const skippedRepoTasks = allRepoTasks.length - repoTasks.length;
|
|
227
|
+
if (skippedRepoTasks > 0) {
|
|
228
|
+
const skippedModes = new Map();
|
|
229
|
+
for (const t of allRepoTasks) {
|
|
230
|
+
if (t.mode !== opts.mode) {
|
|
231
|
+
skippedModes.set(t.mode, (skippedModes.get(t.mode) ?? 0) + 1);
|
|
232
|
+
}
|
|
233
|
+
}
|
|
234
|
+
const summary = [...skippedModes.entries()]
|
|
235
|
+
.map(([m, n]) => `${n} ${m}`)
|
|
236
|
+
.join(", ");
|
|
237
|
+
warnings.push(`Skipped ${skippedRepoTasks} repo task(s) with non-matching mode (${summary}). Current pipeline mode: ${opts.mode}. Run with --mode <mode> to include them.`);
|
|
238
|
+
}
|
|
208
239
|
repoTaskCount = repoTasks.length;
|
|
209
|
-
if (repoTaskCount > 0) {
|
|
240
|
+
if (repoTaskCount > 0 && opts.mode === "literacy") {
|
|
241
|
+
// Literacy-specific compilation for repo tasks (detailed test expansion)
|
|
242
|
+
const literacyRepoTasks = repoTasks.filter((t) => t.mode === "literacy");
|
|
210
243
|
const modelsForCompile = loadModelsFile(rootDir);
|
|
211
244
|
const graderProvider = modelsForCompile?.grader?.id ?? "openai:chat:gpt-4o";
|
|
212
|
-
const compileResult = compileLiteracyTasks(
|
|
245
|
+
const compileResult = compileLiteracyTasks(literacyRepoTasks, {
|
|
213
246
|
rootDir,
|
|
214
247
|
evalMode: opts.variant === LiteracyVariant.AGENTIC
|
|
215
248
|
? LiteracyVariant.AGENTIC
|
|
@@ -231,6 +264,11 @@ export async function buildPipelinePlan(opts, rootDir) {
|
|
|
231
264
|
}
|
|
232
265
|
}
|
|
233
266
|
}
|
|
267
|
+
else if (repoTaskCount > 0) {
|
|
268
|
+
// Non-literacy modes: approximate 1 test per task (compilation not
|
|
269
|
+
// supported for non-literacy repo tasks in the explain preview yet)
|
|
270
|
+
totalTests += repoTaskCount;
|
|
271
|
+
}
|
|
234
272
|
}
|
|
235
273
|
catch {
|
|
236
274
|
warnings.push(`Failed to scan repo tasks at ${opts.repoTasksPath} — count may be underestimated`);
|
|
@@ -244,19 +282,19 @@ export async function buildPipelinePlan(opts, rootDir) {
|
|
|
244
282
|
const models = [];
|
|
245
283
|
let graderModelName = "";
|
|
246
284
|
if (modelsFile) {
|
|
247
|
-
const activeModels = modelsFile.models.filter((m) =>
|
|
285
|
+
const activeModels = modelsFile.models.filter((m) => modeMatchesModel(opts.mode, m, opts.variant));
|
|
248
286
|
// For agentic mode, each model appears twice (naive + optimized)
|
|
249
287
|
for (const m of activeModels) {
|
|
250
288
|
const modelName = extractModelName(m.id);
|
|
251
289
|
if (opts.variant === LiteracyVariant.AGENTIC) {
|
|
252
|
-
if (m
|
|
290
|
+
if (modelMatchesLiteracyVariant(m, "agentic-naive")) {
|
|
253
291
|
models.push({
|
|
254
292
|
id: m.id,
|
|
255
293
|
label: `${m.label} (Naive)`,
|
|
256
294
|
modelName,
|
|
257
295
|
});
|
|
258
296
|
}
|
|
259
|
-
if (m
|
|
297
|
+
if (modelMatchesLiteracyVariant(m, "agentic-optimized")) {
|
|
260
298
|
models.push({
|
|
261
299
|
id: m.id,
|
|
262
300
|
label: `${m.label} (Optimized)`,
|
package/dist/pipeline/probe.d.ts
CHANGED
|
@@ -14,7 +14,7 @@
|
|
|
14
14
|
* not "are these docs good enough?" The output is always labeled as
|
|
15
15
|
* directional and never displayed on the same scale as scored evaluations.
|
|
16
16
|
*
|
|
17
|
-
* @see docs/exec-plans/scenario-matrix-implementation/phase-4-content-release-integration.md
|
|
17
|
+
* @see docs/archive/exec-plans/scenario-matrix-implementation/phase-4-content-release-integration.md
|
|
18
18
|
*/
|
|
19
19
|
import type { ProbeResult } from "./types.js";
|
|
20
20
|
/** Generic probe prompt template */
|
package/dist/pipeline/probe.js
CHANGED
|
@@ -14,7 +14,7 @@
|
|
|
14
14
|
* not "are these docs good enough?" The output is always labeled as
|
|
15
15
|
* directional and never displayed on the same scale as scored evaluations.
|
|
16
16
|
*
|
|
17
|
-
* @see docs/exec-plans/scenario-matrix-implementation/phase-4-content-release-integration.md
|
|
17
|
+
* @see docs/archive/exec-plans/scenario-matrix-implementation/phase-4-content-release-integration.md
|
|
18
18
|
*/
|
|
19
19
|
// ---------------------------------------------------------------------------
|
|
20
20
|
// Constants
|
|
@@ -13,8 +13,8 @@
|
|
|
13
13
|
* - generateReadinessReport() — builds the structured report
|
|
14
14
|
* - formatReadinessMarkdown() — renders the report as markdown
|
|
15
15
|
*
|
|
16
|
-
* @see docs/exec-plans/scenario-matrix-implementation/phase-5-readiness-thresholds.md
|
|
17
|
-
* @see docs/exec-plans/eliminate-lib-layer.md
|
|
16
|
+
* @see docs/archive/exec-plans/scenario-matrix-implementation/phase-5-readiness-thresholds.md
|
|
17
|
+
* @see docs/archive/exec-plans/eliminate-lib-layer.md
|
|
18
18
|
*/
|
|
19
19
|
import type { ThresholdConfig } from "./schemas.js";
|
|
20
20
|
import type { GapAnalysisReport, GapEstimate, ScoreSummary, ThresholdEvaluation, ThresholdViolation } from "./types.js";
|
|
@@ -13,8 +13,8 @@
|
|
|
13
13
|
* - generateReadinessReport() — builds the structured report
|
|
14
14
|
* - formatReadinessMarkdown() — renders the report as markdown
|
|
15
15
|
*
|
|
16
|
-
* @see docs/exec-plans/scenario-matrix-implementation/phase-5-readiness-thresholds.md
|
|
17
|
-
* @see docs/exec-plans/eliminate-lib-layer.md
|
|
16
|
+
* @see docs/archive/exec-plans/scenario-matrix-implementation/phase-5-readiness-thresholds.md
|
|
17
|
+
* @see docs/archive/exec-plans/eliminate-lib-layer.md
|
|
18
18
|
*/
|
|
19
19
|
import { evaluateThresholds } from "./thresholds.js";
|
|
20
20
|
// ---------------------------------------------------------------------------
|
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
* - **not-applicable**: Updated, removed, or unchanged documents (these
|
|
16
16
|
* follow the standard before/after comparison flow from Phase 2).
|
|
17
17
|
*
|
|
18
|
-
* @see docs/exec-plans/scenario-matrix-implementation/phase-4-content-release-integration.md
|
|
18
|
+
* @see docs/archive/exec-plans/scenario-matrix-implementation/phase-4-content-release-integration.md
|
|
19
19
|
*/
|
|
20
20
|
import type { ClassifiedReleaseDocument, ProductFeature, ReleaseClassification } from "./types.js";
|
|
21
21
|
import type { ReverseMapping } from "./reverse-mapping.js";
|
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
* - **not-applicable**: Updated, removed, or unchanged documents (these
|
|
16
16
|
* follow the standard before/after comparison flow from Phase 2).
|
|
17
17
|
*
|
|
18
|
-
* @see docs/exec-plans/scenario-matrix-implementation/phase-4-content-release-integration.md
|
|
18
|
+
* @see docs/archive/exec-plans/scenario-matrix-implementation/phase-4-content-release-integration.md
|
|
19
19
|
*/
|
|
20
20
|
// ---------------------------------------------------------------------------
|
|
21
21
|
// Public API
|
|
@@ -10,7 +10,7 @@
|
|
|
10
10
|
* attribution (2c), and probe results (4b) into the document × area × task
|
|
11
11
|
* impact matrix specified by Scenario 2.4.
|
|
12
12
|
*
|
|
13
|
-
* @see docs/exec-plans/scenario-matrix-implementation/phase-4-content-release-integration.md
|
|
13
|
+
* @see docs/archive/exec-plans/scenario-matrix-implementation/phase-4-content-release-integration.md
|
|
14
14
|
*/
|
|
15
15
|
import type { AttributionReport, ComparisonReport, ProbeResult, ReleaseClassification, ReleaseImpactReport } from "./types.js";
|
|
16
16
|
/**
|
|
@@ -10,7 +10,7 @@
|
|
|
10
10
|
* attribution (2c), and probe results (4b) into the document × area × task
|
|
11
11
|
* impact matrix specified by Scenario 2.4.
|
|
12
12
|
*
|
|
13
|
-
* @see docs/exec-plans/scenario-matrix-implementation/phase-4-content-release-integration.md
|
|
13
|
+
* @see docs/archive/exec-plans/scenario-matrix-implementation/phase-4-content-release-integration.md
|
|
14
14
|
*/
|
|
15
15
|
// ---------------------------------------------------------------------------
|
|
16
16
|
// Public API
|
|
@@ -12,7 +12,7 @@
|
|
|
12
12
|
* - Clear "what does this mean?" context
|
|
13
13
|
* - skip-ailf bypass instructions
|
|
14
14
|
*
|
|
15
|
-
* @see docs/exec-plans/tasks-as-content/phase-6-pr-quality-gates.md
|
|
15
|
+
* @see docs/archive/exec-plans/tasks-as-content/phase-6-pr-quality-gates.md
|
|
16
16
|
* @see packages/eval/src/pipeline/repo-threshold-evaluator.ts
|
|
17
17
|
*/
|
|
18
18
|
import type { ComparisonReport, ScoreSummary } from "./types.js";
|
|
@@ -12,7 +12,7 @@
|
|
|
12
12
|
* - Clear "what does this mean?" context
|
|
13
13
|
* - skip-ailf bypass instructions
|
|
14
14
|
*
|
|
15
|
-
* @see docs/exec-plans/tasks-as-content/phase-6-pr-quality-gates.md
|
|
15
|
+
* @see docs/archive/exec-plans/tasks-as-content/phase-6-pr-quality-gates.md
|
|
16
16
|
* @see packages/eval/src/pipeline/repo-threshold-evaluator.ts
|
|
17
17
|
*/
|
|
18
18
|
// ---------------------------------------------------------------------------
|
|
@@ -10,7 +10,7 @@
|
|
|
10
10
|
* thresholds are per-area, defined by the AILF team, and drive
|
|
11
11
|
* readiness reports.
|
|
12
12
|
*
|
|
13
|
-
* @see docs/exec-plans/tasks-as-content/phase-6-pr-quality-gates.md
|
|
13
|
+
* @see docs/archive/exec-plans/tasks-as-content/phase-6-pr-quality-gates.md
|
|
14
14
|
* @see packages/eval/src/adapters/task-sources/repo-schemas.ts
|
|
15
15
|
*/
|
|
16
16
|
import type { ScoreSummary } from "./types.js";
|
|
@@ -10,7 +10,7 @@
|
|
|
10
10
|
* thresholds are per-area, defined by the AILF team, and drive
|
|
11
11
|
* readiness reports.
|
|
12
12
|
*
|
|
13
|
-
* @see docs/exec-plans/tasks-as-content/phase-6-pr-quality-gates.md
|
|
13
|
+
* @see docs/archive/exec-plans/tasks-as-content/phase-6-pr-quality-gates.md
|
|
14
14
|
* @see packages/eval/src/adapters/task-sources/repo-schemas.ts
|
|
15
15
|
*/
|
|
16
16
|
// ---------------------------------------------------------------------------
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* pipeline/resolve-mappings.ts
|
|
3
3
|
*
|
|
4
|
-
* Extracts canonical mappings from
|
|
5
|
-
* Each task
|
|
6
|
-
* directly — there is no separate mappings
|
|
4
|
+
* Extracts canonical mappings from task definitions (*.task.ts files).
|
|
5
|
+
* Each task contains context.docs and referenceSolution fields
|
|
6
|
+
* directly — there is no separate mappings file.
|
|
7
7
|
*
|
|
8
8
|
* The output shape matches what downstream consumers expect so
|
|
9
9
|
* fetch-docs, validate, and calculate-scores work without changes.
|
|
@@ -24,12 +24,12 @@ export interface ResolvedMappings {
|
|
|
24
24
|
}>;
|
|
25
25
|
}
|
|
26
26
|
/**
|
|
27
|
-
* Extract
|
|
28
|
-
* Only tasks with
|
|
27
|
+
* Extract canonical mappings from *.task.ts files in tasks/literacy/.
|
|
28
|
+
* Only tasks with context.docs and referenceSolution are included.
|
|
29
29
|
*/
|
|
30
30
|
export declare function extractInlineMappings(rootDir: string): ResolvedMappings;
|
|
31
31
|
/**
|
|
32
|
-
* Resolve canonical mappings from
|
|
32
|
+
* Resolve canonical mappings from task definitions.
|
|
33
33
|
* This is the single source of truth — there is no external mappings file.
|
|
34
34
|
*/
|
|
35
35
|
export declare function resolveMappings(rootDir: string): ResolvedMappings;
|
|
@@ -1,72 +1,72 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* pipeline/resolve-mappings.ts
|
|
3
3
|
*
|
|
4
|
-
* Extracts canonical mappings from
|
|
5
|
-
* Each task
|
|
6
|
-
* directly — there is no separate mappings
|
|
4
|
+
* Extracts canonical mappings from task definitions (*.task.ts files).
|
|
5
|
+
* Each task contains context.docs and referenceSolution fields
|
|
6
|
+
* directly — there is no separate mappings file.
|
|
7
7
|
*
|
|
8
8
|
* The output shape matches what downstream consumers expect so
|
|
9
9
|
* fetch-docs, validate, and calculate-scores work without changes.
|
|
10
10
|
*/
|
|
11
|
-
import { existsSync
|
|
12
|
-
import {
|
|
13
|
-
import {
|
|
11
|
+
import { existsSync } from "fs";
|
|
12
|
+
import { discoverTsTaskFiles, loadTsTaskFileSync, } from "../adapters/task-sources/task-file-loader.js";
|
|
13
|
+
import { resolveVendoredSubdir } from "./compiler/config-loader.js";
|
|
14
14
|
// ---------------------------------------------------------------------------
|
|
15
15
|
// Resolution
|
|
16
16
|
// ---------------------------------------------------------------------------
|
|
17
17
|
/**
|
|
18
|
-
* Extract
|
|
19
|
-
* Only tasks with
|
|
18
|
+
* Extract canonical mappings from *.task.ts files in tasks/literacy/.
|
|
19
|
+
* Only tasks with context.docs and referenceSolution are included.
|
|
20
20
|
*/
|
|
21
21
|
export function extractInlineMappings(rootDir) {
|
|
22
|
-
const tasksDir =
|
|
22
|
+
const tasksDir = resolveVendoredSubdir(rootDir, "tasks/literacy");
|
|
23
23
|
const result = { feature_areas: {} };
|
|
24
24
|
if (!existsSync(tasksDir))
|
|
25
25
|
return result;
|
|
26
|
-
const
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
const tasks = [];
|
|
37
|
-
for (const entry of parsed) {
|
|
38
|
-
if (!isInlineTaskWithMappings(entry))
|
|
26
|
+
const files = discoverTsTaskFiles(tasksDir);
|
|
27
|
+
for (const file of files) {
|
|
28
|
+
const loaded = loadTsTaskFileSync(file);
|
|
29
|
+
for (const task of loaded.tasks) {
|
|
30
|
+
const t = task;
|
|
31
|
+
const area = typeof t.area === "string" ? t.area : undefined;
|
|
32
|
+
const id = typeof t.id === "string" ? t.id : undefined;
|
|
33
|
+
const title = typeof t.title === "string" ? t.title : "";
|
|
34
|
+
const referenceSolution = typeof t.referenceSolution === "string" ? t.referenceSolution : "";
|
|
35
|
+
if (!area || !id)
|
|
39
36
|
continue;
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
37
|
+
// Extract docs from context.docs (GeneralizedDocRef[])
|
|
38
|
+
const context = t.context;
|
|
39
|
+
const docs = [];
|
|
40
|
+
if (context?.docs && Array.isArray(context.docs)) {
|
|
41
|
+
for (const doc of context.docs) {
|
|
42
|
+
const d = doc;
|
|
43
|
+
if (typeof d.slug === "string") {
|
|
44
|
+
docs.push({
|
|
45
|
+
slug: d.slug,
|
|
46
|
+
reason: typeof d.reason === "string" ? d.reason : "",
|
|
47
|
+
});
|
|
48
|
+
}
|
|
49
|
+
}
|
|
50
|
+
}
|
|
51
|
+
if (docs.length === 0 || !referenceSolution)
|
|
52
|
+
continue;
|
|
53
|
+
if (!result.feature_areas[area]) {
|
|
54
|
+
result.feature_areas[area] = { tasks: [] };
|
|
55
|
+
}
|
|
56
|
+
result.feature_areas[area].tasks.push({
|
|
57
|
+
canonical_docs: docs,
|
|
58
|
+
description: title,
|
|
59
|
+
id,
|
|
60
|
+
reference_solution: referenceSolution,
|
|
45
61
|
});
|
|
46
62
|
}
|
|
47
|
-
if (tasks.length > 0) {
|
|
48
|
-
result.feature_areas[featureArea] = { tasks };
|
|
49
|
-
}
|
|
50
63
|
}
|
|
51
64
|
return result;
|
|
52
65
|
}
|
|
53
66
|
/**
|
|
54
|
-
* Resolve canonical mappings from
|
|
67
|
+
* Resolve canonical mappings from task definitions.
|
|
55
68
|
* This is the single source of truth — there is no external mappings file.
|
|
56
69
|
*/
|
|
57
70
|
export function resolveMappings(rootDir) {
|
|
58
71
|
return extractInlineMappings(rootDir);
|
|
59
72
|
}
|
|
60
|
-
// ---------------------------------------------------------------------------
|
|
61
|
-
// Helpers
|
|
62
|
-
// ---------------------------------------------------------------------------
|
|
63
|
-
function isInlineTaskWithMappings(entry) {
|
|
64
|
-
if (typeof entry !== "object" || entry === null)
|
|
65
|
-
return false;
|
|
66
|
-
const e = entry;
|
|
67
|
-
return (typeof e.id === "string" &&
|
|
68
|
-
typeof e.description === "string" &&
|
|
69
|
-
Array.isArray(e.canonical_docs) &&
|
|
70
|
-
e.canonical_docs.length > 0 &&
|
|
71
|
-
typeof e.reference_solution === "string");
|
|
72
|
-
}
|
|
@@ -2,9 +2,9 @@
|
|
|
2
2
|
* pipeline/retrieval-metrics.ts
|
|
3
3
|
*
|
|
4
4
|
* Computes retrieval precision and recall by comparing agent-retrieved
|
|
5
|
-
* doc slugs against canonical_docs defined in task
|
|
5
|
+
* doc slugs against canonical_docs defined in task definitions.
|
|
6
6
|
*
|
|
7
|
-
* This is a pure computation module — no file I/O beyond reading task
|
|
7
|
+
* This is a pure computation module — no file I/O beyond reading task files.
|
|
8
8
|
*/
|
|
9
9
|
import type { RetrievalMetrics, TaskRetrievalMetrics } from "./types.js";
|
|
10
10
|
export interface AgenticBehaviorData {
|
|
@@ -30,7 +30,7 @@ export declare function computeRetrievalMetrics(rootDir: string, behaviors: Agen
|
|
|
30
30
|
*/
|
|
31
31
|
export declare function computeTaskMetrics(taskId: string, area: string, retrieved: string[], canonical: Set<string>): TaskRetrievalMetrics;
|
|
32
32
|
/**
|
|
33
|
-
* Load
|
|
33
|
+
* Load canonical docs from *.task.ts files in tasks/literacy/.
|
|
34
34
|
* Returns a map of taskId → { slugs: Set<string>, area: string }.
|
|
35
35
|
*/
|
|
36
36
|
export declare function loadCanonicalDocs(rootDir: string): Map<string, {
|