@sanity/ailf 2.0.0 → 2.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/canonical/grader-references/README.md +2 -2
- package/canonical/reference-solutions/content-lake/mutations.ts +160 -0
- package/canonical/reference-solutions/content-lake/realtime.ts +187 -0
- package/canonical/reference-solutions/image-handling/asset-pipeline.tsx +166 -0
- package/canonical/reference-solutions/portable-text/custom-blocks.ts +204 -0
- package/canonical/reference-solutions/portable-text/rendering.tsx +163 -0
- package/config/features.ts +1 -1
- package/config/models.ts +28 -23
- package/config/sources.ts +1 -1
- package/config/thresholds.ts +1 -1
- package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.d.ts +10 -0
- package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.js +185 -0
- package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.d.ts +6 -0
- package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.js +42 -0
- package/dist/_vendor/ailf-core/artifact-capture/noop-collector.d.ts +14 -0
- package/dist/_vendor/ailf-core/artifact-capture/noop-collector.js +25 -0
- package/dist/_vendor/ailf-core/config-helpers.d.ts +6 -0
- package/dist/_vendor/ailf-core/config-helpers.js +29 -0
- package/dist/_vendor/ailf-core/examples/index.d.ts +164 -94
- package/dist/_vendor/ailf-core/examples/index.js +208 -114
- package/dist/_vendor/ailf-core/index.d.ts +1 -0
- package/dist/_vendor/ailf-core/index.js +1 -0
- package/dist/_vendor/ailf-core/ports/artifact-collector.d.ts +94 -0
- package/dist/_vendor/ailf-core/ports/artifact-collector.js +13 -0
- package/dist/_vendor/ailf-core/ports/capture-comparator.d.ts +138 -0
- package/dist/_vendor/ailf-core/ports/capture-comparator.js +10 -0
- package/dist/_vendor/ailf-core/ports/context.d.ts +20 -1
- package/dist/_vendor/ailf-core/ports/eval-runner.d.ts +6 -0
- package/dist/_vendor/ailf-core/ports/index.d.ts +2 -0
- package/dist/_vendor/ailf-core/ports/pipeline-step.d.ts +11 -0
- package/dist/_vendor/ailf-core/ports/task-source.d.ts +3 -3
- package/dist/_vendor/ailf-core/ports/task-source.js +3 -3
- package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +6 -1
- package/dist/_vendor/ailf-core/schemas/pipeline-request.js +14 -2
- package/dist/_vendor/ailf-core/services/config-helpers.d.ts +16 -1
- package/dist/_vendor/ailf-core/services/config-helpers.js +21 -0
- package/dist/_vendor/ailf-core/services/index.d.ts +1 -1
- package/dist/_vendor/ailf-core/services/index.js +1 -1
- package/dist/_vendor/ailf-core/services/scoring.js +9 -0
- package/dist/_vendor/ailf-core/types/generalized-task.d.ts +12 -1
- package/dist/_vendor/ailf-core/types/generalized-task.js +1 -1
- package/dist/_vendor/ailf-core/types/index.d.ts +47 -4
- package/dist/_vendor/ailf-core/types/plugin-registry.d.ts +27 -0
- package/dist/_vendor/ailf-shared/eval-modes.d.ts +15 -0
- package/dist/_vendor/ailf-shared/eval-modes.js +18 -0
- package/dist/_vendor/ailf-tasks/cli.d.ts +8 -0
- package/dist/_vendor/ailf-tasks/cli.js +61 -0
- package/dist/_vendor/ailf-tasks/index.d.ts +13 -0
- package/dist/_vendor/ailf-tasks/index.js +16 -0
- package/dist/_vendor/ailf-tasks/parser.d.ts +27 -0
- package/dist/_vendor/ailf-tasks/parser.js +73 -0
- package/dist/_vendor/ailf-tasks/schemas.d.ts +198 -0
- package/dist/_vendor/ailf-tasks/schemas.js +180 -0
- package/dist/_vendor/ailf-tasks/validation.d.ts +47 -0
- package/dist/_vendor/ailf-tasks/validation.js +162 -0
- package/dist/adapters/api-client/remediation.js +2 -2
- package/dist/adapters/config-sources/file-config-adapter.js +6 -1
- package/dist/adapters/eval-runners/promptfoo-eval-adapter.js +8 -2
- package/dist/adapters/index.d.ts +0 -1
- package/dist/adapters/index.js +0 -1
- package/dist/adapters/task-sources/composite-task-source.d.ts +1 -1
- package/dist/adapters/task-sources/composite-task-source.js +1 -1
- package/dist/adapters/task-sources/content-lake-task-source.d.ts +4 -6
- package/dist/adapters/task-sources/content-lake-task-source.js +4 -6
- package/dist/adapters/task-sources/index.d.ts +1 -2
- package/dist/adapters/task-sources/index.js +1 -2
- package/dist/adapters/task-sources/repo-schemas.d.ts +1 -1
- package/dist/adapters/task-sources/repo-schemas.js +2 -2
- package/dist/adapters/task-sources/repo-task-source.js +1 -1
- package/dist/adapters/task-sources/repo-trigger.d.ts +1 -1
- package/dist/adapters/task-sources/repo-trigger.js +1 -1
- package/dist/adapters/task-sources/task-file-loader.d.ts +9 -6
- package/dist/adapters/task-sources/task-file-loader.js +20 -6
- package/dist/agent-observer/test-imports.d.ts +7 -0
- package/dist/agent-observer/test-imports.js +185 -0
- package/dist/artifact-capture/comparator.d.ts +22 -0
- package/dist/artifact-capture/comparator.js +493 -0
- package/dist/artifact-capture/filesystem-collector.d.ts +42 -0
- package/dist/artifact-capture/filesystem-collector.js +237 -0
- package/dist/artifact-capture/redact-artifact.d.ts +20 -0
- package/dist/artifact-capture/redact-artifact.js +115 -0
- package/dist/assertions/source-isolation.d.ts +1 -1
- package/dist/assertions/source-isolation.js +1 -1
- package/dist/cli.js +4 -0
- package/dist/commands/calculate-scores.js +1 -0
- package/dist/commands/capture-compare.d.ts +15 -0
- package/dist/commands/capture-compare.js +253 -0
- package/dist/commands/capture-list.d.ts +12 -0
- package/dist/commands/capture-list.js +147 -0
- package/dist/commands/capture.d.ts +9 -0
- package/dist/commands/capture.js +16 -0
- package/dist/commands/chronic-failures.d.ts +8 -0
- package/dist/commands/chronic-failures.js +33 -0
- package/dist/commands/explain-handler.d.ts +1 -1
- package/dist/commands/explain-handler.js +37 -8
- package/dist/commands/fetch-docs.js +1 -0
- package/dist/commands/generate-configs.d.ts +3 -3
- package/dist/commands/generate-configs.js +20 -8
- package/dist/commands/init.d.ts +2 -3
- package/dist/commands/init.js +56 -170
- package/dist/commands/pipeline-action.d.ts +7 -1
- package/dist/commands/pipeline-action.js +43 -19
- package/dist/commands/pipeline.d.ts +6 -1
- package/dist/commands/pipeline.js +7 -2
- package/dist/commands/pr-comment.js +1 -0
- package/dist/commands/publish.js +1 -0
- package/dist/commands/shared/help.js +2 -2
- package/dist/commands/update-quality-scores.d.ts +5 -0
- package/dist/commands/update-quality-scores.js +20 -0
- package/dist/composition-root.d.ts +2 -3
- package/dist/composition-root.js +27 -14
- package/dist/config/features.ts +23 -0
- package/dist/config/models.ts +100 -0
- package/dist/config/prompts.ts +16 -0
- package/dist/config/rubrics.ts +225 -0
- package/dist/config/schedules.ts +47 -0
- package/dist/config/sinks.ts +37 -0
- package/dist/config/sources.ts +21 -0
- package/dist/config/thresholds.ts +61 -0
- package/dist/lib/agent-behavior-report.d.ts +8 -0
- package/dist/lib/agent-behavior-report.js +185 -0
- package/dist/lib/baseline.d.ts +19 -0
- package/dist/lib/baseline.js +153 -0
- package/dist/lib/calculate-scores.d.ts +23 -0
- package/dist/lib/calculate-scores.js +42 -0
- package/dist/lib/compare.d.ts +18 -0
- package/dist/lib/compare.js +170 -0
- package/dist/lib/coverage-audit.d.ts +4 -0
- package/dist/lib/coverage-audit.js +42 -0
- package/dist/lib/discovery-report.d.ts +13 -0
- package/dist/lib/discovery-report.js +57 -0
- package/dist/lib/fetch-docs.d.ts +30 -0
- package/dist/lib/fetch-docs.js +171 -0
- package/dist/lib/generate-configs.d.ts +25 -0
- package/dist/lib/generate-configs.js +42 -0
- package/dist/lib/grader-api.d.ts +21 -0
- package/dist/lib/grader-api.js +34 -0
- package/dist/lib/grader-compare.d.ts +19 -0
- package/dist/lib/grader-compare.js +91 -0
- package/dist/lib/grader-consistency.d.ts +27 -0
- package/dist/lib/grader-consistency.js +79 -0
- package/dist/lib/grader-sensitivity.d.ts +19 -0
- package/dist/lib/grader-sensitivity.js +75 -0
- package/dist/lib/grader-validate.d.ts +19 -0
- package/dist/lib/grader-validate.js +78 -0
- package/dist/lib/measure-retrieval.d.ts +14 -0
- package/dist/lib/measure-retrieval.js +71 -0
- package/dist/lib/pr-comment.d.ts +16 -0
- package/dist/lib/pr-comment.js +28 -0
- package/dist/lib/readiness-report.d.ts +13 -0
- package/dist/lib/readiness-report.js +108 -0
- package/dist/lib/webhook-server.d.ts +11 -0
- package/dist/lib/webhook-server.js +24 -0
- package/dist/lib/weekly-digest.d.ts +24 -0
- package/dist/lib/weekly-digest.js +148 -0
- package/dist/orchestration/build-app-context.js +13 -0
- package/dist/orchestration/cache-context.d.ts +23 -0
- package/dist/orchestration/cache-context.js +43 -0
- package/dist/orchestration/env-bridge.d.ts +21 -0
- package/dist/orchestration/env-bridge.js +66 -0
- package/dist/orchestration/load-pipeline-tasks.d.ts +34 -0
- package/dist/orchestration/load-pipeline-tasks.js +52 -0
- package/dist/orchestration/pipeline-orchestrator.js +75 -5
- package/dist/orchestration/step-runner.js +5 -1
- package/dist/orchestration/steps/calculate-scores-step.d.ts +1 -0
- package/dist/orchestration/steps/calculate-scores-step.js +13 -0
- package/dist/orchestration/steps/callback-step.js +10 -1
- package/dist/orchestration/steps/compare-step.js +6 -3
- package/dist/orchestration/steps/discovery-report-step.js +6 -2
- package/dist/orchestration/steps/fetch-docs-shell.d.ts +17 -0
- package/dist/orchestration/steps/fetch-docs-shell.js +30 -0
- package/dist/orchestration/steps/fetch-docs-step.d.ts +1 -0
- package/dist/orchestration/steps/fetch-docs-step.js +30 -16
- package/dist/orchestration/steps/gap-analysis-step.js +13 -2
- package/dist/orchestration/steps/generate-configs-step.d.ts +1 -0
- package/dist/orchestration/steps/generate-configs-step.js +50 -15
- package/dist/orchestration/steps/mirror-repo-tasks-step.d.ts +1 -1
- package/dist/orchestration/steps/mirror-repo-tasks-step.js +1 -1
- package/dist/orchestration/steps/publish-report-step.js +19 -0
- package/dist/orchestration/steps/readiness-step.js +8 -3
- package/dist/orchestration/steps/report-step.js +17 -4
- package/dist/orchestration/steps/run-eval-step.d.ts +1 -0
- package/dist/orchestration/steps/run-eval-step.js +52 -32
- package/dist/pipeline/agent-behavior-report.js +6 -0
- package/dist/pipeline/attribution.d.ts +1 -1
- package/dist/pipeline/attribution.js +1 -1
- package/dist/pipeline/cache.js +29 -15
- package/dist/pipeline/calculate-scores.d.ts +2 -0
- package/dist/pipeline/calculate-scores.js +70 -33
- package/dist/pipeline/checks.d.ts +8 -3
- package/dist/pipeline/checks.js +23 -3
- package/dist/pipeline/chronic-failures.d.ts +55 -0
- package/dist/pipeline/chronic-failures.js +110 -0
- package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +33 -0
- package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.js +2 -3
- package/dist/pipeline/compiler/__tests__/task-bridge.test.d.ts +9 -0
- package/dist/pipeline/compiler/__tests__/task-bridge.test.js +339 -0
- package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.d.ts +10 -0
- package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.js +509 -0
- package/dist/pipeline/compiler/assertion-mapper.d.ts +1 -1
- package/dist/pipeline/compiler/assertion-mapper.js +1 -1
- package/dist/pipeline/compiler/compiler-to-yaml.d.ts +2 -7
- package/dist/pipeline/compiler/compiler-to-yaml.js +2 -7
- package/dist/pipeline/compiler/config-loader.d.ts +14 -0
- package/dist/pipeline/compiler/config-loader.js +42 -2
- package/dist/pipeline/compiler/fixture-resolver.d.ts +1 -1
- package/dist/pipeline/compiler/fixture-resolver.js +1 -1
- package/dist/pipeline/compiler/ignore-fields.d.ts +1 -1
- package/dist/pipeline/compiler/ignore-fields.js +1 -1
- package/dist/pipeline/compiler/index.d.ts +2 -5
- package/dist/pipeline/compiler/index.js +2 -5
- package/dist/pipeline/compiler/literacy-bridge.d.ts +1 -1
- package/dist/pipeline/compiler/literacy-bridge.js +1 -1
- package/dist/pipeline/compiler/mode-bases/agent-harness.d.ts +1 -1
- package/dist/pipeline/compiler/mode-bases/agent-harness.js +1 -1
- package/dist/pipeline/compiler/mode-bases/knowledge-probe.d.ts +1 -1
- package/dist/pipeline/compiler/mode-bases/knowledge-probe.js +1 -1
- package/dist/pipeline/compiler/mode-bases/literacy.d.ts +13 -2
- package/dist/pipeline/compiler/mode-bases/literacy.js +55 -1
- package/dist/pipeline/compiler/mode-bases/mcp-server.d.ts +1 -1
- package/dist/pipeline/compiler/mode-bases/mcp-server.js +1 -1
- package/dist/pipeline/compiler/mode-handlers/agent-harness/index.d.ts +1 -1
- package/dist/pipeline/compiler/mode-handlers/agent-harness/index.js +1 -1
- package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.d.ts +70 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.js +485 -0
- package/dist/pipeline/compiler/mode-handlers/index.d.ts +2 -2
- package/dist/pipeline/compiler/mode-handlers/index.js +2 -2
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.d.ts +1 -1
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.js +1 -1
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.d.ts +76 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.js +245 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/index.d.ts +1 -1
- package/dist/pipeline/compiler/mode-handlers/literacy/index.js +1 -1
- package/dist/pipeline/compiler/mode-handlers/literacy-handler.d.ts +89 -0
- package/dist/pipeline/compiler/mode-handlers/literacy-handler.js +379 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-assertions.d.ts +50 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-assertions.js +334 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.d.ts +1 -1
- package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.js +1 -1
- package/dist/pipeline/compiler/mode-handlers/mcp-server/index.d.ts +1 -1
- package/dist/pipeline/compiler/mode-handlers/mcp-server/index.js +1 -1
- package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.d.ts +69 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.js +307 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.js +22 -5
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.js +6 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.d.ts +10 -5
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.js +314 -7
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.d.ts +10 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.d.ts +65 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.js +368 -0
- package/dist/pipeline/compiler/presets/sanity-literacy.d.ts +1 -1
- package/dist/pipeline/compiler/presets/sanity-literacy.js +1 -1
- package/dist/pipeline/compiler/promptfoo-compiler.d.ts +1 -4
- package/dist/pipeline/compiler/promptfoo-compiler.js +3 -12
- package/dist/pipeline/compiler/provider-assembler.js +13 -7
- package/dist/pipeline/compiler/sandbox/docker-sandbox.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/docker-sandbox.js +1 -1
- package/dist/pipeline/compiler/sandbox/fixture-provisioner.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/fixture-provisioner.js +1 -1
- package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.js +1 -1
- package/dist/pipeline/compiler/sandbox/index.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/index.js +1 -1
- package/dist/pipeline/compiler/sandbox/sandbox-selector.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/sandbox-selector.js +1 -1
- package/dist/pipeline/compiler/sandbox/sandbox-strategy.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/sandbox-strategy.js +1 -1
- package/dist/pipeline/compiler/sandbox/tempdir-sandbox.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/tempdir-sandbox.js +1 -1
- package/dist/pipeline/compiler/scoring-bridge.d.ts +1 -1
- package/dist/pipeline/compiler/scoring-bridge.js +1 -1
- package/dist/pipeline/compiler/task-bridge.d.ts +41 -0
- package/dist/pipeline/compiler/task-bridge.js +92 -0
- package/dist/pipeline/compiler/task-graph-builder.d.ts +1 -4
- package/dist/pipeline/compiler/task-graph-builder.js +1 -4
- package/dist/pipeline/compiler/telemetry/index.d.ts +1 -1
- package/dist/pipeline/compiler/telemetry/index.js +1 -1
- package/dist/pipeline/compiler/variable-resolver.d.ts +1 -1
- package/dist/pipeline/compiler/variable-resolver.js +1 -1
- package/dist/pipeline/coverage-audit.d.ts +1 -1
- package/dist/pipeline/coverage-audit.js +1 -1
- package/dist/pipeline/degradations.d.ts +1 -1
- package/dist/pipeline/degradations.js +1 -1
- package/dist/pipeline/failure-modes.d.ts +1 -1
- package/dist/pipeline/failure-modes.js +13 -1
- package/dist/pipeline/gap-analysis.d.ts +1 -1
- package/dist/pipeline/gap-analysis.js +3 -1
- package/dist/pipeline/generate-configs.d.ts +2 -2
- package/dist/pipeline/generate-configs.js +15 -8
- package/dist/pipeline/grader-compare-runner.d.ts +1 -1
- package/dist/pipeline/grader-compare-runner.js +7 -1
- package/dist/pipeline/grader-comparison.d.ts +1 -1
- package/dist/pipeline/grader-comparison.js +1 -1
- package/dist/pipeline/grader-consistency-runner.d.ts +1 -1
- package/dist/pipeline/grader-consistency-runner.js +7 -1
- package/dist/pipeline/grader-consistency.d.ts +1 -1
- package/dist/pipeline/grader-consistency.js +1 -1
- package/dist/pipeline/grader-sensitivity-runner.d.ts +1 -1
- package/dist/pipeline/grader-sensitivity-runner.js +1 -1
- package/dist/pipeline/grader-sensitivity.d.ts +1 -1
- package/dist/pipeline/grader-sensitivity.js +1 -1
- package/dist/pipeline/grader-validate-runner.d.ts +1 -1
- package/dist/pipeline/grader-validate-runner.js +2 -2
- package/dist/pipeline/grader-validation.d.ts +1 -1
- package/dist/pipeline/grader-validation.js +1 -1
- package/dist/pipeline/map-request-to-config.js +15 -2
- package/dist/pipeline/mirror-repo-tasks.d.ts +1 -1
- package/dist/pipeline/mirror-repo-tasks.js +1 -1
- package/dist/pipeline/plan-format.d.ts +1 -1
- package/dist/pipeline/plan-format.js +1 -1
- package/dist/pipeline/plan.d.ts +1 -1
- package/dist/pipeline/plan.js +67 -29
- package/dist/pipeline/probe.d.ts +1 -1
- package/dist/pipeline/probe.js +1 -1
- package/dist/pipeline/readiness-report.d.ts +2 -2
- package/dist/pipeline/readiness-report.js +2 -2
- package/dist/pipeline/release-classification.d.ts +1 -1
- package/dist/pipeline/release-classification.js +1 -1
- package/dist/pipeline/release-report.d.ts +1 -1
- package/dist/pipeline/release-report.js +1 -1
- package/dist/pipeline/repo-eval-comment.d.ts +1 -1
- package/dist/pipeline/repo-eval-comment.js +1 -1
- package/dist/pipeline/repo-threshold-evaluator.d.ts +1 -1
- package/dist/pipeline/repo-threshold-evaluator.js +1 -1
- package/dist/pipeline/resolve-mappings.d.ts +6 -6
- package/dist/pipeline/resolve-mappings.js +44 -44
- package/dist/pipeline/retrieval-metrics.d.ts +3 -3
- package/dist/pipeline/retrieval-metrics.js +28 -20
- package/dist/pipeline/steps/calculate-scores-step.d.ts +11 -0
- package/dist/pipeline/steps/calculate-scores-step.js +89 -0
- package/dist/pipeline/steps/compare-step.d.ts +18 -0
- package/dist/pipeline/steps/compare-step.js +90 -0
- package/dist/pipeline/steps/eval-step.d.ts +53 -0
- package/dist/pipeline/steps/eval-step.js +347 -0
- package/dist/pipeline/steps/fetch-docs-step.d.ts +11 -0
- package/dist/pipeline/steps/fetch-docs-step.js +84 -0
- package/dist/pipeline/steps/generate-configs-step.d.ts +11 -0
- package/dist/pipeline/steps/generate-configs-step.js +98 -0
- package/dist/pipeline/steps/grader-consistency-step.d.ts +21 -0
- package/dist/pipeline/steps/grader-consistency-step.js +74 -0
- package/dist/pipeline/steps/publish-report-step.d.ts +57 -0
- package/dist/pipeline/steps/publish-report-step.js +243 -0
- package/dist/pipeline/steps/report-step.d.ts +13 -0
- package/dist/pipeline/steps/report-step.js +56 -0
- package/dist/pipeline/steps/update-scores-step.d.ts +11 -0
- package/dist/pipeline/steps/update-scores-step.js +42 -0
- package/dist/pipeline/targeted-loo.d.ts +1 -1
- package/dist/pipeline/targeted-loo.js +1 -1
- package/dist/pipeline/thresholds.d.ts +1 -1
- package/dist/pipeline/thresholds.js +1 -1
- package/dist/pipeline/validate.js +13 -0
- package/dist/report-store.d.ts +17 -0
- package/dist/report-store.js +24 -0
- package/dist/scripts/agent-behavior-report.d.ts +19 -0
- package/dist/scripts/agent-behavior-report.js +315 -0
- package/dist/scripts/baseline.d.ts +43 -0
- package/dist/scripts/baseline.js +267 -0
- package/dist/scripts/calculate-scores.d.ts +166 -0
- package/dist/scripts/calculate-scores.js +1296 -0
- package/dist/scripts/compare.d.ts +22 -0
- package/dist/scripts/compare.js +334 -0
- package/dist/scripts/coverage-audit.d.ts +44 -0
- package/dist/scripts/coverage-audit.js +209 -0
- package/dist/scripts/debug-eval.d.ts +19 -0
- package/dist/scripts/debug-eval.js +73 -0
- package/dist/scripts/discovery-report.d.ts +58 -0
- package/dist/scripts/discovery-report.js +250 -0
- package/dist/scripts/fetch-docs.d.ts +35 -0
- package/dist/scripts/fetch-docs.js +472 -0
- package/dist/scripts/generate-configs.d.ts +66 -0
- package/dist/scripts/generate-configs.js +459 -0
- package/dist/scripts/grader-api.d.ts +27 -0
- package/dist/scripts/grader-api.js +206 -0
- package/dist/scripts/grader-compare.d.ts +22 -0
- package/dist/scripts/grader-compare.js +368 -0
- package/dist/scripts/grader-consistency.d.ts +20 -0
- package/dist/scripts/grader-consistency.js +313 -0
- package/dist/scripts/grader-sensitivity.d.ts +22 -0
- package/dist/scripts/grader-sensitivity.js +354 -0
- package/dist/scripts/grader-validate.d.ts +19 -0
- package/dist/scripts/grader-validate.js +267 -0
- package/dist/scripts/measure-retrieval.d.ts +10 -0
- package/dist/scripts/measure-retrieval.js +145 -0
- package/dist/scripts/migrate-task-mode.d.ts +1 -1
- package/dist/scripts/migrate-task-mode.js +1 -1
- package/dist/scripts/migrate-tasks-to-content-lake.d.ts +1 -1
- package/dist/scripts/migrate-tasks-to-content-lake.js +1 -1
- package/dist/scripts/pipeline.d.ts +76 -0
- package/dist/scripts/pipeline.js +1031 -0
- package/dist/scripts/pr-comment.d.ts +10 -0
- package/dist/scripts/pr-comment.js +510 -0
- package/dist/scripts/readiness-report.d.ts +88 -0
- package/dist/scripts/readiness-report.js +342 -0
- package/dist/scripts/update-quality-scores.d.ts +15 -0
- package/dist/scripts/update-quality-scores.js +184 -0
- package/dist/scripts/validate-task-sources.d.ts +1 -1
- package/dist/scripts/validate-task-sources.js +1 -1
- package/dist/scripts/validate.d.ts +13 -0
- package/dist/scripts/validate.js +79 -0
- package/dist/scripts/webhook-server.d.ts +26 -0
- package/dist/scripts/webhook-server.js +147 -0
- package/dist/scripts/weekly-digest.d.ts +24 -0
- package/dist/scripts/weekly-digest.js +144 -0
- package/dist/sinks/format-slack.d.ts +64 -0
- package/dist/sinks/format-slack.js +306 -0
- package/dist/sinks/slack-sink.d.ts +27 -0
- package/dist/sinks/slack-sink.js +78 -0
- package/dist/sinks/types.d.ts +1 -1
- package/dist/sinks/types.js +1 -1
- package/dist/sinks/webhook-sink.d.ts +19 -0
- package/dist/sinks/webhook-sink.js +50 -0
- package/dist/tasks/knowledge-probe/define-type-api.task.ts +66 -0
- package/dist/tasks/knowledge-probe/groq-projections.task.ts +62 -0
- package/dist/tasks/literacy/content-lake.task.ts +181 -0
- package/dist/tasks/literacy/frameworks.task.ts +129 -0
- package/dist/tasks/literacy/functions.task.ts +70 -0
- package/dist/tasks/literacy/groq.task.ts +259 -0
- package/dist/tasks/literacy/image-handling.task.ts +95 -0
- package/dist/tasks/literacy/nextjs-live.task.ts +76 -0
- package/dist/tasks/literacy/portable-text.task.ts +169 -0
- package/dist/tasks/literacy/studio-setup.task.ts +134 -0
- package/dist/tasks/literacy/visual-editing.task.ts +147 -0
- package/package.json +25 -25
- package/tasks/.expanded.agentic.yaml +280 -0
- package/tasks/.expanded.yaml +565 -0
- package/tasks/knowledge-probe/define-type-api.task.ts +11 -0
- package/tasks/knowledge-probe/groq-projections.task.ts +3 -0
- package/tasks/literacy/content-lake.task.ts +181 -0
- package/tasks/literacy/frameworks.task.ts +1 -0
- package/tasks/literacy/functions.task.ts +1 -0
- package/tasks/literacy/groq.task.ts +1 -0
- package/tasks/literacy/image-handling.task.ts +95 -0
- package/tasks/literacy/nextjs-live.task.ts +2 -1
- package/tasks/literacy/portable-text.task.ts +169 -0
- package/tasks/literacy/studio-setup.task.ts +5 -2
- package/tasks/literacy/visual-editing.task.ts +1 -0
- package/LICENSE +0 -21
- package/tasks/frameworks.yaml +0 -98
- package/tasks/functions.yaml +0 -51
- package/tasks/groq.yaml +0 -216
- package/tasks/nextjs-live.yaml +0 -62
- package/tasks/studio-setup.yaml +0 -111
- package/tasks/visual-editing.yaml +0 -120
|
@@ -0,0 +1,253 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* capture compare — compare two pipeline capture directories.
|
|
3
|
+
*
|
|
4
|
+
* Reads manifest.json from both captures, runs compareCaptures(),
|
|
5
|
+
* and prints a human-readable table or JSON diff report.
|
|
6
|
+
*
|
|
7
|
+
* Supports both raw directories and .tar.gz archives.
|
|
8
|
+
*
|
|
9
|
+
* Exit codes:
|
|
10
|
+
* 0 — captures are equivalent
|
|
11
|
+
* 1 — differences found
|
|
12
|
+
* 2 — error (missing files, invalid manifest, etc.)
|
|
13
|
+
*/
|
|
14
|
+
import { execFileSync } from "node:child_process";
|
|
15
|
+
import { existsSync, mkdtempSync, readdirSync, rmSync, writeFileSync, } from "node:fs";
|
|
16
|
+
import { tmpdir } from "node:os";
|
|
17
|
+
import { join, resolve } from "node:path";
|
|
18
|
+
import { Command, Option } from "commander";
|
|
19
|
+
import { compareCaptures } from "../artifact-capture/comparator.js";
|
|
20
|
+
// ---------------------------------------------------------------------------
|
|
21
|
+
// Command factory
|
|
22
|
+
// ---------------------------------------------------------------------------
|
|
23
|
+
/**
 * Parse a CLI option value as a floating-point number.
 *
 * Commander invokes option parsers as `(value, previous)`; only `value` is
 * used here so the previous/default value can never influence the result.
 *
 * @param {string} value - Raw option argument from the command line.
 * @returns {number} Parsed number, or NaN when `value` is not numeric.
 */
function parseFloatOption(value) {
    return Number.parseFloat(value);
}
/**
 * Parse a CLI option value as a base-10 integer.
 *
 * Passing the global `parseInt` directly to Commander is a bug: Commander
 * supplies the previous (default) value as the second argument, which
 * `parseInt` interprets as the radix.
 *
 * @param {string} value - Raw option argument from the command line.
 * @returns {number} Parsed integer, or NaN when `value` is not numeric.
 */
function parseIntOption(value) {
    return Number.parseInt(value, 10);
}
/**
 * Throw when any of the numeric comparison options failed to parse.
 *
 * @param {Record<string, number>} numericOptions - Map of flag name to parsed value.
 * @throws {Error} When a value is NaN or infinite.
 */
function assertFiniteOptions(numericOptions) {
    for (const [flag, value] of Object.entries(numericOptions)) {
        if (!Number.isFinite(value)) {
            throw new Error(`Invalid value for ${flag}: expected a number`);
        }
    }
}
/**
 * Build the `compare` sub-command.
 *
 * The action resolves both capture paths (extracting archives to temporary
 * directories when needed), runs `compareCaptures()`, prints a table or JSON
 * report, and optionally writes the JSON report to `--output`.
 *
 * Exit code is set via `process.exitCode`: 0 when the report is equivalent,
 * 1 when differences were found, 2 when any error was thrown.
 *
 * @returns {Command} Configured Commander command.
 */
export function createCaptureCompareCommand() {
    return new Command("compare")
        .description("Compare two pipeline capture directories")
        .argument("<baseline>", "Path to baseline capture (directory or .tar.gz)")
        .argument("<experiment>", "Path to experiment capture (directory or .tar.gz)")
        .addOption(new Option("-m, --mode <mode>", "Comparison mode")
        .choices(["inventory", "structural", "strict"])
        .default("inventory"))
        .option("-f, --format <fmt>", "Output format: table or json", "table")
        .option("-o, --output <path>", "Write JSON report to file")
        .option("--score-threshold <n>", "Aggregate score regression threshold (points)", parseFloatOption, 5)
        .option("--task-threshold <n>", "Per-task score regression threshold (points)", parseFloatOption, 10)
        .option("--timing-threshold <n>", "Step timing multiplier threshold", parseFloatOption, 2)
        .option("--json-depth <n>", "JSON structural diff depth", parseIntOption, 3)
        .action(async (baselinePath, experimentPath, opts) => {
        // Callbacks that remove temporary extraction directories.
        const cleanups = [];
        try {
            // Reject unparseable numbers up front so they surface as exit code 2
            // instead of silently disabling thresholds via NaN comparisons.
            assertFiniteOptions({
                "--score-threshold": opts.scoreThreshold,
                "--task-threshold": opts.taskThreshold,
                "--timing-threshold": opts.timingThreshold,
                "--json-depth": opts.jsonDepth,
            });
            const baseline = resolveCapturePath(resolve(baselinePath), cleanups);
            const experiment = resolveCapturePath(resolve(experimentPath), cleanups);
            console.log("");
            console.log(" ailf capture compare");
            console.log(" " + "─".repeat(40));
            console.log("");
            console.log(` Baseline: ${baselinePath}`);
            console.log(` Experiment: ${experimentPath}`);
            console.log(` Mode: ${opts.mode}`);
            console.log("");
            const report = compareCaptures(baseline, experiment, {
                mode: opts.mode,
                scoreThresholds: {
                    aggregate: opts.scoreThreshold,
                    perTask: opts.taskThreshold,
                },
                timingThresholds: { multiplier: opts.timingThreshold },
                jsonDiffDepth: opts.jsonDepth,
            });
            if (opts.format === "json") {
                const json = JSON.stringify(report, null, 2);
                if (opts.output) {
                    writeFileSync(opts.output, json, "utf-8");
                    console.log(` Report written to ${opts.output}`);
                }
                else {
                    console.log(json);
                }
            }
            else {
                printTableReport(report);
                if (opts.output) {
                    const json = JSON.stringify(report, null, 2);
                    writeFileSync(opts.output, json, "utf-8");
                    console.log(` Report also written to ${opts.output}`);
                }
            }
            process.exitCode = report.equivalent ? 0 : 1;
        }
        catch (err) {
            console.error(` Error: ${err instanceof Error ? err.message : String(err)}`);
            process.exitCode = 2;
        }
        finally {
            for (const cleanup of cleanups) {
                try {
                    cleanup();
                }
                catch {
                    // Best-effort cleanup
                }
            }
        }
    });
}
|
|
95
|
+
// ---------------------------------------------------------------------------
|
|
96
|
+
// Path resolution (handles tar.gz, subdirectories, raw dirs)
|
|
97
|
+
// ---------------------------------------------------------------------------
|
|
98
|
+
/**
 * Resolve a user-supplied capture path to the directory holding manifest.json.
 *
 * Gzipped tar archives (`.tar.gz` or `.tgz`, case-insensitive) are extracted
 * with the system `tar` into a fresh temporary directory; a callback that
 * removes that directory is appended to `cleanups`. Anything else is passed
 * straight to `findManifestDir()`.
 *
 * @param {string} inputPath - Path to a capture directory or archive.
 * @param {Array<() => void>} cleanups - Receives temp-dir removal callbacks.
 * @returns {string} Directory containing manifest.json.
 * @throws {Error} When the path does not exist, extraction fails, or no
 *   manifest can be located.
 */
function resolveCapturePath(inputPath, cleanups) {
    if (!existsSync(inputPath)) {
        throw new Error(`Path does not exist: ${inputPath}`);
    }
    const lowered = inputPath.toLowerCase();
    const isArchive = lowered.endsWith(".tar.gz") || lowered.endsWith(".tgz");
    if (!isArchive) {
        return findManifestDir(inputPath);
    }
    const tempDir = mkdtempSync(join(tmpdir(), "ailf-capture-cmp-"));
    // Register cleanup before extracting so a failed extraction still gets removed.
    cleanups.push(() => rmSync(tempDir, { recursive: true, force: true }));
    try {
        execFileSync("tar", ["-xzf", inputPath, "-C", tempDir]);
    }
    catch (err) {
        const reason = err instanceof Error ? err.message : String(err);
        throw new Error(`Failed to extract archive ${inputPath}: ${reason}`, { cause: err });
    }
    return findManifestDir(tempDir);
}
|
|
110
|
+
/**
|
|
111
|
+
* Find the directory containing manifest.json.
|
|
112
|
+
*
|
|
113
|
+
* Handles two cases:
|
|
114
|
+
* 1. Path IS the capture dir (contains manifest.json directly)
|
|
115
|
+
* 2. Path is the parent captures/ dir (contains a single timestamped subdir)
|
|
116
|
+
*/
|
|
117
|
+
/**
 * Locate the directory that contains manifest.json.
 *
 * Checks `dir` itself first, then each immediate child whose name does not
 * start with "." and does not end with ".tar.gz". Children are visited in
 * sorted name order so the result does not depend on filesystem enumeration
 * order when more than one child holds a manifest.
 *
 * @param {string} dir - Capture directory, or a parent containing one.
 * @returns {string} `dir` or the first matching child directory.
 * @throws {Error} When no manifest.json is found (including when `dir` is a
 *   regular file rather than a directory).
 */
function findManifestDir(dir) {
    if (existsSync(join(dir, "manifest.json"))) {
        return dir;
    }
    let names;
    try {
        names = readdirSync(dir);
    }
    catch (err) {
        // A non-directory path has no children; fall through to the descriptive error.
        if (!(err instanceof Error) || err.code !== "ENOTDIR") {
            throw err;
        }
        names = [];
    }
    // Look one level down for a capture subdirectory
    const candidates = names
        .filter((name) => !name.startsWith(".") && !name.endsWith(".tar.gz"))
        .sort();
    for (const name of candidates) {
        const sub = join(dir, name);
        if (existsSync(join(sub, "manifest.json"))) {
            return sub;
        }
    }
    throw new Error(`No manifest.json found in ${dir} or its subdirectories. ` +
        `Is this a valid capture directory?`);
}
|
|
130
|
+
// ---------------------------------------------------------------------------
|
|
131
|
+
// Table formatting
|
|
132
|
+
// ---------------------------------------------------------------------------
|
|
133
|
+
/**
 * Write a human-readable comparison report to stdout.
 *
 * Sections are printed in a fixed order: Inventory, Content Changes (only
 * when `report.content` is non-empty), Scores and Timing (only when present),
 * Security, and the final Result line.
 *
 * @param {object} report - Comparison report; reads `inventory`, `content`,
 *   `scores`, `timing`, `security`, `equivalent` and `summary`.
 * @returns {void}
 */
function printTableReport(report) {
    const rule = " " + "─".repeat(40);
    // Section title followed by the horizontal rule.
    const heading = (title) => {
        console.log(title);
        console.log(rule);
    };
    // Shared by the Scores and Timing sections.
    const listBreaches = (breaches) => {
        if (breaches.length > 0) {
            console.log(` Breaches: ${breaches.length}`);
            breaches.forEach((breach) => console.log(` - ${breach}`));
            return;
        }
        console.log(" Breaches: none");
    };

    // Inventory
    const { inventory } = report;
    heading(" Inventory");
    console.log(` Common: ${inventory.common.length} artifact(s)`);
    console.log(inventory.added.length > 0
        ? ` Added: ${inventory.added.length} (${inventory.added.join(", ")})`
        : " Added: 0");
    console.log(inventory.removed.length > 0
        ? ` Removed: ${inventory.removed.length} (${inventory.removed.join(", ")})`
        : " Removed: 0");
    console.log("");

    // Content diff preview (structural/strict modes): first 10 artifacts only
    const previewLimit = 10;
    if (report.content && report.content.length > 0) {
        heading(" Content Changes");
        report.content.slice(0, previewLimit).forEach((entry) => printContentDiff(entry));
        const hidden = report.content.length - previewLimit;
        if (hidden > 0) {
            console.log(` ... and ${hidden} more changed artifact(s)`);
        }
        console.log("");
    }

    // Scores
    if (report.scores) {
        heading(" Scores");
        const { baselineMean, currentMean, delta } = report.scores;
        // Negative numbers already carry their own "-" sign.
        let prefix = " ";
        if (delta > 0) {
            prefix = "+";
        }
        else if (delta < 0) {
            prefix = "";
        }
        console.log(` Aggregate: ${baselineMean.toFixed(1)} -> ${currentMean.toFixed(1)} (${prefix}${delta.toFixed(1)})`);
        listBreaches(report.scores.breaches);
        console.log("");
    }

    // Timing
    if (report.timing) {
        heading(" Timing");
        const { totalDeltaMs } = report.timing;
        const plus = totalDeltaMs >= 0 ? "+" : "";
        console.log(` Total delta: ${plus}${totalDeltaMs}ms`);
        listBreaches(report.timing.breaches);
        console.log("");
    }

    // Security: list at most 5 violations
    const shownViolations = 5;
    heading(" Security");
    if (report.security.leaksFound) {
        const { violations } = report.security;
        console.log(` Leaks: ${violations.length} finding(s)`);
        violations
            .slice(0, shownViolations)
            .forEach((violation) => console.log(` - ${violation.file}: ${violation.detail}`));
        if (violations.length > shownViolations) {
            console.log(` ... and ${violations.length - shownViolations} more`);
        }
    }
    else {
        console.log(" Leaks: none");
    }
    console.log("");

    // Result
    if (!report.equivalent) {
        console.log(` Result: DIFFERENCES FOUND`);
        console.log(` ${report.summary}`);
    }
    else {
        console.log(" Result: EQUIVALENT");
    }
    console.log("");
}
|
|
225
|
+
/**
 * Print a short preview of one artifact's content diff.
 *
 * When `diff.changes` is an array (JSON diff) up to 3 changed paths are
 * listed, each tagged as added, removed or changed; otherwise `diff.changes`
 * is read as `{ addedLines, removedLines }` (text/markdown diff) and a single
 * line-count summary is printed.
 *
 * @param {object} diff - Content diff; reads `artifactKey`, `format`, `changes`.
 * @returns {void}
 */
function printContentDiff(diff) {
    console.log(` ${diff.artifactKey} (${diff.format})`);
    const { changes } = diff;
    if (!Array.isArray(changes)) {
        // Text/markdown diff
        console.log(` +${changes.addedLines} / -${changes.removedLines} lines`);
        return;
    }
    // JSON diff — show up to 3 changed paths
    const maxShown = 3;
    changes.slice(0, maxShown).forEach((item) => {
        let line = ` ~ ${item.path} (changed)`;
        if (item.baseline === undefined) {
            line = ` + ${item.path} (added)`;
        }
        else if (item.experiment === undefined) {
            line = ` - ${item.path} (removed)`;
        }
        console.log(line);
    });
    if (changes.length > maxShown) {
        console.log(` ... ${changes.length - maxShown} more change(s)`);
    }
}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* capture list — list pipeline captures in a directory.
|
|
3
|
+
*
|
|
4
|
+
* Scans a capture directory for subdirectories containing manifest.json,
|
|
5
|
+
* reads each manifest, and prints a summary table sorted by date.
|
|
6
|
+
*
|
|
7
|
+
* Usage:
|
|
8
|
+
* ailf capture list # default: results/captures/
|
|
9
|
+
* ailf capture list ./my-captures # custom directory
|
|
10
|
+
*/
|
|
11
|
+
import { Command } from "commander";
|
|
12
|
+
export declare function createCaptureListCommand(): Command;
|
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* capture list — list pipeline captures in a directory.
|
|
3
|
+
*
|
|
4
|
+
* Scans a capture directory for subdirectories containing manifest.json,
|
|
5
|
+
* reads each manifest, and prints a summary table sorted by date.
|
|
6
|
+
*
|
|
7
|
+
* Usage:
|
|
8
|
+
* ailf capture list # default: results/captures/
|
|
9
|
+
* ailf capture list ./my-captures # custom directory
|
|
10
|
+
*/
|
|
11
|
+
import { existsSync, readdirSync, readFileSync, statSync } from "node:fs";
|
|
12
|
+
import { join, resolve } from "node:path";
|
|
13
|
+
import { Command } from "commander";
|
|
14
|
+
// ---------------------------------------------------------------------------
|
|
15
|
+
// Command factory
|
|
16
|
+
// ---------------------------------------------------------------------------
|
|
17
|
+
/**
 * Build the `list` subcommand, which prints the captures found in a directory.
 *
 * The action resolves the directory (default `results/captures`), collects
 * entries via `discoverCaptures`, sorts them newest first and prints either
 * JSON (`--format json`) or a fixed-width table. A missing directory sets
 * `process.exitCode = 1`.
 *
 * Entries whose `startedAt` cannot be parsed (for example archives whose name
 * carries no timestamp) are sorted after all dated entries and shown with the
 * date "unknown". An unknown artifact count (negative) is shown as "?".
 *
 * @returns {Command} The configured commander command.
 */
export function createCaptureListCommand() {
    // Sort key: epoch milliseconds, or NaN when the timestamp is unparseable.
    const toMillis = (value) => new Date(value).getTime();
    // Newest first. Undated entries go last so the comparator never returns NaN.
    const byNewestFirst = (a, b) => {
        const aMs = toMillis(a.startedAt);
        const bMs = toMillis(b.startedAt);
        const aDated = !Number.isNaN(aMs);
        const bDated = !Number.isNaN(bMs);
        if (aDated && bDated) {
            return bMs - aMs;
        }
        if (aDated) {
            return -1;
        }
        return bDated ? 1 : 0;
    };
    return new Command("list")
        .description("List pipeline captures in a directory")
        .argument("[dir]", "Captures directory (default: results/captures/)")
        .option("-f, --format <fmt>", "Output format: table or json", "table")
        .action(async (dir, opts) => {
        const captureDir = resolve(dir ?? "results/captures");
        if (!existsSync(captureDir)) {
            console.error(` No captures directory found at ${captureDir}`);
            console.error(" Run 'ailf pipeline --capture' to create captures.");
            process.exitCode = 1;
            return;
        }
        const captures = discoverCaptures(captureDir);
        if (captures.length === 0) {
            console.log(` No captures found in ${captureDir}`);
            return;
        }
        // Sort by startedAt descending (newest first)
        captures.sort(byNewestFirst);
        if (opts.format === "json") {
            console.log(JSON.stringify(captures, null, 2));
            return;
        }
        console.log("");
        console.log(" ailf capture list");
        console.log(" " + "─".repeat(60));
        console.log("");
        console.log(" " +
            "Date".padEnd(22) +
            "Mode".padEnd(18) +
            "Artifacts".padEnd(12) +
            "Size".padEnd(10) +
            "Path");
        console.log(" " + "─".repeat(60));
        for (const c of captures) {
            const started = new Date(c.startedAt);
            const date = Number.isNaN(started.getTime())
                ? "unknown"
                : started.toLocaleString("en-US", {
                    month: "short",
                    day: "2-digit",
                    hour: "2-digit",
                    minute: "2-digit",
                });
            // A manifest may omit the mode; never call padEnd on a non-string.
            const mode = String(c.mode ?? "unknown").padEnd(18);
            // Archives report -1 because they are not extracted.
            const artifacts = (c.artifactCount < 0 ? "?" : String(c.artifactCount)).padEnd(12);
            const size = formatBytes(c.totalBytes).padEnd(10);
            console.log(` ${date.padEnd(22)}${mode}${artifacts}${size}${c.name}`);
        }
        console.log("");
        console.log(` ${captures.length} capture(s) found in ${captureDir}`);
        console.log("");
    });
}
|
|
69
|
+
/**
 * Collect capture entries found directly inside `captureDir`.
 *
 * Two kinds of entry are recognised:
 * - a subdirectory containing manifest.json, which is parsed for mode, start
 *   time, artifact count and summed artifact bytes;
 * - a `.tar.gz` file, which is not extracted: mode and timestamp are derived
 *   from the file name, the size is the archive size on disk, and
 *   `artifactCount` is -1 (unknown).
 *
 * Names starting with "." are ignored. Entries that cannot be read or parsed
 * are skipped on purpose so one bad capture does not hide the others.
 *
 * @param {string} captureDir - Directory to scan (must exist).
 * @returns {Array<object>} Entries with `name`, `path`, `mode`, `startedAt`,
 *   `artifactCount`, `totalBytes`, `compressed`.
 */
function discoverCaptures(captureDir) {
    const entries = [];
    for (const name of readdirSync(captureDir)) {
        if (name.startsWith(".")) {
            continue;
        }
        const fullPath = join(captureDir, name);
        // Raw directory with manifest.json
        const manifestPath = join(fullPath, "manifest.json");
        if (existsSync(manifestPath)) {
            try {
                const manifest = JSON.parse(readFileSync(manifestPath, "utf-8"));
                // Count only numeric sizes; a missing `bytes` would otherwise
                // turn the whole total into NaN.
                const totalBytes = manifest.artifacts.reduce((sum, a) => sum + (Number.isFinite(a.bytes) ? a.bytes : 0), 0);
                entries.push({
                    name,
                    path: fullPath,
                    mode: manifest.pipeline.mode,
                    startedAt: manifest.startedAt,
                    artifactCount: manifest.artifacts.length,
                    totalBytes,
                    compressed: false,
                });
            }
            catch {
                // Skip unparseable or structurally invalid manifests
            }
            continue;
        }
        // .tar.gz archive — read size but don't extract
        if (name.endsWith(".tar.gz")) {
            try {
                const stat = statSync(fullPath);
                entries.push({
                    name,
                    path: fullPath,
                    mode: extractModeFromName(name),
                    startedAt: extractTimestampFromName(name),
                    artifactCount: -1, // Unknown without extracting
                    totalBytes: stat.size,
                    compressed: true,
                });
            }
            catch {
                // Skip archives that cannot be stat'ed
            }
        }
    }
    return entries;
}
|
|
117
|
+
/** Known modes — used to correctly extract mode from hyphenated filenames. */
const KNOWN_MODES = [
    "literacy",
    "mcp-server",
    "agent-harness",
    "knowledge-probe",
];
/**
 * Derive the pipeline mode from a capture archive file name.
 *
 * Known modes are matched first because some of them contain hyphens and a
 * plain split on "-" would cut them short. For any other name the text before
 * the first "-" is returned; "unknown" is returned when that text is empty.
 *
 * @param {string} name - Archive file name.
 * @returns {string} The detected mode, or "unknown".
 */
function extractModeFromName(name) {
    const known = KNOWN_MODES.find((mode) => name.startsWith(`${mode}-`));
    if (known !== undefined) {
        return known;
    }
    // split() always yields at least one element, so `??` could never reach the
    // fallback; `||` maps an empty prefix to "unknown" as intended.
    const [prefix] = name.split("-");
    return prefix || "unknown";
}
|
|
131
|
+
/**
 * Derive an ISO-8601 UTC timestamp from a capture archive file name.
 *
 * @param {string} name - Archive file name, expected to follow
 *   `{mode}-YYYYMMDD-HHmmss-{id}.tar.gz`.
 * @returns {string} `YYYY-MM-DDTHH:mm:ssZ`, or "unknown" when the name has no
 *   timestamp or the digits do not form a parseable date (e.g. month 13).
 */
function extractTimestampFromName(name) {
    // Pattern: {mode}-YYYYMMDD-HHmmss-{id}.tar.gz
    const match = name.match(/(\d{4})(\d{2})(\d{2})-(\d{2})(\d{2})(\d{2})/);
    if (!match) {
        return "unknown";
    }
    const [, y, m, d, h, min, s] = match;
    const iso = `${y}-${m}-${d}T${h}:${min}:${s}Z`;
    // Digits that look like a timestamp but are not a real date would surface
    // later as "Invalid Date"; report them as "unknown" like the no-match case.
    return Number.isNaN(Date.parse(iso)) ? "unknown" : iso;
}
|
|
139
|
+
/**
 * Format a byte count as a short human-readable string.
 *
 * @param {number} bytes - Size in bytes.
 * @returns {string} "?" for negative or non-finite input (unknown size),
 *   otherwise `<n>B`, `<n.n>KB` or `<n.n>MB` using 1024-based units.
 */
function formatBytes(bytes) {
    // NaN, undefined and Infinity previously fell through to "NaNMB"/"InfinityMB".
    if (!Number.isFinite(bytes) || bytes < 0) {
        return "?";
    }
    if (bytes < 1024) {
        return `${bytes}B`;
    }
    if (bytes < 1024 * 1024) {
        return `${(bytes / 1024).toFixed(1)}KB`;
    }
    return `${(bytes / 1024 / 1024).toFixed(1)}MB`;
}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* capture command — manage and compare pipeline artifact captures.
|
|
3
|
+
*
|
|
4
|
+
* Parent command for capture-related subcommands:
|
|
5
|
+
* ailf capture compare <baseline> <experiment>
|
|
6
|
+
* ailf capture list [dir]
|
|
7
|
+
*/
|
|
8
|
+
import { Command } from "commander";
|
|
9
|
+
export declare function createCaptureCommand(): Command;
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* capture command — manage and compare pipeline artifact captures.
|
|
3
|
+
*
|
|
4
|
+
* Parent command for capture-related subcommands:
|
|
5
|
+
* ailf capture compare <baseline> <experiment>
|
|
6
|
+
* ailf capture list [dir]
|
|
7
|
+
*/
|
|
8
|
+
import { Command } from "commander";
|
|
9
|
+
import { createCaptureCompareCommand } from "./capture-compare.js";
|
|
10
|
+
import { createCaptureListCommand } from "./capture-list.js";
|
|
11
|
+
/**
 * Build the parent `capture` command and register its subcommands
 * (`compare`, then `list`).
 *
 * @returns {Command} The configured commander command.
 */
export function createCaptureCommand() {
    const capture = new Command("capture");
    capture.description("Manage and compare pipeline artifact captures");
    // Registration order is preserved: compare first, then list.
    const subcommandFactories = [createCaptureCompareCommand, createCaptureListCommand];
    for (const makeSubcommand of subcommandFactories) {
        capture.addCommand(makeSubcommand());
    }
    return capture;
}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* chronic-failures command — query recent reports to find tasks that
|
|
3
|
+
* consistently error above a configurable threshold.
|
|
4
|
+
*
|
|
5
|
+
* @see docs/exec-plans/eval-pipeline-timeout-resilience.md — Phase 5
|
|
6
|
+
*/
|
|
7
|
+
import { Command } from "commander";
|
|
8
|
+
export declare function createChronicFailuresCommand(): Command;
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* chronic-failures command — query recent reports to find tasks that
|
|
3
|
+
* consistently error above a configurable threshold.
|
|
4
|
+
*
|
|
5
|
+
* @see docs/exec-plans/eval-pipeline-timeout-resilience.md — Phase 5
|
|
6
|
+
*/
|
|
7
|
+
import { Command } from "commander";
|
|
8
|
+
import { detectChronicFailures, formatChronicFailuresConsole, } from "../pipeline/chronic-failures.js";
|
|
9
|
+
import { ReportStore } from "../report-store.js";
|
|
10
|
+
/**
 * Build the `chronic-failures` command.
 *
 * The action runs `detectChronicFailures` against a new `ReportStore` with the
 * `--lookback` and `--threshold` options, prints the result as JSON (`--json`)
 * or as formatted console text, and sets a non-zero exit code when any chronic
 * failure is reported.
 *
 * Invalid option values (non-numeric, lookback < 1, threshold outside 0-1) are
 * rejected before any report is queried.
 *
 * @returns {Command} The configured commander command.
 */
export function createChronicFailuresCommand() {
    return new Command("chronic-failures")
        .description("Identify tasks that error in >50% of recent evaluation runs")
        .option("--lookback <n>", "Number of recent reports to analyze", (v) => parseInt(v, 10), 10)
        .option("--threshold <n>", "Error rate threshold (0-1) for chronic classification", (v) => parseFloat(v), 0.5)
        .option("--json", "Output raw JSON", false)
        .action(async (opts) => {
        const { lookback, threshold } = opts;
        // parseInt/parseFloat yield NaN for non-numeric input; fail early with a
        // clear message instead of passing NaN into the analysis.
        if (!Number.isInteger(lookback) || lookback < 1) {
            console.error("Invalid --lookback: expected a positive integer.");
            process.exitCode = 1;
            return;
        }
        if (!Number.isFinite(threshold) || threshold < 0 || threshold > 1) {
            console.error("Invalid --threshold: expected a number between 0 and 1.");
            process.exitCode = 1;
            return;
        }
        const reportStore = new ReportStore();
        const report = await detectChronicFailures(reportStore, {
            lookback,
            threshold,
        });
        if (opts.json) {
            console.log(JSON.stringify(report, null, 2));
        }
        else {
            console.log(formatChronicFailuresConsole(report));
        }
        // Exit with non-zero if chronic failures detected. Setting exitCode
        // (rather than calling process.exit) lets pending stdout writes flush,
        // so piped --json output is not truncated.
        if (report.failures.length > 0) {
            process.exitCode = 1;
        }
    });
}
|
|
@@ -17,7 +17,7 @@
|
|
|
17
17
|
* Adding --explain support for a new command = adding one registry entry.
|
|
18
18
|
* Commands not in the registry fall back to a minimal generic plan.
|
|
19
19
|
*
|
|
20
|
-
* @see docs/exec-plans/execution-preview.md
|
|
20
|
+
* @see docs/archive/exec-plans/execution-preview.md
|
|
21
21
|
*/
|
|
22
22
|
import type { Command } from "commander";
|
|
23
23
|
/**
|
|
@@ -17,7 +17,7 @@
|
|
|
17
17
|
* Adding --explain support for a new command = adding one registry entry.
|
|
18
18
|
* Commands not in the registry fall back to a minimal generic plan.
|
|
19
19
|
*
|
|
20
|
-
* @see docs/exec-plans/execution-preview.md
|
|
20
|
+
* @see docs/archive/exec-plans/execution-preview.md
|
|
21
21
|
*/
|
|
22
22
|
import { TASK_FILE_NAMES } from "../_vendor/ailf-core/index.js";
|
|
23
23
|
import { buildPipelinePlan, buildSimpleCommandPlan, } from "../pipeline/plan.js";
|
|
@@ -137,6 +137,24 @@ const EXPLAIN_REGISTRY = {
|
|
|
137
137
|
},
|
|
138
138
|
],
|
|
139
139
|
},
|
|
140
|
+
"chronic-failures": {
|
|
141
|
+
description: "Identify tasks that error in >50% of recent evaluation runs",
|
|
142
|
+
filesRead: [],
|
|
143
|
+
steps: [
|
|
144
|
+
{
|
|
145
|
+
cacheStatus: "miss",
|
|
146
|
+
name: "Query Content Lake",
|
|
147
|
+
reason: "Fetch testSummary.errors from recent reports",
|
|
148
|
+
willRun: true,
|
|
149
|
+
},
|
|
150
|
+
{
|
|
151
|
+
cacheStatus: "miss",
|
|
152
|
+
name: "Aggregate errors",
|
|
153
|
+
reason: "Compute per-task error rates across runs",
|
|
154
|
+
willRun: true,
|
|
155
|
+
},
|
|
156
|
+
],
|
|
157
|
+
},
|
|
140
158
|
"coverage-audit": {
|
|
141
159
|
description: "Cross-reference feature registry against evaluation tasks for coverage gaps",
|
|
142
160
|
filesRead: ["config/features.ts", "tasks/*.{yaml,task.ts,task.js}"],
|
|
@@ -295,7 +313,7 @@ const EXPLAIN_REGISTRY = {
|
|
|
295
313
|
"measure-retrieval": {
|
|
296
314
|
description: "Measure Sanity text search retrieval quality against canonical document annotations",
|
|
297
315
|
filesCreated: ["results/latest/retrieval-metrics.json"],
|
|
298
|
-
filesRead: ["tasks/*.
|
|
316
|
+
filesRead: ["tasks/literacy/*.task.ts"],
|
|
299
317
|
steps: [
|
|
300
318
|
{
|
|
301
319
|
cacheStatus: "miss",
|
|
@@ -576,16 +594,23 @@ export async function handleExplain(actionCommand, confirmExecution, rootDir) {
|
|
|
576
594
|
*/
|
|
577
595
|
function buildInitExplainPlan(actionCommand, rootDir) {
|
|
578
596
|
const opts = actionCommand.opts();
|
|
579
|
-
const format = opts.outputFormat === "json"
|
|
580
|
-
|
|
597
|
+
const format = opts.outputFormat === "json"
|
|
598
|
+
? "json"
|
|
599
|
+
: opts.outputFormat === "yaml"
|
|
600
|
+
? "yaml"
|
|
601
|
+
: "ts";
|
|
602
|
+
const taskExt = format === "ts" ? ".task.ts" : format === "yaml" ? ".yaml" : ".json";
|
|
603
|
+
const configFile = format === "ts"
|
|
604
|
+
? "ailf.config.ts"
|
|
605
|
+
: `config.${format === "yaml" ? "yaml" : "json"}`;
|
|
581
606
|
const callerCwd = process.env.AILF_CALLER_CWD ?? process.cwd();
|
|
582
607
|
const targetDir = opts.path ?? ".";
|
|
583
608
|
const ailfDir = `${targetDir}/.ailf`;
|
|
584
609
|
const tasksDir = `${ailfDir}/tasks`;
|
|
585
610
|
const taskFileNames = [...TASK_FILE_NAMES];
|
|
586
611
|
const filesCreated = [
|
|
587
|
-
`${ailfDir}
|
|
588
|
-
...taskFileNames.map((stem) => `${tasksDir}/${stem}${
|
|
612
|
+
`${ailfDir}/${configFile}`,
|
|
613
|
+
...taskFileNames.map((stem) => `${tasksDir}/${stem}${taskExt}`),
|
|
589
614
|
`${ailfDir}/.gitignore`,
|
|
590
615
|
];
|
|
591
616
|
return buildSimpleCommandPlan({
|
|
@@ -603,14 +628,14 @@ function buildInitExplainPlan(actionCommand, rootDir) {
|
|
|
603
628
|
},
|
|
604
629
|
{
|
|
605
630
|
cacheStatus: "miss",
|
|
606
|
-
name: `Write
|
|
631
|
+
name: `Write ${configFile}`,
|
|
607
632
|
reason: `Project configuration template (${format.toUpperCase()} format)`,
|
|
608
633
|
willRun: true,
|
|
609
634
|
},
|
|
610
635
|
{
|
|
611
636
|
cacheStatus: "miss",
|
|
612
637
|
name: `Write example tasks (${taskFileNames.length} files)`,
|
|
613
|
-
reason: `
|
|
638
|
+
reason: `Starter tasks in ${tasksDir}/ (${format.toUpperCase()} format)`,
|
|
614
639
|
willRun: true,
|
|
615
640
|
},
|
|
616
641
|
{
|
|
@@ -700,6 +725,10 @@ async function buildPipelineExplainPlan(actionCommand, rootDir) {
|
|
|
700
725
|
taskSource: raw.taskSource,
|
|
701
726
|
remoteCache: raw.remoteCache,
|
|
702
727
|
config: raw.config,
|
|
728
|
+
capture: raw.capture ?? false,
|
|
729
|
+
captureCompress: raw.captureCompress ?? true,
|
|
730
|
+
captureExtras: raw.captureExtras ?? true,
|
|
731
|
+
captureDir: raw.captureDir,
|
|
703
732
|
};
|
|
704
733
|
const resolved = computeResolvedOptions(withDefaults);
|
|
705
734
|
const planOpts = {
|
|
@@ -41,6 +41,7 @@ async function executeFetchDocs(opts) {
|
|
|
41
41
|
// Build a minimal ResolvedConfig for the composition root
|
|
42
42
|
const ctx = createAppContext({
|
|
43
43
|
rootDir: ROOT,
|
|
44
|
+
outputDir: resolve(ROOT, "results", "latest"),
|
|
44
45
|
mode: "literacy",
|
|
45
46
|
noAutoScope: false,
|
|
46
47
|
skipFetch: false,
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* generate-configs command — generate promptfoo config files
|
|
2
|
+
* generate-configs command — generate promptfoo config files via the compiler pipeline.
|
|
3
3
|
*
|
|
4
|
-
* Uses the composition root to wire adapters, then
|
|
5
|
-
* directly — the same code path as the pipeline.
|
|
4
|
+
* Uses the composition root to wire adapters, then runs GenerateConfigsStep
|
|
5
|
+
* directly — the same code path as the full pipeline.
|
|
6
6
|
*/
|
|
7
7
|
import { Command } from "commander";
|
|
8
8
|
export declare function createGenerateConfigsCommand(): Command;
|
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* generate-configs command — generate promptfoo config files
|
|
2
|
+
* generate-configs command — generate promptfoo config files via the compiler pipeline.
|
|
3
3
|
*
|
|
4
|
-
* Uses the composition root to wire adapters, then
|
|
5
|
-
* directly — the same code path as the pipeline.
|
|
4
|
+
* Uses the composition root to wire adapters, then runs GenerateConfigsStep
|
|
5
|
+
* directly — the same code path as the full pipeline.
|
|
6
6
|
*/
|
|
7
7
|
import { dirname, resolve } from "path";
|
|
8
8
|
import { fileURLToPath } from "url";
|
|
9
9
|
import { Command } from "commander";
|
|
10
10
|
import { createAppContext } from "../composition-root.js";
|
|
11
|
-
import {
|
|
11
|
+
import { GenerateConfigsStep } from "../orchestration/steps/generate-configs-step.js";
|
|
12
12
|
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
13
13
|
const ROOT = resolve(__dirname, "..", "..");
|
|
14
14
|
export function createGenerateConfigsCommand() {
|
|
@@ -19,6 +19,7 @@ export function createGenerateConfigsCommand() {
|
|
|
19
19
|
try {
|
|
20
20
|
const ctx = createAppContext({
|
|
21
21
|
rootDir: ROOT,
|
|
22
|
+
outputDir: resolve(ROOT, "results", "latest"),
|
|
22
23
|
mode: "literacy",
|
|
23
24
|
noAutoScope: false,
|
|
24
25
|
skipFetch: true,
|
|
@@ -35,10 +36,21 @@ export function createGenerateConfigsCommand() {
|
|
|
35
36
|
remote: false,
|
|
36
37
|
apiUrl: "https://ailf-api.sanity.build",
|
|
37
38
|
});
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
39
|
+
const step = new GenerateConfigsStep();
|
|
40
|
+
// Run validation checks first
|
|
41
|
+
const issues = step.check(ctx);
|
|
42
|
+
if (issues.length > 0) {
|
|
43
|
+
for (const issue of issues) {
|
|
44
|
+
console.error(` ❌ ${issue.message}`);
|
|
45
|
+
}
|
|
46
|
+
process.exitCode = 1;
|
|
47
|
+
return;
|
|
48
|
+
}
|
|
49
|
+
const result = await step.execute(ctx, {});
|
|
50
|
+
if (result.status === "failed") {
|
|
51
|
+
console.error(`❌ ${result.error}`);
|
|
52
|
+
process.exitCode = 1;
|
|
53
|
+
}
|
|
42
54
|
}
|
|
43
55
|
catch (err) {
|
|
44
56
|
process.exitCode = 1;
|