@sanity/ailf 1.0.0 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +0 -1
- package/canonical/grader-references/README.md +2 -2
- package/canonical/reference-solutions/content-lake/mutations.ts +160 -0
- package/canonical/reference-solutions/content-lake/realtime.ts +187 -0
- package/canonical/reference-solutions/image-handling/asset-pipeline.tsx +166 -0
- package/canonical/reference-solutions/portable-text/custom-blocks.ts +204 -0
- package/canonical/reference-solutions/portable-text/rendering.tsx +163 -0
- package/config/features.ts +1 -1
- package/config/models.ts +29 -12
- package/config/sources.ts +1 -1
- package/config/thresholds.ts +1 -1
- package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.d.ts +10 -0
- package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.js +185 -0
- package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.d.ts +6 -0
- package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.js +42 -0
- package/dist/_vendor/ailf-core/artifact-capture/noop-collector.d.ts +14 -0
- package/dist/_vendor/ailf-core/artifact-capture/noop-collector.js +25 -0
- package/dist/_vendor/ailf-core/config-helpers.d.ts +20 -17
- package/dist/_vendor/ailf-core/config-helpers.js +51 -2
- package/dist/_vendor/ailf-core/examples/index.d.ts +166 -80
- package/dist/_vendor/ailf-core/examples/index.js +213 -94
- package/dist/_vendor/ailf-core/index.d.ts +3 -2
- package/dist/_vendor/ailf-core/index.js +2 -1
- package/dist/_vendor/ailf-core/ports/artifact-collector.d.ts +94 -0
- package/dist/_vendor/ailf-core/ports/artifact-collector.js +13 -0
- package/dist/_vendor/ailf-core/ports/capture-comparator.d.ts +138 -0
- package/dist/_vendor/ailf-core/ports/capture-comparator.js +10 -0
- package/dist/_vendor/ailf-core/ports/context.d.ts +22 -1
- package/dist/_vendor/ailf-core/ports/eval-runner.d.ts +6 -0
- package/dist/_vendor/ailf-core/ports/index.d.ts +2 -0
- package/dist/_vendor/ailf-core/ports/pipeline-step.d.ts +11 -0
- package/dist/_vendor/ailf-core/ports/task-source.d.ts +3 -3
- package/dist/_vendor/ailf-core/ports/task-source.js +3 -3
- package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +1 -0
- package/dist/_vendor/ailf-core/schemas/eval-config.js +10 -0
- package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +7 -1
- package/dist/_vendor/ailf-core/schemas/pipeline-request.js +16 -2
- package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +0 -2
- package/dist/_vendor/ailf-core/schemas/pipeline.js +0 -1
- package/dist/_vendor/ailf-core/services/config-helpers.d.ts +16 -1
- package/dist/_vendor/ailf-core/services/config-helpers.js +21 -0
- package/dist/_vendor/ailf-core/services/index.d.ts +1 -1
- package/dist/_vendor/ailf-core/services/index.js +1 -1
- package/dist/_vendor/ailf-core/services/scoring.js +9 -0
- package/dist/_vendor/ailf-core/types/generalized-task.d.ts +25 -1
- package/dist/_vendor/ailf-core/types/generalized-task.js +1 -1
- package/dist/_vendor/ailf-core/types/index.d.ts +48 -7
- package/dist/_vendor/ailf-core/types/plugin-registry.d.ts +105 -23
- package/dist/_vendor/ailf-core/types/plugin-registry.js +73 -20
- package/dist/_vendor/ailf-shared/eval-modes.d.ts +15 -0
- package/dist/_vendor/ailf-shared/eval-modes.js +18 -0
- package/dist/adapters/api-client/remediation.js +2 -2
- package/dist/adapters/config-sources/file-config-adapter.js +7 -1
- package/dist/adapters/config-sources/ts-config-loader.js +21 -13
- package/dist/adapters/eval-runners/promptfoo-eval-adapter.js +8 -2
- package/dist/adapters/index.d.ts +0 -1
- package/dist/adapters/index.js +0 -1
- package/dist/adapters/task-sources/composite-task-source.d.ts +1 -1
- package/dist/adapters/task-sources/composite-task-source.js +1 -1
- package/dist/adapters/task-sources/content-lake-task-source.d.ts +4 -6
- package/dist/adapters/task-sources/content-lake-task-source.js +21 -26
- package/dist/adapters/task-sources/index.d.ts +3 -4
- package/dist/adapters/task-sources/index.js +3 -4
- package/dist/adapters/task-sources/repo-schemas.d.ts +219 -17
- package/dist/adapters/task-sources/repo-schemas.js +228 -20
- package/dist/adapters/task-sources/repo-task-source.d.ts +14 -10
- package/dist/adapters/task-sources/repo-task-source.js +81 -122
- package/dist/adapters/task-sources/repo-trigger.d.ts +1 -1
- package/dist/adapters/task-sources/repo-trigger.js +1 -1
- package/dist/adapters/task-sources/repo-validation.d.ts +36 -5
- package/dist/adapters/task-sources/repo-validation.js +126 -5
- package/dist/adapters/task-sources/task-file-loader.d.ts +10 -7
- package/dist/adapters/task-sources/task-file-loader.js +21 -7
- package/dist/agent-observer/test-imports.d.ts +7 -0
- package/dist/agent-observer/test-imports.js +185 -0
- package/dist/artifact-capture/comparator.d.ts +22 -0
- package/dist/artifact-capture/comparator.js +493 -0
- package/dist/artifact-capture/filesystem-collector.d.ts +42 -0
- package/dist/artifact-capture/filesystem-collector.js +237 -0
- package/dist/artifact-capture/redact-artifact.d.ts +20 -0
- package/dist/artifact-capture/redact-artifact.js +115 -0
- package/dist/assertions/source-isolation.d.ts +1 -1
- package/dist/assertions/source-isolation.js +1 -1
- package/dist/cli.js +4 -0
- package/dist/commands/calculate-scores.js +1 -0
- package/dist/commands/capture-compare.d.ts +15 -0
- package/dist/commands/capture-compare.js +253 -0
- package/dist/commands/capture-list.d.ts +12 -0
- package/dist/commands/capture-list.js +147 -0
- package/dist/commands/capture.d.ts +9 -0
- package/dist/commands/capture.js +16 -0
- package/dist/commands/chronic-failures.d.ts +8 -0
- package/dist/commands/chronic-failures.js +33 -0
- package/dist/commands/coverage-audit.js +3 -1
- package/dist/commands/explain-handler.d.ts +1 -1
- package/dist/commands/explain-handler.js +37 -8
- package/dist/commands/fetch-docs.js +1 -0
- package/dist/commands/generate-configs.d.ts +3 -3
- package/dist/commands/generate-configs.js +20 -8
- package/dist/commands/init.d.ts +5 -4
- package/dist/commands/init.js +190 -25
- package/dist/commands/pipeline-action.d.ts +7 -1
- package/dist/commands/pipeline-action.js +43 -19
- package/dist/commands/pipeline.d.ts +6 -1
- package/dist/commands/pipeline.js +7 -2
- package/dist/commands/pr-comment.js +1 -0
- package/dist/commands/publish.js +1 -0
- package/dist/commands/shared/help.js +2 -2
- package/dist/commands/update-quality-scores.d.ts +5 -0
- package/dist/commands/update-quality-scores.js +20 -0
- package/dist/commands/validate-tasks.d.ts +2 -2
- package/dist/commands/validate-tasks.js +26 -15
- package/dist/composition-root.d.ts +15 -4
- package/dist/composition-root.js +100 -55
- package/dist/config/features.ts +23 -0
- package/dist/config/models.ts +100 -0
- package/dist/config/prompts.ts +16 -0
- package/dist/config/rubrics.ts +225 -0
- package/dist/config/schedules.ts +47 -0
- package/dist/config/sinks.ts +37 -0
- package/dist/config/sources.ts +21 -0
- package/dist/config/thresholds.ts +61 -0
- package/dist/index.d.ts +41 -0
- package/dist/index.js +48 -0
- package/dist/lib/agent-behavior-report.d.ts +8 -0
- package/dist/lib/agent-behavior-report.js +185 -0
- package/dist/lib/baseline.d.ts +19 -0
- package/dist/lib/baseline.js +153 -0
- package/dist/lib/calculate-scores.d.ts +23 -0
- package/dist/lib/calculate-scores.js +42 -0
- package/dist/lib/compare.d.ts +18 -0
- package/dist/lib/compare.js +170 -0
- package/dist/lib/coverage-audit.d.ts +4 -0
- package/dist/lib/coverage-audit.js +42 -0
- package/dist/lib/discovery-report.d.ts +13 -0
- package/dist/lib/discovery-report.js +57 -0
- package/dist/lib/fetch-docs.d.ts +30 -0
- package/dist/lib/fetch-docs.js +171 -0
- package/dist/lib/generate-configs.d.ts +25 -0
- package/dist/lib/generate-configs.js +42 -0
- package/dist/lib/grader-api.d.ts +21 -0
- package/dist/lib/grader-api.js +34 -0
- package/dist/lib/grader-compare.d.ts +19 -0
- package/dist/lib/grader-compare.js +91 -0
- package/dist/lib/grader-consistency.d.ts +27 -0
- package/dist/lib/grader-consistency.js +79 -0
- package/dist/lib/grader-sensitivity.d.ts +19 -0
- package/dist/lib/grader-sensitivity.js +75 -0
- package/dist/lib/grader-validate.d.ts +19 -0
- package/dist/lib/grader-validate.js +78 -0
- package/dist/lib/measure-retrieval.d.ts +14 -0
- package/dist/lib/measure-retrieval.js +71 -0
- package/dist/lib/pr-comment.d.ts +16 -0
- package/dist/lib/pr-comment.js +28 -0
- package/dist/lib/readiness-report.d.ts +13 -0
- package/dist/lib/readiness-report.js +108 -0
- package/dist/lib/webhook-server.d.ts +11 -0
- package/dist/lib/webhook-server.js +24 -0
- package/dist/lib/weekly-digest.d.ts +24 -0
- package/dist/lib/weekly-digest.js +148 -0
- package/dist/orchestration/build-app-context.js +13 -0
- package/dist/orchestration/build-step-sequence.js +4 -2
- package/dist/orchestration/cache-context.d.ts +23 -0
- package/dist/orchestration/cache-context.js +43 -0
- package/dist/orchestration/env-bridge.d.ts +21 -0
- package/dist/orchestration/env-bridge.js +66 -0
- package/dist/orchestration/load-pipeline-tasks.d.ts +34 -0
- package/dist/orchestration/load-pipeline-tasks.js +52 -0
- package/dist/orchestration/pipeline-orchestrator.js +75 -5
- package/dist/orchestration/step-runner.js +5 -1
- package/dist/orchestration/steps/calculate-scores-step.d.ts +1 -0
- package/dist/orchestration/steps/calculate-scores-step.js +13 -0
- package/dist/orchestration/steps/callback-step.js +10 -1
- package/dist/orchestration/steps/compare-step.js +6 -3
- package/dist/orchestration/steps/discovery-report-step.js +6 -2
- package/dist/orchestration/steps/fetch-docs-shell.d.ts +17 -0
- package/dist/orchestration/steps/fetch-docs-shell.js +30 -0
- package/dist/orchestration/steps/fetch-docs-step.d.ts +1 -0
- package/dist/orchestration/steps/fetch-docs-step.js +32 -19
- package/dist/orchestration/steps/gap-analysis-step.js +13 -2
- package/dist/orchestration/steps/generate-configs-step.d.ts +1 -0
- package/dist/orchestration/steps/generate-configs-step.js +77 -26
- package/dist/orchestration/steps/mirror-repo-tasks-step.d.ts +1 -1
- package/dist/orchestration/steps/mirror-repo-tasks-step.js +1 -1
- package/dist/orchestration/steps/publish-report-step.js +19 -0
- package/dist/orchestration/steps/readiness-step.js +8 -3
- package/dist/orchestration/steps/report-step.js +17 -4
- package/dist/orchestration/steps/run-eval-step.d.ts +1 -0
- package/dist/orchestration/steps/run-eval-step.js +51 -31
- package/dist/pipeline/agent-behavior-report.js +6 -0
- package/dist/pipeline/attribution.d.ts +1 -1
- package/dist/pipeline/attribution.js +1 -1
- package/dist/pipeline/cache.js +29 -15
- package/dist/pipeline/calculate-scores.d.ts +2 -0
- package/dist/pipeline/calculate-scores.js +70 -33
- package/dist/pipeline/chronic-failures.d.ts +55 -0
- package/dist/pipeline/chronic-failures.js +110 -0
- package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js +1 -1
- package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.js +1 -1
- package/dist/pipeline/compiler/__tests__/literacy-handler.test.js +1 -1
- package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +132 -62
- package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.js +2 -3
- package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.js +33 -100
- package/dist/pipeline/compiler/__tests__/task-bridge.test.d.ts +9 -0
- package/dist/pipeline/compiler/__tests__/task-bridge.test.js +339 -0
- package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.d.ts +10 -0
- package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.js +509 -0
- package/dist/pipeline/compiler/assertion-mapper.d.ts +1 -1
- package/dist/pipeline/compiler/assertion-mapper.js +1 -1
- package/dist/pipeline/compiler/compiler-to-yaml.d.ts +2 -7
- package/dist/pipeline/compiler/compiler-to-yaml.js +2 -7
- package/dist/pipeline/compiler/config-loader.d.ts +14 -0
- package/dist/pipeline/compiler/config-loader.js +42 -2
- package/dist/pipeline/compiler/fixture-resolver.d.ts +1 -1
- package/dist/pipeline/compiler/fixture-resolver.js +1 -1
- package/dist/pipeline/compiler/ignore-fields.d.ts +1 -1
- package/dist/pipeline/compiler/ignore-fields.js +1 -1
- package/dist/pipeline/compiler/index.d.ts +2 -5
- package/dist/pipeline/compiler/index.js +2 -5
- package/dist/pipeline/compiler/literacy-bridge.d.ts +2 -2
- package/dist/pipeline/compiler/literacy-bridge.js +2 -2
- package/dist/pipeline/compiler/mode-bases/agent-harness.d.ts +10 -0
- package/dist/pipeline/compiler/mode-bases/agent-harness.js +21 -0
- package/dist/pipeline/compiler/mode-bases/index.d.ts +4 -0
- package/dist/pipeline/compiler/mode-bases/index.js +4 -0
- package/dist/pipeline/compiler/mode-bases/knowledge-probe.d.ts +10 -0
- package/dist/pipeline/compiler/mode-bases/knowledge-probe.js +22 -0
- package/dist/pipeline/compiler/mode-bases/literacy.d.ts +23 -0
- package/dist/pipeline/compiler/mode-bases/literacy.js +132 -0
- package/dist/pipeline/compiler/mode-bases/mcp-server.d.ts +10 -0
- package/dist/pipeline/compiler/mode-bases/mcp-server.js +70 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.d.ts +43 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.js +187 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.js +138 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/index.d.ts +16 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/index.js +43 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.js +29 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.d.ts +12 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.js +82 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.d.ts +4 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.js +19 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/types.d.ts +49 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.js +16 -0
- package/dist/pipeline/compiler/mode-handlers/index.d.ts +6 -7
- package/dist/pipeline/compiler/mode-handlers/index.js +6 -8
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.d.ts +16 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.js +61 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.d.ts +18 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.js +112 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.d.ts +26 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.js +49 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.js +28 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.d.ts +44 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.js +24 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/assertions.d.ts +18 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/assertions.js +118 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/compiler.d.ts +14 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/compiler.js +105 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/index.d.ts +11 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/index.js +38 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/prompts.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/prompts.js +74 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/types.d.ts +41 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/validation.d.ts +12 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/validation.js +28 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-assertions.js +63 -6
- package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.d.ts +42 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.js +334 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.js +100 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/index.d.ts +27 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/index.js +54 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.d.ts +8 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.js +28 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.d.ts +28 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.js +108 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/types.d.ts +37 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.js +43 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.d.ts +3 -1
- package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.js +65 -67
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.d.ts +33 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.js +191 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.js +101 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.js +172 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.js +323 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.d.ts +103 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.d.ts +65 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.js +368 -0
- package/dist/pipeline/compiler/preset-loader.d.ts +22 -0
- package/dist/pipeline/compiler/preset-loader.js +99 -0
- package/dist/pipeline/compiler/presets/sanity-literacy.d.ts +7 -10
- package/dist/pipeline/compiler/presets/sanity-literacy.js +11 -157
- package/dist/pipeline/compiler/promptfoo-compiler.d.ts +1 -4
- package/dist/pipeline/compiler/promptfoo-compiler.js +3 -12
- package/dist/pipeline/compiler/provider-assembler.js +13 -7
- package/dist/pipeline/compiler/sandbox/docker-sandbox.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/docker-sandbox.js +1 -1
- package/dist/pipeline/compiler/sandbox/fixture-provisioner.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/fixture-provisioner.js +1 -1
- package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.js +1 -1
- package/dist/pipeline/compiler/sandbox/index.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/index.js +1 -1
- package/dist/pipeline/compiler/sandbox/sandbox-selector.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/sandbox-selector.js +1 -1
- package/dist/pipeline/compiler/sandbox/sandbox-strategy.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/sandbox-strategy.js +1 -1
- package/dist/pipeline/compiler/sandbox/tempdir-sandbox.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/tempdir-sandbox.js +1 -1
- package/dist/pipeline/compiler/scoring-bridge.d.ts +1 -1
- package/dist/pipeline/compiler/scoring-bridge.js +1 -1
- package/dist/pipeline/compiler/task-bridge.d.ts +41 -0
- package/dist/pipeline/compiler/task-bridge.js +92 -0
- package/dist/pipeline/compiler/task-graph-builder.d.ts +1 -4
- package/dist/pipeline/compiler/task-graph-builder.js +1 -4
- package/dist/pipeline/compiler/telemetry/index.d.ts +1 -1
- package/dist/pipeline/compiler/telemetry/index.js +1 -1
- package/dist/pipeline/compiler/variable-resolver.d.ts +1 -1
- package/dist/pipeline/compiler/variable-resolver.js +1 -1
- package/dist/pipeline/coverage-audit.d.ts +1 -1
- package/dist/pipeline/coverage-audit.js +1 -1
- package/dist/pipeline/degradations.d.ts +1 -1
- package/dist/pipeline/degradations.js +1 -1
- package/dist/pipeline/expand-tasks.d.ts +2 -2
- package/dist/pipeline/expand-tasks.js +2 -2
- package/dist/pipeline/failure-modes.d.ts +1 -1
- package/dist/pipeline/failure-modes.js +13 -1
- package/dist/pipeline/gap-analysis.d.ts +1 -1
- package/dist/pipeline/gap-analysis.js +3 -1
- package/dist/pipeline/generate-configs.d.ts +2 -2
- package/dist/pipeline/generate-configs.js +16 -9
- package/dist/pipeline/grader-compare-runner.d.ts +1 -1
- package/dist/pipeline/grader-compare-runner.js +7 -1
- package/dist/pipeline/grader-comparison.d.ts +1 -1
- package/dist/pipeline/grader-comparison.js +1 -1
- package/dist/pipeline/grader-consistency-runner.d.ts +1 -1
- package/dist/pipeline/grader-consistency-runner.js +7 -1
- package/dist/pipeline/grader-consistency.d.ts +1 -1
- package/dist/pipeline/grader-consistency.js +1 -1
- package/dist/pipeline/grader-sensitivity-runner.d.ts +1 -1
- package/dist/pipeline/grader-sensitivity-runner.js +1 -1
- package/dist/pipeline/grader-sensitivity.d.ts +1 -1
- package/dist/pipeline/grader-sensitivity.js +1 -1
- package/dist/pipeline/grader-validate-runner.d.ts +1 -1
- package/dist/pipeline/grader-validate-runner.js +2 -2
- package/dist/pipeline/grader-validation.d.ts +1 -1
- package/dist/pipeline/grader-validation.js +1 -1
- package/dist/pipeline/map-request-to-config.js +16 -2
- package/dist/pipeline/mirror-repo-tasks.d.ts +8 -8
- package/dist/pipeline/mirror-repo-tasks.js +10 -10
- package/dist/pipeline/plan-format.d.ts +1 -1
- package/dist/pipeline/plan-format.js +1 -1
- package/dist/pipeline/plan.d.ts +1 -1
- package/dist/pipeline/plan.js +68 -30
- package/dist/pipeline/probe.d.ts +1 -1
- package/dist/pipeline/probe.js +1 -1
- package/dist/pipeline/readiness-report.d.ts +2 -2
- package/dist/pipeline/readiness-report.js +2 -2
- package/dist/pipeline/release-classification.d.ts +1 -1
- package/dist/pipeline/release-classification.js +1 -1
- package/dist/pipeline/release-report.d.ts +1 -1
- package/dist/pipeline/release-report.js +1 -1
- package/dist/pipeline/repo-eval-comment.d.ts +1 -1
- package/dist/pipeline/repo-eval-comment.js +1 -1
- package/dist/pipeline/repo-threshold-evaluator.d.ts +1 -1
- package/dist/pipeline/repo-threshold-evaluator.js +1 -1
- package/dist/pipeline/resolve-mappings.d.ts +6 -6
- package/dist/pipeline/resolve-mappings.js +44 -44
- package/dist/pipeline/retrieval-metrics.d.ts +3 -3
- package/dist/pipeline/retrieval-metrics.js +28 -20
- package/dist/pipeline/steps/calculate-scores-step.d.ts +11 -0
- package/dist/pipeline/steps/calculate-scores-step.js +89 -0
- package/dist/pipeline/steps/compare-step.d.ts +18 -0
- package/dist/pipeline/steps/compare-step.js +90 -0
- package/dist/pipeline/steps/eval-step.d.ts +53 -0
- package/dist/pipeline/steps/eval-step.js +347 -0
- package/dist/pipeline/steps/fetch-docs-step.d.ts +11 -0
- package/dist/pipeline/steps/fetch-docs-step.js +84 -0
- package/dist/pipeline/steps/generate-configs-step.d.ts +11 -0
- package/dist/pipeline/steps/generate-configs-step.js +98 -0
- package/dist/pipeline/steps/grader-consistency-step.d.ts +21 -0
- package/dist/pipeline/steps/grader-consistency-step.js +74 -0
- package/dist/pipeline/steps/publish-report-step.d.ts +57 -0
- package/dist/pipeline/steps/publish-report-step.js +243 -0
- package/dist/pipeline/steps/report-step.d.ts +13 -0
- package/dist/pipeline/steps/report-step.js +56 -0
- package/dist/pipeline/steps/update-scores-step.d.ts +11 -0
- package/dist/pipeline/steps/update-scores-step.js +42 -0
- package/dist/pipeline/targeted-loo.d.ts +1 -1
- package/dist/pipeline/targeted-loo.js +1 -1
- package/dist/pipeline/thresholds.d.ts +1 -1
- package/dist/pipeline/thresholds.js +1 -1
- package/dist/pipeline/validate.js +13 -0
- package/dist/report-store.d.ts +17 -0
- package/dist/report-store.js +24 -0
- package/dist/scripts/agent-behavior-report.d.ts +19 -0
- package/dist/scripts/agent-behavior-report.js +315 -0
- package/dist/scripts/baseline.d.ts +43 -0
- package/dist/scripts/baseline.js +267 -0
- package/dist/scripts/calculate-scores.d.ts +166 -0
- package/dist/scripts/calculate-scores.js +1296 -0
- package/dist/scripts/compare.d.ts +22 -0
- package/dist/scripts/compare.js +334 -0
- package/dist/scripts/coverage-audit.d.ts +44 -0
- package/dist/scripts/coverage-audit.js +209 -0
- package/dist/scripts/debug-eval.d.ts +19 -0
- package/dist/scripts/debug-eval.js +73 -0
- package/dist/scripts/discovery-report.d.ts +58 -0
- package/dist/scripts/discovery-report.js +250 -0
- package/dist/scripts/fetch-docs.d.ts +35 -0
- package/dist/scripts/fetch-docs.js +472 -0
- package/dist/scripts/generate-configs.d.ts +66 -0
- package/dist/scripts/generate-configs.js +459 -0
- package/dist/scripts/grader-api.d.ts +27 -0
- package/dist/scripts/grader-api.js +206 -0
- package/dist/scripts/grader-compare.d.ts +22 -0
- package/dist/scripts/grader-compare.js +368 -0
- package/dist/scripts/grader-consistency.d.ts +20 -0
- package/dist/scripts/grader-consistency.js +313 -0
- package/dist/scripts/grader-sensitivity.d.ts +22 -0
- package/dist/scripts/grader-sensitivity.js +354 -0
- package/dist/scripts/grader-validate.d.ts +19 -0
- package/dist/scripts/grader-validate.js +267 -0
- package/dist/scripts/measure-retrieval.d.ts +10 -0
- package/dist/scripts/measure-retrieval.js +145 -0
- package/dist/scripts/migrate-task-mode.d.ts +1 -1
- package/dist/scripts/migrate-task-mode.js +1 -1
- package/dist/scripts/migrate-tasks-to-content-lake.d.ts +1 -1
- package/dist/scripts/migrate-tasks-to-content-lake.js +1 -1
- package/dist/scripts/pipeline.d.ts +76 -0
- package/dist/scripts/pipeline.js +1031 -0
- package/dist/scripts/pr-comment.d.ts +10 -0
- package/dist/scripts/pr-comment.js +510 -0
- package/dist/scripts/readiness-report.d.ts +88 -0
- package/dist/scripts/readiness-report.js +342 -0
- package/dist/scripts/update-quality-scores.d.ts +15 -0
- package/dist/scripts/update-quality-scores.js +184 -0
- package/dist/scripts/validate-task-sources.d.ts +1 -1
- package/dist/scripts/validate-task-sources.js +1 -1
- package/dist/scripts/validate.d.ts +13 -0
- package/dist/scripts/validate.js +79 -0
- package/dist/scripts/webhook-server.d.ts +26 -0
- package/dist/scripts/webhook-server.js +147 -0
- package/dist/scripts/weekly-digest.d.ts +24 -0
- package/dist/scripts/weekly-digest.js +144 -0
- package/dist/sinks/format-slack.d.ts +64 -0
- package/dist/sinks/format-slack.js +306 -0
- package/dist/sinks/slack-sink.d.ts +27 -0
- package/dist/sinks/slack-sink.js +78 -0
- package/dist/sinks/types.d.ts +1 -1
- package/dist/sinks/types.js +1 -1
- package/dist/sinks/webhook-sink.d.ts +19 -0
- package/dist/sinks/webhook-sink.js +50 -0
- package/dist/tasks/knowledge-probe/define-type-api.task.ts +66 -0
- package/dist/tasks/knowledge-probe/groq-projections.task.ts +62 -0
- package/dist/tasks/literacy/content-lake.task.ts +181 -0
- package/dist/tasks/literacy/frameworks.task.ts +129 -0
- package/dist/tasks/literacy/functions.task.ts +70 -0
- package/dist/tasks/literacy/groq.task.ts +259 -0
- package/dist/tasks/literacy/image-handling.task.ts +95 -0
- package/dist/tasks/literacy/nextjs-live.task.ts +76 -0
- package/dist/tasks/literacy/portable-text.task.ts +169 -0
- package/dist/tasks/literacy/studio-setup.task.ts +134 -0
- package/dist/tasks/literacy/visual-editing.task.ts +147 -0
- package/package.json +32 -24
- package/tasks/.expanded.agentic.yaml +280 -0
- package/tasks/.expanded.yaml +565 -0
- package/tasks/knowledge-probe/define-type-api.task.ts +11 -0
- package/tasks/knowledge-probe/groq-projections.task.ts +3 -0
- package/tasks/literacy/content-lake.task.ts +181 -0
- package/tasks/literacy/frameworks.task.ts +1 -0
- package/tasks/literacy/functions.task.ts +1 -0
- package/tasks/literacy/groq.task.ts +1 -0
- package/tasks/literacy/image-handling.task.ts +95 -0
- package/tasks/literacy/nextjs-live.task.ts +2 -1
- package/tasks/literacy/portable-text.task.ts +169 -0
- package/tasks/literacy/studio-setup.task.ts +5 -2
- package/tasks/literacy/visual-editing.task.ts +1 -0
- package/LICENSE +0 -21
- package/tasks/frameworks.yaml +0 -98
- package/tasks/functions.yaml +0 -51
- package/tasks/groq.yaml +0 -216
- package/tasks/nextjs-live.yaml +0 -62
- package/tasks/studio-setup.yaml +0 -111
- package/tasks/visual-editing.yaml +0 -120
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* lib/baseline.ts — DEPRECATED re-export shim.
|
|
3
|
+
*
|
|
4
|
+
* The real implementation has moved to pipeline/baseline.ts.
|
|
5
|
+
* This shim preserves backward compatibility for:
|
|
6
|
+
* - Direct CLI invocation: `tsx src/lib/baseline.ts`
|
|
7
|
+
* - Test imports that haven't been updated yet
|
|
8
|
+
*
|
|
9
|
+
* TODO: Update all importers to use pipeline/baseline.ts, then delete this file.
|
|
10
|
+
*
|
|
11
|
+
* @deprecated Import from ../pipeline/baseline.js instead.
|
|
12
|
+
*/
|
|
13
|
+
export type { BaselineMetadata, CompareResult, ScoreComparison, } from "../pipeline/baseline.js";
|
|
14
|
+
export declare function saveBaseline(tag?: string): {
|
|
15
|
+
success: boolean;
|
|
16
|
+
message: string;
|
|
17
|
+
};
|
|
18
|
+
export declare function compareBaseline(baselineFile?: string): import("./baseline.js").CompareResult;
|
|
19
|
+
export declare function listBaselines(): import("./baseline.js").BaselineMetadata[];
|
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* lib/baseline.ts — DEPRECATED re-export shim.
|
|
3
|
+
*
|
|
4
|
+
* The real implementation has moved to pipeline/baseline.ts.
|
|
5
|
+
* This shim preserves backward compatibility for:
|
|
6
|
+
* - Direct CLI invocation: `tsx src/lib/baseline.ts`
|
|
7
|
+
* - Test imports that haven't been updated yet
|
|
8
|
+
*
|
|
9
|
+
* TODO: Update all importers to use pipeline/baseline.ts, then delete this file.
|
|
10
|
+
*
|
|
11
|
+
* @deprecated Import from ../pipeline/baseline.js instead.
|
|
12
|
+
*/
|
|
13
|
+
import { dirname, resolve } from "path";
|
|
14
|
+
import { fileURLToPath } from "url";
|
|
15
|
+
import { saveBaseline as _saveBaseline, compareBaseline as _compareBaseline, listBaselines as _listBaselines, } from "../pipeline/baseline.js";
|
|
16
|
+
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
17
|
+
const ROOT = resolve(__dirname, "../..");
|
|
18
|
+
export function saveBaseline(tag) {
|
|
19
|
+
return _saveBaseline(ROOT, tag);
|
|
20
|
+
}
|
|
21
|
+
export function compareBaseline(baselineFile) {
|
|
22
|
+
return _compareBaseline(ROOT, baselineFile);
|
|
23
|
+
}
|
|
24
|
+
export function listBaselines() {
|
|
25
|
+
return _listBaselines(ROOT);
|
|
26
|
+
}
|
|
27
|
+
// ---------------------------------------------------------------------------
|
|
28
|
+
// CLI
|
|
29
|
+
// ---------------------------------------------------------------------------
|
|
30
|
+
// CLI dispatch: active only when the script path handed to node/tsx ends in
// "baseline.ts" or "baseline.js". Commands: save (default), history, compare.
if (["baseline.ts", "baseline.js"].some((suffix) => process.argv[1]?.endsWith(suffix))) {
    const cliArgs = process.argv.slice(2);
    const command = cliArgs[0] || "save";
    /** Value following `--<name>`; undefined when the flag is absent or is the last token. */
    const getArg = (name) => {
        const flagAt = cliArgs.indexOf(`--${name}`);
        if (flagAt === -1 || flagAt + 1 >= cliArgs.length) {
            return undefined;
        }
        return cliArgs[flagAt + 1];
    };
    /** Up / down / flat marker for a numeric delta. */
    const trendIcon = (delta) => {
        if (delta > 0) {
            return "📈";
        }
        if (delta < 0) {
            return "📉";
        }
        return "➡️";
    };
    /** Score delta rendered with an explicit "+" for gains and "=" for no change. */
    const signedPoints = (delta) => {
        if (delta > 0) {
            return `+${delta}`;
        }
        return delta === 0 ? "=" : String(delta);
    };
    /** Column header + rule shared by the score table and the cost table. */
    const printComparisonHeader = () => {
        const titles = "Feature Area".padEnd(18) +
            "Current".padEnd(10) +
            "Baseline".padEnd(10) +
            "Delta";
        console.log(" " + titles);
        console.log(" " + "-".repeat(50));
    };
    /** A row carries cost data when either side of the cost comparison is defined. */
    const hasCost = (row) => !(row.costCurrent === undefined && row.costBaseline === undefined);
    if (command === "compare") {
        const file = getArg("file");
        console.log("=== Baseline Comparison ===\n");
        const result = compareBaseline(file);
        if (!result.success) {
            console.error(` ❌ ${result.message}`);
            process.exit(1);
        }
        console.log(` ${result.message}\n`);
        printComparisonHeader();
        for (const row of result.comparisons) {
            const change = signedPoints(row.delta);
            const marker = trendIcon(row.delta);
            console.log(" " +
                row.feature.padEnd(18) +
                String(row.current).padEnd(10) +
                String(row.baseline).padEnd(10) +
                `${marker} ${change}`);
        }
        // Cost table is emitted only when at least one row has cost data.
        const costRows = result.comparisons.filter(hasCost);
        if (costRows.length > 0) {
            console.log();
            console.log(" Cost Comparison:");
            printComparisonHeader();
            for (const row of costRows) {
                const currentCost = `$${(row.costCurrent ?? 0).toFixed(4)}`;
                const baselineCost = `$${(row.costBaseline ?? 0).toFixed(4)}`;
                const costDelta = row.costDelta ?? 0;
                let change = "=";
                if (costDelta > 0) {
                    change = `+$${costDelta.toFixed(4)}`;
                }
                else if (costDelta < 0) {
                    change = `-$${Math.abs(costDelta).toFixed(4)}`;
                }
                const marker = trendIcon(costDelta);
                console.log(" " +
                    row.feature.padEnd(18) +
                    currentCost.padEnd(10) +
                    baselineCost.padEnd(10) +
                    `${marker} ${change}`);
            }
        }
        console.log();
        const overallIcon = trendIcon(result.overallDelta);
        const overallStr = signedPoints(result.overallDelta);
        console.log(` Overall: ${overallIcon} ${overallStr} points`);
    }
    else if (command === "history") {
        console.log("=== Baseline History ===\n");
        const baselines = listBaselines();
        if (baselines.length === 0) {
            console.log(" No baselines saved yet.");
        }
        else {
            // The cost column appears only if some baseline recorded a cost figure.
            const hasCosts = baselines.some((entry) => entry.totalCost !== undefined || entry.graderCost !== undefined);
            const costHeader = hasCosts ? "Cost".padEnd(10) : "";
            console.log(" " +
                "Date".padEnd(22) +
                "Avg".padEnd(6) +
                "Areas".padEnd(7) +
                costHeader +
                "Tag");
            console.log(" " + "-".repeat(hasCosts ? 60 : 50));
            for (const entry of baselines) {
                const when = new Date(entry.timestamp).toLocaleString();
                const combinedCost = (entry.totalCost ?? 0) + (entry.graderCost ?? 0);
                let costCell = "";
                if (hasCosts) {
                    const shown = combinedCost > 0 ? `$${combinedCost.toFixed(2)}` : "-";
                    costCell = shown.padEnd(10);
                }
                console.log(" " +
                    when.padEnd(22) +
                    String(entry.avgScore).padEnd(6) +
                    String(entry.areaCount).padEnd(7) +
                    costCell +
                    (entry.tag ?? ""));
            }
        }
    }
    else if (command === "save") {
        const tag = getArg("tag");
        console.log("=== Saving baseline snapshot ===\n");
        const result = saveBaseline(tag);
        if (result.success) {
            console.log(` ✅ ${result.message}`);
        }
        else {
            console.error(` ❌ ${result.message}`);
            process.exit(1);
        }
    }
    else {
        console.error(`Unknown command: "${command}". Use: save, history, compare`);
        process.exit(1);
    }
}
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* lib/calculate-scores.ts — DEPRECATED re-export shim.
|
|
3
|
+
*
|
|
4
|
+
* The real implementation has moved to pipeline/calculate-scores.ts.
|
|
5
|
+
* This shim preserves backward compatibility for:
|
|
6
|
+
* - Direct CLI invocation: `tsx src/lib/calculate-scores.ts`
|
|
7
|
+
* - Test imports that haven't been updated yet
|
|
8
|
+
*
|
|
9
|
+
* TODO: Update all importers to use pipeline/calculate-scores.ts, then delete this file.
|
|
10
|
+
*
|
|
11
|
+
* @deprecated Import from ../pipeline/calculate-scores.js instead.
|
|
12
|
+
*/
|
|
13
|
+
export { calculateAndWriteScores, calculateScoresPerModel, extractGraderJudgments, scoreAgenticResults, type CalculateScoresOptions, type PromptfooResultsWrapper, type RawPromptfooFile, type RawTestResult, } from "../pipeline/calculate-scores.js";
// Re-export through the runtime ".js" specifier (the same one the emitted
// JavaScript shim uses). TypeScript resolves the adjacent index.d.ts on its
// own; naming the ".d.ts" file directly is rejected for value re-exports.
export { classifyRubric, detectFeatureArea, extractUrlMetadata, mergeScores, parseRubricScore, type ActualScoreEntry, type ComponentResult, type TestResult, type UrlMetadata, } from "../_vendor/ailf-core/index.js";
import type { CalculateScoresOptions } from "../pipeline/calculate-scores.js";
|
|
16
|
+
/**
 * Legacy main() entry point — wraps calculateAndWriteScores() with env var fallbacks.
 *
 * @param options - The calculateAndWriteScores() options, except that `rootDir`
 *   is optional here. In the shim implementation a missing `rootDir` falls back
 *   to the directory two levels above the module, and a missing `mode` falls
 *   back to the EVAL_MODE environment variable, then to "baseline".
 *
 * @deprecated Use calculateAndWriteScores() from pipeline/calculate-scores.ts instead.
 */
export declare function main(options?: Omit<CalculateScoresOptions, "rootDir"> & {
    rootDir?: string;
}): void;
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* lib/calculate-scores.ts — DEPRECATED re-export shim.
|
|
3
|
+
*
|
|
4
|
+
* The real implementation has moved to pipeline/calculate-scores.ts.
|
|
5
|
+
* This shim preserves backward compatibility for:
|
|
6
|
+
* - Direct CLI invocation: `tsx src/lib/calculate-scores.ts`
|
|
7
|
+
* - Test imports that haven't been updated yet
|
|
8
|
+
*
|
|
9
|
+
* TODO: Update all importers to use pipeline/calculate-scores.ts, then delete this file.
|
|
10
|
+
*
|
|
11
|
+
* @deprecated Import from ../pipeline/calculate-scores.js instead.
|
|
12
|
+
*/
|
|
13
|
+
import { dirname, join } from "path";
|
|
14
|
+
import { fileURLToPath } from "url";
|
|
15
|
+
// Re-export everything from the real implementation
|
|
16
|
+
export { calculateAndWriteScores, calculateScoresPerModel, extractGraderJudgments, scoreAgenticResults, } from "../pipeline/calculate-scores.js";
|
|
17
|
+
// Re-export core types/functions for backward compatibility
|
|
18
|
+
export { classifyRubric, detectFeatureArea, extractUrlMetadata, mergeScores, parseRubricScore, } from "../_vendor/ailf-core/index.js";
|
|
19
|
+
import { calculateAndWriteScores } from "../pipeline/calculate-scores.js";
|
|
20
|
+
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
21
|
+
const ROOT = join(__dirname, "..", "..");
|
|
22
|
+
/**
 * Legacy entry point kept for older callers: fills in defaults and delegates
 * to calculateAndWriteScores().
 *
 * Defaults applied here:
 *  - rootDir: ROOT (two directories above this module) when not supplied
 *  - mode: options.mode, else the EVAL_MODE environment variable, else "baseline"
 * All other options are forwarded unchanged (undefined when omitted).
 *
 * @deprecated Use calculateAndWriteScores() from pipeline/calculate-scores.ts instead.
 */
export function main(options) {
    const given = options ?? {};
    const scoreOptions = {
        rootDir: given.rootDir ?? ROOT,
        allowedOrigins: given.allowedOrigins,
        mode: given.mode ?? process.env.EVAL_MODE ?? "baseline",
        resolvedSource: given.resolvedSource,
        resultsPath: given.resultsPath,
        searchMode: given.searchMode,
        source: given.source,
    };
    calculateAndWriteScores(scoreOptions);
}
|
|
38
|
+
// Run main() only when the script path ends in "calculate-scores.ts" or
// "calculate-scores.js" — i.e. direct invocation, not an import from tests.
if (["calculate-scores.ts", "calculate-scores.js"].some((suffix) => process.argv[1]?.endsWith(suffix))) {
    main();
}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* compare.ts
|
|
3
|
+
*
|
|
4
|
+
* CLI for structured comparison between two evaluation runs.
|
|
5
|
+
*
|
|
6
|
+
* Usage:
|
|
7
|
+
* pnpm compare # compare current vs latest baseline
|
|
8
|
+
* pnpm compare --baseline <path> # compare current vs specific file
|
|
9
|
+
* pnpm compare --baseline <path> --experiment <path> # compare two specific files
|
|
10
|
+
* pnpm compare --threshold 5 # custom noise threshold
|
|
11
|
+
* pnpm compare --output /tmp/comparison.json # write JSON report to file
|
|
12
|
+
* pnpm compare --format json # output raw JSON (default: table)
|
|
13
|
+
*
|
|
14
|
+
* Reads: results/latest/score-summary.json (as experiment, unless --experiment)
|
|
15
|
+
* Reads: results/baselines/<latest>.json (as baseline, unless --baseline)
|
|
16
|
+
*/
|
|
17
|
+
// Re-export through the runtime ".js" specifier (matching the emitted
// compare.js); TypeScript resolves the adjacent index.d.ts automatically,
// whereas a value re-export from a ".d.ts" path is rejected.
export { formatComparisonMarkdown, formatComparisonTable, } from "../_vendor/ailf-core/index.js";
|
|
18
|
+
/**
 * CLI entry point for the comparison command. Takes no arguments; the
 * implementation reads its options (--baseline, --experiment, --threshold,
 * --output, --format) from process.argv.
 */
export declare function main(): void;
|
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* compare.ts
|
|
3
|
+
*
|
|
4
|
+
* CLI for structured comparison between two evaluation runs.
|
|
5
|
+
*
|
|
6
|
+
* Usage:
|
|
7
|
+
* pnpm compare # compare current vs latest baseline
|
|
8
|
+
* pnpm compare --baseline <path> # compare current vs specific file
|
|
9
|
+
* pnpm compare --baseline <path> --experiment <path> # compare two specific files
|
|
10
|
+
* pnpm compare --threshold 5 # custom noise threshold
|
|
11
|
+
* pnpm compare --output /tmp/comparison.json # write JSON report to file
|
|
12
|
+
* pnpm compare --format json # output raw JSON (default: table)
|
|
13
|
+
*
|
|
14
|
+
* Reads: results/latest/score-summary.json (as experiment, unless --experiment)
|
|
15
|
+
* Reads: results/baselines/<latest>.json (as baseline, unless --baseline)
|
|
16
|
+
*/
|
|
17
|
+
import { existsSync, mkdirSync, readFileSync, readdirSync, writeFileSync } from "fs";
import { dirname, join, resolve } from "path";
import { fileURLToPath } from "url";
import { formatComparisonTable, } from "../_vendor/ailf-core/index.js";
import { compare } from "../pipeline/compare.js";
import { DEFAULT_NOISE_THRESHOLD, } from "../pipeline/types.js";
|
|
23
|
+
// Re-export pure formatters from core for backward compatibility.
|
|
24
|
+
export { formatComparisonMarkdown, formatComparisonTable, } from "../_vendor/ailf-core/index.js";
|
|
25
|
+
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
26
|
+
const ROOT = resolve(__dirname, "..", "..");
|
|
27
|
+
const BASELINES_DIR = join(ROOT, "results", "baselines");
|
|
28
|
+
const SCORE_SUMMARY_PATH = join(ROOT, "results", "latest", "score-summary.json");
|
|
29
|
+
// ---------------------------------------------------------------------------
|
|
30
|
+
// CLI argument parsing
|
|
31
|
+
// ---------------------------------------------------------------------------
|
|
32
|
+
// Everything after the interpreter and script path.
const [, , ...args] = process.argv;
/**
 * Reports whether the bare switch `--<name>` appears anywhere in the CLI arguments.
 */
function getFlag(name) {
    const flag = `--${name}`;
    return args.some((token) => token === flag);
}
/**
 * Returns the token that follows the first `--<name>`, or undefined when the
 * flag is missing or is the final argument.
 */
function getOption(name) {
    const position = args.indexOf(`--${name}`);
    if (position === -1) {
        return undefined;
    }
    const valueAt = position + 1;
    return valueAt < args.length ? args[valueAt] : undefined;
}
|
|
40
|
+
// Parsed CLI options (evaluated once, at module load).
const baselinePath = getOption("baseline");
const experimentPath = getOption("experiment");
const thresholdStr = getOption("threshold");
// parseFloat yields NaN for non-numeric input, and NaN compares false against
// every number, so an unparsable --threshold is reported and replaced by the
// default instead of flowing silently into the comparison.
const threshold = (() => {
    if (!thresholdStr) {
        return DEFAULT_NOISE_THRESHOLD;
    }
    const parsed = parseFloat(thresholdStr);
    if (Number.isFinite(parsed)) {
        return parsed;
    }
    console.error(`⚠️ Ignoring invalid --threshold value "${thresholdStr}"; using ${DEFAULT_NOISE_THRESHOLD}.`);
    return DEFAULT_NOISE_THRESHOLD;
})();
const outputPath = getOption("output");
const format = getOption("format") ?? "table";
// getFlag() always prepends "--", so getFlag("h") only matches "--h". The help
// text advertises "-h", so accept that spelling too ("--h" kept for
// backward compatibility).
const showHelp = getFlag("help") || getFlag("h") || args.includes("-h");
|
|
49
|
+
// Help handling runs at module load, before main(): print the usage text and
// terminate the process with exit code 0.
if (showHelp) {
    console.log(`
Usage: pnpm compare [options]

Compare two evaluation score summaries and produce structured deltas.

Options:
--baseline <path> Baseline score-summary.json (default: latest baseline)
--experiment <path> Experiment score-summary.json (default: results/latest/score-summary.json)
--threshold <n> Noise threshold for unchanged classification (default: ${DEFAULT_NOISE_THRESHOLD})
--output <path> Write JSON report to file
--format <fmt> Output format: table (default) or json
--help, -h Show this help

Examples:
pnpm compare # current scores vs latest baseline
pnpm compare --threshold 5 # wider noise band
pnpm compare --format json # machine-readable output
pnpm compare --baseline results/baselines/20260310_02_43_44.json
pnpm compare --baseline before.json --experiment after.json
`);
    process.exit(0);
}
|
|
72
|
+
// ---------------------------------------------------------------------------
|
|
73
|
+
// File loading helpers
|
|
74
|
+
// ---------------------------------------------------------------------------
|
|
75
|
+
// formatComparisonMarkdown — re-exported from @sanity/ailf-core above.
// formatComparisonTable — re-exported from @sanity/ailf-core above.
/**
 * Picks the baseline file to compare against when none is given explicitly.
 *
 * Considers every entry in BASELINES_DIR whose name ends in ".json" and
 * chooses the one that sorts last under plain string ordering.
 *
 * @returns Full path of the chosen file, or null when the directory does not
 *   exist or holds no ".json" entries.
 */
function findLatestBaseline() {
    if (!existsSync(BASELINES_DIR)) {
        return null;
    }
    let newest = null;
    for (const entry of readdirSync(BASELINES_DIR)) {
        if (!entry.endsWith(".json")) {
            continue;
        }
        if (newest === null || entry > newest) {
            newest = entry;
        }
    }
    return newest === null ? null : join(BASELINES_DIR, newest);
}
|
|
88
|
+
function loadSummary(path) {
|
|
89
|
+
if (!existsSync(path)) {
|
|
90
|
+
console.error(`❌ File not found: ${path}`);
|
|
91
|
+
process.exit(1);
|
|
92
|
+
}
|
|
93
|
+
const raw = readFileSync(path, "utf-8");
|
|
94
|
+
return JSON.parse(raw);
|
|
95
|
+
}
|
|
96
|
+
// ---------------------------------------------------------------------------
|
|
97
|
+
// Main
|
|
98
|
+
// ---------------------------------------------------------------------------
|
|
99
|
+
/**
 * Runs the comparison CLI using the options parsed at module load.
 *
 * Steps:
 *  1. Load the experiment summary (--experiment, else SCORE_SUMMARY_PATH).
 *  2. Load the baseline summary (--baseline, else the newest file found by
 *     findLatestBaseline()); exits with code 1 when none can be found.
 *  3. Unless --threshold was given, try to pick up grader consistency data
 *     from results/latest/grader-consistency.json.
 *  4. Run compare() and print the result as JSON or as a table; --output also
 *     writes the JSON report to that path.
 *  5. Always write the JSON report to results/latest/comparison-report.json.
 */
export function main() {
    // Resolve experiment path
    const expPath = experimentPath ?? SCORE_SUMMARY_PATH;
    const experiment = loadSummary(expPath);
    // Resolve baseline path
    let basePath;
    if (baselinePath) {
        basePath = resolve(baselinePath);
    }
    else {
        const latest = findLatestBaseline();
        if (!latest) {
            console.error("❌ No baselines found. Run 'pnpm baseline:save' first, or use --baseline <path>.");
            process.exit(1);
        }
        basePath = latest;
    }
    const baseline = loadSummary(basePath);
    // Try to load grader consistency data for empirical thresholds
    const latestDir = join(ROOT, "results", "latest");
    const consistencyPath = join(latestDir, "grader-consistency.json");
    let graderConsistency;
    if (existsSync(consistencyPath) && !thresholdStr) {
        try {
            const consistencyRaw = JSON.parse(readFileSync(consistencyPath, "utf-8"));
            const recommended = consistencyRaw?.recommendedThreshold;
            // Validate BEFORE adopting the data: previously a truthy non-number
            // (e.g. a string) was assigned first and only then blew up in
            // .toFixed(), leaving the malformed object in use after the catch.
            const usable = typeof recommended === "number" &&
                Number.isFinite(recommended) &&
                recommended !== 0 &&
                Boolean(consistencyRaw.perDimension);
            if (usable) {
                graderConsistency = consistencyRaw;
                console.log(` 📊 Using empirical noise threshold: ±${recommended.toFixed(1)} (from grader consistency data)`);
            }
        }
        catch {
            // Non-fatal — fall back to default threshold
        }
    }
    console.log(` Baseline: ${basePath}`);
    console.log(` Experiment: ${expPath}`);
    if (!graderConsistency) {
        console.log(` Threshold: ±${threshold} (default — run --grader-replications for empirical threshold)`);
    }
    console.log("");
    const report = compare(baseline, experiment, {
        graderConsistency,
        noiseThreshold: threshold,
    });
    // Serialized once; reused for stdout, --output and the results/latest copy.
    const json = JSON.stringify(report, null, 2);
    if (format === "json") {
        if (outputPath) {
            writeFileSync(outputPath, json);
            console.log(` ✅ Comparison report written to ${outputPath}`);
        }
        else {
            console.log(json);
        }
    }
    else {
        const table = formatComparisonTable(report);
        console.log(table);
        if (outputPath) {
            writeFileSync(outputPath, json);
            console.log(` ✅ Comparison report also written to ${outputPath}`);
        }
    }
    // Write comparison report to results/latest for other steps to consume.
    // The directory may not exist yet when both inputs were given explicitly,
    // so create it rather than failing with ENOENT after the work is done.
    mkdirSync(latestDir, { recursive: true });
    const latestComparisonPath = join(latestDir, "comparison-report.json");
    writeFileSync(latestComparisonPath, json);
}
|
|
166
|
+
// Run main() only when the script path ends in "compare.ts" or "compare.js".
if (["compare.ts", "compare.js"].some((suffix) => process.argv[1]?.endsWith(suffix))) {
    main();
}
|
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
export { countReferencedDocs, countTasksByArea, formatCoverageConsole, formatCoverageMarkdown, loadFeatureRegistry, runCoverageAudit, } from "../pipeline/coverage-audit.js";
|
|
2
|
+
export type { CoverageAuditReport, ProductFeature } from "../pipeline/types.js";
|
|
3
|
+
/**
 * CLI entry point for the coverage audit. Takes no arguments; the
 * implementation reads `--format` and `--json` from process.argv and prints
 * the report to stdout.
 *
 * @deprecated Use pipeline functions directly with explicit rootDir
 */
export declare function main(): void;
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* lib/coverage-audit.ts — DEPRECATED re-export shim.
|
|
3
|
+
* @deprecated Import from ../pipeline/coverage-audit.js instead.
|
|
4
|
+
*/
|
|
5
|
+
import { dirname, resolve } from "path";
|
|
6
|
+
import { fileURLToPath } from "url";
|
|
7
|
+
export { countReferencedDocs, countTasksByArea, formatCoverageConsole, formatCoverageMarkdown, loadFeatureRegistry, runCoverageAudit, } from "../pipeline/coverage-audit.js";
|
|
8
|
+
import { countReferencedDocs, formatCoverageConsole, formatCoverageMarkdown, runCoverageAudit, } from "../pipeline/coverage-audit.js";
|
|
9
|
+
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
10
|
+
const ROOT = resolve(__dirname, "../..");
|
|
11
|
+
/**
 * CLI entry point for the coverage audit.
 *
 * Output selection (first match wins):
 *  - `--json`                       → the report as pretty-printed JSON
 *  - `--format md|markdown`         → formatCoverageMarkdown(report)
 *  - otherwise                      → formatCoverageConsole(report) followed
 *                                     by the "DOCUMENT UTILIZATION" summary
 *
 * Exits with code 1 when runCoverageAudit() returns a falsy report.
 *
 * @deprecated Use pipeline functions directly with explicit rootDir
 */
export function main() {
    const args = process.argv.slice(2);
    const formatAt = args.indexOf("--format");
    const formatArg = formatAt !== -1 ? args[formatAt + 1] : undefined;
    const jsonOutput = args.includes("--json");
    // Single source of truth for "markdown was requested". The original tested
    // "md" || "markdown" when choosing the formatter but only "md" when deciding
    // whether to append the console-only section, so `--format markdown` got
    // plain-text lines appended to its Markdown output.
    const markdownOutput = formatArg === "md" || formatArg === "markdown";
    const report = runCoverageAudit(ROOT);
    if (!report) {
        console.error("❌ Coverage audit failed.");
        process.exit(1);
    }
    if (jsonOutput) {
        console.log(JSON.stringify(report, null, 2));
    }
    else if (markdownOutput) {
        console.log(formatCoverageMarkdown(report));
    }
    else {
        console.log(formatCoverageConsole(report));
        // Console format only: add the document utilization summary.
        const docStats = countReferencedDocs(ROOT);
        console.log("DOCUMENT UTILIZATION:");
        console.log(` ${docStats.total} unique document slugs referenced across evaluation tasks`);
        console.log("");
    }
}
|
|
39
|
+
// Run main() only when the script path ends in "coverage-audit.ts" or
// "coverage-audit.js".
if (["coverage-audit.ts", "coverage-audit.js"].some((suffix) => process.argv[1]?.endsWith(suffix))) {
    main();
}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* lib/discovery-report.ts — DEPRECATED re-export shim.
|
|
3
|
+
*
|
|
4
|
+
* The real implementation has moved to pipeline/discovery-report.ts.
|
|
5
|
+
*
|
|
6
|
+
* @deprecated Import from ../pipeline/discovery-report.js instead.
|
|
7
|
+
*/
|
|
8
|
+
export { formatDiscoveryMarkdown, generateDiscoveryReport, type DiscoveryReport, type InvisibleDoc, } from "../pipeline/discovery-report.js";
|
|
9
|
+
/**
 * Legacy main() entry point.
 *
 * Takes no arguments; the implementation reads `--output`, `--area`,
 * `--input` and an optional positional summary path from process.argv.
 *
 * @deprecated Use generateDiscoveryReport() + formatDiscoveryMarkdown() directly.
 */
export declare function main(): void;
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* lib/discovery-report.ts — DEPRECATED re-export shim.
|
|
3
|
+
*
|
|
4
|
+
* The real implementation has moved to pipeline/discovery-report.ts.
|
|
5
|
+
*
|
|
6
|
+
* @deprecated Import from ../pipeline/discovery-report.js instead.
|
|
7
|
+
*/
|
|
8
|
+
import { existsSync, readFileSync, writeFileSync } from "node:fs";
|
|
9
|
+
import { dirname, join, resolve } from "node:path";
|
|
10
|
+
import { fileURLToPath } from "node:url";
|
|
11
|
+
export { formatDiscoveryMarkdown, generateDiscoveryReport, } from "../pipeline/discovery-report.js";
|
|
12
|
+
import { formatDiscoveryMarkdown, generateDiscoveryReport, } from "../pipeline/discovery-report.js";
|
|
13
|
+
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
14
|
+
const ROOT = resolve(__dirname, "..", "..");
|
|
15
|
+
/**
 * Legacy CLI entry point for the discovery report.
 *
 * Recognised arguments:
 *  - `--output <file>`  write the Markdown report to <file> instead of stdout
 *  - `--area <a,b,...>` comma-separated area filter (may be repeated)
 *  - `--input <file>`   score summary to read
 *  - `<file>`           any token not starting with "-" also sets the summary path
 * A value-taking flag with no value after it is ignored. The summary path
 * defaults to results/latest/score-summary.json under ROOT; a missing file
 * ends the process with exit code 1.
 *
 * @deprecated Use generateDiscoveryReport() + formatDiscoveryMarkdown() directly.
 */
export function main() {
    const argv = process.argv.slice(2);
    let output;
    const areaFilter = [];
    let summaryPath = join(ROOT, "results", "latest", "score-summary.json");
    let cursor = 0;
    while (cursor < argv.length) {
        const token = argv[cursor];
        const value = argv[cursor + 1];
        const takesValue = token === "--output" || token === "--area" || token === "--input";
        if (takesValue && value) {
            if (token === "--output") {
                output = value;
            }
            else if (token === "--area") {
                areaFilter.push(...value.split(","));
            }
            else {
                summaryPath = value;
            }
            // Skip the flag and the value it consumed.
            cursor += 2;
            continue;
        }
        if (!token.startsWith("-")) {
            summaryPath = token;
        }
        cursor += 1;
    }
    if (!existsSync(summaryPath)) {
        console.error(`❌ Score summary not found: ${summaryPath}`);
        console.error("Run an agentic evaluation first: pnpm pipeline -- --mode agentic");
        process.exit(1);
    }
    const summary = JSON.parse(readFileSync(summaryPath, "utf-8"));
    const areas = areaFilter.length > 0 ? areaFilter : undefined;
    const markdown = formatDiscoveryMarkdown(generateDiscoveryReport(summary, areas));
    if (!output) {
        console.log(markdown);
        return;
    }
    writeFileSync(output, markdown, "utf-8");
    console.log(`✅ Discovery report written to ${output}`);
}
|
|
54
|
+
// Run main() only when the script path ends in "discovery-report.ts" or
// "discovery-report.js".
if (["discovery-report.ts", "discovery-report.js"].some((suffix) => process.argv[1]?.endsWith(suffix))) {
    main();
}
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
/**
|
|
2
|
+
 * fetch-docs.ts — CLI entry point for documentation fetching.
|
|
3
|
+
*
|
|
4
|
+
* This is a standalone script for direct CLI invocation:
|
|
5
|
+
* npx tsx src/lib/fetch-docs.ts [--source name] [--include-feature-areas] [--include-corpus]
|
|
6
|
+
*
|
|
7
|
+
* The pipeline uses ctx.docFetcher (SanityDocFetcher adapter) directly via
|
|
8
|
+
* FetchDocsStep — this file is NOT called by the pipeline.
|
|
9
|
+
*
|
|
10
|
+
* Capabilities:
|
|
11
|
+
* 1. Canonical contexts — delegates to SanityDocFetcher adapter (always)
|
|
12
|
+
* 2. Feature-area contexts — one file per GROQ feature area query
|
|
13
|
+
* (opt-in via --include-feature-areas)
|
|
14
|
+
* 3. Full corpus — all articles in one file
|
|
15
|
+
* (opt-in via --include-corpus)
|
|
16
|
+
*/
|
|
17
|
+
import "dotenv/config";
|
|
18
|
+
import { type ResolvedSourceConfig } from "../sources.js";
|
|
19
|
+
/**
 * Options for the fetch-docs main() function.
 * Every field is optional.
 */
export interface FetchDocsOptions {
    /** Fetch full corpus (for retrieval experiments) */
    includeCorpus?: boolean;
    /** Fetch feature-area contexts in addition to canonical contexts */
    includeFeatureAreas?: boolean;
    /** Pre-resolved source config (skips loadSource() call) */
    resolvedSource?: ResolvedSourceConfig;
    /** Documentation source name (e.g., "branch", "local") */
    source?: string;
}
/**
 * Entry point for documentation fetching.
 *
 * @param options - Optional settings; see FetchDocsOptions.
 * @returns A promise that resolves with no value.
 */
export declare function main(options?: FetchDocsOptions): Promise<void>;
|