@sanity/ailf 2.0.0 → 2.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/canonical/grader-references/README.md +2 -2
- package/canonical/reference-solutions/content-lake/mutations.ts +160 -0
- package/canonical/reference-solutions/content-lake/realtime.ts +187 -0
- package/canonical/reference-solutions/image-handling/asset-pipeline.tsx +166 -0
- package/canonical/reference-solutions/portable-text/custom-blocks.ts +204 -0
- package/canonical/reference-solutions/portable-text/rendering.tsx +163 -0
- package/config/features.ts +1 -1
- package/config/models.ts +28 -23
- package/config/sources.ts +1 -1
- package/config/thresholds.ts +1 -1
- package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.d.ts +10 -0
- package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.js +185 -0
- package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.d.ts +6 -0
- package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.js +42 -0
- package/dist/_vendor/ailf-core/artifact-capture/noop-collector.d.ts +14 -0
- package/dist/_vendor/ailf-core/artifact-capture/noop-collector.js +25 -0
- package/dist/_vendor/ailf-core/config-helpers.d.ts +6 -0
- package/dist/_vendor/ailf-core/config-helpers.js +29 -0
- package/dist/_vendor/ailf-core/examples/index.d.ts +164 -94
- package/dist/_vendor/ailf-core/examples/index.js +208 -114
- package/dist/_vendor/ailf-core/index.d.ts +1 -0
- package/dist/_vendor/ailf-core/index.js +1 -0
- package/dist/_vendor/ailf-core/ports/artifact-collector.d.ts +94 -0
- package/dist/_vendor/ailf-core/ports/artifact-collector.js +13 -0
- package/dist/_vendor/ailf-core/ports/capture-comparator.d.ts +138 -0
- package/dist/_vendor/ailf-core/ports/capture-comparator.js +10 -0
- package/dist/_vendor/ailf-core/ports/context.d.ts +20 -1
- package/dist/_vendor/ailf-core/ports/eval-runner.d.ts +6 -0
- package/dist/_vendor/ailf-core/ports/index.d.ts +2 -0
- package/dist/_vendor/ailf-core/ports/pipeline-step.d.ts +11 -0
- package/dist/_vendor/ailf-core/ports/task-source.d.ts +3 -3
- package/dist/_vendor/ailf-core/ports/task-source.js +3 -3
- package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +6 -1
- package/dist/_vendor/ailf-core/schemas/pipeline-request.js +14 -2
- package/dist/_vendor/ailf-core/services/config-helpers.d.ts +16 -1
- package/dist/_vendor/ailf-core/services/config-helpers.js +21 -0
- package/dist/_vendor/ailf-core/services/index.d.ts +1 -1
- package/dist/_vendor/ailf-core/services/index.js +1 -1
- package/dist/_vendor/ailf-core/services/scoring.js +9 -0
- package/dist/_vendor/ailf-core/types/generalized-task.d.ts +12 -1
- package/dist/_vendor/ailf-core/types/generalized-task.js +1 -1
- package/dist/_vendor/ailf-core/types/index.d.ts +47 -4
- package/dist/_vendor/ailf-core/types/plugin-registry.d.ts +27 -0
- package/dist/_vendor/ailf-shared/eval-modes.d.ts +15 -0
- package/dist/_vendor/ailf-shared/eval-modes.js +18 -0
- package/dist/_vendor/ailf-tasks/cli.d.ts +8 -0
- package/dist/_vendor/ailf-tasks/cli.js +61 -0
- package/dist/_vendor/ailf-tasks/index.d.ts +13 -0
- package/dist/_vendor/ailf-tasks/index.js +16 -0
- package/dist/_vendor/ailf-tasks/parser.d.ts +27 -0
- package/dist/_vendor/ailf-tasks/parser.js +73 -0
- package/dist/_vendor/ailf-tasks/schemas.d.ts +198 -0
- package/dist/_vendor/ailf-tasks/schemas.js +180 -0
- package/dist/_vendor/ailf-tasks/validation.d.ts +47 -0
- package/dist/_vendor/ailf-tasks/validation.js +162 -0
- package/dist/adapters/api-client/remediation.js +2 -2
- package/dist/adapters/config-sources/file-config-adapter.js +6 -1
- package/dist/adapters/eval-runners/promptfoo-eval-adapter.js +8 -2
- package/dist/adapters/index.d.ts +0 -1
- package/dist/adapters/index.js +0 -1
- package/dist/adapters/task-sources/composite-task-source.d.ts +1 -1
- package/dist/adapters/task-sources/composite-task-source.js +1 -1
- package/dist/adapters/task-sources/content-lake-task-source.d.ts +4 -6
- package/dist/adapters/task-sources/content-lake-task-source.js +4 -6
- package/dist/adapters/task-sources/index.d.ts +1 -2
- package/dist/adapters/task-sources/index.js +1 -2
- package/dist/adapters/task-sources/repo-schemas.d.ts +1 -1
- package/dist/adapters/task-sources/repo-schemas.js +2 -2
- package/dist/adapters/task-sources/repo-task-source.js +1 -1
- package/dist/adapters/task-sources/repo-trigger.d.ts +1 -1
- package/dist/adapters/task-sources/repo-trigger.js +1 -1
- package/dist/adapters/task-sources/task-file-loader.d.ts +9 -6
- package/dist/adapters/task-sources/task-file-loader.js +20 -6
- package/dist/agent-observer/test-imports.d.ts +7 -0
- package/dist/agent-observer/test-imports.js +185 -0
- package/dist/artifact-capture/comparator.d.ts +22 -0
- package/dist/artifact-capture/comparator.js +493 -0
- package/dist/artifact-capture/filesystem-collector.d.ts +42 -0
- package/dist/artifact-capture/filesystem-collector.js +237 -0
- package/dist/artifact-capture/redact-artifact.d.ts +20 -0
- package/dist/artifact-capture/redact-artifact.js +115 -0
- package/dist/assertions/source-isolation.d.ts +1 -1
- package/dist/assertions/source-isolation.js +1 -1
- package/dist/cli.js +4 -0
- package/dist/commands/calculate-scores.js +1 -0
- package/dist/commands/capture-compare.d.ts +15 -0
- package/dist/commands/capture-compare.js +253 -0
- package/dist/commands/capture-list.d.ts +12 -0
- package/dist/commands/capture-list.js +147 -0
- package/dist/commands/capture.d.ts +9 -0
- package/dist/commands/capture.js +16 -0
- package/dist/commands/chronic-failures.d.ts +8 -0
- package/dist/commands/chronic-failures.js +33 -0
- package/dist/commands/explain-handler.d.ts +1 -1
- package/dist/commands/explain-handler.js +37 -8
- package/dist/commands/fetch-docs.js +1 -0
- package/dist/commands/generate-configs.d.ts +3 -3
- package/dist/commands/generate-configs.js +20 -8
- package/dist/commands/init.d.ts +2 -3
- package/dist/commands/init.js +56 -170
- package/dist/commands/pipeline-action.d.ts +7 -1
- package/dist/commands/pipeline-action.js +43 -19
- package/dist/commands/pipeline.d.ts +6 -1
- package/dist/commands/pipeline.js +7 -2
- package/dist/commands/pr-comment.js +1 -0
- package/dist/commands/publish.js +1 -0
- package/dist/commands/shared/help.js +2 -2
- package/dist/commands/update-quality-scores.d.ts +5 -0
- package/dist/commands/update-quality-scores.js +20 -0
- package/dist/composition-root.d.ts +2 -3
- package/dist/composition-root.js +27 -14
- package/dist/config/features.ts +23 -0
- package/dist/config/models.ts +100 -0
- package/dist/config/prompts.ts +16 -0
- package/dist/config/rubrics.ts +225 -0
- package/dist/config/schedules.ts +47 -0
- package/dist/config/sinks.ts +37 -0
- package/dist/config/sources.ts +21 -0
- package/dist/config/thresholds.ts +61 -0
- package/dist/lib/agent-behavior-report.d.ts +8 -0
- package/dist/lib/agent-behavior-report.js +185 -0
- package/dist/lib/baseline.d.ts +19 -0
- package/dist/lib/baseline.js +153 -0
- package/dist/lib/calculate-scores.d.ts +23 -0
- package/dist/lib/calculate-scores.js +42 -0
- package/dist/lib/compare.d.ts +18 -0
- package/dist/lib/compare.js +170 -0
- package/dist/lib/coverage-audit.d.ts +4 -0
- package/dist/lib/coverage-audit.js +42 -0
- package/dist/lib/discovery-report.d.ts +13 -0
- package/dist/lib/discovery-report.js +57 -0
- package/dist/lib/fetch-docs.d.ts +30 -0
- package/dist/lib/fetch-docs.js +171 -0
- package/dist/lib/generate-configs.d.ts +25 -0
- package/dist/lib/generate-configs.js +42 -0
- package/dist/lib/grader-api.d.ts +21 -0
- package/dist/lib/grader-api.js +34 -0
- package/dist/lib/grader-compare.d.ts +19 -0
- package/dist/lib/grader-compare.js +91 -0
- package/dist/lib/grader-consistency.d.ts +27 -0
- package/dist/lib/grader-consistency.js +79 -0
- package/dist/lib/grader-sensitivity.d.ts +19 -0
- package/dist/lib/grader-sensitivity.js +75 -0
- package/dist/lib/grader-validate.d.ts +19 -0
- package/dist/lib/grader-validate.js +78 -0
- package/dist/lib/measure-retrieval.d.ts +14 -0
- package/dist/lib/measure-retrieval.js +71 -0
- package/dist/lib/pr-comment.d.ts +16 -0
- package/dist/lib/pr-comment.js +28 -0
- package/dist/lib/readiness-report.d.ts +13 -0
- package/dist/lib/readiness-report.js +108 -0
- package/dist/lib/webhook-server.d.ts +11 -0
- package/dist/lib/webhook-server.js +24 -0
- package/dist/lib/weekly-digest.d.ts +24 -0
- package/dist/lib/weekly-digest.js +148 -0
- package/dist/orchestration/build-app-context.js +13 -0
- package/dist/orchestration/cache-context.d.ts +23 -0
- package/dist/orchestration/cache-context.js +43 -0
- package/dist/orchestration/env-bridge.d.ts +21 -0
- package/dist/orchestration/env-bridge.js +66 -0
- package/dist/orchestration/load-pipeline-tasks.d.ts +34 -0
- package/dist/orchestration/load-pipeline-tasks.js +52 -0
- package/dist/orchestration/pipeline-orchestrator.js +75 -5
- package/dist/orchestration/step-runner.js +5 -1
- package/dist/orchestration/steps/calculate-scores-step.d.ts +1 -0
- package/dist/orchestration/steps/calculate-scores-step.js +13 -0
- package/dist/orchestration/steps/callback-step.js +10 -1
- package/dist/orchestration/steps/compare-step.js +6 -3
- package/dist/orchestration/steps/discovery-report-step.js +6 -2
- package/dist/orchestration/steps/fetch-docs-shell.d.ts +17 -0
- package/dist/orchestration/steps/fetch-docs-shell.js +30 -0
- package/dist/orchestration/steps/fetch-docs-step.d.ts +1 -0
- package/dist/orchestration/steps/fetch-docs-step.js +30 -16
- package/dist/orchestration/steps/gap-analysis-step.js +13 -2
- package/dist/orchestration/steps/generate-configs-step.d.ts +1 -0
- package/dist/orchestration/steps/generate-configs-step.js +50 -15
- package/dist/orchestration/steps/mirror-repo-tasks-step.d.ts +1 -1
- package/dist/orchestration/steps/mirror-repo-tasks-step.js +1 -1
- package/dist/orchestration/steps/publish-report-step.js +19 -0
- package/dist/orchestration/steps/readiness-step.js +8 -3
- package/dist/orchestration/steps/report-step.js +17 -4
- package/dist/orchestration/steps/run-eval-step.d.ts +1 -0
- package/dist/orchestration/steps/run-eval-step.js +52 -32
- package/dist/pipeline/agent-behavior-report.js +6 -0
- package/dist/pipeline/attribution.d.ts +1 -1
- package/dist/pipeline/attribution.js +1 -1
- package/dist/pipeline/cache.js +29 -15
- package/dist/pipeline/calculate-scores.d.ts +2 -0
- package/dist/pipeline/calculate-scores.js +70 -33
- package/dist/pipeline/checks.d.ts +8 -3
- package/dist/pipeline/checks.js +23 -3
- package/dist/pipeline/chronic-failures.d.ts +55 -0
- package/dist/pipeline/chronic-failures.js +110 -0
- package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +33 -0
- package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.js +2 -3
- package/dist/pipeline/compiler/__tests__/task-bridge.test.d.ts +9 -0
- package/dist/pipeline/compiler/__tests__/task-bridge.test.js +339 -0
- package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.d.ts +10 -0
- package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.js +509 -0
- package/dist/pipeline/compiler/assertion-mapper.d.ts +1 -1
- package/dist/pipeline/compiler/assertion-mapper.js +1 -1
- package/dist/pipeline/compiler/compiler-to-yaml.d.ts +2 -7
- package/dist/pipeline/compiler/compiler-to-yaml.js +2 -7
- package/dist/pipeline/compiler/config-loader.d.ts +14 -0
- package/dist/pipeline/compiler/config-loader.js +42 -2
- package/dist/pipeline/compiler/fixture-resolver.d.ts +1 -1
- package/dist/pipeline/compiler/fixture-resolver.js +1 -1
- package/dist/pipeline/compiler/ignore-fields.d.ts +1 -1
- package/dist/pipeline/compiler/ignore-fields.js +1 -1
- package/dist/pipeline/compiler/index.d.ts +2 -5
- package/dist/pipeline/compiler/index.js +2 -5
- package/dist/pipeline/compiler/literacy-bridge.d.ts +1 -1
- package/dist/pipeline/compiler/literacy-bridge.js +1 -1
- package/dist/pipeline/compiler/mode-bases/agent-harness.d.ts +1 -1
- package/dist/pipeline/compiler/mode-bases/agent-harness.js +1 -1
- package/dist/pipeline/compiler/mode-bases/knowledge-probe.d.ts +1 -1
- package/dist/pipeline/compiler/mode-bases/knowledge-probe.js +1 -1
- package/dist/pipeline/compiler/mode-bases/literacy.d.ts +13 -2
- package/dist/pipeline/compiler/mode-bases/literacy.js +55 -1
- package/dist/pipeline/compiler/mode-bases/mcp-server.d.ts +1 -1
- package/dist/pipeline/compiler/mode-bases/mcp-server.js +1 -1
- package/dist/pipeline/compiler/mode-handlers/agent-harness/index.d.ts +1 -1
- package/dist/pipeline/compiler/mode-handlers/agent-harness/index.js +1 -1
- package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.d.ts +70 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.js +485 -0
- package/dist/pipeline/compiler/mode-handlers/index.d.ts +2 -2
- package/dist/pipeline/compiler/mode-handlers/index.js +2 -2
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.d.ts +1 -1
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.js +1 -1
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.d.ts +76 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.js +245 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/index.d.ts +1 -1
- package/dist/pipeline/compiler/mode-handlers/literacy/index.js +1 -1
- package/dist/pipeline/compiler/mode-handlers/literacy-handler.d.ts +89 -0
- package/dist/pipeline/compiler/mode-handlers/literacy-handler.js +379 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-assertions.d.ts +50 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-assertions.js +334 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.d.ts +1 -1
- package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.js +1 -1
- package/dist/pipeline/compiler/mode-handlers/mcp-server/index.d.ts +1 -1
- package/dist/pipeline/compiler/mode-handlers/mcp-server/index.js +1 -1
- package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.d.ts +69 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.js +307 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.js +22 -5
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.js +6 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.d.ts +10 -5
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.js +314 -7
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.d.ts +10 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.d.ts +65 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.js +368 -0
- package/dist/pipeline/compiler/presets/sanity-literacy.d.ts +1 -1
- package/dist/pipeline/compiler/presets/sanity-literacy.js +1 -1
- package/dist/pipeline/compiler/promptfoo-compiler.d.ts +1 -4
- package/dist/pipeline/compiler/promptfoo-compiler.js +3 -12
- package/dist/pipeline/compiler/provider-assembler.js +13 -7
- package/dist/pipeline/compiler/sandbox/docker-sandbox.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/docker-sandbox.js +1 -1
- package/dist/pipeline/compiler/sandbox/fixture-provisioner.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/fixture-provisioner.js +1 -1
- package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.js +1 -1
- package/dist/pipeline/compiler/sandbox/index.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/index.js +1 -1
- package/dist/pipeline/compiler/sandbox/sandbox-selector.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/sandbox-selector.js +1 -1
- package/dist/pipeline/compiler/sandbox/sandbox-strategy.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/sandbox-strategy.js +1 -1
- package/dist/pipeline/compiler/sandbox/tempdir-sandbox.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/tempdir-sandbox.js +1 -1
- package/dist/pipeline/compiler/scoring-bridge.d.ts +1 -1
- package/dist/pipeline/compiler/scoring-bridge.js +1 -1
- package/dist/pipeline/compiler/task-bridge.d.ts +41 -0
- package/dist/pipeline/compiler/task-bridge.js +92 -0
- package/dist/pipeline/compiler/task-graph-builder.d.ts +1 -4
- package/dist/pipeline/compiler/task-graph-builder.js +1 -4
- package/dist/pipeline/compiler/telemetry/index.d.ts +1 -1
- package/dist/pipeline/compiler/telemetry/index.js +1 -1
- package/dist/pipeline/compiler/variable-resolver.d.ts +1 -1
- package/dist/pipeline/compiler/variable-resolver.js +1 -1
- package/dist/pipeline/coverage-audit.d.ts +1 -1
- package/dist/pipeline/coverage-audit.js +1 -1
- package/dist/pipeline/degradations.d.ts +1 -1
- package/dist/pipeline/degradations.js +1 -1
- package/dist/pipeline/failure-modes.d.ts +1 -1
- package/dist/pipeline/failure-modes.js +13 -1
- package/dist/pipeline/gap-analysis.d.ts +1 -1
- package/dist/pipeline/gap-analysis.js +3 -1
- package/dist/pipeline/generate-configs.d.ts +2 -2
- package/dist/pipeline/generate-configs.js +15 -8
- package/dist/pipeline/grader-compare-runner.d.ts +1 -1
- package/dist/pipeline/grader-compare-runner.js +7 -1
- package/dist/pipeline/grader-comparison.d.ts +1 -1
- package/dist/pipeline/grader-comparison.js +1 -1
- package/dist/pipeline/grader-consistency-runner.d.ts +1 -1
- package/dist/pipeline/grader-consistency-runner.js +7 -1
- package/dist/pipeline/grader-consistency.d.ts +1 -1
- package/dist/pipeline/grader-consistency.js +1 -1
- package/dist/pipeline/grader-sensitivity-runner.d.ts +1 -1
- package/dist/pipeline/grader-sensitivity-runner.js +1 -1
- package/dist/pipeline/grader-sensitivity.d.ts +1 -1
- package/dist/pipeline/grader-sensitivity.js +1 -1
- package/dist/pipeline/grader-validate-runner.d.ts +1 -1
- package/dist/pipeline/grader-validate-runner.js +2 -2
- package/dist/pipeline/grader-validation.d.ts +1 -1
- package/dist/pipeline/grader-validation.js +1 -1
- package/dist/pipeline/map-request-to-config.js +15 -2
- package/dist/pipeline/mirror-repo-tasks.d.ts +1 -1
- package/dist/pipeline/mirror-repo-tasks.js +1 -1
- package/dist/pipeline/plan-format.d.ts +1 -1
- package/dist/pipeline/plan-format.js +1 -1
- package/dist/pipeline/plan.d.ts +1 -1
- package/dist/pipeline/plan.js +67 -29
- package/dist/pipeline/probe.d.ts +1 -1
- package/dist/pipeline/probe.js +1 -1
- package/dist/pipeline/readiness-report.d.ts +2 -2
- package/dist/pipeline/readiness-report.js +2 -2
- package/dist/pipeline/release-classification.d.ts +1 -1
- package/dist/pipeline/release-classification.js +1 -1
- package/dist/pipeline/release-report.d.ts +1 -1
- package/dist/pipeline/release-report.js +1 -1
- package/dist/pipeline/repo-eval-comment.d.ts +1 -1
- package/dist/pipeline/repo-eval-comment.js +1 -1
- package/dist/pipeline/repo-threshold-evaluator.d.ts +1 -1
- package/dist/pipeline/repo-threshold-evaluator.js +1 -1
- package/dist/pipeline/resolve-mappings.d.ts +6 -6
- package/dist/pipeline/resolve-mappings.js +44 -44
- package/dist/pipeline/retrieval-metrics.d.ts +3 -3
- package/dist/pipeline/retrieval-metrics.js +28 -20
- package/dist/pipeline/steps/calculate-scores-step.d.ts +11 -0
- package/dist/pipeline/steps/calculate-scores-step.js +89 -0
- package/dist/pipeline/steps/compare-step.d.ts +18 -0
- package/dist/pipeline/steps/compare-step.js +90 -0
- package/dist/pipeline/steps/eval-step.d.ts +53 -0
- package/dist/pipeline/steps/eval-step.js +347 -0
- package/dist/pipeline/steps/fetch-docs-step.d.ts +11 -0
- package/dist/pipeline/steps/fetch-docs-step.js +84 -0
- package/dist/pipeline/steps/generate-configs-step.d.ts +11 -0
- package/dist/pipeline/steps/generate-configs-step.js +98 -0
- package/dist/pipeline/steps/grader-consistency-step.d.ts +21 -0
- package/dist/pipeline/steps/grader-consistency-step.js +74 -0
- package/dist/pipeline/steps/publish-report-step.d.ts +57 -0
- package/dist/pipeline/steps/publish-report-step.js +243 -0
- package/dist/pipeline/steps/report-step.d.ts +13 -0
- package/dist/pipeline/steps/report-step.js +56 -0
- package/dist/pipeline/steps/update-scores-step.d.ts +11 -0
- package/dist/pipeline/steps/update-scores-step.js +42 -0
- package/dist/pipeline/targeted-loo.d.ts +1 -1
- package/dist/pipeline/targeted-loo.js +1 -1
- package/dist/pipeline/thresholds.d.ts +1 -1
- package/dist/pipeline/thresholds.js +1 -1
- package/dist/pipeline/validate.js +13 -0
- package/dist/report-store.d.ts +17 -0
- package/dist/report-store.js +24 -0
- package/dist/scripts/agent-behavior-report.d.ts +19 -0
- package/dist/scripts/agent-behavior-report.js +315 -0
- package/dist/scripts/baseline.d.ts +43 -0
- package/dist/scripts/baseline.js +267 -0
- package/dist/scripts/calculate-scores.d.ts +166 -0
- package/dist/scripts/calculate-scores.js +1296 -0
- package/dist/scripts/compare.d.ts +22 -0
- package/dist/scripts/compare.js +334 -0
- package/dist/scripts/coverage-audit.d.ts +44 -0
- package/dist/scripts/coverage-audit.js +209 -0
- package/dist/scripts/debug-eval.d.ts +19 -0
- package/dist/scripts/debug-eval.js +73 -0
- package/dist/scripts/discovery-report.d.ts +58 -0
- package/dist/scripts/discovery-report.js +250 -0
- package/dist/scripts/fetch-docs.d.ts +35 -0
- package/dist/scripts/fetch-docs.js +472 -0
- package/dist/scripts/generate-configs.d.ts +66 -0
- package/dist/scripts/generate-configs.js +459 -0
- package/dist/scripts/grader-api.d.ts +27 -0
- package/dist/scripts/grader-api.js +206 -0
- package/dist/scripts/grader-compare.d.ts +22 -0
- package/dist/scripts/grader-compare.js +368 -0
- package/dist/scripts/grader-consistency.d.ts +20 -0
- package/dist/scripts/grader-consistency.js +313 -0
- package/dist/scripts/grader-sensitivity.d.ts +22 -0
- package/dist/scripts/grader-sensitivity.js +354 -0
- package/dist/scripts/grader-validate.d.ts +19 -0
- package/dist/scripts/grader-validate.js +267 -0
- package/dist/scripts/measure-retrieval.d.ts +10 -0
- package/dist/scripts/measure-retrieval.js +145 -0
- package/dist/scripts/migrate-task-mode.d.ts +1 -1
- package/dist/scripts/migrate-task-mode.js +1 -1
- package/dist/scripts/migrate-tasks-to-content-lake.d.ts +1 -1
- package/dist/scripts/migrate-tasks-to-content-lake.js +1 -1
- package/dist/scripts/pipeline.d.ts +76 -0
- package/dist/scripts/pipeline.js +1031 -0
- package/dist/scripts/pr-comment.d.ts +10 -0
- package/dist/scripts/pr-comment.js +510 -0
- package/dist/scripts/readiness-report.d.ts +88 -0
- package/dist/scripts/readiness-report.js +342 -0
- package/dist/scripts/update-quality-scores.d.ts +15 -0
- package/dist/scripts/update-quality-scores.js +184 -0
- package/dist/scripts/validate-task-sources.d.ts +1 -1
- package/dist/scripts/validate-task-sources.js +1 -1
- package/dist/scripts/validate.d.ts +13 -0
- package/dist/scripts/validate.js +79 -0
- package/dist/scripts/webhook-server.d.ts +26 -0
- package/dist/scripts/webhook-server.js +147 -0
- package/dist/scripts/weekly-digest.d.ts +24 -0
- package/dist/scripts/weekly-digest.js +144 -0
- package/dist/sinks/format-slack.d.ts +64 -0
- package/dist/sinks/format-slack.js +306 -0
- package/dist/sinks/slack-sink.d.ts +27 -0
- package/dist/sinks/slack-sink.js +78 -0
- package/dist/sinks/types.d.ts +1 -1
- package/dist/sinks/types.js +1 -1
- package/dist/sinks/webhook-sink.d.ts +19 -0
- package/dist/sinks/webhook-sink.js +50 -0
- package/dist/tasks/knowledge-probe/define-type-api.task.ts +66 -0
- package/dist/tasks/knowledge-probe/groq-projections.task.ts +62 -0
- package/dist/tasks/literacy/content-lake.task.ts +181 -0
- package/dist/tasks/literacy/frameworks.task.ts +129 -0
- package/dist/tasks/literacy/functions.task.ts +70 -0
- package/dist/tasks/literacy/groq.task.ts +259 -0
- package/dist/tasks/literacy/image-handling.task.ts +95 -0
- package/dist/tasks/literacy/nextjs-live.task.ts +76 -0
- package/dist/tasks/literacy/portable-text.task.ts +169 -0
- package/dist/tasks/literacy/studio-setup.task.ts +134 -0
- package/dist/tasks/literacy/visual-editing.task.ts +147 -0
- package/package.json +25 -25
- package/tasks/.expanded.agentic.yaml +280 -0
- package/tasks/.expanded.yaml +565 -0
- package/tasks/knowledge-probe/define-type-api.task.ts +11 -0
- package/tasks/knowledge-probe/groq-projections.task.ts +3 -0
- package/tasks/literacy/content-lake.task.ts +181 -0
- package/tasks/literacy/frameworks.task.ts +1 -0
- package/tasks/literacy/functions.task.ts +1 -0
- package/tasks/literacy/groq.task.ts +1 -0
- package/tasks/literacy/image-handling.task.ts +95 -0
- package/tasks/literacy/nextjs-live.task.ts +2 -1
- package/tasks/literacy/portable-text.task.ts +169 -0
- package/tasks/literacy/studio-setup.task.ts +5 -2
- package/tasks/literacy/visual-editing.task.ts +1 -0
- package/LICENSE +0 -21
- package/tasks/frameworks.yaml +0 -98
- package/tasks/functions.yaml +0 -51
- package/tasks/groq.yaml +0 -216
- package/tasks/nextjs-live.yaml +0 -62
- package/tasks/studio-setup.yaml +0 -111
- package/tasks/visual-editing.yaml +0 -120
|
@@ -2,13 +2,14 @@
|
|
|
2
2
|
* pipeline/retrieval-metrics.ts
|
|
3
3
|
*
|
|
4
4
|
* Computes retrieval precision and recall by comparing agent-retrieved
|
|
5
|
-
* doc slugs against canonical_docs defined in task
|
|
5
|
+
* doc slugs against canonical_docs defined in task definitions.
|
|
6
6
|
*
|
|
7
|
-
* This is a pure computation module — no file I/O beyond reading task
|
|
7
|
+
* This is a pure computation module — no file I/O beyond reading task files.
|
|
8
8
|
*/
|
|
9
|
-
import { existsSync
|
|
10
|
-
import {
|
|
11
|
-
import {
|
|
9
|
+
import { existsSync } from "fs";
|
|
10
|
+
import { discoverTsTaskFiles, loadTsTaskFileSync, } from "../adapters/task-sources/task-file-loader.js";
|
|
11
|
+
import { resolveVendoredSubdir } from "./compiler/config-loader.js";
|
|
12
|
+
// --- Types for task definitions (just the canonical_docs part) ---
|
|
12
13
|
/**
|
|
13
14
|
* Compute retrieval metrics from agentic behavior data.
|
|
14
15
|
*
|
|
@@ -107,28 +108,35 @@ export function computeTaskMetrics(taskId, area, retrieved, canonical) {
|
|
|
107
108
|
};
|
|
108
109
|
}
|
|
109
110
|
/**
|
|
110
|
-
* Load
|
|
111
|
+
* Load canonical docs from *.task.ts files in tasks/literacy/.
|
|
111
112
|
* Returns a map of taskId → { slugs: Set<string>, area: string }.
|
|
112
113
|
*/
|
|
113
114
|
export function loadCanonicalDocs(rootDir) {
|
|
114
|
-
const tasksDir =
|
|
115
|
+
const tasksDir = resolveVendoredSubdir(rootDir, "tasks/literacy");
|
|
115
116
|
if (!existsSync(tasksDir))
|
|
116
117
|
return new Map();
|
|
117
118
|
const result = new Map();
|
|
118
|
-
const files =
|
|
119
|
+
const files = discoverTsTaskFiles(tasksDir);
|
|
119
120
|
for (const file of files) {
|
|
120
|
-
const
|
|
121
|
-
const
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
121
|
+
const loaded = loadTsTaskFileSync(file);
|
|
122
|
+
for (const task of loaded.tasks) {
|
|
123
|
+
const t = task;
|
|
124
|
+
const id = typeof t.id === "string" ? t.id : undefined;
|
|
125
|
+
const area = typeof t.area === "string" ? t.area : undefined;
|
|
126
|
+
if (!id || !area)
|
|
127
|
+
continue;
|
|
128
|
+
// Extract slugs from context.docs
|
|
129
|
+
const context = t.context;
|
|
130
|
+
if (!context?.docs || !Array.isArray(context.docs))
|
|
131
|
+
continue;
|
|
132
|
+
const slugs = new Set();
|
|
133
|
+
for (const doc of context.docs) {
|
|
134
|
+
const d = doc;
|
|
135
|
+
if (typeof d.slug === "string")
|
|
136
|
+
slugs.add(d.slug);
|
|
137
|
+
}
|
|
138
|
+
if (slugs.size > 0) {
|
|
139
|
+
result.set(id, { area, slugs });
|
|
132
140
|
}
|
|
133
141
|
}
|
|
134
142
|
}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pipeline step: Calculate AI Literacy Scores from eval results.
|
|
3
|
+
*
|
|
4
|
+
* Preconditions: eval-results.json exists and is valid
|
|
5
|
+
* Postconditions: score-summary.json exists and is valid
|
|
6
|
+
*
|
|
7
|
+
* Cache key: eval results JSON file(s)
|
|
8
|
+
* Cache outputs: results/latest/score-summary.json
|
|
9
|
+
*/
|
|
10
|
+
import type { EvalMode, StepResult } from "../types.js";
|
|
11
|
+
/**
 * Run the calculate-scores pipeline step.
 *
 * @param source  Optional value forwarded to the calculate-scores script as `--source`.
 * @param mode    Eval mode; the implementation defaults to "baseline" and treats
 *                "full" as "baseline" when choosing the primary results file.
 * @param noCache When true, the step neither consults nor records the step cache.
 * @returns A StepResult carrying `durationMs` and `status`, plus `summary` on
 *          success or `error` on failure.
 */
export declare function runCalculateScores(source?: string, mode?: EvalMode, noCache?: boolean): StepResult;
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pipeline step: Calculate AI Literacy Scores from eval results.
|
|
3
|
+
*
|
|
4
|
+
* Preconditions: eval-results.json exists and is valid
|
|
5
|
+
* Postconditions: score-summary.json exists and is valid
|
|
6
|
+
*
|
|
7
|
+
* Cache key: eval results JSON file(s)
|
|
8
|
+
* Cache outputs: results/latest/score-summary.json
|
|
9
|
+
*/
|
|
10
|
+
import { execSync } from "child_process";
|
|
11
|
+
import { dirname, resolve } from "path";
|
|
12
|
+
import { fileURLToPath } from "url";
|
|
13
|
+
import { getStepInputPaths, hashFiles, lookupCache, recordCache, } from "../cache.js";
|
|
14
|
+
import { checkResultsExist, checkScoreSummaryValid } from "../checks.js";
|
|
15
|
+
import { RESULTS_FILES } from "./eval-step.js";
|
|
16
|
+
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
17
|
+
const ROOT = resolve(__dirname, "..", "..", "..");
|
|
18
|
+
/**
 * Quote a value so the shell spawned by execSync passes it through as exactly
 * one literal argument (no word splitting, no metacharacter interpretation).
 *
 * @param value Raw argument text.
 * @returns The quoted argument, ready to be concatenated into a command line.
 */
function quoteShellArg(value) {
    const text = String(value);
    if (process.platform === "win32") {
        // cmd.exe has no single-quote syntax; wrap in double quotes and double any embedded quote.
        // NOTE(review): cmd.exe still expands %VAR% inside double quotes — confirm
        // whether Windows callers need stricter handling.
        return `"${text.replace(/"/g, '""')}"`;
    }
    // POSIX sh: everything inside single quotes is literal. An embedded single
    // quote is emitted as: close quote, escaped quote, reopen quote.
    return `'${text.replace(/'/g, "'\\''")}'`;
}
/**
 * Pipeline step: calculate scores from eval results.
 *
 * Flow: verify the results file for the mode exists, return early on a cache
 * hit (unless `noCache`), run the calculate-scores script, verify the score
 * summary postcondition, then record the cache entry.
 *
 * @param source  Optional value forwarded to the script as `--source`. It is
 *                shell-quoted so it always arrives as a single argument.
 * @param mode    Eval mode (default "baseline"). "full" uses the baseline
 *                results file as the primary input.
 * @param noCache When true, skip both the cache lookup and the cache record.
 * @returns `{ durationMs, status: "success", summary }` on success, or
 *          `{ durationMs, status: "failed", error }` when a precondition,
 *          the script, or the postcondition fails.
 */
export function runCalculateScores(source, mode = "baseline", noCache = false) {
    const start = Date.now();
    // For full mode, use the baseline results file as the primary input
    // (calculate-scores reads all available results files internally)
    const primaryMode = mode === "full" ? "baseline" : mode;
    const resultsFile = RESULTS_FILES[primaryMode];
    // Precondition: the results file must exist
    const resultsIssues = checkResultsExist(ROOT, resultsFile);
    const resultsErrors = resultsIssues.filter((i) => i.severity === "error");
    if (resultsErrors.length > 0) {
        return {
            durationMs: Date.now() - start,
            error: `Results missing: ${resultsErrors.map((e) => e.message).join("; ")}`,
            status: "failed",
        };
    }
    // Cache check
    if (!noCache) {
        const cacheResult = lookupCache(ROOT, "calculate-scores");
        if (cacheResult.hit) {
            return {
                durationMs: Date.now() - start,
                status: "success",
                summary: `Skipped (cached) — ${cacheResult.entry.summary}`,
            };
        }
    }
    // Execute — note: calculate-scores exits 1 when areas are below critical,
    // which is expected behavior, not an error
    try {
        // Quote the caller-supplied source: execSync runs the command through a
        // shell, so an unquoted value would be word-split or interpreted.
        const sourceArg = source ? ` --source ${quoteShellArg(source)}` : "";
        // The results file is only passed explicitly for non-baseline modes.
        const resultsArg = primaryMode !== "baseline" ? ` ${resultsFile}` : "";
        execSync(`tsx src/lib/calculate-scores.ts${resultsArg}${sourceArg}`, {
            cwd: ROOT,
            env: process.env,
            stdio: "inherit",
        });
    }
    catch (err) {
        // Errors without a `status` property are treated as exit code 1.
        const code = err !== null && typeof err === "object" && "status" in err
            ? err.status
            : 1;
        // Exit code 1 means "areas below critical" — that's expected
        if (code !== 1) {
            return {
                durationMs: Date.now() - start,
                error: `calculate-scores failed with exit code ${code}`,
                status: "failed",
            };
        }
    }
    // Postcondition: score summary exists and is valid
    const summaryIssues = checkScoreSummaryValid(ROOT);
    const summaryErrors = summaryIssues.filter((i) => i.severity === "error");
    if (summaryErrors.length > 0) {
        return {
            durationMs: Date.now() - start,
            error: `Postcondition failed: ${summaryErrors.map((e) => e.message).join("; ")}`,
            status: "failed",
        };
    }
    const durationMs = Date.now() - start;
    const summary = "Scores calculated and summary written";
    // Record cache
    if (!noCache) {
        const inputPaths = getStepInputPaths(ROOT, "calculate-scores");
        const inputHash = hashFiles(inputPaths);
        recordCache(ROOT, "calculate-scores", inputHash, summary, durationMs, [
            "results/latest/score-summary.json",
        ]);
    }
    return { durationMs, status: "success", summary };
}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pipeline step: Compare current scores against a baseline.
|
|
3
|
+
*
|
|
4
|
+
* Preconditions: score-summary.json exists
|
|
5
|
+
* Postconditions: comparison-report.json written to results/latest/
|
|
6
|
+
*
|
|
7
|
+
* This step is optional — it only runs when --compare is passed
|
|
8
|
+
* (or a baseline exists and auto-compare is enabled).
|
|
9
|
+
*/
|
|
10
|
+
import type { CompareOptions, StepResult } from "../types.js";
|
|
11
|
+
/**
 * Run comparison against a baseline.
 *
 * @param rootDir Package root directory
 * @param baselinePath Explicit baseline file path (optional — uses latest if omitted)
 * @param options Compare options (noise threshold, etc.)
 * @returns The step outcome. In the compiled implementation this is, among
 *          other cases, "failed" when score-summary.json or the baseline file
 *          is missing, "skipped" when no baseline is available, and "success"
 *          with a one-line summary once the comparison report has been written.
 */
|
|
18
|
+
export declare function runCompare(rootDir: string, baselinePath?: string, options?: CompareOptions): StepResult;
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pipeline step: Compare current scores against a baseline.
|
|
3
|
+
*
|
|
4
|
+
* Preconditions: score-summary.json exists
|
|
5
|
+
* Postconditions: comparison-report.json written to results/latest/
|
|
6
|
+
*
|
|
7
|
+
* This step is optional — it only runs when --compare is passed
|
|
8
|
+
* (or a baseline exists and auto-compare is enabled).
|
|
9
|
+
*/
|
|
10
|
+
import { existsSync, readFileSync, readdirSync, writeFileSync } from "fs";
|
|
11
|
+
import { join, resolve } from "path";
|
|
12
|
+
import { compare } from "../compare.js";
|
|
13
|
+
/**
|
|
14
|
+
* Run comparison against a baseline.
|
|
15
|
+
*
|
|
16
|
+
* @param rootDir Package root directory
|
|
17
|
+
* @param baselinePath Explicit baseline file path (optional — uses latest if omitted)
|
|
18
|
+
* @param options Compare options (noise threshold, etc.)
|
|
19
|
+
*/
|
|
20
|
+
/**
 * Read and parse a JSON file without throwing.
 *
 * @param filePath Path of the JSON file to load
 * @returns `{ ok: true, value }` on success, or `{ ok: false, message }` when
 *          the file cannot be read or does not contain valid JSON
 */
function readJsonSafely(filePath) {
    try {
        return { ok: true, value: JSON.parse(readFileSync(filePath, "utf-8")) };
    }
    catch (err) {
        return { ok: false, message: err instanceof Error ? err.message : String(err) };
    }
}
/**
 * Run comparison against a baseline.
 *
 * Loads results/latest/score-summary.json as the current run, resolves the
 * baseline (the explicit path, or the last `.json` file by name in
 * results/baselines/), runs `compare`, and writes
 * results/latest/comparison-report.json.
 *
 * @param rootDir Package root directory
 * @param baselinePath Explicit baseline file path (optional — uses latest if omitted)
 * @param options Compare options (noise threshold, etc.)
 * @returns A step result: "failed" when an input file is missing or is not
 *          readable JSON, "skipped" when no baseline is available, otherwise
 *          "success" with a one-line summary of the comparison
 */
export function runCompare(rootDir, baselinePath, options) {
    const start = Date.now();
    // All hard failures share the same result shape.
    const fail = (error) => ({
        durationMs: Date.now() - start,
        error,
        status: "failed",
    });
    const scoreSummaryPath = resolve(rootDir, "results", "latest", "score-summary.json");
    if (!existsSync(scoreSummaryPath)) {
        return fail("score-summary.json not found. Run calculate-scores first.");
    }
    // Load experiment (current run). A corrupt file is reported as a failed
    // step rather than escaping as an uncaught exception.
    const experiment = readJsonSafely(scoreSummaryPath);
    if (!experiment.ok) {
        return fail(`Failed to read score-summary.json: ${experiment.message}`);
    }
    // Resolve baseline
    let resolvedBaselinePath;
    if (baselinePath) {
        resolvedBaselinePath = resolve(baselinePath);
    }
    else {
        const baselinesDir = resolve(rootDir, "results", "baselines");
        if (!existsSync(baselinesDir)) {
            return {
                reason: "No baselines directory found. Run 'pnpm baseline:save' first.",
                status: "skipped",
            };
        }
        // Reverse lexicographic order: the last file name by sort order wins.
        const files = readdirSync(baselinesDir)
            .filter((f) => f.endsWith(".json"))
            .sort()
            .reverse();
        if (files.length === 0) {
            return {
                reason: "No baseline files found. Run 'pnpm baseline:save' first.",
                status: "skipped",
            };
        }
        resolvedBaselinePath = join(baselinesDir, files[0]);
    }
    if (!existsSync(resolvedBaselinePath)) {
        return fail(`Baseline file not found: ${resolvedBaselinePath}`);
    }
    const baseline = readJsonSafely(resolvedBaselinePath);
    if (!baseline.ok) {
        return fail(`Failed to read baseline file ${resolvedBaselinePath}: ${baseline.message}`);
    }
    // Run comparison
    const report = compare(baseline.value, experiment.value, options);
    // Write report
    const reportPath = resolve(rootDir, "results", "latest", "comparison-report.json");
    writeFileSync(reportPath, JSON.stringify(report, null, 2));
    // Build summary
    const improved = report.improved.length;
    const regressed = report.regressed.length;
    const unchanged = report.unchanged.length;
    // Round before choosing the sign so a sub-0.5 positive delta prints "0",
    // not "+0" (a sub-0.5 negative delta already prints "0").
    const roundedDelta = Math.round(report.deltas.overall);
    const deltaStr = roundedDelta > 0 ? `+${roundedDelta}` : String(roundedDelta);
    const parts = [`Overall: ${deltaStr}`];
    if (improved > 0)
        parts.push(`${improved} improved`);
    if (regressed > 0)
        parts.push(`${regressed} regressed`);
    if (unchanged > 0)
        parts.push(`${unchanged} unchanged`);
    return {
        durationMs: Date.now() - start,
        status: "success",
        summary: parts.join(", "),
    };
}
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pipeline step: Run promptfoo evaluation.
|
|
3
|
+
*
|
|
4
|
+
* Preconditions: config files and context files exist
|
|
5
|
+
* Postconditions: eval-results.json exists and is valid
|
|
6
|
+
*
|
|
7
|
+
* Cache key: promptfooconfig*.yaml + contexts/*.md + tasks/*.yaml +
|
|
8
|
+
* canonical contexts + reference solutions + config/models.yaml
|
|
9
|
+
* Cache outputs: results/latest/eval-results*.json
|
|
10
|
+
*
|
|
11
|
+
* Remote cache: When local cache misses and a Sanity token is available,
|
|
12
|
+
* the step queries the Content Lake for a report with a matching eval
|
|
13
|
+
* fingerprint. On a hit, the cached score-summary.json is written to disk
|
|
14
|
+
* and the eval + calculate-scores steps are skipped entirely.
|
|
15
|
+
*
|
|
16
|
+
* @see docs/design-docs/content-lake-eval-caching.md
|
|
17
|
+
*/
|
|
18
|
+
import type { ConcreteEvalMode, DebugOptions, FilterOptions, StepResult } from "../types.js";
|
|
19
|
+
/**
 * Results file for each concrete eval mode. Each mode writes its eval results
 * to a different file (set in the config's outputPath).
 */
|
|
20
|
+
export declare const RESULTS_FILES: Record<ConcreteEvalMode, string>;
|
|
21
|
+
/**
 * Extended step result that carries cache metadata for downstream steps.
 * Wraps the plain StepResult rather than extending it, so the cache metadata
 * sits next to the result instead of inside it.
 */
|
|
22
|
+
export interface EvalStepResult {
|
|
23
|
+
/** The computed eval fingerprint (for publishing in provenance); optional */
|
|
24
|
+
evalFingerprint?: string;
|
|
25
|
+
/** Whether this result came from a remote cache hit; optional */
|
|
26
|
+
remoteCacheHit?: boolean;
|
|
27
|
+
/** The underlying step result of the eval step (always present) */
|
|
28
|
+
stepResult: StepResult;
|
|
29
|
+
}
|
|
30
|
+
/**
 * Options for the remote cache (Content Lake fingerprint lookup).
 * Only `graderModel` is required; every other field is optional.
 */
|
|
31
|
+
export interface RemoteCacheOptions {
|
|
32
|
+
/** Whether this is a debug run (debug runs don't use remote cache) */
|
|
33
|
+
debug?: boolean;
|
|
34
|
+
/** Filter options used for fingerprint computation */
|
|
35
|
+
filter?: FilterOptions;
|
|
36
|
+
/** Grader model identifier from models.yaml (required) */
|
|
37
|
+
graderModel: string;
|
|
38
|
+
/** Disable remote cache lookup (--no-remote-cache) */
|
|
39
|
+
noRemoteCache?: boolean;
|
|
40
|
+
/**
 * Sanity API token for reading cached reports. Per the module header, the
 * remote lookup is only attempted when a token is available.
 */
|
|
41
|
+
sanityToken?: string;
|
|
42
|
+
}
|
|
43
|
+
/**
 * Build a flags string from the optional debug options.
 *
 * NOTE(review): presumably these are CLI filter flags appended to the
 * promptfoo command line; the implementation is not visible in this
 * declaration file, so confirm before relying on the exact format.
 *
 * @param debug Optional debug options the flags are derived from
 * @returns The flags as a single string
 */
export declare function buildFilterFlags(debug?: DebugOptions): string;
|
|
44
|
+
/**
 * Extract the Promptfoo share URL from the eval results JSON.
 *
 * Promptfoo writes a `shareableUrl` field into the results file when
 * `PROMPTFOO_API_KEY` is set. This replaces the previous approach of
 * scraping the URL from a captured log file (which required piping
 * through `tee` and broke TTY progress reporting).
 *
 * @param mode Concrete eval mode whose results file is inspected
 * @returns The share URL, or `undefined` (presumably when the results file
 *          has no `shareableUrl`; confirm against the implementation)
 */
|
|
52
|
+
export declare function extractShareUrl(mode: ConcreteEvalMode): string | undefined;
|
|
53
|
+
/**
 * Run the promptfoo evaluation step for the given mode.
 *
 * @param mode Concrete eval mode to run
 * @param debug Optional debug options
 * @param concurrency Optional concurrency setting (NOTE(review): presumably
 *        forwarded to promptfoo; confirm against the implementation)
 * @param noCache Optional flag (NOTE(review): presumably bypasses the local
 *        step cache; confirm against the implementation)
 * @param remoteCacheOpts Optional remote cache (Content Lake fingerprint
 *        lookup) options
 * @returns A promise for the step result together with its cache metadata
 *          (eval fingerprint and remote-cache-hit flag)
 */
export declare function runEval(mode: ConcreteEvalMode, debug?: DebugOptions, concurrency?: number, noCache?: boolean, remoteCacheOpts?: RemoteCacheOptions): Promise<EvalStepResult>;
|