@sanity/ailf 0.5.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +0 -1
- package/config/features.ts +23 -0
- package/config/models.ts +95 -0
- package/config/prompts.ts +16 -0
- package/config/rubrics.ts +225 -0
- package/config/schedules.ts +47 -0
- package/config/sinks.ts +37 -0
- package/config/sources.ts +21 -0
- package/config/thresholds.ts +61 -0
- package/dist/_vendor/ailf-core/config-helpers.d.ts +171 -0
- package/dist/_vendor/ailf-core/config-helpers.js +170 -0
- package/dist/_vendor/ailf-core/env-helper.d.ts +35 -0
- package/dist/_vendor/ailf-core/env-helper.js +45 -0
- package/dist/_vendor/ailf-core/examples/index.d.ts +16 -0
- package/dist/_vendor/ailf-core/examples/index.js +25 -0
- package/dist/_vendor/ailf-core/index.d.ts +3 -0
- package/dist/_vendor/ailf-core/index.js +5 -0
- package/dist/_vendor/ailf-core/ports/context.d.ts +17 -2
- package/dist/_vendor/ailf-core/ports/doc-fetcher.d.ts +2 -2
- package/dist/_vendor/ailf-core/ports/index.d.ts +2 -1
- package/dist/_vendor/ailf-core/ports/mode-handler.d.ts +129 -0
- package/dist/_vendor/ailf-core/ports/mode-handler.js +19 -0
- package/dist/_vendor/ailf-core/ports/task-source.d.ts +16 -122
- package/dist/_vendor/ailf-core/ports/task-source.js +7 -7
- package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +8 -2
- package/dist/_vendor/ailf-core/schemas/eval-config.js +17 -2
- package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +9 -3
- package/dist/_vendor/ailf-core/schemas/pipeline-request.js +8 -1
- package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +14 -31
- package/dist/_vendor/ailf-core/schemas/pipeline.js +17 -9
- package/dist/_vendor/ailf-core/schemas/schedules.d.ts +14 -4
- package/dist/_vendor/ailf-core/schemas/schedules.js +6 -2
- package/dist/_vendor/ailf-core/schemas/sinks.d.ts +1 -1
- package/dist/_vendor/ailf-core/services/comparison-formatters.js +57 -19
- package/dist/_vendor/ailf-core/services/index.d.ts +2 -1
- package/dist/_vendor/ailf-core/services/index.js +2 -1
- package/dist/_vendor/ailf-core/services/scoring-engine.d.ts +153 -0
- package/dist/_vendor/ailf-core/services/scoring-engine.js +237 -0
- package/dist/_vendor/ailf-core/services/scoring.d.ts +15 -2
- package/dist/_vendor/ailf-core/services/scoring.js +25 -15
- package/dist/_vendor/ailf-core/types/branded-ids.d.ts +137 -0
- package/dist/_vendor/ailf-core/types/branded-ids.js +136 -0
- package/dist/_vendor/ailf-core/types/eval-mode-config.d.ts +150 -0
- package/dist/_vendor/ailf-core/types/eval-mode-config.js +24 -0
- package/dist/_vendor/ailf-core/types/generalized-task.d.ts +332 -0
- package/dist/_vendor/ailf-core/types/generalized-task.js +13 -0
- package/dist/_vendor/ailf-core/types/index.d.ts +45 -83
- package/dist/_vendor/ailf-core/types/index.js +8 -1
- package/dist/_vendor/ailf-core/types/plugin-registry.d.ts +257 -0
- package/dist/_vendor/ailf-core/types/plugin-registry.js +185 -0
- package/dist/_vendor/ailf-core/types/storage-schema.d.ts +199 -0
- package/dist/_vendor/ailf-core/types/storage-schema.js +39 -0
- package/dist/_vendor/ailf-core/types/task-graph.d.ts +86 -0
- package/dist/_vendor/ailf-core/types/task-graph.js +20 -0
- package/dist/_vendor/ailf-core/types/trace.d.ts +118 -0
- package/dist/_vendor/ailf-core/types/trace.js +18 -0
- package/dist/_vendor/ailf-core/types/variable-envelope.d.ts +80 -0
- package/dist/_vendor/ailf-core/types/variable-envelope.js +16 -0
- package/dist/_vendor/ailf-shared/dimension-names.d.ts +5 -18
- package/dist/_vendor/ailf-shared/dimension-names.js +6 -24
- package/dist/_vendor/ailf-shared/eval-modes.d.ts +38 -6
- package/dist/_vendor/ailf-shared/eval-modes.js +26 -2
- package/dist/_vendor/ailf-shared/index.d.ts +0 -1
- package/dist/_vendor/ailf-shared/index.js +0 -1
- package/dist/adapters/api-client/build-request.js +14 -13
- package/dist/adapters/config-sources/file-config-adapter.d.ts +20 -11
- package/dist/adapters/config-sources/file-config-adapter.js +39 -12
- package/dist/adapters/config-sources/index.d.ts +2 -0
- package/dist/adapters/config-sources/index.js +1 -0
- package/dist/adapters/config-sources/ts-config-loader.d.ts +59 -0
- package/dist/adapters/config-sources/ts-config-loader.js +141 -0
- package/dist/adapters/doc-fetchers/sanity-doc-fetcher.d.ts +3 -2
- package/dist/adapters/doc-fetchers/sanity-doc-fetcher.js +7 -2
- package/dist/adapters/task-sources/composite-task-source.d.ts +3 -3
- package/dist/adapters/task-sources/composite-task-source.js +1 -1
- package/dist/adapters/task-sources/content-lake-task-source.d.ts +7 -6
- package/dist/adapters/task-sources/content-lake-task-source.js +35 -39
- package/dist/adapters/task-sources/index.d.ts +3 -2
- package/dist/adapters/task-sources/index.js +3 -2
- package/dist/adapters/task-sources/repo-schemas.d.ts +218 -16
- package/dist/adapters/task-sources/repo-schemas.js +227 -19
- package/dist/adapters/task-sources/repo-task-source.d.ts +16 -12
- package/dist/adapters/task-sources/repo-task-source.js +92 -80
- package/dist/adapters/task-sources/repo-validation.d.ts +36 -5
- package/dist/adapters/task-sources/repo-validation.js +126 -5
- package/dist/adapters/task-sources/task-file-loader.d.ts +64 -0
- package/dist/adapters/task-sources/task-file-loader.js +83 -0
- package/dist/adapters/task-sources/yaml-task-source.d.ts +6 -6
- package/dist/adapters/task-sources/yaml-task-source.js +19 -16
- package/dist/cli.js +0 -2
- package/dist/commands/baseline.js +4 -1
- package/dist/commands/calculate-scores.js +1 -1
- package/dist/commands/coverage-audit.js +9 -1
- package/dist/commands/explain-handler.js +25 -23
- package/dist/commands/fetch-docs.js +3 -2
- package/dist/commands/generate-configs.js +1 -1
- package/dist/commands/init.d.ts +6 -4
- package/dist/commands/init.js +302 -23
- package/dist/commands/interactive.js +11 -7
- package/dist/commands/pipeline-action.d.ts +2 -0
- package/dist/commands/pipeline-action.js +16 -6
- package/dist/commands/pipeline.d.ts +1 -0
- package/dist/commands/pipeline.js +4 -2
- package/dist/commands/pr-comment.js +1 -1
- package/dist/commands/publish.js +2 -2
- package/dist/commands/readiness-report.js +13 -6
- package/dist/commands/validate-tasks.d.ts +2 -2
- package/dist/commands/validate-tasks.js +26 -15
- package/dist/composition-root.d.ts +13 -1
- package/dist/composition-root.js +99 -4
- package/dist/index.d.ts +41 -0
- package/dist/index.js +48 -0
- package/dist/orchestration/build-app-context.js +1 -0
- package/dist/orchestration/build-step-sequence.js +28 -8
- package/dist/orchestration/steps/calculate-scores-step.js +24 -11
- package/dist/orchestration/steps/fetch-docs-step.js +8 -7
- package/dist/orchestration/steps/gap-analysis-step.js +8 -7
- package/dist/orchestration/steps/generate-configs-step.d.ts +16 -3
- package/dist/orchestration/steps/generate-configs-step.js +261 -51
- package/dist/orchestration/steps/grader-consistency-step.js +7 -4
- package/dist/orchestration/steps/mirror-repo-tasks-step.js +1 -1
- package/dist/orchestration/steps/readiness-step.js +5 -6
- package/dist/orchestration/steps/run-eval-step.d.ts +1 -2
- package/dist/orchestration/steps/run-eval-step.js +8 -7
- package/dist/pipeline/cache.d.ts +1 -1
- package/dist/pipeline/cache.js +36 -8
- package/dist/pipeline/calculate-scores.d.ts +2 -4
- package/dist/pipeline/calculate-scores.js +43 -113
- package/dist/pipeline/checks.js +2 -2
- package/dist/pipeline/compare.js +8 -8
- package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.d.ts +10 -0
- package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js +288 -0
- package/dist/pipeline/compiler/__tests__/assertion-mapper.test.d.ts +9 -0
- package/dist/pipeline/compiler/__tests__/assertion-mapper.test.js +145 -0
- package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.d.ts +10 -0
- package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.js +314 -0
- package/dist/pipeline/compiler/__tests__/literacy-handler.test.d.ts +10 -0
- package/dist/pipeline/compiler/__tests__/literacy-handler.test.js +486 -0
- package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.d.ts +10 -0
- package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +392 -0
- package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.d.ts +9 -0
- package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.js +333 -0
- package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.d.ts +12 -0
- package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.js +210 -0
- package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.d.ts +7 -0
- package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.js +404 -0
- package/dist/pipeline/compiler/__tests__/scoring-bridge.test.d.ts +10 -0
- package/dist/pipeline/compiler/__tests__/scoring-bridge.test.js +184 -0
- package/dist/pipeline/compiler/__tests__/task-graph-builder.test.d.ts +8 -0
- package/dist/pipeline/compiler/__tests__/task-graph-builder.test.js +301 -0
- package/dist/pipeline/compiler/__tests__/telemetry.test.d.ts +9 -0
- package/dist/pipeline/compiler/__tests__/telemetry.test.js +503 -0
- package/dist/pipeline/compiler/assertion-mapper.d.ts +58 -0
- package/dist/pipeline/compiler/assertion-mapper.js +175 -0
- package/dist/pipeline/compiler/compiler-to-yaml.d.ts +51 -0
- package/dist/pipeline/compiler/compiler-to-yaml.js +222 -0
- package/dist/pipeline/compiler/config-loader.d.ts +56 -0
- package/dist/pipeline/compiler/config-loader.js +111 -0
- package/dist/pipeline/compiler/fixture-resolver.d.ts +41 -0
- package/dist/pipeline/compiler/fixture-resolver.js +113 -0
- package/dist/pipeline/compiler/hash.d.ts +11 -0
- package/dist/pipeline/compiler/hash.js +18 -0
- package/dist/pipeline/compiler/ignore-fields.d.ts +53 -0
- package/dist/pipeline/compiler/ignore-fields.js +113 -0
- package/dist/pipeline/compiler/index.d.ts +29 -0
- package/dist/pipeline/compiler/index.js +45 -0
- package/dist/pipeline/compiler/literacy-bridge.d.ts +102 -0
- package/dist/pipeline/compiler/literacy-bridge.js +172 -0
- package/dist/pipeline/compiler/mode-bases/agent-harness.d.ts +10 -0
- package/dist/pipeline/compiler/mode-bases/agent-harness.js +21 -0
- package/dist/pipeline/compiler/mode-bases/index.d.ts +4 -0
- package/dist/pipeline/compiler/mode-bases/index.js +4 -0
- package/dist/pipeline/compiler/mode-bases/knowledge-probe.d.ts +10 -0
- package/dist/pipeline/compiler/mode-bases/knowledge-probe.js +22 -0
- package/dist/pipeline/compiler/mode-bases/literacy.d.ts +12 -0
- package/dist/pipeline/compiler/mode-bases/literacy.js +78 -0
- package/dist/pipeline/compiler/mode-bases/mcp-server.d.ts +10 -0
- package/dist/pipeline/compiler/mode-bases/mcp-server.js +70 -0
- package/dist/pipeline/compiler/mode-handlers/__fixtures__/agent-harness-example-tasks.d.ts +14 -0
- package/dist/pipeline/compiler/mode-handlers/__fixtures__/agent-harness-example-tasks.js +152 -0
- package/dist/pipeline/compiler/mode-handlers/__fixtures__/knowledge-probe-example-tasks.d.ts +32 -0
- package/dist/pipeline/compiler/mode-handlers/__fixtures__/knowledge-probe-example-tasks.js +176 -0
- package/dist/pipeline/compiler/mode-handlers/__fixtures__/mcp-example-tasks.d.ts +49 -0
- package/dist/pipeline/compiler/mode-handlers/__fixtures__/mcp-example-tasks.js +259 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.d.ts +43 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.js +187 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.js +138 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/index.d.ts +16 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/index.js +43 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.js +29 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.d.ts +12 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.js +82 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.d.ts +4 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.js +19 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/types.d.ts +49 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.js +16 -0
- package/dist/pipeline/compiler/mode-handlers/index.d.ts +15 -0
- package/dist/pipeline/compiler/mode-handlers/index.js +19 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.d.ts +16 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.js +61 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.d.ts +18 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.js +112 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.d.ts +26 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.js +49 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.js +28 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.d.ts +44 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.js +24 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/assertions.d.ts +18 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/assertions.js +118 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/compiler.d.ts +14 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/compiler.js +105 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/index.d.ts +11 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/index.js +38 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/prompts.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/prompts.js +74 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/types.d.ts +41 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/validation.d.ts +12 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/validation.js +28 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.d.ts +42 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.js +334 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.js +100 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/index.d.ts +27 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/index.js +54 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.d.ts +8 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.js +28 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.d.ts +28 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.js +104 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/types.d.ts +37 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.js +43 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.d.ts +33 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.js +174 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.js +95 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.js +172 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.d.ts +14 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.js +16 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.d.ts +93 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.js +4 -0
- package/dist/pipeline/compiler/preset-loader.d.ts +22 -0
- package/dist/pipeline/compiler/preset-loader.js +99 -0
- package/dist/pipeline/compiler/presets/index.d.ts +9 -0
- package/dist/pipeline/compiler/presets/index.js +8 -0
- package/dist/pipeline/compiler/presets/sanity-literacy.d.ts +42 -0
- package/dist/pipeline/compiler/presets/sanity-literacy.js +208 -0
- package/dist/pipeline/compiler/promptfoo-compiler.d.ts +96 -0
- package/dist/pipeline/compiler/promptfoo-compiler.js +230 -0
- package/dist/pipeline/compiler/provider-assembler.d.ts +39 -0
- package/dist/pipeline/compiler/provider-assembler.js +137 -0
- package/dist/pipeline/compiler/sandbox/docker-sandbox.d.ts +21 -0
- package/dist/pipeline/compiler/sandbox/docker-sandbox.js +136 -0
- package/dist/pipeline/compiler/sandbox/fixture-provisioner.d.ts +69 -0
- package/dist/pipeline/compiler/sandbox/fixture-provisioner.js +189 -0
- package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.d.ts +20 -0
- package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.js +114 -0
- package/dist/pipeline/compiler/sandbox/index.d.ts +10 -0
- package/dist/pipeline/compiler/sandbox/index.js +11 -0
- package/dist/pipeline/compiler/sandbox/sandbox-selector.d.ts +35 -0
- package/dist/pipeline/compiler/sandbox/sandbox-selector.js +86 -0
- package/dist/pipeline/compiler/sandbox/sandbox-strategy.d.ts +81 -0
- package/dist/pipeline/compiler/sandbox/sandbox-strategy.js +15 -0
- package/dist/pipeline/compiler/sandbox/tempdir-sandbox.d.ts +20 -0
- package/dist/pipeline/compiler/sandbox/tempdir-sandbox.js +74 -0
- package/dist/pipeline/compiler/scoring-bridge.d.ts +49 -0
- package/dist/pipeline/compiler/scoring-bridge.js +114 -0
- package/dist/pipeline/compiler/task-graph-builder.d.ts +54 -0
- package/dist/pipeline/compiler/task-graph-builder.js +291 -0
- package/dist/pipeline/compiler/telemetry/cost-tracker.d.ts +90 -0
- package/dist/pipeline/compiler/telemetry/cost-tracker.js +146 -0
- package/dist/pipeline/compiler/telemetry/index.d.ts +14 -0
- package/dist/pipeline/compiler/telemetry/index.js +19 -0
- package/dist/pipeline/compiler/telemetry/redactor.d.ts +58 -0
- package/dist/pipeline/compiler/telemetry/redactor.js +222 -0
- package/dist/pipeline/compiler/telemetry/tool-classifier.d.ts +32 -0
- package/dist/pipeline/compiler/telemetry/tool-classifier.js +120 -0
- package/dist/pipeline/compiler/telemetry/trace-collector.d.ts +75 -0
- package/dist/pipeline/compiler/telemetry/trace-collector.js +297 -0
- package/dist/pipeline/compiler/telemetry/trace-store.d.ts +78 -0
- package/dist/pipeline/compiler/telemetry/trace-store.js +85 -0
- package/dist/pipeline/compiler/variable-resolver.d.ts +46 -0
- package/dist/pipeline/compiler/variable-resolver.js +115 -0
- package/dist/pipeline/coverage-audit.d.ts +15 -5
- package/dist/pipeline/coverage-audit.js +41 -22
- package/dist/pipeline/eval-constants.d.ts +16 -6
- package/dist/pipeline/eval-constants.js +25 -4
- package/dist/pipeline/eval-fingerprint.d.ts +2 -2
- package/dist/pipeline/eval-fingerprint.js +8 -9
- package/dist/pipeline/expand-tasks.d.ts +19 -10
- package/dist/pipeline/expand-tasks.js +34 -28
- package/dist/pipeline/gap-analysis.d.ts +1 -1
- package/dist/pipeline/gap-analysis.js +2 -2
- package/dist/pipeline/generate-configs.d.ts +22 -4
- package/dist/pipeline/generate-configs.js +53 -24
- package/dist/pipeline/grader-api.d.ts +3 -3
- package/dist/pipeline/grader-api.js +5 -12
- package/dist/pipeline/grader-compare-runner.js +20 -27
- package/dist/pipeline/grader-comparison.d.ts +4 -8
- package/dist/pipeline/grader-comparison.js +11 -17
- package/dist/pipeline/grader-consistency-runner.d.ts +2 -3
- package/dist/pipeline/grader-consistency-runner.js +16 -20
- package/dist/pipeline/grader-consistency.d.ts +6 -10
- package/dist/pipeline/grader-consistency.js +13 -32
- package/dist/pipeline/grader-sensitivity-runner.js +7 -5
- package/dist/pipeline/grader-sensitivity.d.ts +2 -6
- package/dist/pipeline/grader-sensitivity.js +10 -10
- package/dist/pipeline/grader-validate-runner.js +7 -5
- package/dist/pipeline/grader-validation.d.ts +2 -6
- package/dist/pipeline/grader-validation.js +14 -22
- package/dist/pipeline/map-request-to-config.js +7 -1
- package/dist/pipeline/mirror-repo-tasks.d.ts +13 -13
- package/dist/pipeline/mirror-repo-tasks.js +22 -21
- package/dist/pipeline/normalize-mode.d.ts +49 -0
- package/dist/pipeline/normalize-mode.js +64 -0
- package/dist/pipeline/plan.d.ts +5 -2
- package/dist/pipeline/plan.js +134 -78
- package/dist/pipeline/pr-comment.js +2 -0
- package/dist/pipeline/profile-resolution.d.ts +22 -14
- package/dist/pipeline/profile-resolution.js +41 -19
- package/dist/pipeline/provenance.d.ts +2 -2
- package/dist/pipeline/provenance.js +12 -17
- package/dist/pipeline/release-report.js +4 -4
- package/dist/pipeline/repo-threshold-evaluator.d.ts +1 -1
- package/dist/pipeline/repo-threshold-evaluator.js +1 -1
- package/dist/pipeline/rubric-loader.d.ts +20 -0
- package/dist/pipeline/rubric-loader.js +37 -0
- package/dist/pipeline/validate.d.ts +4 -4
- package/dist/pipeline/validate.js +64 -53
- package/dist/schedules/loader.js +18 -8
- package/dist/scripts/migrate-task-mode.d.ts +24 -0
- package/dist/scripts/migrate-task-mode.js +85 -0
- package/dist/scripts/migrate-tasks-to-content-lake.js +11 -10
- package/dist/scripts/validate-task-sources.d.ts +1 -1
- package/dist/scripts/validate-task-sources.js +15 -15
- package/dist/sinks/loader.js +5 -7
- package/dist/sources.d.ts +7 -7
- package/dist/sources.js +22 -24
- package/dist/webhook/dispatch.js +2 -1
- package/package.json +15 -4
- package/tasks/knowledge-probe/define-type-api.task.ts +55 -0
- package/tasks/knowledge-probe/groq-projections.task.ts +59 -0
- package/tasks/literacy/frameworks.task.ts +128 -0
- package/tasks/literacy/functions.task.ts +69 -0
- package/tasks/literacy/groq.task.ts +258 -0
- package/tasks/literacy/nextjs-live.task.ts +75 -0
- package/tasks/literacy/studio-setup.task.ts +131 -0
- package/tasks/literacy/visual-editing.task.ts +146 -0
- package/config/features.yaml +0 -116
- package/config/models.yaml +0 -116
- package/config/prompts.yaml +0 -75
- package/config/rubrics.yaml +0 -81
- package/config/schedules.yaml +0 -43
- package/config/sinks.yaml +0 -54
- package/config/sources.yaml +0 -51
- package/config/thresholds.yaml +0 -49
- package/dist/_vendor/ailf-tasks/cli.d.ts +0 -8
- package/dist/_vendor/ailf-tasks/cli.js +0 -61
- package/dist/_vendor/ailf-tasks/index.d.ts +0 -13
- package/dist/_vendor/ailf-tasks/index.js +0 -16
- package/dist/_vendor/ailf-tasks/parser.d.ts +0 -27
- package/dist/_vendor/ailf-tasks/parser.js +0 -73
- package/dist/_vendor/ailf-tasks/schemas.d.ts +0 -198
- package/dist/_vendor/ailf-tasks/schemas.js +0 -180
- package/dist/_vendor/ailf-tasks/validation.d.ts +0 -47
- package/dist/_vendor/ailf-tasks/validation.js +0 -162
- package/dist/agent-observer/test-imports.d.ts +0 -7
- package/dist/agent-observer/test-imports.js +0 -185
|
@@ -0,0 +1,222 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Redaction pipeline — strips sensitive data from traces before storage.
|
|
3
|
+
*
|
|
4
|
+
* Applied before ANY storage (both blob and Content Lake). Configurable
|
|
5
|
+
* patterns handle Bearer tokens, API keys, Sanity tokens, and other
|
|
6
|
+
* common secret formats.
|
|
7
|
+
*
|
|
8
|
+
* Principles:
|
|
9
|
+
* 1. Redact before store — sensitive data never reaches storage
|
|
10
|
+
* 2. Configurable patterns — teams can add project-specific rules
|
|
11
|
+
* 3. Truncation for cost — large outputs truncated to max bytes
|
|
12
|
+
* 4. No PII by default — tasks shouldn't contain PII, this is a safety net
|
|
13
|
+
*
|
|
14
|
+
* @see docs/design-docs/architecture-overhaul/observability-telemetry.md
|
|
15
|
+
*/
|
|
16
|
+
// ---------------------------------------------------------------------------
|
|
17
|
+
// Default rules
|
|
18
|
+
// ---------------------------------------------------------------------------
|
|
19
|
+
/** Built-in redaction rules for common secret patterns */
|
|
20
|
+
export const DEFAULT_REDACTION_RULES = [
|
|
21
|
+
{
|
|
22
|
+
name: "bearer_tokens",
|
|
23
|
+
pattern: /Bearer\s+[A-Za-z0-9._~+/=-]{10,}/g,
|
|
24
|
+
replacement: "Bearer [REDACTED]",
|
|
25
|
+
},
|
|
26
|
+
{
|
|
27
|
+
name: "sanity_tokens",
|
|
28
|
+
pattern: /sk[A-Za-z0-9]{30,}/g,
|
|
29
|
+
replacement: "[REDACTED_SANITY_TOKEN]",
|
|
30
|
+
},
|
|
31
|
+
{
|
|
32
|
+
name: "openai_keys",
|
|
33
|
+
pattern: /sk-[A-Za-z0-9_-]{20,}/g,
|
|
34
|
+
replacement: "[REDACTED_OPENAI_KEY]",
|
|
35
|
+
},
|
|
36
|
+
{
|
|
37
|
+
name: "api_key_values",
|
|
38
|
+
pattern: /((?:api[_-]?key|token|secret|password|authorization)\s*[:=]\s*)(["']?)(?!\[REDACTED)[^\s"']{8,}\2/gi,
|
|
39
|
+
replacement: "$1$2[REDACTED]$2",
|
|
40
|
+
},
|
|
41
|
+
{
|
|
42
|
+
name: "slack_tokens",
|
|
43
|
+
pattern: /xoxb-[A-Za-z0-9-]{20,}/g,
|
|
44
|
+
replacement: "[REDACTED_SLACK_TOKEN]",
|
|
45
|
+
},
|
|
46
|
+
{
|
|
47
|
+
name: "github_tokens",
|
|
48
|
+
pattern: /gh[ps]_[A-Za-z0-9]{30,}/g,
|
|
49
|
+
replacement: "[REDACTED_GITHUB_TOKEN]",
|
|
50
|
+
},
|
|
51
|
+
{
|
|
52
|
+
name: "anthropic_keys",
|
|
53
|
+
pattern: /sk-ant-[A-Za-z0-9_-]{20,}/g,
|
|
54
|
+
replacement: "[REDACTED_ANTHROPIC_KEY]",
|
|
55
|
+
},
|
|
56
|
+
{
|
|
57
|
+
name: "base64_credentials",
|
|
58
|
+
pattern: /Basic\s+[A-Za-z0-9+/=]{20,}/g,
|
|
59
|
+
replacement: "Basic [REDACTED]",
|
|
60
|
+
},
|
|
61
|
+
];
|
|
62
|
+
/**
 * Default fields to omit entirely.
 *
 * Each header is listed in both its canonical and lowercase spelling: path
 * segments are matched as exact, case-sensitive object keys, whereas HTTP
 * header names are case-insensitive and are often carried in lowercase.
 */
const DEFAULT_OMIT_FIELDS = [
    "toolCalls[*].input.headers.Authorization",
    "toolCalls[*].input.headers.Cookie",
    "toolCalls[*].input.headers.Set-Cookie",
    "toolCalls[*].input.headers.authorization",
    "toolCalls[*].input.headers.cookie",
    "toolCalls[*].input.headers.set-cookie",
];
/** Default size limit applied to a serialized tool output before truncation. */
const DEFAULT_MAX_OUTPUT_BYTES = 10_240;
|
|
69
|
+
// ---------------------------------------------------------------------------
|
|
70
|
+
// Public API
|
|
71
|
+
// ---------------------------------------------------------------------------
|
|
72
|
+
/**
 * Create a redaction config based on the built-in defaults.
 *
 * Custom rules and omit-field paths are appended after the defaults; they do
 * not replace them. The returned arrays are always fresh copies, so a caller
 * can mutate the config without altering the module-level defaults.
 *
 * @param overrides - Optional `rules`, `omitFields` and/or `maxOutputBytes`
 * @returns A config object with `rules`, `omitFields` and `maxOutputBytes`
 */
export function createRedactionConfig(overrides) {
    // Falsy overrides are treated as "nothing to add".
    const extraRules = overrides?.rules ? overrides.rules : [];
    const extraOmitFields = overrides?.omitFields ? overrides.omitFields : [];
    return {
        rules: [...DEFAULT_REDACTION_RULES, ...extraRules],
        omitFields: [...DEFAULT_OMIT_FIELDS, ...extraOmitFields],
        maxOutputBytes: overrides?.maxOutputBytes ?? DEFAULT_MAX_OUTPUT_BYTES,
    };
}
|
|
88
|
+
/**
 * Apply redaction to an evaluation trace.
 *
 * Processes tool call inputs and outputs, event data, and search terms.
 * Returns a new trace (does not mutate the original).
 *
 * Sections that are missing from the trace (or are not arrays) are left
 * untouched rather than causing a crash, as are events without `data` and
 * non-string search terms.
 *
 * @param trace - Trace to redact; it is deep-cloned via a JSON round-trip
 * @param config - Redaction config; defaults to `createRedactionConfig()`
 * @returns `{ trace, redactionCount, rulesApplied }` where `rulesApplied`
 *   holds the de-duplicated names of every rule that matched
 */
export function redactTrace(trace, config) {
    const cfg = config ?? createRedactionConfig();
    let redactionCount = 0;
    const rulesApplied = new Set();
    const record = (count, rules) => {
        redactionCount += count;
        for (const rule of rules)
            rulesApplied.add(rule);
    };
    // Deep clone to avoid mutation
    const redacted = JSON.parse(JSON.stringify(trace));
    // Redact tool calls
    if (Array.isArray(redacted.toolCalls)) {
        redacted.toolCalls = redacted.toolCalls.map((call) => {
            const result = redactToolCall(call, cfg);
            record(result.count, result.rules);
            return result.call;
        });
    }
    // Redact events
    if (Array.isArray(redacted.events)) {
        redacted.events = redacted.events.map((event) => {
            const dataStr = JSON.stringify(event?.data);
            // JSON.stringify yields undefined when there is no data to serialize.
            if (dataStr === undefined)
                return event;
            const { text, count, rules } = applyRules(dataStr, cfg.rules);
            record(count, rules);
            // Rules run on serialized JSON, so a replacement can leave text that
            // no longer parses; keep the redacted string instead of throwing.
            return { ...event, data: parseJsonSafe(text) };
        });
    }
    // Redact search terms (may contain embedded secrets)
    if (Array.isArray(redacted.searchTerms)) {
        redacted.searchTerms = redacted.searchTerms.map((term) => {
            if (typeof term !== "string")
                return term;
            const { text, count, rules } = applyRules(term, cfg.rules);
            record(count, rules);
            return text;
        });
    }
    return {
        trace: redacted,
        redactionCount,
        rulesApplied: [...rulesApplied],
    };
}
|
|
131
|
+
// ---------------------------------------------------------------------------
|
|
132
|
+
// Tool call redaction
|
|
133
|
+
// ---------------------------------------------------------------------------
|
|
134
|
+
/**
 * Redact a single tool call's input and output.
 *
 * Both values are serialized to JSON, run through the configured rules, and
 * parsed back. The output is truncated only AFTER redaction: truncating first
 * could cut a secret below a rule's minimum length, so the remaining fragment
 * would no longer match and would be stored.
 *
 * @param call - Tool call with `input` and `output`
 * @param config - Redaction config (`rules`, `omitFields`, `maxOutputBytes`)
 * @returns `{ call, count, rules }`: the redacted copy, the number of matches,
 *   and the names of the rules that matched (input first, then output)
 */
function redactToolCall(call, config) {
    let count = 0;
    const rules = [];
    // Serialize and redact a value; undefined means there was nothing to serialize.
    const redactSerialized = (value) => {
        const serialized = JSON.stringify(value);
        if (serialized === undefined)
            return undefined;
        const result = applyRules(serialized, config.rules);
        count += result.count;
        rules.push(...result.rules);
        return result.text;
    };
    // Redact input, then omit specific fields from it
    let input = call.input;
    const inputText = redactSerialized(call.input);
    if (inputText !== undefined) {
        // A replacement can leave text that no longer parses as JSON; in that
        // case the redacted string is kept as-is.
        input = parseJsonSafe(inputText);
        if (input !== null && typeof input === "object") {
            input = omitFields(input, config.omitFields, "input");
        }
    }
    // Redact output, then truncate if too large
    let output = call.output;
    let outputText = redactSerialized(call.output);
    if (outputText !== undefined) {
        // The limit is compared against the string length (UTF-16 code units).
        if (outputText.length > config.maxOutputBytes) {
            outputText = outputText.slice(0, config.maxOutputBytes) + "... [truncated]";
        }
        // Truncated JSON no longer parses and is therefore stored as a string.
        output = parseJsonSafe(outputText);
    }
    return {
        call: {
            ...call,
            input,
            output,
        },
        count,
        rules,
    };
}
|
|
164
|
+
// ---------------------------------------------------------------------------
|
|
165
|
+
// Rule application
|
|
166
|
+
// ---------------------------------------------------------------------------
|
|
167
|
+
/**
 * Apply every redaction rule, in order, to a piece of text.
 *
 * Each rule operates on the output of the previous one. Patterns supplied
 * without the `g` flag are promoted to global so that every occurrence is
 * replaced and counted (a non-global pattern would redact only the first
 * occurrence, and `match()` would report capture groups as extra matches).
 *
 * @param text - Text to redact; non-string values are returned unchanged
 * @param rules - Rules of shape `{ name, pattern, replacement }`
 * @returns `{ text, count, rules }`: the redacted text, the total number of
 *   matches, and the names of the rules that matched at least once
 */
function applyRules(text, rules) {
    if (typeof text !== "string") {
        return { text, count: 0, rules: [] };
    }
    let result = text;
    let count = 0;
    const appliedRules = [];
    for (const rule of rules) {
        const pattern = rule.pattern.global
            ? rule.pattern
            : new RegExp(rule.pattern.source, rule.pattern.flags + "g");
        // Reset lastIndex before match() — global regexes are stateful
        pattern.lastIndex = 0;
        const matches = result.match(pattern);
        if (!matches || matches.length === 0) {
            continue;
        }
        count += matches.length;
        appliedRules.push(rule.name);
        // Reset again before replace() so it starts from the beginning
        pattern.lastIndex = 0;
        result = result.replace(pattern, rule.replacement);
    }
    return { text: result, count, rules: appliedRules };
}
|
|
185
|
+
// ---------------------------------------------------------------------------
|
|
186
|
+
// Field omission
|
|
187
|
+
// ---------------------------------------------------------------------------
|
|
188
|
+
/**
 * Delete the fields named by `patterns` from `obj` (mutates and returns it).
 *
 * Simple dotted field paths only (not full JSONPath). A pattern applies when
 * one of its dot-separated segments equals `context`; the segments after it
 * form the path to delete inside `obj`. For example,
 * "toolCalls[*].input.headers.Authorization" with context "input" deletes
 * `obj.headers.Authorization`.
 *
 * @param obj - Object to strip; null and non-object values are returned as-is
 * @param patterns - Dotted field paths; a missing list is treated as empty
 * @param context - Segment name that marks where `obj` sits in each path
 * @returns The same `obj` reference
 */
function omitFields(obj, patterns, context) {
    if (obj === null || typeof obj !== "object") {
        return obj;
    }
    for (const pattern of patterns ?? []) {
        const parts = pattern.split(".");
        const fieldIndex = parts.indexOf(context);
        if (fieldIndex < 0) {
            continue;
        }
        deleteNestedField(obj, parts.slice(fieldIndex + 1));
    }
    return obj;
}
|
|
203
|
+
/**
 * Delete the property addressed by `path` from `obj`, mutating it in place.
 *
 * The walk stops silently when the path is empty, when the current node is
 * not an object (including a null/undefined root), or when an intermediate
 * key is not an own property of the node. Restricting traversal to own
 * properties keeps a segment such as "__proto__" from reaching, and deleting
 * from, a shared prototype.
 *
 * @param obj - Root object to modify
 * @param path - Property names leading to the field to remove
 */
function deleteNestedField(obj, path) {
    let node = obj;
    const lastIndex = path.length - 1;
    for (let depth = 0; depth <= lastIndex; depth++) {
        if (node === null || typeof node !== "object") {
            return;
        }
        const key = path[depth];
        if (depth === lastIndex) {
            delete node[key];
            return;
        }
        if (!Object.prototype.hasOwnProperty.call(node, key)) {
            return;
        }
        node = node[key];
    }
}
|
|
215
|
+
/**
 * Parse `text` as JSON, falling back to the raw input when parsing throws.
 *
 * @param text - Candidate JSON string
 * @returns The parsed value, or `text` itself if it is not valid JSON
 */
function parseJsonSafe(text) {
    let parsed;
    try {
        parsed = JSON.parse(text);
    }
    catch {
        // Not valid JSON: hand the input back untouched.
        parsed = text;
    }
    return parsed;
}
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tool call classification — maps raw provider tool names to categories.
|
|
3
|
+
*
|
|
4
|
+
* Raw tool names from providers are noisy and inconsistent (`WebSearch` vs
|
|
5
|
+
* `web_search` vs `Browser.search`). This module normalizes every tool call
|
|
6
|
+
* into one of six standard categories for cross-model comparison.
|
|
7
|
+
*
|
|
8
|
+
* @see docs/design-docs/architecture-overhaul/observability-telemetry.md
|
|
9
|
+
*/
|
|
10
|
+
import type { ToolCallCategory } from "../../../_vendor/ailf-core/index.d.ts";
|
|
11
|
+
/**
 * Resolve the category for a single raw tool name.
 *
 * Lookup runs through these stages and stops at the first hit:
 *   1. the caller's `customMappings`, matched on the exact name
 *   2. the default tool categories, matched on the exact name
 *   3. heuristic patterns tested against the name
 *   4. "execute" when nothing else applies (safest default for unknown tools)
 *
 * @param name - Tool name exactly as the provider reported it
 * @param customMappings - Optional tool → category overrides
 * @returns The category chosen for `name`
 */
export declare function classifyToolCall(
    name: string,
    customMappings?: Record<string, ToolCallCategory>,
): ToolCallCategory;
|
|
25
|
+
/**
 * Classify a list of tool names, producing one category per name.
 *
 * Names that needed the heuristic or default fallback are also reported in
 * `unrecognized` so the caller can log warnings about them.
 *
 * @param names - Raw tool names from the provider
 * @param customMappings - Optional tool → category overrides
 */
export declare function classifyToolCalls(
    names: string[],
    customMappings?: Record<string, ToolCallCategory>,
): {
    categories: ToolCallCategory[];
    unrecognized: string[];
};
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tool call classification — maps raw provider tool names to categories.
|
|
3
|
+
*
|
|
4
|
+
* Raw tool names from providers are noisy and inconsistent (`WebSearch` vs
|
|
5
|
+
* `web_search` vs `Browser.search`). This module normalizes every tool call
|
|
6
|
+
* into one of six standard categories for cross-model comparison.
|
|
7
|
+
*
|
|
8
|
+
* @see docs/design-docs/architecture-overhaul/observability-telemetry.md
|
|
9
|
+
*/
|
|
10
|
+
// ---------------------------------------------------------------------------
|
|
11
|
+
// Default tool name → category mapping
|
|
12
|
+
// ---------------------------------------------------------------------------
|
|
13
|
+
/**
 * Exact-name lookup table: raw tool name → category.
 *
 * Written as one name list per category and flattened into a single
 * name-keyed object. Keys are case-sensitive, which is why several tools
 * appear in more than one spelling.
 */
const DEFAULT_TOOL_CATEGORIES = Object.fromEntries(
    Object.entries({
        search: ["Grep", "WebSearch", "grep", "search", "semantic_search", "web_search"],
        read: ["Glob", "Read", "WebFetch", "cat", "curl", "file_read", "read_file", "web_fetch"],
        write: ["Edit", "FileEdit", "Write", "file_write", "patch", "write_file"],
        execute: ["Bash", "RunCode", "bash", "exec", "python", "run_code", "shell"],
        navigate: ["Browser.navigate", "FollowLink", "browse", "follow_link", "navigate", "open_url"],
        communicate: ["AskUser", "TodoRead", "TodoWrite", "ask_user", "submit_response"],
    }).flatMap(([category, toolNames]) => toolNames.map((toolName) => [toolName, category])),
);
|
|
59
|
+
// ---------------------------------------------------------------------------
|
|
60
|
+
// Heuristic patterns (fallback when name not in lookup table)
|
|
61
|
+
// ---------------------------------------------------------------------------
|
|
62
|
+
/**
 * Ordered `[pattern, category]` pairs for names missing from the lookup table.
 *
 * All patterns are case-insensitive substring matches; the order of the
 * entries is preserved in the resulting array.
 */
const HEURISTIC_PATTERNS = [
    { category: "search", pattern: /search|find|query|lookup|grep/i },
    { category: "read", pattern: /read|fetch|get|load|cat|view/i },
    { category: "write", pattern: /write|create|edit|update|patch|save|put|post/i },
    { category: "execute", pattern: /exec|run|bash|shell|python|code|command/i },
    { category: "navigate", pattern: /navigate|browse|open|follow|link|url/i },
    { category: "communicate", pattern: /ask|user|chat|message|submit|todo|response/i },
].map(({ pattern, category }) => [pattern, category]);
|
|
70
|
+
// ---------------------------------------------------------------------------
|
|
71
|
+
// Public API
|
|
72
|
+
// ---------------------------------------------------------------------------
|
|
73
|
+
/**
 * Classify a tool call by its raw name.
 *
 * Resolution order:
 *   1. Exact match in custom overrides (if provided)
 *   2. Exact match in default tool categories
 *   3. Heuristic pattern matching on the name (first matching pattern wins)
 *   4. Falls back to "execute" (safest default for unknown tools)
 *
 * Table lookups only consider own properties, so a tool named like an
 * inherited object member ("constructor", "toString", ...) is not mistaken
 * for a table hit.
 *
 * @param name - Raw tool name from the provider
 * @param customMappings - Optional custom tool → category overrides
 * @returns The classified category
 */
export function classifyToolCall(name, customMappings) {
    // Plain objects inherit members from Object.prototype; a bare `table[name]`
    // would return a function for names such as "constructor".
    const ownEntry = (table, key) => (table != null && Object.prototype.hasOwnProperty.call(table, key))
        ? table[key]
        : undefined;
    // 1. Custom overrides
    const custom = ownEntry(customMappings, name);
    if (custom) {
        return custom;
    }
    // 2. Default lookup
    const builtIn = ownEntry(DEFAULT_TOOL_CATEGORIES, name);
    if (builtIn) {
        return builtIn;
    }
    // 3. Heuristic matching
    for (const [pattern, category] of HEURISTIC_PATTERNS) {
        if (pattern.test(name)) {
            return category;
        }
    }
    // 4. Unknown → execute (safest default)
    return "execute";
}
|
|
104
|
+
/**
 * Classify multiple tool calls, returning the category for each.
 *
 * Also tracks unrecognized names — those found in neither the default table
 * nor the custom mappings — for the caller to log warnings. Only own
 * properties count as a table hit, so names that collide with inherited
 * object members ("constructor", "toString", ...) are still reported.
 *
 * @param names - Raw tool names from the provider
 * @param customMappings - Optional custom tool → category overrides
 * @returns `categories` (one per input name, same order) and `unrecognized`
 */
export function classifyToolCalls(names, customMappings) {
    const isMapped = (table, toolName) => table != null &&
        Object.prototype.hasOwnProperty.call(table, toolName) &&
        Boolean(table[toolName]);
    const categories = [];
    const unrecognized = [];
    for (const name of names) {
        categories.push(classifyToolCall(name, customMappings));
        // Track names that required heuristic or default fallback
        if (!isMapped(DEFAULT_TOOL_CATEGORIES, name) && !isMapped(customMappings, name)) {
            unrecognized.push(name);
        }
    }
    return { categories, unrecognized };
}
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* TraceCollector — extracts structured trace data from provider responses.
|
|
3
|
+
*
|
|
4
|
+
* Parses tool calls, token usage, and timing data from Promptfoo result
|
|
5
|
+
* objects and normalizes them into the canonical `EvalTrace` shape.
|
|
6
|
+
*
|
|
7
|
+
* Works via inline extraction — parsing provider response metadata
|
|
8
|
+
* directly, without requiring additional infrastructure.
|
|
9
|
+
*
|
|
10
|
+
* @see docs/design-docs/architecture-overhaul/observability-telemetry.md
|
|
11
|
+
* @see packages/core/src/types/trace.ts — EvalTrace types
|
|
12
|
+
*/
|
|
13
|
+
import type { EvalTrace, ToolCallCategory } from "../../../_vendor/ailf-core/index.d.ts";
|
|
14
|
+
/**
 * Shape of a raw provider response (a subset of Promptfoo's result object).
 * Every field is optional.
 */
export interface ProviderResponse {
    /** Text output as returned by the provider */
    output?: string;
    /** Token counters; which ones are present varies by provider */
    tokenUsage?: Partial<Record<"completion" | "prompt" | "total" | "cached", number>>;
    /** Provider-specific metadata (e.g., Claude's toolCalls) */
    metadata?: {
        toolCalls?: RawToolCall[];
        [key: string]: unknown;
    };
    /** Response latency, in milliseconds */
    latencyMs?: number;
}
|
|
33
|
+
/**
 * A tool call as reported by a provider, before normalization.
 * Every field is optional.
 */
export interface RawToolCall {
    name?: string;
    input?: Record<string, unknown>;
    output?: unknown;
    error?: string;
    durationMs?: number;
    /** Alternative field names used by some providers */
    function?: Partial<Record<"name" | "arguments", string>>;
    type?: string;
}
|
|
47
|
+
/**
 * Options for trace collection: four required identifiers plus two optional
 * settings.
 */
export interface TraceCollectorOptions {
    /** Run ID the trace is associated with */
    runId: string;
    /** ID of the task that produced this test case */
    taskId: string;
    /** Index of the test case within its task */
    testCaseIndex: number;
    /** Model under evaluation */
    modelId: string;
    /** Custom tool → category mappings */
    toolCategories?: Record<string, ToolCallCategory>;
    /** Maximum output size per tool call, in bytes */
    maxOutputBytes?: number;
}
|
|
62
|
+
/**
 * Build a trace from one provider response.
 *
 * Extracts tool calls, token usage and timing, and builds the chronological
 * event log.
 *
 * @param response - Provider response to extract from
 * @param options - Identifiers and settings for the trace
 * @returns The collected trace
 */
export declare function collectTrace(
    response: ProviderResponse,
    options: TraceCollectorOptions,
): EvalTrace;
|
|
69
|
+
/**
 * Combine per-turn traces into a single test case trace.
 *
 * Each turn produces its own trace; the result is a parent trace with
 * per-turn spans.
 *
 * @param turns - The traces to merge, one per turn
 * @param parentOptions - Identifiers and settings for the parent trace
 * @returns The merged parent trace
 */
export declare function mergeTraces(
    turns: EvalTrace[],
    parentOptions: TraceCollectorOptions,
): EvalTrace;
|