npm - @sanity/ailf - Versions diffs - 0.1.0 - Mend

@sanity/ailf 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (530) hide show

package/README.md +89 -0
package/bin/ailf.js +64 -0
package/canonical/grader-references/README.md +88 -0
package/canonical/grader-references/groq.yaml +234 -0
package/canonical/grader-references/studio-setup.yaml +275 -0
package/canonical/reference-solutions/.gitkeep +1 -0
package/canonical/reference-solutions/frameworks/nuxt.ts +119 -0
package/canonical/reference-solutions/frameworks/remix.tsx +100 -0
package/canonical/reference-solutions/functions/publish-webhook.ts +60 -0
package/canonical/reference-solutions/groq/advanced-filtering.ts +379 -0
package/canonical/reference-solutions/groq/blog-queries.ts +137 -0
package/canonical/reference-solutions/groq/joins-references.ts +300 -0
package/canonical/reference-solutions/nextjs/app-router-integration.tsx +128 -0
package/canonical/reference-solutions/studio-setup/blog-schema.ts +143 -0
package/canonical/reference-solutions/studio-setup/custom-tool.tsx +78 -0
package/canonical/reference-solutions/visual-editing/live-preview.tsx +137 -0
package/canonical/reference-solutions/visual-editing/presentation-nextjs.tsx +130 -0
package/config/airbyte/ai_literacy_framework.connector.yaml +639 -0
package/config/bigquery/README.md +74 -0
package/config/bigquery/views/area_scores.sql +87 -0
package/config/bigquery/views/reports.sql +49 -0
package/config/features.yaml +116 -0
package/config/models.yaml +115 -0
package/config/prompts.yaml +75 -0
package/config/rubrics.yaml +62 -0
package/config/schedules.yaml +43 -0
package/config/sinks.yaml +54 -0
package/config/sources.yaml +51 -0
package/config/thresholds.yaml +49 -0
package/dist/_vendor/ailf-core/examples/index.d.ts +190 -0
package/dist/_vendor/ailf-core/examples/index.js +285 -0
package/dist/_vendor/ailf-core/index.d.ts +17 -0
package/dist/_vendor/ailf-core/index.js +17 -0
package/dist/_vendor/ailf-core/ports/cache-store.d.ts +72 -0
package/dist/_vendor/ailf-core/ports/cache-store.js +17 -0
package/dist/_vendor/ailf-core/ports/config-source.d.ts +33 -0
package/dist/_vendor/ailf-core/ports/config-source.js +15 -0
package/dist/_vendor/ailf-core/ports/context.d.ts +172 -0
package/dist/_vendor/ailf-core/ports/context.js +14 -0
package/dist/_vendor/ailf-core/ports/doc-fetcher.d.ts +131 -0
package/dist/_vendor/ailf-core/ports/doc-fetcher.js +12 -0
package/dist/_vendor/ailf-core/ports/eval-runner.d.ts +24 -0
package/dist/_vendor/ailf-core/ports/eval-runner.js +8 -0
package/dist/_vendor/ailf-core/ports/index.d.ts +15 -0
package/dist/_vendor/ailf-core/ports/index.js +7 -0
package/dist/_vendor/ailf-core/ports/logger.d.ts +36 -0
package/dist/_vendor/ailf-core/ports/logger.js +11 -0
package/dist/_vendor/ailf-core/ports/pipeline-step.d.ts +46 -0
package/dist/_vendor/ailf-core/ports/pipeline-step.js +8 -0
package/dist/_vendor/ailf-core/ports/task-source.d.ts +159 -0
package/dist/_vendor/ailf-core/ports/task-source.js +72 -0
package/dist/_vendor/ailf-core/schemas/callback-payload.d.ts +24 -0
package/dist/_vendor/ailf-core/schemas/callback-payload.js +29 -0
package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +55 -0
package/dist/_vendor/ailf-core/schemas/eval-config.js +78 -0
package/dist/_vendor/ailf-core/schemas/index.d.ts +16 -0
package/dist/_vendor/ailf-core/schemas/index.js +16 -0
package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +125 -0
package/dist/_vendor/ailf-core/schemas/pipeline-request.js +67 -0
package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +531 -0
package/dist/_vendor/ailf-core/schemas/pipeline.js +318 -0
package/dist/_vendor/ailf-core/schemas/schedules.d.ts +68 -0
package/dist/_vendor/ailf-core/schemas/schedules.js +74 -0
package/dist/_vendor/ailf-core/schemas/sinks.d.ts +207 -0
package/dist/_vendor/ailf-core/schemas/sinks.js +108 -0
package/dist/_vendor/ailf-core/services/comparison-formatters.d.ts +18 -0
package/dist/_vendor/ailf-core/services/comparison-formatters.js +189 -0
package/dist/_vendor/ailf-core/services/config-helpers.d.ts +41 -0
package/dist/_vendor/ailf-core/services/config-helpers.js +86 -0
package/dist/_vendor/ailf-core/services/index.d.ts +12 -0
package/dist/_vendor/ailf-core/services/index.js +12 -0
package/dist/_vendor/ailf-core/services/scoring.d.ts +49 -0
package/dist/_vendor/ailf-core/services/scoring.js +222 -0
package/dist/_vendor/ailf-core/types/index.d.ts +1082 -0
package/dist/_vendor/ailf-core/types/index.js +21 -0
package/dist/_vendor/ailf-core/types/scoring-input.d.ts +54 -0
package/dist/_vendor/ailf-core/types/scoring-input.js +9 -0
package/dist/_vendor/ailf-shared/dimension-names.d.ts +21 -0
package/dist/_vendor/ailf-shared/dimension-names.js +27 -0
package/dist/_vendor/ailf-shared/document-ref.d.ts +29 -0
package/dist/_vendor/ailf-shared/document-ref.js +1 -0
package/dist/_vendor/ailf-shared/eval-modes.d.ts +12 -0
package/dist/_vendor/ailf-shared/eval-modes.js +8 -0
package/dist/_vendor/ailf-shared/index.d.ts +16 -0
package/dist/_vendor/ailf-shared/index.js +16 -0
package/dist/_vendor/ailf-shared/noise-threshold.d.ts +9 -0
package/dist/_vendor/ailf-shared/noise-threshold.js +9 -0
package/dist/_vendor/ailf-shared/score-grades.d.ts +17 -0
package/dist/_vendor/ailf-shared/score-grades.js +23 -0
package/dist/adapters/cache/content-lake-cache.d.ts +24 -0
package/dist/adapters/cache/content-lake-cache.js +59 -0
package/dist/adapters/cache/filesystem-cache.d.ts +18 -0
package/dist/adapters/cache/filesystem-cache.js +54 -0
package/dist/adapters/cache/index.d.ts +2 -0
package/dist/adapters/cache/index.js +2 -0
package/dist/adapters/config-sources/cli-config-adapter.d.ts +17 -0
package/dist/adapters/config-sources/cli-config-adapter.js +23 -0
package/dist/adapters/config-sources/file-config-adapter.d.ts +26 -0
package/dist/adapters/config-sources/file-config-adapter.js +96 -0
package/dist/adapters/config-sources/index.d.ts +2 -0
package/dist/adapters/config-sources/index.js +2 -0
package/dist/adapters/doc-fetchers/index.d.ts +1 -0
package/dist/adapters/doc-fetchers/index.js +1 -0
package/dist/adapters/doc-fetchers/sanity-doc-fetcher.d.ts +76 -0
package/dist/adapters/doc-fetchers/sanity-doc-fetcher.js +620 -0
package/dist/adapters/eval-runners/index.d.ts +1 -0
package/dist/adapters/eval-runners/index.js +1 -0
package/dist/adapters/eval-runners/promptfoo-eval-adapter.d.ts +14 -0
package/dist/adapters/eval-runners/promptfoo-eval-adapter.js +63 -0
package/dist/adapters/index.d.ts +12 -0
package/dist/adapters/index.js +12 -0
package/dist/adapters/loggers/console-logger.d.ts +22 -0
package/dist/adapters/loggers/console-logger.js +54 -0
package/dist/adapters/loggers/index.d.ts +9 -0
package/dist/adapters/loggers/index.js +9 -0
package/dist/adapters/loggers/json-logger.d.ts +18 -0
package/dist/adapters/loggers/json-logger.js +33 -0
package/dist/adapters/loggers/quiet-logger.d.ts +16 -0
package/dist/adapters/loggers/quiet-logger.js +30 -0
package/dist/adapters/task-sources/composite-task-source.d.ts +20 -0
package/dist/adapters/task-sources/composite-task-source.js +59 -0
package/dist/adapters/task-sources/content-lake-task-source.d.ts +20 -0
package/dist/adapters/task-sources/content-lake-task-source.js +219 -0
package/dist/adapters/task-sources/index.d.ts +7 -0
package/dist/adapters/task-sources/index.js +7 -0
package/dist/adapters/task-sources/repo-schemas.d.ts +245 -0
package/dist/adapters/task-sources/repo-schemas.js +234 -0
package/dist/adapters/task-sources/repo-task-source.d.ts +22 -0
package/dist/adapters/task-sources/repo-task-source.js +104 -0
package/dist/adapters/task-sources/repo-trigger.d.ts +52 -0
package/dist/adapters/task-sources/repo-trigger.js +153 -0
package/dist/adapters/task-sources/repo-validation.d.ts +49 -0
package/dist/adapters/task-sources/repo-validation.js +164 -0
package/dist/adapters/task-sources/yaml-task-source.d.ts +18 -0
package/dist/adapters/task-sources/yaml-task-source.js +136 -0
package/dist/agent-observer/agentic-provider.d.ts +132 -0
package/dist/agent-observer/agentic-provider.js +983 -0
package/dist/agent-observer/classifier.d.ts +62 -0
package/dist/agent-observer/classifier.js +269 -0
package/dist/agent-observer/index.d.ts +7 -0
package/dist/agent-observer/index.js +4 -0
package/dist/agent-observer/pricing.d.ts +35 -0
package/dist/agent-observer/pricing.js +82 -0
package/dist/agent-observer/provider.d.ts +77 -0
package/dist/agent-observer/provider.js +151 -0
package/dist/agent-observer/proxy.d.ts +91 -0
package/dist/agent-observer/proxy.js +321 -0
package/dist/agent-observer/test-imports.d.ts +7 -0
package/dist/agent-observer/test-imports.js +185 -0
package/dist/agent-observer/types.d.ts +137 -0
package/dist/agent-observer/types.js +16 -0
package/dist/assertions/source-isolation.d.ts +72 -0
package/dist/assertions/source-isolation.js +117 -0
package/dist/cli.d.ts +24 -0
package/dist/cli.js +199 -0
package/dist/commands/agent-report.d.ts +5 -0
package/dist/commands/agent-report.js +69 -0
package/dist/commands/baseline.d.ts +9 -0
package/dist/commands/baseline.js +141 -0
package/dist/commands/cache.d.ts +13 -0
package/dist/commands/cache.js +135 -0
package/dist/commands/calculate-scores.d.ts +8 -0
package/dist/commands/calculate-scores.js +48 -0
package/dist/commands/compare.d.ts +8 -0
package/dist/commands/compare.js +120 -0
package/dist/commands/completion.d.ts +18 -0
package/dist/commands/completion.js +260 -0
package/dist/commands/coverage-audit.d.ts +7 -0
package/dist/commands/coverage-audit.js +40 -0
package/dist/commands/discovery-report.d.ts +10 -0
package/dist/commands/discovery-report.js +44 -0
package/dist/commands/eval.d.ts +9 -0
package/dist/commands/eval.js +35 -0
package/dist/commands/explain-handler.d.ts +34 -0
package/dist/commands/explain-handler.js +719 -0
package/dist/commands/fetch-docs.d.ts +8 -0
package/dist/commands/fetch-docs.js +128 -0
package/dist/commands/generate-configs.d.ts +8 -0
package/dist/commands/generate-configs.js +46 -0
package/dist/commands/grader/index.d.ts +11 -0
package/dist/commands/grader/index.js +118 -0
package/dist/commands/init.d.ts +19 -0
package/dist/commands/init.js +150 -0
package/dist/commands/interactive.d.ts +12 -0
package/dist/commands/interactive.js +238 -0
package/dist/commands/lookup-doc.d.ts +15 -0
package/dist/commands/lookup-doc.js +84 -0
package/dist/commands/measure-retrieval.d.ts +5 -0
package/dist/commands/measure-retrieval.js +65 -0
package/dist/commands/pipeline-action.d.ts +71 -0
package/dist/commands/pipeline-action.js +305 -0
package/dist/commands/pipeline.d.ts +62 -0
package/dist/commands/pipeline.js +53 -0
package/dist/commands/pr-comment.d.ts +8 -0
package/dist/commands/pr-comment.js +47 -0
package/dist/commands/publish.d.ts +26 -0
package/dist/commands/publish.js +253 -0
package/dist/commands/readiness-report.d.ts +10 -0
package/dist/commands/readiness-report.js +104 -0
package/dist/commands/shared/options.d.ts +29 -0
package/dist/commands/shared/options.js +57 -0
package/dist/commands/update-quality-scores.d.ts +5 -0
package/dist/commands/update-quality-scores.js +20 -0
package/dist/commands/validate-tasks.d.ts +16 -0
package/dist/commands/validate-tasks.js +93 -0
package/dist/commands/validate.d.ts +9 -0
package/dist/commands/validate.js +73 -0
package/dist/commands/webhook-server.d.ts +5 -0
package/dist/commands/webhook-server.js +30 -0
package/dist/commands/weekly-digest.d.ts +10 -0
package/dist/commands/weekly-digest.js +104 -0
package/dist/composition-root.d.ts +26 -0
package/dist/composition-root.js +107 -0
package/dist/interpolate.d.ts +26 -0
package/dist/interpolate.js +70 -0
package/dist/job-store.d.ts +104 -0
package/dist/job-store.js +188 -0
package/dist/lib/agent-behavior-report.d.ts +8 -0
package/dist/lib/agent-behavior-report.js +185 -0
package/dist/lib/baseline.d.ts +19 -0
package/dist/lib/baseline.js +153 -0
package/dist/lib/calculate-scores.d.ts +23 -0
package/dist/lib/calculate-scores.js +42 -0
package/dist/lib/compare.d.ts +18 -0
package/dist/lib/compare.js +170 -0
package/dist/lib/coverage-audit.d.ts +4 -0
package/dist/lib/coverage-audit.js +42 -0
package/dist/lib/discovery-report.d.ts +13 -0
package/dist/lib/discovery-report.js +57 -0
package/dist/lib/fetch-docs.d.ts +30 -0
package/dist/lib/fetch-docs.js +171 -0
package/dist/lib/generate-configs.d.ts +25 -0
package/dist/lib/generate-configs.js +42 -0
package/dist/lib/grader-api.d.ts +21 -0
package/dist/lib/grader-api.js +34 -0
package/dist/lib/grader-compare.d.ts +19 -0
package/dist/lib/grader-compare.js +91 -0
package/dist/lib/grader-consistency.d.ts +27 -0
package/dist/lib/grader-consistency.js +79 -0
package/dist/lib/grader-sensitivity.d.ts +19 -0
package/dist/lib/grader-sensitivity.js +75 -0
package/dist/lib/grader-validate.d.ts +19 -0
package/dist/lib/grader-validate.js +78 -0
package/dist/lib/measure-retrieval.d.ts +14 -0
package/dist/lib/measure-retrieval.js +71 -0
package/dist/lib/pr-comment.d.ts +16 -0
package/dist/lib/pr-comment.js +28 -0
package/dist/lib/readiness-report.d.ts +13 -0
package/dist/lib/readiness-report.js +108 -0
package/dist/lib/webhook-server.d.ts +11 -0
package/dist/lib/webhook-server.js +24 -0
package/dist/lib/weekly-digest.d.ts +24 -0
package/dist/lib/weekly-digest.js +148 -0
package/dist/orchestration/build-app-context.d.ts +27 -0
package/dist/orchestration/build-app-context.js +81 -0
package/dist/orchestration/build-step-sequence.d.ts +15 -0
package/dist/orchestration/build-step-sequence.js +84 -0
package/dist/orchestration/config-to-source-overrides.d.ts +9 -0
package/dist/orchestration/config-to-source-overrides.js +28 -0
package/dist/orchestration/env-bridge.d.ts +21 -0
package/dist/orchestration/env-bridge.js +66 -0
package/dist/orchestration/index.d.ts +11 -0
package/dist/orchestration/index.js +11 -0
package/dist/orchestration/pipeline-orchestrator.d.ts +24 -0
package/dist/orchestration/pipeline-orchestrator.js +153 -0
package/dist/orchestration/step-runner.d.ts +20 -0
package/dist/orchestration/step-runner.js +88 -0
package/dist/orchestration/steps/calculate-scores-step.d.ts +13 -0
package/dist/orchestration/steps/calculate-scores-step.js +95 -0
package/dist/orchestration/steps/callback-step.d.ts +24 -0
package/dist/orchestration/steps/callback-step.js +76 -0
package/dist/orchestration/steps/compare-step.d.ts +14 -0
package/dist/orchestration/steps/compare-step.js +92 -0
package/dist/orchestration/steps/discovery-report-step.d.ts +13 -0
package/dist/orchestration/steps/discovery-report-step.js +55 -0
package/dist/orchestration/steps/fetch-docs-shell.d.ts +17 -0
package/dist/orchestration/steps/fetch-docs-shell.js +30 -0
package/dist/orchestration/steps/fetch-docs-step.d.ts +14 -0
package/dist/orchestration/steps/fetch-docs-step.js +135 -0
package/dist/orchestration/steps/gap-analysis-step.d.ts +16 -0
package/dist/orchestration/steps/gap-analysis-step.js +136 -0
package/dist/orchestration/steps/generate-configs-step.d.ts +14 -0
package/dist/orchestration/steps/generate-configs-step.js +85 -0
package/dist/orchestration/steps/grader-consistency-step.d.ts +13 -0
package/dist/orchestration/steps/grader-consistency-step.js +64 -0
package/dist/orchestration/steps/index.d.ts +19 -0
package/dist/orchestration/steps/index.js +19 -0
package/dist/orchestration/steps/mirror-repo-tasks-step.d.ts +21 -0
package/dist/orchestration/steps/mirror-repo-tasks-step.js +94 -0
package/dist/orchestration/steps/publish-report-step.d.ts +26 -0
package/dist/orchestration/steps/publish-report-step.js +216 -0
package/dist/orchestration/steps/readiness-step.d.ts +13 -0
package/dist/orchestration/steps/readiness-step.js +91 -0
package/dist/orchestration/steps/report-step.d.ts +12 -0
package/dist/orchestration/steps/report-step.js +49 -0
package/dist/orchestration/steps/run-eval-step.d.ts +17 -0
package/dist/orchestration/steps/run-eval-step.js +195 -0
package/dist/orchestration/steps/validate-step.d.ts +12 -0
package/dist/orchestration/steps/validate-step.js +41 -0
package/dist/pipeline/agent-behavior-report.d.ts +53 -0
package/dist/pipeline/agent-behavior-report.js +132 -0
package/dist/pipeline/attribution.d.ts +47 -0
package/dist/pipeline/attribution.js +226 -0
package/dist/pipeline/baseline.d.ts +37 -0
package/dist/pipeline/baseline.js +141 -0
package/dist/pipeline/cache.d.ts +101 -0
package/dist/pipeline/cache.js +283 -0
package/dist/pipeline/calculate-scores.d.ts +102 -0
package/dist/pipeline/calculate-scores.js +1128 -0
package/dist/pipeline/callback-delivery.d.ts +50 -0
package/dist/pipeline/callback-delivery.js +89 -0
package/dist/pipeline/checks.d.ts +39 -0
package/dist/pipeline/checks.js +280 -0
package/dist/pipeline/classify-url.d.ts +61 -0
package/dist/pipeline/classify-url.js +93 -0
package/dist/pipeline/compare.d.ts +31 -0
package/dist/pipeline/compare.js +208 -0
package/dist/pipeline/coverage-audit.d.ts +39 -0
package/dist/pipeline/coverage-audit.js +165 -0
package/dist/pipeline/degradations.d.ts +85 -0
package/dist/pipeline/degradations.js +242 -0
package/dist/pipeline/discovery-report.d.ts +55 -0
package/dist/pipeline/discovery-report.js +178 -0
package/dist/pipeline/eval-constants.d.ts +68 -0
package/dist/pipeline/eval-constants.js +111 -0
package/dist/pipeline/eval-fingerprint.d.ts +66 -0
package/dist/pipeline/eval-fingerprint.js +175 -0
package/dist/pipeline/expand-tasks.d.ts +220 -0
package/dist/pipeline/expand-tasks.js +421 -0
package/dist/pipeline/failure-modes.d.ts +46 -0
package/dist/pipeline/failure-modes.js +348 -0
package/dist/pipeline/fetch-url-content.d.ts +44 -0
package/dist/pipeline/fetch-url-content.js +93 -0
package/dist/pipeline/gap-analysis.d.ts +48 -0
package/dist/pipeline/gap-analysis.js +231 -0
package/dist/pipeline/generate-configs.d.ts +72 -0
package/dist/pipeline/generate-configs.js +395 -0
package/dist/pipeline/grader-api.d.ts +49 -0
package/dist/pipeline/grader-api.js +200 -0
package/dist/pipeline/grader-compare-runner.d.ts +44 -0
package/dist/pipeline/grader-compare-runner.js +301 -0
package/dist/pipeline/grader-comparison.d.ts +111 -0
package/dist/pipeline/grader-comparison.js +161 -0
package/dist/pipeline/grader-consistency-runner.d.ts +60 -0
package/dist/pipeline/grader-consistency-runner.js +270 -0
package/dist/pipeline/grader-consistency.d.ts +103 -0
package/dist/pipeline/grader-consistency.js +146 -0
package/dist/pipeline/grader-sensitivity-runner.d.ts +40 -0
package/dist/pipeline/grader-sensitivity-runner.js +282 -0
package/dist/pipeline/grader-sensitivity.d.ts +94 -0
package/dist/pipeline/grader-sensitivity.js +144 -0
package/dist/pipeline/grader-validate-runner.d.ts +38 -0
package/dist/pipeline/grader-validate-runner.js +229 -0
package/dist/pipeline/grader-validation.d.ts +107 -0
package/dist/pipeline/grader-validation.js +169 -0
package/dist/pipeline/map-request-to-config.d.ts +19 -0
package/dist/pipeline/map-request-to-config.js +80 -0
package/dist/pipeline/measure-retrieval.d.ts +59 -0
package/dist/pipeline/measure-retrieval.js +111 -0
package/dist/pipeline/mirror-repo-tasks.d.ts +86 -0
package/dist/pipeline/mirror-repo-tasks.js +350 -0
package/dist/pipeline/plan-format.d.ts +33 -0
package/dist/pipeline/plan-format.js +202 -0
package/dist/pipeline/plan.d.ts +169 -0
package/dist/pipeline/plan.js +708 -0
package/dist/pipeline/pr-comment.d.ts +19 -0
package/dist/pipeline/pr-comment.js +502 -0
package/dist/pipeline/probe.d.ts +52 -0
package/dist/pipeline/probe.js +390 -0
package/dist/pipeline/provenance.d.ts +47 -0
package/dist/pipeline/provenance.js +146 -0
package/dist/pipeline/readiness-report.d.ts +87 -0
package/dist/pipeline/readiness-report.js +205 -0
package/dist/pipeline/release-classification.d.ts +54 -0
package/dist/pipeline/release-classification.js +238 -0
package/dist/pipeline/release-report.d.ts +37 -0
package/dist/pipeline/release-report.js +222 -0
package/dist/pipeline/repo-eval-comment.d.ts +37 -0
package/dist/pipeline/repo-eval-comment.js +165 -0
package/dist/pipeline/repo-threshold-evaluator.d.ts +89 -0
package/dist/pipeline/repo-threshold-evaluator.js +162 -0
package/dist/pipeline/resolve-mappings.d.ts +35 -0
package/dist/pipeline/resolve-mappings.js +72 -0
package/dist/pipeline/retrieval-metrics.d.ts +39 -0
package/dist/pipeline/retrieval-metrics.js +136 -0
package/dist/pipeline/reverse-mapping.d.ts +67 -0
package/dist/pipeline/reverse-mapping.js +88 -0
package/dist/pipeline/schemas.d.ts +9 -0
package/dist/pipeline/schemas.js +9 -0
package/dist/pipeline/steps/calculate-scores-step.d.ts +11 -0
package/dist/pipeline/steps/calculate-scores-step.js +89 -0
package/dist/pipeline/steps/compare-step.d.ts +18 -0
package/dist/pipeline/steps/compare-step.js +90 -0
package/dist/pipeline/steps/eval-step.d.ts +53 -0
package/dist/pipeline/steps/eval-step.js +347 -0
package/dist/pipeline/steps/fetch-docs-step.d.ts +11 -0
package/dist/pipeline/steps/fetch-docs-step.js +84 -0
package/dist/pipeline/steps/generate-configs-step.d.ts +11 -0
package/dist/pipeline/steps/generate-configs-step.js +98 -0
package/dist/pipeline/steps/grader-consistency-step.d.ts +21 -0
package/dist/pipeline/steps/grader-consistency-step.js +74 -0
package/dist/pipeline/steps/publish-report-step.d.ts +57 -0
package/dist/pipeline/steps/publish-report-step.js +243 -0
package/dist/pipeline/steps/report-step.d.ts +13 -0
package/dist/pipeline/steps/report-step.js +56 -0
package/dist/pipeline/steps/update-scores-step.d.ts +11 -0
package/dist/pipeline/steps/update-scores-step.js +42 -0
package/dist/pipeline/targeted-loo.d.ts +88 -0
package/dist/pipeline/targeted-loo.js +203 -0
package/dist/pipeline/thresholds.d.ts +27 -0
package/dist/pipeline/thresholds.js +245 -0
package/dist/pipeline/types.d.ts +10 -0
package/dist/pipeline/types.js +10 -0
package/dist/pipeline/validate.d.ts +67 -0
package/dist/pipeline/validate.js +406 -0
package/dist/pipeline/webhook-server.d.ts +37 -0
package/dist/pipeline/webhook-server.js +133 -0
package/dist/report-store.d.ts +84 -0
package/dist/report-store.js +208 -0
package/dist/sanity/client.d.ts +38 -0
package/dist/sanity/client.js +86 -0
package/dist/sanity/portable-text.d.ts +11 -0
package/dist/sanity/portable-text.js +211 -0
package/dist/sanity/queries.d.ts +133 -0
package/dist/sanity/queries.js +300 -0
package/dist/schedules/digest.d.ts +116 -0
package/dist/schedules/digest.js +156 -0
package/dist/schedules/index.d.ts +12 -0
package/dist/schedules/index.js +10 -0
package/dist/schedules/loader.d.ts +31 -0
package/dist/schedules/loader.js +73 -0
package/dist/schedules/schema.d.ts +9 -0
package/dist/schedules/schema.js +9 -0
package/dist/scripts/agent-behavior-report.d.ts +19 -0
package/dist/scripts/agent-behavior-report.js +315 -0
package/dist/scripts/baseline.d.ts +43 -0
package/dist/scripts/baseline.js +267 -0
package/dist/scripts/calculate-scores.d.ts +166 -0
package/dist/scripts/calculate-scores.js +1296 -0
package/dist/scripts/compare.d.ts +22 -0
package/dist/scripts/compare.js +334 -0
package/dist/scripts/coverage-audit.d.ts +44 -0
package/dist/scripts/coverage-audit.js +209 -0
package/dist/scripts/debug-eval.d.ts +19 -0
package/dist/scripts/debug-eval.js +73 -0
package/dist/scripts/discovery-report.d.ts +58 -0
package/dist/scripts/discovery-report.js +250 -0
package/dist/scripts/fetch-docs.d.ts +35 -0
package/dist/scripts/fetch-docs.js +472 -0
package/dist/scripts/generate-configs.d.ts +66 -0
package/dist/scripts/generate-configs.js +459 -0
package/dist/scripts/grader-api.d.ts +27 -0
package/dist/scripts/grader-api.js +206 -0
package/dist/scripts/grader-compare.d.ts +22 -0
package/dist/scripts/grader-compare.js +368 -0
package/dist/scripts/grader-consistency.d.ts +20 -0
package/dist/scripts/grader-consistency.js +313 -0
package/dist/scripts/grader-sensitivity.d.ts +22 -0
package/dist/scripts/grader-sensitivity.js +354 -0
package/dist/scripts/grader-validate.d.ts +19 -0
package/dist/scripts/grader-validate.js +267 -0
package/dist/scripts/measure-retrieval.d.ts +10 -0
package/dist/scripts/measure-retrieval.js +145 -0
package/dist/scripts/migrate-tasks-to-content-lake.d.ts +24 -0
package/dist/scripts/migrate-tasks-to-content-lake.js +327 -0
package/dist/scripts/pipeline.d.ts +76 -0
package/dist/scripts/pipeline.js +1031 -0
package/dist/scripts/pr-comment.d.ts +10 -0
package/dist/scripts/pr-comment.js +510 -0
package/dist/scripts/readiness-report.d.ts +88 -0
package/dist/scripts/readiness-report.js +342 -0
package/dist/scripts/update-quality-scores.d.ts +15 -0
package/dist/scripts/update-quality-scores.js +184 -0
package/dist/scripts/validate-task-sources.d.ts +21 -0
package/dist/scripts/validate-task-sources.js +210 -0
package/dist/scripts/validate.d.ts +13 -0
package/dist/scripts/validate.js +79 -0
package/dist/scripts/webhook-server.d.ts +26 -0
package/dist/scripts/webhook-server.js +147 -0
package/dist/scripts/weekly-digest.d.ts +24 -0
package/dist/scripts/weekly-digest.js +144 -0
package/dist/sinks/bigquery/index.d.ts +131 -0
package/dist/sinks/bigquery/index.js +222 -0
package/dist/sinks/format-slack.d.ts +64 -0
package/dist/sinks/format-slack.js +306 -0
package/dist/sinks/index.d.ts +23 -0
package/dist/sinks/index.js +18 -0
package/dist/sinks/loader.d.ts +18 -0
package/dist/sinks/loader.js +82 -0
package/dist/sinks/retry.d.ts +24 -0
package/dist/sinks/retry.js +52 -0
package/dist/sinks/schema.d.ts +9 -0
package/dist/sinks/schema.js +9 -0
package/dist/sinks/slack/format.d.ts +65 -0
package/dist/sinks/slack/format.js +327 -0
package/dist/sinks/slack/index.d.ts +27 -0
package/dist/sinks/slack/index.js +78 -0
package/dist/sinks/slack-sink.d.ts +27 -0
package/dist/sinks/slack-sink.js +78 -0
package/dist/sinks/types.d.ts +59 -0
package/dist/sinks/types.js +44 -0
package/dist/sinks/webhook/index.d.ts +19 -0
package/dist/sinks/webhook/index.js +50 -0
package/dist/sinks/webhook-sink.d.ts +19 -0
package/dist/sinks/webhook-sink.js +50 -0
package/dist/sources.d.ts +104 -0
package/dist/sources.js +292 -0
package/dist/webhook/budget.d.ts +42 -0
package/dist/webhook/budget.js +60 -0
package/dist/webhook/debounce.d.ts +67 -0
package/dist/webhook/debounce.js +76 -0
package/dist/webhook/dispatch.d.ts +45 -0
package/dist/webhook/dispatch.js +84 -0
package/dist/webhook/eval-request-handler.d.ts +87 -0
package/dist/webhook/eval-request-handler.js +181 -0
package/dist/webhook/handler.d.ts +88 -0
package/dist/webhook/handler.js +203 -0
package/dist/webhook/index.d.ts +17 -0
package/dist/webhook/index.js +12 -0
package/dist/webhook/types.d.ts +109 -0
package/dist/webhook/types.js +10 -0
package/package.json +72 -0
package/tasks/.expanded.agentic.yaml +51 -0
package/tasks/.expanded.yaml +66 -0
package/tasks/frameworks.yaml +98 -0
package/tasks/functions.yaml +51 -0
package/tasks/groq.yaml +216 -0
package/tasks/nextjs-live.yaml +62 -0
package/tasks/studio-setup.yaml +111 -0
package/tasks/visual-editing.yaml +120 -0

package/dist/agent-observer/classifier.d.ts ADDED Viewed

@@ -0,0 +1,62 @@
+/**
+ * classifier.ts
+ *
+ * Classifies raw ObservedRequest records into meaningful categories:
+ *   - Doc page visits (sanity.io/docs/*)
+ *   - Search queries (search endpoints, query params)
+ *   - Sanity API calls (api.sanity.io, apicdn.sanity.io, cdn.sanity.io)
+ *   - External requests (everything else)
+ *
+ * Also extracts metadata like doc slugs, search query text, and page titles.
+ */
+import type { ObservedRequest, DocPageVisit, SearchQuery, ApiCall, ExternalRequest } from "./types.js";
+export interface ClassifiedRequests {
+    apiCalls: ApiCall[]; // Sanity API calls
+    docPageVisits: DocPageVisit[]; // visits to sanity.io/docs pages
+    externalRequests: ExternalRequest[]; // everything else
+    searchQueries: SearchQuery[]; // search queries
+}
+/**
+ * Classifies raw observed requests into the four ClassifiedRequests buckets (doc page visits, search queries, Sanity API calls, external requests).
+ */
+export declare function classifyRequests(requests: ObservedRequest[]): ClassifiedRequests;
+/**
+ * Extracts the API endpoint path from a Sanity API URL, without the leading API version segment.
+ *
+ * Example:
+ *   https://api.sanity.io/v2021-03-25/data/query/production → "/data/query/production"
+ */
+export declare function extractApiEndpoint(url: string): string;
+/**
+ * Extracts the doc slug (the path after /docs/, with no leading slash) from a sanity.io/docs URL.
+ *
+ * Examples:
+ *   https://www.sanity.io/docs/groq-introduction → "groq-introduction"
+ *   https://www.sanity.io/docs/getting-started/create-a-schema → "getting-started/create-a-schema"
+ *   https://www.sanity.io/docs → "" (root docs page)
+ */
+export declare function extractDocSlug(url: string): string;
+/**
+ * Extracts the domain from a URL string (presumably the hostname; TODO confirm against the implementation).
+ */
+export declare function extractDomain(url: string): string;
+/**
+ * Attempts to extract a page title from an optional HTML response preview; the result may be undefined.
+ */
+export declare function extractPageTitle(responsePreview?: string): string | undefined;
+/**
+ * Extracts the search query text from an observed request's URL or request body.
+ */
+export declare function extractSearchQuery(req: ObservedRequest): string;
+/**
+ * Determines if a request is to a Sanity documentation page (a sanity.io/docs URL).
+ */
+export declare function isDocPageRequest(req: ObservedRequest): boolean;
+/**
+ * Determines if a request is to the Sanity API (hosts such as api.sanity.io or apicdn.sanity.io), not the docs site.
+ */
+export declare function isSanityApiRequest(req: ObservedRequest): boolean;
+/**
+ * Determines if a request is a search operation (a search endpoint or a search query parameter).
+ */
+export declare function isSearchRequest(req: ObservedRequest): boolean;

package/dist/agent-observer/classifier.js ADDED Viewed

@@ -0,0 +1,269 @@
+/**
+ * classifier.ts
+ *
+ * Classifies raw ObservedRequest records into meaningful categories:
+ *   - Doc page visits (sanity.io/docs/*)
+ *   - Search queries (search endpoints, query params)
+ *   - Sanity API calls (api.sanity.io, apicdn.sanity.io)
+ *   - External requests (everything else)
+ *
+ * Also extracts metadata like doc slugs, search query text, and page titles.
+ */
+import { z } from "zod";
+// ---------------------------------------------------------------------------
+// Zod schemas for parsed JSON bodies (search request payloads)
+// ---------------------------------------------------------------------------
+/**
+ * Single search request item (e.g., Algolia multi-index entry).
+ * Both fields are optional strings: `query` is the plain query text and
+ * `params` is read elsewhere in this module as a URL-encoded parameter string
+ * that may contain a `query` entry.
+ */
+const SearchRequestItemSchema = z.object({
+    params: z.string().optional(),
+    query: z.string().optional(),
+});
+/**
+ * Search body payload — covers Algolia single-query, params-based, and multi-index formats.
+ * Every field is optional, so an empty JSON object also validates; callers
+ * must check which fields are actually present.
+ */
+const SearchBodySchema = z.object({
+    params: z.string().optional(),
+    query: z.string().optional(),
+    requests: z.array(SearchRequestItemSchema).optional(),
+});
+// ---------------------------------------------------------------------------
+// URL pattern matchers
+// ---------------------------------------------------------------------------
+/**
+ * Matches sanity.io documentation pages, either directly or through the
+ * r.jina.ai reader proxy, with or without the "www." prefix.
+ * NOTE(review): nothing delimits the end of "docs", so a path such as
+ * "/docs-archive" also matches — confirm this is intended.
+ */
+const SANITY_DOCS_PATTERN = /^https?:\/\/(r\.jina\.ai\/https?:\/\/)?(www\.)?sanity\.io\/docs(\/[^?#]*)?/;
+/**
+ * Matches Sanity API endpoints on the api, apicdn and cdn hosts.
+ * NOTE(review): the host label must directly follow "://", so hosts with an
+ * extra leading label (e.g. "<projectId>.api.sanity.io") do not match —
+ * confirm against the URLs the recorder actually sees.
+ */
+const SANITY_API_PATTERN = /^https?:\/\/(api|apicdn|cdn)\.sanity\.io/;
+/**
+ * Matches any sanity.io page (blog, plugins, docs, etc.) — including via the
+ * r.jina.ai proxy. Used to keep sanity.io pages out of the "external" bucket.
+ * NOTE(review): the pattern is not anchored after "sanity.io", so a host like
+ * "sanity.io.example.com" would also match — confirm whether that matters.
+ */
+const SANITY_SITE_PATTERN = /^https?:\/\/(r\.jina\.ai\/https?:\/\/)?(www\.)?sanity\.io/;
+/**
+ * Common search endpoint patterns. Each entry is case-insensitive and
+ * unanchored, i.e. it may match anywhere inside the request URL.
+ * NOTE(review): because the entries are unanchored, the plain DuckDuckGo /
+ * Google / Bing entries already match the r.jina.ai proxied forms, which
+ * makes the last three entries redundant (harmless).
+ */
+const SEARCH_PATTERNS = [
+    // Sanity docs search (Algolia, custom, etc.)
+    /sanity\.io\/docs.*[?&](q|query|search)=/i,
+    /sanity\.io\/api\/search/i,
+    /sanity\.io\/search/i,
+    // Algolia search (used by many doc sites)
+    /algolia(net)?\.com.*\/quer(y|ies)/i,
+    /algolia(net)?\.com.*\/search/i,
+    // Google / Bing / DuckDuckGo
+    /google\.com\/search/i,
+    /bing\.com\/search/i,
+    /duckduckgo\.com/i,
+    // Jina Reader proxied search URLs
+    /r\.jina\.ai\/https?:\/\/(www\.)?duckduckgo\.com/i,
+    /r\.jina\.ai\/https?:\/\/(www\.)?google\.com\/search/i,
+    /r\.jina\.ai\/https?:\/\/(www\.)?bing\.com\/search/i,
+];
+/**
+ * Classifies an array of raw observed requests into categorized buckets.
+ */
+export function classifyRequests(requests) {
+    const result = {
+        apiCalls: [],
+        docPageVisits: [],
+        externalRequests: [],
+        searchQueries: [],
+    };
+    for (const req of requests) {
+        // Skip failed requests (no response)
+        if (req.statusCode === 0)
+            continue;
+        // Order matters: API calls first (they may have ?query= params that look like searches),
+        // then searches, then doc pages, then external
+        if (isSanityApiRequest(req)) {
+            result.apiCalls.push({
+                endpoint: extractApiEndpoint(req.url),
+                method: req.method,
+                timestamp: req.timestamp,
+                url: req.url,
+            });
+        }
+        else if (isSearchRequest(req)) {
+            result.searchQueries.push({
+                query: extractSearchQuery(req),
+                timestamp: req.timestamp,
+                url: req.url,
+            });
+        }
+        else if (isDocPageRequest(req)) {
+            const slug = extractDocSlug(req.url);
+            result.docPageVisits.push({
+                contentSize: req.responseSize,
+                slug,
+                timestamp: req.timestamp,
+                title: extractPageTitle(req.responsePreview),
+                url: req.url,
+            });
+        }
+        else if (!SANITY_SITE_PATTERN.test(req.url)) {
+            // External request (not on sanity.io at all)
+            result.externalRequests.push({
+                domain: extractDomain(req.url),
+                method: req.method,
+                timestamp: req.timestamp,
+                url: req.url,
+            });
+        }
+        // else: sanity.io non-docs page — we track it in raw requests but don't classify it
+    }
+    return result;
+}
+/**
+ * Extracts the API endpoint path from a Sanity API URL.
+ *
+ * Example:
+ *   https://api.sanity.io/v2021-03-25/data/query/production → "/data/query/production"
+ */
+export function extractApiEndpoint(url) {
+    const match = url.match(/sanity\.io\/(v[\d-]+)?(.*)/);
+    if (!match)
+        return new URL(url).pathname;
+    return match[2] || "/";
+}
+/**
+ * Extracts the doc slug from a sanity.io/docs URL.
+ *
+ * Examples:
+ *   https://www.sanity.io/docs/groq-introduction → "groq-introduction"
+ *   https://www.sanity.io/docs/getting-started/create-a-schema → "getting-started/create-a-schema"
+ *   https://www.sanity.io/docs → "" (root docs page)
+ */
+export function extractDocSlug(url) {
+    const match = url.match(/sanity\.io\/docs\/([^?#]+)/);
+    if (!match)
+        return "";
+    // Remove trailing slash and .md extension
+    return match[1].replace(/\/$/, "").replace(/\.md$/, "");
+}
+// ---------------------------------------------------------------------------
+// Metadata extractors
+// ---------------------------------------------------------------------------
+/**
+ * Extracts the domain from a URL.
+ */
+export function extractDomain(url) {
+    try {
+        return new URL(url).hostname;
+    }
+    catch {
+        const match = url.match(/https?:\/\/([^/?#]+)/);
+        return match ? match[1] : "unknown";
+    }
+}
+/**
+ * Attempts to extract a page title from an HTML response preview.
+ */
+export function extractPageTitle(responsePreview) {
+    if (!responsePreview)
+        return undefined;
+    const match = responsePreview.match(/<title[^>]*>([^<]+)<\/title>/i);
+    return match ? match[1].trim() : undefined;
+}
+/**
+ * Extracts a search query string from a URL or request body.
+ */
+export function extractSearchQuery(req) {
+    // Handle Jina-proxied URLs by extracting the inner URL's query params
+    // e.g., https://r.jina.ai/https://duckduckgo.com/?q=sanity+schema
+    let searchUrl = req.url;
+    const jinaMatch = req.url.match(/r\.jina\.ai\/(https?:\/\/.+)/);
+    if (jinaMatch) {
+        searchUrl = jinaMatch[1];
+    }
+    // Try URL query parameters first
+    try {
+        const urlObj = new URL(searchUrl);
+        for (const param of ["q", "query", "search", "s"]) {
+            const value = urlObj.searchParams.get(param);
+            if (value)
+                return value;
+        }
+    }
+    catch {
+        // Invalid URL — try regex fallback
+        const match = req.url.match(/[?&](q|query|search|s)=([^&]+)/);
+        if (match)
+            return decodeURIComponent(match[2]);
+    }
+    // Try request body (for POST search requests like Algolia)
+    if (req.body) {
+        try {
+            const bodyObj = SearchBodySchema.parse(JSON.parse(req.body));
+            // Algolia format
+            if (bodyObj.query)
+                return bodyObj.query;
+            if (bodyObj.params) {
+                const params = new URLSearchParams(bodyObj.params);
+                const q = params.get("query");
+                if (q)
+                    return q;
+            }
+            // Array of requests (Algolia multi-index)
+            if (bodyObj.requests) {
+                const queries = bodyObj.requests
+                    .map((r) => {
+                    if (r.query)
+                        return r.query;
+                    if (r.params) {
+                        const p = new URLSearchParams(r.params);
+                        return p.get("query");
+                    }
+                    return null;
+                })
+                    .filter(Boolean);
+                if (queries.length > 0)
+                    return queries.join("; ");
+            }
+        }
+        catch {
+            // Not JSON — ignore
+        }
+    }
+    return "";
+}
+/**
+ * Determines if a request is to a Sanity documentation page.
+ */
+export function isDocPageRequest(req) {
+    return (SANITY_DOCS_PATTERN.test(req.url) &&
+        !isSearchRequest(req) &&
+        req.method === "GET" &&
+        // Filter out static assets
+        !isStaticAsset(req.url));
+}
+/**
+ * Determines if a request is to the Sanity API (not docs).
+ */
+export function isSanityApiRequest(req) {
+    return SANITY_API_PATTERN.test(req.url);
+}
+// ---------------------------------------------------------------------------
+// Request predicates
+// ---------------------------------------------------------------------------
+/**
+ * Determines if a request is a search operation.
+ */
+export function isSearchRequest(req) {
+    // Exclude Sanity API calls — they may have ?query= params (GROQ)
+    if (SANITY_API_PATTERN.test(req.url))
+        return false;
+    // Check URL against search patterns
+    if (SEARCH_PATTERNS.some((p) => p.test(req.url)))
+        return true;
+    // Check POST body for search-like payloads (Algolia, etc.)
+    if (req.method === "POST" && req.body) {
+        try {
+            const bodyObj = SearchBodySchema.safeParse(JSON.parse(req.body));
+            if (bodyObj.success && (bodyObj.data.query || bodyObj.data.requests))
+                return true;
+        }
+        catch {
+            /* not JSON */
+        }
+    }
+    return false;
+}
+/**
+ * Determines if a URL points to a static asset (JS, CSS, images, fonts).
+ */
+function isStaticAsset(url) {
+    const assetExtensions = /\.(js|css|png|jpg|jpeg|gif|svg|ico|woff2?|ttf|eot|map)(\?|$)/i;
+    return assetExtensions.test(url);
+}

package/dist/agent-observer/index.d.ts ADDED Viewed

@@ -0,0 +1,7 @@
+export { classifyRequests, extractDocSlug, extractSearchQuery, extractApiEndpoint, extractDomain, extractPageTitle, isDocPageRequest, isSearchRequest, isSanityApiRequest, } from "./classifier.js";
+export type { ClassifiedRequests } from "./classifier.js";
+export { calculateCost, formatCost, lookupPricing } from "./pricing.js";
+export { default as InstrumentedProvider } from "./provider.js";
+export { RequestRecorder } from "./proxy.js";
+export type { RecorderOptions } from "./proxy.js";
+export type { ObservedRequest, DocPageVisit, SearchQuery, ApiCall, ExternalRequest, AgentBehaviorLog, AgentBehaviorSummary, } from "./types.js";

package/dist/agent-observer/index.js ADDED Viewed

@@ -0,0 +1,4 @@
+export { classifyRequests, extractDocSlug, extractSearchQuery, extractApiEndpoint, extractDomain, extractPageTitle, isDocPageRequest, isSearchRequest, isSanityApiRequest, } from "./classifier.js";
+export { calculateCost, formatCost, lookupPricing } from "./pricing.js";
+export { default as InstrumentedProvider } from "./provider.js";
+export { RequestRecorder } from "./proxy.js";

package/dist/agent-observer/pricing.d.ts ADDED Viewed

@@ -0,0 +1,35 @@
+/**
+ * pricing.ts
+ *
+ * Model pricing lookup and cost calculation for OpenAI and Anthropic models.
+ * Used by custom providers (agentic, instrumented) to calculate
+ * per-request cost from token counts.
+ *
+ * Prices are per-token (not per-million) for simpler arithmetic.
+ * Source: https://openai.com/api/pricing/ and https://docs.anthropic.com/en/docs/about-claude/models
+ */
+/** Price of a single model, in USD per token (not per million tokens). */
+interface ModelPricing {
+    /** Cost per input/prompt token */
+    input: number;
+    /** Cost per output/completion token */
+    output: number;
+}
+/**
+ * Calculate the cost in USD for a given model and token usage:
+ * input price × promptTokens + output price × completionTokens.
+ * Returns 0 if the model is not in the pricing table, so a result of 0 can
+ * mean either "free" or "unknown model".
+ */
+export declare function calculateCost(model: string, promptTokens: number, completionTokens: number): number;
+/**
+ * Format a cost value as a human-readable USD string.
+ * - Exactly 0: "$0.00"
+ * - Under $0.01: shows 4 decimal places (e.g., "$0.0023")
+ * - Under $1: shows 2 decimal places (e.g., "$0.45")
+ * - $1+: shows 2 decimal places (e.g., "$12.50")
+ */
+export declare function formatCost(cost: number): string;
+/**
+ * Look up pricing for a model. Supports exact matches and prefix
+ * matching (e.g., "gpt-4o-2024-08-06" matches "gpt-4o"). An exact match is
+ * always tried before any prefix match.
+ * Returns undefined if no pricing is found.
+ */
+export declare function lookupPricing(model: string): ModelPricing | undefined;
+export {};

package/dist/agent-observer/pricing.js ADDED Viewed

@@ -0,0 +1,82 @@
+/**
+ * pricing.ts
+ *
+ * Model pricing lookup and cost calculation for OpenAI and Anthropic models.
+ * Used by custom providers (agentic, instrumented) to calculate
+ * per-request cost from token counts.
+ *
+ * Prices are per-token (not per-million) for simpler arithmetic.
+ * Source: https://openai.com/api/pricing/ and https://docs.anthropic.com/en/docs/about-claude/models
+ */
+// ---------------------------------------------------------------------------
+// Pricing table
+// ---------------------------------------------------------------------------
+/**
+ * Per-token pricing for supported models.
+ * Prices are in USD. Update when providers change pricing or new models
+ * are added to models.yaml.
+ *
+ * Each entry is written as "<USD per million tokens> / 1_000_000" so the
+ * figures can be compared directly with the providers' published price lists.
+ * Several keys are prefixes of other keys (e.g. "gpt-4o" / "gpt-4o-mini",
+ * "gpt-4.1" / "gpt-4.1-mini"); any prefix-based lookup has to account for that.
+ *
+ * NOTE(review): "claude-3.5-sonnet-20241022" uses a dot where the other Claude
+ * keys use hyphens — confirm it matches the model id used in models.yaml.
+ * NOTE(review): the figures are a snapshot; verify them against the provider
+ * pricing pages before relying on the computed costs.
+ */
+// Source: https://openai.com/api/pricing/ and https://docs.anthropic.com/en/docs/about-claude/models
+const MODEL_PRICING = {
+    // Anthropic models
+    "claude-3.5-sonnet-20241022": {
+        input: 3 / 1_000_000,
+        output: 15 / 1_000_000,
+    },
+    "claude-opus-4-5-20251101": { input: 15 / 1_000_000, output: 75 / 1_000_000 },
+    "claude-opus-4-6": { input: 15 / 1_000_000, output: 75 / 1_000_000 },
+    "claude-sonnet-4-20250514": { input: 3 / 1_000_000, output: 15 / 1_000_000 },
+    // OpenAI models
+    "gpt-4-turbo": { input: 10 / 1_000_000, output: 30 / 1_000_000 },
+    "gpt-4.1": { input: 2 / 1_000_000, output: 8 / 1_000_000 },
+    "gpt-4.1-mini": { input: 0.4 / 1_000_000, output: 1.6 / 1_000_000 },
+    "gpt-4.1-nano": { input: 0.1 / 1_000_000, output: 0.4 / 1_000_000 },
+    "gpt-4o": { input: 2.5 / 1_000_000, output: 10 / 1_000_000 },
+    "gpt-4o-mini": { input: 0.15 / 1_000_000, output: 0.6 / 1_000_000 },
+    "gpt-5-2025-08-07": { input: 10 / 1_000_000, output: 30 / 1_000_000 },
+    "gpt-5.2": { input: 2 / 1_000_000, output: 8 / 1_000_000 },
+    "gpt-5.4": { input: 2 / 1_000_000, output: 8 / 1_000_000 },
+    "o3-mini": { input: 1.1 / 1_000_000, output: 4.4 / 1_000_000 },
+};
+// ---------------------------------------------------------------------------
+// Public API
+// ---------------------------------------------------------------------------
+/**
+ * Calculate the cost in USD for a given model and token usage.
+ * Returns 0 if the model is not in the pricing table.
+ */
+export function calculateCost(model, promptTokens, completionTokens) {
+    const pricing = lookupPricing(model);
+    if (!pricing)
+        return 0;
+    return pricing.input * promptTokens + pricing.output * completionTokens;
+}
+/**
+ * Format a cost value as a human-readable USD string.
+ * - Under $0.01: shows 4 decimal places (e.g., "$0.0023")
+ * - Under $1: shows 2 decimal places (e.g., "$0.45")
+ * - $1+: shows 2 decimal places (e.g., "$12.50")
+ */
+export function formatCost(cost) {
+    if (cost === 0)
+        return "$0.00";
+    if (cost < 0.01)
+        return `$${cost.toFixed(4)}`;
+    return `$${cost.toFixed(2)}`;
+}
+/**
+ * Look up pricing for a model. Supports exact matches and prefix
+ * matching (e.g., "gpt-4o-2024-08-06" matches "gpt-4o").
+ * Returns undefined if no pricing is found.
+ */
+export function lookupPricing(model) {
+    // Exact match first
+    if (MODEL_PRICING[model])
+        return MODEL_PRICING[model];
+    // Prefix match: "gpt-4o-2024-08-06" → "gpt-4o"
+    for (const [key, pricing] of Object.entries(MODEL_PRICING)) {
+        if (model.startsWith(key))
+            return pricing;
+    }
+    return undefined;
+}

package/dist/agent-observer/provider.d.ts ADDED Viewed

@@ -0,0 +1,77 @@
+/**
+ * provider.ts
+ *
+ * Instrumented Promptfoo custom provider that calls the OpenAI Chat
+ * Completions API and records HTTP activity routed through its RequestRecorder.
+ *
+ * This is the integration point between the observation infrastructure
+ * (RequestRecorder) and Promptfoo's evaluation pipeline. It:
+ *
+ *   1. Starts a recording session before the LLM call
+ *   2. Delegates to the real provider (calls OpenAI directly)
+ *   3. Stops recording and attaches the AgentBehaviorLog to
+ *      ProviderResponse.metadata so it flows through scoring
+ *
+ * Promptfoo config usage:
+ *
+ *   providers:
+ *     - id: file://src/agent-observer/provider.ts
+ *       label: "gpt-4o (observed)"
+ *       config:
+ *         model: gpt-4o
+ *         temperature: 0
+ *         max_tokens: 4096
+ *         observe: true
+ *         observerOptions:
+ *           maxPreviewBytes: 2048
+ *           captureResponsePreview: true
+ *
+ * Promptfoo loads this file and instantiates the default export class.
+ */
+import { RequestRecorder } from "./proxy.js";
+/**
+ * Subset of Promptfoo's per-call context that this provider reads.
+ * It is only used to derive a task description for the recording session.
+ */
+interface CallApiContextParams {
+    /** Prompt info; `label` is the fallback task description. */
+    prompt?: {
+        raw: string;
+        label?: string;
+    };
+    /** Test-case variables; `vars.task`, when set, is the task description. */
+    vars?: Record<string, object | string>;
+}
+/** Options passed to the provider constructor. */
+interface ProviderOptions {
+    /**
+     * Provider config block. Keys read by the implementation: `model`,
+     * `temperature`, `max_tokens`, `apiKey`, `observe`, `observerOptions`.
+     */
+    config?: Record<string, unknown>;
+    /** Provider id; defaults to "instrumented-observer". */
+    id?: string;
+}
+/** Result returned from callApi for one test case. */
+interface ProviderResponse {
+    cached?: boolean;
+    /** Cost of the call in USD, computed from token usage. */
+    cost?: number;
+    /** Error message; set (with `output` undefined) when the call failed. */
+    error?: string;
+    /**
+     * Extra data. The implementation stores `latencyMs` and `model` here and,
+     * when observation is enabled, `agentBehavior` and `agentBehaviorSummary`.
+     */
+    metadata?: Record<string, unknown>;
+    /** Model output (the first choice's message content). */
+    output?: object | string;
+    /** Token counts as reported in the API response's `usage` object. */
+    tokenUsage?: {
+        total?: number;
+        prompt?: number;
+        completion?: number;
+        cached?: number;
+    };
+}
+/**
+ * Promptfoo provider that sends the prompt to the OpenAI Chat Completions API
+ * and, unless `config.observe` is `false`, attaches the recorder's behavior
+ * log to the response metadata.
+ */
+export default class InstrumentedProvider {
+    /** Provider config as passed in the options ({} when omitted). */
+    config: Record<string, unknown>;
+    /** Id from the options, or "instrumented-observer". */
+    protected providerId: string;
+    /** RequestRecorder created from `config.observerOptions`. */
+    private recorder;
+    constructor(options: ProviderOptions);
+    /**
+     * Main Promptfoo provider entry point. Called for each test case.
+     * Failures of the underlying call are reported through the `error` field
+     * of the returned response.
+     */
+    callApi(prompt: string, context?: CallApiContextParams): Promise<ProviderResponse>;
+    /**
+     * Exposes the recorder for external integrations (e.g., agents that
+     * make their own HTTP requests outside the provider call).
+     */
+    getRecorder(): RequestRecorder;
+    /** Returns the provider id prefixed with "instrumented:". */
+    id(): string;
+    /**
+     * Calls OpenAI Chat Completions API directly. Uses the recorder's
+     * fetch wrapper so the LLM call itself is captured in the observation log.
+     */
+    private callOpenAI;
+}
+export {};

package/dist/agent-observer/provider.js ADDED Viewed

@@ -0,0 +1,151 @@
+/**
+ * provider.ts
+ *
+ * Instrumented Promptfoo custom provider that calls the OpenAI Chat
+ * Completions API and records HTTP activity routed through its RequestRecorder.
+ *
+ * This is the integration point between the observation infrastructure
+ * (RequestRecorder) and Promptfoo's evaluation pipeline. It:
+ *
+ *   1. Starts a recording session before the LLM call
+ *   2. Delegates to the real provider (calls OpenAI directly)
+ *   3. Stops recording and attaches the AgentBehaviorLog to
+ *      ProviderResponse.metadata so it flows through scoring
+ *
+ * Promptfoo config usage:
+ *
+ *   providers:
+ *     - id: file://src/agent-observer/provider.ts
+ *       label: "gpt-4o (observed)"
+ *       config:
+ *         model: gpt-4o
+ *         temperature: 0
+ *         max_tokens: 4096
+ *         observe: true
+ *         observerOptions:
+ *           maxPreviewBytes: 2048
+ *           captureResponsePreview: true
+ *
+ * Promptfoo loads this file and instantiates the default export class.
+ */
+import { randomUUID } from "crypto";
+import { fileURLToPath } from "node:url";
+import { config as loadDotenv } from "dotenv";
+import { RequestRecorder } from "./proxy.js";
+import { calculateCost } from "./pricing.js";
+loadDotenv({
+    override: true,
+    path: new URL("../../.env", import.meta.url).pathname,
+});
+// ---------------------------------------------------------------------------
+// Provider implementation
+// ---------------------------------------------------------------------------
+export default class InstrumentedProvider {
+    config;
+    providerId;
+    recorder;
+    constructor(options) {
+        this.providerId = options.id ?? "instrumented-observer";
+        this.config = options.config ?? {};
+        this.recorder = new RequestRecorder(this.config.observerOptions ?? {});
+    }
+    /**
+     * Main Promptfoo provider entry point. Called for each test case.
+     */
+    async callApi(prompt, context) {
+        const sessionId = randomUUID();
+        const taskDescription = context?.vars?.task ||
+            context?.prompt?.label ||
+            "unknown-task";
+        const observe = this.config.observe !== false;
+        // Start observation
+        if (observe) {
+            this.recorder.start(sessionId, this.id(), taskDescription);
+        }
+        let result;
+        try {
+            result = await this.callOpenAI(prompt);
+        }
+        catch (err) {
+            const error = err;
+            result = {
+                error: error.message,
+                output: undefined,
+            };
+        }
+        // Stop observation and attach behavior log
+        if (observe) {
+            const behaviorLog = this.recorder.stop();
+            result.metadata = {
+                ...(result.metadata ?? {}),
+                agentBehavior: behaviorLog,
+                agentBehaviorSummary: behaviorLog.summary,
+            };
+        }
+        return result;
+    }
+    /**
+     * Exposes the recorder for external integrations (e.g., agents that
+     * make their own HTTP requests outside the provider call).
+     */
+    getRecorder() {
+        return this.recorder;
+    }
+    id() {
+        return `instrumented:${this.providerId}`;
+    }
+    /**
+     * Calls OpenAI Chat Completions API directly. Uses the recorder's
+     * fetch wrapper so the LLM call itself is captured in the observation log.
+     */
+    async callOpenAI(prompt) {
+        const model = this.config.model || "gpt-4o";
+        const temperature = this.config.temperature ?? 0;
+        const maxTokens = this.config.max_tokens || 4096;
+        const apiKey = this.config.apiKey || process.env.OPENAI_API_KEY;
+        if (!apiKey) {
+            return {
+                error: "OPENAI_API_KEY not set. Configure it in env or provider config.",
+                output: undefined,
+            };
+        }
+        // Use the recorder's fetch wrapper so the API call is recorded
+        const fetchFn = this.recorder.isRunning()
+            ? this.recorder.fetch.bind(this.recorder)
+            : globalThis.fetch;
+        const startTime = Date.now();
+        const response = await fetchFn("https://api.openai.com/v1/chat/completions", {
+            body: JSON.stringify({
+                max_tokens: maxTokens,
+                messages: [{ content: prompt, role: "user" }],
+                model,
+                temperature,
+            }),
+            headers: {
+                Authorization: `Bearer ${apiKey}`,
+                "Content-Type": "application/json",
+            },
+            method: "POST",
+        });
+        const data = (await response.json());
+        if (data.error) {
+            return {
+                error: data.error.message ?? "Unknown OpenAI error",
+                output: undefined,
+            };
+        }
+        const output = data.choices?.[0]?.message?.content ?? "";
+        return {
+            cost: calculateCost(model, data.usage?.prompt_tokens ?? 0, data.usage?.completion_tokens ?? 0),
+            metadata: {
+                latencyMs: Date.now() - startTime,
+                model,
+            },
+            output,
+            tokenUsage: {
+                completion: data.usage?.completion_tokens,
+                prompt: data.usage?.prompt_tokens,
+                total: data.usage?.total_tokens,
+            },
+        };
+    }
+}