@sanity/ailf 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +89 -0
- package/bin/ailf.js +64 -0
- package/canonical/grader-references/README.md +88 -0
- package/canonical/grader-references/groq.yaml +234 -0
- package/canonical/grader-references/studio-setup.yaml +275 -0
- package/canonical/reference-solutions/.gitkeep +1 -0
- package/canonical/reference-solutions/frameworks/nuxt.ts +119 -0
- package/canonical/reference-solutions/frameworks/remix.tsx +100 -0
- package/canonical/reference-solutions/functions/publish-webhook.ts +60 -0
- package/canonical/reference-solutions/groq/advanced-filtering.ts +379 -0
- package/canonical/reference-solutions/groq/blog-queries.ts +137 -0
- package/canonical/reference-solutions/groq/joins-references.ts +300 -0
- package/canonical/reference-solutions/nextjs/app-router-integration.tsx +128 -0
- package/canonical/reference-solutions/studio-setup/blog-schema.ts +143 -0
- package/canonical/reference-solutions/studio-setup/custom-tool.tsx +78 -0
- package/canonical/reference-solutions/visual-editing/live-preview.tsx +137 -0
- package/canonical/reference-solutions/visual-editing/presentation-nextjs.tsx +130 -0
- package/config/airbyte/ai_literacy_framework.connector.yaml +639 -0
- package/config/bigquery/README.md +74 -0
- package/config/bigquery/views/area_scores.sql +87 -0
- package/config/bigquery/views/reports.sql +49 -0
- package/config/features.yaml +116 -0
- package/config/models.yaml +115 -0
- package/config/prompts.yaml +75 -0
- package/config/rubrics.yaml +62 -0
- package/config/schedules.yaml +43 -0
- package/config/sinks.yaml +54 -0
- package/config/sources.yaml +51 -0
- package/config/thresholds.yaml +49 -0
- package/dist/_vendor/ailf-core/examples/index.d.ts +190 -0
- package/dist/_vendor/ailf-core/examples/index.js +285 -0
- package/dist/_vendor/ailf-core/index.d.ts +17 -0
- package/dist/_vendor/ailf-core/index.js +17 -0
- package/dist/_vendor/ailf-core/ports/cache-store.d.ts +72 -0
- package/dist/_vendor/ailf-core/ports/cache-store.js +17 -0
- package/dist/_vendor/ailf-core/ports/config-source.d.ts +33 -0
- package/dist/_vendor/ailf-core/ports/config-source.js +15 -0
- package/dist/_vendor/ailf-core/ports/context.d.ts +172 -0
- package/dist/_vendor/ailf-core/ports/context.js +14 -0
- package/dist/_vendor/ailf-core/ports/doc-fetcher.d.ts +131 -0
- package/dist/_vendor/ailf-core/ports/doc-fetcher.js +12 -0
- package/dist/_vendor/ailf-core/ports/eval-runner.d.ts +24 -0
- package/dist/_vendor/ailf-core/ports/eval-runner.js +8 -0
- package/dist/_vendor/ailf-core/ports/index.d.ts +15 -0
- package/dist/_vendor/ailf-core/ports/index.js +7 -0
- package/dist/_vendor/ailf-core/ports/logger.d.ts +36 -0
- package/dist/_vendor/ailf-core/ports/logger.js +11 -0
- package/dist/_vendor/ailf-core/ports/pipeline-step.d.ts +46 -0
- package/dist/_vendor/ailf-core/ports/pipeline-step.js +8 -0
- package/dist/_vendor/ailf-core/ports/task-source.d.ts +159 -0
- package/dist/_vendor/ailf-core/ports/task-source.js +72 -0
- package/dist/_vendor/ailf-core/schemas/callback-payload.d.ts +24 -0
- package/dist/_vendor/ailf-core/schemas/callback-payload.js +29 -0
- package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +55 -0
- package/dist/_vendor/ailf-core/schemas/eval-config.js +78 -0
- package/dist/_vendor/ailf-core/schemas/index.d.ts +16 -0
- package/dist/_vendor/ailf-core/schemas/index.js +16 -0
- package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +125 -0
- package/dist/_vendor/ailf-core/schemas/pipeline-request.js +67 -0
- package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +531 -0
- package/dist/_vendor/ailf-core/schemas/pipeline.js +318 -0
- package/dist/_vendor/ailf-core/schemas/schedules.d.ts +68 -0
- package/dist/_vendor/ailf-core/schemas/schedules.js +74 -0
- package/dist/_vendor/ailf-core/schemas/sinks.d.ts +207 -0
- package/dist/_vendor/ailf-core/schemas/sinks.js +108 -0
- package/dist/_vendor/ailf-core/services/comparison-formatters.d.ts +18 -0
- package/dist/_vendor/ailf-core/services/comparison-formatters.js +189 -0
- package/dist/_vendor/ailf-core/services/config-helpers.d.ts +41 -0
- package/dist/_vendor/ailf-core/services/config-helpers.js +86 -0
- package/dist/_vendor/ailf-core/services/index.d.ts +12 -0
- package/dist/_vendor/ailf-core/services/index.js +12 -0
- package/dist/_vendor/ailf-core/services/scoring.d.ts +49 -0
- package/dist/_vendor/ailf-core/services/scoring.js +222 -0
- package/dist/_vendor/ailf-core/types/index.d.ts +1082 -0
- package/dist/_vendor/ailf-core/types/index.js +21 -0
- package/dist/_vendor/ailf-core/types/scoring-input.d.ts +54 -0
- package/dist/_vendor/ailf-core/types/scoring-input.js +9 -0
- package/dist/_vendor/ailf-shared/dimension-names.d.ts +21 -0
- package/dist/_vendor/ailf-shared/dimension-names.js +27 -0
- package/dist/_vendor/ailf-shared/document-ref.d.ts +29 -0
- package/dist/_vendor/ailf-shared/document-ref.js +1 -0
- package/dist/_vendor/ailf-shared/eval-modes.d.ts +12 -0
- package/dist/_vendor/ailf-shared/eval-modes.js +8 -0
- package/dist/_vendor/ailf-shared/index.d.ts +16 -0
- package/dist/_vendor/ailf-shared/index.js +16 -0
- package/dist/_vendor/ailf-shared/noise-threshold.d.ts +9 -0
- package/dist/_vendor/ailf-shared/noise-threshold.js +9 -0
- package/dist/_vendor/ailf-shared/score-grades.d.ts +17 -0
- package/dist/_vendor/ailf-shared/score-grades.js +23 -0
- package/dist/adapters/cache/content-lake-cache.d.ts +24 -0
- package/dist/adapters/cache/content-lake-cache.js +59 -0
- package/dist/adapters/cache/filesystem-cache.d.ts +18 -0
- package/dist/adapters/cache/filesystem-cache.js +54 -0
- package/dist/adapters/cache/index.d.ts +2 -0
- package/dist/adapters/cache/index.js +2 -0
- package/dist/adapters/config-sources/cli-config-adapter.d.ts +17 -0
- package/dist/adapters/config-sources/cli-config-adapter.js +23 -0
- package/dist/adapters/config-sources/file-config-adapter.d.ts +26 -0
- package/dist/adapters/config-sources/file-config-adapter.js +96 -0
- package/dist/adapters/config-sources/index.d.ts +2 -0
- package/dist/adapters/config-sources/index.js +2 -0
- package/dist/adapters/doc-fetchers/index.d.ts +1 -0
- package/dist/adapters/doc-fetchers/index.js +1 -0
- package/dist/adapters/doc-fetchers/sanity-doc-fetcher.d.ts +76 -0
- package/dist/adapters/doc-fetchers/sanity-doc-fetcher.js +620 -0
- package/dist/adapters/eval-runners/index.d.ts +1 -0
- package/dist/adapters/eval-runners/index.js +1 -0
- package/dist/adapters/eval-runners/promptfoo-eval-adapter.d.ts +14 -0
- package/dist/adapters/eval-runners/promptfoo-eval-adapter.js +63 -0
- package/dist/adapters/index.d.ts +12 -0
- package/dist/adapters/index.js +12 -0
- package/dist/adapters/loggers/console-logger.d.ts +22 -0
- package/dist/adapters/loggers/console-logger.js +54 -0
- package/dist/adapters/loggers/index.d.ts +9 -0
- package/dist/adapters/loggers/index.js +9 -0
- package/dist/adapters/loggers/json-logger.d.ts +18 -0
- package/dist/adapters/loggers/json-logger.js +33 -0
- package/dist/adapters/loggers/quiet-logger.d.ts +16 -0
- package/dist/adapters/loggers/quiet-logger.js +30 -0
- package/dist/adapters/task-sources/composite-task-source.d.ts +20 -0
- package/dist/adapters/task-sources/composite-task-source.js +59 -0
- package/dist/adapters/task-sources/content-lake-task-source.d.ts +20 -0
- package/dist/adapters/task-sources/content-lake-task-source.js +219 -0
- package/dist/adapters/task-sources/index.d.ts +7 -0
- package/dist/adapters/task-sources/index.js +7 -0
- package/dist/adapters/task-sources/repo-schemas.d.ts +245 -0
- package/dist/adapters/task-sources/repo-schemas.js +234 -0
- package/dist/adapters/task-sources/repo-task-source.d.ts +22 -0
- package/dist/adapters/task-sources/repo-task-source.js +104 -0
- package/dist/adapters/task-sources/repo-trigger.d.ts +52 -0
- package/dist/adapters/task-sources/repo-trigger.js +153 -0
- package/dist/adapters/task-sources/repo-validation.d.ts +49 -0
- package/dist/adapters/task-sources/repo-validation.js +164 -0
- package/dist/adapters/task-sources/yaml-task-source.d.ts +18 -0
- package/dist/adapters/task-sources/yaml-task-source.js +136 -0
- package/dist/agent-observer/agentic-provider.d.ts +132 -0
- package/dist/agent-observer/agentic-provider.js +983 -0
- package/dist/agent-observer/classifier.d.ts +62 -0
- package/dist/agent-observer/classifier.js +269 -0
- package/dist/agent-observer/index.d.ts +7 -0
- package/dist/agent-observer/index.js +4 -0
- package/dist/agent-observer/pricing.d.ts +35 -0
- package/dist/agent-observer/pricing.js +82 -0
- package/dist/agent-observer/provider.d.ts +77 -0
- package/dist/agent-observer/provider.js +151 -0
- package/dist/agent-observer/proxy.d.ts +91 -0
- package/dist/agent-observer/proxy.js +321 -0
- package/dist/agent-observer/test-imports.d.ts +7 -0
- package/dist/agent-observer/test-imports.js +185 -0
- package/dist/agent-observer/types.d.ts +137 -0
- package/dist/agent-observer/types.js +16 -0
- package/dist/assertions/source-isolation.d.ts +72 -0
- package/dist/assertions/source-isolation.js +117 -0
- package/dist/cli.d.ts +24 -0
- package/dist/cli.js +199 -0
- package/dist/commands/agent-report.d.ts +5 -0
- package/dist/commands/agent-report.js +69 -0
- package/dist/commands/baseline.d.ts +9 -0
- package/dist/commands/baseline.js +141 -0
- package/dist/commands/cache.d.ts +13 -0
- package/dist/commands/cache.js +135 -0
- package/dist/commands/calculate-scores.d.ts +8 -0
- package/dist/commands/calculate-scores.js +48 -0
- package/dist/commands/compare.d.ts +8 -0
- package/dist/commands/compare.js +120 -0
- package/dist/commands/completion.d.ts +18 -0
- package/dist/commands/completion.js +260 -0
- package/dist/commands/coverage-audit.d.ts +7 -0
- package/dist/commands/coverage-audit.js +40 -0
- package/dist/commands/discovery-report.d.ts +10 -0
- package/dist/commands/discovery-report.js +44 -0
- package/dist/commands/eval.d.ts +9 -0
- package/dist/commands/eval.js +35 -0
- package/dist/commands/explain-handler.d.ts +34 -0
- package/dist/commands/explain-handler.js +719 -0
- package/dist/commands/fetch-docs.d.ts +8 -0
- package/dist/commands/fetch-docs.js +128 -0
- package/dist/commands/generate-configs.d.ts +8 -0
- package/dist/commands/generate-configs.js +46 -0
- package/dist/commands/grader/index.d.ts +11 -0
- package/dist/commands/grader/index.js +118 -0
- package/dist/commands/init.d.ts +19 -0
- package/dist/commands/init.js +150 -0
- package/dist/commands/interactive.d.ts +12 -0
- package/dist/commands/interactive.js +238 -0
- package/dist/commands/lookup-doc.d.ts +15 -0
- package/dist/commands/lookup-doc.js +84 -0
- package/dist/commands/measure-retrieval.d.ts +5 -0
- package/dist/commands/measure-retrieval.js +65 -0
- package/dist/commands/pipeline-action.d.ts +71 -0
- package/dist/commands/pipeline-action.js +305 -0
- package/dist/commands/pipeline.d.ts +62 -0
- package/dist/commands/pipeline.js +53 -0
- package/dist/commands/pr-comment.d.ts +8 -0
- package/dist/commands/pr-comment.js +47 -0
- package/dist/commands/publish.d.ts +26 -0
- package/dist/commands/publish.js +253 -0
- package/dist/commands/readiness-report.d.ts +10 -0
- package/dist/commands/readiness-report.js +104 -0
- package/dist/commands/shared/options.d.ts +29 -0
- package/dist/commands/shared/options.js +57 -0
- package/dist/commands/update-quality-scores.d.ts +5 -0
- package/dist/commands/update-quality-scores.js +20 -0
- package/dist/commands/validate-tasks.d.ts +16 -0
- package/dist/commands/validate-tasks.js +93 -0
- package/dist/commands/validate.d.ts +9 -0
- package/dist/commands/validate.js +73 -0
- package/dist/commands/webhook-server.d.ts +5 -0
- package/dist/commands/webhook-server.js +30 -0
- package/dist/commands/weekly-digest.d.ts +10 -0
- package/dist/commands/weekly-digest.js +104 -0
- package/dist/composition-root.d.ts +26 -0
- package/dist/composition-root.js +107 -0
- package/dist/interpolate.d.ts +26 -0
- package/dist/interpolate.js +70 -0
- package/dist/job-store.d.ts +104 -0
- package/dist/job-store.js +188 -0
- package/dist/lib/agent-behavior-report.d.ts +8 -0
- package/dist/lib/agent-behavior-report.js +185 -0
- package/dist/lib/baseline.d.ts +19 -0
- package/dist/lib/baseline.js +153 -0
- package/dist/lib/calculate-scores.d.ts +23 -0
- package/dist/lib/calculate-scores.js +42 -0
- package/dist/lib/compare.d.ts +18 -0
- package/dist/lib/compare.js +170 -0
- package/dist/lib/coverage-audit.d.ts +4 -0
- package/dist/lib/coverage-audit.js +42 -0
- package/dist/lib/discovery-report.d.ts +13 -0
- package/dist/lib/discovery-report.js +57 -0
- package/dist/lib/fetch-docs.d.ts +30 -0
- package/dist/lib/fetch-docs.js +171 -0
- package/dist/lib/generate-configs.d.ts +25 -0
- package/dist/lib/generate-configs.js +42 -0
- package/dist/lib/grader-api.d.ts +21 -0
- package/dist/lib/grader-api.js +34 -0
- package/dist/lib/grader-compare.d.ts +19 -0
- package/dist/lib/grader-compare.js +91 -0
- package/dist/lib/grader-consistency.d.ts +27 -0
- package/dist/lib/grader-consistency.js +79 -0
- package/dist/lib/grader-sensitivity.d.ts +19 -0
- package/dist/lib/grader-sensitivity.js +75 -0
- package/dist/lib/grader-validate.d.ts +19 -0
- package/dist/lib/grader-validate.js +78 -0
- package/dist/lib/measure-retrieval.d.ts +14 -0
- package/dist/lib/measure-retrieval.js +71 -0
- package/dist/lib/pr-comment.d.ts +16 -0
- package/dist/lib/pr-comment.js +28 -0
- package/dist/lib/readiness-report.d.ts +13 -0
- package/dist/lib/readiness-report.js +108 -0
- package/dist/lib/webhook-server.d.ts +11 -0
- package/dist/lib/webhook-server.js +24 -0
- package/dist/lib/weekly-digest.d.ts +24 -0
- package/dist/lib/weekly-digest.js +148 -0
- package/dist/orchestration/build-app-context.d.ts +27 -0
- package/dist/orchestration/build-app-context.js +81 -0
- package/dist/orchestration/build-step-sequence.d.ts +15 -0
- package/dist/orchestration/build-step-sequence.js +84 -0
- package/dist/orchestration/config-to-source-overrides.d.ts +9 -0
- package/dist/orchestration/config-to-source-overrides.js +28 -0
- package/dist/orchestration/env-bridge.d.ts +21 -0
- package/dist/orchestration/env-bridge.js +66 -0
- package/dist/orchestration/index.d.ts +11 -0
- package/dist/orchestration/index.js +11 -0
- package/dist/orchestration/pipeline-orchestrator.d.ts +24 -0
- package/dist/orchestration/pipeline-orchestrator.js +153 -0
- package/dist/orchestration/step-runner.d.ts +20 -0
- package/dist/orchestration/step-runner.js +88 -0
- package/dist/orchestration/steps/calculate-scores-step.d.ts +13 -0
- package/dist/orchestration/steps/calculate-scores-step.js +95 -0
- package/dist/orchestration/steps/callback-step.d.ts +24 -0
- package/dist/orchestration/steps/callback-step.js +76 -0
- package/dist/orchestration/steps/compare-step.d.ts +14 -0
- package/dist/orchestration/steps/compare-step.js +92 -0
- package/dist/orchestration/steps/discovery-report-step.d.ts +13 -0
- package/dist/orchestration/steps/discovery-report-step.js +55 -0
- package/dist/orchestration/steps/fetch-docs-shell.d.ts +17 -0
- package/dist/orchestration/steps/fetch-docs-shell.js +30 -0
- package/dist/orchestration/steps/fetch-docs-step.d.ts +14 -0
- package/dist/orchestration/steps/fetch-docs-step.js +135 -0
- package/dist/orchestration/steps/gap-analysis-step.d.ts +16 -0
- package/dist/orchestration/steps/gap-analysis-step.js +136 -0
- package/dist/orchestration/steps/generate-configs-step.d.ts +14 -0
- package/dist/orchestration/steps/generate-configs-step.js +85 -0
- package/dist/orchestration/steps/grader-consistency-step.d.ts +13 -0
- package/dist/orchestration/steps/grader-consistency-step.js +64 -0
- package/dist/orchestration/steps/index.d.ts +19 -0
- package/dist/orchestration/steps/index.js +19 -0
- package/dist/orchestration/steps/mirror-repo-tasks-step.d.ts +21 -0
- package/dist/orchestration/steps/mirror-repo-tasks-step.js +94 -0
- package/dist/orchestration/steps/publish-report-step.d.ts +26 -0
- package/dist/orchestration/steps/publish-report-step.js +216 -0
- package/dist/orchestration/steps/readiness-step.d.ts +13 -0
- package/dist/orchestration/steps/readiness-step.js +91 -0
- package/dist/orchestration/steps/report-step.d.ts +12 -0
- package/dist/orchestration/steps/report-step.js +49 -0
- package/dist/orchestration/steps/run-eval-step.d.ts +17 -0
- package/dist/orchestration/steps/run-eval-step.js +195 -0
- package/dist/orchestration/steps/validate-step.d.ts +12 -0
- package/dist/orchestration/steps/validate-step.js +41 -0
- package/dist/pipeline/agent-behavior-report.d.ts +53 -0
- package/dist/pipeline/agent-behavior-report.js +132 -0
- package/dist/pipeline/attribution.d.ts +47 -0
- package/dist/pipeline/attribution.js +226 -0
- package/dist/pipeline/baseline.d.ts +37 -0
- package/dist/pipeline/baseline.js +141 -0
- package/dist/pipeline/cache.d.ts +101 -0
- package/dist/pipeline/cache.js +283 -0
- package/dist/pipeline/calculate-scores.d.ts +102 -0
- package/dist/pipeline/calculate-scores.js +1128 -0
- package/dist/pipeline/callback-delivery.d.ts +50 -0
- package/dist/pipeline/callback-delivery.js +89 -0
- package/dist/pipeline/checks.d.ts +39 -0
- package/dist/pipeline/checks.js +280 -0
- package/dist/pipeline/classify-url.d.ts +61 -0
- package/dist/pipeline/classify-url.js +93 -0
- package/dist/pipeline/compare.d.ts +31 -0
- package/dist/pipeline/compare.js +208 -0
- package/dist/pipeline/coverage-audit.d.ts +39 -0
- package/dist/pipeline/coverage-audit.js +165 -0
- package/dist/pipeline/degradations.d.ts +85 -0
- package/dist/pipeline/degradations.js +242 -0
- package/dist/pipeline/discovery-report.d.ts +55 -0
- package/dist/pipeline/discovery-report.js +178 -0
- package/dist/pipeline/eval-constants.d.ts +68 -0
- package/dist/pipeline/eval-constants.js +111 -0
- package/dist/pipeline/eval-fingerprint.d.ts +66 -0
- package/dist/pipeline/eval-fingerprint.js +175 -0
- package/dist/pipeline/expand-tasks.d.ts +220 -0
- package/dist/pipeline/expand-tasks.js +421 -0
- package/dist/pipeline/failure-modes.d.ts +46 -0
- package/dist/pipeline/failure-modes.js +348 -0
- package/dist/pipeline/fetch-url-content.d.ts +44 -0
- package/dist/pipeline/fetch-url-content.js +93 -0
- package/dist/pipeline/gap-analysis.d.ts +48 -0
- package/dist/pipeline/gap-analysis.js +231 -0
- package/dist/pipeline/generate-configs.d.ts +72 -0
- package/dist/pipeline/generate-configs.js +395 -0
- package/dist/pipeline/grader-api.d.ts +49 -0
- package/dist/pipeline/grader-api.js +200 -0
- package/dist/pipeline/grader-compare-runner.d.ts +44 -0
- package/dist/pipeline/grader-compare-runner.js +301 -0
- package/dist/pipeline/grader-comparison.d.ts +111 -0
- package/dist/pipeline/grader-comparison.js +161 -0
- package/dist/pipeline/grader-consistency-runner.d.ts +60 -0
- package/dist/pipeline/grader-consistency-runner.js +270 -0
- package/dist/pipeline/grader-consistency.d.ts +103 -0
- package/dist/pipeline/grader-consistency.js +146 -0
- package/dist/pipeline/grader-sensitivity-runner.d.ts +40 -0
- package/dist/pipeline/grader-sensitivity-runner.js +282 -0
- package/dist/pipeline/grader-sensitivity.d.ts +94 -0
- package/dist/pipeline/grader-sensitivity.js +144 -0
- package/dist/pipeline/grader-validate-runner.d.ts +38 -0
- package/dist/pipeline/grader-validate-runner.js +229 -0
- package/dist/pipeline/grader-validation.d.ts +107 -0
- package/dist/pipeline/grader-validation.js +169 -0
- package/dist/pipeline/map-request-to-config.d.ts +19 -0
- package/dist/pipeline/map-request-to-config.js +80 -0
- package/dist/pipeline/measure-retrieval.d.ts +59 -0
- package/dist/pipeline/measure-retrieval.js +111 -0
- package/dist/pipeline/mirror-repo-tasks.d.ts +86 -0
- package/dist/pipeline/mirror-repo-tasks.js +350 -0
- package/dist/pipeline/plan-format.d.ts +33 -0
- package/dist/pipeline/plan-format.js +202 -0
- package/dist/pipeline/plan.d.ts +169 -0
- package/dist/pipeline/plan.js +708 -0
- package/dist/pipeline/pr-comment.d.ts +19 -0
- package/dist/pipeline/pr-comment.js +502 -0
- package/dist/pipeline/probe.d.ts +52 -0
- package/dist/pipeline/probe.js +390 -0
- package/dist/pipeline/provenance.d.ts +47 -0
- package/dist/pipeline/provenance.js +146 -0
- package/dist/pipeline/readiness-report.d.ts +87 -0
- package/dist/pipeline/readiness-report.js +205 -0
- package/dist/pipeline/release-classification.d.ts +54 -0
- package/dist/pipeline/release-classification.js +238 -0
- package/dist/pipeline/release-report.d.ts +37 -0
- package/dist/pipeline/release-report.js +222 -0
- package/dist/pipeline/repo-eval-comment.d.ts +37 -0
- package/dist/pipeline/repo-eval-comment.js +165 -0
- package/dist/pipeline/repo-threshold-evaluator.d.ts +89 -0
- package/dist/pipeline/repo-threshold-evaluator.js +162 -0
- package/dist/pipeline/resolve-mappings.d.ts +35 -0
- package/dist/pipeline/resolve-mappings.js +72 -0
- package/dist/pipeline/retrieval-metrics.d.ts +39 -0
- package/dist/pipeline/retrieval-metrics.js +136 -0
- package/dist/pipeline/reverse-mapping.d.ts +67 -0
- package/dist/pipeline/reverse-mapping.js +88 -0
- package/dist/pipeline/schemas.d.ts +9 -0
- package/dist/pipeline/schemas.js +9 -0
- package/dist/pipeline/steps/calculate-scores-step.d.ts +11 -0
- package/dist/pipeline/steps/calculate-scores-step.js +89 -0
- package/dist/pipeline/steps/compare-step.d.ts +18 -0
- package/dist/pipeline/steps/compare-step.js +90 -0
- package/dist/pipeline/steps/eval-step.d.ts +53 -0
- package/dist/pipeline/steps/eval-step.js +347 -0
- package/dist/pipeline/steps/fetch-docs-step.d.ts +11 -0
- package/dist/pipeline/steps/fetch-docs-step.js +84 -0
- package/dist/pipeline/steps/generate-configs-step.d.ts +11 -0
- package/dist/pipeline/steps/generate-configs-step.js +98 -0
- package/dist/pipeline/steps/grader-consistency-step.d.ts +21 -0
- package/dist/pipeline/steps/grader-consistency-step.js +74 -0
- package/dist/pipeline/steps/publish-report-step.d.ts +57 -0
- package/dist/pipeline/steps/publish-report-step.js +243 -0
- package/dist/pipeline/steps/report-step.d.ts +13 -0
- package/dist/pipeline/steps/report-step.js +56 -0
- package/dist/pipeline/steps/update-scores-step.d.ts +11 -0
- package/dist/pipeline/steps/update-scores-step.js +42 -0
- package/dist/pipeline/targeted-loo.d.ts +88 -0
- package/dist/pipeline/targeted-loo.js +203 -0
- package/dist/pipeline/thresholds.d.ts +27 -0
- package/dist/pipeline/thresholds.js +245 -0
- package/dist/pipeline/types.d.ts +10 -0
- package/dist/pipeline/types.js +10 -0
- package/dist/pipeline/validate.d.ts +67 -0
- package/dist/pipeline/validate.js +406 -0
- package/dist/pipeline/webhook-server.d.ts +37 -0
- package/dist/pipeline/webhook-server.js +133 -0
- package/dist/report-store.d.ts +84 -0
- package/dist/report-store.js +208 -0
- package/dist/sanity/client.d.ts +38 -0
- package/dist/sanity/client.js +86 -0
- package/dist/sanity/portable-text.d.ts +11 -0
- package/dist/sanity/portable-text.js +211 -0
- package/dist/sanity/queries.d.ts +133 -0
- package/dist/sanity/queries.js +300 -0
- package/dist/schedules/digest.d.ts +116 -0
- package/dist/schedules/digest.js +156 -0
- package/dist/schedules/index.d.ts +12 -0
- package/dist/schedules/index.js +10 -0
- package/dist/schedules/loader.d.ts +31 -0
- package/dist/schedules/loader.js +73 -0
- package/dist/schedules/schema.d.ts +9 -0
- package/dist/schedules/schema.js +9 -0
- package/dist/scripts/agent-behavior-report.d.ts +19 -0
- package/dist/scripts/agent-behavior-report.js +315 -0
- package/dist/scripts/baseline.d.ts +43 -0
- package/dist/scripts/baseline.js +267 -0
- package/dist/scripts/calculate-scores.d.ts +166 -0
- package/dist/scripts/calculate-scores.js +1296 -0
- package/dist/scripts/compare.d.ts +22 -0
- package/dist/scripts/compare.js +334 -0
- package/dist/scripts/coverage-audit.d.ts +44 -0
- package/dist/scripts/coverage-audit.js +209 -0
- package/dist/scripts/debug-eval.d.ts +19 -0
- package/dist/scripts/debug-eval.js +73 -0
- package/dist/scripts/discovery-report.d.ts +58 -0
- package/dist/scripts/discovery-report.js +250 -0
- package/dist/scripts/fetch-docs.d.ts +35 -0
- package/dist/scripts/fetch-docs.js +472 -0
- package/dist/scripts/generate-configs.d.ts +66 -0
- package/dist/scripts/generate-configs.js +459 -0
- package/dist/scripts/grader-api.d.ts +27 -0
- package/dist/scripts/grader-api.js +206 -0
- package/dist/scripts/grader-compare.d.ts +22 -0
- package/dist/scripts/grader-compare.js +368 -0
- package/dist/scripts/grader-consistency.d.ts +20 -0
- package/dist/scripts/grader-consistency.js +313 -0
- package/dist/scripts/grader-sensitivity.d.ts +22 -0
- package/dist/scripts/grader-sensitivity.js +354 -0
- package/dist/scripts/grader-validate.d.ts +19 -0
- package/dist/scripts/grader-validate.js +267 -0
- package/dist/scripts/measure-retrieval.d.ts +10 -0
- package/dist/scripts/measure-retrieval.js +145 -0
- package/dist/scripts/migrate-tasks-to-content-lake.d.ts +24 -0
- package/dist/scripts/migrate-tasks-to-content-lake.js +327 -0
- package/dist/scripts/pipeline.d.ts +76 -0
- package/dist/scripts/pipeline.js +1031 -0
- package/dist/scripts/pr-comment.d.ts +10 -0
- package/dist/scripts/pr-comment.js +510 -0
- package/dist/scripts/readiness-report.d.ts +88 -0
- package/dist/scripts/readiness-report.js +342 -0
- package/dist/scripts/update-quality-scores.d.ts +15 -0
- package/dist/scripts/update-quality-scores.js +184 -0
- package/dist/scripts/validate-task-sources.d.ts +21 -0
- package/dist/scripts/validate-task-sources.js +210 -0
- package/dist/scripts/validate.d.ts +13 -0
- package/dist/scripts/validate.js +79 -0
- package/dist/scripts/webhook-server.d.ts +26 -0
- package/dist/scripts/webhook-server.js +147 -0
- package/dist/scripts/weekly-digest.d.ts +24 -0
- package/dist/scripts/weekly-digest.js +144 -0
- package/dist/sinks/bigquery/index.d.ts +131 -0
- package/dist/sinks/bigquery/index.js +222 -0
- package/dist/sinks/format-slack.d.ts +64 -0
- package/dist/sinks/format-slack.js +306 -0
- package/dist/sinks/index.d.ts +23 -0
- package/dist/sinks/index.js +18 -0
- package/dist/sinks/loader.d.ts +18 -0
- package/dist/sinks/loader.js +82 -0
- package/dist/sinks/retry.d.ts +24 -0
- package/dist/sinks/retry.js +52 -0
- package/dist/sinks/schema.d.ts +9 -0
- package/dist/sinks/schema.js +9 -0
- package/dist/sinks/slack/format.d.ts +65 -0
- package/dist/sinks/slack/format.js +327 -0
- package/dist/sinks/slack/index.d.ts +27 -0
- package/dist/sinks/slack/index.js +78 -0
- package/dist/sinks/slack-sink.d.ts +27 -0
- package/dist/sinks/slack-sink.js +78 -0
- package/dist/sinks/types.d.ts +59 -0
- package/dist/sinks/types.js +44 -0
- package/dist/sinks/webhook/index.d.ts +19 -0
- package/dist/sinks/webhook/index.js +50 -0
- package/dist/sinks/webhook-sink.d.ts +19 -0
- package/dist/sinks/webhook-sink.js +50 -0
- package/dist/sources.d.ts +104 -0
- package/dist/sources.js +292 -0
- package/dist/webhook/budget.d.ts +42 -0
- package/dist/webhook/budget.js +60 -0
- package/dist/webhook/debounce.d.ts +67 -0
- package/dist/webhook/debounce.js +76 -0
- package/dist/webhook/dispatch.d.ts +45 -0
- package/dist/webhook/dispatch.js +84 -0
- package/dist/webhook/eval-request-handler.d.ts +87 -0
- package/dist/webhook/eval-request-handler.js +181 -0
- package/dist/webhook/handler.d.ts +88 -0
- package/dist/webhook/handler.js +203 -0
- package/dist/webhook/index.d.ts +17 -0
- package/dist/webhook/index.js +12 -0
- package/dist/webhook/types.d.ts +109 -0
- package/dist/webhook/types.js +10 -0
- package/package.json +72 -0
- package/tasks/.expanded.agentic.yaml +51 -0
- package/tasks/.expanded.yaml +66 -0
- package/tasks/frameworks.yaml +98 -0
- package/tasks/functions.yaml +51 -0
- package/tasks/groq.yaml +216 -0
- package/tasks/nextjs-live.yaml +62 -0
- package/tasks/studio-setup.yaml +111 -0
- package/tasks/visual-editing.yaml +120 -0
|
@@ -0,0 +1,983 @@
|
|
|
1
|
+
/**
 * agentic-provider.ts
 *
 * An agentic Promptfoo provider that gives the model web_search and
 * fetch_page tools, simulating how real AI agents behave when a user
 * asks a development question.
 *
 * Supports two agent modes via the `agentMode` config:
 *
 *   - "naive"     — Simulates current agents (Claude Code, ChatGPT, Cursor):
 *                   uses Jina Reader for search + page fetching because real
 *                   agents can't render JavaScript-heavy SPAs server-side.
 *
 *   - "optimized" — Simulates an ideal agent that knows about Sanity's
 *                   agent-friendly endpoints: fetches .md versions of doc
 *                   pages directly, uses llms.txt for doc discovery, and
 *                   falls back to Jina only for non-Sanity pages.
 *
 * All HTTP requests go through the RequestRecorder, so they're automatically
 * classified as docPageVisits, searchQueries, etc.
 *
 * Promptfoo config usage:
 *
 *   providers:
 *     - id: file://dist/agent-observer/agentic-provider.js
 *       label: "GPT-4o (Naive Agent)"
 *       config:
 *         model: gpt-4o
 *         agentMode: naive # or "optimized"
 *         maxToolRounds: 5
 */
|
|
32
|
+
import { config as loadDotenv } from "dotenv";
import { randomUUID } from "crypto";
import { fileURLToPath } from "node:url";
import { RequestRecorder } from "./proxy.js";
import { calculateCost } from "./pricing.js";
import { isAllowedOrigin } from "../sources.js";
|
|
37
|
+
// Load environment variables from the `.env` file located two directories
// above this module. `override: true` lets values from that file replace
// variables that are already set in the process environment.
//
// The location is resolved with fileURLToPath() instead of URL#pathname:
// `pathname` stays percent-encoded (a directory named "my project" becomes
// "my%20project") and yields "/C:/..." on Windows, neither of which is a
// usable filesystem path.
loadDotenv({
  override: true,
  path: fileURLToPath(new URL("../../.env", import.meta.url)),
});
|
|
41
|
+
// ---------------------------------------------------------------------------
|
|
42
|
+
// Tool definitions — mirror what real agents provide
|
|
43
|
+
// ---------------------------------------------------------------------------
|
|
44
|
+
/**
 * Tool definitions offered to the model in every agent mode.
 *
 * Each entry is a `{ function, type: "function" }` descriptor whose
 * `function.parameters` is a JSON-Schema-style object with one required
 * string argument:
 *
 *   - `web_search(query)` — search the web.
 *   - `fetch_page(url)`   — fetch the text content of a page.
 *
 * NOTE(review): the descriptor shape looks like the OpenAI function-calling
 * tool schema — confirm against the code that sends these to the model.
 */
const AGENT_TOOLS = [
  {
    function: {
      description:
        "Search the web for information. Use this to find documentation, " +
        "tutorials, API references, and examples. Returns a list of search " +
        "results with titles, URLs, and snippets.",
      name: "web_search",
      parameters: {
        properties: {
          query: {
            description: "The search query to execute",
            type: "string",
          },
        },
        required: ["query"],
        type: "object",
      },
    },
    type: "function",
  },
  {
    function: {
      description:
        "Fetch the content of a web page. Use this to read documentation, " +
        "code examples, API references, or any other web content. Returns " +
        "the text content of the page.",
      name: "fetch_page",
      parameters: {
        properties: {
          url: {
            description: "The URL to fetch",
            type: "string",
          },
        },
        required: ["url"],
        type: "object",
      },
    },
    type: "function",
  },
];
|
|
84
|
+
// Optimized mode gets an extra tool for discovering Sanity's doc structure
|
|
85
|
+
/**
 * Additional tool definitions for the "optimized" agent mode.
 *
 * Contains a single `list_docs(site)` descriptor, in the same
 * `{ function, type: "function" }` shape as the base tool list. Its
 * description tells the model to call it first, so it can discover which
 * documentation pages exist (via llms.txt) before fetching specific ones.
 */
const OPTIMIZED_EXTRA_TOOLS = [
  {
    function: {
      description:
        "List all available documentation pages for a site. Currently supports " +
        "the documentation site (via llms.txt). Returns a structured list of doc page titles " +
        "and URLs. Use this FIRST to discover what documentation is available " +
        "before fetching specific pages.",
      name: "list_docs",
      parameters: {
        properties: {
          site: {
            description: 'The documentation site domain, e.g. "sanity.io"',
            type: "string",
          },
        },
        required: ["site"],
        type: "object",
      },
    },
    type: "function",
  },
];
|
|
107
|
+
// ---------------------------------------------------------------------------
|
|
108
|
+
// Sanity docs helpers
|
|
109
|
+
// ---------------------------------------------------------------------------
|
|
110
|
+
/** Default doc base URL — used when no source config is provided */
const DEFAULT_DOC_BASE_URL = "https://www.sanity.io/docs";
/**
 * Default llms.txt URL. Derived from the default doc base URL so the two
 * defaults cannot drift apart if the base is ever changed.
 */
const DEFAULT_LLMS_TXT_URL = `${DEFAULT_DOC_BASE_URL}/llms.txt`;
|
|
114
|
+
export default class AgenticProvider {
|
|
115
|
+
config;
|
|
116
|
+
providerId;
|
|
117
|
+
agentMode;
|
|
118
|
+
allowedOrigins;
|
|
119
|
+
customHeaders;
|
|
120
|
+
docBaseUrl;
|
|
121
|
+
docsUrlPattern;
|
|
122
|
+
llmsTxtUrl;
|
|
123
|
+
priorityDomain;
|
|
124
|
+
recorder;
|
|
125
|
+
searchMode;
|
|
126
|
+
constructor(options) {
|
|
127
|
+
this.providerId = options.id ?? "agentic-observer";
|
|
128
|
+
this.config = options.config ?? {};
|
|
129
|
+
this.agentMode = this.config.agentMode || "naive";
|
|
130
|
+
// Documentation source configuration — defaults to Sanity production
|
|
131
|
+
this.docBaseUrl = this.config.docBaseUrl || DEFAULT_DOC_BASE_URL;
|
|
132
|
+
this.llmsTxtUrl = this.config.llmsTxtUrl || DEFAULT_LLMS_TXT_URL;
|
|
133
|
+
this.docsUrlPattern = buildDocsUrlPattern(this.docBaseUrl);
|
|
134
|
+
// Custom HTTP headers (e.g., Vercel bypass protection token)
|
|
135
|
+
this.customHeaders =
|
|
136
|
+
this.config.customHeaders || {};
|
|
137
|
+
// Extract priority domain from docBaseUrl for search result ranking
|
|
138
|
+
const baseUrlObj = new URL(this.docBaseUrl);
|
|
139
|
+
this.priorityDomain =
|
|
140
|
+
this.config.priorityDomain ||
|
|
141
|
+
baseUrlObj.hostname.replace(/^www\./, "");
|
|
142
|
+
// Optional origin sandboxing — restrict which URLs the agent can access
|
|
143
|
+
this.allowedOrigins = Array.isArray(this.config.allowedOrigins)
|
|
144
|
+
? this.config.allowedOrigins.filter(Boolean)
|
|
145
|
+
: [];
|
|
146
|
+
// Search mode: controls web_search tool availability and filtering
|
|
147
|
+
this.searchMode = this.config.searchMode || "open";
|
|
148
|
+
this.recorder = new RequestRecorder(this.config.observerOptions || {});
|
|
149
|
+
}
|
|
150
|
+
/**
|
|
151
|
+
* Main Promptfoo provider entry point. Runs the full agentic loop.
|
|
152
|
+
*/
|
|
153
|
+
async callApi(prompt, context) {
|
|
154
|
+
const sessionId = randomUUID();
|
|
155
|
+
const taskDescription = context?.vars?.task ||
|
|
156
|
+
context?.prompt?.label ||
|
|
157
|
+
"unknown-task";
|
|
158
|
+
const observe = this.config.observe !== false;
|
|
159
|
+
if (observe) {
|
|
160
|
+
this.recorder.start(sessionId, this.id(), taskDescription);
|
|
161
|
+
}
|
|
162
|
+
let result;
|
|
163
|
+
try {
|
|
164
|
+
result = await this.runAgenticLoop(prompt);
|
|
165
|
+
}
|
|
166
|
+
catch (err) {
|
|
167
|
+
const error = err;
|
|
168
|
+
result = {
|
|
169
|
+
error: error.message,
|
|
170
|
+
output: undefined,
|
|
171
|
+
};
|
|
172
|
+
}
|
|
173
|
+
if (observe) {
|
|
174
|
+
const behaviorLog = this.recorder.stop();
|
|
175
|
+
result.metadata = {
|
|
176
|
+
...(result.metadata ?? {}),
|
|
177
|
+
agentBehavior: behaviorLog,
|
|
178
|
+
agentBehaviorSummary: behaviorLog.summary,
|
|
179
|
+
agentMode: this.agentMode,
|
|
180
|
+
};
|
|
181
|
+
}
|
|
182
|
+
return result;
|
|
183
|
+
}
|
|
184
|
+
/**
|
|
185
|
+
* Exposes the recorder for external integrations.
|
|
186
|
+
*/
|
|
187
|
+
getRecorder() {
|
|
188
|
+
return this.recorder;
|
|
189
|
+
}
|
|
190
|
+
id() {
|
|
191
|
+
return `agentic:${this.agentMode}:${this.providerId}`;
|
|
192
|
+
}
|
|
193
|
+
// -------------------------------------------------------------------------
// System prompt
// -------------------------------------------------------------------------
|
|
196
|
+
/**
|
|
197
|
+
* Build the system prompt based on agent mode and configured doc URLs.
|
|
198
|
+
*/
|
|
199
|
+
buildSystemPrompt() {
|
|
200
|
+
const docDomain = this.priorityDomain;
|
|
201
|
+
const docUrl = this.docBaseUrl;
|
|
202
|
+
const exampleSlug = "schema-types";
|
|
203
|
+
// Search guidance varies based on search mode
|
|
204
|
+
const searchGuidance = this.searchMode === "off"
|
|
205
|
+
? "You do NOT have web search. Fetch documentation pages directly " +
|
|
206
|
+
"using the URLs you know or discover via list_docs."
|
|
207
|
+
: this.searchMode === "origin-only"
|
|
208
|
+
? `You have web search restricted to ${docDomain}. ` +
|
|
209
|
+
"Search results will only include pages from this domain."
|
|
210
|
+
: "You have access to web search.";
|
|
211
|
+
return this.agentMode === "optimized"
|
|
212
|
+
? "You are an expert developer helping with implementation tasks. " +
|
|
213
|
+
searchGuidance +
|
|
214
|
+
" You also have page fetching and a doc listing tool. " +
|
|
215
|
+
"IMPORTANT: The documentation supports agent-friendly endpoints. " +
|
|
216
|
+
`Start by calling list_docs for "${docDomain}" to discover available doc pages. ` +
|
|
217
|
+
'When fetching doc pages, append ".md" to the URL to get clean markdown ' +
|
|
218
|
+
`(e.g., ${docUrl}/${exampleSlug}.md instead of ` +
|
|
219
|
+
`${docUrl}/${exampleSlug}). ` +
|
|
220
|
+
"Always verify against the current docs before answering."
|
|
221
|
+
: "You are an expert developer helping with implementation tasks. " +
|
|
222
|
+
searchGuidance +
|
|
223
|
+
" You also have page fetching tools. " +
|
|
224
|
+
"When asked about APIs, configuration, schemas, GROQ queries, " +
|
|
225
|
+
"or any topic related to the documentation, " +
|
|
226
|
+
`USE the tools to look up the latest official documentation at ${docDomain} ` +
|
|
227
|
+
"before answering. Do not rely solely on your training data — always verify " +
|
|
228
|
+
"against the current docs. Prefer official documentation over third-party sources.";
|
|
229
|
+
}
|
|
230
|
+
/**
|
|
231
|
+
* Cleans Jina Reader markdown output by removing navigation boilerplate,
|
|
232
|
+
* cookie banners, and footer content.
|
|
233
|
+
*/
|
|
234
|
+
cleanJinaContent(markdown) {
|
|
235
|
+
const lines = markdown.split("\n");
|
|
236
|
+
const cleanLines = [];
|
|
237
|
+
let inMainContent = false;
|
|
238
|
+
let skipCount = 0;
|
|
239
|
+
for (const line of lines) {
|
|
240
|
+
if (line.includes("cookie") && line.includes("Privacy Policy"))
|
|
241
|
+
continue;
|
|
242
|
+
if (line.includes("Accept Deny Non-Essential"))
|
|
243
|
+
continue;
|
|
244
|
+
if (line.trim() ===
|
|
245
|
+
"Opens in a new window Opens an external website Opens an external website in a new window")
|
|
246
|
+
continue;
|
|
247
|
+
if (line.trim().startsWith("* ") &&
|
|
248
|
+
line.includes(`](${this.docBaseUrl}/`)) {
|
|
249
|
+
skipCount++;
|
|
250
|
+
if (skipCount > 3 && !inMainContent)
|
|
251
|
+
continue;
|
|
252
|
+
}
|
|
253
|
+
else {
|
|
254
|
+
skipCount = 0;
|
|
255
|
+
}
|
|
256
|
+
if (line.startsWith("# ") ||
|
|
257
|
+
line.startsWith("## ") ||
|
|
258
|
+
line.startsWith("### ")) {
|
|
259
|
+
inMainContent = true;
|
|
260
|
+
}
|
|
261
|
+
if (inMainContent) {
|
|
262
|
+
cleanLines.push(line);
|
|
263
|
+
}
|
|
264
|
+
}
|
|
265
|
+
if (cleanLines.length < 10)
|
|
266
|
+
return markdown;
|
|
267
|
+
return cleanLines.join("\n").trim();
|
|
268
|
+
}
|
|
269
|
+
// -------------------------------------------------------------------------
// Provider detection
// -------------------------------------------------------------------------
|
|
272
|
+
/**
|
|
273
|
+
* Detect which LLM provider to use based on config and model name.
|
|
274
|
+
* Reads the `provider` config field set by generate-configs, with
|
|
275
|
+
* fallback heuristics for backward compatibility.
|
|
276
|
+
*/
|
|
277
|
+
detectProvider() {
|
|
278
|
+
const explicit = this.config.provider;
|
|
279
|
+
if (explicit === "anthropic")
|
|
280
|
+
return "anthropic";
|
|
281
|
+
if (explicit === "openai")
|
|
282
|
+
return "openai";
|
|
283
|
+
// Heuristic fallback: detect from model name
|
|
284
|
+
const model = this.config.model || "";
|
|
285
|
+
if (model.startsWith("claude"))
|
|
286
|
+
return "anthropic";
|
|
287
|
+
return "openai";
|
|
288
|
+
}
|
|
289
|
+
// -------------------------------------------------------------------------
|
|
290
|
+
// fetch_page — key difference between naive and optimized modes
|
|
291
|
+
// -------------------------------------------------------------------------
|
|
292
|
+
async executeFetchPage(url, fetchFn) {
|
|
293
|
+
const maxContentLength = 12000;
|
|
294
|
+
// -----------------------------------------------------------------------
|
|
295
|
+
// ORIGIN SANDBOXING: reject URLs outside allowed origins (supports globs)
|
|
296
|
+
// -----------------------------------------------------------------------
|
|
297
|
+
if (this.allowedOrigins.length > 0 &&
|
|
298
|
+
!isAllowedOrigin(url, this.allowedOrigins)) {
|
|
299
|
+
return `[Blocked] URL ${url} is outside the allowed origins: ${this.allowedOrigins.join(", ")}`;
|
|
300
|
+
}
|
|
301
|
+
// -----------------------------------------------------------------------
|
|
302
|
+
// OPTIMIZED MODE: Use .md endpoints for docs pages
|
|
303
|
+
// -----------------------------------------------------------------------
|
|
304
|
+
if (this.agentMode === "optimized" && this.docsUrlPattern.test(url)) {
|
|
305
|
+
const mdUrl = toMarkdownUrl(url);
|
|
306
|
+
const response = await fetchFn(mdUrl, {
|
|
307
|
+
headers: this.mergeDocHeaders({
|
|
308
|
+
Accept: "text/markdown, text/plain",
|
|
309
|
+
"User-Agent": "Mozilla/5.0 (compatible; SanityEvalBot/1.0)",
|
|
310
|
+
}, mdUrl),
|
|
311
|
+
method: "GET",
|
|
312
|
+
});
|
|
313
|
+
if (response.ok) {
|
|
314
|
+
const contentType = response.headers.get("content-type") ?? "";
|
|
315
|
+
const text = await response.text();
|
|
316
|
+
// Verify we got markdown, not HTML (the .md endpoint returns
|
|
317
|
+
// Content-Type: text/markdown;charset=UTF-8)
|
|
318
|
+
if (contentType.includes("markdown") || !text.startsWith("<!DOCTYPE")) {
|
|
319
|
+
return text.slice(0, maxContentLength);
|
|
320
|
+
}
|
|
321
|
+
}
|
|
322
|
+
// If .md failed, fall through to naive strategy
|
|
323
|
+
}
|
|
324
|
+
// -----------------------------------------------------------------------
|
|
325
|
+
// NAIVE MODE (and fallback): Use Jina Reader for JS-rendered pages
|
|
326
|
+
// -----------------------------------------------------------------------
|
|
327
|
+
try {
|
|
328
|
+
const jinaUrl = `https://r.jina.ai/${url}`;
|
|
329
|
+
const jinaResponse = await fetchFn(jinaUrl, {
|
|
330
|
+
headers: {
|
|
331
|
+
Accept: "text/plain",
|
|
332
|
+
"User-Agent": "Mozilla/5.0 (compatible; SanityEvalBot/1.0)",
|
|
333
|
+
},
|
|
334
|
+
method: "GET",
|
|
335
|
+
});
|
|
336
|
+
if (jinaResponse.ok) {
|
|
337
|
+
const markdown = await jinaResponse.text();
|
|
338
|
+
if (markdown.length > 100) {
|
|
339
|
+
return this.cleanJinaContent(markdown).slice(0, maxContentLength);
|
|
340
|
+
}
|
|
341
|
+
}
|
|
342
|
+
}
|
|
343
|
+
catch {
|
|
344
|
+
// Jina unavailable — fall through to direct fetch
|
|
345
|
+
}
|
|
346
|
+
// -----------------------------------------------------------------------
|
|
347
|
+
// LAST RESORT: Direct fetch with HTML stripping
|
|
348
|
+
// -----------------------------------------------------------------------
|
|
349
|
+
const response = await fetchFn(url, {
|
|
350
|
+
headers: this.mergeDocHeaders({
|
|
351
|
+
Accept: "text/html,application/xhtml+xml,text/plain",
|
|
352
|
+
"User-Agent": "Mozilla/5.0 (compatible; SanityEvalBot/1.0)",
|
|
353
|
+
}, url),
|
|
354
|
+
method: "GET",
|
|
355
|
+
});
|
|
356
|
+
if (!response.ok) {
|
|
357
|
+
return JSON.stringify({
|
|
358
|
+
error: `HTTP ${response.status}: ${response.statusText}`,
|
|
359
|
+
url,
|
|
360
|
+
});
|
|
361
|
+
}
|
|
362
|
+
const contentType = response.headers.get("content-type") ?? "";
|
|
363
|
+
const text = await response.text();
|
|
364
|
+
if (contentType.includes("markdown")) {
|
|
365
|
+
return text.slice(0, maxContentLength);
|
|
366
|
+
}
|
|
367
|
+
if (contentType.includes("html")) {
|
|
368
|
+
return this.stripHtml(text).slice(0, maxContentLength);
|
|
369
|
+
}
|
|
370
|
+
return text.slice(0, maxContentLength);
|
|
371
|
+
}
|
|
372
|
+
// -------------------------------------------------------------------------
|
|
373
|
+
// list_docs — fetches llms.txt (optimized mode only)
|
|
374
|
+
// -------------------------------------------------------------------------
|
|
375
|
+
async executeListDocs(site, fetchFn) {
|
|
376
|
+
// Origin sandboxing for list_docs — block requests to off-origin sites
|
|
377
|
+
if (this.allowedOrigins.length > 0) {
|
|
378
|
+
const siteHost = site
|
|
379
|
+
.replace(/^https?:\/\//, "")
|
|
380
|
+
.replace(/\/.*$/, "")
|
|
381
|
+
.replace(/^www\./, "");
|
|
382
|
+
if (!isAllowedOrigin(`https://${siteHost}`, this.allowedOrigins)) {
|
|
383
|
+
return JSON.stringify({
|
|
384
|
+
error: `list_docs restricted to allowed origins: ${this.allowedOrigins.join(", ")}`,
|
|
385
|
+
suggestion: `Try list_docs("${this.priorityDomain}") instead.`,
|
|
386
|
+
});
|
|
387
|
+
}
|
|
388
|
+
}
|
|
389
|
+
// Use configured llms.txt URL, or construct from the provided site
|
|
390
|
+
const llmsTxtUrl = site.includes("llms.txt")
|
|
391
|
+
? site
|
|
392
|
+
: site === this.priorityDomain || site.includes(this.priorityDomain)
|
|
393
|
+
? this.llmsTxtUrl
|
|
394
|
+
: `https://${site.replace(/^https?:\/\//, "").replace(/\/.*$/, "")}/llms.txt`;
|
|
395
|
+
const response = await fetchFn(llmsTxtUrl, {
|
|
396
|
+
headers: this.mergeDocHeaders({
|
|
397
|
+
Accept: "text/plain, text/markdown",
|
|
398
|
+
"User-Agent": "Mozilla/5.0 (compatible; SanityEvalBot/1.0)",
|
|
399
|
+
}, llmsTxtUrl),
|
|
400
|
+
method: "GET",
|
|
401
|
+
});
|
|
402
|
+
if (!response.ok) {
|
|
403
|
+
return JSON.stringify({
|
|
404
|
+
error: `No llms.txt found at ${llmsTxtUrl} (HTTP ${response.status})`,
|
|
405
|
+
suggestion: "Try using web_search instead to find documentation.",
|
|
406
|
+
});
|
|
407
|
+
}
|
|
408
|
+
const contentType = response.headers.get("content-type") ?? "";
|
|
409
|
+
const text = await response.text();
|
|
410
|
+
// Verify it's actually an llms.txt file (markdown with links)
|
|
411
|
+
if (contentType.includes("html") && text.includes("<!DOCTYPE")) {
|
|
412
|
+
return JSON.stringify({
|
|
413
|
+
error: `${llmsTxtUrl} returned HTML, not a docs listing.`,
|
|
414
|
+
suggestion: "Try using web_search instead to find documentation.",
|
|
415
|
+
});
|
|
416
|
+
}
|
|
417
|
+
// Return the full llms.txt — it's already a clean markdown listing
|
|
418
|
+
// Trim to reasonable size (llms.txt can be long)
|
|
419
|
+
return text.slice(0, 15000);
|
|
420
|
+
}
|
|
421
|
+
async executeTool(name, argsJson, fetchFn) {
|
|
422
|
+
try {
|
|
423
|
+
const args = JSON.parse(argsJson);
|
|
424
|
+
switch (name) {
|
|
425
|
+
case "fetch_page":
|
|
426
|
+
return await this.executeFetchPage(args.url, fetchFn);
|
|
427
|
+
case "list_docs":
|
|
428
|
+
return await this.executeListDocs(args.site, fetchFn);
|
|
429
|
+
case "web_search":
|
|
430
|
+
return await this.executeWebSearch(args.query, fetchFn);
|
|
431
|
+
default:
|
|
432
|
+
return JSON.stringify({ error: `Unknown tool: ${name}` });
|
|
433
|
+
}
|
|
434
|
+
}
|
|
435
|
+
catch (err) {
|
|
436
|
+
const error = err;
|
|
437
|
+
return JSON.stringify({ error: error.message });
|
|
438
|
+
}
|
|
439
|
+
}
|
|
440
|
+
// -------------------------------------------------------------------------
// web_search — Google Custom Search with Jina/DuckDuckGo fallback
// -------------------------------------------------------------------------
|
|
443
|
+
async executeWebSearch(query, fetchFn) {
|
|
444
|
+
let results = [];
|
|
445
|
+
// Try Google Custom Search API if configured (both modes)
|
|
446
|
+
const googleApiKey = process.env.GOOGLE_SEARCH_API_KEY;
|
|
447
|
+
const googleCseId = process.env.GOOGLE_CSE_ID;
|
|
448
|
+
if (googleApiKey && googleCseId) {
|
|
449
|
+
const params = new URLSearchParams({
|
|
450
|
+
cx: googleCseId,
|
|
451
|
+
key: googleApiKey,
|
|
452
|
+
num: "5",
|
|
453
|
+
q: query,
|
|
454
|
+
});
|
|
455
|
+
const response = await fetchFn(`https://www.googleapis.com/customsearch/v1?${params}`);
|
|
456
|
+
const data = (await response.json());
|
|
457
|
+
if (data.items?.length) {
|
|
458
|
+
results = data.items.map((item) => ({
|
|
459
|
+
snippet: item.snippet,
|
|
460
|
+
title: item.title,
|
|
461
|
+
url: item.link,
|
|
462
|
+
}));
|
|
463
|
+
}
|
|
464
|
+
}
|
|
465
|
+
// Fallback: Use Jina Reader to search via DuckDuckGo
|
|
466
|
+
if (results.length === 0) {
|
|
467
|
+
const ddgUrl = `https://duckduckgo.com/?q=${encodeURIComponent(query)}`;
|
|
468
|
+
try {
|
|
469
|
+
const jinaResponse = await fetchFn(`https://r.jina.ai/${ddgUrl}`, {
|
|
470
|
+
headers: { Accept: "text/plain" },
|
|
471
|
+
});
|
|
472
|
+
if (jinaResponse.ok) {
|
|
473
|
+
const text = await jinaResponse.text();
|
|
474
|
+
results = this.parseSearchResults(text);
|
|
475
|
+
}
|
|
476
|
+
}
|
|
477
|
+
catch {
|
|
478
|
+
// Jina search unavailable
|
|
479
|
+
}
|
|
480
|
+
}
|
|
481
|
+
// Final fallback: construct likely Sanity doc URLs from the query
|
|
482
|
+
if (results.length === 0) {
|
|
483
|
+
const sanitized = query
|
|
484
|
+
.toLowerCase()
|
|
485
|
+
.replace(/sanity\.?(io)?/gi, "")
|
|
486
|
+
.trim();
|
|
487
|
+
const slugGuess = sanitized
|
|
488
|
+
.replace(/\s+/g, "-")
|
|
489
|
+
.replace(/[^a-z0-9-]/g, "");
|
|
490
|
+
results = [
|
|
491
|
+
{
|
|
492
|
+
snippet: `Try the documentation page for: ${sanitized}`,
|
|
493
|
+
title: `Documentation: ${query}`,
|
|
494
|
+
url: `${this.docBaseUrl}/${slugGuess}`,
|
|
495
|
+
},
|
|
496
|
+
];
|
|
497
|
+
}
|
|
498
|
+
// -----------------------------------------------------------------------
|
|
499
|
+
// ORIGIN FILTERING: in "origin-only" mode, restrict results to allowed
|
|
500
|
+
// origins. This filters search results the same way fetch_page is
|
|
501
|
+
// sandboxed — the agent only sees results from permitted domains.
|
|
502
|
+
// -----------------------------------------------------------------------
|
|
503
|
+
if (this.searchMode === "origin-only" && this.allowedOrigins.length > 0) {
|
|
504
|
+
const filtered = results.filter((r) => isAllowedOrigin(r.url, this.allowedOrigins));
|
|
505
|
+
if (filtered.length > 0) {
|
|
506
|
+
return JSON.stringify(filtered.slice(0, 8));
|
|
507
|
+
}
|
|
508
|
+
// No on-origin results — return a helpful fallback
|
|
509
|
+
return JSON.stringify([
|
|
510
|
+
{
|
|
511
|
+
snippet: `Search was restricted to ${this.allowedOrigins.join(", ")}. ` +
|
|
512
|
+
`Try fetching docs directly at ${this.docBaseUrl} or use list_docs.`,
|
|
513
|
+
title: "No results found within allowed origins",
|
|
514
|
+
url: this.docBaseUrl,
|
|
515
|
+
},
|
|
516
|
+
]);
|
|
517
|
+
}
|
|
518
|
+
// "open" mode: return all results (priority-sorted by parseSearchResults)
|
|
519
|
+
return JSON.stringify(results.slice(0, 8));
|
|
520
|
+
}
|
|
521
|
+
/**
|
|
522
|
+
* Build the set of tools available to the agent based on search mode
|
|
523
|
+
* and agent mode. When searchMode is "off", web_search is excluded
|
|
524
|
+
* entirely — the model can't call what it can't see.
|
|
525
|
+
*/
|
|
526
|
+
getAvailableTools() {
|
|
527
|
+
const tools = [];
|
|
528
|
+
// web_search: included unless search mode is "off"
|
|
529
|
+
if (this.searchMode !== "off") {
|
|
530
|
+
tools.push(AGENT_TOOLS[0]); // web_search
|
|
531
|
+
}
|
|
532
|
+
// fetch_page: always included (origin sandboxing handles restriction)
|
|
533
|
+
tools.push(AGENT_TOOLS[1]); // fetch_page
|
|
534
|
+
// list_docs: optimized mode only
|
|
535
|
+
if (this.agentMode === "optimized") {
|
|
536
|
+
tools.push(...OPTIMIZED_EXTRA_TOOLS);
|
|
537
|
+
}
|
|
538
|
+
return tools;
|
|
539
|
+
}
|
|
540
|
+
/**
|
|
541
|
+
* Merge custom headers into a request's headers.
|
|
542
|
+
* Custom headers are injected into doc-site requests only — never into
|
|
543
|
+
* external APIs (OpenAI, Jina, Google).
|
|
544
|
+
*/
|
|
545
|
+
mergeDocHeaders(baseHeaders, url) {
|
|
546
|
+
if (Object.keys(this.customHeaders).length === 0)
|
|
547
|
+
return baseHeaders;
|
|
548
|
+
// Only inject custom headers for requests to the doc site
|
|
549
|
+
try {
|
|
550
|
+
const urlHost = new URL(url).hostname.replace(/^www\./, "");
|
|
551
|
+
const docHost = new URL(this.docBaseUrl).hostname.replace(/^www\./, "");
|
|
552
|
+
if (urlHost !== docHost && !urlHost.endsWith(`.${docHost}`)) {
|
|
553
|
+
return baseHeaders;
|
|
554
|
+
}
|
|
555
|
+
}
|
|
556
|
+
catch {
|
|
557
|
+
return baseHeaders;
|
|
558
|
+
}
|
|
559
|
+
return { ...baseHeaders, ...this.customHeaders };
|
|
560
|
+
}
|
|
561
|
+
/**
|
|
562
|
+
* Parses search results from Jina Reader markdown output.
|
|
563
|
+
*/
|
|
564
|
+
parseSearchResults(markdown) {
|
|
565
|
+
const results = [];
|
|
566
|
+
const lines = markdown.split("\n");
|
|
567
|
+
let currentTitle = "";
|
|
568
|
+
let currentUrl = "";
|
|
569
|
+
let currentSnippet = "";
|
|
570
|
+
for (const line of lines) {
|
|
571
|
+
const linkMatch = line.match(/\[([^\]]+)\]\((https?:\/\/[^)]+)\)/);
|
|
572
|
+
if (linkMatch) {
|
|
573
|
+
if (currentUrl && currentTitle) {
|
|
574
|
+
results.push({
|
|
575
|
+
snippet: currentSnippet || currentTitle,
|
|
576
|
+
title: currentTitle,
|
|
577
|
+
url: currentUrl,
|
|
578
|
+
});
|
|
579
|
+
}
|
|
580
|
+
currentTitle = linkMatch[1];
|
|
581
|
+
currentUrl = linkMatch[2];
|
|
582
|
+
currentSnippet = "";
|
|
583
|
+
continue;
|
|
584
|
+
}
|
|
585
|
+
const urlMatch = line.match(/^(https?:\/\/\S+)/);
|
|
586
|
+
if (urlMatch && !currentUrl) {
|
|
587
|
+
currentUrl = urlMatch[1];
|
|
588
|
+
continue;
|
|
589
|
+
}
|
|
590
|
+
if (currentUrl && line.trim().length > 20) {
|
|
591
|
+
currentSnippet += (currentSnippet ? " " : "") + line.trim();
|
|
592
|
+
}
|
|
593
|
+
}
|
|
594
|
+
if (currentUrl && currentTitle) {
|
|
595
|
+
results.push({
|
|
596
|
+
snippet: currentSnippet || currentTitle,
|
|
597
|
+
title: currentTitle,
|
|
598
|
+
url: currentUrl,
|
|
599
|
+
});
|
|
600
|
+
}
|
|
601
|
+
// Prioritize results from the configured documentation domain
|
|
602
|
+
const domain = this.priorityDomain;
|
|
603
|
+
const priorityResults = results.filter((r) => r.url.includes(domain));
|
|
604
|
+
const otherResults = results.filter((r) => !r.url.includes(domain));
|
|
605
|
+
return [...priorityResults, ...otherResults];
|
|
606
|
+
}
|
|
607
|
+
/**
|
|
608
|
+
* Runs the agentic tool-calling loop. Routes to OpenAI or Anthropic
|
|
609
|
+
* based on the `provider` config field.
|
|
610
|
+
*/
|
|
611
|
+
async runAgenticLoop(prompt) {
|
|
612
|
+
const providerType = this.detectProvider();
|
|
613
|
+
return providerType === "anthropic"
|
|
614
|
+
? this.runAnthropicLoop(prompt)
|
|
615
|
+
: this.runOpenAILoop(prompt);
|
|
616
|
+
}
|
|
617
|
+
// -------------------------------------------------------------------------
// Anthropic agentic loop
// -------------------------------------------------------------------------
|
|
620
|
+
async runAnthropicLoop(prompt) {
|
|
621
|
+
const model = this.config.model || "claude-sonnet-4-20250514";
|
|
622
|
+
const temperature = this.config.temperature ?? 0.2;
|
|
623
|
+
const maxTokens = this.config.max_tokens || 4096;
|
|
624
|
+
const maxToolRounds = this.config.maxToolRounds || 5;
|
|
625
|
+
const apiKey = this.config.apiKey || process.env.ANTHROPIC_API_KEY;
|
|
626
|
+
if (!apiKey) {
|
|
627
|
+
return {
|
|
628
|
+
error: "ANTHROPIC_API_KEY not set. Configure it in env or provider config.",
|
|
629
|
+
output: undefined,
|
|
630
|
+
};
|
|
631
|
+
}
|
|
632
|
+
const fetchFn = this.recorder.isRunning()
|
|
633
|
+
? this.recorder.fetch.bind(this.recorder)
|
|
634
|
+
: globalThis.fetch;
|
|
635
|
+
const openAiTools = this.getAvailableTools();
|
|
636
|
+
const tools = this.toAnthropicTools(openAiTools);
|
|
637
|
+
const systemPrompt = this.buildSystemPrompt();
|
|
638
|
+
// Anthropic uses a separate `system` field, not a system message in the array
|
|
639
|
+
const anthropicMessages = [
|
|
640
|
+
{ content: prompt, role: "user" },
|
|
641
|
+
];
|
|
642
|
+
let inputTokens = 0;
|
|
643
|
+
let outputTokens = 0;
|
|
644
|
+
const startTime = Date.now();
|
|
645
|
+
for (let round = 0; round <= maxToolRounds; round++) {
|
|
646
|
+
const isLastRound = round === maxToolRounds;
|
|
647
|
+
// On the last round, omit tools entirely to force a text-only response.
|
|
648
|
+
// Anthropic doesn't support tool_choice: "none" — the way to disable
|
|
649
|
+
// tools is to simply not include them in the request.
|
|
650
|
+
// We also inject a synthesis prompt so the model knows to produce
|
|
651
|
+
// a final answer from whatever context it has gathered so far.
|
|
652
|
+
if (isLastRound) {
|
|
653
|
+
// Ensure the last message is a user message (Anthropic requires
|
|
654
|
+
// alternating user/assistant). If the last message is already a
|
|
655
|
+
// user message (tool_result), we can append text to it or add a
|
|
656
|
+
// new user message.
|
|
657
|
+
const lastMsg = anthropicMessages[anthropicMessages.length - 1];
|
|
658
|
+
const synthesisText = "You've gathered enough information. Based on the documentation " +
|
|
659
|
+
"and context you've collected, provide your complete, final answer now. " +
|
|
660
|
+
"Include all necessary code, imports, and configuration.";
|
|
661
|
+
if (lastMsg?.role === "user" && Array.isArray(lastMsg.content)) {
|
|
662
|
+
// Last message is tool_result blocks — append a text block
|
|
663
|
+
;
|
|
664
|
+
lastMsg.content.push({
|
|
665
|
+
text: synthesisText,
|
|
666
|
+
type: "text",
|
|
667
|
+
});
|
|
668
|
+
}
|
|
669
|
+
else {
|
|
670
|
+
anthropicMessages.push({
|
|
671
|
+
content: synthesisText,
|
|
672
|
+
role: "user",
|
|
673
|
+
});
|
|
674
|
+
}
|
|
675
|
+
}
|
|
676
|
+
const body = {
|
|
677
|
+
max_tokens: maxTokens,
|
|
678
|
+
messages: anthropicMessages,
|
|
679
|
+
model,
|
|
680
|
+
system: systemPrompt,
|
|
681
|
+
temperature,
|
|
682
|
+
};
|
|
683
|
+
if (!isLastRound) {
|
|
684
|
+
body.tools = tools;
|
|
685
|
+
}
|
|
686
|
+
const response = await fetchFn("https://api.anthropic.com/v1/messages", {
|
|
687
|
+
body: JSON.stringify(body),
|
|
688
|
+
headers: {
|
|
689
|
+
"anthropic-version": "2023-06-01",
|
|
690
|
+
"Content-Type": "application/json",
|
|
691
|
+
"x-api-key": apiKey,
|
|
692
|
+
},
|
|
693
|
+
method: "POST",
|
|
694
|
+
});
|
|
695
|
+
const data = (await response.json());
|
|
696
|
+
if (data.error) {
|
|
697
|
+
return {
|
|
698
|
+
error: data.error.message ??
|
|
699
|
+
`Anthropic API error: ${JSON.stringify(data.error)}`,
|
|
700
|
+
output: undefined,
|
|
701
|
+
};
|
|
702
|
+
}
|
|
703
|
+
inputTokens += data.usage?.input_tokens ?? 0;
|
|
704
|
+
outputTokens += data.usage?.output_tokens ?? 0;
|
|
705
|
+
if (!data.content?.length) {
|
|
706
|
+
// Empty content with end_turn is valid (model chose to say nothing).
|
|
707
|
+
// Return empty output rather than treating as an error.
|
|
708
|
+
return {
|
|
709
|
+
cost: calculateCost(model, inputTokens, outputTokens),
|
|
710
|
+
metadata: {
|
|
711
|
+
agentMode: this.agentMode,
|
|
712
|
+
emptyResponse: true,
|
|
713
|
+
latencyMs: Date.now() - startTime,
|
|
714
|
+
model,
|
|
715
|
+
provider: "anthropic",
|
|
716
|
+
toolRounds: round,
|
|
717
|
+
},
|
|
718
|
+
output: "",
|
|
719
|
+
tokenUsage: {
|
|
720
|
+
completion: outputTokens,
|
|
721
|
+
prompt: inputTokens,
|
|
722
|
+
total: inputTokens + outputTokens,
|
|
723
|
+
},
|
|
724
|
+
};
|
|
725
|
+
}
|
|
726
|
+
// Add the assistant response to message history
|
|
727
|
+
anthropicMessages.push({
|
|
728
|
+
content: data.content,
|
|
729
|
+
role: "assistant",
|
|
730
|
+
});
|
|
731
|
+
// Check if the model wants to use tools
|
|
732
|
+
const toolUseBlocks = data.content.filter((block) => block.type === "tool_use");
|
|
733
|
+
if (data.stop_reason !== "tool_use" || toolUseBlocks.length === 0) {
|
|
734
|
+
// Model is done — extract text response
|
|
735
|
+
const textBlocks = data.content.filter((block) => block.type === "text");
|
|
736
|
+
const output = textBlocks.map((b) => b.text).join("\n") || "";
|
|
737
|
+
return {
|
|
738
|
+
cost: calculateCost(model, inputTokens, outputTokens),
|
|
739
|
+
metadata: {
|
|
740
|
+
agentMode: this.agentMode,
|
|
741
|
+
latencyMs: Date.now() - startTime,
|
|
742
|
+
model,
|
|
743
|
+
provider: "anthropic",
|
|
744
|
+
toolRounds: round,
|
|
745
|
+
},
|
|
746
|
+
output,
|
|
747
|
+
tokenUsage: {
|
|
748
|
+
completion: outputTokens,
|
|
749
|
+
prompt: inputTokens,
|
|
750
|
+
total: inputTokens + outputTokens,
|
|
751
|
+
},
|
|
752
|
+
};
|
|
753
|
+
}
|
|
754
|
+
// Execute tool calls and add results
|
|
755
|
+
const toolResults = [];
|
|
756
|
+
for (const toolUse of toolUseBlocks) {
|
|
757
|
+
const argsJson = JSON.stringify(toolUse.input);
|
|
758
|
+
const result = await this.executeTool(toolUse.name, argsJson, fetchFn);
|
|
759
|
+
toolResults.push({
|
|
760
|
+
content: result,
|
|
761
|
+
tool_use_id: toolUse.id,
|
|
762
|
+
type: "tool_result",
|
|
763
|
+
});
|
|
764
|
+
}
|
|
765
|
+
// Add tool results as a user message (Anthropic's format)
|
|
766
|
+
anthropicMessages.push({
|
|
767
|
+
content: toolResults,
|
|
768
|
+
role: "user",
|
|
769
|
+
});
|
|
770
|
+
}
|
|
771
|
+
// Safety net: exhausted tool rounds
|
|
772
|
+
const lastAssistantMsg = [...anthropicMessages]
|
|
773
|
+
.reverse()
|
|
774
|
+
.find((m) => m.role === "assistant");
|
|
775
|
+
let lastText = "";
|
|
776
|
+
if (lastAssistantMsg && Array.isArray(lastAssistantMsg.content)) {
|
|
777
|
+
const textBlocks = lastAssistantMsg.content.filter((b) => b.type === "text");
|
|
778
|
+
lastText = textBlocks.map((b) => b.text).join("\n");
|
|
779
|
+
}
|
|
780
|
+
return {
|
|
781
|
+
cost: calculateCost(model, inputTokens, outputTokens),
|
|
782
|
+
metadata: {
|
|
783
|
+
agentMode: this.agentMode,
|
|
784
|
+
exhaustedRounds: true,
|
|
785
|
+
latencyMs: Date.now() - startTime,
|
|
786
|
+
model,
|
|
787
|
+
provider: "anthropic",
|
|
788
|
+
toolRounds: maxToolRounds,
|
|
789
|
+
},
|
|
790
|
+
output: lastText ||
|
|
791
|
+
"[Agent exhausted tool rounds without producing a final answer]",
|
|
792
|
+
tokenUsage: {
|
|
793
|
+
completion: outputTokens,
|
|
794
|
+
prompt: inputTokens,
|
|
795
|
+
total: inputTokens + outputTokens,
|
|
796
|
+
},
|
|
797
|
+
};
|
|
798
|
+
}
|
|
799
|
+
// -------------------------------------------------------------------------
// OpenAI agentic loop
// -------------------------------------------------------------------------
|
|
802
|
+
async runOpenAILoop(prompt) {
|
|
803
|
+
const model = this.config.model || "gpt-4o";
|
|
804
|
+
const temperature = this.config.temperature ?? 0.2;
|
|
805
|
+
const maxToolRounds = this.config.maxToolRounds || 5;
|
|
806
|
+
const apiKey = this.config.apiKey || process.env.OPENAI_API_KEY;
|
|
807
|
+
// Newer OpenAI models (gpt-5.x, o-series) use max_completion_tokens
|
|
808
|
+
// instead of max_tokens. Detect from config or model name.
|
|
809
|
+
const useMaxCompletionTokens = this.config.max_output_tokens != null ||
|
|
810
|
+
this.config.max_completion_tokens != null ||
|
|
811
|
+
model.startsWith("gpt-5") ||
|
|
812
|
+
model.startsWith("o3") ||
|
|
813
|
+
model.startsWith("o4");
|
|
814
|
+
const maxTokensValue = this.config.max_output_tokens ??
|
|
815
|
+
this.config.max_completion_tokens ??
|
|
816
|
+
this.config.max_tokens ??
|
|
817
|
+
4096;
|
|
818
|
+
const tokenLimitParam = useMaxCompletionTokens
|
|
819
|
+
? { max_completion_tokens: maxTokensValue }
|
|
820
|
+
: { max_tokens: maxTokensValue };
|
|
821
|
+
if (!apiKey) {
|
|
822
|
+
return {
|
|
823
|
+
error: "OPENAI_API_KEY not set. Configure it in env or provider config.",
|
|
824
|
+
output: undefined,
|
|
825
|
+
};
|
|
826
|
+
}
|
|
827
|
+
const fetchFn = this.recorder.isRunning()
|
|
828
|
+
? this.recorder.fetch.bind(this.recorder)
|
|
829
|
+
: globalThis.fetch;
|
|
830
|
+
const tools = this.getAvailableTools();
|
|
831
|
+
const systemPrompt = this.buildSystemPrompt();
|
|
832
|
+
const messages = [
|
|
833
|
+
{ content: systemPrompt, role: "system" },
|
|
834
|
+
{ content: prompt, role: "user" },
|
|
835
|
+
];
|
|
836
|
+
let totalTokens = 0;
|
|
837
|
+
let promptTokens = 0;
|
|
838
|
+
let completionTokens = 0;
|
|
839
|
+
const startTime = Date.now();
|
|
840
|
+
for (let round = 0; round <= maxToolRounds; round++) {
|
|
841
|
+
const isLastRound = round === maxToolRounds;
|
|
842
|
+
const response = await fetchFn("https://api.openai.com/v1/chat/completions", {
|
|
843
|
+
body: JSON.stringify({
|
|
844
|
+
...tokenLimitParam,
|
|
845
|
+
messages,
|
|
846
|
+
model,
|
|
847
|
+
temperature,
|
|
848
|
+
tool_choice: isLastRound ? "none" : "auto",
|
|
849
|
+
tools,
|
|
850
|
+
}),
|
|
851
|
+
headers: {
|
|
852
|
+
Authorization: `Bearer ${apiKey}`,
|
|
853
|
+
"Content-Type": "application/json",
|
|
854
|
+
},
|
|
855
|
+
method: "POST",
|
|
856
|
+
});
|
|
857
|
+
const data = (await response.json());
|
|
858
|
+
if (data.error) {
|
|
859
|
+
return {
|
|
860
|
+
error: data.error.message ?? "Unknown OpenAI error",
|
|
861
|
+
output: undefined,
|
|
862
|
+
};
|
|
863
|
+
}
|
|
864
|
+
totalTokens += data.usage?.total_tokens ?? 0;
|
|
865
|
+
promptTokens += data.usage?.prompt_tokens ?? 0;
|
|
866
|
+
completionTokens += data.usage?.completion_tokens ?? 0;
|
|
867
|
+
const assistantMessage = data.choices?.[0]?.message;
|
|
868
|
+
const finishReason = data.choices?.[0]?.finish_reason;
|
|
869
|
+
if (!assistantMessage) {
|
|
870
|
+
return { error: "No response from model", output: undefined };
|
|
871
|
+
}
|
|
872
|
+
messages.push(assistantMessage);
|
|
873
|
+
if (finishReason !== "tool_calls" ||
|
|
874
|
+
!assistantMessage.tool_calls?.length) {
|
|
875
|
+
return {
|
|
876
|
+
cost: calculateCost(model, promptTokens, completionTokens),
|
|
877
|
+
metadata: {
|
|
878
|
+
agentMode: this.agentMode,
|
|
879
|
+
latencyMs: Date.now() - startTime,
|
|
880
|
+
model,
|
|
881
|
+
provider: "openai",
|
|
882
|
+
toolRounds: round,
|
|
883
|
+
},
|
|
884
|
+
output: assistantMessage.content ?? "",
|
|
885
|
+
tokenUsage: {
|
|
886
|
+
completion: completionTokens,
|
|
887
|
+
prompt: promptTokens,
|
|
888
|
+
total: totalTokens,
|
|
889
|
+
},
|
|
890
|
+
};
|
|
891
|
+
}
|
|
892
|
+
for (const toolCall of assistantMessage.tool_calls) {
|
|
893
|
+
const toolResult = await this.executeTool(toolCall.function.name, toolCall.function.arguments, fetchFn);
|
|
894
|
+
messages.push({
|
|
895
|
+
content: toolResult,
|
|
896
|
+
role: "tool",
|
|
897
|
+
tool_call_id: toolCall.id,
|
|
898
|
+
});
|
|
899
|
+
}
|
|
900
|
+
}
|
|
901
|
+
const lastAssistant = messages
|
|
902
|
+
.filter((m) => m.role === "assistant" && m.content)
|
|
903
|
+
.pop();
|
|
904
|
+
return {
|
|
905
|
+
cost: calculateCost(model, promptTokens, completionTokens),
|
|
906
|
+
metadata: {
|
|
907
|
+
agentMode: this.agentMode,
|
|
908
|
+
exhaustedRounds: true,
|
|
909
|
+
latencyMs: Date.now() - startTime,
|
|
910
|
+
model,
|
|
911
|
+
provider: "openai",
|
|
912
|
+
toolRounds: maxToolRounds,
|
|
913
|
+
},
|
|
914
|
+
output: lastAssistant?.content ??
|
|
915
|
+
"[Agent exhausted tool rounds without producing a final answer]",
|
|
916
|
+
tokenUsage: {
|
|
917
|
+
completion: completionTokens,
|
|
918
|
+
prompt: promptTokens,
|
|
919
|
+
total: totalTokens,
|
|
920
|
+
},
|
|
921
|
+
};
|
|
922
|
+
}
|
|
923
|
+
/**
|
|
924
|
+
* Strips HTML tags and normalizes whitespace. Fallback for when
|
|
925
|
+
* neither .md endpoints nor Jina are available.
|
|
926
|
+
*/
|
|
927
|
+
stripHtml(html) {
|
|
928
|
+
return html
|
|
929
|
+
.replace(/<script[^>]*>[\s\S]*?<\/script>/gi, "")
|
|
930
|
+
.replace(/<style[^>]*>[\s\S]*?<\/style>/gi, "")
|
|
931
|
+
.replace(/<svg[^>]*>[\s\S]*?<\/svg>/gi, "")
|
|
932
|
+
.replace(/<[^>]+>/g, " ")
|
|
933
|
+
.replace(/&/g, "&")
|
|
934
|
+
.replace(/</g, "<")
|
|
935
|
+
.replace(/>/g, ">")
|
|
936
|
+
.replace(/"/g, '"')
|
|
937
|
+
.replace(/'/g, "'")
|
|
938
|
+
.replace(/ /g, " ")
|
|
939
|
+
.replace(/\s+/g, " ")
|
|
940
|
+
.trim();
|
|
941
|
+
}
|
|
942
|
+
/**
|
|
943
|
+
* Convert our ToolDefinition[] to Anthropic's tool format.
|
|
944
|
+
* Anthropic uses `input_schema` instead of `parameters`.
|
|
945
|
+
*/
|
|
946
|
+
toAnthropicTools(tools) {
|
|
947
|
+
return tools.map((t) => ({
|
|
948
|
+
description: t.function.description,
|
|
949
|
+
input_schema: t.function
|
|
950
|
+
.parameters,
|
|
951
|
+
name: t.function.name,
|
|
952
|
+
}));
|
|
953
|
+
}
|
|
954
|
+
}
|
|
955
|
+
/**
 * Builds a regex that matches documentation URLs for a given base URL.
 * Handles both http and https, with and without www prefix.
 *
 * The host (minus any leading `www.`) and the path (minus a trailing slash)
 * are matched literally: every regex metacharacter in them is escaped, so a
 * base such as `/docs/v1.0` does not also match `/docs/v1x0`. URLs carrying
 * a query string or fragment do not match.
 *
 * @example
 * buildDocsUrlPattern("https://www.sanity.io/docs")
 * // matches: https://sanity.io/docs/schema-types, https://www.sanity.io/docs/
 *
 * @param {string} baseUrl - Absolute base URL of the documentation site.
 * @returns {RegExp} Anchored pattern for the base URL and any path below it.
 * @throws {TypeError} If `baseUrl` is not a valid absolute URL.
 */
function buildDocsUrlPattern(baseUrl) {
    const escapeForRegex = (text) => text.replace(/[.*+?^${}()|[\]\\\/]/g, "\\$&");
    const { hostname, pathname } = new URL(baseUrl);
    const escapedHost = escapeForRegex(hostname.replace(/^www\./, ""));
    const escapedPath = escapeForRegex(pathname.replace(/\/$/, ""));
    return new RegExp(`^https?:\\/\\/(www\\.)?${escapedHost}${escapedPath}(\\/[^?#]*)?$`);
}
|
|
969
|
+
// ---------------------------------------------------------------------------
|
|
970
|
+
// Agentic provider implementation
|
|
971
|
+
// ---------------------------------------------------------------------------
|
|
972
|
+
/**
 * Converts a documentation URL to its .md equivalent.
 * e.g. https://www.sanity.io/docs/schema-types → https://www.sanity.io/docs/schema-types.md
 *
 * A trailing slash on the path is dropped, an existing `.md` suffix is kept
 * as-is, and any query string or fragment is preserved after the inserted
 * extension (`/a?x=1` → `/a.md?x=1`).
 *
 * @param {string} url - Documentation page URL.
 * @returns {string} The same URL with its path ending in `.md`.
 */
function toMarkdownUrl(url) {
    // Split off "?query" / "#fragment" so the extension lands on the path.
    const suffixStart = url.search(/[?#]/);
    const path = suffixStart === -1 ? url : url.slice(0, suffixStart);
    const suffix = suffixStart === -1 ? "" : url.slice(suffixStart);
    // Strip trailing slash
    const clean = path.replace(/\/$/, "");
    // Don't double-add .md
    const markdownPath = clean.endsWith(".md") ? clean : `${clean}.md`;
    return `${markdownPath}${suffix}`;
}
|