@sanity/ailf 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (530)
  1. package/README.md +89 -0
  2. package/bin/ailf.js +64 -0
  3. package/canonical/grader-references/README.md +88 -0
  4. package/canonical/grader-references/groq.yaml +234 -0
  5. package/canonical/grader-references/studio-setup.yaml +275 -0
  6. package/canonical/reference-solutions/.gitkeep +1 -0
  7. package/canonical/reference-solutions/frameworks/nuxt.ts +119 -0
  8. package/canonical/reference-solutions/frameworks/remix.tsx +100 -0
  9. package/canonical/reference-solutions/functions/publish-webhook.ts +60 -0
  10. package/canonical/reference-solutions/groq/advanced-filtering.ts +379 -0
  11. package/canonical/reference-solutions/groq/blog-queries.ts +137 -0
  12. package/canonical/reference-solutions/groq/joins-references.ts +300 -0
  13. package/canonical/reference-solutions/nextjs/app-router-integration.tsx +128 -0
  14. package/canonical/reference-solutions/studio-setup/blog-schema.ts +143 -0
  15. package/canonical/reference-solutions/studio-setup/custom-tool.tsx +78 -0
  16. package/canonical/reference-solutions/visual-editing/live-preview.tsx +137 -0
  17. package/canonical/reference-solutions/visual-editing/presentation-nextjs.tsx +130 -0
  18. package/config/airbyte/ai_literacy_framework.connector.yaml +639 -0
  19. package/config/bigquery/README.md +74 -0
  20. package/config/bigquery/views/area_scores.sql +87 -0
  21. package/config/bigquery/views/reports.sql +49 -0
  22. package/config/features.yaml +116 -0
  23. package/config/models.yaml +115 -0
  24. package/config/prompts.yaml +75 -0
  25. package/config/rubrics.yaml +62 -0
  26. package/config/schedules.yaml +43 -0
  27. package/config/sinks.yaml +54 -0
  28. package/config/sources.yaml +51 -0
  29. package/config/thresholds.yaml +49 -0
  30. package/dist/_vendor/ailf-core/examples/index.d.ts +190 -0
  31. package/dist/_vendor/ailf-core/examples/index.js +285 -0
  32. package/dist/_vendor/ailf-core/index.d.ts +17 -0
  33. package/dist/_vendor/ailf-core/index.js +17 -0
  34. package/dist/_vendor/ailf-core/ports/cache-store.d.ts +72 -0
  35. package/dist/_vendor/ailf-core/ports/cache-store.js +17 -0
  36. package/dist/_vendor/ailf-core/ports/config-source.d.ts +33 -0
  37. package/dist/_vendor/ailf-core/ports/config-source.js +15 -0
  38. package/dist/_vendor/ailf-core/ports/context.d.ts +172 -0
  39. package/dist/_vendor/ailf-core/ports/context.js +14 -0
  40. package/dist/_vendor/ailf-core/ports/doc-fetcher.d.ts +131 -0
  41. package/dist/_vendor/ailf-core/ports/doc-fetcher.js +12 -0
  42. package/dist/_vendor/ailf-core/ports/eval-runner.d.ts +24 -0
  43. package/dist/_vendor/ailf-core/ports/eval-runner.js +8 -0
  44. package/dist/_vendor/ailf-core/ports/index.d.ts +15 -0
  45. package/dist/_vendor/ailf-core/ports/index.js +7 -0
  46. package/dist/_vendor/ailf-core/ports/logger.d.ts +36 -0
  47. package/dist/_vendor/ailf-core/ports/logger.js +11 -0
  48. package/dist/_vendor/ailf-core/ports/pipeline-step.d.ts +46 -0
  49. package/dist/_vendor/ailf-core/ports/pipeline-step.js +8 -0
  50. package/dist/_vendor/ailf-core/ports/task-source.d.ts +159 -0
  51. package/dist/_vendor/ailf-core/ports/task-source.js +72 -0
  52. package/dist/_vendor/ailf-core/schemas/callback-payload.d.ts +24 -0
  53. package/dist/_vendor/ailf-core/schemas/callback-payload.js +29 -0
  54. package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +55 -0
  55. package/dist/_vendor/ailf-core/schemas/eval-config.js +78 -0
  56. package/dist/_vendor/ailf-core/schemas/index.d.ts +16 -0
  57. package/dist/_vendor/ailf-core/schemas/index.js +16 -0
  58. package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +125 -0
  59. package/dist/_vendor/ailf-core/schemas/pipeline-request.js +67 -0
  60. package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +531 -0
  61. package/dist/_vendor/ailf-core/schemas/pipeline.js +318 -0
  62. package/dist/_vendor/ailf-core/schemas/schedules.d.ts +68 -0
  63. package/dist/_vendor/ailf-core/schemas/schedules.js +74 -0
  64. package/dist/_vendor/ailf-core/schemas/sinks.d.ts +207 -0
  65. package/dist/_vendor/ailf-core/schemas/sinks.js +108 -0
  66. package/dist/_vendor/ailf-core/services/comparison-formatters.d.ts +18 -0
  67. package/dist/_vendor/ailf-core/services/comparison-formatters.js +189 -0
  68. package/dist/_vendor/ailf-core/services/config-helpers.d.ts +41 -0
  69. package/dist/_vendor/ailf-core/services/config-helpers.js +86 -0
  70. package/dist/_vendor/ailf-core/services/index.d.ts +12 -0
  71. package/dist/_vendor/ailf-core/services/index.js +12 -0
  72. package/dist/_vendor/ailf-core/services/scoring.d.ts +49 -0
  73. package/dist/_vendor/ailf-core/services/scoring.js +222 -0
  74. package/dist/_vendor/ailf-core/types/index.d.ts +1082 -0
  75. package/dist/_vendor/ailf-core/types/index.js +21 -0
  76. package/dist/_vendor/ailf-core/types/scoring-input.d.ts +54 -0
  77. package/dist/_vendor/ailf-core/types/scoring-input.js +9 -0
  78. package/dist/_vendor/ailf-shared/dimension-names.d.ts +21 -0
  79. package/dist/_vendor/ailf-shared/dimension-names.js +27 -0
  80. package/dist/_vendor/ailf-shared/document-ref.d.ts +29 -0
  81. package/dist/_vendor/ailf-shared/document-ref.js +1 -0
  82. package/dist/_vendor/ailf-shared/eval-modes.d.ts +12 -0
  83. package/dist/_vendor/ailf-shared/eval-modes.js +8 -0
  84. package/dist/_vendor/ailf-shared/index.d.ts +16 -0
  85. package/dist/_vendor/ailf-shared/index.js +16 -0
  86. package/dist/_vendor/ailf-shared/noise-threshold.d.ts +9 -0
  87. package/dist/_vendor/ailf-shared/noise-threshold.js +9 -0
  88. package/dist/_vendor/ailf-shared/score-grades.d.ts +17 -0
  89. package/dist/_vendor/ailf-shared/score-grades.js +23 -0
  90. package/dist/adapters/cache/content-lake-cache.d.ts +24 -0
  91. package/dist/adapters/cache/content-lake-cache.js +59 -0
  92. package/dist/adapters/cache/filesystem-cache.d.ts +18 -0
  93. package/dist/adapters/cache/filesystem-cache.js +54 -0
  94. package/dist/adapters/cache/index.d.ts +2 -0
  95. package/dist/adapters/cache/index.js +2 -0
  96. package/dist/adapters/config-sources/cli-config-adapter.d.ts +17 -0
  97. package/dist/adapters/config-sources/cli-config-adapter.js +23 -0
  98. package/dist/adapters/config-sources/file-config-adapter.d.ts +26 -0
  99. package/dist/adapters/config-sources/file-config-adapter.js +96 -0
  100. package/dist/adapters/config-sources/index.d.ts +2 -0
  101. package/dist/adapters/config-sources/index.js +2 -0
  102. package/dist/adapters/doc-fetchers/index.d.ts +1 -0
  103. package/dist/adapters/doc-fetchers/index.js +1 -0
  104. package/dist/adapters/doc-fetchers/sanity-doc-fetcher.d.ts +76 -0
  105. package/dist/adapters/doc-fetchers/sanity-doc-fetcher.js +620 -0
  106. package/dist/adapters/eval-runners/index.d.ts +1 -0
  107. package/dist/adapters/eval-runners/index.js +1 -0
  108. package/dist/adapters/eval-runners/promptfoo-eval-adapter.d.ts +14 -0
  109. package/dist/adapters/eval-runners/promptfoo-eval-adapter.js +63 -0
  110. package/dist/adapters/index.d.ts +12 -0
  111. package/dist/adapters/index.js +12 -0
  112. package/dist/adapters/loggers/console-logger.d.ts +22 -0
  113. package/dist/adapters/loggers/console-logger.js +54 -0
  114. package/dist/adapters/loggers/index.d.ts +9 -0
  115. package/dist/adapters/loggers/index.js +9 -0
  116. package/dist/adapters/loggers/json-logger.d.ts +18 -0
  117. package/dist/adapters/loggers/json-logger.js +33 -0
  118. package/dist/adapters/loggers/quiet-logger.d.ts +16 -0
  119. package/dist/adapters/loggers/quiet-logger.js +30 -0
  120. package/dist/adapters/task-sources/composite-task-source.d.ts +20 -0
  121. package/dist/adapters/task-sources/composite-task-source.js +59 -0
  122. package/dist/adapters/task-sources/content-lake-task-source.d.ts +20 -0
  123. package/dist/adapters/task-sources/content-lake-task-source.js +219 -0
  124. package/dist/adapters/task-sources/index.d.ts +7 -0
  125. package/dist/adapters/task-sources/index.js +7 -0
  126. package/dist/adapters/task-sources/repo-schemas.d.ts +245 -0
  127. package/dist/adapters/task-sources/repo-schemas.js +234 -0
  128. package/dist/adapters/task-sources/repo-task-source.d.ts +22 -0
  129. package/dist/adapters/task-sources/repo-task-source.js +104 -0
  130. package/dist/adapters/task-sources/repo-trigger.d.ts +52 -0
  131. package/dist/adapters/task-sources/repo-trigger.js +153 -0
  132. package/dist/adapters/task-sources/repo-validation.d.ts +49 -0
  133. package/dist/adapters/task-sources/repo-validation.js +164 -0
  134. package/dist/adapters/task-sources/yaml-task-source.d.ts +18 -0
  135. package/dist/adapters/task-sources/yaml-task-source.js +136 -0
  136. package/dist/agent-observer/agentic-provider.d.ts +132 -0
  137. package/dist/agent-observer/agentic-provider.js +983 -0
  138. package/dist/agent-observer/classifier.d.ts +62 -0
  139. package/dist/agent-observer/classifier.js +269 -0
  140. package/dist/agent-observer/index.d.ts +7 -0
  141. package/dist/agent-observer/index.js +4 -0
  142. package/dist/agent-observer/pricing.d.ts +35 -0
  143. package/dist/agent-observer/pricing.js +82 -0
  144. package/dist/agent-observer/provider.d.ts +77 -0
  145. package/dist/agent-observer/provider.js +151 -0
  146. package/dist/agent-observer/proxy.d.ts +91 -0
  147. package/dist/agent-observer/proxy.js +321 -0
  148. package/dist/agent-observer/test-imports.d.ts +7 -0
  149. package/dist/agent-observer/test-imports.js +185 -0
  150. package/dist/agent-observer/types.d.ts +137 -0
  151. package/dist/agent-observer/types.js +16 -0
  152. package/dist/assertions/source-isolation.d.ts +72 -0
  153. package/dist/assertions/source-isolation.js +117 -0
  154. package/dist/cli.d.ts +24 -0
  155. package/dist/cli.js +199 -0
  156. package/dist/commands/agent-report.d.ts +5 -0
  157. package/dist/commands/agent-report.js +69 -0
  158. package/dist/commands/baseline.d.ts +9 -0
  159. package/dist/commands/baseline.js +141 -0
  160. package/dist/commands/cache.d.ts +13 -0
  161. package/dist/commands/cache.js +135 -0
  162. package/dist/commands/calculate-scores.d.ts +8 -0
  163. package/dist/commands/calculate-scores.js +48 -0
  164. package/dist/commands/compare.d.ts +8 -0
  165. package/dist/commands/compare.js +120 -0
  166. package/dist/commands/completion.d.ts +18 -0
  167. package/dist/commands/completion.js +260 -0
  168. package/dist/commands/coverage-audit.d.ts +7 -0
  169. package/dist/commands/coverage-audit.js +40 -0
  170. package/dist/commands/discovery-report.d.ts +10 -0
  171. package/dist/commands/discovery-report.js +44 -0
  172. package/dist/commands/eval.d.ts +9 -0
  173. package/dist/commands/eval.js +35 -0
  174. package/dist/commands/explain-handler.d.ts +34 -0
  175. package/dist/commands/explain-handler.js +719 -0
  176. package/dist/commands/fetch-docs.d.ts +8 -0
  177. package/dist/commands/fetch-docs.js +128 -0
  178. package/dist/commands/generate-configs.d.ts +8 -0
  179. package/dist/commands/generate-configs.js +46 -0
  180. package/dist/commands/grader/index.d.ts +11 -0
  181. package/dist/commands/grader/index.js +118 -0
  182. package/dist/commands/init.d.ts +19 -0
  183. package/dist/commands/init.js +150 -0
  184. package/dist/commands/interactive.d.ts +12 -0
  185. package/dist/commands/interactive.js +238 -0
  186. package/dist/commands/lookup-doc.d.ts +15 -0
  187. package/dist/commands/lookup-doc.js +84 -0
  188. package/dist/commands/measure-retrieval.d.ts +5 -0
  189. package/dist/commands/measure-retrieval.js +65 -0
  190. package/dist/commands/pipeline-action.d.ts +71 -0
  191. package/dist/commands/pipeline-action.js +305 -0
  192. package/dist/commands/pipeline.d.ts +62 -0
  193. package/dist/commands/pipeline.js +53 -0
  194. package/dist/commands/pr-comment.d.ts +8 -0
  195. package/dist/commands/pr-comment.js +47 -0
  196. package/dist/commands/publish.d.ts +26 -0
  197. package/dist/commands/publish.js +253 -0
  198. package/dist/commands/readiness-report.d.ts +10 -0
  199. package/dist/commands/readiness-report.js +104 -0
  200. package/dist/commands/shared/options.d.ts +29 -0
  201. package/dist/commands/shared/options.js +57 -0
  202. package/dist/commands/update-quality-scores.d.ts +5 -0
  203. package/dist/commands/update-quality-scores.js +20 -0
  204. package/dist/commands/validate-tasks.d.ts +16 -0
  205. package/dist/commands/validate-tasks.js +93 -0
  206. package/dist/commands/validate.d.ts +9 -0
  207. package/dist/commands/validate.js +73 -0
  208. package/dist/commands/webhook-server.d.ts +5 -0
  209. package/dist/commands/webhook-server.js +30 -0
  210. package/dist/commands/weekly-digest.d.ts +10 -0
  211. package/dist/commands/weekly-digest.js +104 -0
  212. package/dist/composition-root.d.ts +26 -0
  213. package/dist/composition-root.js +107 -0
  214. package/dist/interpolate.d.ts +26 -0
  215. package/dist/interpolate.js +70 -0
  216. package/dist/job-store.d.ts +104 -0
  217. package/dist/job-store.js +188 -0
  218. package/dist/lib/agent-behavior-report.d.ts +8 -0
  219. package/dist/lib/agent-behavior-report.js +185 -0
  220. package/dist/lib/baseline.d.ts +19 -0
  221. package/dist/lib/baseline.js +153 -0
  222. package/dist/lib/calculate-scores.d.ts +23 -0
  223. package/dist/lib/calculate-scores.js +42 -0
  224. package/dist/lib/compare.d.ts +18 -0
  225. package/dist/lib/compare.js +170 -0
  226. package/dist/lib/coverage-audit.d.ts +4 -0
  227. package/dist/lib/coverage-audit.js +42 -0
  228. package/dist/lib/discovery-report.d.ts +13 -0
  229. package/dist/lib/discovery-report.js +57 -0
  230. package/dist/lib/fetch-docs.d.ts +30 -0
  231. package/dist/lib/fetch-docs.js +171 -0
  232. package/dist/lib/generate-configs.d.ts +25 -0
  233. package/dist/lib/generate-configs.js +42 -0
  234. package/dist/lib/grader-api.d.ts +21 -0
  235. package/dist/lib/grader-api.js +34 -0
  236. package/dist/lib/grader-compare.d.ts +19 -0
  237. package/dist/lib/grader-compare.js +91 -0
  238. package/dist/lib/grader-consistency.d.ts +27 -0
  239. package/dist/lib/grader-consistency.js +79 -0
  240. package/dist/lib/grader-sensitivity.d.ts +19 -0
  241. package/dist/lib/grader-sensitivity.js +75 -0
  242. package/dist/lib/grader-validate.d.ts +19 -0
  243. package/dist/lib/grader-validate.js +78 -0
  244. package/dist/lib/measure-retrieval.d.ts +14 -0
  245. package/dist/lib/measure-retrieval.js +71 -0
  246. package/dist/lib/pr-comment.d.ts +16 -0
  247. package/dist/lib/pr-comment.js +28 -0
  248. package/dist/lib/readiness-report.d.ts +13 -0
  249. package/dist/lib/readiness-report.js +108 -0
  250. package/dist/lib/webhook-server.d.ts +11 -0
  251. package/dist/lib/webhook-server.js +24 -0
  252. package/dist/lib/weekly-digest.d.ts +24 -0
  253. package/dist/lib/weekly-digest.js +148 -0
  254. package/dist/orchestration/build-app-context.d.ts +27 -0
  255. package/dist/orchestration/build-app-context.js +81 -0
  256. package/dist/orchestration/build-step-sequence.d.ts +15 -0
  257. package/dist/orchestration/build-step-sequence.js +84 -0
  258. package/dist/orchestration/config-to-source-overrides.d.ts +9 -0
  259. package/dist/orchestration/config-to-source-overrides.js +28 -0
  260. package/dist/orchestration/env-bridge.d.ts +21 -0
  261. package/dist/orchestration/env-bridge.js +66 -0
  262. package/dist/orchestration/index.d.ts +11 -0
  263. package/dist/orchestration/index.js +11 -0
  264. package/dist/orchestration/pipeline-orchestrator.d.ts +24 -0
  265. package/dist/orchestration/pipeline-orchestrator.js +153 -0
  266. package/dist/orchestration/step-runner.d.ts +20 -0
  267. package/dist/orchestration/step-runner.js +88 -0
  268. package/dist/orchestration/steps/calculate-scores-step.d.ts +13 -0
  269. package/dist/orchestration/steps/calculate-scores-step.js +95 -0
  270. package/dist/orchestration/steps/callback-step.d.ts +24 -0
  271. package/dist/orchestration/steps/callback-step.js +76 -0
  272. package/dist/orchestration/steps/compare-step.d.ts +14 -0
  273. package/dist/orchestration/steps/compare-step.js +92 -0
  274. package/dist/orchestration/steps/discovery-report-step.d.ts +13 -0
  275. package/dist/orchestration/steps/discovery-report-step.js +55 -0
  276. package/dist/orchestration/steps/fetch-docs-shell.d.ts +17 -0
  277. package/dist/orchestration/steps/fetch-docs-shell.js +30 -0
  278. package/dist/orchestration/steps/fetch-docs-step.d.ts +14 -0
  279. package/dist/orchestration/steps/fetch-docs-step.js +135 -0
  280. package/dist/orchestration/steps/gap-analysis-step.d.ts +16 -0
  281. package/dist/orchestration/steps/gap-analysis-step.js +136 -0
  282. package/dist/orchestration/steps/generate-configs-step.d.ts +14 -0
  283. package/dist/orchestration/steps/generate-configs-step.js +85 -0
  284. package/dist/orchestration/steps/grader-consistency-step.d.ts +13 -0
  285. package/dist/orchestration/steps/grader-consistency-step.js +64 -0
  286. package/dist/orchestration/steps/index.d.ts +19 -0
  287. package/dist/orchestration/steps/index.js +19 -0
  288. package/dist/orchestration/steps/mirror-repo-tasks-step.d.ts +21 -0
  289. package/dist/orchestration/steps/mirror-repo-tasks-step.js +94 -0
  290. package/dist/orchestration/steps/publish-report-step.d.ts +26 -0
  291. package/dist/orchestration/steps/publish-report-step.js +216 -0
  292. package/dist/orchestration/steps/readiness-step.d.ts +13 -0
  293. package/dist/orchestration/steps/readiness-step.js +91 -0
  294. package/dist/orchestration/steps/report-step.d.ts +12 -0
  295. package/dist/orchestration/steps/report-step.js +49 -0
  296. package/dist/orchestration/steps/run-eval-step.d.ts +17 -0
  297. package/dist/orchestration/steps/run-eval-step.js +195 -0
  298. package/dist/orchestration/steps/validate-step.d.ts +12 -0
  299. package/dist/orchestration/steps/validate-step.js +41 -0
  300. package/dist/pipeline/agent-behavior-report.d.ts +53 -0
  301. package/dist/pipeline/agent-behavior-report.js +132 -0
  302. package/dist/pipeline/attribution.d.ts +47 -0
  303. package/dist/pipeline/attribution.js +226 -0
  304. package/dist/pipeline/baseline.d.ts +37 -0
  305. package/dist/pipeline/baseline.js +141 -0
  306. package/dist/pipeline/cache.d.ts +101 -0
  307. package/dist/pipeline/cache.js +283 -0
  308. package/dist/pipeline/calculate-scores.d.ts +102 -0
  309. package/dist/pipeline/calculate-scores.js +1128 -0
  310. package/dist/pipeline/callback-delivery.d.ts +50 -0
  311. package/dist/pipeline/callback-delivery.js +89 -0
  312. package/dist/pipeline/checks.d.ts +39 -0
  313. package/dist/pipeline/checks.js +280 -0
  314. package/dist/pipeline/classify-url.d.ts +61 -0
  315. package/dist/pipeline/classify-url.js +93 -0
  316. package/dist/pipeline/compare.d.ts +31 -0
  317. package/dist/pipeline/compare.js +208 -0
  318. package/dist/pipeline/coverage-audit.d.ts +39 -0
  319. package/dist/pipeline/coverage-audit.js +165 -0
  320. package/dist/pipeline/degradations.d.ts +85 -0
  321. package/dist/pipeline/degradations.js +242 -0
  322. package/dist/pipeline/discovery-report.d.ts +55 -0
  323. package/dist/pipeline/discovery-report.js +178 -0
  324. package/dist/pipeline/eval-constants.d.ts +68 -0
  325. package/dist/pipeline/eval-constants.js +111 -0
  326. package/dist/pipeline/eval-fingerprint.d.ts +66 -0
  327. package/dist/pipeline/eval-fingerprint.js +175 -0
  328. package/dist/pipeline/expand-tasks.d.ts +220 -0
  329. package/dist/pipeline/expand-tasks.js +421 -0
  330. package/dist/pipeline/failure-modes.d.ts +46 -0
  331. package/dist/pipeline/failure-modes.js +348 -0
  332. package/dist/pipeline/fetch-url-content.d.ts +44 -0
  333. package/dist/pipeline/fetch-url-content.js +93 -0
  334. package/dist/pipeline/gap-analysis.d.ts +48 -0
  335. package/dist/pipeline/gap-analysis.js +231 -0
  336. package/dist/pipeline/generate-configs.d.ts +72 -0
  337. package/dist/pipeline/generate-configs.js +395 -0
  338. package/dist/pipeline/grader-api.d.ts +49 -0
  339. package/dist/pipeline/grader-api.js +200 -0
  340. package/dist/pipeline/grader-compare-runner.d.ts +44 -0
  341. package/dist/pipeline/grader-compare-runner.js +301 -0
  342. package/dist/pipeline/grader-comparison.d.ts +111 -0
  343. package/dist/pipeline/grader-comparison.js +161 -0
  344. package/dist/pipeline/grader-consistency-runner.d.ts +60 -0
  345. package/dist/pipeline/grader-consistency-runner.js +270 -0
  346. package/dist/pipeline/grader-consistency.d.ts +103 -0
  347. package/dist/pipeline/grader-consistency.js +146 -0
  348. package/dist/pipeline/grader-sensitivity-runner.d.ts +40 -0
  349. package/dist/pipeline/grader-sensitivity-runner.js +282 -0
  350. package/dist/pipeline/grader-sensitivity.d.ts +94 -0
  351. package/dist/pipeline/grader-sensitivity.js +144 -0
  352. package/dist/pipeline/grader-validate-runner.d.ts +38 -0
  353. package/dist/pipeline/grader-validate-runner.js +229 -0
  354. package/dist/pipeline/grader-validation.d.ts +107 -0
  355. package/dist/pipeline/grader-validation.js +169 -0
  356. package/dist/pipeline/map-request-to-config.d.ts +19 -0
  357. package/dist/pipeline/map-request-to-config.js +80 -0
  358. package/dist/pipeline/measure-retrieval.d.ts +59 -0
  359. package/dist/pipeline/measure-retrieval.js +111 -0
  360. package/dist/pipeline/mirror-repo-tasks.d.ts +86 -0
  361. package/dist/pipeline/mirror-repo-tasks.js +350 -0
  362. package/dist/pipeline/plan-format.d.ts +33 -0
  363. package/dist/pipeline/plan-format.js +202 -0
  364. package/dist/pipeline/plan.d.ts +169 -0
  365. package/dist/pipeline/plan.js +708 -0
  366. package/dist/pipeline/pr-comment.d.ts +19 -0
  367. package/dist/pipeline/pr-comment.js +502 -0
  368. package/dist/pipeline/probe.d.ts +52 -0
  369. package/dist/pipeline/probe.js +390 -0
  370. package/dist/pipeline/provenance.d.ts +47 -0
  371. package/dist/pipeline/provenance.js +146 -0
  372. package/dist/pipeline/readiness-report.d.ts +87 -0
  373. package/dist/pipeline/readiness-report.js +205 -0
  374. package/dist/pipeline/release-classification.d.ts +54 -0
  375. package/dist/pipeline/release-classification.js +238 -0
  376. package/dist/pipeline/release-report.d.ts +37 -0
  377. package/dist/pipeline/release-report.js +222 -0
  378. package/dist/pipeline/repo-eval-comment.d.ts +37 -0
  379. package/dist/pipeline/repo-eval-comment.js +165 -0
  380. package/dist/pipeline/repo-threshold-evaluator.d.ts +89 -0
  381. package/dist/pipeline/repo-threshold-evaluator.js +162 -0
  382. package/dist/pipeline/resolve-mappings.d.ts +35 -0
  383. package/dist/pipeline/resolve-mappings.js +72 -0
  384. package/dist/pipeline/retrieval-metrics.d.ts +39 -0
  385. package/dist/pipeline/retrieval-metrics.js +136 -0
  386. package/dist/pipeline/reverse-mapping.d.ts +67 -0
  387. package/dist/pipeline/reverse-mapping.js +88 -0
  388. package/dist/pipeline/schemas.d.ts +9 -0
  389. package/dist/pipeline/schemas.js +9 -0
  390. package/dist/pipeline/steps/calculate-scores-step.d.ts +11 -0
  391. package/dist/pipeline/steps/calculate-scores-step.js +89 -0
  392. package/dist/pipeline/steps/compare-step.d.ts +18 -0
  393. package/dist/pipeline/steps/compare-step.js +90 -0
  394. package/dist/pipeline/steps/eval-step.d.ts +53 -0
  395. package/dist/pipeline/steps/eval-step.js +347 -0
  396. package/dist/pipeline/steps/fetch-docs-step.d.ts +11 -0
  397. package/dist/pipeline/steps/fetch-docs-step.js +84 -0
  398. package/dist/pipeline/steps/generate-configs-step.d.ts +11 -0
  399. package/dist/pipeline/steps/generate-configs-step.js +98 -0
  400. package/dist/pipeline/steps/grader-consistency-step.d.ts +21 -0
  401. package/dist/pipeline/steps/grader-consistency-step.js +74 -0
  402. package/dist/pipeline/steps/publish-report-step.d.ts +57 -0
  403. package/dist/pipeline/steps/publish-report-step.js +243 -0
  404. package/dist/pipeline/steps/report-step.d.ts +13 -0
  405. package/dist/pipeline/steps/report-step.js +56 -0
  406. package/dist/pipeline/steps/update-scores-step.d.ts +11 -0
  407. package/dist/pipeline/steps/update-scores-step.js +42 -0
  408. package/dist/pipeline/targeted-loo.d.ts +88 -0
  409. package/dist/pipeline/targeted-loo.js +203 -0
  410. package/dist/pipeline/thresholds.d.ts +27 -0
  411. package/dist/pipeline/thresholds.js +245 -0
  412. package/dist/pipeline/types.d.ts +10 -0
  413. package/dist/pipeline/types.js +10 -0
  414. package/dist/pipeline/validate.d.ts +67 -0
  415. package/dist/pipeline/validate.js +406 -0
  416. package/dist/pipeline/webhook-server.d.ts +37 -0
  417. package/dist/pipeline/webhook-server.js +133 -0
  418. package/dist/report-store.d.ts +84 -0
  419. package/dist/report-store.js +208 -0
  420. package/dist/sanity/client.d.ts +38 -0
  421. package/dist/sanity/client.js +86 -0
  422. package/dist/sanity/portable-text.d.ts +11 -0
  423. package/dist/sanity/portable-text.js +211 -0
  424. package/dist/sanity/queries.d.ts +133 -0
  425. package/dist/sanity/queries.js +300 -0
  426. package/dist/schedules/digest.d.ts +116 -0
  427. package/dist/schedules/digest.js +156 -0
  428. package/dist/schedules/index.d.ts +12 -0
  429. package/dist/schedules/index.js +10 -0
  430. package/dist/schedules/loader.d.ts +31 -0
  431. package/dist/schedules/loader.js +73 -0
  432. package/dist/schedules/schema.d.ts +9 -0
  433. package/dist/schedules/schema.js +9 -0
  434. package/dist/scripts/agent-behavior-report.d.ts +19 -0
  435. package/dist/scripts/agent-behavior-report.js +315 -0
  436. package/dist/scripts/baseline.d.ts +43 -0
  437. package/dist/scripts/baseline.js +267 -0
  438. package/dist/scripts/calculate-scores.d.ts +166 -0
  439. package/dist/scripts/calculate-scores.js +1296 -0
  440. package/dist/scripts/compare.d.ts +22 -0
  441. package/dist/scripts/compare.js +334 -0
  442. package/dist/scripts/coverage-audit.d.ts +44 -0
  443. package/dist/scripts/coverage-audit.js +209 -0
  444. package/dist/scripts/debug-eval.d.ts +19 -0
  445. package/dist/scripts/debug-eval.js +73 -0
  446. package/dist/scripts/discovery-report.d.ts +58 -0
  447. package/dist/scripts/discovery-report.js +250 -0
  448. package/dist/scripts/fetch-docs.d.ts +35 -0
  449. package/dist/scripts/fetch-docs.js +472 -0
  450. package/dist/scripts/generate-configs.d.ts +66 -0
  451. package/dist/scripts/generate-configs.js +459 -0
  452. package/dist/scripts/grader-api.d.ts +27 -0
  453. package/dist/scripts/grader-api.js +206 -0
  454. package/dist/scripts/grader-compare.d.ts +22 -0
  455. package/dist/scripts/grader-compare.js +368 -0
  456. package/dist/scripts/grader-consistency.d.ts +20 -0
  457. package/dist/scripts/grader-consistency.js +313 -0
  458. package/dist/scripts/grader-sensitivity.d.ts +22 -0
  459. package/dist/scripts/grader-sensitivity.js +354 -0
  460. package/dist/scripts/grader-validate.d.ts +19 -0
  461. package/dist/scripts/grader-validate.js +267 -0
  462. package/dist/scripts/measure-retrieval.d.ts +10 -0
  463. package/dist/scripts/measure-retrieval.js +145 -0
  464. package/dist/scripts/migrate-tasks-to-content-lake.d.ts +24 -0
  465. package/dist/scripts/migrate-tasks-to-content-lake.js +327 -0
  466. package/dist/scripts/pipeline.d.ts +76 -0
  467. package/dist/scripts/pipeline.js +1031 -0
  468. package/dist/scripts/pr-comment.d.ts +10 -0
  469. package/dist/scripts/pr-comment.js +510 -0
  470. package/dist/scripts/readiness-report.d.ts +88 -0
  471. package/dist/scripts/readiness-report.js +342 -0
  472. package/dist/scripts/update-quality-scores.d.ts +15 -0
  473. package/dist/scripts/update-quality-scores.js +184 -0
  474. package/dist/scripts/validate-task-sources.d.ts +21 -0
  475. package/dist/scripts/validate-task-sources.js +210 -0
  476. package/dist/scripts/validate.d.ts +13 -0
  477. package/dist/scripts/validate.js +79 -0
  478. package/dist/scripts/webhook-server.d.ts +26 -0
  479. package/dist/scripts/webhook-server.js +147 -0
  480. package/dist/scripts/weekly-digest.d.ts +24 -0
  481. package/dist/scripts/weekly-digest.js +144 -0
  482. package/dist/sinks/bigquery/index.d.ts +131 -0
  483. package/dist/sinks/bigquery/index.js +222 -0
  484. package/dist/sinks/format-slack.d.ts +64 -0
  485. package/dist/sinks/format-slack.js +306 -0
  486. package/dist/sinks/index.d.ts +23 -0
  487. package/dist/sinks/index.js +18 -0
  488. package/dist/sinks/loader.d.ts +18 -0
  489. package/dist/sinks/loader.js +82 -0
  490. package/dist/sinks/retry.d.ts +24 -0
  491. package/dist/sinks/retry.js +52 -0
  492. package/dist/sinks/schema.d.ts +9 -0
  493. package/dist/sinks/schema.js +9 -0
  494. package/dist/sinks/slack/format.d.ts +65 -0
  495. package/dist/sinks/slack/format.js +327 -0
  496. package/dist/sinks/slack/index.d.ts +27 -0
  497. package/dist/sinks/slack/index.js +78 -0
  498. package/dist/sinks/slack-sink.d.ts +27 -0
  499. package/dist/sinks/slack-sink.js +78 -0
  500. package/dist/sinks/types.d.ts +59 -0
  501. package/dist/sinks/types.js +44 -0
  502. package/dist/sinks/webhook/index.d.ts +19 -0
  503. package/dist/sinks/webhook/index.js +50 -0
  504. package/dist/sinks/webhook-sink.d.ts +19 -0
  505. package/dist/sinks/webhook-sink.js +50 -0
  506. package/dist/sources.d.ts +104 -0
  507. package/dist/sources.js +292 -0
  508. package/dist/webhook/budget.d.ts +42 -0
  509. package/dist/webhook/budget.js +60 -0
  510. package/dist/webhook/debounce.d.ts +67 -0
  511. package/dist/webhook/debounce.js +76 -0
  512. package/dist/webhook/dispatch.d.ts +45 -0
  513. package/dist/webhook/dispatch.js +84 -0
  514. package/dist/webhook/eval-request-handler.d.ts +87 -0
  515. package/dist/webhook/eval-request-handler.js +181 -0
  516. package/dist/webhook/handler.d.ts +88 -0
  517. package/dist/webhook/handler.js +203 -0
  518. package/dist/webhook/index.d.ts +17 -0
  519. package/dist/webhook/index.js +12 -0
  520. package/dist/webhook/types.d.ts +109 -0
  521. package/dist/webhook/types.js +10 -0
  522. package/package.json +72 -0
  523. package/tasks/.expanded.agentic.yaml +51 -0
  524. package/tasks/.expanded.yaml +66 -0
  525. package/tasks/frameworks.yaml +98 -0
  526. package/tasks/functions.yaml +51 -0
  527. package/tasks/groq.yaml +216 -0
  528. package/tasks/nextjs-live.yaml +62 -0
  529. package/tasks/studio-setup.yaml +111 -0
  530. package/tasks/visual-editing.yaml +120 -0
@@ -0,0 +1,983 @@
1
+ /**
2
+ * agentic-provider.ts
3
+ *
4
+ * An agentic Promptfoo provider that gives the model web_search and
5
+ * fetch_page tools, simulating how real AI agents behave when a user
6
+ * asks a development question.
7
+ *
8
+ * Supports two agent modes via the `agentMode` config:
9
+ *
10
+ * - "naive" — Simulates current agents (Claude Code, ChatGPT, Cursor):
11
+ * uses Jina Reader for search + page fetching because real
12
+ * agents can't render JavaScript-heavy SPAs server-side.
13
+ *
14
+ * - "optimized" — Simulates an ideal agent that knows about Sanity's
15
+ * agent-friendly endpoints: fetches .md versions of doc
16
+ * pages directly, uses llms.txt for doc discovery, and
17
+ * falls back to Jina only for non-Sanity pages.
18
+ *
19
+ * All HTTP requests go through the RequestRecorder, so they're automatically
20
+ * classified as docPageVisits, searchQueries, etc.
21
+ *
22
+ * Promptfoo config usage:
23
+ *
24
+ * providers:
25
+ * - id: file://dist/agent-observer/agentic-provider.js
26
+ * label: "GPT-4o (Naive Agent)"
27
+ * config:
28
+ * model: gpt-4o
29
+ * agentMode: naive # or "optimized"
30
+ * maxToolRounds: 5
31
+ */
32
import { randomUUID } from "crypto";
import { fileURLToPath } from "node:url";
import { config as loadDotenv } from "dotenv";
import { RequestRecorder } from "./proxy.js";
import { calculateCost } from "./pricing.js";
import { isAllowedOrigin } from "../sources.js";
37
// Load environment variables from the .env file two directories above this
// module, overriding values already present in process.env.
// fileURLToPath is used instead of URL#pathname because pathname keeps
// percent-escapes (e.g. "%20" for spaces) and yields "/C:/..." on Windows,
// neither of which is a usable filesystem path.
loadDotenv({
  override: true,
  path: fileURLToPath(new URL("../../.env", import.meta.url)),
});
41
+ // ---------------------------------------------------------------------------
42
+ // Tool definitions — mirror what real agents provide
43
+ // ---------------------------------------------------------------------------
44
// Base tools offered to the model, in OpenAI function-calling format.
// Index 0 is web_search and index 1 is fetch_page. Each tool takes exactly one
// required string parameter, so the definitions are generated from a compact
// table of [name, parameter, parameter description, tool description].
const AGENT_TOOLS = [
  [
    "web_search",
    "query",
    "The search query to execute",
    "Search the web for information. Use this to find documentation, " +
      "tutorials, API references, and examples. Returns a list of search " +
      "results with titles, URLs, and snippets.",
  ],
  [
    "fetch_page",
    "url",
    "The URL to fetch",
    "Fetch the content of a web page. Use this to read documentation, " +
      "code examples, API references, or any other web content. Returns " +
      "the text content of the page.",
  ],
].map(([name, parameter, parameterDescription, description]) => ({
  function: {
    description,
    name,
    parameters: {
      properties: {
        [parameter]: {
          description: parameterDescription,
          type: "string",
        },
      },
      required: [parameter],
      type: "object",
    },
  },
  type: "function",
}));
84
+ // Optimized mode gets an extra tool for discovering Sanity's doc structure
85
// Additional tool definitions appended for the "optimized" agent mode:
// a single list_docs tool that takes one required "site" string.
const OPTIMIZED_EXTRA_TOOLS = (() => {
  const siteParameter = {
    description: 'The documentation site domain, e.g. "sanity.io"',
    type: "string",
  };
  const listDocsFunction = {
    description:
      "List all available documentation pages for a site. Currently supports " +
      "the documentation site (via llms.txt). Returns a structured list of doc page titles " +
      "and URLs. Use this FIRST to discover what documentation is available " +
      "before fetching specific pages.",
    name: "list_docs",
    parameters: {
      properties: { site: siteParameter },
      required: ["site"],
      type: "object",
    },
  };
  return [{ function: listDocsFunction, type: "function" }];
})();
107
+ // ---------------------------------------------------------------------------
108
+ // Sanity docs helpers
109
+ // ---------------------------------------------------------------------------
110
+ /** Default doc base URL — used when no source config is provided */
111
const DEFAULT_DOC_BASE_URL = "https://www.sanity.io/docs";
/** Default llms.txt URL: the llms.txt file directly under the default doc base URL */
const DEFAULT_LLMS_TXT_URL = `${DEFAULT_DOC_BASE_URL}/llms.txt`;
114
+ export default class AgenticProvider {
115
+ config;
116
+ providerId;
117
+ agentMode;
118
+ allowedOrigins;
119
+ customHeaders;
120
+ docBaseUrl;
121
+ docsUrlPattern;
122
+ llmsTxtUrl;
123
+ priorityDomain;
124
+ recorder;
125
+ searchMode;
126
+ constructor(options) {
127
+ this.providerId = options.id ?? "agentic-observer";
128
+ this.config = options.config ?? {};
129
+ this.agentMode = this.config.agentMode || "naive";
130
+ // Documentation source configuration — defaults to Sanity production
131
+ this.docBaseUrl = this.config.docBaseUrl || DEFAULT_DOC_BASE_URL;
132
+ this.llmsTxtUrl = this.config.llmsTxtUrl || DEFAULT_LLMS_TXT_URL;
133
+ this.docsUrlPattern = buildDocsUrlPattern(this.docBaseUrl);
134
+ // Custom HTTP headers (e.g., Vercel bypass protection token)
135
+ this.customHeaders =
136
+ this.config.customHeaders || {};
137
+ // Extract priority domain from docBaseUrl for search result ranking
138
+ const baseUrlObj = new URL(this.docBaseUrl);
139
+ this.priorityDomain =
140
+ this.config.priorityDomain ||
141
+ baseUrlObj.hostname.replace(/^www\./, "");
142
+ // Optional origin sandboxing — restrict which URLs the agent can access
143
+ this.allowedOrigins = Array.isArray(this.config.allowedOrigins)
144
+ ? this.config.allowedOrigins.filter(Boolean)
145
+ : [];
146
+ // Search mode: controls web_search tool availability and filtering
147
+ this.searchMode = this.config.searchMode || "open";
148
+ this.recorder = new RequestRecorder(this.config.observerOptions || {});
149
+ }
150
+ /**
151
+ * Main Promptfoo provider entry point. Runs the full agentic loop.
152
+ */
153
+ async callApi(prompt, context) {
154
+ const sessionId = randomUUID();
155
+ const taskDescription = context?.vars?.task ||
156
+ context?.prompt?.label ||
157
+ "unknown-task";
158
+ const observe = this.config.observe !== false;
159
+ if (observe) {
160
+ this.recorder.start(sessionId, this.id(), taskDescription);
161
+ }
162
+ let result;
163
+ try {
164
+ result = await this.runAgenticLoop(prompt);
165
+ }
166
+ catch (err) {
167
+ const error = err;
168
+ result = {
169
+ error: error.message,
170
+ output: undefined,
171
+ };
172
+ }
173
+ if (observe) {
174
+ const behaviorLog = this.recorder.stop();
175
+ result.metadata = {
176
+ ...(result.metadata ?? {}),
177
+ agentBehavior: behaviorLog,
178
+ agentBehaviorSummary: behaviorLog.summary,
179
+ agentMode: this.agentMode,
180
+ };
181
+ }
182
+ return result;
183
+ }
184
+ /**
185
+ * Exposes the recorder for external integrations.
186
+ */
187
+ getRecorder() {
188
+ return this.recorder;
189
+ }
190
+ id() {
191
+ return `agentic:${this.agentMode}:${this.providerId}`;
192
+ }
193
+ // -------------------------------------------------------------------------
194
+ // System prompt and content cleaning helpers
195
+ // -------------------------------------------------------------------------
196
+ /**
197
+ * Build the system prompt based on agent mode and configured doc URLs.
198
+ */
199
+ buildSystemPrompt() {
200
+ const docDomain = this.priorityDomain;
201
+ const docUrl = this.docBaseUrl;
202
+ const exampleSlug = "schema-types";
203
+ // Search guidance varies based on search mode
204
+ const searchGuidance = this.searchMode === "off"
205
+ ? "You do NOT have web search. Fetch documentation pages directly " +
206
+ "using the URLs you know or discover via list_docs."
207
+ : this.searchMode === "origin-only"
208
+ ? `You have web search restricted to ${docDomain}. ` +
209
+ "Search results will only include pages from this domain."
210
+ : "You have access to web search.";
211
+ return this.agentMode === "optimized"
212
+ ? "You are an expert developer helping with implementation tasks. " +
213
+ searchGuidance +
214
+ " You also have page fetching and a doc listing tool. " +
215
+ "IMPORTANT: The documentation supports agent-friendly endpoints. " +
216
+ `Start by calling list_docs for "${docDomain}" to discover available doc pages. ` +
217
+ 'When fetching doc pages, append ".md" to the URL to get clean markdown ' +
218
+ `(e.g., ${docUrl}/${exampleSlug}.md instead of ` +
219
+ `${docUrl}/${exampleSlug}). ` +
220
+ "Always verify against the current docs before answering."
221
+ : "You are an expert developer helping with implementation tasks. " +
222
+ searchGuidance +
223
+ " You also have page fetching tools. " +
224
+ "When asked about APIs, configuration, schemas, GROQ queries, " +
225
+ "or any topic related to the documentation, " +
226
+ `USE the tools to look up the latest official documentation at ${docDomain} ` +
227
+ "before answering. Do not rely solely on your training data — always verify " +
228
+ "against the current docs. Prefer official documentation over third-party sources.";
229
+ }
230
+ /**
231
+ * Cleans Jina Reader markdown output by removing navigation boilerplate,
232
+ * cookie banners, and footer content.
233
+ */
234
+ cleanJinaContent(markdown) {
235
+ const lines = markdown.split("\n");
236
+ const cleanLines = [];
237
+ let inMainContent = false;
238
+ let skipCount = 0;
239
+ for (const line of lines) {
240
+ if (line.includes("cookie") && line.includes("Privacy Policy"))
241
+ continue;
242
+ if (line.includes("Accept Deny Non-Essential"))
243
+ continue;
244
+ if (line.trim() ===
245
+ "Opens in a new window Opens an external website Opens an external website in a new window")
246
+ continue;
247
+ if (line.trim().startsWith("* ") &&
248
+ line.includes(`](${this.docBaseUrl}/`)) {
249
+ skipCount++;
250
+ if (skipCount > 3 && !inMainContent)
251
+ continue;
252
+ }
253
+ else {
254
+ skipCount = 0;
255
+ }
256
+ if (line.startsWith("# ") ||
257
+ line.startsWith("## ") ||
258
+ line.startsWith("### ")) {
259
+ inMainContent = true;
260
+ }
261
+ if (inMainContent) {
262
+ cleanLines.push(line);
263
+ }
264
+ }
265
+ if (cleanLines.length < 10)
266
+ return markdown;
267
+ return cleanLines.join("\n").trim();
268
+ }
269
+ // -------------------------------------------------------------------------
270
+ // LLM provider detection
271
+ // -------------------------------------------------------------------------
272
+ /**
273
+ * Detect which LLM provider to use based on config and model name.
274
+ * Reads the `provider` config field set by generate-configs, with
275
+ * fallback heuristics for backward compatibility.
276
+ */
277
+ detectProvider() {
278
+ const explicit = this.config.provider;
279
+ if (explicit === "anthropic")
280
+ return "anthropic";
281
+ if (explicit === "openai")
282
+ return "openai";
283
+ // Heuristic fallback: detect from model name
284
+ const model = this.config.model || "";
285
+ if (model.startsWith("claude"))
286
+ return "anthropic";
287
+ return "openai";
288
+ }
289
+ // -------------------------------------------------------------------------
290
+ // fetch_page — key difference between naive and optimized modes
291
+ // -------------------------------------------------------------------------
292
+ async executeFetchPage(url, fetchFn) {
293
+ const maxContentLength = 12000;
294
+ // -----------------------------------------------------------------------
295
+ // ORIGIN SANDBOXING: reject URLs outside allowed origins (supports globs)
296
+ // -----------------------------------------------------------------------
297
+ if (this.allowedOrigins.length > 0 &&
298
+ !isAllowedOrigin(url, this.allowedOrigins)) {
299
+ return `[Blocked] URL ${url} is outside the allowed origins: ${this.allowedOrigins.join(", ")}`;
300
+ }
301
+ // -----------------------------------------------------------------------
302
+ // OPTIMIZED MODE: Use .md endpoints for docs pages
303
+ // -----------------------------------------------------------------------
304
+ if (this.agentMode === "optimized" && this.docsUrlPattern.test(url)) {
305
+ const mdUrl = toMarkdownUrl(url);
306
+ const response = await fetchFn(mdUrl, {
307
+ headers: this.mergeDocHeaders({
308
+ Accept: "text/markdown, text/plain",
309
+ "User-Agent": "Mozilla/5.0 (compatible; SanityEvalBot/1.0)",
310
+ }, mdUrl),
311
+ method: "GET",
312
+ });
313
+ if (response.ok) {
314
+ const contentType = response.headers.get("content-type") ?? "";
315
+ const text = await response.text();
316
+ // Verify we got markdown, not HTML (the .md endpoint returns
317
+ // Content-Type: text/markdown;charset=UTF-8)
318
+ if (contentType.includes("markdown") || !text.startsWith("<!DOCTYPE")) {
319
+ return text.slice(0, maxContentLength);
320
+ }
321
+ }
322
+ // If .md failed, fall through to naive strategy
323
+ }
324
+ // -----------------------------------------------------------------------
325
+ // NAIVE MODE (and fallback): Use Jina Reader for JS-rendered pages
326
+ // -----------------------------------------------------------------------
327
+ try {
328
+ const jinaUrl = `https://r.jina.ai/${url}`;
329
+ const jinaResponse = await fetchFn(jinaUrl, {
330
+ headers: {
331
+ Accept: "text/plain",
332
+ "User-Agent": "Mozilla/5.0 (compatible; SanityEvalBot/1.0)",
333
+ },
334
+ method: "GET",
335
+ });
336
+ if (jinaResponse.ok) {
337
+ const markdown = await jinaResponse.text();
338
+ if (markdown.length > 100) {
339
+ return this.cleanJinaContent(markdown).slice(0, maxContentLength);
340
+ }
341
+ }
342
+ }
343
+ catch {
344
+ // Jina unavailable — fall through to direct fetch
345
+ }
346
+ // -----------------------------------------------------------------------
347
+ // LAST RESORT: Direct fetch with HTML stripping
348
+ // -----------------------------------------------------------------------
349
+ const response = await fetchFn(url, {
350
+ headers: this.mergeDocHeaders({
351
+ Accept: "text/html,application/xhtml+xml,text/plain",
352
+ "User-Agent": "Mozilla/5.0 (compatible; SanityEvalBot/1.0)",
353
+ }, url),
354
+ method: "GET",
355
+ });
356
+ if (!response.ok) {
357
+ return JSON.stringify({
358
+ error: `HTTP ${response.status}: ${response.statusText}`,
359
+ url,
360
+ });
361
+ }
362
+ const contentType = response.headers.get("content-type") ?? "";
363
+ const text = await response.text();
364
+ if (contentType.includes("markdown")) {
365
+ return text.slice(0, maxContentLength);
366
+ }
367
+ if (contentType.includes("html")) {
368
+ return this.stripHtml(text).slice(0, maxContentLength);
369
+ }
370
+ return text.slice(0, maxContentLength);
371
+ }
372
+ // -------------------------------------------------------------------------
373
+ // list_docs — fetches llms.txt (optimized mode only)
374
+ // -------------------------------------------------------------------------
375
+ async executeListDocs(site, fetchFn) {
376
+ // Origin sandboxing for list_docs — block requests to off-origin sites
377
+ if (this.allowedOrigins.length > 0) {
378
+ const siteHost = site
379
+ .replace(/^https?:\/\//, "")
380
+ .replace(/\/.*$/, "")
381
+ .replace(/^www\./, "");
382
+ if (!isAllowedOrigin(`https://${siteHost}`, this.allowedOrigins)) {
383
+ return JSON.stringify({
384
+ error: `list_docs restricted to allowed origins: ${this.allowedOrigins.join(", ")}`,
385
+ suggestion: `Try list_docs("${this.priorityDomain}") instead.`,
386
+ });
387
+ }
388
+ }
389
+ // Use configured llms.txt URL, or construct from the provided site
390
+ const llmsTxtUrl = site.includes("llms.txt")
391
+ ? site
392
+ : site === this.priorityDomain || site.includes(this.priorityDomain)
393
+ ? this.llmsTxtUrl
394
+ : `https://${site.replace(/^https?:\/\//, "").replace(/\/.*$/, "")}/llms.txt`;
395
+ const response = await fetchFn(llmsTxtUrl, {
396
+ headers: this.mergeDocHeaders({
397
+ Accept: "text/plain, text/markdown",
398
+ "User-Agent": "Mozilla/5.0 (compatible; SanityEvalBot/1.0)",
399
+ }, llmsTxtUrl),
400
+ method: "GET",
401
+ });
402
+ if (!response.ok) {
403
+ return JSON.stringify({
404
+ error: `No llms.txt found at ${llmsTxtUrl} (HTTP ${response.status})`,
405
+ suggestion: "Try using web_search instead to find documentation.",
406
+ });
407
+ }
408
+ const contentType = response.headers.get("content-type") ?? "";
409
+ const text = await response.text();
410
+ // Verify it's actually an llms.txt file (markdown with links)
411
+ if (contentType.includes("html") && text.includes("<!DOCTYPE")) {
412
+ return JSON.stringify({
413
+ error: `${llmsTxtUrl} returned HTML, not a docs listing.`,
414
+ suggestion: "Try using web_search instead to find documentation.",
415
+ });
416
+ }
417
+ // Return the full llms.txt — it's already a clean markdown listing
418
+ // Trim to reasonable size (llms.txt can be long)
419
+ return text.slice(0, 15000);
420
+ }
421
+ async executeTool(name, argsJson, fetchFn) {
422
+ try {
423
+ const args = JSON.parse(argsJson);
424
+ switch (name) {
425
+ case "fetch_page":
426
+ return await this.executeFetchPage(args.url, fetchFn);
427
+ case "list_docs":
428
+ return await this.executeListDocs(args.site, fetchFn);
429
+ case "web_search":
430
+ return await this.executeWebSearch(args.query, fetchFn);
431
+ default:
432
+ return JSON.stringify({ error: `Unknown tool: ${name}` });
433
+ }
434
+ }
435
+ catch (err) {
436
+ const error = err;
437
+ return JSON.stringify({ error: error.message });
438
+ }
439
+ }
440
+ // -------------------------------------------------------------------------
441
+ // web_search — search execution with fallbacks, plus supporting helpers
442
+ // -------------------------------------------------------------------------
443
+ async executeWebSearch(query, fetchFn) {
444
+ let results = [];
445
+ // Try Google Custom Search API if configured (both modes)
446
+ const googleApiKey = process.env.GOOGLE_SEARCH_API_KEY;
447
+ const googleCseId = process.env.GOOGLE_CSE_ID;
448
+ if (googleApiKey && googleCseId) {
449
+ const params = new URLSearchParams({
450
+ cx: googleCseId,
451
+ key: googleApiKey,
452
+ num: "5",
453
+ q: query,
454
+ });
455
+ const response = await fetchFn(`https://www.googleapis.com/customsearch/v1?${params}`);
456
+ const data = (await response.json());
457
+ if (data.items?.length) {
458
+ results = data.items.map((item) => ({
459
+ snippet: item.snippet,
460
+ title: item.title,
461
+ url: item.link,
462
+ }));
463
+ }
464
+ }
465
+ // Fallback: Use Jina Reader to search via DuckDuckGo
466
+ if (results.length === 0) {
467
+ const ddgUrl = `https://duckduckgo.com/?q=${encodeURIComponent(query)}`;
468
+ try {
469
+ const jinaResponse = await fetchFn(`https://r.jina.ai/${ddgUrl}`, {
470
+ headers: { Accept: "text/plain" },
471
+ });
472
+ if (jinaResponse.ok) {
473
+ const text = await jinaResponse.text();
474
+ results = this.parseSearchResults(text);
475
+ }
476
+ }
477
+ catch {
478
+ // Jina search unavailable
479
+ }
480
+ }
481
+ // Final fallback: construct likely Sanity doc URLs from the query
482
+ if (results.length === 0) {
483
+ const sanitized = query
484
+ .toLowerCase()
485
+ .replace(/sanity\.?(io)?/gi, "")
486
+ .trim();
487
+ const slugGuess = sanitized
488
+ .replace(/\s+/g, "-")
489
+ .replace(/[^a-z0-9-]/g, "");
490
+ results = [
491
+ {
492
+ snippet: `Try the documentation page for: ${sanitized}`,
493
+ title: `Documentation: ${query}`,
494
+ url: `${this.docBaseUrl}/${slugGuess}`,
495
+ },
496
+ ];
497
+ }
498
+ // -----------------------------------------------------------------------
499
+ // ORIGIN FILTERING: in "origin-only" mode, restrict results to allowed
500
+ // origins. This filters search results the same way fetch_page is
501
+ // sandboxed — the agent only sees results from permitted domains.
502
+ // -----------------------------------------------------------------------
503
+ if (this.searchMode === "origin-only" && this.allowedOrigins.length > 0) {
504
+ const filtered = results.filter((r) => isAllowedOrigin(r.url, this.allowedOrigins));
505
+ if (filtered.length > 0) {
506
+ return JSON.stringify(filtered.slice(0, 8));
507
+ }
508
+ // No on-origin results — return a helpful fallback
509
+ return JSON.stringify([
510
+ {
511
+ snippet: `Search was restricted to ${this.allowedOrigins.join(", ")}. ` +
512
+ `Try fetching docs directly at ${this.docBaseUrl} or use list_docs.`,
513
+ title: "No results found within allowed origins",
514
+ url: this.docBaseUrl,
515
+ },
516
+ ]);
517
+ }
518
+ // "open" mode: return all results (priority-sorted by parseSearchResults)
519
+ return JSON.stringify(results.slice(0, 8));
520
+ }
521
+ /**
522
+ * Build the set of tools available to the agent based on search mode
523
+ * and agent mode. When searchMode is "off", web_search is excluded
524
+ * entirely — the model can't call what it can't see.
525
+ */
526
+ getAvailableTools() {
527
+ const tools = [];
528
+ // web_search: included unless search mode is "off"
529
+ if (this.searchMode !== "off") {
530
+ tools.push(AGENT_TOOLS[0]); // web_search
531
+ }
532
+ // fetch_page: always included (origin sandboxing handles restriction)
533
+ tools.push(AGENT_TOOLS[1]); // fetch_page
534
+ // list_docs: optimized mode only
535
+ if (this.agentMode === "optimized") {
536
+ tools.push(...OPTIMIZED_EXTRA_TOOLS);
537
+ }
538
+ return tools;
539
+ }
540
+ /**
541
+ * Merge custom headers into a request's headers.
542
+ * Custom headers are injected into doc-site requests only — never into
543
+ * external APIs (OpenAI, Jina, Google).
544
+ */
545
+ mergeDocHeaders(baseHeaders, url) {
546
+ if (Object.keys(this.customHeaders).length === 0)
547
+ return baseHeaders;
548
+ // Only inject custom headers for requests to the doc site
549
+ try {
550
+ const urlHost = new URL(url).hostname.replace(/^www\./, "");
551
+ const docHost = new URL(this.docBaseUrl).hostname.replace(/^www\./, "");
552
+ if (urlHost !== docHost && !urlHost.endsWith(`.${docHost}`)) {
553
+ return baseHeaders;
554
+ }
555
+ }
556
+ catch {
557
+ return baseHeaders;
558
+ }
559
+ return { ...baseHeaders, ...this.customHeaders };
560
+ }
561
+ /**
562
+ * Parses search results from Jina Reader markdown output.
563
+ */
564
+ parseSearchResults(markdown) {
565
+ const results = [];
566
+ const lines = markdown.split("\n");
567
+ let currentTitle = "";
568
+ let currentUrl = "";
569
+ let currentSnippet = "";
570
+ for (const line of lines) {
571
+ const linkMatch = line.match(/\[([^\]]+)\]\((https?:\/\/[^)]+)\)/);
572
+ if (linkMatch) {
573
+ if (currentUrl && currentTitle) {
574
+ results.push({
575
+ snippet: currentSnippet || currentTitle,
576
+ title: currentTitle,
577
+ url: currentUrl,
578
+ });
579
+ }
580
+ currentTitle = linkMatch[1];
581
+ currentUrl = linkMatch[2];
582
+ currentSnippet = "";
583
+ continue;
584
+ }
585
+ const urlMatch = line.match(/^(https?:\/\/\S+)/);
586
+ if (urlMatch && !currentUrl) {
587
+ currentUrl = urlMatch[1];
588
+ continue;
589
+ }
590
+ if (currentUrl && line.trim().length > 20) {
591
+ currentSnippet += (currentSnippet ? " " : "") + line.trim();
592
+ }
593
+ }
594
+ if (currentUrl && currentTitle) {
595
+ results.push({
596
+ snippet: currentSnippet || currentTitle,
597
+ title: currentTitle,
598
+ url: currentUrl,
599
+ });
600
+ }
601
+ // Prioritize results from the configured documentation domain
602
+ const domain = this.priorityDomain;
603
+ const priorityResults = results.filter((r) => r.url.includes(domain));
604
+ const otherResults = results.filter((r) => !r.url.includes(domain));
605
+ return [...priorityResults, ...otherResults];
606
+ }
607
+ /**
608
+ * Runs the agentic tool-calling loop. Routes to OpenAI or Anthropic
609
+ * based on the `provider` config field.
610
+ */
611
+ async runAgenticLoop(prompt) {
612
+ const providerType = this.detectProvider();
613
+ return providerType === "anthropic"
614
+ ? this.runAnthropicLoop(prompt)
615
+ : this.runOpenAILoop(prompt);
616
+ }
617
+ // -------------------------------------------------------------------------
618
+ // Anthropic agentic loop
619
+ // -------------------------------------------------------------------------
620
+ async runAnthropicLoop(prompt) {
621
+ const model = this.config.model || "claude-sonnet-4-20250514";
622
+ const temperature = this.config.temperature ?? 0.2;
623
+ const maxTokens = this.config.max_tokens || 4096;
624
+ const maxToolRounds = this.config.maxToolRounds || 5;
625
+ const apiKey = this.config.apiKey || process.env.ANTHROPIC_API_KEY;
626
+ if (!apiKey) {
627
+ return {
628
+ error: "ANTHROPIC_API_KEY not set. Configure it in env or provider config.",
629
+ output: undefined,
630
+ };
631
+ }
632
+ const fetchFn = this.recorder.isRunning()
633
+ ? this.recorder.fetch.bind(this.recorder)
634
+ : globalThis.fetch;
635
+ const openAiTools = this.getAvailableTools();
636
+ const tools = this.toAnthropicTools(openAiTools);
637
+ const systemPrompt = this.buildSystemPrompt();
638
+ // Anthropic uses a separate `system` field, not a system message in the array
639
+ const anthropicMessages = [
640
+ { content: prompt, role: "user" },
641
+ ];
642
+ let inputTokens = 0;
643
+ let outputTokens = 0;
644
+ const startTime = Date.now();
645
+ for (let round = 0; round <= maxToolRounds; round++) {
646
+ const isLastRound = round === maxToolRounds;
647
+ // On the last round, omit tools entirely to force a text-only response.
648
+ // Anthropic doesn't support tool_choice: "none" — the way to disable
649
+ // tools is to simply not include them in the request.
650
+ // We also inject a synthesis prompt so the model knows to produce
651
+ // a final answer from whatever context it has gathered so far.
652
+ if (isLastRound) {
653
+ // Ensure the last message is a user message (Anthropic requires
654
+ // alternating user/assistant). If the last message is already a
655
+ // user message (tool_result), we can append text to it or add a
656
+ // new user message.
657
+ const lastMsg = anthropicMessages[anthropicMessages.length - 1];
658
+ const synthesisText = "You've gathered enough information. Based on the documentation " +
659
+ "and context you've collected, provide your complete, final answer now. " +
660
+ "Include all necessary code, imports, and configuration.";
661
+ if (lastMsg?.role === "user" && Array.isArray(lastMsg.content)) {
662
+ // Last message is tool_result blocks — append a text block
663
+ ;
664
+ lastMsg.content.push({
665
+ text: synthesisText,
666
+ type: "text",
667
+ });
668
+ }
669
+ else {
670
+ anthropicMessages.push({
671
+ content: synthesisText,
672
+ role: "user",
673
+ });
674
+ }
675
+ }
676
+ const body = {
677
+ max_tokens: maxTokens,
678
+ messages: anthropicMessages,
679
+ model,
680
+ system: systemPrompt,
681
+ temperature,
682
+ };
683
+ if (!isLastRound) {
684
+ body.tools = tools;
685
+ }
686
+ const response = await fetchFn("https://api.anthropic.com/v1/messages", {
687
+ body: JSON.stringify(body),
688
+ headers: {
689
+ "anthropic-version": "2023-06-01",
690
+ "Content-Type": "application/json",
691
+ "x-api-key": apiKey,
692
+ },
693
+ method: "POST",
694
+ });
695
+ const data = (await response.json());
696
+ if (data.error) {
697
+ return {
698
+ error: data.error.message ??
699
+ `Anthropic API error: ${JSON.stringify(data.error)}`,
700
+ output: undefined,
701
+ };
702
+ }
703
+ inputTokens += data.usage?.input_tokens ?? 0;
704
+ outputTokens += data.usage?.output_tokens ?? 0;
705
+ if (!data.content?.length) {
706
+ // Empty content with end_turn is valid (model chose to say nothing).
707
+ // Return empty output rather than treating as an error.
708
+ return {
709
+ cost: calculateCost(model, inputTokens, outputTokens),
710
+ metadata: {
711
+ agentMode: this.agentMode,
712
+ emptyResponse: true,
713
+ latencyMs: Date.now() - startTime,
714
+ model,
715
+ provider: "anthropic",
716
+ toolRounds: round,
717
+ },
718
+ output: "",
719
+ tokenUsage: {
720
+ completion: outputTokens,
721
+ prompt: inputTokens,
722
+ total: inputTokens + outputTokens,
723
+ },
724
+ };
725
+ }
726
+ // Add the assistant response to message history
727
+ anthropicMessages.push({
728
+ content: data.content,
729
+ role: "assistant",
730
+ });
731
+ // Check if the model wants to use tools
732
+ const toolUseBlocks = data.content.filter((block) => block.type === "tool_use");
733
+ if (data.stop_reason !== "tool_use" || toolUseBlocks.length === 0) {
734
+ // Model is done — extract text response
735
+ const textBlocks = data.content.filter((block) => block.type === "text");
736
+ const output = textBlocks.map((b) => b.text).join("\n") || "";
737
+ return {
738
+ cost: calculateCost(model, inputTokens, outputTokens),
739
+ metadata: {
740
+ agentMode: this.agentMode,
741
+ latencyMs: Date.now() - startTime,
742
+ model,
743
+ provider: "anthropic",
744
+ toolRounds: round,
745
+ },
746
+ output,
747
+ tokenUsage: {
748
+ completion: outputTokens,
749
+ prompt: inputTokens,
750
+ total: inputTokens + outputTokens,
751
+ },
752
+ };
753
+ }
754
+ // Execute tool calls and add results
755
+ const toolResults = [];
756
+ for (const toolUse of toolUseBlocks) {
757
+ const argsJson = JSON.stringify(toolUse.input);
758
+ const result = await this.executeTool(toolUse.name, argsJson, fetchFn);
759
+ toolResults.push({
760
+ content: result,
761
+ tool_use_id: toolUse.id,
762
+ type: "tool_result",
763
+ });
764
+ }
765
+ // Add tool results as a user message (Anthropic's format)
766
+ anthropicMessages.push({
767
+ content: toolResults,
768
+ role: "user",
769
+ });
770
+ }
771
+ // Safety net: exhausted tool rounds
772
+ const lastAssistantMsg = [...anthropicMessages]
773
+ .reverse()
774
+ .find((m) => m.role === "assistant");
775
+ let lastText = "";
776
+ if (lastAssistantMsg && Array.isArray(lastAssistantMsg.content)) {
777
+ const textBlocks = lastAssistantMsg.content.filter((b) => b.type === "text");
778
+ lastText = textBlocks.map((b) => b.text).join("\n");
779
+ }
780
+ return {
781
+ cost: calculateCost(model, inputTokens, outputTokens),
782
+ metadata: {
783
+ agentMode: this.agentMode,
784
+ exhaustedRounds: true,
785
+ latencyMs: Date.now() - startTime,
786
+ model,
787
+ provider: "anthropic",
788
+ toolRounds: maxToolRounds,
789
+ },
790
+ output: lastText ||
791
+ "[Agent exhausted tool rounds without producing a final answer]",
792
+ tokenUsage: {
793
+ completion: outputTokens,
794
+ prompt: inputTokens,
795
+ total: inputTokens + outputTokens,
796
+ },
797
+ };
798
+ }
799
+ // -------------------------------------------------------------------------
800
+ // OpenAI agentic loop
801
+ // -------------------------------------------------------------------------
802
+ async runOpenAILoop(prompt) {
803
+ const model = this.config.model || "gpt-4o";
804
+ const temperature = this.config.temperature ?? 0.2;
805
+ const maxToolRounds = this.config.maxToolRounds || 5;
806
+ const apiKey = this.config.apiKey || process.env.OPENAI_API_KEY;
807
+ // Newer OpenAI models (gpt-5.x, o-series) use max_completion_tokens
808
+ // instead of max_tokens. Detect from config or model name.
809
+ const useMaxCompletionTokens = this.config.max_output_tokens != null ||
810
+ this.config.max_completion_tokens != null ||
811
+ model.startsWith("gpt-5") ||
812
+ model.startsWith("o3") ||
813
+ model.startsWith("o4");
814
+ const maxTokensValue = this.config.max_output_tokens ??
815
+ this.config.max_completion_tokens ??
816
+ this.config.max_tokens ??
817
+ 4096;
818
+ const tokenLimitParam = useMaxCompletionTokens
819
+ ? { max_completion_tokens: maxTokensValue }
820
+ : { max_tokens: maxTokensValue };
821
+ if (!apiKey) {
822
+ return {
823
+ error: "OPENAI_API_KEY not set. Configure it in env or provider config.",
824
+ output: undefined,
825
+ };
826
+ }
827
+ const fetchFn = this.recorder.isRunning()
828
+ ? this.recorder.fetch.bind(this.recorder)
829
+ : globalThis.fetch;
830
+ const tools = this.getAvailableTools();
831
+ const systemPrompt = this.buildSystemPrompt();
832
+ const messages = [
833
+ { content: systemPrompt, role: "system" },
834
+ { content: prompt, role: "user" },
835
+ ];
836
+ let totalTokens = 0;
837
+ let promptTokens = 0;
838
+ let completionTokens = 0;
839
+ const startTime = Date.now();
840
+ for (let round = 0; round <= maxToolRounds; round++) {
841
+ const isLastRound = round === maxToolRounds;
842
+ const response = await fetchFn("https://api.openai.com/v1/chat/completions", {
843
+ body: JSON.stringify({
844
+ ...tokenLimitParam,
845
+ messages,
846
+ model,
847
+ temperature,
848
+ tool_choice: isLastRound ? "none" : "auto",
849
+ tools,
850
+ }),
851
+ headers: {
852
+ Authorization: `Bearer ${apiKey}`,
853
+ "Content-Type": "application/json",
854
+ },
855
+ method: "POST",
856
+ });
857
+ const data = (await response.json());
858
+ if (data.error) {
859
+ return {
860
+ error: data.error.message ?? "Unknown OpenAI error",
861
+ output: undefined,
862
+ };
863
+ }
864
+ totalTokens += data.usage?.total_tokens ?? 0;
865
+ promptTokens += data.usage?.prompt_tokens ?? 0;
866
+ completionTokens += data.usage?.completion_tokens ?? 0;
867
+ const assistantMessage = data.choices?.[0]?.message;
868
+ const finishReason = data.choices?.[0]?.finish_reason;
869
+ if (!assistantMessage) {
870
+ return { error: "No response from model", output: undefined };
871
+ }
872
+ messages.push(assistantMessage);
873
+ if (finishReason !== "tool_calls" ||
874
+ !assistantMessage.tool_calls?.length) {
875
+ return {
876
+ cost: calculateCost(model, promptTokens, completionTokens),
877
+ metadata: {
878
+ agentMode: this.agentMode,
879
+ latencyMs: Date.now() - startTime,
880
+ model,
881
+ provider: "openai",
882
+ toolRounds: round,
883
+ },
884
+ output: assistantMessage.content ?? "",
885
+ tokenUsage: {
886
+ completion: completionTokens,
887
+ prompt: promptTokens,
888
+ total: totalTokens,
889
+ },
890
+ };
891
+ }
892
+ for (const toolCall of assistantMessage.tool_calls) {
893
+ const toolResult = await this.executeTool(toolCall.function.name, toolCall.function.arguments, fetchFn);
894
+ messages.push({
895
+ content: toolResult,
896
+ role: "tool",
897
+ tool_call_id: toolCall.id,
898
+ });
899
+ }
900
+ }
901
+ const lastAssistant = messages
902
+ .filter((m) => m.role === "assistant" && m.content)
903
+ .pop();
904
+ return {
905
+ cost: calculateCost(model, promptTokens, completionTokens),
906
+ metadata: {
907
+ agentMode: this.agentMode,
908
+ exhaustedRounds: true,
909
+ latencyMs: Date.now() - startTime,
910
+ model,
911
+ provider: "openai",
912
+ toolRounds: maxToolRounds,
913
+ },
914
+ output: lastAssistant?.content ??
915
+ "[Agent exhausted tool rounds without producing a final answer]",
916
+ tokenUsage: {
917
+ completion: completionTokens,
918
+ prompt: promptTokens,
919
+ total: totalTokens,
920
+ },
921
+ };
922
+ }
923
+ /**
924
+ * Strips HTML tags and normalizes whitespace. Fallback for when
925
+ * neither .md endpoints nor Jina are available.
926
+ */
927
+ stripHtml(html) {
928
+ return html
929
+ .replace(/<script[^>]*>[\s\S]*?<\/script>/gi, "")
930
+ .replace(/<style[^>]*>[\s\S]*?<\/style>/gi, "")
931
+ .replace(/<svg[^>]*>[\s\S]*?<\/svg>/gi, "")
932
+ .replace(/<[^>]+>/g, " ")
933
+ .replace(/&amp;/g, "&")
934
+ .replace(/&lt;/g, "<")
935
+ .replace(/&gt;/g, ">")
936
+ .replace(/&quot;/g, '"')
937
+ .replace(/&#39;/g, "'")
938
+ .replace(/&nbsp;/g, " ")
939
+ .replace(/\s+/g, " ")
940
+ .trim();
941
+ }
942
+ /**
943
+ * Convert our ToolDefinition[] to Anthropic's tool format.
944
+ * Anthropic uses `input_schema` instead of `parameters`.
945
+ */
946
+ toAnthropicTools(tools) {
947
+ return tools.map((t) => ({
948
+ description: t.function.description,
949
+ input_schema: t.function
950
+ .parameters,
951
+ name: t.function.name,
952
+ }));
953
+ }
954
+ }
955
/**
 * Builds a regex that matches documentation URLs for a given base URL.
 * Handles both http and https, with and without www prefix. URLs carrying
 * a query string or fragment do not match.
 *
 * @example
 * buildDocsUrlPattern("https://www.sanity.io/docs")
 * // matches: https://sanity.io/docs/schema-types, https://www.sanity.io/docs/
 *
 * @param {string} baseUrl - Absolute base URL of the documentation site.
 * @returns {RegExp} Anchored pattern matching the base URL and any path below it.
 * @throws {TypeError} If `baseUrl` is not a valid absolute URL.
 */
function buildDocsUrlPattern(baseUrl) {
    // Escape every regex metacharacter, not just "." and "/", so hosts and
    // paths such as "/docs/v1.2" or "/docs/c++" are matched literally.
    const escapeRegExp = (text) => text.replace(/[.*+?^${}()|[\]\\\/]/g, "\\$&");
    const { hostname, pathname } = new URL(baseUrl);
    // The optional "www." prefix is re-added by the pattern itself.
    const escapedHost = escapeRegExp(hostname.replace(/^www\./, ""));
    const escapedPath = escapeRegExp(pathname.replace(/\/$/, ""));
    return new RegExp(`^https?:\\/\\/(www\\.)?${escapedHost}${escapedPath}(\\/[^?#]*)?$`);
}
969
+ // ---------------------------------------------------------------------------
970
+ // Agentic provider implementation
971
+ // ---------------------------------------------------------------------------
972
/**
 * Converts a documentation URL to its .md equivalent.
 * e.g. https://www.sanity.io/docs/schema-types → https://www.sanity.io/docs/schema-types.md
 *
 * A single trailing slash on the path is dropped and an existing `.md`
 * suffix is left alone. Any query string or fragment is kept and stays
 * after the `.md` suffix.
 *
 * @param {string} url - Documentation page URL.
 * @returns {string} URL whose path ends in `.md`.
 */
function toMarkdownUrl(url) {
    // Split off "?query" / "#fragment" so ".md" is applied to the path only.
    const suffixStart = url.search(/[?#]/);
    const path = suffixStart === -1 ? url : url.slice(0, suffixStart);
    const suffix = suffixStart === -1 ? "" : url.slice(suffixStart);
    // Strip trailing slash
    const clean = path.replace(/\/$/, "");
    // Don't double-add .md
    const markdownPath = clean.endsWith(".md") ? clean : `${clean}.md`;
    return `${markdownPath}${suffix}`;
}