@sanity/ailf 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (530)
  1. package/README.md +89 -0
  2. package/bin/ailf.js +64 -0
  3. package/canonical/grader-references/README.md +88 -0
  4. package/canonical/grader-references/groq.yaml +234 -0
  5. package/canonical/grader-references/studio-setup.yaml +275 -0
  6. package/canonical/reference-solutions/.gitkeep +1 -0
  7. package/canonical/reference-solutions/frameworks/nuxt.ts +119 -0
  8. package/canonical/reference-solutions/frameworks/remix.tsx +100 -0
  9. package/canonical/reference-solutions/functions/publish-webhook.ts +60 -0
  10. package/canonical/reference-solutions/groq/advanced-filtering.ts +379 -0
  11. package/canonical/reference-solutions/groq/blog-queries.ts +137 -0
  12. package/canonical/reference-solutions/groq/joins-references.ts +300 -0
  13. package/canonical/reference-solutions/nextjs/app-router-integration.tsx +128 -0
  14. package/canonical/reference-solutions/studio-setup/blog-schema.ts +143 -0
  15. package/canonical/reference-solutions/studio-setup/custom-tool.tsx +78 -0
  16. package/canonical/reference-solutions/visual-editing/live-preview.tsx +137 -0
  17. package/canonical/reference-solutions/visual-editing/presentation-nextjs.tsx +130 -0
  18. package/config/airbyte/ai_literacy_framework.connector.yaml +639 -0
  19. package/config/bigquery/README.md +74 -0
  20. package/config/bigquery/views/area_scores.sql +87 -0
  21. package/config/bigquery/views/reports.sql +49 -0
  22. package/config/features.yaml +116 -0
  23. package/config/models.yaml +115 -0
  24. package/config/prompts.yaml +75 -0
  25. package/config/rubrics.yaml +62 -0
  26. package/config/schedules.yaml +43 -0
  27. package/config/sinks.yaml +54 -0
  28. package/config/sources.yaml +51 -0
  29. package/config/thresholds.yaml +49 -0
  30. package/dist/_vendor/ailf-core/examples/index.d.ts +190 -0
  31. package/dist/_vendor/ailf-core/examples/index.js +285 -0
  32. package/dist/_vendor/ailf-core/index.d.ts +17 -0
  33. package/dist/_vendor/ailf-core/index.js +17 -0
  34. package/dist/_vendor/ailf-core/ports/cache-store.d.ts +72 -0
  35. package/dist/_vendor/ailf-core/ports/cache-store.js +17 -0
  36. package/dist/_vendor/ailf-core/ports/config-source.d.ts +33 -0
  37. package/dist/_vendor/ailf-core/ports/config-source.js +15 -0
  38. package/dist/_vendor/ailf-core/ports/context.d.ts +172 -0
  39. package/dist/_vendor/ailf-core/ports/context.js +14 -0
  40. package/dist/_vendor/ailf-core/ports/doc-fetcher.d.ts +131 -0
  41. package/dist/_vendor/ailf-core/ports/doc-fetcher.js +12 -0
  42. package/dist/_vendor/ailf-core/ports/eval-runner.d.ts +24 -0
  43. package/dist/_vendor/ailf-core/ports/eval-runner.js +8 -0
  44. package/dist/_vendor/ailf-core/ports/index.d.ts +15 -0
  45. package/dist/_vendor/ailf-core/ports/index.js +7 -0
  46. package/dist/_vendor/ailf-core/ports/logger.d.ts +36 -0
  47. package/dist/_vendor/ailf-core/ports/logger.js +11 -0
  48. package/dist/_vendor/ailf-core/ports/pipeline-step.d.ts +46 -0
  49. package/dist/_vendor/ailf-core/ports/pipeline-step.js +8 -0
  50. package/dist/_vendor/ailf-core/ports/task-source.d.ts +159 -0
  51. package/dist/_vendor/ailf-core/ports/task-source.js +72 -0
  52. package/dist/_vendor/ailf-core/schemas/callback-payload.d.ts +24 -0
  53. package/dist/_vendor/ailf-core/schemas/callback-payload.js +29 -0
  54. package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +55 -0
  55. package/dist/_vendor/ailf-core/schemas/eval-config.js +78 -0
  56. package/dist/_vendor/ailf-core/schemas/index.d.ts +16 -0
  57. package/dist/_vendor/ailf-core/schemas/index.js +16 -0
  58. package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +125 -0
  59. package/dist/_vendor/ailf-core/schemas/pipeline-request.js +67 -0
  60. package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +531 -0
  61. package/dist/_vendor/ailf-core/schemas/pipeline.js +318 -0
  62. package/dist/_vendor/ailf-core/schemas/schedules.d.ts +68 -0
  63. package/dist/_vendor/ailf-core/schemas/schedules.js +74 -0
  64. package/dist/_vendor/ailf-core/schemas/sinks.d.ts +207 -0
  65. package/dist/_vendor/ailf-core/schemas/sinks.js +108 -0
  66. package/dist/_vendor/ailf-core/services/comparison-formatters.d.ts +18 -0
  67. package/dist/_vendor/ailf-core/services/comparison-formatters.js +189 -0
  68. package/dist/_vendor/ailf-core/services/config-helpers.d.ts +41 -0
  69. package/dist/_vendor/ailf-core/services/config-helpers.js +86 -0
  70. package/dist/_vendor/ailf-core/services/index.d.ts +12 -0
  71. package/dist/_vendor/ailf-core/services/index.js +12 -0
  72. package/dist/_vendor/ailf-core/services/scoring.d.ts +49 -0
  73. package/dist/_vendor/ailf-core/services/scoring.js +222 -0
  74. package/dist/_vendor/ailf-core/types/index.d.ts +1082 -0
  75. package/dist/_vendor/ailf-core/types/index.js +21 -0
  76. package/dist/_vendor/ailf-core/types/scoring-input.d.ts +54 -0
  77. package/dist/_vendor/ailf-core/types/scoring-input.js +9 -0
  78. package/dist/_vendor/ailf-shared/dimension-names.d.ts +21 -0
  79. package/dist/_vendor/ailf-shared/dimension-names.js +27 -0
  80. package/dist/_vendor/ailf-shared/document-ref.d.ts +29 -0
  81. package/dist/_vendor/ailf-shared/document-ref.js +1 -0
  82. package/dist/_vendor/ailf-shared/eval-modes.d.ts +12 -0
  83. package/dist/_vendor/ailf-shared/eval-modes.js +8 -0
  84. package/dist/_vendor/ailf-shared/index.d.ts +16 -0
  85. package/dist/_vendor/ailf-shared/index.js +16 -0
  86. package/dist/_vendor/ailf-shared/noise-threshold.d.ts +9 -0
  87. package/dist/_vendor/ailf-shared/noise-threshold.js +9 -0
  88. package/dist/_vendor/ailf-shared/score-grades.d.ts +17 -0
  89. package/dist/_vendor/ailf-shared/score-grades.js +23 -0
  90. package/dist/adapters/cache/content-lake-cache.d.ts +24 -0
  91. package/dist/adapters/cache/content-lake-cache.js +59 -0
  92. package/dist/adapters/cache/filesystem-cache.d.ts +18 -0
  93. package/dist/adapters/cache/filesystem-cache.js +54 -0
  94. package/dist/adapters/cache/index.d.ts +2 -0
  95. package/dist/adapters/cache/index.js +2 -0
  96. package/dist/adapters/config-sources/cli-config-adapter.d.ts +17 -0
  97. package/dist/adapters/config-sources/cli-config-adapter.js +23 -0
  98. package/dist/adapters/config-sources/file-config-adapter.d.ts +26 -0
  99. package/dist/adapters/config-sources/file-config-adapter.js +96 -0
  100. package/dist/adapters/config-sources/index.d.ts +2 -0
  101. package/dist/adapters/config-sources/index.js +2 -0
  102. package/dist/adapters/doc-fetchers/index.d.ts +1 -0
  103. package/dist/adapters/doc-fetchers/index.js +1 -0
  104. package/dist/adapters/doc-fetchers/sanity-doc-fetcher.d.ts +76 -0
  105. package/dist/adapters/doc-fetchers/sanity-doc-fetcher.js +620 -0
  106. package/dist/adapters/eval-runners/index.d.ts +1 -0
  107. package/dist/adapters/eval-runners/index.js +1 -0
  108. package/dist/adapters/eval-runners/promptfoo-eval-adapter.d.ts +14 -0
  109. package/dist/adapters/eval-runners/promptfoo-eval-adapter.js +63 -0
  110. package/dist/adapters/index.d.ts +12 -0
  111. package/dist/adapters/index.js +12 -0
  112. package/dist/adapters/loggers/console-logger.d.ts +22 -0
  113. package/dist/adapters/loggers/console-logger.js +54 -0
  114. package/dist/adapters/loggers/index.d.ts +9 -0
  115. package/dist/adapters/loggers/index.js +9 -0
  116. package/dist/adapters/loggers/json-logger.d.ts +18 -0
  117. package/dist/adapters/loggers/json-logger.js +33 -0
  118. package/dist/adapters/loggers/quiet-logger.d.ts +16 -0
  119. package/dist/adapters/loggers/quiet-logger.js +30 -0
  120. package/dist/adapters/task-sources/composite-task-source.d.ts +20 -0
  121. package/dist/adapters/task-sources/composite-task-source.js +59 -0
  122. package/dist/adapters/task-sources/content-lake-task-source.d.ts +20 -0
  123. package/dist/adapters/task-sources/content-lake-task-source.js +219 -0
  124. package/dist/adapters/task-sources/index.d.ts +7 -0
  125. package/dist/adapters/task-sources/index.js +7 -0
  126. package/dist/adapters/task-sources/repo-schemas.d.ts +245 -0
  127. package/dist/adapters/task-sources/repo-schemas.js +234 -0
  128. package/dist/adapters/task-sources/repo-task-source.d.ts +22 -0
  129. package/dist/adapters/task-sources/repo-task-source.js +104 -0
  130. package/dist/adapters/task-sources/repo-trigger.d.ts +52 -0
  131. package/dist/adapters/task-sources/repo-trigger.js +153 -0
  132. package/dist/adapters/task-sources/repo-validation.d.ts +49 -0
  133. package/dist/adapters/task-sources/repo-validation.js +164 -0
  134. package/dist/adapters/task-sources/yaml-task-source.d.ts +18 -0
  135. package/dist/adapters/task-sources/yaml-task-source.js +136 -0
  136. package/dist/agent-observer/agentic-provider.d.ts +132 -0
  137. package/dist/agent-observer/agentic-provider.js +983 -0
  138. package/dist/agent-observer/classifier.d.ts +62 -0
  139. package/dist/agent-observer/classifier.js +269 -0
  140. package/dist/agent-observer/index.d.ts +7 -0
  141. package/dist/agent-observer/index.js +4 -0
  142. package/dist/agent-observer/pricing.d.ts +35 -0
  143. package/dist/agent-observer/pricing.js +82 -0
  144. package/dist/agent-observer/provider.d.ts +77 -0
  145. package/dist/agent-observer/provider.js +151 -0
  146. package/dist/agent-observer/proxy.d.ts +91 -0
  147. package/dist/agent-observer/proxy.js +321 -0
  148. package/dist/agent-observer/test-imports.d.ts +7 -0
  149. package/dist/agent-observer/test-imports.js +185 -0
  150. package/dist/agent-observer/types.d.ts +137 -0
  151. package/dist/agent-observer/types.js +16 -0
  152. package/dist/assertions/source-isolation.d.ts +72 -0
  153. package/dist/assertions/source-isolation.js +117 -0
  154. package/dist/cli.d.ts +24 -0
  155. package/dist/cli.js +199 -0
  156. package/dist/commands/agent-report.d.ts +5 -0
  157. package/dist/commands/agent-report.js +69 -0
  158. package/dist/commands/baseline.d.ts +9 -0
  159. package/dist/commands/baseline.js +141 -0
  160. package/dist/commands/cache.d.ts +13 -0
  161. package/dist/commands/cache.js +135 -0
  162. package/dist/commands/calculate-scores.d.ts +8 -0
  163. package/dist/commands/calculate-scores.js +48 -0
  164. package/dist/commands/compare.d.ts +8 -0
  165. package/dist/commands/compare.js +120 -0
  166. package/dist/commands/completion.d.ts +18 -0
  167. package/dist/commands/completion.js +260 -0
  168. package/dist/commands/coverage-audit.d.ts +7 -0
  169. package/dist/commands/coverage-audit.js +40 -0
  170. package/dist/commands/discovery-report.d.ts +10 -0
  171. package/dist/commands/discovery-report.js +44 -0
  172. package/dist/commands/eval.d.ts +9 -0
  173. package/dist/commands/eval.js +35 -0
  174. package/dist/commands/explain-handler.d.ts +34 -0
  175. package/dist/commands/explain-handler.js +719 -0
  176. package/dist/commands/fetch-docs.d.ts +8 -0
  177. package/dist/commands/fetch-docs.js +128 -0
  178. package/dist/commands/generate-configs.d.ts +8 -0
  179. package/dist/commands/generate-configs.js +46 -0
  180. package/dist/commands/grader/index.d.ts +11 -0
  181. package/dist/commands/grader/index.js +118 -0
  182. package/dist/commands/init.d.ts +19 -0
  183. package/dist/commands/init.js +150 -0
  184. package/dist/commands/interactive.d.ts +12 -0
  185. package/dist/commands/interactive.js +238 -0
  186. package/dist/commands/lookup-doc.d.ts +15 -0
  187. package/dist/commands/lookup-doc.js +84 -0
  188. package/dist/commands/measure-retrieval.d.ts +5 -0
  189. package/dist/commands/measure-retrieval.js +65 -0
  190. package/dist/commands/pipeline-action.d.ts +71 -0
  191. package/dist/commands/pipeline-action.js +305 -0
  192. package/dist/commands/pipeline.d.ts +62 -0
  193. package/dist/commands/pipeline.js +53 -0
  194. package/dist/commands/pr-comment.d.ts +8 -0
  195. package/dist/commands/pr-comment.js +47 -0
  196. package/dist/commands/publish.d.ts +26 -0
  197. package/dist/commands/publish.js +253 -0
  198. package/dist/commands/readiness-report.d.ts +10 -0
  199. package/dist/commands/readiness-report.js +104 -0
  200. package/dist/commands/shared/options.d.ts +29 -0
  201. package/dist/commands/shared/options.js +57 -0
  202. package/dist/commands/update-quality-scores.d.ts +5 -0
  203. package/dist/commands/update-quality-scores.js +20 -0
  204. package/dist/commands/validate-tasks.d.ts +16 -0
  205. package/dist/commands/validate-tasks.js +93 -0
  206. package/dist/commands/validate.d.ts +9 -0
  207. package/dist/commands/validate.js +73 -0
  208. package/dist/commands/webhook-server.d.ts +5 -0
  209. package/dist/commands/webhook-server.js +30 -0
  210. package/dist/commands/weekly-digest.d.ts +10 -0
  211. package/dist/commands/weekly-digest.js +104 -0
  212. package/dist/composition-root.d.ts +26 -0
  213. package/dist/composition-root.js +107 -0
  214. package/dist/interpolate.d.ts +26 -0
  215. package/dist/interpolate.js +70 -0
  216. package/dist/job-store.d.ts +104 -0
  217. package/dist/job-store.js +188 -0
  218. package/dist/lib/agent-behavior-report.d.ts +8 -0
  219. package/dist/lib/agent-behavior-report.js +185 -0
  220. package/dist/lib/baseline.d.ts +19 -0
  221. package/dist/lib/baseline.js +153 -0
  222. package/dist/lib/calculate-scores.d.ts +23 -0
  223. package/dist/lib/calculate-scores.js +42 -0
  224. package/dist/lib/compare.d.ts +18 -0
  225. package/dist/lib/compare.js +170 -0
  226. package/dist/lib/coverage-audit.d.ts +4 -0
  227. package/dist/lib/coverage-audit.js +42 -0
  228. package/dist/lib/discovery-report.d.ts +13 -0
  229. package/dist/lib/discovery-report.js +57 -0
  230. package/dist/lib/fetch-docs.d.ts +30 -0
  231. package/dist/lib/fetch-docs.js +171 -0
  232. package/dist/lib/generate-configs.d.ts +25 -0
  233. package/dist/lib/generate-configs.js +42 -0
  234. package/dist/lib/grader-api.d.ts +21 -0
  235. package/dist/lib/grader-api.js +34 -0
  236. package/dist/lib/grader-compare.d.ts +19 -0
  237. package/dist/lib/grader-compare.js +91 -0
  238. package/dist/lib/grader-consistency.d.ts +27 -0
  239. package/dist/lib/grader-consistency.js +79 -0
  240. package/dist/lib/grader-sensitivity.d.ts +19 -0
  241. package/dist/lib/grader-sensitivity.js +75 -0
  242. package/dist/lib/grader-validate.d.ts +19 -0
  243. package/dist/lib/grader-validate.js +78 -0
  244. package/dist/lib/measure-retrieval.d.ts +14 -0
  245. package/dist/lib/measure-retrieval.js +71 -0
  246. package/dist/lib/pr-comment.d.ts +16 -0
  247. package/dist/lib/pr-comment.js +28 -0
  248. package/dist/lib/readiness-report.d.ts +13 -0
  249. package/dist/lib/readiness-report.js +108 -0
  250. package/dist/lib/webhook-server.d.ts +11 -0
  251. package/dist/lib/webhook-server.js +24 -0
  252. package/dist/lib/weekly-digest.d.ts +24 -0
  253. package/dist/lib/weekly-digest.js +148 -0
  254. package/dist/orchestration/build-app-context.d.ts +27 -0
  255. package/dist/orchestration/build-app-context.js +81 -0
  256. package/dist/orchestration/build-step-sequence.d.ts +15 -0
  257. package/dist/orchestration/build-step-sequence.js +84 -0
  258. package/dist/orchestration/config-to-source-overrides.d.ts +9 -0
  259. package/dist/orchestration/config-to-source-overrides.js +28 -0
  260. package/dist/orchestration/env-bridge.d.ts +21 -0
  261. package/dist/orchestration/env-bridge.js +66 -0
  262. package/dist/orchestration/index.d.ts +11 -0
  263. package/dist/orchestration/index.js +11 -0
  264. package/dist/orchestration/pipeline-orchestrator.d.ts +24 -0
  265. package/dist/orchestration/pipeline-orchestrator.js +153 -0
  266. package/dist/orchestration/step-runner.d.ts +20 -0
  267. package/dist/orchestration/step-runner.js +88 -0
  268. package/dist/orchestration/steps/calculate-scores-step.d.ts +13 -0
  269. package/dist/orchestration/steps/calculate-scores-step.js +95 -0
  270. package/dist/orchestration/steps/callback-step.d.ts +24 -0
  271. package/dist/orchestration/steps/callback-step.js +76 -0
  272. package/dist/orchestration/steps/compare-step.d.ts +14 -0
  273. package/dist/orchestration/steps/compare-step.js +92 -0
  274. package/dist/orchestration/steps/discovery-report-step.d.ts +13 -0
  275. package/dist/orchestration/steps/discovery-report-step.js +55 -0
  276. package/dist/orchestration/steps/fetch-docs-shell.d.ts +17 -0
  277. package/dist/orchestration/steps/fetch-docs-shell.js +30 -0
  278. package/dist/orchestration/steps/fetch-docs-step.d.ts +14 -0
  279. package/dist/orchestration/steps/fetch-docs-step.js +135 -0
  280. package/dist/orchestration/steps/gap-analysis-step.d.ts +16 -0
  281. package/dist/orchestration/steps/gap-analysis-step.js +136 -0
  282. package/dist/orchestration/steps/generate-configs-step.d.ts +14 -0
  283. package/dist/orchestration/steps/generate-configs-step.js +85 -0
  284. package/dist/orchestration/steps/grader-consistency-step.d.ts +13 -0
  285. package/dist/orchestration/steps/grader-consistency-step.js +64 -0
  286. package/dist/orchestration/steps/index.d.ts +19 -0
  287. package/dist/orchestration/steps/index.js +19 -0
  288. package/dist/orchestration/steps/mirror-repo-tasks-step.d.ts +21 -0
  289. package/dist/orchestration/steps/mirror-repo-tasks-step.js +94 -0
  290. package/dist/orchestration/steps/publish-report-step.d.ts +26 -0
  291. package/dist/orchestration/steps/publish-report-step.js +216 -0
  292. package/dist/orchestration/steps/readiness-step.d.ts +13 -0
  293. package/dist/orchestration/steps/readiness-step.js +91 -0
  294. package/dist/orchestration/steps/report-step.d.ts +12 -0
  295. package/dist/orchestration/steps/report-step.js +49 -0
  296. package/dist/orchestration/steps/run-eval-step.d.ts +17 -0
  297. package/dist/orchestration/steps/run-eval-step.js +195 -0
  298. package/dist/orchestration/steps/validate-step.d.ts +12 -0
  299. package/dist/orchestration/steps/validate-step.js +41 -0
  300. package/dist/pipeline/agent-behavior-report.d.ts +53 -0
  301. package/dist/pipeline/agent-behavior-report.js +132 -0
  302. package/dist/pipeline/attribution.d.ts +47 -0
  303. package/dist/pipeline/attribution.js +226 -0
  304. package/dist/pipeline/baseline.d.ts +37 -0
  305. package/dist/pipeline/baseline.js +141 -0
  306. package/dist/pipeline/cache.d.ts +101 -0
  307. package/dist/pipeline/cache.js +283 -0
  308. package/dist/pipeline/calculate-scores.d.ts +102 -0
  309. package/dist/pipeline/calculate-scores.js +1128 -0
  310. package/dist/pipeline/callback-delivery.d.ts +50 -0
  311. package/dist/pipeline/callback-delivery.js +89 -0
  312. package/dist/pipeline/checks.d.ts +39 -0
  313. package/dist/pipeline/checks.js +280 -0
  314. package/dist/pipeline/classify-url.d.ts +61 -0
  315. package/dist/pipeline/classify-url.js +93 -0
  316. package/dist/pipeline/compare.d.ts +31 -0
  317. package/dist/pipeline/compare.js +208 -0
  318. package/dist/pipeline/coverage-audit.d.ts +39 -0
  319. package/dist/pipeline/coverage-audit.js +165 -0
  320. package/dist/pipeline/degradations.d.ts +85 -0
  321. package/dist/pipeline/degradations.js +242 -0
  322. package/dist/pipeline/discovery-report.d.ts +55 -0
  323. package/dist/pipeline/discovery-report.js +178 -0
  324. package/dist/pipeline/eval-constants.d.ts +68 -0
  325. package/dist/pipeline/eval-constants.js +111 -0
  326. package/dist/pipeline/eval-fingerprint.d.ts +66 -0
  327. package/dist/pipeline/eval-fingerprint.js +175 -0
  328. package/dist/pipeline/expand-tasks.d.ts +220 -0
  329. package/dist/pipeline/expand-tasks.js +421 -0
  330. package/dist/pipeline/failure-modes.d.ts +46 -0
  331. package/dist/pipeline/failure-modes.js +348 -0
  332. package/dist/pipeline/fetch-url-content.d.ts +44 -0
  333. package/dist/pipeline/fetch-url-content.js +93 -0
  334. package/dist/pipeline/gap-analysis.d.ts +48 -0
  335. package/dist/pipeline/gap-analysis.js +231 -0
  336. package/dist/pipeline/generate-configs.d.ts +72 -0
  337. package/dist/pipeline/generate-configs.js +395 -0
  338. package/dist/pipeline/grader-api.d.ts +49 -0
  339. package/dist/pipeline/grader-api.js +200 -0
  340. package/dist/pipeline/grader-compare-runner.d.ts +44 -0
  341. package/dist/pipeline/grader-compare-runner.js +301 -0
  342. package/dist/pipeline/grader-comparison.d.ts +111 -0
  343. package/dist/pipeline/grader-comparison.js +161 -0
  344. package/dist/pipeline/grader-consistency-runner.d.ts +60 -0
  345. package/dist/pipeline/grader-consistency-runner.js +270 -0
  346. package/dist/pipeline/grader-consistency.d.ts +103 -0
  347. package/dist/pipeline/grader-consistency.js +146 -0
  348. package/dist/pipeline/grader-sensitivity-runner.d.ts +40 -0
  349. package/dist/pipeline/grader-sensitivity-runner.js +282 -0
  350. package/dist/pipeline/grader-sensitivity.d.ts +94 -0
  351. package/dist/pipeline/grader-sensitivity.js +144 -0
  352. package/dist/pipeline/grader-validate-runner.d.ts +38 -0
  353. package/dist/pipeline/grader-validate-runner.js +229 -0
  354. package/dist/pipeline/grader-validation.d.ts +107 -0
  355. package/dist/pipeline/grader-validation.js +169 -0
  356. package/dist/pipeline/map-request-to-config.d.ts +19 -0
  357. package/dist/pipeline/map-request-to-config.js +80 -0
  358. package/dist/pipeline/measure-retrieval.d.ts +59 -0
  359. package/dist/pipeline/measure-retrieval.js +111 -0
  360. package/dist/pipeline/mirror-repo-tasks.d.ts +86 -0
  361. package/dist/pipeline/mirror-repo-tasks.js +350 -0
  362. package/dist/pipeline/plan-format.d.ts +33 -0
  363. package/dist/pipeline/plan-format.js +202 -0
  364. package/dist/pipeline/plan.d.ts +169 -0
  365. package/dist/pipeline/plan.js +708 -0
  366. package/dist/pipeline/pr-comment.d.ts +19 -0
  367. package/dist/pipeline/pr-comment.js +502 -0
  368. package/dist/pipeline/probe.d.ts +52 -0
  369. package/dist/pipeline/probe.js +390 -0
  370. package/dist/pipeline/provenance.d.ts +47 -0
  371. package/dist/pipeline/provenance.js +146 -0
  372. package/dist/pipeline/readiness-report.d.ts +87 -0
  373. package/dist/pipeline/readiness-report.js +205 -0
  374. package/dist/pipeline/release-classification.d.ts +54 -0
  375. package/dist/pipeline/release-classification.js +238 -0
  376. package/dist/pipeline/release-report.d.ts +37 -0
  377. package/dist/pipeline/release-report.js +222 -0
  378. package/dist/pipeline/repo-eval-comment.d.ts +37 -0
  379. package/dist/pipeline/repo-eval-comment.js +165 -0
  380. package/dist/pipeline/repo-threshold-evaluator.d.ts +89 -0
  381. package/dist/pipeline/repo-threshold-evaluator.js +162 -0
  382. package/dist/pipeline/resolve-mappings.d.ts +35 -0
  383. package/dist/pipeline/resolve-mappings.js +72 -0
  384. package/dist/pipeline/retrieval-metrics.d.ts +39 -0
  385. package/dist/pipeline/retrieval-metrics.js +136 -0
  386. package/dist/pipeline/reverse-mapping.d.ts +67 -0
  387. package/dist/pipeline/reverse-mapping.js +88 -0
  388. package/dist/pipeline/schemas.d.ts +9 -0
  389. package/dist/pipeline/schemas.js +9 -0
  390. package/dist/pipeline/steps/calculate-scores-step.d.ts +11 -0
  391. package/dist/pipeline/steps/calculate-scores-step.js +89 -0
  392. package/dist/pipeline/steps/compare-step.d.ts +18 -0
  393. package/dist/pipeline/steps/compare-step.js +90 -0
  394. package/dist/pipeline/steps/eval-step.d.ts +53 -0
  395. package/dist/pipeline/steps/eval-step.js +347 -0
  396. package/dist/pipeline/steps/fetch-docs-step.d.ts +11 -0
  397. package/dist/pipeline/steps/fetch-docs-step.js +84 -0
  398. package/dist/pipeline/steps/generate-configs-step.d.ts +11 -0
  399. package/dist/pipeline/steps/generate-configs-step.js +98 -0
  400. package/dist/pipeline/steps/grader-consistency-step.d.ts +21 -0
  401. package/dist/pipeline/steps/grader-consistency-step.js +74 -0
  402. package/dist/pipeline/steps/publish-report-step.d.ts +57 -0
  403. package/dist/pipeline/steps/publish-report-step.js +243 -0
  404. package/dist/pipeline/steps/report-step.d.ts +13 -0
  405. package/dist/pipeline/steps/report-step.js +56 -0
  406. package/dist/pipeline/steps/update-scores-step.d.ts +11 -0
  407. package/dist/pipeline/steps/update-scores-step.js +42 -0
  408. package/dist/pipeline/targeted-loo.d.ts +88 -0
  409. package/dist/pipeline/targeted-loo.js +203 -0
  410. package/dist/pipeline/thresholds.d.ts +27 -0
  411. package/dist/pipeline/thresholds.js +245 -0
  412. package/dist/pipeline/types.d.ts +10 -0
  413. package/dist/pipeline/types.js +10 -0
  414. package/dist/pipeline/validate.d.ts +67 -0
  415. package/dist/pipeline/validate.js +406 -0
  416. package/dist/pipeline/webhook-server.d.ts +37 -0
  417. package/dist/pipeline/webhook-server.js +133 -0
  418. package/dist/report-store.d.ts +84 -0
  419. package/dist/report-store.js +208 -0
  420. package/dist/sanity/client.d.ts +38 -0
  421. package/dist/sanity/client.js +86 -0
  422. package/dist/sanity/portable-text.d.ts +11 -0
  423. package/dist/sanity/portable-text.js +211 -0
  424. package/dist/sanity/queries.d.ts +133 -0
  425. package/dist/sanity/queries.js +300 -0
  426. package/dist/schedules/digest.d.ts +116 -0
  427. package/dist/schedules/digest.js +156 -0
  428. package/dist/schedules/index.d.ts +12 -0
  429. package/dist/schedules/index.js +10 -0
  430. package/dist/schedules/loader.d.ts +31 -0
  431. package/dist/schedules/loader.js +73 -0
  432. package/dist/schedules/schema.d.ts +9 -0
  433. package/dist/schedules/schema.js +9 -0
  434. package/dist/scripts/agent-behavior-report.d.ts +19 -0
  435. package/dist/scripts/agent-behavior-report.js +315 -0
  436. package/dist/scripts/baseline.d.ts +43 -0
  437. package/dist/scripts/baseline.js +267 -0
  438. package/dist/scripts/calculate-scores.d.ts +166 -0
  439. package/dist/scripts/calculate-scores.js +1296 -0
  440. package/dist/scripts/compare.d.ts +22 -0
  441. package/dist/scripts/compare.js +334 -0
  442. package/dist/scripts/coverage-audit.d.ts +44 -0
  443. package/dist/scripts/coverage-audit.js +209 -0
  444. package/dist/scripts/debug-eval.d.ts +19 -0
  445. package/dist/scripts/debug-eval.js +73 -0
  446. package/dist/scripts/discovery-report.d.ts +58 -0
  447. package/dist/scripts/discovery-report.js +250 -0
  448. package/dist/scripts/fetch-docs.d.ts +35 -0
  449. package/dist/scripts/fetch-docs.js +472 -0
  450. package/dist/scripts/generate-configs.d.ts +66 -0
  451. package/dist/scripts/generate-configs.js +459 -0
  452. package/dist/scripts/grader-api.d.ts +27 -0
  453. package/dist/scripts/grader-api.js +206 -0
  454. package/dist/scripts/grader-compare.d.ts +22 -0
  455. package/dist/scripts/grader-compare.js +368 -0
  456. package/dist/scripts/grader-consistency.d.ts +20 -0
  457. package/dist/scripts/grader-consistency.js +313 -0
  458. package/dist/scripts/grader-sensitivity.d.ts +22 -0
  459. package/dist/scripts/grader-sensitivity.js +354 -0
  460. package/dist/scripts/grader-validate.d.ts +19 -0
  461. package/dist/scripts/grader-validate.js +267 -0
  462. package/dist/scripts/measure-retrieval.d.ts +10 -0
  463. package/dist/scripts/measure-retrieval.js +145 -0
  464. package/dist/scripts/migrate-tasks-to-content-lake.d.ts +24 -0
  465. package/dist/scripts/migrate-tasks-to-content-lake.js +327 -0
  466. package/dist/scripts/pipeline.d.ts +76 -0
  467. package/dist/scripts/pipeline.js +1031 -0
  468. package/dist/scripts/pr-comment.d.ts +10 -0
  469. package/dist/scripts/pr-comment.js +510 -0
  470. package/dist/scripts/readiness-report.d.ts +88 -0
  471. package/dist/scripts/readiness-report.js +342 -0
  472. package/dist/scripts/update-quality-scores.d.ts +15 -0
  473. package/dist/scripts/update-quality-scores.js +184 -0
  474. package/dist/scripts/validate-task-sources.d.ts +21 -0
  475. package/dist/scripts/validate-task-sources.js +210 -0
  476. package/dist/scripts/validate.d.ts +13 -0
  477. package/dist/scripts/validate.js +79 -0
  478. package/dist/scripts/webhook-server.d.ts +26 -0
  479. package/dist/scripts/webhook-server.js +147 -0
  480. package/dist/scripts/weekly-digest.d.ts +24 -0
  481. package/dist/scripts/weekly-digest.js +144 -0
  482. package/dist/sinks/bigquery/index.d.ts +131 -0
  483. package/dist/sinks/bigquery/index.js +222 -0
  484. package/dist/sinks/format-slack.d.ts +64 -0
  485. package/dist/sinks/format-slack.js +306 -0
  486. package/dist/sinks/index.d.ts +23 -0
  487. package/dist/sinks/index.js +18 -0
  488. package/dist/sinks/loader.d.ts +18 -0
  489. package/dist/sinks/loader.js +82 -0
  490. package/dist/sinks/retry.d.ts +24 -0
  491. package/dist/sinks/retry.js +52 -0
  492. package/dist/sinks/schema.d.ts +9 -0
  493. package/dist/sinks/schema.js +9 -0
  494. package/dist/sinks/slack/format.d.ts +65 -0
  495. package/dist/sinks/slack/format.js +327 -0
  496. package/dist/sinks/slack/index.d.ts +27 -0
  497. package/dist/sinks/slack/index.js +78 -0
  498. package/dist/sinks/slack-sink.d.ts +27 -0
  499. package/dist/sinks/slack-sink.js +78 -0
  500. package/dist/sinks/types.d.ts +59 -0
  501. package/dist/sinks/types.js +44 -0
  502. package/dist/sinks/webhook/index.d.ts +19 -0
  503. package/dist/sinks/webhook/index.js +50 -0
  504. package/dist/sinks/webhook-sink.d.ts +19 -0
  505. package/dist/sinks/webhook-sink.js +50 -0
  506. package/dist/sources.d.ts +104 -0
  507. package/dist/sources.js +292 -0
  508. package/dist/webhook/budget.d.ts +42 -0
  509. package/dist/webhook/budget.js +60 -0
  510. package/dist/webhook/debounce.d.ts +67 -0
  511. package/dist/webhook/debounce.js +76 -0
  512. package/dist/webhook/dispatch.d.ts +45 -0
  513. package/dist/webhook/dispatch.js +84 -0
  514. package/dist/webhook/eval-request-handler.d.ts +87 -0
  515. package/dist/webhook/eval-request-handler.js +181 -0
  516. package/dist/webhook/handler.d.ts +88 -0
  517. package/dist/webhook/handler.js +203 -0
  518. package/dist/webhook/index.d.ts +17 -0
  519. package/dist/webhook/index.js +12 -0
  520. package/dist/webhook/types.d.ts +109 -0
  521. package/dist/webhook/types.js +10 -0
  522. package/package.json +72 -0
  523. package/tasks/.expanded.agentic.yaml +51 -0
  524. package/tasks/.expanded.yaml +66 -0
  525. package/tasks/frameworks.yaml +98 -0
  526. package/tasks/functions.yaml +51 -0
  527. package/tasks/groq.yaml +216 -0
  528. package/tasks/nextjs-live.yaml +62 -0
  529. package/tasks/studio-setup.yaml +111 -0
  530. package/tasks/visual-editing.yaml +120 -0
@@ -0,0 +1,91 @@
1
+ /**
2
+ * proxy.ts
3
+ *
4
+ * A lightweight HTTP recording proxy that captures all outbound requests
5
+ * made by an AI agent during evaluation. Instead of being an actual network
6
+ * proxy (which requires HTTPS MITM certificates), this module provides:
7
+ *
8
+ * 1. A `fetch` wrapper that records every request/response
9
+ * 2. A session manager that groups observations by test case
10
+ *
11
+ * This design works with any agent framework that uses `fetch` or can be
12
+ * configured to use a custom HTTP client. For agents that use a real browser,
13
+ * see the Chrome DevTools Protocol integration (future work).
14
+ *
15
+ * Usage:
16
+ * const recorder = new RequestRecorder()
17
+ * recorder.start('session-123')
18
+ *
19
+ * // The agent makes requests through the recorder's fetch wrapper
20
+ * const response = await recorder.fetch('https://sanity.io/docs/groq')
21
+ *
22
+ * const log = recorder.stop()
23
+ * // → AgentBehaviorLog with all requests classified
24
+ */
25
+ import type { ObservedRequest, AgentBehaviorLog } from "./types.js";
26
+ export interface RecorderOptions { // Options accepted by the RequestRecorder constructor; omitted fields fall back to DEFAULT_OPTIONS in proxy.js
27
+ /** Request header names to capture (matched case-insensitively).
+ * Default: accept, content-type, user-agent, authorization, x-sanity-token.
+ * Supplying a list replaces the default list rather than extending it. */
28
+ captureHeaders?: string[];
29
+ /** Whether to capture response body previews. Default: true */
30
+ captureResponsePreview?: boolean;
31
+ /** Filter: skip requests whose URL matches any of these patterns.
+ * Default: font files (woff/woff2/ttf/eot), common image extensions, and URLs containing "favicon".
+ * Supplying a list replaces those defaults rather than extending them.
32
+ * Accepts RegExp or string; a string is escaped and matched as a case-insensitive
+ * literal substring (regex metacharacters in strings are NOT interpreted). */
33
+ excludePatterns?: (RegExp | string)[];
34
+ /** Filter: only record requests whose URL matches at least one of these patterns.
+ * Default: empty list, which records everything not excluded.
35
+ * Accepts RegExp or string; a string is escaped and matched as a case-insensitive
+ * literal substring (regex metacharacters in strings are NOT interpreted). */
36
+ includePatterns?: (RegExp | string)[];
37
+ /** Maximum request body size to capture. Default: 4096.
+ * Applied by slicing the body string, so the limit counts string characters. */
38
+ maxBodyBytes?: number;
39
+ /** Maximum response body size to capture in the preview. Default: 2048.
+ * Applied by slicing the preview string, so the limit counts string characters. */
40
+ maxPreviewBytes?: number;
41
+ }
42
+ export declare class RequestRecorder { // Records request/response observations for one session at a time
43
+ private observations; // observations accepted since the last start()/reset()
44
+ private options; // RecorderOptions merged over the module defaults
45
+ private provider;
46
+ private running; // record() is a no-op while this is false
47
+ private seq; // next sequence number handed out by record()
48
+ private sessionId;
49
+ private startTime; // epoch milliseconds captured by start()
50
+ private taskDescription;
51
+ constructor(options?: RecorderOptions);
52
+ /**
53
+ * A `fetch`-compatible wrapper that records the request and response,
54
+ * then returns the real response unmodified.
55
+ *
56
+ * Drop-in replacement: `const response = await recorder.fetch(url, init)`
+ *
+ * The call is delegated to `globalThis.fetch`. If that call rejects, the
+ * failure is recorded with status code 0 and the same error is re-thrown.
+ * When response previews are enabled, a clone of the response body is read
+ * before the promise resolves.
57
+ */
58
+ fetch(input: Request | string | URL, init?: RequestInit): Promise<Response>;
59
+ /**
60
+ * Whether the recorder is currently active.
+ * True between start() and the next stop() or reset().
61
+ */
62
+ isRunning(): boolean;
63
+ /**
64
+ * Get current observations without stopping the recorder.
+ * Returns a shallow copy of the internal array.
65
+ */
66
+ peek(): ObservedRequest[];
67
+ /**
68
+ * Record an externally-observed request/response pair.
69
+ *
70
+ * Use this when you can't wrap `fetch` directly but can observe traffic
71
+ * (e.g., via browser DevTools Protocol, mitmproxy logs, etc.).
+ *
+ * Does nothing unless the recorder is running. The include/exclude URL
+ * filters are applied, `body` and `responsePreview` are truncated to the
+ * configured limits, and the next sequence number is assigned as `seq`.
72
+ */
73
+ record(observation: Omit<ObservedRequest, "seq">): void;
74
+ /**
75
+ * Reset the recorder for reuse without creating a new instance.
+ * Clears observations and session metadata and marks the recorder as not running.
76
+ */
77
+ reset(): void;
78
+ /**
79
+ * Start a new recording session. Call this before the agent runs a task.
+ * Observations from any previous session are discarded and the sequence
+ * counter restarts at 0.
80
+ */
81
+ start(sessionId: string, provider: string, taskDescription: string): void;
82
+ /**
83
+ * Stop recording and return the complete behavior log with
84
+ * classified observations and summary statistics.
85
+ */
86
+ stop(): AgentBehaviorLog;
87
+ private computeSummary;
88
+ private extractBody;
89
+ private extractHeaders;
90
+ private redactSensitive;
91
+ }
@@ -0,0 +1,321 @@
1
+ /**
2
+ * proxy.ts
3
+ *
4
+ * A lightweight HTTP recording proxy that captures all outbound requests
5
+ * made by an AI agent during evaluation. Instead of being an actual network
6
+ * proxy (which requires HTTPS MITM certificates), this module provides:
7
+ *
8
+ * 1. A `fetch` wrapper that records every request/response
9
+ * 2. A session manager that groups observations by test case
10
+ *
11
+ * This design works with any agent framework that uses `fetch` or can be
12
+ * configured to use a custom HTTP client. For agents that use a real browser,
13
+ * see the Chrome DevTools Protocol integration (future work).
14
+ *
15
+ * Usage:
16
+ * const recorder = new RequestRecorder()
17
+ * recorder.start('session-123')
18
+ *
19
+ * // The agent makes requests through the recorder's fetch wrapper
20
+ * const response = await recorder.fetch('https://sanity.io/docs/groq')
21
+ *
22
+ * const log = recorder.stop()
23
+ * // → AgentBehaviorLog with all requests classified
24
+ */
25
+ import { classifyRequests } from "./classifier.js";
26
+ const DEFAULT_OPTIONS = { // Baseline options; the RequestRecorder constructor spreads caller options over these
27
+ captureHeaders: [ // compared against lower-cased request header names
28
+ "accept",
29
+ "content-type",
30
+ "user-agent",
31
+ "authorization", // stored only after redactSensitive() truncates the value
32
+ "x-sanity-token", // stored only after redactSensitive() truncates the value
33
+ ],
34
+ captureResponsePreview: true,
35
+ excludePatterns: [ // a URL matching any of these is dropped by record()
36
+ // Skip common noise
37
+ /\.(woff2?|ttf|eot)(\?|$)/i, // font files, with or without a query string
38
+ /\.(png|jpg|jpeg|gif|svg|ico|webp)(\?|$)/i, // image files, with or without a query string
39
+ /favicon/i,
40
+ ],
41
+ includePatterns: [], // empty list: record() applies no include filter
42
+ maxBodyBytes: 4096, // used as a slice() length on captured request bodies
43
+ maxPreviewBytes: 2048, // used as a slice() length on captured response previews
44
+ };
45
+ // ---------------------------------------------------------------------------
46
+ // RequestRecorder
47
+ // ---------------------------------------------------------------------------
48
/**
 * Records the HTTP traffic an agent produces during one evaluation session.
 *
 * Traffic reaches the recorder either through the `fetch` wrapper or through
 * `record()` for externally observed requests. `stop()` turns the collected
 * observations into a classified behavior log with summary statistics.
 */
export class RequestRecorder {
    observations = [];
    options;
    provider = "";
    running = false;
    seq = 0;
    sessionId = "";
    startTime = 0;
    taskDescription = "";
    /**
     * @param {object} [options] Partial recorder options. Fields that are
     *   missing, `undefined`, or `null` fall back to `DEFAULT_OPTIONS`.
     */
    constructor(options) {
        const merged = { ...DEFAULT_OPTIONS };
        // Copy only values that are actually set, so `{ includePatterns: undefined }`
        // (easy to produce from parsed config) cannot erase a default and crash
        // later in record()/extractHeaders().
        for (const [key, value] of Object.entries(options ?? {})) {
            if (value !== undefined && value !== null) {
                merged[key] = value;
            }
        }
        // Coerce string patterns from YAML config into RegExp instances
        merged.includePatterns = merged.includePatterns.map(toRegExp);
        merged.excludePatterns = merged.excludePatterns.map(toRegExp);
        this.options = merged;
    }
    /**
     * A `fetch`-compatible wrapper that records the request and response,
     * then returns the real response unmodified.
     *
     * Drop-in replacement: `const response = await recorder.fetch(url, init)`
     *
     * Request headers are taken from `init.headers`, or from the `Request`
     * object when `init` supplies none. Request bodies are only captured from
     * `init.body`; the body of a `Request` input is not read.
     *
     * @param {Request | string | URL} input Resource to fetch.
     * @param {RequestInit} [init] Standard fetch options.
     * @returns {Promise<Response>} The response produced by `globalThis.fetch`.
     * @throws Re-throws whatever `globalThis.fetch` rejects with, after
     *   recording the failure with status code 0.
     */
    async fetch(input, init) {
        const reqStart = Date.now();
        const isRequestObject = typeof input !== "string" && !(input instanceof URL);
        const url = typeof input === "string"
            ? input
            : input instanceof URL
                ? input.toString()
                : input.url;
        const method = (init?.method ??
            (isRequestObject ? input.method : undefined) ??
            "GET").toUpperCase();
        // `init.headers` replaces a Request's own headers when both are given,
        // so only fall back to the Request when init carries none.
        const requestHeaders = init?.headers ?? (isRequestObject ? input.headers : undefined);
        const timestamp = new Date(reqStart).toISOString();
        let response;
        try {
            response = await globalThis.fetch(input, init);
        }
        catch (err) {
            // Anything can be thrown; do not assume an Error instance.
            const message = err instanceof Error ? err.message : String(err);
            this.record({
                body: await this.extractBody(init?.body),
                contentType: undefined,
                headers: this.extractHeaders(requestHeaders),
                latencyMs: Date.now() - reqStart,
                method,
                responsePreview: `Error: ${message}`,
                responseSize: 0,
                statusCode: 0,
                timestamp,
                url,
            });
            throw err;
        }
        const latencyMs = Date.now() - reqStart;
        // Size reported by the server; used when the body is not read here.
        const declaredSize = Number.parseInt(response.headers.get("content-length") ?? "", 10);
        let responseSize = Number.isNaN(declaredSize) ? 0 : declaredSize;
        let responsePreview;
        if (this.options.captureResponsePreview) {
            try {
                // Clone only when the copy will be consumed: an unread clone keeps
                // a second branch of the body stream buffered for no benefit.
                const text = await response.clone().text();
                responseSize = new TextEncoder().encode(text).length;
                responsePreview = text.slice(0, this.options.maxPreviewBytes);
            }
            catch {
                // Body could not be read as text; keep the content-length figure.
            }
        }
        this.record({
            body: await this.extractBody(init?.body),
            contentType: response.headers.get("content-type") ?? undefined,
            headers: this.extractHeaders(requestHeaders),
            latencyMs,
            method,
            responsePreview,
            responseSize,
            statusCode: response.status,
            timestamp,
            url,
        });
        return response;
    }
    /**
     * Whether the recorder is currently active.
     *
     * @returns {boolean} True between `start()` and the next `stop()`/`reset()`.
     */
    isRunning() {
        return this.running;
    }
    /**
     * Get current observations without stopping the recorder.
     *
     * @returns {object[]} A shallow copy of the recorded observations.
     */
    peek() {
        return [...this.observations];
    }
    /**
     * Record an externally-observed request/response pair.
     *
     * Use this when you can't wrap `fetch` directly but can observe traffic
     * (e.g., via browser DevTools Protocol, mitmproxy logs, etc.).
     *
     * Nothing is stored unless the recorder is running and the URL passes the
     * include/exclude filters.
     *
     * @param {object} observation Observation without a `seq` field.
     */
    record(observation) {
        if (!this.running) {
            return;
        }
        const { includePatterns, excludePatterns, maxBodyBytes, maxPreviewBytes } = this.options;
        const url = observation.url;
        // An empty include list means "no include filter".
        if (includePatterns.length > 0 && !includePatterns.some((p) => p.test(url))) {
            return;
        }
        if (excludePatterns.some((p) => p.test(url))) {
            return;
        }
        this.observations.push({
            ...observation,
            // Truncate body if needed
            body: observation.body?.slice(0, maxBodyBytes),
            responsePreview: observation.responsePreview?.slice(0, maxPreviewBytes),
            seq: this.seq++,
        });
    }
    /**
     * Reset the recorder for reuse without creating a new instance.
     * Clears observations and session metadata and stops recording.
     */
    reset() {
        this.observations = [];
        this.seq = 0;
        this.sessionId = "";
        this.provider = "";
        this.taskDescription = "";
        this.startTime = 0;
        this.running = false;
    }
    /**
     * Start a new recording session. Call this before the agent runs a task.
     * Observations from a previous session are discarded.
     *
     * @param {string} sessionId Identifier reported in the behavior log.
     * @param {string} [provider=""] Provider name reported in the behavior log.
     * @param {string} [taskDescription=""] Task text reported in the behavior log.
     */
    start(sessionId, provider = "", taskDescription = "") {
        this.sessionId = sessionId;
        this.provider = provider;
        this.taskDescription = taskDescription;
        this.observations = [];
        this.seq = 0;
        this.startTime = Date.now();
        this.running = true;
    }
    /**
     * Stop recording and return the complete behavior log with
     * classified observations and summary statistics.
     *
     * @returns {object} Session metadata, the raw requests, the classifier
     *   output spread into the log, and a `summary` object.
     */
    stop() {
        this.running = false;
        const endTime = Date.now();
        // Classify once and share the result with the summary.
        const classified = classifyRequests(this.observations);
        const summary = this.computeSummary(classified);
        return {
            endedAt: new Date(endTime).toISOString(),
            networkDurationMs: this.observations.reduce((sum, r) => sum + r.latencyMs, 0),
            provider: this.provider,
            requests: this.observations,
            sessionId: this.sessionId,
            startedAt: new Date(this.startTime).toISOString(),
            taskDescription: this.taskDescription,
            totalDurationMs: endTime - this.startTime,
            ...classified,
            summary,
        };
    }
    // -------------------------------------------------------------------------
    // Private helpers
    // -------------------------------------------------------------------------
    /**
     * Build the summary statistics for the current observations.
     *
     * @param {object} [classified] Classifier output for the current
     *   observations; computed here when the caller does not pass it.
     * @returns {object} Counts, de-duplicated lists, and totals.
     */
    computeSummary(classified = classifyRequests(this.observations)) {
        const uniqueUrls = new Set(this.observations.map((r) => r.url));
        const docSlugs = [
            ...new Set(classified.docPageVisits.map((d) => d.slug)),
        ].filter(Boolean);
        const searchQueries = [
            ...new Set(classified.searchQueries.map((s) => s.query)),
        ].filter(Boolean);
        const extDomains = [
            ...new Set(classified.externalRequests.map((e) => e.domain)),
        ];
        return {
            apiCallCount: classified.apiCalls.length,
            docPagesVisited: classified.docPageVisits.length,
            docSlugsVisited: docSlugs,
            externalDomains: extDomains,
            externalRequestCount: classified.externalRequests.length,
            searchesPerformed: classified.searchQueries.length,
            totalBytesDownloaded: this.observations.reduce((sum, r) => sum + r.responseSize, 0),
            totalNetworkMs: this.observations.reduce((sum, r) => sum + r.latencyMs, 0),
            totalRequests: this.observations.length,
            uniqueSearchQueries: searchQueries,
            uniqueUrls: uniqueUrls.size,
            usedDocs: classified.docPageVisits.length > 0,
            usedSearch: classified.searchQueries.length > 0,
        };
    }
    /**
     * Convert a request body into a truncated string for the log.
     *
     * @param {*} body A fetch `body` value.
     * @returns {Promise<string | undefined>} The body text cut to
     *   `maxBodyBytes` characters, `"[binary body]"` when conversion throws,
     *   or `undefined` for missing or unsupported body types (e.g. streams,
     *   FormData).
     */
    async extractBody(body) {
        if (!body) {
            return undefined;
        }
        const limit = this.options.maxBodyBytes;
        if (typeof body === "string") {
            return body.slice(0, limit);
        }
        // For other body types, try to convert
        try {
            // Typed arrays / DataView are as common as raw ArrayBuffers.
            if (body instanceof ArrayBuffer || ArrayBuffer.isView(body)) {
                return new TextDecoder().decode(body).slice(0, limit);
            }
            if (body instanceof URLSearchParams) {
                return body.toString().slice(0, limit);
            }
            if (body instanceof Blob) {
                const text = await body.text();
                return text.slice(0, limit);
            }
        }
        catch {
            return "[binary body]";
        }
        return undefined;
    }
    /**
     * Pick the configured headers out of any fetch `headers` shape.
     *
     * @param {Headers | Array<[string, string]> | Record<string, string>} [headers]
     * @returns {Record<string, string>} Lower-cased header names mapped to
     *   their (possibly redacted) values.
     */
    extractHeaders(headers) {
        const result = {};
        if (!headers) {
            return result;
        }
        const allowed = new Set(this.options.captureHeaders.map((h) => h.toLowerCase()));
        // Normalize the three accepted shapes to [name, value] pairs.
        let entries;
        if (headers instanceof Headers) {
            entries = [...headers.entries()];
        }
        else if (Array.isArray(headers)) {
            entries = headers;
        }
        else {
            entries = Object.entries(headers);
        }
        for (const [key, value] of entries) {
            const name = String(key).toLowerCase();
            if (allowed.has(name)) {
                // Coerce so a non-string value cannot break redaction.
                result[name] = this.redactSensitive(name, String(value));
            }
        }
        return result;
    }
    /**
     * Shorten the value of credential-bearing headers.
     *
     * NOTE(review): the first 8 characters are kept verbatim, so a short
     * secret, or the leading characters of a token sent without a scheme
     * prefix, still reach the log. Confirm this is acceptable for the sinks
     * that receive behavior logs.
     *
     * @param {string} key Header name (any case).
     * @param {string} value Header value.
     * @returns {string} The original value, or its first 8 characters followed
     *   by `...[REDACTED]` for sensitive headers.
     */
    redactSensitive(key, value) {
        const sensitive = ["authorization", "x-sanity-token", "cookie"];
        if (sensitive.includes(key.toLowerCase())) {
            return `${value.slice(0, 8)}...[REDACTED]`;
        }
        return value;
    }
}
311
/**
 * Coerce a URL filter pattern into a RegExp that is safe to reuse.
 *
 * - A string (e.g. from YAML config) is escaped and becomes a
 *   case-insensitive literal-substring matcher; regex metacharacters in the
 *   string are not interpreted.
 * - A RegExp without the `g`/`y` flags is returned as-is.
 * - A RegExp with the `g` or `y` flag is rebuilt without those flags. Such
 *   expressions advance `lastIndex` on every `.test()` call, so reusing one
 *   across many URLs would make matching depend on the previous call.
 *
 * @param {RegExp | string} pattern Pattern supplied by the caller.
 * @returns {RegExp} A RegExp whose `.test()` result depends only on its input.
 */
function toRegExp(pattern) {
    if (pattern instanceof RegExp) {
        if (!pattern.global && !pattern.sticky) {
            return pattern;
        }
        return new RegExp(pattern.source, pattern.flags.replace(/[gy]/g, ""));
    }
    // Simple containment match — treat string as a literal substring
    const escaped = String(pattern).replace(/[-/\\^$*+?.()|[\]{}]/g, "\\$&");
    return new RegExp(escaped, "i");
}
@@ -0,0 +1,7 @@
1
+ /**
2
+ * Final validation — ensures all agent-observer modules work together
3
+ * and the full data pipeline (record → classify → summarize) is correct.
4
+ *
5
+ * Run: tsx src/agent-observer/test-imports.ts
6
+ */
7
+ export {};
@@ -0,0 +1,185 @@
1
+ /**
2
+ * Final validation — ensures all agent-observer modules work together
3
+ * and the full data pipeline (record → classify → summarize) is correct.
4
+ *
5
+ * Run: tsx src/agent-observer/test-imports.ts
6
+ */
7
+ import { classifyRequests, extractDocSlug, extractSearchQuery, extractApiEndpoint, extractDomain, extractPageTitle, isDocPageRequest, isSearchRequest, isSanityApiRequest, } from "./classifier.js";
8
+ import { RequestRecorder } from "./proxy.js";
9
+ import { default as InstrumentedProvider } from "./provider.js";
10
+ // ─── Test data ───────────────────────────────────────────────────────────────
11
+ const now = Date.now();
12
+ const mockRequests = [
13
+ // 1. Doc page visit
14
+ {
15
+ headers: {},
16
+ latencyMs: 234,
17
+ method: "GET",
18
+ responsePreview: "<html><head><title>Create a Schema - Sanity</title></head>...",
19
+ responseSize: 45000,
20
+ seq: 0,
21
+ statusCode: 200,
22
+ timestamp: new Date(now).toISOString(),
23
+ url: "https://www.sanity.io/docs/create-a-schema-and-configure-sanity-studio",
24
+ },
25
+ // 2. Another doc page
26
+ {
27
+ headers: {},
28
+ latencyMs: 180,
29
+ method: "GET",
30
+ responsePreview: "<html><head><title>Object Type - Sanity Docs</title></head>...",
31
+ responseSize: 32000,
32
+ seq: 1,
33
+ statusCode: 200,
34
+ timestamp: new Date(now + 100).toISOString(),
35
+ url: "https://www.sanity.io/docs/schema-types/object-type",
36
+ },
37
+ // 3. Search query
38
+ {
39
+ headers: {},
40
+ latencyMs: 450,
41
+ method: "GET",
42
+ responseSize: 12000,
43
+ seq: 2,
44
+ statusCode: 200,
45
+ timestamp: new Date(now + 200).toISOString(),
46
+ url: "https://www.sanity.io/search?q=visual+editing+preview",
47
+ },
48
+ // 4. Sanity API call (GROQ query — must NOT be classified as search)
49
+ {
50
+ headers: {},
51
+ latencyMs: 320,
52
+ method: "GET",
53
+ responseSize: 8500,
54
+ seq: 3,
55
+ statusCode: 200,
56
+ timestamp: new Date(now + 300).toISOString(),
57
+ url: 'https://api.sanity.io/v2021-03-25/data/query/production?query=*[_type=="article"]',
58
+ },
59
+ // 5. CDN API call
60
+ {
61
+ headers: {},
62
+ latencyMs: 85,
63
+ method: "GET",
64
+ responseSize: 150000,
65
+ seq: 4,
66
+ statusCode: 200,
67
+ timestamp: new Date(now + 350).toISOString(),
68
+ url: "https://cdn.sanity.io/images/abc123/production/image-xyz.jpg",
69
+ },
70
+ // 6. External request (npm docs)
71
+ {
72
+ headers: {},
73
+ latencyMs: 300,
74
+ method: "GET",
75
+ responseSize: 20000,
76
+ seq: 5,
77
+ statusCode: 200,
78
+ timestamp: new Date(now + 400).toISOString(),
79
+ url: "https://docs.npmjs.com/cli/install",
80
+ },
81
+ // 7. Algolia search
82
+ {
83
+ body: JSON.stringify({ query: "presentation tool setup" }),
84
+ headers: {},
85
+ latencyMs: 150,
86
+ method: "POST",
87
+ responseSize: 5000,
88
+ seq: 6,
89
+ statusCode: 200,
90
+ timestamp: new Date(now + 500).toISOString(),
91
+ url: "https://abc123.algolia.net/1/indexes/sanity_docs/query",
92
+ },
93
+ // 8. Google search
94
+ {
95
+ headers: {},
96
+ latencyMs: 200,
97
+ method: "GET",
98
+ responseSize: 80000,
99
+ seq: 7,
100
+ statusCode: 200,
101
+ timestamp: new Date(now + 600).toISOString(),
102
+ url: "https://www.google.com/search?q=sanity+studio+custom+tool",
103
+ },
104
+ // 9. Failed request (should be skipped)
105
+ {
106
+ headers: {},
107
+ latencyMs: 0,
108
+ method: "GET",
109
+ responseSize: 0,
110
+ seq: 8,
111
+ statusCode: 0,
112
+ timestamp: new Date(now + 700).toISOString(),
113
+ url: "https://www.sanity.io/docs/nonexistent-page",
114
+ },
115
+ ];
116
+ // ─── Run tests ───────────────────────────────────────────────────────────────
117
+ let passed = 0; // number of checks whose condition was truthy
118
+ let failed = 0; // number of checks whose condition was falsy
119
+ function assert(condition, msg) { // Minimal check helper: prints one line per check and tallies it; never throws
120
+ if (condition) {
121
+ console.log(` ✅ ${msg}`);
122
+ passed++;
123
+ }
124
+ else {
125
+ console.log(` ❌ FAIL: ${msg}`); // the failure is only logged, so later checks still run
126
+ failed++;
127
+ }
128
+ }
129
+ console.log("\n═══ Agent Observer — Final Validation ═══\n");
130
+ // --- Individual function tests ---
131
+ console.log("1. Individual detection functions:");
132
+ assert(isDocPageRequest(mockRequests[0]) === true, "Doc page detected");
133
+ assert(isDocPageRequest(mockRequests[3]) === false, "API call NOT detected as doc page");
134
+ assert(isSearchRequest(mockRequests[2]) === true, "Search detected");
135
+ assert(isSearchRequest(mockRequests[3]) === false, "API call NOT detected as search");
136
+ assert(isSanityApiRequest(mockRequests[3]) === true, "API call detected");
137
+ assert(isSanityApiRequest(mockRequests[0]) === false, "Doc page NOT detected as API call");
138
+ // --- Slug extraction ---
139
+ console.log("\n2. Metadata extraction:");
140
+ assert(extractDocSlug("https://www.sanity.io/docs/create-a-schema-and-configure-sanity-studio") === "create-a-schema-and-configure-sanity-studio", "Doc slug extracted correctly");
141
+ assert(extractDocSlug("https://www.sanity.io/docs/schema-types/object-type") ===
142
+ "schema-types/object-type", "Nested doc slug extracted correctly");
143
+ assert(extractSearchQuery(mockRequests[2]) === "visual editing preview", `Search query extracted: "${extractSearchQuery(mockRequests[2])}"`);
144
+ assert(extractApiEndpoint("https://api.sanity.io/v2021-03-25/data/query/production?query=*") === "/data/query/production?query=*", `API endpoint extracted: "${extractApiEndpoint("https://api.sanity.io/v2021-03-25/data/query/production?query=*")}"`);
145
+ assert(extractDomain("https://docs.npmjs.com/cli/install") === "docs.npmjs.com", "Domain extracted correctly");
146
+ assert(extractPageTitle("<html><head><title>Create a Schema - Sanity</title></head>") === "Create a Schema - Sanity", "Page title extracted correctly");
147
+ // --- Full classification pipeline ---
148
+ console.log("\n3. Full classification pipeline:");
149
+ const classified = classifyRequests(mockRequests);
150
+ assert(classified.docPageVisits.length === 2, `Doc pages: ${classified.docPageVisits.length} (expected 2)`);
151
+ assert(classified.searchQueries.length === 3, `Searches: ${classified.searchQueries.length} (expected 3 — site search + algolia + google)`);
152
+ assert(classified.apiCalls.length === 2, `API calls: ${classified.apiCalls.length} (expected 2 — GROQ query + CDN)`);
153
+ assert(classified.externalRequests.length === 1, `External: ${classified.externalRequests.length} (expected 1 — npmjs)`);
154
+ // Verify the API ?query= param was NOT classified as a search
155
+ const searchUrls = classified.searchQueries.map((s) => s.url);
156
+ assert(!searchUrls.includes('https://api.sanity.io/v2021-03-25/data/query/production?query=*[_type=="article"]'), "API ?query= param NOT misclassified as search");
157
+ // Verify the failed request was skipped
158
+ const allUrls = [
159
+ ...classified.docPageVisits.map((d) => d.url),
160
+ ...classified.searchQueries.map((s) => s.url),
161
+ ...classified.apiCalls.map((a) => a.url),
162
+ ...classified.externalRequests.map((e) => e.url),
163
+ ];
164
+ assert(!allUrls.includes("https://www.sanity.io/docs/nonexistent-page"), "Failed request (status 0) was skipped");
165
+ // --- RequestRecorder ---
166
+ console.log("\n4. RequestRecorder:");
167
+ const recorder = new RequestRecorder({ includePatterns: [/sanity\.io/] });
168
+ assert(typeof recorder.start === "function", "RequestRecorder.start() exists");
169
+ assert(typeof recorder.stop === "function", "RequestRecorder.stop() exists");
170
+ // --- InstrumentedProvider ---
171
+ console.log("\n5. InstrumentedProvider:");
172
+ const provider = new InstrumentedProvider({ config: {}, id: "test-validation" });
173
+ assert(provider.id() === "instrumented:test-validation", `Provider ID: "${provider.id()}"`);
174
+ assert(typeof provider.callApi === "function", "Provider.callApi() exists");
175
+ // --- Summary ---
176
+ console.log(`\n${"═".repeat(50)}`);
177
+ console.log(`Results: ${passed} passed, ${failed} failed, ${passed + failed} total`);
178
+ if (failed > 0) {
179
+ console.log("\n⚠️ Some tests failed!");
180
+ process.exit(1);
181
+ }
182
+ else {
183
+ console.log("\n✅ All tests passed! Agent observer system is ready.");
184
+ process.exit(0);
185
+ }