@sanity/ailf 1.0.0 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (499)
  1. package/README.md +0 -1
  2. package/canonical/grader-references/README.md +2 -2
  3. package/canonical/reference-solutions/content-lake/mutations.ts +160 -0
  4. package/canonical/reference-solutions/content-lake/realtime.ts +187 -0
  5. package/canonical/reference-solutions/image-handling/asset-pipeline.tsx +166 -0
  6. package/canonical/reference-solutions/portable-text/custom-blocks.ts +204 -0
  7. package/canonical/reference-solutions/portable-text/rendering.tsx +163 -0
  8. package/config/features.ts +1 -1
  9. package/config/models.ts +29 -12
  10. package/config/sources.ts +1 -1
  11. package/config/thresholds.ts +1 -1
  12. package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.d.ts +10 -0
  13. package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.js +185 -0
  14. package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.d.ts +6 -0
  15. package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.js +42 -0
  16. package/dist/_vendor/ailf-core/artifact-capture/noop-collector.d.ts +14 -0
  17. package/dist/_vendor/ailf-core/artifact-capture/noop-collector.js +25 -0
  18. package/dist/_vendor/ailf-core/config-helpers.d.ts +20 -17
  19. package/dist/_vendor/ailf-core/config-helpers.js +51 -2
  20. package/dist/_vendor/ailf-core/examples/index.d.ts +166 -80
  21. package/dist/_vendor/ailf-core/examples/index.js +213 -94
  22. package/dist/_vendor/ailf-core/index.d.ts +3 -2
  23. package/dist/_vendor/ailf-core/index.js +2 -1
  24. package/dist/_vendor/ailf-core/ports/artifact-collector.d.ts +94 -0
  25. package/dist/_vendor/ailf-core/ports/artifact-collector.js +13 -0
  26. package/dist/_vendor/ailf-core/ports/capture-comparator.d.ts +138 -0
  27. package/dist/_vendor/ailf-core/ports/capture-comparator.js +10 -0
  28. package/dist/_vendor/ailf-core/ports/context.d.ts +22 -1
  29. package/dist/_vendor/ailf-core/ports/eval-runner.d.ts +6 -0
  30. package/dist/_vendor/ailf-core/ports/index.d.ts +2 -0
  31. package/dist/_vendor/ailf-core/ports/pipeline-step.d.ts +11 -0
  32. package/dist/_vendor/ailf-core/ports/task-source.d.ts +3 -3
  33. package/dist/_vendor/ailf-core/ports/task-source.js +3 -3
  34. package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +1 -0
  35. package/dist/_vendor/ailf-core/schemas/eval-config.js +10 -0
  36. package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +7 -1
  37. package/dist/_vendor/ailf-core/schemas/pipeline-request.js +16 -2
  38. package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +0 -2
  39. package/dist/_vendor/ailf-core/schemas/pipeline.js +0 -1
  40. package/dist/_vendor/ailf-core/services/config-helpers.d.ts +16 -1
  41. package/dist/_vendor/ailf-core/services/config-helpers.js +21 -0
  42. package/dist/_vendor/ailf-core/services/index.d.ts +1 -1
  43. package/dist/_vendor/ailf-core/services/index.js +1 -1
  44. package/dist/_vendor/ailf-core/services/scoring.js +9 -0
  45. package/dist/_vendor/ailf-core/types/generalized-task.d.ts +25 -1
  46. package/dist/_vendor/ailf-core/types/generalized-task.js +1 -1
  47. package/dist/_vendor/ailf-core/types/index.d.ts +48 -7
  48. package/dist/_vendor/ailf-core/types/plugin-registry.d.ts +105 -23
  49. package/dist/_vendor/ailf-core/types/plugin-registry.js +73 -20
  50. package/dist/_vendor/ailf-shared/eval-modes.d.ts +15 -0
  51. package/dist/_vendor/ailf-shared/eval-modes.js +18 -0
  52. package/dist/adapters/api-client/remediation.js +2 -2
  53. package/dist/adapters/config-sources/file-config-adapter.js +7 -1
  54. package/dist/adapters/config-sources/ts-config-loader.js +21 -13
  55. package/dist/adapters/eval-runners/promptfoo-eval-adapter.js +8 -2
  56. package/dist/adapters/index.d.ts +0 -1
  57. package/dist/adapters/index.js +0 -1
  58. package/dist/adapters/task-sources/composite-task-source.d.ts +1 -1
  59. package/dist/adapters/task-sources/composite-task-source.js +1 -1
  60. package/dist/adapters/task-sources/content-lake-task-source.d.ts +4 -6
  61. package/dist/adapters/task-sources/content-lake-task-source.js +21 -26
  62. package/dist/adapters/task-sources/index.d.ts +3 -4
  63. package/dist/adapters/task-sources/index.js +3 -4
  64. package/dist/adapters/task-sources/repo-schemas.d.ts +219 -17
  65. package/dist/adapters/task-sources/repo-schemas.js +228 -20
  66. package/dist/adapters/task-sources/repo-task-source.d.ts +14 -10
  67. package/dist/adapters/task-sources/repo-task-source.js +81 -122
  68. package/dist/adapters/task-sources/repo-trigger.d.ts +1 -1
  69. package/dist/adapters/task-sources/repo-trigger.js +1 -1
  70. package/dist/adapters/task-sources/repo-validation.d.ts +36 -5
  71. package/dist/adapters/task-sources/repo-validation.js +126 -5
  72. package/dist/adapters/task-sources/task-file-loader.d.ts +10 -7
  73. package/dist/adapters/task-sources/task-file-loader.js +21 -7
  74. package/dist/agent-observer/test-imports.d.ts +7 -0
  75. package/dist/agent-observer/test-imports.js +185 -0
  76. package/dist/artifact-capture/comparator.d.ts +22 -0
  77. package/dist/artifact-capture/comparator.js +493 -0
  78. package/dist/artifact-capture/filesystem-collector.d.ts +42 -0
  79. package/dist/artifact-capture/filesystem-collector.js +237 -0
  80. package/dist/artifact-capture/redact-artifact.d.ts +20 -0
  81. package/dist/artifact-capture/redact-artifact.js +115 -0
  82. package/dist/assertions/source-isolation.d.ts +1 -1
  83. package/dist/assertions/source-isolation.js +1 -1
  84. package/dist/cli.js +4 -0
  85. package/dist/commands/calculate-scores.js +1 -0
  86. package/dist/commands/capture-compare.d.ts +15 -0
  87. package/dist/commands/capture-compare.js +253 -0
  88. package/dist/commands/capture-list.d.ts +12 -0
  89. package/dist/commands/capture-list.js +147 -0
  90. package/dist/commands/capture.d.ts +9 -0
  91. package/dist/commands/capture.js +16 -0
  92. package/dist/commands/chronic-failures.d.ts +8 -0
  93. package/dist/commands/chronic-failures.js +33 -0
  94. package/dist/commands/coverage-audit.js +3 -1
  95. package/dist/commands/explain-handler.d.ts +1 -1
  96. package/dist/commands/explain-handler.js +37 -8
  97. package/dist/commands/fetch-docs.js +1 -0
  98. package/dist/commands/generate-configs.d.ts +3 -3
  99. package/dist/commands/generate-configs.js +20 -8
  100. package/dist/commands/init.d.ts +5 -4
  101. package/dist/commands/init.js +190 -25
  102. package/dist/commands/pipeline-action.d.ts +7 -1
  103. package/dist/commands/pipeline-action.js +43 -19
  104. package/dist/commands/pipeline.d.ts +6 -1
  105. package/dist/commands/pipeline.js +7 -2
  106. package/dist/commands/pr-comment.js +1 -0
  107. package/dist/commands/publish.js +1 -0
  108. package/dist/commands/shared/help.js +2 -2
  109. package/dist/commands/update-quality-scores.d.ts +5 -0
  110. package/dist/commands/update-quality-scores.js +20 -0
  111. package/dist/commands/validate-tasks.d.ts +2 -2
  112. package/dist/commands/validate-tasks.js +26 -15
  113. package/dist/composition-root.d.ts +15 -4
  114. package/dist/composition-root.js +100 -55
  115. package/dist/config/features.ts +23 -0
  116. package/dist/config/models.ts +100 -0
  117. package/dist/config/prompts.ts +16 -0
  118. package/dist/config/rubrics.ts +225 -0
  119. package/dist/config/schedules.ts +47 -0
  120. package/dist/config/sinks.ts +37 -0
  121. package/dist/config/sources.ts +21 -0
  122. package/dist/config/thresholds.ts +61 -0
  123. package/dist/index.d.ts +41 -0
  124. package/dist/index.js +48 -0
  125. package/dist/lib/agent-behavior-report.d.ts +8 -0
  126. package/dist/lib/agent-behavior-report.js +185 -0
  127. package/dist/lib/baseline.d.ts +19 -0
  128. package/dist/lib/baseline.js +153 -0
  129. package/dist/lib/calculate-scores.d.ts +23 -0
  130. package/dist/lib/calculate-scores.js +42 -0
  131. package/dist/lib/compare.d.ts +18 -0
  132. package/dist/lib/compare.js +170 -0
  133. package/dist/lib/coverage-audit.d.ts +4 -0
  134. package/dist/lib/coverage-audit.js +42 -0
  135. package/dist/lib/discovery-report.d.ts +13 -0
  136. package/dist/lib/discovery-report.js +57 -0
  137. package/dist/lib/fetch-docs.d.ts +30 -0
  138. package/dist/lib/fetch-docs.js +171 -0
  139. package/dist/lib/generate-configs.d.ts +25 -0
  140. package/dist/lib/generate-configs.js +42 -0
  141. package/dist/lib/grader-api.d.ts +21 -0
  142. package/dist/lib/grader-api.js +34 -0
  143. package/dist/lib/grader-compare.d.ts +19 -0
  144. package/dist/lib/grader-compare.js +91 -0
  145. package/dist/lib/grader-consistency.d.ts +27 -0
  146. package/dist/lib/grader-consistency.js +79 -0
  147. package/dist/lib/grader-sensitivity.d.ts +19 -0
  148. package/dist/lib/grader-sensitivity.js +75 -0
  149. package/dist/lib/grader-validate.d.ts +19 -0
  150. package/dist/lib/grader-validate.js +78 -0
  151. package/dist/lib/measure-retrieval.d.ts +14 -0
  152. package/dist/lib/measure-retrieval.js +71 -0
  153. package/dist/lib/pr-comment.d.ts +16 -0
  154. package/dist/lib/pr-comment.js +28 -0
  155. package/dist/lib/readiness-report.d.ts +13 -0
  156. package/dist/lib/readiness-report.js +108 -0
  157. package/dist/lib/webhook-server.d.ts +11 -0
  158. package/dist/lib/webhook-server.js +24 -0
  159. package/dist/lib/weekly-digest.d.ts +24 -0
  160. package/dist/lib/weekly-digest.js +148 -0
  161. package/dist/orchestration/build-app-context.js +13 -0
  162. package/dist/orchestration/build-step-sequence.js +4 -2
  163. package/dist/orchestration/cache-context.d.ts +23 -0
  164. package/dist/orchestration/cache-context.js +43 -0
  165. package/dist/orchestration/env-bridge.d.ts +21 -0
  166. package/dist/orchestration/env-bridge.js +66 -0
  167. package/dist/orchestration/load-pipeline-tasks.d.ts +34 -0
  168. package/dist/orchestration/load-pipeline-tasks.js +52 -0
  169. package/dist/orchestration/pipeline-orchestrator.js +75 -5
  170. package/dist/orchestration/step-runner.js +5 -1
  171. package/dist/orchestration/steps/calculate-scores-step.d.ts +1 -0
  172. package/dist/orchestration/steps/calculate-scores-step.js +13 -0
  173. package/dist/orchestration/steps/callback-step.js +10 -1
  174. package/dist/orchestration/steps/compare-step.js +6 -3
  175. package/dist/orchestration/steps/discovery-report-step.js +6 -2
  176. package/dist/orchestration/steps/fetch-docs-shell.d.ts +17 -0
  177. package/dist/orchestration/steps/fetch-docs-shell.js +30 -0
  178. package/dist/orchestration/steps/fetch-docs-step.d.ts +1 -0
  179. package/dist/orchestration/steps/fetch-docs-step.js +32 -19
  180. package/dist/orchestration/steps/gap-analysis-step.js +13 -2
  181. package/dist/orchestration/steps/generate-configs-step.d.ts +1 -0
  182. package/dist/orchestration/steps/generate-configs-step.js +77 -26
  183. package/dist/orchestration/steps/mirror-repo-tasks-step.d.ts +1 -1
  184. package/dist/orchestration/steps/mirror-repo-tasks-step.js +1 -1
  185. package/dist/orchestration/steps/publish-report-step.js +19 -0
  186. package/dist/orchestration/steps/readiness-step.js +8 -3
  187. package/dist/orchestration/steps/report-step.js +17 -4
  188. package/dist/orchestration/steps/run-eval-step.d.ts +1 -0
  189. package/dist/orchestration/steps/run-eval-step.js +51 -31
  190. package/dist/pipeline/agent-behavior-report.js +6 -0
  191. package/dist/pipeline/attribution.d.ts +1 -1
  192. package/dist/pipeline/attribution.js +1 -1
  193. package/dist/pipeline/cache.js +29 -15
  194. package/dist/pipeline/calculate-scores.d.ts +2 -0
  195. package/dist/pipeline/calculate-scores.js +70 -33
  196. package/dist/pipeline/chronic-failures.d.ts +55 -0
  197. package/dist/pipeline/chronic-failures.js +110 -0
  198. package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js +1 -1
  199. package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.js +1 -1
  200. package/dist/pipeline/compiler/__tests__/literacy-handler.test.js +1 -1
  201. package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +132 -62
  202. package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.js +2 -3
  203. package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.js +33 -100
  204. package/dist/pipeline/compiler/__tests__/task-bridge.test.d.ts +9 -0
  205. package/dist/pipeline/compiler/__tests__/task-bridge.test.js +339 -0
  206. package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.d.ts +10 -0
  207. package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.js +509 -0
  208. package/dist/pipeline/compiler/assertion-mapper.d.ts +1 -1
  209. package/dist/pipeline/compiler/assertion-mapper.js +1 -1
  210. package/dist/pipeline/compiler/compiler-to-yaml.d.ts +2 -7
  211. package/dist/pipeline/compiler/compiler-to-yaml.js +2 -7
  212. package/dist/pipeline/compiler/config-loader.d.ts +14 -0
  213. package/dist/pipeline/compiler/config-loader.js +42 -2
  214. package/dist/pipeline/compiler/fixture-resolver.d.ts +1 -1
  215. package/dist/pipeline/compiler/fixture-resolver.js +1 -1
  216. package/dist/pipeline/compiler/ignore-fields.d.ts +1 -1
  217. package/dist/pipeline/compiler/ignore-fields.js +1 -1
  218. package/dist/pipeline/compiler/index.d.ts +2 -5
  219. package/dist/pipeline/compiler/index.js +2 -5
  220. package/dist/pipeline/compiler/literacy-bridge.d.ts +2 -2
  221. package/dist/pipeline/compiler/literacy-bridge.js +2 -2
  222. package/dist/pipeline/compiler/mode-bases/agent-harness.d.ts +10 -0
  223. package/dist/pipeline/compiler/mode-bases/agent-harness.js +21 -0
  224. package/dist/pipeline/compiler/mode-bases/index.d.ts +4 -0
  225. package/dist/pipeline/compiler/mode-bases/index.js +4 -0
  226. package/dist/pipeline/compiler/mode-bases/knowledge-probe.d.ts +10 -0
  227. package/dist/pipeline/compiler/mode-bases/knowledge-probe.js +22 -0
  228. package/dist/pipeline/compiler/mode-bases/literacy.d.ts +23 -0
  229. package/dist/pipeline/compiler/mode-bases/literacy.js +132 -0
  230. package/dist/pipeline/compiler/mode-bases/mcp-server.d.ts +10 -0
  231. package/dist/pipeline/compiler/mode-bases/mcp-server.js +70 -0
  232. package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.d.ts +43 -0
  233. package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.js +187 -0
  234. package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.d.ts +19 -0
  235. package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.js +138 -0
  236. package/dist/pipeline/compiler/mode-handlers/agent-harness/index.d.ts +16 -0
  237. package/dist/pipeline/compiler/mode-handlers/agent-harness/index.js +43 -0
  238. package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.d.ts +9 -0
  239. package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.js +29 -0
  240. package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.d.ts +12 -0
  241. package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.js +82 -0
  242. package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.d.ts +4 -0
  243. package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.js +19 -0
  244. package/dist/pipeline/compiler/mode-handlers/agent-harness/types.d.ts +49 -0
  245. package/dist/pipeline/compiler/mode-handlers/agent-harness/types.js +4 -0
  246. package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.d.ts +9 -0
  247. package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.js +16 -0
  248. package/dist/pipeline/compiler/mode-handlers/index.d.ts +6 -7
  249. package/dist/pipeline/compiler/mode-handlers/index.js +6 -8
  250. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.d.ts +16 -0
  251. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.js +61 -0
  252. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.d.ts +18 -0
  253. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.js +112 -0
  254. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.d.ts +26 -0
  255. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.js +49 -0
  256. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.d.ts +9 -0
  257. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.js +28 -0
  258. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.d.ts +44 -0
  259. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.js +4 -0
  260. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.d.ts +9 -0
  261. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.js +24 -0
  262. package/dist/pipeline/compiler/mode-handlers/literacy/assertions.d.ts +18 -0
  263. package/dist/pipeline/compiler/mode-handlers/literacy/assertions.js +118 -0
  264. package/dist/pipeline/compiler/mode-handlers/literacy/compiler.d.ts +14 -0
  265. package/dist/pipeline/compiler/mode-handlers/literacy/compiler.js +105 -0
  266. package/dist/pipeline/compiler/mode-handlers/literacy/index.d.ts +11 -0
  267. package/dist/pipeline/compiler/mode-handlers/literacy/index.js +38 -0
  268. package/dist/pipeline/compiler/mode-handlers/literacy/prompts.d.ts +9 -0
  269. package/dist/pipeline/compiler/mode-handlers/literacy/prompts.js +74 -0
  270. package/dist/pipeline/compiler/mode-handlers/literacy/types.d.ts +41 -0
  271. package/dist/pipeline/compiler/mode-handlers/literacy/types.js +4 -0
  272. package/dist/pipeline/compiler/mode-handlers/literacy/validation.d.ts +12 -0
  273. package/dist/pipeline/compiler/mode-handlers/literacy/validation.js +28 -0
  274. package/dist/pipeline/compiler/mode-handlers/mcp-assertions.js +63 -6
  275. package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.d.ts +42 -0
  276. package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.js +334 -0
  277. package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.d.ts +19 -0
  278. package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.js +100 -0
  279. package/dist/pipeline/compiler/mode-handlers/mcp-server/index.d.ts +27 -0
  280. package/dist/pipeline/compiler/mode-handlers/mcp-server/index.js +54 -0
  281. package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.d.ts +8 -0
  282. package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.js +28 -0
  283. package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.d.ts +28 -0
  284. package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.js +108 -0
  285. package/dist/pipeline/compiler/mode-handlers/mcp-server/types.d.ts +37 -0
  286. package/dist/pipeline/compiler/mode-handlers/mcp-server/types.js +4 -0
  287. package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.d.ts +9 -0
  288. package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.js +43 -0
  289. package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.d.ts +3 -1
  290. package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.js +65 -67
  291. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.d.ts +33 -0
  292. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.js +191 -0
  293. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.d.ts +19 -0
  294. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.js +101 -0
  295. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.d.ts +19 -0
  296. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.js +172 -0
  297. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.d.ts +19 -0
  298. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.js +323 -0
  299. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.d.ts +103 -0
  300. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.js +4 -0
  301. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.d.ts +65 -0
  302. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.js +368 -0
  303. package/dist/pipeline/compiler/preset-loader.d.ts +22 -0
  304. package/dist/pipeline/compiler/preset-loader.js +99 -0
  305. package/dist/pipeline/compiler/presets/sanity-literacy.d.ts +7 -10
  306. package/dist/pipeline/compiler/presets/sanity-literacy.js +11 -157
  307. package/dist/pipeline/compiler/promptfoo-compiler.d.ts +1 -4
  308. package/dist/pipeline/compiler/promptfoo-compiler.js +3 -12
  309. package/dist/pipeline/compiler/provider-assembler.js +13 -7
  310. package/dist/pipeline/compiler/sandbox/docker-sandbox.d.ts +1 -1
  311. package/dist/pipeline/compiler/sandbox/docker-sandbox.js +1 -1
  312. package/dist/pipeline/compiler/sandbox/fixture-provisioner.d.ts +1 -1
  313. package/dist/pipeline/compiler/sandbox/fixture-provisioner.js +1 -1
  314. package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.d.ts +1 -1
  315. package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.js +1 -1
  316. package/dist/pipeline/compiler/sandbox/index.d.ts +1 -1
  317. package/dist/pipeline/compiler/sandbox/index.js +1 -1
  318. package/dist/pipeline/compiler/sandbox/sandbox-selector.d.ts +1 -1
  319. package/dist/pipeline/compiler/sandbox/sandbox-selector.js +1 -1
  320. package/dist/pipeline/compiler/sandbox/sandbox-strategy.d.ts +1 -1
  321. package/dist/pipeline/compiler/sandbox/sandbox-strategy.js +1 -1
  322. package/dist/pipeline/compiler/sandbox/tempdir-sandbox.d.ts +1 -1
  323. package/dist/pipeline/compiler/sandbox/tempdir-sandbox.js +1 -1
  324. package/dist/pipeline/compiler/scoring-bridge.d.ts +1 -1
  325. package/dist/pipeline/compiler/scoring-bridge.js +1 -1
  326. package/dist/pipeline/compiler/task-bridge.d.ts +41 -0
  327. package/dist/pipeline/compiler/task-bridge.js +92 -0
  328. package/dist/pipeline/compiler/task-graph-builder.d.ts +1 -4
  329. package/dist/pipeline/compiler/task-graph-builder.js +1 -4
  330. package/dist/pipeline/compiler/telemetry/index.d.ts +1 -1
  331. package/dist/pipeline/compiler/telemetry/index.js +1 -1
  332. package/dist/pipeline/compiler/variable-resolver.d.ts +1 -1
  333. package/dist/pipeline/compiler/variable-resolver.js +1 -1
  334. package/dist/pipeline/coverage-audit.d.ts +1 -1
  335. package/dist/pipeline/coverage-audit.js +1 -1
  336. package/dist/pipeline/degradations.d.ts +1 -1
  337. package/dist/pipeline/degradations.js +1 -1
  338. package/dist/pipeline/expand-tasks.d.ts +2 -2
  339. package/dist/pipeline/expand-tasks.js +2 -2
  340. package/dist/pipeline/failure-modes.d.ts +1 -1
  341. package/dist/pipeline/failure-modes.js +13 -1
  342. package/dist/pipeline/gap-analysis.d.ts +1 -1
  343. package/dist/pipeline/gap-analysis.js +3 -1
  344. package/dist/pipeline/generate-configs.d.ts +2 -2
  345. package/dist/pipeline/generate-configs.js +16 -9
  346. package/dist/pipeline/grader-compare-runner.d.ts +1 -1
  347. package/dist/pipeline/grader-compare-runner.js +7 -1
  348. package/dist/pipeline/grader-comparison.d.ts +1 -1
  349. package/dist/pipeline/grader-comparison.js +1 -1
  350. package/dist/pipeline/grader-consistency-runner.d.ts +1 -1
  351. package/dist/pipeline/grader-consistency-runner.js +7 -1
  352. package/dist/pipeline/grader-consistency.d.ts +1 -1
  353. package/dist/pipeline/grader-consistency.js +1 -1
  354. package/dist/pipeline/grader-sensitivity-runner.d.ts +1 -1
  355. package/dist/pipeline/grader-sensitivity-runner.js +1 -1
  356. package/dist/pipeline/grader-sensitivity.d.ts +1 -1
  357. package/dist/pipeline/grader-sensitivity.js +1 -1
  358. package/dist/pipeline/grader-validate-runner.d.ts +1 -1
  359. package/dist/pipeline/grader-validate-runner.js +2 -2
  360. package/dist/pipeline/grader-validation.d.ts +1 -1
  361. package/dist/pipeline/grader-validation.js +1 -1
  362. package/dist/pipeline/map-request-to-config.js +16 -2
  363. package/dist/pipeline/mirror-repo-tasks.d.ts +8 -8
  364. package/dist/pipeline/mirror-repo-tasks.js +10 -10
  365. package/dist/pipeline/plan-format.d.ts +1 -1
  366. package/dist/pipeline/plan-format.js +1 -1
  367. package/dist/pipeline/plan.d.ts +1 -1
  368. package/dist/pipeline/plan.js +68 -30
  369. package/dist/pipeline/probe.d.ts +1 -1
  370. package/dist/pipeline/probe.js +1 -1
  371. package/dist/pipeline/readiness-report.d.ts +2 -2
  372. package/dist/pipeline/readiness-report.js +2 -2
  373. package/dist/pipeline/release-classification.d.ts +1 -1
  374. package/dist/pipeline/release-classification.js +1 -1
  375. package/dist/pipeline/release-report.d.ts +1 -1
  376. package/dist/pipeline/release-report.js +1 -1
  377. package/dist/pipeline/repo-eval-comment.d.ts +1 -1
  378. package/dist/pipeline/repo-eval-comment.js +1 -1
  379. package/dist/pipeline/repo-threshold-evaluator.d.ts +1 -1
  380. package/dist/pipeline/repo-threshold-evaluator.js +1 -1
  381. package/dist/pipeline/resolve-mappings.d.ts +6 -6
  382. package/dist/pipeline/resolve-mappings.js +44 -44
  383. package/dist/pipeline/retrieval-metrics.d.ts +3 -3
  384. package/dist/pipeline/retrieval-metrics.js +28 -20
  385. package/dist/pipeline/steps/calculate-scores-step.d.ts +11 -0
  386. package/dist/pipeline/steps/calculate-scores-step.js +89 -0
  387. package/dist/pipeline/steps/compare-step.d.ts +18 -0
  388. package/dist/pipeline/steps/compare-step.js +90 -0
  389. package/dist/pipeline/steps/eval-step.d.ts +53 -0
  390. package/dist/pipeline/steps/eval-step.js +347 -0
  391. package/dist/pipeline/steps/fetch-docs-step.d.ts +11 -0
  392. package/dist/pipeline/steps/fetch-docs-step.js +84 -0
  393. package/dist/pipeline/steps/generate-configs-step.d.ts +11 -0
  394. package/dist/pipeline/steps/generate-configs-step.js +98 -0
  395. package/dist/pipeline/steps/grader-consistency-step.d.ts +21 -0
  396. package/dist/pipeline/steps/grader-consistency-step.js +74 -0
  397. package/dist/pipeline/steps/publish-report-step.d.ts +57 -0
  398. package/dist/pipeline/steps/publish-report-step.js +243 -0
  399. package/dist/pipeline/steps/report-step.d.ts +13 -0
  400. package/dist/pipeline/steps/report-step.js +56 -0
  401. package/dist/pipeline/steps/update-scores-step.d.ts +11 -0
  402. package/dist/pipeline/steps/update-scores-step.js +42 -0
  403. package/dist/pipeline/targeted-loo.d.ts +1 -1
  404. package/dist/pipeline/targeted-loo.js +1 -1
  405. package/dist/pipeline/thresholds.d.ts +1 -1
  406. package/dist/pipeline/thresholds.js +1 -1
  407. package/dist/pipeline/validate.js +13 -0
  408. package/dist/report-store.d.ts +17 -0
  409. package/dist/report-store.js +24 -0
  410. package/dist/scripts/agent-behavior-report.d.ts +19 -0
  411. package/dist/scripts/agent-behavior-report.js +315 -0
  412. package/dist/scripts/baseline.d.ts +43 -0
  413. package/dist/scripts/baseline.js +267 -0
  414. package/dist/scripts/calculate-scores.d.ts +166 -0
  415. package/dist/scripts/calculate-scores.js +1296 -0
  416. package/dist/scripts/compare.d.ts +22 -0
  417. package/dist/scripts/compare.js +334 -0
  418. package/dist/scripts/coverage-audit.d.ts +44 -0
  419. package/dist/scripts/coverage-audit.js +209 -0
  420. package/dist/scripts/debug-eval.d.ts +19 -0
  421. package/dist/scripts/debug-eval.js +73 -0
  422. package/dist/scripts/discovery-report.d.ts +58 -0
  423. package/dist/scripts/discovery-report.js +250 -0
  424. package/dist/scripts/fetch-docs.d.ts +35 -0
  425. package/dist/scripts/fetch-docs.js +472 -0
  426. package/dist/scripts/generate-configs.d.ts +66 -0
  427. package/dist/scripts/generate-configs.js +459 -0
  428. package/dist/scripts/grader-api.d.ts +27 -0
  429. package/dist/scripts/grader-api.js +206 -0
  430. package/dist/scripts/grader-compare.d.ts +22 -0
  431. package/dist/scripts/grader-compare.js +368 -0
  432. package/dist/scripts/grader-consistency.d.ts +20 -0
  433. package/dist/scripts/grader-consistency.js +313 -0
  434. package/dist/scripts/grader-sensitivity.d.ts +22 -0
  435. package/dist/scripts/grader-sensitivity.js +354 -0
  436. package/dist/scripts/grader-validate.d.ts +19 -0
  437. package/dist/scripts/grader-validate.js +267 -0
  438. package/dist/scripts/measure-retrieval.d.ts +10 -0
  439. package/dist/scripts/measure-retrieval.js +145 -0
  440. package/dist/scripts/migrate-task-mode.d.ts +1 -1
  441. package/dist/scripts/migrate-task-mode.js +1 -1
  442. package/dist/scripts/migrate-tasks-to-content-lake.d.ts +1 -1
  443. package/dist/scripts/migrate-tasks-to-content-lake.js +1 -1
  444. package/dist/scripts/pipeline.d.ts +76 -0
  445. package/dist/scripts/pipeline.js +1031 -0
  446. package/dist/scripts/pr-comment.d.ts +10 -0
  447. package/dist/scripts/pr-comment.js +510 -0
  448. package/dist/scripts/readiness-report.d.ts +88 -0
  449. package/dist/scripts/readiness-report.js +342 -0
  450. package/dist/scripts/update-quality-scores.d.ts +15 -0
  451. package/dist/scripts/update-quality-scores.js +184 -0
  452. package/dist/scripts/validate-task-sources.d.ts +1 -1
  453. package/dist/scripts/validate-task-sources.js +1 -1
  454. package/dist/scripts/validate.d.ts +13 -0
  455. package/dist/scripts/validate.js +79 -0
  456. package/dist/scripts/webhook-server.d.ts +26 -0
  457. package/dist/scripts/webhook-server.js +147 -0
  458. package/dist/scripts/weekly-digest.d.ts +24 -0
  459. package/dist/scripts/weekly-digest.js +144 -0
  460. package/dist/sinks/format-slack.d.ts +64 -0
  461. package/dist/sinks/format-slack.js +306 -0
  462. package/dist/sinks/slack-sink.d.ts +27 -0
  463. package/dist/sinks/slack-sink.js +78 -0
  464. package/dist/sinks/types.d.ts +1 -1
  465. package/dist/sinks/types.js +1 -1
  466. package/dist/sinks/webhook-sink.d.ts +19 -0
  467. package/dist/sinks/webhook-sink.js +50 -0
  468. package/dist/tasks/knowledge-probe/define-type-api.task.ts +66 -0
  469. package/dist/tasks/knowledge-probe/groq-projections.task.ts +62 -0
  470. package/dist/tasks/literacy/content-lake.task.ts +181 -0
  471. package/dist/tasks/literacy/frameworks.task.ts +129 -0
  472. package/dist/tasks/literacy/functions.task.ts +70 -0
  473. package/dist/tasks/literacy/groq.task.ts +259 -0
  474. package/dist/tasks/literacy/image-handling.task.ts +95 -0
  475. package/dist/tasks/literacy/nextjs-live.task.ts +76 -0
  476. package/dist/tasks/literacy/portable-text.task.ts +169 -0
  477. package/dist/tasks/literacy/studio-setup.task.ts +134 -0
  478. package/dist/tasks/literacy/visual-editing.task.ts +147 -0
  479. package/package.json +32 -24
  480. package/tasks/.expanded.agentic.yaml +280 -0
  481. package/tasks/.expanded.yaml +565 -0
  482. package/tasks/knowledge-probe/define-type-api.task.ts +11 -0
  483. package/tasks/knowledge-probe/groq-projections.task.ts +3 -0
  484. package/tasks/literacy/content-lake.task.ts +181 -0
  485. package/tasks/literacy/frameworks.task.ts +1 -0
  486. package/tasks/literacy/functions.task.ts +1 -0
  487. package/tasks/literacy/groq.task.ts +1 -0
  488. package/tasks/literacy/image-handling.task.ts +95 -0
  489. package/tasks/literacy/nextjs-live.task.ts +2 -1
  490. package/tasks/literacy/portable-text.task.ts +169 -0
  491. package/tasks/literacy/studio-setup.task.ts +5 -2
  492. package/tasks/literacy/visual-editing.task.ts +1 -0
  493. package/LICENSE +0 -21
  494. package/tasks/frameworks.yaml +0 -98
  495. package/tasks/functions.yaml +0 -51
  496. package/tasks/groq.yaml +0 -216
  497. package/tasks/nextjs-live.yaml +0 -62
  498. package/tasks/studio-setup.yaml +0 -111
  499. package/tasks/visual-editing.yaml +0 -120
@@ -1,8 +1,129 @@
1
1
  /**
2
- * repo-validation.ts — Re-exports semantic validation from @sanity/ailf-tasks.
2
+ * repo-validation.ts — Semantic validation for task definitions.
3
3
  *
4
- * The validation logic is the single source of truth in @sanity/ailf-tasks.
5
- * This file re-exports so existing eval-package importers don't need
6
- * to change their import paths.
4
+ * Checks that go beyond Zod schema parsing:
5
+ * - Assertion types are in the curated set
6
+ * - Rubric template names resolve to known templates
7
+ * - Doc ref slugs look reasonable (slugs, not URLs)
8
+ * - Tasks have at least one LLM rubric assertion (recommended)
9
+ * - Tasks have a prompt text (recommended)
10
+ *
11
+ * These produce warnings, not errors — the pipeline can still run
12
+ * with imperfect tasks. Only structural failures (caught by Zod) block.
13
+ *
14
+ * Previously this file re-exported from @sanity/ailf-tasks. That package
15
+ * has been eliminated — all validation logic now lives here.
16
+ */
17
+ import { CURATED_ASSERTION_TYPES, RUBRIC_TEMPLATE_NAMES, } from "./repo-schemas.js";
18
+ // ---------------------------------------------------------------------------
19
+ // Public API
20
+ // ---------------------------------------------------------------------------
21
+ /**
22
+ * Run semantic validation on an array of parsed canonical tasks.
23
+ *
24
+ * Returns warnings for issues that don't block execution (unknown feature
25
+ * areas, unresolved slugs) and errors for issues that would cause pipeline
26
+ * failures (completely missing required fields — though Zod catches most).
27
+ */
28
+ export function validateCanonicalTasks(tasks) {
29
+ const errors = [];
30
+ const warnings = [];
31
+ // Check for duplicate IDs
32
+ const seenIds = new Set();
33
+ for (const task of tasks) {
34
+ if (seenIds.has(task.id)) {
35
+ errors.push({
36
+ taskId: task.id,
37
+ field: "id",
38
+ message: `Duplicate task ID "${task.id}"`,
39
+ });
40
+ }
41
+ seenIds.add(task.id);
42
+ }
43
+ for (const task of tasks) {
44
+ const assertions = task.assertions ?? [];
45
+ // Check assertion types
46
+ for (let i = 0; i < assertions.length; i++) {
47
+ const assertion = assertions[i];
48
+ if (!CURATED_ASSERTION_TYPES.includes(assertion.type)) {
49
+ warnings.push({
50
+ taskId: task.id,
51
+ field: `assertions[${i}].type`,
52
+ message: `Unknown assertion type "${assertion.type}". ` +
53
+ `Valid types: ${CURATED_ASSERTION_TYPES.join(", ")}`,
54
+ });
55
+ }
56
+ // Check rubric template for llm-rubric assertions
57
+ if (assertion.type === "llm-rubric" && "template" in assertion) {
58
+ const template = assertion.template;
59
+ if (!RUBRIC_TEMPLATE_NAMES.includes(template)) {
60
+ warnings.push({
61
+ taskId: task.id,
62
+ field: `assertions[${i}].template`,
63
+ message: `Unknown rubric template "${template}". ` +
64
+ `Valid templates: ${RUBRIC_TEMPLATE_NAMES.join(", ")}`,
65
+ });
66
+ }
67
+ }
68
+ }
69
+ // Check canonical doc refs look reasonable
70
+ const docs = task.context?.docs ?? [];
71
+ for (let i = 0; i < docs.length; i++) {
72
+ const doc = docs[i];
73
+ // Slug refs: warn if they look like URLs or paths
74
+ if ("slug" in doc && !("id" in doc) && typeof doc.slug === "string") {
75
+ if (doc.slug.includes("/") || doc.slug.includes("http")) {
76
+ warnings.push({
77
+ taskId: task.id,
78
+ field: `context.docs[${i}].slug`,
79
+ message: `Slug "${doc.slug}" looks like a URL or path — use 'path' type for paths or 'slug' for document slugs (e.g., "groq-introduction")`,
80
+ });
81
+ }
82
+ }
83
+ }
84
+ // Check task has at least one llm-rubric assertion (recommended but not required)
85
+ const hasLlmRubric = assertions.some((a) => a.type === "llm-rubric");
86
+ if (!hasLlmRubric) {
87
+ warnings.push({
88
+ taskId: task.id,
89
+ field: "assertions",
90
+ message: "No llm-rubric assertion found. Tasks should have at least one scored rubric for meaningful evaluation.",
91
+ });
92
+ }
93
+ // Check prompt text exists
94
+ if (!task.prompt?.text) {
95
+ warnings.push({
96
+ taskId: task.id,
97
+ field: "prompt.text",
98
+ message: "No task prompt found in prompt.text. The LLM will receive an empty implementation request.",
99
+ });
100
+ }
101
+ }
102
+ return {
103
+ valid: errors.length === 0,
104
+ errors,
105
+ warnings,
106
+ };
107
+ }
108
+ /**
109
+ * Format validation results for console output.
7
110
  */
8
- export { detectSnakeCaseFields, formatValidationResult, validateRepoTasks, } from "../../_vendor/ailf-tasks/index.js";
111
+ export function formatValidationResult(result) {
112
+ const lines = [];
113
+ if (result.errors.length > 0) {
114
+ lines.push("Errors:");
115
+ for (const e of result.errors) {
116
+ lines.push(` [${e.taskId}] ${e.field}: ${e.message}`);
117
+ }
118
+ }
119
+ if (result.warnings.length > 0) {
120
+ lines.push("Warnings:");
121
+ for (const w of result.warnings) {
122
+ lines.push(` [${w.taskId}] ${w.field}: ${w.message}`);
123
+ }
124
+ }
125
+ if (result.valid && result.warnings.length === 0) {
126
+ lines.push("All tasks pass validation");
127
+ }
128
+ return lines.join("\n");
129
+ }
@@ -1,10 +1,9 @@
1
1
  /**
2
2
  * TaskFileLoader — loads task definitions from TypeScript files.
3
3
  *
4
- * Supplements the existing YAML-based task loading by supporting
5
- * `*.task.ts` and `*.task.js` files in task directories. Files are
6
- * loaded via jiti and validated through the RepoTaskSchema from
7
- * @sanity/ailf-tasks.
4
+ * Supports `*.task.ts` and `*.task.js` files in task directories. Files
5
+ * are loaded via jiti and expected to export GeneralizedTaskDefinition
6
+ * objects authored with `defineTask()`.
8
7
  *
9
8
  * TS task files export a single task or an array of tasks:
10
9
  *
@@ -20,9 +19,6 @@
20
19
  * ]
21
20
  * ```
22
21
  *
23
- * The loader integrates into the existing RepoTaskSource adapter — TS
24
- * task files are discovered alongside YAML files in the same directory.
25
- *
26
22
  * @see docs/design-docs/architecture-overhaul/typescript-configuration.md
27
23
  */
28
24
  /** A raw task object loaded from a TS file (pre-validation) */
@@ -55,6 +51,13 @@ export declare function discoverTsTaskFiles(tasksDir: string): string[];
55
51
  * @returns The loaded task(s), or throws on load failure
56
52
  */
57
53
  export declare function loadTsTaskFile(filePath: string): Promise<RawTsTask>;
54
+ /**
55
+ * Synchronously load task definitions from a single TS/JS task file.
56
+ *
57
+ * Uses jiti's synchronous require-style loading instead of async import().
58
+ * Needed by resolve-mappings.ts which is called from sync contexts.
59
+ */
60
+ export declare function loadTsTaskFileSync(filePath: string): RawTsTask;
58
61
  /**
59
62
  * Load all TS task files from a directory.
60
63
  *
@@ -1,10 +1,9 @@
1
1
  /**
2
2
  * TaskFileLoader — loads task definitions from TypeScript files.
3
3
  *
4
- * Supplements the existing YAML-based task loading by supporting
5
- * `*.task.ts` and `*.task.js` files in task directories. Files are
6
- * loaded via jiti and validated through the RepoTaskSchema from
7
- * @sanity/ailf-tasks.
4
+ * Supports `*.task.ts` and `*.task.js` files in task directories. Files
5
+ * are loaded via jiti and expected to export GeneralizedTaskDefinition
6
+ * objects authored with `defineTask()`.
8
7
  *
9
8
  * TS task files export a single task or an array of tasks:
10
9
  *
@@ -20,13 +19,12 @@
20
19
  * ]
21
20
  * ```
22
21
  *
23
- * The loader integrates into the existing RepoTaskSource adapter — TS
24
- * task files are discovered alongside YAML files in the same directory.
25
- *
26
22
  * @see docs/design-docs/architecture-overhaul/typescript-configuration.md
27
23
  */
28
24
  import { existsSync, readdirSync } from "fs";
25
+ import { pathToFileURL } from "node:url";
29
26
  import { resolve } from "path";
27
+ import { createJiti } from "jiti";
30
28
  import { loadTsConfig } from "../config-sources/ts-config-loader.js";
31
29
  /**
32
30
  * Discover TS/JS task files in a directory.
@@ -67,6 +65,22 @@ export async function loadTsTaskFile(filePath) {
67
65
  const tasks = Array.isArray(value) ? value : [value];
68
66
  return { filePath, tasks };
69
67
  }
68
+ /**
69
+ * Synchronously load task definitions from a single TS/JS task file.
70
+ *
71
+ * Uses jiti's synchronous require-style loading instead of async import().
72
+ * Needed by resolve-mappings.ts which is called from sync contexts.
73
+ */
74
+ export function loadTsTaskFileSync(filePath) {
75
+ const jiti = createJiti(pathToFileURL(filePath).href, {
76
+ interopDefault: true,
77
+ requireCache: true,
78
+ });
79
+ const mod = jiti(filePath);
80
+ const value = mod && typeof mod === "object" && "default" in mod ? mod.default : mod;
81
+ const tasks = Array.isArray(value) ? value : [value];
82
+ return { filePath, tasks };
83
+ }
70
84
  /**
71
85
  * Load all TS task files from a directory.
72
86
  *
@@ -0,0 +1,7 @@
1
+ /**
2
+ * Final validation — ensures all agent-observer modules work together
3
+ * and the full data pipeline (record → classify → summarize) is correct.
4
+ *
5
+ * Run: tsx src/agent-observer/test-imports.ts
6
+ */
7
+ export {};
@@ -0,0 +1,185 @@
1
+ /**
2
+ * Final validation — ensures all agent-observer modules work together
3
+ * and the full data pipeline (record → classify → summarize) is correct.
4
+ *
5
+ * Run: tsx src/agent-observer/test-imports.ts
6
+ */
7
+ import { classifyRequests, extractDocSlug, extractSearchQuery, extractApiEndpoint, extractDomain, extractPageTitle, isDocPageRequest, isSearchRequest, isSanityApiRequest, } from "./classifier.js";
8
+ import { RequestRecorder } from "./proxy.js";
9
+ import { default as InstrumentedProvider } from "./provider.js";
10
+ // ─── Test data ───────────────────────────────────────────────────────────────
11
+ const now = Date.now();
12
+ const mockRequests = [
13
+ // 1. Doc page visit
14
+ {
15
+ headers: {},
16
+ latencyMs: 234,
17
+ method: "GET",
18
+ responsePreview: "<html><head><title>Create a Schema - Sanity</title></head>...",
19
+ responseSize: 45000,
20
+ seq: 0,
21
+ statusCode: 200,
22
+ timestamp: new Date(now).toISOString(),
23
+ url: "https://www.sanity.io/docs/create-a-schema-and-configure-sanity-studio",
24
+ },
25
+ // 2. Another doc page
26
+ {
27
+ headers: {},
28
+ latencyMs: 180,
29
+ method: "GET",
30
+ responsePreview: "<html><head><title>Object Type - Sanity Docs</title></head>...",
31
+ responseSize: 32000,
32
+ seq: 1,
33
+ statusCode: 200,
34
+ timestamp: new Date(now + 100).toISOString(),
35
+ url: "https://www.sanity.io/docs/schema-types/object-type",
36
+ },
37
+ // 3. Search query
38
+ {
39
+ headers: {},
40
+ latencyMs: 450,
41
+ method: "GET",
42
+ responseSize: 12000,
43
+ seq: 2,
44
+ statusCode: 200,
45
+ timestamp: new Date(now + 200).toISOString(),
46
+ url: "https://www.sanity.io/search?q=visual+editing+preview",
47
+ },
48
+ // 4. Sanity API call (GROQ query — must NOT be classified as search)
49
+ {
50
+ headers: {},
51
+ latencyMs: 320,
52
+ method: "GET",
53
+ responseSize: 8500,
54
+ seq: 3,
55
+ statusCode: 200,
56
+ timestamp: new Date(now + 300).toISOString(),
57
+ url: 'https://api.sanity.io/v2021-03-25/data/query/production?query=*[_type=="article"]',
58
+ },
59
+ // 5. CDN API call
60
+ {
61
+ headers: {},
62
+ latencyMs: 85,
63
+ method: "GET",
64
+ responseSize: 150000,
65
+ seq: 4,
66
+ statusCode: 200,
67
+ timestamp: new Date(now + 350).toISOString(),
68
+ url: "https://cdn.sanity.io/images/abc123/production/image-xyz.jpg",
69
+ },
70
+ // 6. External request (npm docs)
71
+ {
72
+ headers: {},
73
+ latencyMs: 300,
74
+ method: "GET",
75
+ responseSize: 20000,
76
+ seq: 5,
77
+ statusCode: 200,
78
+ timestamp: new Date(now + 400).toISOString(),
79
+ url: "https://docs.npmjs.com/cli/install",
80
+ },
81
+ // 7. Algolia search
82
+ {
83
+ body: JSON.stringify({ query: "presentation tool setup" }),
84
+ headers: {},
85
+ latencyMs: 150,
86
+ method: "POST",
87
+ responseSize: 5000,
88
+ seq: 6,
89
+ statusCode: 200,
90
+ timestamp: new Date(now + 500).toISOString(),
91
+ url: "https://abc123.algolia.net/1/indexes/sanity_docs/query",
92
+ },
93
+ // 8. Google search
94
+ {
95
+ headers: {},
96
+ latencyMs: 200,
97
+ method: "GET",
98
+ responseSize: 80000,
99
+ seq: 7,
100
+ statusCode: 200,
101
+ timestamp: new Date(now + 600).toISOString(),
102
+ url: "https://www.google.com/search?q=sanity+studio+custom+tool",
103
+ },
104
+ // 9. Failed request (should be skipped)
105
+ {
106
+ headers: {},
107
+ latencyMs: 0,
108
+ method: "GET",
109
+ responseSize: 0,
110
+ seq: 8,
111
+ statusCode: 0,
112
+ timestamp: new Date(now + 700).toISOString(),
113
+ url: "https://www.sanity.io/docs/nonexistent-page",
114
+ },
115
+ ];
116
+ // ─── Run tests ───────────────────────────────────────────────────────────────
117
+ let passed = 0;
118
+ let failed = 0;
119
+ function assert(condition, msg) {
120
+ if (condition) {
121
+ console.log(` ✅ ${msg}`);
122
+ passed++;
123
+ }
124
+ else {
125
+ console.log(` ❌ FAIL: ${msg}`);
126
+ failed++;
127
+ }
128
+ }
129
+ console.log("\n═══ Agent Observer — Final Validation ═══\n");
130
+ // --- Individual function tests ---
131
+ console.log("1. Individual detection functions:");
132
+ assert(isDocPageRequest(mockRequests[0]) === true, "Doc page detected");
133
+ assert(isDocPageRequest(mockRequests[3]) === false, "API call NOT detected as doc page");
134
+ assert(isSearchRequest(mockRequests[2]) === true, "Search detected");
135
+ assert(isSearchRequest(mockRequests[3]) === false, "API call NOT detected as search");
136
+ assert(isSanityApiRequest(mockRequests[3]) === true, "API call detected");
137
+ assert(isSanityApiRequest(mockRequests[0]) === false, "Doc page NOT detected as API call");
138
+ // --- Slug extraction ---
139
+ console.log("\n2. Metadata extraction:");
140
+ assert(extractDocSlug("https://www.sanity.io/docs/create-a-schema-and-configure-sanity-studio") === "create-a-schema-and-configure-sanity-studio", "Doc slug extracted correctly");
141
+ assert(extractDocSlug("https://www.sanity.io/docs/schema-types/object-type") ===
142
+ "schema-types/object-type", "Nested doc slug extracted correctly");
143
+ assert(extractSearchQuery(mockRequests[2]) === "visual editing preview", `Search query extracted: "${extractSearchQuery(mockRequests[2])}"`);
144
+ assert(extractApiEndpoint("https://api.sanity.io/v2021-03-25/data/query/production?query=*") === "/data/query/production?query=*", `API endpoint extracted: "${extractApiEndpoint("https://api.sanity.io/v2021-03-25/data/query/production?query=*")}"`);
145
+ assert(extractDomain("https://docs.npmjs.com/cli/install") === "docs.npmjs.com", "Domain extracted correctly");
146
+ assert(extractPageTitle("<html><head><title>Create a Schema - Sanity</title></head>") === "Create a Schema - Sanity", "Page title extracted correctly");
147
+ // --- Full classification pipeline ---
148
+ console.log("\n3. Full classification pipeline:");
149
+ const classified = classifyRequests(mockRequests);
150
+ assert(classified.docPageVisits.length === 2, `Doc pages: ${classified.docPageVisits.length} (expected 2)`);
151
+ assert(classified.searchQueries.length === 3, `Searches: ${classified.searchQueries.length} (expected 3 — site search + algolia + google)`);
152
+ assert(classified.apiCalls.length === 2, `API calls: ${classified.apiCalls.length} (expected 2 — GROQ query + CDN)`);
153
+ assert(classified.externalRequests.length === 1, `External: ${classified.externalRequests.length} (expected 1 — npmjs)`);
154
+ // Verify the API ?query= param was NOT classified as a search
155
+ const searchUrls = classified.searchQueries.map((s) => s.url);
156
+ assert(!searchUrls.includes('https://api.sanity.io/v2021-03-25/data/query/production?query=*[_type=="article"]'), "API ?query= param NOT misclassified as search");
157
+ // Verify the failed request was skipped
158
+ const allUrls = [
159
+ ...classified.docPageVisits.map((d) => d.url),
160
+ ...classified.searchQueries.map((s) => s.url),
161
+ ...classified.apiCalls.map((a) => a.url),
162
+ ...classified.externalRequests.map((e) => e.url),
163
+ ];
164
+ assert(!allUrls.includes("https://www.sanity.io/docs/nonexistent-page"), "Failed request (status 0) was skipped");
165
+ // --- RequestRecorder ---
166
+ console.log("\n4. RequestRecorder:");
167
+ const recorder = new RequestRecorder({ includePatterns: [/sanity\.io/] });
168
+ assert(typeof recorder.start === "function", "RequestRecorder.start() exists");
169
+ assert(typeof recorder.stop === "function", "RequestRecorder.stop() exists");
170
+ // --- InstrumentedProvider ---
171
+ console.log("\n5. InstrumentedProvider:");
172
+ const provider = new InstrumentedProvider({ config: {}, id: "test-validation" });
173
+ assert(provider.id() === "instrumented:test-validation", `Provider ID: "${provider.id()}"`);
174
+ assert(typeof provider.callApi === "function", "Provider.callApi() exists");
175
+ // --- Summary ---
176
+ console.log(`\n${"═".repeat(50)}`);
177
+ console.log(`Results: ${passed} passed, ${failed} failed, ${passed + failed} total`);
178
+ if (failed > 0) {
179
+ console.log("\n⚠️ Some tests failed!");
180
+ process.exit(1);
181
+ }
182
+ else {
183
+ console.log("\n✅ All tests passed! Agent observer system is ready.");
184
+ process.exit(0);
185
+ }
@@ -0,0 +1,22 @@
1
+ /**
2
+ * CaptureComparator — compares two capture directories and produces a diff report.
3
+ *
4
+ * Reads manifest.json from both directories and computes:
5
+ * - Inventory diff (added/removed/common artifacts)
6
+ * - Content diff (structural or strict, for common artifacts)
7
+ * - Score comparison (from score-summary.json)
8
+ * - Timing comparison (from pipeline-context.json)
9
+ * - Metadata comparison (mode, variant, config keys)
10
+ * - Security scan (regex for leaked secrets)
11
+ *
12
+ * Implementation for the types defined in @sanity/ailf-core.
13
+ */
14
+ import type { CaptureDiffReport, ComparisonOptions } from "../_vendor/ailf-core/index.d.ts";
15
+ /**
16
+ * Compare two capture directories and produce a structured diff report.
17
+ *
18
+ * @param baselineDir - Path to the baseline capture directory (contains manifest.json)
19
+ * @param experimentDir - Path to the experiment capture directory
20
+ * @param opts - Comparison options (mode, thresholds, etc.)
21
+ */
22
+ export declare function compareCaptures(baselineDir: string, experimentDir: string, opts?: Partial<ComparisonOptions>): CaptureDiffReport;