@sanity/ailf 0.5.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (377)
  1. package/README.md +0 -1
  2. package/config/features.ts +23 -0
  3. package/config/models.ts +95 -0
  4. package/config/prompts.ts +16 -0
  5. package/config/rubrics.ts +225 -0
  6. package/config/schedules.ts +47 -0
  7. package/config/sinks.ts +37 -0
  8. package/config/sources.ts +21 -0
  9. package/config/thresholds.ts +61 -0
  10. package/dist/_vendor/ailf-core/config-helpers.d.ts +171 -0
  11. package/dist/_vendor/ailf-core/config-helpers.js +170 -0
  12. package/dist/_vendor/ailf-core/env-helper.d.ts +35 -0
  13. package/dist/_vendor/ailf-core/env-helper.js +45 -0
  14. package/dist/_vendor/ailf-core/examples/index.d.ts +16 -0
  15. package/dist/_vendor/ailf-core/examples/index.js +25 -0
  16. package/dist/_vendor/ailf-core/index.d.ts +3 -0
  17. package/dist/_vendor/ailf-core/index.js +5 -0
  18. package/dist/_vendor/ailf-core/ports/context.d.ts +17 -2
  19. package/dist/_vendor/ailf-core/ports/doc-fetcher.d.ts +2 -2
  20. package/dist/_vendor/ailf-core/ports/index.d.ts +2 -1
  21. package/dist/_vendor/ailf-core/ports/mode-handler.d.ts +129 -0
  22. package/dist/_vendor/ailf-core/ports/mode-handler.js +19 -0
  23. package/dist/_vendor/ailf-core/ports/task-source.d.ts +16 -122
  24. package/dist/_vendor/ailf-core/ports/task-source.js +7 -7
  25. package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +8 -2
  26. package/dist/_vendor/ailf-core/schemas/eval-config.js +17 -2
  27. package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +9 -3
  28. package/dist/_vendor/ailf-core/schemas/pipeline-request.js +8 -1
  29. package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +14 -31
  30. package/dist/_vendor/ailf-core/schemas/pipeline.js +17 -9
  31. package/dist/_vendor/ailf-core/schemas/schedules.d.ts +14 -4
  32. package/dist/_vendor/ailf-core/schemas/schedules.js +6 -2
  33. package/dist/_vendor/ailf-core/schemas/sinks.d.ts +1 -1
  34. package/dist/_vendor/ailf-core/services/comparison-formatters.js +57 -19
  35. package/dist/_vendor/ailf-core/services/index.d.ts +2 -1
  36. package/dist/_vendor/ailf-core/services/index.js +2 -1
  37. package/dist/_vendor/ailf-core/services/scoring-engine.d.ts +153 -0
  38. package/dist/_vendor/ailf-core/services/scoring-engine.js +237 -0
  39. package/dist/_vendor/ailf-core/services/scoring.d.ts +15 -2
  40. package/dist/_vendor/ailf-core/services/scoring.js +25 -15
  41. package/dist/_vendor/ailf-core/types/branded-ids.d.ts +137 -0
  42. package/dist/_vendor/ailf-core/types/branded-ids.js +136 -0
  43. package/dist/_vendor/ailf-core/types/eval-mode-config.d.ts +150 -0
  44. package/dist/_vendor/ailf-core/types/eval-mode-config.js +24 -0
  45. package/dist/_vendor/ailf-core/types/generalized-task.d.ts +332 -0
  46. package/dist/_vendor/ailf-core/types/generalized-task.js +13 -0
  47. package/dist/_vendor/ailf-core/types/index.d.ts +45 -83
  48. package/dist/_vendor/ailf-core/types/index.js +8 -1
  49. package/dist/_vendor/ailf-core/types/plugin-registry.d.ts +257 -0
  50. package/dist/_vendor/ailf-core/types/plugin-registry.js +185 -0
  51. package/dist/_vendor/ailf-core/types/storage-schema.d.ts +199 -0
  52. package/dist/_vendor/ailf-core/types/storage-schema.js +39 -0
  53. package/dist/_vendor/ailf-core/types/task-graph.d.ts +86 -0
  54. package/dist/_vendor/ailf-core/types/task-graph.js +20 -0
  55. package/dist/_vendor/ailf-core/types/trace.d.ts +118 -0
  56. package/dist/_vendor/ailf-core/types/trace.js +18 -0
  57. package/dist/_vendor/ailf-core/types/variable-envelope.d.ts +80 -0
  58. package/dist/_vendor/ailf-core/types/variable-envelope.js +16 -0
  59. package/dist/_vendor/ailf-shared/dimension-names.d.ts +5 -18
  60. package/dist/_vendor/ailf-shared/dimension-names.js +6 -24
  61. package/dist/_vendor/ailf-shared/eval-modes.d.ts +38 -6
  62. package/dist/_vendor/ailf-shared/eval-modes.js +26 -2
  63. package/dist/_vendor/ailf-shared/index.d.ts +0 -1
  64. package/dist/_vendor/ailf-shared/index.js +0 -1
  65. package/dist/adapters/api-client/build-request.js +14 -13
  66. package/dist/adapters/config-sources/file-config-adapter.d.ts +20 -11
  67. package/dist/adapters/config-sources/file-config-adapter.js +39 -12
  68. package/dist/adapters/config-sources/index.d.ts +2 -0
  69. package/dist/adapters/config-sources/index.js +1 -0
  70. package/dist/adapters/config-sources/ts-config-loader.d.ts +59 -0
  71. package/dist/adapters/config-sources/ts-config-loader.js +141 -0
  72. package/dist/adapters/doc-fetchers/sanity-doc-fetcher.d.ts +3 -2
  73. package/dist/adapters/doc-fetchers/sanity-doc-fetcher.js +7 -2
  74. package/dist/adapters/task-sources/composite-task-source.d.ts +3 -3
  75. package/dist/adapters/task-sources/composite-task-source.js +1 -1
  76. package/dist/adapters/task-sources/content-lake-task-source.d.ts +7 -6
  77. package/dist/adapters/task-sources/content-lake-task-source.js +35 -39
  78. package/dist/adapters/task-sources/index.d.ts +3 -2
  79. package/dist/adapters/task-sources/index.js +3 -2
  80. package/dist/adapters/task-sources/repo-schemas.d.ts +218 -16
  81. package/dist/adapters/task-sources/repo-schemas.js +227 -19
  82. package/dist/adapters/task-sources/repo-task-source.d.ts +16 -12
  83. package/dist/adapters/task-sources/repo-task-source.js +92 -80
  84. package/dist/adapters/task-sources/repo-validation.d.ts +36 -5
  85. package/dist/adapters/task-sources/repo-validation.js +126 -5
  86. package/dist/adapters/task-sources/task-file-loader.d.ts +64 -0
  87. package/dist/adapters/task-sources/task-file-loader.js +83 -0
  88. package/dist/adapters/task-sources/yaml-task-source.d.ts +6 -6
  89. package/dist/adapters/task-sources/yaml-task-source.js +19 -16
  90. package/dist/cli.js +0 -2
  91. package/dist/commands/baseline.js +4 -1
  92. package/dist/commands/calculate-scores.js +1 -1
  93. package/dist/commands/coverage-audit.js +9 -1
  94. package/dist/commands/explain-handler.js +25 -23
  95. package/dist/commands/fetch-docs.js +3 -2
  96. package/dist/commands/generate-configs.js +1 -1
  97. package/dist/commands/init.d.ts +6 -4
  98. package/dist/commands/init.js +302 -23
  99. package/dist/commands/interactive.js +11 -7
  100. package/dist/commands/pipeline-action.d.ts +2 -0
  101. package/dist/commands/pipeline-action.js +16 -6
  102. package/dist/commands/pipeline.d.ts +1 -0
  103. package/dist/commands/pipeline.js +4 -2
  104. package/dist/commands/pr-comment.js +1 -1
  105. package/dist/commands/publish.js +2 -2
  106. package/dist/commands/readiness-report.js +13 -6
  107. package/dist/commands/validate-tasks.d.ts +2 -2
  108. package/dist/commands/validate-tasks.js +26 -15
  109. package/dist/composition-root.d.ts +13 -1
  110. package/dist/composition-root.js +99 -4
  111. package/dist/index.d.ts +41 -0
  112. package/dist/index.js +48 -0
  113. package/dist/orchestration/build-app-context.js +1 -0
  114. package/dist/orchestration/build-step-sequence.js +28 -8
  115. package/dist/orchestration/steps/calculate-scores-step.js +24 -11
  116. package/dist/orchestration/steps/fetch-docs-step.js +8 -7
  117. package/dist/orchestration/steps/gap-analysis-step.js +8 -7
  118. package/dist/orchestration/steps/generate-configs-step.d.ts +16 -3
  119. package/dist/orchestration/steps/generate-configs-step.js +261 -51
  120. package/dist/orchestration/steps/grader-consistency-step.js +7 -4
  121. package/dist/orchestration/steps/mirror-repo-tasks-step.js +1 -1
  122. package/dist/orchestration/steps/readiness-step.js +5 -6
  123. package/dist/orchestration/steps/run-eval-step.d.ts +1 -2
  124. package/dist/orchestration/steps/run-eval-step.js +8 -7
  125. package/dist/pipeline/cache.d.ts +1 -1
  126. package/dist/pipeline/cache.js +36 -8
  127. package/dist/pipeline/calculate-scores.d.ts +2 -4
  128. package/dist/pipeline/calculate-scores.js +43 -113
  129. package/dist/pipeline/checks.js +2 -2
  130. package/dist/pipeline/compare.js +8 -8
  131. package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.d.ts +10 -0
  132. package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js +288 -0
  133. package/dist/pipeline/compiler/__tests__/assertion-mapper.test.d.ts +9 -0
  134. package/dist/pipeline/compiler/__tests__/assertion-mapper.test.js +145 -0
  135. package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.d.ts +10 -0
  136. package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.js +314 -0
  137. package/dist/pipeline/compiler/__tests__/literacy-handler.test.d.ts +10 -0
  138. package/dist/pipeline/compiler/__tests__/literacy-handler.test.js +486 -0
  139. package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.d.ts +10 -0
  140. package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +392 -0
  141. package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.d.ts +9 -0
  142. package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.js +333 -0
  143. package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.d.ts +12 -0
  144. package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.js +210 -0
  145. package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.d.ts +7 -0
  146. package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.js +404 -0
  147. package/dist/pipeline/compiler/__tests__/scoring-bridge.test.d.ts +10 -0
  148. package/dist/pipeline/compiler/__tests__/scoring-bridge.test.js +184 -0
  149. package/dist/pipeline/compiler/__tests__/task-graph-builder.test.d.ts +8 -0
  150. package/dist/pipeline/compiler/__tests__/task-graph-builder.test.js +301 -0
  151. package/dist/pipeline/compiler/__tests__/telemetry.test.d.ts +9 -0
  152. package/dist/pipeline/compiler/__tests__/telemetry.test.js +503 -0
  153. package/dist/pipeline/compiler/assertion-mapper.d.ts +58 -0
  154. package/dist/pipeline/compiler/assertion-mapper.js +175 -0
  155. package/dist/pipeline/compiler/compiler-to-yaml.d.ts +51 -0
  156. package/dist/pipeline/compiler/compiler-to-yaml.js +222 -0
  157. package/dist/pipeline/compiler/config-loader.d.ts +56 -0
  158. package/dist/pipeline/compiler/config-loader.js +111 -0
  159. package/dist/pipeline/compiler/fixture-resolver.d.ts +41 -0
  160. package/dist/pipeline/compiler/fixture-resolver.js +113 -0
  161. package/dist/pipeline/compiler/hash.d.ts +11 -0
  162. package/dist/pipeline/compiler/hash.js +18 -0
  163. package/dist/pipeline/compiler/ignore-fields.d.ts +53 -0
  164. package/dist/pipeline/compiler/ignore-fields.js +113 -0
  165. package/dist/pipeline/compiler/index.d.ts +29 -0
  166. package/dist/pipeline/compiler/index.js +45 -0
  167. package/dist/pipeline/compiler/literacy-bridge.d.ts +102 -0
  168. package/dist/pipeline/compiler/literacy-bridge.js +172 -0
  169. package/dist/pipeline/compiler/mode-bases/agent-harness.d.ts +10 -0
  170. package/dist/pipeline/compiler/mode-bases/agent-harness.js +21 -0
  171. package/dist/pipeline/compiler/mode-bases/index.d.ts +4 -0
  172. package/dist/pipeline/compiler/mode-bases/index.js +4 -0
  173. package/dist/pipeline/compiler/mode-bases/knowledge-probe.d.ts +10 -0
  174. package/dist/pipeline/compiler/mode-bases/knowledge-probe.js +22 -0
  175. package/dist/pipeline/compiler/mode-bases/literacy.d.ts +12 -0
  176. package/dist/pipeline/compiler/mode-bases/literacy.js +78 -0
  177. package/dist/pipeline/compiler/mode-bases/mcp-server.d.ts +10 -0
  178. package/dist/pipeline/compiler/mode-bases/mcp-server.js +70 -0
  179. package/dist/pipeline/compiler/mode-handlers/__fixtures__/agent-harness-example-tasks.d.ts +14 -0
  180. package/dist/pipeline/compiler/mode-handlers/__fixtures__/agent-harness-example-tasks.js +152 -0
  181. package/dist/pipeline/compiler/mode-handlers/__fixtures__/knowledge-probe-example-tasks.d.ts +32 -0
  182. package/dist/pipeline/compiler/mode-handlers/__fixtures__/knowledge-probe-example-tasks.js +176 -0
  183. package/dist/pipeline/compiler/mode-handlers/__fixtures__/mcp-example-tasks.d.ts +49 -0
  184. package/dist/pipeline/compiler/mode-handlers/__fixtures__/mcp-example-tasks.js +259 -0
  185. package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.d.ts +43 -0
  186. package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.js +187 -0
  187. package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.d.ts +19 -0
  188. package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.js +138 -0
  189. package/dist/pipeline/compiler/mode-handlers/agent-harness/index.d.ts +16 -0
  190. package/dist/pipeline/compiler/mode-handlers/agent-harness/index.js +43 -0
  191. package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.d.ts +9 -0
  192. package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.js +29 -0
  193. package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.d.ts +12 -0
  194. package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.js +82 -0
  195. package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.d.ts +4 -0
  196. package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.js +19 -0
  197. package/dist/pipeline/compiler/mode-handlers/agent-harness/types.d.ts +49 -0
  198. package/dist/pipeline/compiler/mode-handlers/agent-harness/types.js +4 -0
  199. package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.d.ts +9 -0
  200. package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.js +16 -0
  201. package/dist/pipeline/compiler/mode-handlers/index.d.ts +15 -0
  202. package/dist/pipeline/compiler/mode-handlers/index.js +19 -0
  203. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.d.ts +16 -0
  204. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.js +61 -0
  205. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.d.ts +18 -0
  206. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.js +112 -0
  207. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.d.ts +26 -0
  208. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.js +49 -0
  209. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.d.ts +9 -0
  210. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.js +28 -0
  211. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.d.ts +44 -0
  212. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.js +4 -0
  213. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.d.ts +9 -0
  214. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.js +24 -0
  215. package/dist/pipeline/compiler/mode-handlers/literacy/assertions.d.ts +18 -0
  216. package/dist/pipeline/compiler/mode-handlers/literacy/assertions.js +118 -0
  217. package/dist/pipeline/compiler/mode-handlers/literacy/compiler.d.ts +14 -0
  218. package/dist/pipeline/compiler/mode-handlers/literacy/compiler.js +105 -0
  219. package/dist/pipeline/compiler/mode-handlers/literacy/index.d.ts +11 -0
  220. package/dist/pipeline/compiler/mode-handlers/literacy/index.js +38 -0
  221. package/dist/pipeline/compiler/mode-handlers/literacy/prompts.d.ts +9 -0
  222. package/dist/pipeline/compiler/mode-handlers/literacy/prompts.js +74 -0
  223. package/dist/pipeline/compiler/mode-handlers/literacy/types.d.ts +41 -0
  224. package/dist/pipeline/compiler/mode-handlers/literacy/types.js +4 -0
  225. package/dist/pipeline/compiler/mode-handlers/literacy/validation.d.ts +12 -0
  226. package/dist/pipeline/compiler/mode-handlers/literacy/validation.js +28 -0
  227. package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.d.ts +42 -0
  228. package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.js +334 -0
  229. package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.d.ts +19 -0
  230. package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.js +100 -0
  231. package/dist/pipeline/compiler/mode-handlers/mcp-server/index.d.ts +27 -0
  232. package/dist/pipeline/compiler/mode-handlers/mcp-server/index.js +54 -0
  233. package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.d.ts +8 -0
  234. package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.js +28 -0
  235. package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.d.ts +28 -0
  236. package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.js +104 -0
  237. package/dist/pipeline/compiler/mode-handlers/mcp-server/types.d.ts +37 -0
  238. package/dist/pipeline/compiler/mode-handlers/mcp-server/types.js +4 -0
  239. package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.d.ts +9 -0
  240. package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.js +43 -0
  241. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.d.ts +33 -0
  242. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.js +174 -0
  243. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.d.ts +19 -0
  244. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.js +95 -0
  245. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.d.ts +19 -0
  246. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.js +172 -0
  247. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.d.ts +14 -0
  248. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.js +16 -0
  249. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.d.ts +93 -0
  250. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.js +4 -0
  251. package/dist/pipeline/compiler/preset-loader.d.ts +22 -0
  252. package/dist/pipeline/compiler/preset-loader.js +99 -0
  253. package/dist/pipeline/compiler/presets/index.d.ts +9 -0
  254. package/dist/pipeline/compiler/presets/index.js +8 -0
  255. package/dist/pipeline/compiler/presets/sanity-literacy.d.ts +42 -0
  256. package/dist/pipeline/compiler/presets/sanity-literacy.js +208 -0
  257. package/dist/pipeline/compiler/promptfoo-compiler.d.ts +96 -0
  258. package/dist/pipeline/compiler/promptfoo-compiler.js +230 -0
  259. package/dist/pipeline/compiler/provider-assembler.d.ts +39 -0
  260. package/dist/pipeline/compiler/provider-assembler.js +137 -0
  261. package/dist/pipeline/compiler/sandbox/docker-sandbox.d.ts +21 -0
  262. package/dist/pipeline/compiler/sandbox/docker-sandbox.js +136 -0
  263. package/dist/pipeline/compiler/sandbox/fixture-provisioner.d.ts +69 -0
  264. package/dist/pipeline/compiler/sandbox/fixture-provisioner.js +189 -0
  265. package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.d.ts +20 -0
  266. package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.js +114 -0
  267. package/dist/pipeline/compiler/sandbox/index.d.ts +10 -0
  268. package/dist/pipeline/compiler/sandbox/index.js +11 -0
  269. package/dist/pipeline/compiler/sandbox/sandbox-selector.d.ts +35 -0
  270. package/dist/pipeline/compiler/sandbox/sandbox-selector.js +86 -0
  271. package/dist/pipeline/compiler/sandbox/sandbox-strategy.d.ts +81 -0
  272. package/dist/pipeline/compiler/sandbox/sandbox-strategy.js +15 -0
  273. package/dist/pipeline/compiler/sandbox/tempdir-sandbox.d.ts +20 -0
  274. package/dist/pipeline/compiler/sandbox/tempdir-sandbox.js +74 -0
  275. package/dist/pipeline/compiler/scoring-bridge.d.ts +49 -0
  276. package/dist/pipeline/compiler/scoring-bridge.js +114 -0
  277. package/dist/pipeline/compiler/task-graph-builder.d.ts +54 -0
  278. package/dist/pipeline/compiler/task-graph-builder.js +291 -0
  279. package/dist/pipeline/compiler/telemetry/cost-tracker.d.ts +90 -0
  280. package/dist/pipeline/compiler/telemetry/cost-tracker.js +146 -0
  281. package/dist/pipeline/compiler/telemetry/index.d.ts +14 -0
  282. package/dist/pipeline/compiler/telemetry/index.js +19 -0
  283. package/dist/pipeline/compiler/telemetry/redactor.d.ts +58 -0
  284. package/dist/pipeline/compiler/telemetry/redactor.js +222 -0
  285. package/dist/pipeline/compiler/telemetry/tool-classifier.d.ts +32 -0
  286. package/dist/pipeline/compiler/telemetry/tool-classifier.js +120 -0
  287. package/dist/pipeline/compiler/telemetry/trace-collector.d.ts +75 -0
  288. package/dist/pipeline/compiler/telemetry/trace-collector.js +297 -0
  289. package/dist/pipeline/compiler/telemetry/trace-store.d.ts +78 -0
  290. package/dist/pipeline/compiler/telemetry/trace-store.js +85 -0
  291. package/dist/pipeline/compiler/variable-resolver.d.ts +46 -0
  292. package/dist/pipeline/compiler/variable-resolver.js +115 -0
  293. package/dist/pipeline/coverage-audit.d.ts +15 -5
  294. package/dist/pipeline/coverage-audit.js +41 -22
  295. package/dist/pipeline/eval-constants.d.ts +16 -6
  296. package/dist/pipeline/eval-constants.js +25 -4
  297. package/dist/pipeline/eval-fingerprint.d.ts +2 -2
  298. package/dist/pipeline/eval-fingerprint.js +8 -9
  299. package/dist/pipeline/expand-tasks.d.ts +19 -10
  300. package/dist/pipeline/expand-tasks.js +34 -28
  301. package/dist/pipeline/gap-analysis.d.ts +1 -1
  302. package/dist/pipeline/gap-analysis.js +2 -2
  303. package/dist/pipeline/generate-configs.d.ts +22 -4
  304. package/dist/pipeline/generate-configs.js +53 -24
  305. package/dist/pipeline/grader-api.d.ts +3 -3
  306. package/dist/pipeline/grader-api.js +5 -12
  307. package/dist/pipeline/grader-compare-runner.js +20 -27
  308. package/dist/pipeline/grader-comparison.d.ts +4 -8
  309. package/dist/pipeline/grader-comparison.js +11 -17
  310. package/dist/pipeline/grader-consistency-runner.d.ts +2 -3
  311. package/dist/pipeline/grader-consistency-runner.js +16 -20
  312. package/dist/pipeline/grader-consistency.d.ts +6 -10
  313. package/dist/pipeline/grader-consistency.js +13 -32
  314. package/dist/pipeline/grader-sensitivity-runner.js +7 -5
  315. package/dist/pipeline/grader-sensitivity.d.ts +2 -6
  316. package/dist/pipeline/grader-sensitivity.js +10 -10
  317. package/dist/pipeline/grader-validate-runner.js +7 -5
  318. package/dist/pipeline/grader-validation.d.ts +2 -6
  319. package/dist/pipeline/grader-validation.js +14 -22
  320. package/dist/pipeline/map-request-to-config.js +7 -1
  321. package/dist/pipeline/mirror-repo-tasks.d.ts +13 -13
  322. package/dist/pipeline/mirror-repo-tasks.js +22 -21
  323. package/dist/pipeline/normalize-mode.d.ts +49 -0
  324. package/dist/pipeline/normalize-mode.js +64 -0
  325. package/dist/pipeline/plan.d.ts +5 -2
  326. package/dist/pipeline/plan.js +134 -78
  327. package/dist/pipeline/pr-comment.js +2 -0
  328. package/dist/pipeline/profile-resolution.d.ts +22 -14
  329. package/dist/pipeline/profile-resolution.js +41 -19
  330. package/dist/pipeline/provenance.d.ts +2 -2
  331. package/dist/pipeline/provenance.js +12 -17
  332. package/dist/pipeline/release-report.js +4 -4
  333. package/dist/pipeline/repo-threshold-evaluator.d.ts +1 -1
  334. package/dist/pipeline/repo-threshold-evaluator.js +1 -1
  335. package/dist/pipeline/rubric-loader.d.ts +20 -0
  336. package/dist/pipeline/rubric-loader.js +37 -0
  337. package/dist/pipeline/validate.d.ts +4 -4
  338. package/dist/pipeline/validate.js +64 -53
  339. package/dist/schedules/loader.js +18 -8
  340. package/dist/scripts/migrate-task-mode.d.ts +24 -0
  341. package/dist/scripts/migrate-task-mode.js +85 -0
  342. package/dist/scripts/migrate-tasks-to-content-lake.js +11 -10
  343. package/dist/scripts/validate-task-sources.d.ts +1 -1
  344. package/dist/scripts/validate-task-sources.js +15 -15
  345. package/dist/sinks/loader.js +5 -7
  346. package/dist/sources.d.ts +7 -7
  347. package/dist/sources.js +22 -24
  348. package/dist/webhook/dispatch.js +2 -1
  349. package/package.json +15 -4
  350. package/tasks/knowledge-probe/define-type-api.task.ts +55 -0
  351. package/tasks/knowledge-probe/groq-projections.task.ts +59 -0
  352. package/tasks/literacy/frameworks.task.ts +128 -0
  353. package/tasks/literacy/functions.task.ts +69 -0
  354. package/tasks/literacy/groq.task.ts +258 -0
  355. package/tasks/literacy/nextjs-live.task.ts +75 -0
  356. package/tasks/literacy/studio-setup.task.ts +131 -0
  357. package/tasks/literacy/visual-editing.task.ts +146 -0
  358. package/config/features.yaml +0 -116
  359. package/config/models.yaml +0 -116
  360. package/config/prompts.yaml +0 -75
  361. package/config/rubrics.yaml +0 -81
  362. package/config/schedules.yaml +0 -43
  363. package/config/sinks.yaml +0 -54
  364. package/config/sources.yaml +0 -51
  365. package/config/thresholds.yaml +0 -49
  366. package/dist/_vendor/ailf-tasks/cli.d.ts +0 -8
  367. package/dist/_vendor/ailf-tasks/cli.js +0 -61
  368. package/dist/_vendor/ailf-tasks/index.d.ts +0 -13
  369. package/dist/_vendor/ailf-tasks/index.js +0 -16
  370. package/dist/_vendor/ailf-tasks/parser.d.ts +0 -27
  371. package/dist/_vendor/ailf-tasks/parser.js +0 -73
  372. package/dist/_vendor/ailf-tasks/schemas.d.ts +0 -198
  373. package/dist/_vendor/ailf-tasks/schemas.js +0 -180
  374. package/dist/_vendor/ailf-tasks/validation.d.ts +0 -47
  375. package/dist/_vendor/ailf-tasks/validation.js +0 -162
  376. package/dist/agent-observer/test-imports.d.ts +0 -7
  377. package/dist/agent-observer/test-imports.js +0 -185
@@ -0,0 +1,503 @@
1
+ /**
2
+ * telemetry.test.ts — Tests for the observability & telemetry subsystem.
3
+ *
4
+ * Covers tool call classification, trace collection, cost tracking,
5
+ * redaction pipeline, trace storage, and per-turn trace merging.
6
+ *
7
+ * Run: npx tsx --test src/pipeline/compiler/__tests__/telemetry.test.ts
8
+ */
9
+ import assert from "node:assert/strict";
10
+ import { existsSync, rmSync } from "node:fs";
11
+ import { afterEach, describe, it } from "node:test";
12
+ import { tmpdir } from "os";
13
+ import { resolve } from "path";
14
+ import { classifyToolCall, classifyToolCalls, } from "../telemetry/tool-classifier.js";
15
+ import { collectTrace, mergeTraces } from "../telemetry/trace-collector.js";
16
+ import { checkBudget, computeCost, estimateRunCost, lookupPricing, } from "../telemetry/cost-tracker.js";
17
+ import { redactTrace } from "../telemetry/redactor.js";
18
+ import { extractTraceSummary, LocalTraceStore, } from "../telemetry/trace-store.js";
19
+ // ---------------------------------------------------------------------------
20
+ // Tool call classification
21
+ // ---------------------------------------------------------------------------
22
/**
 * Suite for `classifyToolCall`.
 *
 * Checks the category returned for a single tool name: names the test
 * expects to be known, names expected to be resolved heuristically, a
 * caller-supplied mapping passed as the second argument, and a name that
 * is expected to fall back to "execute".
 */
describe("classifyToolCall", () => {
  it("classifies known tools by exact name", () => {
    // [tool name, expected category] — asserted in this order.
    const knownTools = [
      ["WebSearch", "search"],
      ["Read", "read"],
      ["Write", "write"],
      ["Bash", "execute"],
      ["Browser.navigate", "navigate"],
      ["AskUser", "communicate"],
    ];
    for (const [toolName, expectedCategory] of knownTools) {
      assert.equal(classifyToolCall(toolName), expectedCategory);
    }
  });

  it("uses heuristic for unknown tools", () => {
    // [tool name, expected category] — asserted in this order.
    const heuristicTools = [
      ["custom_search_tool", "search"],
      ["ReadFromDB", "read"],
      ["writeConfig", "write"],
      ["executeScript", "execute"],
    ];
    for (const [toolName, expectedCategory] of heuristicTools) {
      assert.equal(classifyToolCall(toolName), expectedCategory);
    }
  });

  it("uses custom mappings over defaults", () => {
    const category = classifyToolCall("MyTool", { MyTool: "communicate" });
    assert.equal(category, "communicate");
  });

  it("falls back to execute for truly unknown tools", () => {
    const category = classifyToolCall("zzz_unknown_zzz");
    assert.equal(category, "execute");
  });
});
44
/**
 * Suite for `classifyToolCalls`.
 *
 * Passes a batch of three tool names and checks the `categories` and
 * `unrecognized` properties of the returned object.
 */
describe("classifyToolCalls", () => {
  it("classifies a batch and reports unrecognized names", () => {
    const toolNames = ["WebSearch", "Read", "zzz_mystery_tool"];
    const result = classifyToolCalls(toolNames);

    // One category per input name; the first two are checked by position.
    assert.equal(result.categories.length, 3);
    assert.equal(result.categories[0], "search");
    assert.equal(result.categories[1], "read");
    assert.ok(result.unrecognized.includes("zzz_mystery_tool"));
  });
});
57
+ // ---------------------------------------------------------------------------
58
+ // Trace collection
59
+ // ---------------------------------------------------------------------------
60
/**
 * Suite for `collectTrace`.
 *
 * Each case feeds a hand-built response object plus a shared options
 * object into `collectTrace` and inspects fields of the returned trace
 * (ids, tool calls, token usage, URLs, search terms, files, events, spans).
 */
describe("collectTrace", () => {
  // Options shared by every case in this suite.
  const baseOptions = {
    runId: "run-1",
    taskId: "task-1",
    testCaseIndex: 0,
    modelId: "openai:chat:gpt-4o",
  };

  /**
   * Collects a trace for a response whose `metadata.toolCalls` is `toolCalls`.
   * Any `extras` are spread after `metadata` as additional top-level
   * response fields.
   */
  const traceForToolCalls = (toolCalls, extras = {}) =>
    collectTrace({ metadata: { toolCalls }, ...extras }, baseOptions);

  it("creates a trace from an empty response", () => {
    const trace = collectTrace({}, baseOptions);

    assert.equal(trace.runId, "run-1");
    assert.equal(trace.taskId, "task-1");
    assert.equal(trace.modelId, "openai:chat:gpt-4o");
    assert.equal(trace.toolCalls.length, 0);
    assert.equal(trace.tokensUsed.totalTokens, 0);
  });

  it("extracts tool calls from metadata", () => {
    const trace = traceForToolCalls([
      { name: "WebSearch", input: { query: "GROQ" }, durationMs: 100 },
      { name: "Read", input: { path: "/docs/groq.md" }, durationMs: 50 },
    ]);

    assert.equal(trace.toolCalls.length, 2);
    assert.equal(trace.toolCalls[0].name, "WebSearch");
    assert.equal(trace.toolCalls[0].category, "search");
    assert.equal(trace.toolCalls[1].name, "Read");
    assert.equal(trace.toolCalls[1].category, "read");
  });

  it("extracts token usage", () => {
    const response = {
      tokenUsage: { prompt: 1000, completion: 500, total: 1500 },
    };
    const { tokensUsed } = collectTrace(response, baseOptions);

    assert.equal(tokensUsed.promptTokens, 1000);
    assert.equal(tokensUsed.completionTokens, 500);
    assert.equal(tokensUsed.totalTokens, 1500);
  });

  it("extracts URLs from tool calls", () => {
    const trace = traceForToolCalls([
      { name: "WebFetch", input: { url: "https://sanity.io/docs" } },
    ]);

    assert.ok(trace.urlsVisited.includes("https://sanity.io/docs"));
  });

  it("extracts search terms", () => {
    const trace = traceForToolCalls([
      { name: "WebSearch", input: { query: "GROQ projection" } },
    ]);

    assert.ok(trace.searchTerms.includes("GROQ projection"));
  });

  it("extracts files read and written", () => {
    const trace = traceForToolCalls([
      { name: "Read", input: { path: "/src/schema.ts" } },
      { name: "Write", input: { path: "/src/config.ts" } },
    ]);

    assert.ok(trace.filesRead.includes("/src/schema.ts"));
    assert.ok(trace.filesWritten.includes("/src/config.ts"));
  });

  it("creates event log from tool calls", () => {
    const trace = traceForToolCalls(
      [{ name: "WebSearch", input: { query: "test" }, durationMs: 100 }],
      { latencyMs: 500 },
    );

    // Expected event sequence for a single tool call, in order.
    const expectedTypes = [
      "llm_request",
      "tool_call_start",
      "tool_call_end",
      "llm_response",
    ];
    assert.equal(trace.events.length, 4);
    expectedTypes.forEach((eventType, position) => {
      assert.equal(trace.events[position].type, eventType);
    });
  });

  it("builds a root span", () => {
    const { spans } = collectTrace({ latencyMs: 1000 }, baseOptions);

    assert.equal(spans.length, 1);
    assert.equal(spans[0].operation, "test-case");
    assert.equal(spans[0].parentSpanId, null);
  });
});
151
+ // ---------------------------------------------------------------------------
152
+ // mergeTraces (per-turn tracing — task 6f)
153
+ // ---------------------------------------------------------------------------
154
+ describe("mergeTraces", () => { // covers merging per-turn traces (built with collectTrace) into a single trace
155
+ const parentOptions = { // options passed to mergeTraces; turns reuse them with their own testCaseIndex
156
+ runId: "run-1",
157
+ taskId: "task-1",
158
+ testCaseIndex: 0,
159
+ modelId: "openai:chat:gpt-4o",
160
+ };
161
+ function makeTurn(index) { // one turn = one WebSearch call (50 ms), 100/50/150 tokens, 200 ms latency; index doubles as testCaseIndex
162
+ return collectTrace({
163
+ metadata: {
164
+ toolCalls: [
165
+ {
166
+ name: "WebSearch",
167
+ input: { query: `turn ${index}` },
168
+ durationMs: 50,
169
+ },
170
+ ],
171
+ },
172
+ tokenUsage: { prompt: 100, completion: 50, total: 150 },
173
+ latencyMs: 200,
174
+ }, { ...parentOptions, testCaseIndex: index });
175
+ }
176
+ it("merges multiple turns into one trace", () => {
177
+ const turns = [makeTurn(0), makeTurn(1), makeTurn(2)];
178
+ const merged = mergeTraces(turns, parentOptions);
179
+ assert.equal(merged.toolCalls.length, 3); // one tool call per turn
180
+ assert.equal(merged.tokensUsed.promptTokens, 300); // 3 turns x 100 prompt tokens
181
+ assert.equal(merged.tokensUsed.completionTokens, 150); // 3 turns x 50 completion tokens
182
+ assert.equal(merged.durationMs, 600); // 3 turns x 200 ms latency
183
+ });
184
+ it("creates per-turn spans under root", () => {
185
+ const turns = [makeTurn(0), makeTurn(1)];
186
+ const merged = mergeTraces(turns, parentOptions);
187
+ // root + 2 turns
188
+ assert.equal(merged.spans.length, 3);
189
+ assert.equal(merged.spans[0].operation, "test-case");
190
+ assert.equal(merged.spans[0].parentSpanId, null);
191
+ assert.equal(merged.spans[1].operation, "turn-0");
192
+ assert.equal(merged.spans[1].parentSpanId, merged.spans[0].spanId); // turn span is parented to the root span
193
+ assert.equal(merged.spans[2].operation, "turn-1");
194
+ });
195
+ it("deduplicates URLs and search terms", () => { // both turns issue the identical query and URL
196
+ const t1 = collectTrace({
197
+ metadata: {
198
+ toolCalls: [
199
+ { name: "WebSearch", input: { query: "GROQ" } },
200
+ { name: "WebFetch", input: { url: "https://sanity.io" } },
201
+ ],
202
+ },
203
+ }, { ...parentOptions, testCaseIndex: 0 });
204
+ const t2 = collectTrace({
205
+ metadata: {
206
+ toolCalls: [
207
+ { name: "WebSearch", input: { query: "GROQ" } },
208
+ { name: "WebFetch", input: { url: "https://sanity.io" } },
209
+ ],
210
+ },
211
+ }, { ...parentOptions, testCaseIndex: 1 });
212
+ const merged = mergeTraces([t1, t2], parentOptions);
213
+ assert.equal(merged.searchTerms.length, 1); // deduplicated
214
+ assert.equal(merged.urlsVisited.length, 1); // deduplicated
215
+ });
216
+ it("handles empty turns", () => { // merging zero turns must not throw
217
+ const merged = mergeTraces([], parentOptions);
218
+ assert.equal(merged.toolCalls.length, 0);
219
+ assert.equal(merged.spans.length, 1); // root only
220
+ });
221
+ });
222
+ // ---------------------------------------------------------------------------
223
+ // Cost tracking
224
+ // ---------------------------------------------------------------------------
225
+ describe("computeCost", () => { // computeCost(tokenUsage, pricing) -> number; rates are applied per 1M tokens in the arithmetic comments below
225
+ it("computes cost from token usage and pricing", () => {
226
+ const cost = computeCost({ promptTokens: 1000, completionTokens: 500, totalTokens: 1500 }, { input: 3.0, output: 15.0 });
227
+ // 1000 * 3.0/1M + 500 * 15.0/1M = 0.003 + 0.0075 = 0.0105
228
+ assert.ok(Math.abs(cost - 0.0105) < 0.0001); // tolerance comparison rather than exact float equality
229
+ });
230
+ it("accounts for cached input tokens", () => {
231
+ const cost = computeCost({
232
+ promptTokens: 1000,
233
+ completionTokens: 500,
234
+ totalTokens: 1500,
235
+ toolTokens: 300, // NOTE(review): the arithmetic below bills these 300 tokens at the cachedInput rate — confirm against computeCost
236
+ }, { input: 3.0, output: 15.0, cachedInput: 0.3 });
237
+ // 700 * 3.0/1M + 300 * 0.3/1M + 500 * 15.0/1M = 0.0021 + 0.00009 + 0.0075 = 0.00969
238
+ assert.ok(cost > 0); // NOTE(review): only loose bounds are asserted; the 0.00969 figure above is not pinned
239
+ assert.ok(cost < 0.02);
240
+ });
241
+ });
243
+ describe("lookupPricing", () => { // lookupPricing(modelId, customPricing?) -> pricing object or undefined
243
+ it("finds exact match", () => { // no custom table is passed here
244
+ const pricing = lookupPricing("openai:chat:gpt-4o");
245
+ assert.ok(pricing);
246
+ assert.ok(pricing.input > 0);
247
+ });
248
+ it("falls back to prefix match", () => {
249
+ const pricing = lookupPricing("openai:chat:gpt-4o-2024-11-20"); // dated variant of the id used in the exact-match test
250
+ assert.ok(pricing);
251
+ });
252
+ it("returns undefined for unknown model", () => {
253
+ const pricing = lookupPricing("unknown:model:xyz");
254
+ assert.equal(pricing, undefined);
255
+ });
256
+ it("uses custom pricing over defaults", () => {
257
+ const pricing = lookupPricing("custom:model", { // second argument is a caller-supplied table keyed by model id
258
+ "custom:model": { input: 1.0, output: 2.0 },
259
+ });
260
+ assert.ok(pricing);
261
+ assert.equal(pricing.input, 1.0);
262
+ });
263
+ });
265
+ describe("estimateRunCost", () => { // result exposes totalUSD, perModel and exceedsWarning, per the assertions below
265
+ it("estimates cost for a run", () => {
266
+ const estimate = estimateRunCost(5, ["openai:chat:gpt-4o"]); // NOTE(review): the first argument looks like a test-case count — confirm against estimateRunCost
267
+ assert.ok(estimate.totalUSD > 0);
268
+ assert.equal(estimate.perModel.length, 1); // one entry for the single model id passed in
269
+ });
270
+ it("flags budget warning", () => {
271
+ const estimate = estimateRunCost(100, ["openai:chat:gpt-4o", "anthropic:messages:claude-sonnet-4-6"], { perRun: { warn: 0.01, stop: 1.0 } }); // warn threshold of 0.01 is set low so the estimate is expected to exceed it
272
+ assert.equal(estimate.exceedsWarning, true);
273
+ });
274
+ });
276
+ describe("checkBudget", () => { // checkBudget(spend, budgets, scopeKey) -> { proceed, warning? }; every case uses warn 5.0 / stop 20.0
277
+ it("allows spend below thresholds", () => {
278
+ const result = checkBudget(1.0, { perRun: { warn: 5.0, stop: 20.0 } }, "perRun"); // 1.0 < warn (5.0)
279
+ assert.equal(result.proceed, true);
280
+ assert.equal(result.warning, undefined);
281
+ });
282
+ it("warns at warn threshold", () => {
283
+ const result = checkBudget(5.5, { perRun: { warn: 5.0, stop: 20.0 } }, "perRun"); // warn (5.0) < 5.5 < stop (20.0)
284
+ assert.equal(result.proceed, true); // a warning alone does not block the run
285
+ assert.ok(result.warning?.includes("warning"));
286
+ });
287
+ it("stops at stop threshold", () => {
288
+ const result = checkBudget(25.0, { perRun: { warn: 5.0, stop: 20.0 } }, "perRun"); // 25.0 > stop (20.0)
289
+ assert.equal(result.proceed, false);
290
+ assert.ok(result.warning?.includes("exceeded"));
291
+ });
292
+ });
293
+ // ---------------------------------------------------------------------------
294
+ // Redaction
295
+ // ---------------------------------------------------------------------------
296
+ describe("redactTrace", () => { // redactTrace(trace) -> { trace, redactionCount, rulesApplied }, per the destructuring in the tests below
297
+ function makeTrace(toolCalls) { // minimal trace fixture: everything empty or zero except the supplied toolCalls
298
+ return {
299
+ traceId: "trace-1",
300
+ runId: "run-1",
301
+ taskId: "task-1",
302
+ testCaseIndex: 0,
303
+ modelId: "openai:chat:gpt-4o",
304
+ spans: [],
305
+ toolCalls,
306
+ urlsVisited: [],
307
+ searchTerms: [],
308
+ filesRead: [],
309
+ filesWritten: [],
310
+ tokensUsed: { promptTokens: 0, completionTokens: 0, totalTokens: 0 },
311
+ costEstimate: 0,
312
+ durationMs: 0,
313
+ events: [],
314
+ startedAt: new Date().toISOString(),
315
+ completedAt: new Date().toISOString(),
316
+ };
317
+ }
318
+ it("redacts Bearer tokens in tool call inputs", () => {
319
+ const trace = makeTrace([
320
+ {
321
+ name: "WebFetch",
322
+ input: {
323
+ url: "https://api.sanity.io",
324
+ auth: "Bearer sk_live_abc123def456ghi789",
325
+ },
326
+ output: "OK",
327
+ durationMs: 100,
328
+ category: "read",
329
+ },
330
+ ]);
331
+ const { trace: redacted, redactionCount } = redactTrace(trace);
332
+ const inputStr = JSON.stringify(redacted.toolCalls[0].input); // serialise so the check covers every field of the input
333
+ assert.ok(!inputStr.includes("sk_live_abc123def456ghi789")); // the secret itself must be gone
334
+ assert.ok(inputStr.includes("[REDACTED]"));
335
+ assert.ok(redactionCount > 0);
336
+ });
337
+ it("redacts Sanity tokens", () => {
338
+ const trace = makeTrace([
339
+ {
340
+ name: "Write",
341
+ input: { token: "skAbcDefGhiJklMnoPqrStUvWxYz0123456789" },
342
+ output: null,
343
+ durationMs: 10,
344
+ category: "write",
345
+ },
346
+ ]);
347
+ const { trace: redacted } = redactTrace(trace);
348
+ const inputStr = JSON.stringify(redacted.toolCalls[0].input);
349
+ assert.ok(inputStr.includes("[REDACTED_SANITY_TOKEN]")); // a dedicated placeholder is expected for Sanity tokens
350
+ });
351
+ it("redacts OpenAI keys", () => {
352
+ const trace = makeTrace([
353
+ {
354
+ name: "Bash",
355
+ input: {
356
+ command: "export OPENAI_API_KEY=sk-proj-abcdefghij1234567890abcdefghij",
357
+ },
358
+ output: null,
359
+ durationMs: 10,
360
+ category: "execute",
361
+ },
362
+ ]);
363
+ const { trace: redacted } = redactTrace(trace);
364
+ const inputStr = JSON.stringify(redacted.toolCalls[0].input);
365
+ assert.ok(!inputStr.includes("sk-proj-abcdefghij1234567890abcdefghij"), "OpenAI key should be redacted"); // only absence of the key is checked, not which placeholder replaced it
366
+ });
367
+ it("does not mutate the original trace", () => {
368
+ const original = makeTrace([
369
+ {
370
+ name: "WebFetch",
371
+ input: { auth: "Bearer secrettoken1234567890" },
372
+ output: null,
373
+ durationMs: 10,
374
+ category: "read",
375
+ },
376
+ ]);
377
+ const originalStr = JSON.stringify(original); // snapshot taken before redaction
378
+ redactTrace(original);
379
+ assert.equal(JSON.stringify(original), originalStr); // input object must serialise identically after the call
380
+ });
381
+ it("reports which rules fired", () => {
382
+ const trace = makeTrace([
383
+ {
384
+ name: "Bash",
385
+ input: {
386
+ cmd: "curl -H 'Authorization: Bearer abc123def456789' https://api.example.com",
387
+ },
388
+ output: null,
389
+ durationMs: 10,
390
+ category: "execute",
391
+ },
392
+ ]);
393
+ const { rulesApplied } = redactTrace(trace);
394
+ assert.ok(rulesApplied.includes("bearer_tokens")); // rulesApplied holds rule names; this input should trigger bearer_tokens
395
+ });
396
+ });
397
+ // ---------------------------------------------------------------------------
398
+ // Trace storage
399
+ // ---------------------------------------------------------------------------
400
+ describe("LocalTraceStore", () => { // exercises store() / retrieve() against a directory on local disk
401
+ const storeDir = resolve(tmpdir(), `ailf-trace-test-${process.pid}`); // directory name includes the process id, presumably to avoid collisions between concurrent runs
402
+ afterEach(() => { // remove the store directory after every test
403
+ if (existsSync(storeDir)) {
404
+ rmSync(storeDir, { recursive: true, force: true });
405
+ }
406
+ });
407
+ it("stores and retrieves a trace", async () => {
408
+ const store = new LocalTraceStore(storeDir);
409
+ const trace = {
410
+ traceId: "trace-store-test",
411
+ runId: "run-1",
412
+ taskId: "task-1",
413
+ testCaseIndex: 0,
414
+ modelId: "openai:chat:gpt-4o",
415
+ spans: [],
416
+ toolCalls: [],
417
+ urlsVisited: [],
418
+ searchTerms: [],
419
+ filesRead: [],
420
+ filesWritten: [],
421
+ tokensUsed: { promptTokens: 100, completionTokens: 50, totalTokens: 150 },
422
+ costEstimate: 0.001,
423
+ durationMs: 500,
424
+ events: [],
425
+ startedAt: new Date().toISOString(),
426
+ completedAt: new Date().toISOString(),
427
+ };
428
+ const result = await store.store(trace);
429
+ assert.ok(result.uri.startsWith("file://")); // the local store is expected to return a file:// URI
430
+ assert.ok(result.sizeBytes > 0);
431
+ const retrieved = await store.retrieve(result.uri); // round-trip via the URI returned by store()
432
+ assert.ok(retrieved);
433
+ assert.equal(retrieved.traceId, "trace-store-test");
434
+ });
435
+ it("returns null for non-existent trace", async () => {
436
+ const store = new LocalTraceStore(storeDir);
437
+ const result = await store.retrieve("file:///nonexistent/path.json"); // a missing file must resolve to null rather than reject
438
+ assert.equal(result, null);
439
+ });
440
+ });
441
+ // ---------------------------------------------------------------------------
442
+ // Trace summary extraction
443
+ // ---------------------------------------------------------------------------
444
+ describe("extractTraceSummary", () => { // extractTraceSummary(trace, traceDataUri) -> summary made of ids, counts and totals
445
+ it("extracts sanitized summary from full trace", () => {
446
+ const trace = {
447
+ traceId: "trace-summary-test",
448
+ runId: "run-1",
449
+ taskId: "task-1",
450
+ testCaseIndex: 0,
451
+ modelId: "openai:chat:gpt-4o",
452
+ spans: [],
453
+ toolCalls: [ // 1 "search" + 2 "read" calls; these drive the category counts asserted below
454
+ {
455
+ name: "WebSearch",
456
+ input: {},
457
+ output: null,
458
+ durationMs: 100,
459
+ category: "search",
460
+ },
461
+ {
462
+ name: "Read",
463
+ input: {},
464
+ output: null,
465
+ durationMs: 50,
466
+ category: "read",
467
+ },
468
+ {
469
+ name: "Read",
470
+ input: {},
471
+ output: null,
472
+ durationMs: 30,
473
+ category: "read",
474
+ },
475
+ ],
476
+ urlsVisited: ["https://sanity.io/docs"],
477
+ searchTerms: ["GROQ"],
478
+ filesRead: ["/src/schema.ts"],
479
+ filesWritten: [],
480
+ tokensUsed: {
481
+ promptTokens: 1000,
482
+ completionTokens: 500,
483
+ totalTokens: 1500,
484
+ },
485
+ costEstimate: 0.01,
486
+ durationMs: 2000,
487
+ events: [],
488
+ startedAt: new Date().toISOString(),
489
+ completedAt: new Date().toISOString(),
490
+ };
491
+ const summary = extractTraceSummary(trace, "file:///traces/trace-1.json"); // second argument is expected back as summary.traceDataUri
492
+ assert.equal(summary.traceId, "trace-summary-test");
493
+ assert.equal(summary.traceDataUri, "file:///traces/trace-1.json");
494
+ assert.equal(summary.toolCallCount, 3);
495
+ assert.equal(summary.toolCallCategories.search, 1); // counts are keyed by each tool call's category
496
+ assert.equal(summary.toolCallCategories.read, 2);
497
+ assert.equal(summary.totalTokens, 1500);
498
+ assert.equal(summary.costEstimate, 0.01);
499
+ assert.equal(summary.urlsVisitedCount, 1); // the summary carries list lengths for URLs and files
500
+ assert.equal(summary.filesReadCount, 1);
501
+ assert.equal(summary.filesWrittenCount, 0);
502
+ });
503
+ });
@@ -0,0 +1,58 @@
1
+ /**
2
+ * Assertion type mapper — maps AILF assertion types to Promptfoo assertion types.
3
+ *
4
+ * AILF assertions have two flavors:
5
+ * 1. Templated assertions (`type: "llm-rubric"` with `template` + `criteria`)
6
+ * → resolved into Promptfoo's `llm-rubric` with a fully assembled rubric prompt
7
+ * 2. Value assertions (any other `type` with a `value`)
8
+ * → passed through to Promptfoo mostly as-is
9
+ *
10
+ * This module handles the mapping for both, validates mode compatibility
11
+ * (e.g., `tool-called` is only valid for agent-harness/mcp-server modes),
12
+ * and normalizes weight fields.
13
+ *
14
+ * @see docs/design-docs/architecture-overhaul/scoring-rubrics-assertions.md
15
+ * @see docs/exec-plans/architecture-overhaul/phase-2-config-compiler.md
16
+ */
17
+ import type { GeneralizedAssertionDefinition } from "../../_vendor/ailf-core/index.d.ts";
18
+ import type { EvalMode } from "../../_vendor/ailf-shared/index.d.ts";
19
+ /** A Promptfoo-compatible assertion object; `mapAssertions` returns these in its `mapped` array. Only `type` is required. */
20
+ export interface PromptfooAssertion {
21
+ type: string;
22
+ value?: unknown;
23
+ weight?: number;
24
+ /** Promptfoo-specific: provider for model-graded assertions */
25
+ provider?: string;
26
+ /** Promptfoo-specific: rubric prompt text */
27
+ rubricPrompt?: string;
28
+ /** Promptfoo-specific: threshold for similarity */
29
+ threshold?: number;
30
+ /** Additional properties passed through; any other string key is accepted and typed as `unknown` */
31
+ [key: string]: unknown;
32
+ }
33
+ /** Options for mapping assertions; both fields are optional. */
34
+ export interface AssertionMapperOptions {
35
+ /** Evaluation mode — used for compatibility checking */
36
+ mode?: EvalMode;
37
+ /** Default grader provider (for LLM-graded assertions) */
38
+ graderProvider?: string;
39
+ }
40
+ /**
41
+ * Map an array of AILF assertions to Promptfoo assertions.
42
+ *
43
+ * @param assertions - AILF assertion definitions
44
+ * @param options - Mapper options (optional; see `AssertionMapperOptions`)
45
+ * @returns An object with `mapped` (the Promptfoo assertions) and `warnings` (warning messages as strings)
46
+ */
47
+ export declare function mapAssertions(assertions: GeneralizedAssertionDefinition[], options?: AssertionMapperOptions): {
48
+ mapped: PromptfooAssertion[];
49
+ warnings: string[];
50
+ };
51
+ /**
52
+ * Check if an assertion type is valid. Takes the type name as a string and returns a boolean. NOTE(review): the set of valid types lives in the implementation, not in this declaration.
53
+ */
54
+ export declare function isValidAssertionType(type: string): boolean;
55
+ /**
56
+ * Check if an assertion type is compatible with a given mode (e.g. per the module header, `tool-called` is only valid for agent-harness/mcp-server modes). Returns a boolean.
57
+ */
58
+ export declare function isAssertionCompatibleWithMode(type: string, mode: EvalMode): boolean;