@sanity/ailf 0.5.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (377) hide show
  1. package/README.md +0 -1
  2. package/config/features.ts +23 -0
  3. package/config/models.ts +95 -0
  4. package/config/prompts.ts +16 -0
  5. package/config/rubrics.ts +225 -0
  6. package/config/schedules.ts +47 -0
  7. package/config/sinks.ts +37 -0
  8. package/config/sources.ts +21 -0
  9. package/config/thresholds.ts +61 -0
  10. package/dist/_vendor/ailf-core/config-helpers.d.ts +171 -0
  11. package/dist/_vendor/ailf-core/config-helpers.js +170 -0
  12. package/dist/_vendor/ailf-core/env-helper.d.ts +35 -0
  13. package/dist/_vendor/ailf-core/env-helper.js +45 -0
  14. package/dist/_vendor/ailf-core/examples/index.d.ts +16 -0
  15. package/dist/_vendor/ailf-core/examples/index.js +25 -0
  16. package/dist/_vendor/ailf-core/index.d.ts +3 -0
  17. package/dist/_vendor/ailf-core/index.js +5 -0
  18. package/dist/_vendor/ailf-core/ports/context.d.ts +17 -2
  19. package/dist/_vendor/ailf-core/ports/doc-fetcher.d.ts +2 -2
  20. package/dist/_vendor/ailf-core/ports/index.d.ts +2 -1
  21. package/dist/_vendor/ailf-core/ports/mode-handler.d.ts +129 -0
  22. package/dist/_vendor/ailf-core/ports/mode-handler.js +19 -0
  23. package/dist/_vendor/ailf-core/ports/task-source.d.ts +16 -122
  24. package/dist/_vendor/ailf-core/ports/task-source.js +7 -7
  25. package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +8 -2
  26. package/dist/_vendor/ailf-core/schemas/eval-config.js +17 -2
  27. package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +9 -3
  28. package/dist/_vendor/ailf-core/schemas/pipeline-request.js +8 -1
  29. package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +14 -31
  30. package/dist/_vendor/ailf-core/schemas/pipeline.js +17 -9
  31. package/dist/_vendor/ailf-core/schemas/schedules.d.ts +14 -4
  32. package/dist/_vendor/ailf-core/schemas/schedules.js +6 -2
  33. package/dist/_vendor/ailf-core/schemas/sinks.d.ts +1 -1
  34. package/dist/_vendor/ailf-core/services/comparison-formatters.js +57 -19
  35. package/dist/_vendor/ailf-core/services/index.d.ts +2 -1
  36. package/dist/_vendor/ailf-core/services/index.js +2 -1
  37. package/dist/_vendor/ailf-core/services/scoring-engine.d.ts +153 -0
  38. package/dist/_vendor/ailf-core/services/scoring-engine.js +237 -0
  39. package/dist/_vendor/ailf-core/services/scoring.d.ts +15 -2
  40. package/dist/_vendor/ailf-core/services/scoring.js +25 -15
  41. package/dist/_vendor/ailf-core/types/branded-ids.d.ts +137 -0
  42. package/dist/_vendor/ailf-core/types/branded-ids.js +136 -0
  43. package/dist/_vendor/ailf-core/types/eval-mode-config.d.ts +150 -0
  44. package/dist/_vendor/ailf-core/types/eval-mode-config.js +24 -0
  45. package/dist/_vendor/ailf-core/types/generalized-task.d.ts +332 -0
  46. package/dist/_vendor/ailf-core/types/generalized-task.js +13 -0
  47. package/dist/_vendor/ailf-core/types/index.d.ts +45 -83
  48. package/dist/_vendor/ailf-core/types/index.js +8 -1
  49. package/dist/_vendor/ailf-core/types/plugin-registry.d.ts +257 -0
  50. package/dist/_vendor/ailf-core/types/plugin-registry.js +185 -0
  51. package/dist/_vendor/ailf-core/types/storage-schema.d.ts +199 -0
  52. package/dist/_vendor/ailf-core/types/storage-schema.js +39 -0
  53. package/dist/_vendor/ailf-core/types/task-graph.d.ts +86 -0
  54. package/dist/_vendor/ailf-core/types/task-graph.js +20 -0
  55. package/dist/_vendor/ailf-core/types/trace.d.ts +118 -0
  56. package/dist/_vendor/ailf-core/types/trace.js +18 -0
  57. package/dist/_vendor/ailf-core/types/variable-envelope.d.ts +80 -0
  58. package/dist/_vendor/ailf-core/types/variable-envelope.js +16 -0
  59. package/dist/_vendor/ailf-shared/dimension-names.d.ts +5 -18
  60. package/dist/_vendor/ailf-shared/dimension-names.js +6 -24
  61. package/dist/_vendor/ailf-shared/eval-modes.d.ts +38 -6
  62. package/dist/_vendor/ailf-shared/eval-modes.js +26 -2
  63. package/dist/_vendor/ailf-shared/index.d.ts +0 -1
  64. package/dist/_vendor/ailf-shared/index.js +0 -1
  65. package/dist/adapters/api-client/build-request.js +14 -13
  66. package/dist/adapters/config-sources/file-config-adapter.d.ts +20 -11
  67. package/dist/adapters/config-sources/file-config-adapter.js +39 -12
  68. package/dist/adapters/config-sources/index.d.ts +2 -0
  69. package/dist/adapters/config-sources/index.js +1 -0
  70. package/dist/adapters/config-sources/ts-config-loader.d.ts +59 -0
  71. package/dist/adapters/config-sources/ts-config-loader.js +141 -0
  72. package/dist/adapters/doc-fetchers/sanity-doc-fetcher.d.ts +3 -2
  73. package/dist/adapters/doc-fetchers/sanity-doc-fetcher.js +7 -2
  74. package/dist/adapters/task-sources/composite-task-source.d.ts +3 -3
  75. package/dist/adapters/task-sources/composite-task-source.js +1 -1
  76. package/dist/adapters/task-sources/content-lake-task-source.d.ts +7 -6
  77. package/dist/adapters/task-sources/content-lake-task-source.js +35 -39
  78. package/dist/adapters/task-sources/index.d.ts +3 -2
  79. package/dist/adapters/task-sources/index.js +3 -2
  80. package/dist/adapters/task-sources/repo-schemas.d.ts +218 -16
  81. package/dist/adapters/task-sources/repo-schemas.js +227 -19
  82. package/dist/adapters/task-sources/repo-task-source.d.ts +16 -12
  83. package/dist/adapters/task-sources/repo-task-source.js +92 -80
  84. package/dist/adapters/task-sources/repo-validation.d.ts +36 -5
  85. package/dist/adapters/task-sources/repo-validation.js +126 -5
  86. package/dist/adapters/task-sources/task-file-loader.d.ts +64 -0
  87. package/dist/adapters/task-sources/task-file-loader.js +83 -0
  88. package/dist/adapters/task-sources/yaml-task-source.d.ts +6 -6
  89. package/dist/adapters/task-sources/yaml-task-source.js +19 -16
  90. package/dist/cli.js +0 -2
  91. package/dist/commands/baseline.js +4 -1
  92. package/dist/commands/calculate-scores.js +1 -1
  93. package/dist/commands/coverage-audit.js +9 -1
  94. package/dist/commands/explain-handler.js +25 -23
  95. package/dist/commands/fetch-docs.js +3 -2
  96. package/dist/commands/generate-configs.js +1 -1
  97. package/dist/commands/init.d.ts +6 -4
  98. package/dist/commands/init.js +302 -23
  99. package/dist/commands/interactive.js +11 -7
  100. package/dist/commands/pipeline-action.d.ts +2 -0
  101. package/dist/commands/pipeline-action.js +16 -6
  102. package/dist/commands/pipeline.d.ts +1 -0
  103. package/dist/commands/pipeline.js +4 -2
  104. package/dist/commands/pr-comment.js +1 -1
  105. package/dist/commands/publish.js +2 -2
  106. package/dist/commands/readiness-report.js +13 -6
  107. package/dist/commands/validate-tasks.d.ts +2 -2
  108. package/dist/commands/validate-tasks.js +26 -15
  109. package/dist/composition-root.d.ts +13 -1
  110. package/dist/composition-root.js +99 -4
  111. package/dist/index.d.ts +41 -0
  112. package/dist/index.js +48 -0
  113. package/dist/orchestration/build-app-context.js +1 -0
  114. package/dist/orchestration/build-step-sequence.js +28 -8
  115. package/dist/orchestration/steps/calculate-scores-step.js +24 -11
  116. package/dist/orchestration/steps/fetch-docs-step.js +8 -7
  117. package/dist/orchestration/steps/gap-analysis-step.js +8 -7
  118. package/dist/orchestration/steps/generate-configs-step.d.ts +16 -3
  119. package/dist/orchestration/steps/generate-configs-step.js +261 -51
  120. package/dist/orchestration/steps/grader-consistency-step.js +7 -4
  121. package/dist/orchestration/steps/mirror-repo-tasks-step.js +1 -1
  122. package/dist/orchestration/steps/readiness-step.js +5 -6
  123. package/dist/orchestration/steps/run-eval-step.d.ts +1 -2
  124. package/dist/orchestration/steps/run-eval-step.js +8 -7
  125. package/dist/pipeline/cache.d.ts +1 -1
  126. package/dist/pipeline/cache.js +36 -8
  127. package/dist/pipeline/calculate-scores.d.ts +2 -4
  128. package/dist/pipeline/calculate-scores.js +43 -113
  129. package/dist/pipeline/checks.js +2 -2
  130. package/dist/pipeline/compare.js +8 -8
  131. package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.d.ts +10 -0
  132. package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js +288 -0
  133. package/dist/pipeline/compiler/__tests__/assertion-mapper.test.d.ts +9 -0
  134. package/dist/pipeline/compiler/__tests__/assertion-mapper.test.js +145 -0
  135. package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.d.ts +10 -0
  136. package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.js +314 -0
  137. package/dist/pipeline/compiler/__tests__/literacy-handler.test.d.ts +10 -0
  138. package/dist/pipeline/compiler/__tests__/literacy-handler.test.js +486 -0
  139. package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.d.ts +10 -0
  140. package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +392 -0
  141. package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.d.ts +9 -0
  142. package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.js +333 -0
  143. package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.d.ts +12 -0
  144. package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.js +210 -0
  145. package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.d.ts +7 -0
  146. package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.js +404 -0
  147. package/dist/pipeline/compiler/__tests__/scoring-bridge.test.d.ts +10 -0
  148. package/dist/pipeline/compiler/__tests__/scoring-bridge.test.js +184 -0
  149. package/dist/pipeline/compiler/__tests__/task-graph-builder.test.d.ts +8 -0
  150. package/dist/pipeline/compiler/__tests__/task-graph-builder.test.js +301 -0
  151. package/dist/pipeline/compiler/__tests__/telemetry.test.d.ts +9 -0
  152. package/dist/pipeline/compiler/__tests__/telemetry.test.js +503 -0
  153. package/dist/pipeline/compiler/assertion-mapper.d.ts +58 -0
  154. package/dist/pipeline/compiler/assertion-mapper.js +175 -0
  155. package/dist/pipeline/compiler/compiler-to-yaml.d.ts +51 -0
  156. package/dist/pipeline/compiler/compiler-to-yaml.js +222 -0
  157. package/dist/pipeline/compiler/config-loader.d.ts +56 -0
  158. package/dist/pipeline/compiler/config-loader.js +111 -0
  159. package/dist/pipeline/compiler/fixture-resolver.d.ts +41 -0
  160. package/dist/pipeline/compiler/fixture-resolver.js +113 -0
  161. package/dist/pipeline/compiler/hash.d.ts +11 -0
  162. package/dist/pipeline/compiler/hash.js +18 -0
  163. package/dist/pipeline/compiler/ignore-fields.d.ts +53 -0
  164. package/dist/pipeline/compiler/ignore-fields.js +113 -0
  165. package/dist/pipeline/compiler/index.d.ts +29 -0
  166. package/dist/pipeline/compiler/index.js +45 -0
  167. package/dist/pipeline/compiler/literacy-bridge.d.ts +102 -0
  168. package/dist/pipeline/compiler/literacy-bridge.js +172 -0
  169. package/dist/pipeline/compiler/mode-bases/agent-harness.d.ts +10 -0
  170. package/dist/pipeline/compiler/mode-bases/agent-harness.js +21 -0
  171. package/dist/pipeline/compiler/mode-bases/index.d.ts +4 -0
  172. package/dist/pipeline/compiler/mode-bases/index.js +4 -0
  173. package/dist/pipeline/compiler/mode-bases/knowledge-probe.d.ts +10 -0
  174. package/dist/pipeline/compiler/mode-bases/knowledge-probe.js +22 -0
  175. package/dist/pipeline/compiler/mode-bases/literacy.d.ts +12 -0
  176. package/dist/pipeline/compiler/mode-bases/literacy.js +78 -0
  177. package/dist/pipeline/compiler/mode-bases/mcp-server.d.ts +10 -0
  178. package/dist/pipeline/compiler/mode-bases/mcp-server.js +70 -0
  179. package/dist/pipeline/compiler/mode-handlers/__fixtures__/agent-harness-example-tasks.d.ts +14 -0
  180. package/dist/pipeline/compiler/mode-handlers/__fixtures__/agent-harness-example-tasks.js +152 -0
  181. package/dist/pipeline/compiler/mode-handlers/__fixtures__/knowledge-probe-example-tasks.d.ts +32 -0
  182. package/dist/pipeline/compiler/mode-handlers/__fixtures__/knowledge-probe-example-tasks.js +176 -0
  183. package/dist/pipeline/compiler/mode-handlers/__fixtures__/mcp-example-tasks.d.ts +49 -0
  184. package/dist/pipeline/compiler/mode-handlers/__fixtures__/mcp-example-tasks.js +259 -0
  185. package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.d.ts +43 -0
  186. package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.js +187 -0
  187. package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.d.ts +19 -0
  188. package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.js +138 -0
  189. package/dist/pipeline/compiler/mode-handlers/agent-harness/index.d.ts +16 -0
  190. package/dist/pipeline/compiler/mode-handlers/agent-harness/index.js +43 -0
  191. package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.d.ts +9 -0
  192. package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.js +29 -0
  193. package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.d.ts +12 -0
  194. package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.js +82 -0
  195. package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.d.ts +4 -0
  196. package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.js +19 -0
  197. package/dist/pipeline/compiler/mode-handlers/agent-harness/types.d.ts +49 -0
  198. package/dist/pipeline/compiler/mode-handlers/agent-harness/types.js +4 -0
  199. package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.d.ts +9 -0
  200. package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.js +16 -0
  201. package/dist/pipeline/compiler/mode-handlers/index.d.ts +15 -0
  202. package/dist/pipeline/compiler/mode-handlers/index.js +19 -0
  203. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.d.ts +16 -0
  204. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.js +61 -0
  205. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.d.ts +18 -0
  206. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.js +112 -0
  207. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.d.ts +26 -0
  208. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.js +49 -0
  209. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.d.ts +9 -0
  210. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.js +28 -0
  211. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.d.ts +44 -0
  212. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.js +4 -0
  213. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.d.ts +9 -0
  214. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.js +24 -0
  215. package/dist/pipeline/compiler/mode-handlers/literacy/assertions.d.ts +18 -0
  216. package/dist/pipeline/compiler/mode-handlers/literacy/assertions.js +118 -0
  217. package/dist/pipeline/compiler/mode-handlers/literacy/compiler.d.ts +14 -0
  218. package/dist/pipeline/compiler/mode-handlers/literacy/compiler.js +105 -0
  219. package/dist/pipeline/compiler/mode-handlers/literacy/index.d.ts +11 -0
  220. package/dist/pipeline/compiler/mode-handlers/literacy/index.js +38 -0
  221. package/dist/pipeline/compiler/mode-handlers/literacy/prompts.d.ts +9 -0
  222. package/dist/pipeline/compiler/mode-handlers/literacy/prompts.js +74 -0
  223. package/dist/pipeline/compiler/mode-handlers/literacy/types.d.ts +41 -0
  224. package/dist/pipeline/compiler/mode-handlers/literacy/types.js +4 -0
  225. package/dist/pipeline/compiler/mode-handlers/literacy/validation.d.ts +12 -0
  226. package/dist/pipeline/compiler/mode-handlers/literacy/validation.js +28 -0
  227. package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.d.ts +42 -0
  228. package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.js +334 -0
  229. package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.d.ts +19 -0
  230. package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.js +100 -0
  231. package/dist/pipeline/compiler/mode-handlers/mcp-server/index.d.ts +27 -0
  232. package/dist/pipeline/compiler/mode-handlers/mcp-server/index.js +54 -0
  233. package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.d.ts +8 -0
  234. package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.js +28 -0
  235. package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.d.ts +28 -0
  236. package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.js +104 -0
  237. package/dist/pipeline/compiler/mode-handlers/mcp-server/types.d.ts +37 -0
  238. package/dist/pipeline/compiler/mode-handlers/mcp-server/types.js +4 -0
  239. package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.d.ts +9 -0
  240. package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.js +43 -0
  241. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.d.ts +33 -0
  242. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.js +174 -0
  243. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.d.ts +19 -0
  244. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.js +95 -0
  245. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.d.ts +19 -0
  246. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.js +172 -0
  247. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.d.ts +14 -0
  248. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.js +16 -0
  249. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.d.ts +93 -0
  250. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.js +4 -0
  251. package/dist/pipeline/compiler/preset-loader.d.ts +22 -0
  252. package/dist/pipeline/compiler/preset-loader.js +99 -0
  253. package/dist/pipeline/compiler/presets/index.d.ts +9 -0
  254. package/dist/pipeline/compiler/presets/index.js +8 -0
  255. package/dist/pipeline/compiler/presets/sanity-literacy.d.ts +42 -0
  256. package/dist/pipeline/compiler/presets/sanity-literacy.js +208 -0
  257. package/dist/pipeline/compiler/promptfoo-compiler.d.ts +96 -0
  258. package/dist/pipeline/compiler/promptfoo-compiler.js +230 -0
  259. package/dist/pipeline/compiler/provider-assembler.d.ts +39 -0
  260. package/dist/pipeline/compiler/provider-assembler.js +137 -0
  261. package/dist/pipeline/compiler/sandbox/docker-sandbox.d.ts +21 -0
  262. package/dist/pipeline/compiler/sandbox/docker-sandbox.js +136 -0
  263. package/dist/pipeline/compiler/sandbox/fixture-provisioner.d.ts +69 -0
  264. package/dist/pipeline/compiler/sandbox/fixture-provisioner.js +189 -0
  265. package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.d.ts +20 -0
  266. package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.js +114 -0
  267. package/dist/pipeline/compiler/sandbox/index.d.ts +10 -0
  268. package/dist/pipeline/compiler/sandbox/index.js +11 -0
  269. package/dist/pipeline/compiler/sandbox/sandbox-selector.d.ts +35 -0
  270. package/dist/pipeline/compiler/sandbox/sandbox-selector.js +86 -0
  271. package/dist/pipeline/compiler/sandbox/sandbox-strategy.d.ts +81 -0
  272. package/dist/pipeline/compiler/sandbox/sandbox-strategy.js +15 -0
  273. package/dist/pipeline/compiler/sandbox/tempdir-sandbox.d.ts +20 -0
  274. package/dist/pipeline/compiler/sandbox/tempdir-sandbox.js +74 -0
  275. package/dist/pipeline/compiler/scoring-bridge.d.ts +49 -0
  276. package/dist/pipeline/compiler/scoring-bridge.js +114 -0
  277. package/dist/pipeline/compiler/task-graph-builder.d.ts +54 -0
  278. package/dist/pipeline/compiler/task-graph-builder.js +291 -0
  279. package/dist/pipeline/compiler/telemetry/cost-tracker.d.ts +90 -0
  280. package/dist/pipeline/compiler/telemetry/cost-tracker.js +146 -0
  281. package/dist/pipeline/compiler/telemetry/index.d.ts +14 -0
  282. package/dist/pipeline/compiler/telemetry/index.js +19 -0
  283. package/dist/pipeline/compiler/telemetry/redactor.d.ts +58 -0
  284. package/dist/pipeline/compiler/telemetry/redactor.js +222 -0
  285. package/dist/pipeline/compiler/telemetry/tool-classifier.d.ts +32 -0
  286. package/dist/pipeline/compiler/telemetry/tool-classifier.js +120 -0
  287. package/dist/pipeline/compiler/telemetry/trace-collector.d.ts +75 -0
  288. package/dist/pipeline/compiler/telemetry/trace-collector.js +297 -0
  289. package/dist/pipeline/compiler/telemetry/trace-store.d.ts +78 -0
  290. package/dist/pipeline/compiler/telemetry/trace-store.js +85 -0
  291. package/dist/pipeline/compiler/variable-resolver.d.ts +46 -0
  292. package/dist/pipeline/compiler/variable-resolver.js +115 -0
  293. package/dist/pipeline/coverage-audit.d.ts +15 -5
  294. package/dist/pipeline/coverage-audit.js +41 -22
  295. package/dist/pipeline/eval-constants.d.ts +16 -6
  296. package/dist/pipeline/eval-constants.js +25 -4
  297. package/dist/pipeline/eval-fingerprint.d.ts +2 -2
  298. package/dist/pipeline/eval-fingerprint.js +8 -9
  299. package/dist/pipeline/expand-tasks.d.ts +19 -10
  300. package/dist/pipeline/expand-tasks.js +34 -28
  301. package/dist/pipeline/gap-analysis.d.ts +1 -1
  302. package/dist/pipeline/gap-analysis.js +2 -2
  303. package/dist/pipeline/generate-configs.d.ts +22 -4
  304. package/dist/pipeline/generate-configs.js +53 -24
  305. package/dist/pipeline/grader-api.d.ts +3 -3
  306. package/dist/pipeline/grader-api.js +5 -12
  307. package/dist/pipeline/grader-compare-runner.js +20 -27
  308. package/dist/pipeline/grader-comparison.d.ts +4 -8
  309. package/dist/pipeline/grader-comparison.js +11 -17
  310. package/dist/pipeline/grader-consistency-runner.d.ts +2 -3
  311. package/dist/pipeline/grader-consistency-runner.js +16 -20
  312. package/dist/pipeline/grader-consistency.d.ts +6 -10
  313. package/dist/pipeline/grader-consistency.js +13 -32
  314. package/dist/pipeline/grader-sensitivity-runner.js +7 -5
  315. package/dist/pipeline/grader-sensitivity.d.ts +2 -6
  316. package/dist/pipeline/grader-sensitivity.js +10 -10
  317. package/dist/pipeline/grader-validate-runner.js +7 -5
  318. package/dist/pipeline/grader-validation.d.ts +2 -6
  319. package/dist/pipeline/grader-validation.js +14 -22
  320. package/dist/pipeline/map-request-to-config.js +7 -1
  321. package/dist/pipeline/mirror-repo-tasks.d.ts +13 -13
  322. package/dist/pipeline/mirror-repo-tasks.js +22 -21
  323. package/dist/pipeline/normalize-mode.d.ts +49 -0
  324. package/dist/pipeline/normalize-mode.js +64 -0
  325. package/dist/pipeline/plan.d.ts +5 -2
  326. package/dist/pipeline/plan.js +134 -78
  327. package/dist/pipeline/pr-comment.js +2 -0
  328. package/dist/pipeline/profile-resolution.d.ts +22 -14
  329. package/dist/pipeline/profile-resolution.js +41 -19
  330. package/dist/pipeline/provenance.d.ts +2 -2
  331. package/dist/pipeline/provenance.js +12 -17
  332. package/dist/pipeline/release-report.js +4 -4
  333. package/dist/pipeline/repo-threshold-evaluator.d.ts +1 -1
  334. package/dist/pipeline/repo-threshold-evaluator.js +1 -1
  335. package/dist/pipeline/rubric-loader.d.ts +20 -0
  336. package/dist/pipeline/rubric-loader.js +37 -0
  337. package/dist/pipeline/validate.d.ts +4 -4
  338. package/dist/pipeline/validate.js +64 -53
  339. package/dist/schedules/loader.js +18 -8
  340. package/dist/scripts/migrate-task-mode.d.ts +24 -0
  341. package/dist/scripts/migrate-task-mode.js +85 -0
  342. package/dist/scripts/migrate-tasks-to-content-lake.js +11 -10
  343. package/dist/scripts/validate-task-sources.d.ts +1 -1
  344. package/dist/scripts/validate-task-sources.js +15 -15
  345. package/dist/sinks/loader.js +5 -7
  346. package/dist/sources.d.ts +7 -7
  347. package/dist/sources.js +22 -24
  348. package/dist/webhook/dispatch.js +2 -1
  349. package/package.json +15 -4
  350. package/tasks/knowledge-probe/define-type-api.task.ts +55 -0
  351. package/tasks/knowledge-probe/groq-projections.task.ts +59 -0
  352. package/tasks/literacy/frameworks.task.ts +128 -0
  353. package/tasks/literacy/functions.task.ts +69 -0
  354. package/tasks/literacy/groq.task.ts +258 -0
  355. package/tasks/literacy/nextjs-live.task.ts +75 -0
  356. package/tasks/literacy/studio-setup.task.ts +131 -0
  357. package/tasks/literacy/visual-editing.task.ts +146 -0
  358. package/config/features.yaml +0 -116
  359. package/config/models.yaml +0 -116
  360. package/config/prompts.yaml +0 -75
  361. package/config/rubrics.yaml +0 -81
  362. package/config/schedules.yaml +0 -43
  363. package/config/sinks.yaml +0 -54
  364. package/config/sources.yaml +0 -51
  365. package/config/thresholds.yaml +0 -49
  366. package/dist/_vendor/ailf-tasks/cli.d.ts +0 -8
  367. package/dist/_vendor/ailf-tasks/cli.js +0 -61
  368. package/dist/_vendor/ailf-tasks/index.d.ts +0 -13
  369. package/dist/_vendor/ailf-tasks/index.js +0 -16
  370. package/dist/_vendor/ailf-tasks/parser.d.ts +0 -27
  371. package/dist/_vendor/ailf-tasks/parser.js +0 -73
  372. package/dist/_vendor/ailf-tasks/schemas.d.ts +0 -198
  373. package/dist/_vendor/ailf-tasks/schemas.js +0 -180
  374. package/dist/_vendor/ailf-tasks/validation.d.ts +0 -47
  375. package/dist/_vendor/ailf-tasks/validation.js +0 -162
  376. package/dist/agent-observer/test-imports.d.ts +0 -7
  377. package/dist/agent-observer/test-imports.js +0 -185
@@ -0,0 +1,291 @@
1
+ /**
2
+ * TaskGraphBuilder — converts task definitions into a TaskGraph IR.
3
+ *
4
+ * The builder is the first stage of the compilation pipeline:
5
+ * GeneralizedTaskDefinitions → TaskGraphBuilder → TaskGraph → PromptfooCompiler → YAML
6
+ *
7
+ * Responsibilities:
8
+ * - Accept tasks from any source (TS, YAML, Content Lake)
9
+ * - Apply area/tag/mode filtering
10
+ * - Resolve inter-task dependencies into edges
11
+ * - Validate the graph is a DAG (reject cycles)
12
+ * - Assign execution priority via topological sort
13
+ *
14
+ * This module exists alongside `generate-configs.ts` — it does NOT replace
15
+ * the existing codegen path. Phase 7 will swap callers over to the compiler.
16
+ *
17
+ * @see packages/core/src/types/task-graph.ts — TaskGraph types
18
+ * @see docs/exec-plans/architecture-overhaul/phase-2-config-compiler.md
19
+ */
20
+ // ---------------------------------------------------------------------------
21
+ // Public API
22
+ // ---------------------------------------------------------------------------
23
/**
 * Compile a list of task definitions into a TaskGraph.
 *
 * Stages, in order:
 *   1. drop tasks rejected by the status / area / task-ID / tag filters
 *   2. turn every surviving task into a node keyed by its task ID
 *   3. derive dependency edges from task metadata
 *   4. refuse graphs whose edges form a cycle
 *   5. assign a topological priority to every node
 *
 * @param options - `tasks` (definitions to compile), an optional `filter`,
 *   and an optional `compilationTarget` (defaults to "promptfoo").
 * @returns `{ graph, warnings, filteredOut }`. `graph` is `null` when no task
 *   survives filtering; `filteredOut` lists the IDs of rejected tasks.
 * @throws Error when the dependency edges contain a cycle.
 */
export function buildTaskGraph(options) {
    const filteredOut = [];
    const survivors = filterTasks(options.tasks, options.filter, filteredOut);
    if (survivors.length === 0) {
        return {
            graph: null,
            warnings: ["No tasks matched the filter criteria"],
            filteredOut,
        };
    }
    const warnings = [];
    // Nodes are keyed by task ID; a repeated ID overwrites the earlier node.
    const nodes = new Map();
    survivors.forEach((task) => {
        const node = taskToNode(task);
        if (nodes.has(node.taskId)) {
            warnings.push(`Duplicate task ID "${node.taskId}" — later definition wins`);
        }
        nodes.set(node.taskId, node);
    });
    const edges = discoverEdges(survivors, nodes, warnings);
    // A non-null result is the offending path, used verbatim in the error.
    const cyclePath = detectCycle(nodes, edges);
    if (cyclePath) {
        throw new Error(`Task graph contains a cycle: ${cyclePath.join(" → ")}. ` +
            "Task graphs must be directed acyclic graphs (DAGs).");
    }
    assignPriority(nodes, edges);
    return {
        graph: {
            compilationTarget: options.compilationTarget ?? "promptfoo",
            edges,
            // Fixture map is intentionally empty for now (Phase 2d fills it).
            fixtures: new Map(),
            nodes,
        },
        warnings,
        filteredOut,
    };
}
73
+ // ---------------------------------------------------------------------------
74
+ // Filtering
75
+ // ---------------------------------------------------------------------------
76
/**
 * Select the tasks that should be compiled into the graph.
 *
 * Status rules are applied to every task, even when `filter` is absent
 * (a missing status counts as "active"):
 *   - "archived" tasks are always dropped
 *   - "paused" tasks are dropped unless their ID is listed in `filter.taskIds`
 *   - "draft" tasks are dropped unless their ID is listed in `filter.taskIds`
 *     or `filter.includeDrafts` is truthy
 *
 * When `filter` is provided, each non-empty list narrows the result further:
 *   - `areas`   — `task.area` must match one entry, case-insensitively
 *   - `taskIds` — `task.id` must be listed
 *   - `tags`    — the task must carry at least one of the listed tags
 *
 * @param tasks - Task definitions to examine.
 * @param filter - Optional filter options (see above).
 * @param filteredOut - Output array; the ID of every rejected task is
 *   appended to it in input order.
 * @returns The tasks that passed every applicable rule, in input order.
 */
function filterTasks(tasks, filter, filteredOut) {
    // Normalise the filter lists once, instead of once per task.
    const targetedIds = new Set(filter?.taskIds || []);
    const wantedAreas = new Set((filter?.areas || []).map((a) => a.toLowerCase()));
    const wantedTags = new Set(filter?.tags || []);
    const includeDrafts = Boolean(filter?.includeDrafts);
    const reject = (task) => {
        filteredOut.push(task.id);
        return false;
    };
    return tasks.filter((task) => {
        // Status filter — always applied (even without explicit filter options)
        const status = task.status ?? "active";
        const isTargetedById = targetedIds.has(task.id);
        if (status === "archived")
            return reject(task);
        if (status === "paused" && !isTargetedById)
            return reject(task);
        if (status === "draft" && !isTargetedById && !includeDrafts)
            return reject(task);
        // Remaining filters only apply when an explicit filter is provided
        if (!filter)
            return true;
        // Area filter — GeneralizedTaskDefinition uses `area` (not `featureArea`)
        if (wantedAreas.size > 0 &&
            !wantedAreas.has((task.area ?? "").toLowerCase())) {
            return reject(task);
        }
        // Task ID filter
        if (targetedIds.size > 0 && !isTargetedById)
            return reject(task);
        // Tag filter — a task without tags can never satisfy a tag filter
        if (wantedTags.size > 0 &&
            !(task.tags && task.tags.some((t) => wantedTags.has(t)))) {
            return reject(task);
        }
        return true;
    });
}
121
+ // ---------------------------------------------------------------------------
122
+ // Node creation
123
+ // ---------------------------------------------------------------------------
124
/**
 * Build the graph node for a single task definition.
 *
 * The prompt is taken from `prompt.text`, falling back to `prompt.template`
 * and finally to the empty string. The variable envelope's `values` holds the
 * prompt under the `task` key (only when the prompt is non-empty) followed by
 * every entry of `prompt.vars`; a `task` entry in `prompt.vars` overrides the
 * prompt value. Dependencies and priority start out empty / zero.
 *
 * @param task - Task definition; `id`, `mode` and `prompt` are read.
 * @returns A fresh node for `task`.
 */
function taskToNode(task) {
    const prompt = task.prompt;
    const resolvedPrompt = prompt?.text ?? prompt?.template ?? "";
    // Insert `task` first so it keeps the leading position in `values`.
    const values = {};
    if (resolvedPrompt) {
        values.task = resolvedPrompt;
    }
    Object.assign(values, prompt?.vars ?? {});
    const resolvedVariables = {
        declarations: [],
        provenance: {},
        values,
    };
    return {
        dependsOn: [],
        mode: task.mode,
        priority: 0,
        resolvedPrompt,
        resolvedVariables,
        taskId: task.id,
    };
}
146
+ // ---------------------------------------------------------------------------
147
+ // Edge discovery
148
+ // ---------------------------------------------------------------------------
149
/**
 * Discover dependency edges from task metadata.
 *
 * Reads the explicit `dependsOn` array in each task's `prompt.vars` (the
 * generalized equivalent of the old extraVars convention). For every string
 * entry that names a node present in `nodes`, an "ordering" edge
 * `dependency → task` is produced and the dependency is recorded on the
 * task's node. Each (from, to) pair yields at most one edge, even if the
 * dependency is listed several times or the task definition is duplicated.
 *
 * Non-array `dependsOn` values and non-string entries are ignored silently.
 * Entries naming a task that is not in the graph are skipped with a warning.
 *
 * @param tasks - Task definitions whose metadata is scanned.
 * @param nodes - Map of task ID → node; matching nodes have their
 *   `dependsOn` array extended in place.
 * @param warnings - Output array that receives a message for every
 *   dependency that could not be resolved.
 * @returns The list of unique edges, in discovery order.
 */
function discoverEdges(tasks, nodes, warnings) {
    const edges = [];
    // Keys of edges already emitted, so repeats do not create parallel edges.
    const emitted = new Set();
    for (const task of tasks) {
        // Check for explicit dependencies in prompt.vars (was extraVars.dependsOn)
        const deps = task.prompt?.vars?.dependsOn;
        if (!Array.isArray(deps))
            continue;
        const node = nodes.get(task.id);
        for (const dep of deps) {
            if (typeof dep !== "string")
                continue;
            if (!nodes.has(dep)) {
                warnings.push(`Task "${task.id}" depends on "${dep}" which is not in the graph — ` +
                    "dependency ignored (task may have been filtered out)");
                continue;
            }
            // JSON encoding keeps the pair unambiguous whatever the IDs contain.
            const edgeKey = JSON.stringify([dep, task.id]);
            if (!emitted.has(edgeKey)) {
                emitted.add(edgeKey);
                edges.push({ from: dep, to: task.id, type: "ordering" });
            }
            if (node && !node.dependsOn.includes(dep)) {
                node.dependsOn.push(dep);
            }
        }
    }
    return edges;
}
180
+ // ---------------------------------------------------------------------------
181
+ // Cycle detection — Kahn's algorithm (topological sort)
182
+ // ---------------------------------------------------------------------------
183
+ /**
184
+ * Detect cycles in the task graph using Kahn's algorithm.
185
+ *
186
+ * @returns null if acyclic, or the cycle path as a string array
187
+ */
188
+ export function detectCycle(nodes, edges) {
189
+ // Build in-degree map
190
+ const inDegree = new Map();
191
+ const adjacency = new Map();
192
+ for (const id of nodes.keys()) {
193
+ inDegree.set(id, 0);
194
+ adjacency.set(id, []);
195
+ }
196
+ for (const edge of edges) {
197
+ adjacency.get(edge.from).push(edge.to);
198
+ inDegree.set(edge.to, (inDegree.get(edge.to) ?? 0) + 1);
199
+ }
200
+ // Start with all zero-in-degree nodes
201
+ const queue = [];
202
+ for (const [id, deg] of inDegree) {
203
+ if (deg === 0)
204
+ queue.push(id);
205
+ }
206
+ let visited = 0;
207
+ while (queue.length > 0) {
208
+ const current = queue.shift();
209
+ visited++;
210
+ for (const neighbor of adjacency.get(current) ?? []) {
211
+ const newDeg = (inDegree.get(neighbor) ?? 1) - 1;
212
+ inDegree.set(neighbor, newDeg);
213
+ if (newDeg === 0)
214
+ queue.push(neighbor);
215
+ }
216
+ }
217
+ if (visited === nodes.size)
218
+ return null;
219
+ // Find cycle participants (nodes with remaining in-degree > 0)
220
+ const cycleNodes = [...inDegree.entries()]
221
+ .filter(([, deg]) => deg > 0)
222
+ .map(([id]) => id);
223
+ // Reconstruct a cycle path for the error message
224
+ return reconstructCyclePath(cycleNodes, adjacency);
225
+ }
226
/**
 * Reconstruct a human-readable cycle path from cycle participants.
 *
 * `cycleNodes` may contain nodes that are merely downstream of a cycle, so a
 * forward walk from an arbitrary participant can run into a dead end. The
 * walk therefore goes backwards along predecessor links: when every
 * participant has a predecessor among the participants (which holds for the
 * leftovers of Kahn's algorithm), the backward walk must revisit a node, and
 * the revisited segment is a genuine cycle.
 *
 * @param cycleNodes - IDs of nodes that could not be topologically released
 * @param adjacency - Forward adjacency map (node ID to successor IDs)
 * @returns The cycle in forward order with its first node repeated at the end
 *          (e.g. `["a", "b", "a"]`), or `[]` when `cycleNodes` is empty. If no
 *          cycle can be found among the participants, the chain walked so far
 *          is returned as a best effort.
 */
function reconstructCyclePath(cycleNodes, adjacency) {
    if (cycleNodes.length === 0)
        return [];
    const participants = new Set(cycleNodes);
    // One predecessor per participant, restricted to edges between participants.
    const predecessor = new Map();
    for (const from of cycleNodes) {
        for (const to of adjacency.get(from) ?? []) {
            if (participants.has(to) && !predecessor.has(to)) {
                predecessor.set(to, from);
            }
        }
    }
    // Walk backwards until a node repeats; trail[k + 1] -> trail[k] is an edge.
    const trail = [];
    const positionInTrail = new Map();
    let current = cycleNodes[0];
    while (current !== undefined && !positionInTrail.has(current)) {
        positionInTrail.set(current, trail.length);
        trail.push(current);
        current = predecessor.get(current);
    }
    if (current === undefined) {
        // No predecessor available: there is no cycle through this chain.
        return trail.reverse();
    }
    // Reversing the revisited segment turns the backward walk into forward order.
    const loop = trail.slice(positionInTrail.get(current)).reverse();
    return [current, ...loop];
}
250
+ // ---------------------------------------------------------------------------
251
+ // Topological priority assignment
252
+ // ---------------------------------------------------------------------------
253
+ /**
254
+ * Assign execution priority via topological order.
255
+ * Lower priority = earlier execution.
256
+ */
257
+ function assignPriority(nodes, edges) {
258
+ const inDegree = new Map();
259
+ const adjacency = new Map();
260
+ for (const id of nodes.keys()) {
261
+ inDegree.set(id, 0);
262
+ adjacency.set(id, []);
263
+ }
264
+ for (const edge of edges) {
265
+ adjacency.get(edge.from).push(edge.to);
266
+ inDegree.set(edge.to, (inDegree.get(edge.to) ?? 0) + 1);
267
+ }
268
+ const queue = [];
269
+ for (const [id, deg] of inDegree) {
270
+ if (deg === 0)
271
+ queue.push(id);
272
+ }
273
+ let priority = 0;
274
+ while (queue.length > 0) {
275
+ // Process all nodes at the current level (same priority)
276
+ const levelSize = queue.length;
277
+ for (let i = 0; i < levelSize; i++) {
278
+ const current = queue.shift();
279
+ const node = nodes.get(current);
280
+ if (node)
281
+ node.priority = priority;
282
+ for (const neighbor of adjacency.get(current) ?? []) {
283
+ const newDeg = (inDegree.get(neighbor) ?? 1) - 1;
284
+ inDegree.set(neighbor, newDeg);
285
+ if (newDeg === 0)
286
+ queue.push(neighbor);
287
+ }
288
+ }
289
+ priority++;
290
+ }
291
+ }
@@ -0,0 +1,90 @@
1
+ /**
2
+ * Cost tracking — model pricing, pre-run estimation, and post-run actuals.
3
+ *
4
+ * Uses a pricing table (YAML config or TS `definePricingTable()`) to compute
5
+ * USD cost from token usage. Supports budget controls with warn/stop thresholds.
6
+ *
7
+ * @see docs/design-docs/architecture-overhaul/observability-telemetry.md
8
+ */
9
+ import type { TraceTokenUsage } from "../../../_vendor/ailf-core/index.d.ts";
10
+ /** Per-model pricing (USD per 1M tokens) */
11
+ export interface ModelPricing {
12
+ /** Input tokens cost per 1M tokens */
13
+ input: number;
14
+ /** Output tokens cost per 1M tokens */
15
+ output: number;
16
+ /** Cached input tokens cost per 1M tokens (optional) */
17
+ cachedInput?: number;
18
+ }
19
+ /** Budget control thresholds (in USD) */
20
+ export interface BudgetConfig {
21
+ perRun?: {
22
+ warn: number;
23
+ stop: number;
24
+ };
25
+ perTask?: {
26
+ warn: number;
27
+ stop: number;
28
+ };
29
+ }
30
+ /** Cost estimate for a pipeline run, as returned by `estimateRunCost()` */
31
+ export interface CostEstimate {
32
+ /** Estimated total cost in USD (sum of the per-model estimates) */
33
+ totalUSD: number;
34
+ /** Per-model breakdown; a model without known pricing is listed with an estimate of 0 */
35
+ perModel: {
36
+ modelId: string;
37
+ estimatedUSD: number;
38
+ }[];
39
+ /** Whether the estimate meets or exceeds the per-run warn threshold (false when no per-run budget is set) */
40
+ exceedsWarning: boolean;
41
+ /** Whether the estimate meets or exceeds the per-run stop threshold (false when no per-run budget is set) */
42
+ exceedsStop: boolean;
43
+ }
44
+ /** Actual cost computed from real token usage */
45
+ export interface ActualCost {
46
+ /** Actual total cost in USD */
47
+ totalUSD: number;
48
+ /** Per-model actual cost */
49
+ perModel: {
50
+ modelId: string;
51
+ actualUSD: number;
52
+ tokens: TraceTokenUsage;
53
+ }[];
54
+ }
55
+ /** Budget check result */
56
+ export interface BudgetCheckResult {
57
+ /** Whether to proceed */
58
+ proceed: boolean;
59
+ /** Warning message (if any) */
60
+ warning?: string;
61
+ /** Current spend in USD */
62
+ currentUSD: number;
63
+ /** Budget limit that was checked */
64
+ limitUSD?: number;
65
+ }
66
+ /**
67
+ * Compute actual cost from token usage and model pricing.
68
+ *
69
+ * @param usage - Token counts from provider response
70
+ * @param pricing - Per-model pricing (USD per 1M tokens)
71
+ * @returns Cost in USD
72
+ */
73
+ export declare function computeCost(usage: TraceTokenUsage, pricing: ModelPricing): number;
74
+ /**
75
+ * Look up pricing for a model ID.
76
+ *
77
+ * Tries exact match first, then falls back to prefix matching
78
+ * (e.g., "openai:chat:gpt-4o-2024-11-20" matches "openai:chat:gpt-4o").
79
+ */
80
+ export declare function lookupPricing(modelId: string, customPricing?: Record<string, ModelPricing>): ModelPricing | undefined;
81
+ /**
82
+ * Estimate cost for a pipeline run before execution.
83
+ *
84
+ * Uses task count, estimated tokens per task complexity, and model pricing.
85
+ */
86
+ export declare function estimateRunCost(taskCount: number, modelIds: string[], budget?: BudgetConfig, customPricing?: Record<string, ModelPricing>): CostEstimate;
87
+ /**
88
+ * Check if current spend exceeds budget thresholds.
89
+ */
90
+ export declare function checkBudget(currentUSD: number, budget: BudgetConfig, level: "perRun" | "perTask"): BudgetCheckResult;
@@ -0,0 +1,146 @@
1
+ /**
2
+ * Cost tracking — model pricing, pre-run estimation, and post-run actuals.
3
+ *
4
+ * Uses a pricing table (YAML config or TS `definePricingTable()`) to compute
5
+ * USD cost from token usage. Supports budget controls with warn/stop thresholds.
6
+ *
7
+ * @see docs/design-docs/architecture-overhaul/observability-telemetry.md
8
+ */
9
+ // ---------------------------------------------------------------------------
10
+ // Pricing table
11
+ // ---------------------------------------------------------------------------
12
+ /**
 * Default pricing table (updated periodically).
 *
 * Keyed by model ID string; each entry carries `input`, `output` and
 * `cachedInput` rates. `computeCost()` divides token counts by 1,000,000
 * before applying a rate, i.e. the numbers are prices per 1M tokens.
 * `lookupPricing()` consults this table for exact and prefix matches when the
 * caller's custom pricing has no matching entry.
 */
13
+ const DEFAULT_PRICING = {
14
+ "anthropic:messages:claude-opus-4-6": {
15
+ input: 15.0,
16
+ output: 75.0,
17
+ cachedInput: 1.5,
18
+ },
19
+ "anthropic:messages:claude-sonnet-4-6": {
20
+ input: 3.0,
21
+ output: 15.0,
22
+ cachedInput: 0.3,
23
+ },
24
+ "openai:chat:gpt-4.1": {
25
+ input: 2.0,
26
+ output: 8.0,
27
+ cachedInput: 0.5,
28
+ },
29
+ "openai:chat:gpt-4.1-mini": {
30
+ input: 0.4,
31
+ output: 1.6,
32
+ cachedInput: 0.1,
33
+ },
34
+ "openai:chat:gpt-4o": {
35
+ input: 2.5,
36
+ output: 10.0,
37
+ cachedInput: 1.25,
38
+ },
39
+ "openai:chat:gpt-5": {
40
+ input: 5.0,
41
+ output: 15.0,
42
+ cachedInput: 1.0,
43
+ },
44
+ };
45
+ // ---------------------------------------------------------------------------
46
+ // Public API
47
+ // ---------------------------------------------------------------------------
48
/**
 * Compute actual cost from token usage and model pricing.
 *
 * The prompt is split into a cached share and an uncached share. The cached
 * share is billed at `pricing.cachedInput` when that rate is defined and at
 * the regular input rate otherwise. The cached share is clamped to the range
 * `[0, promptTokens]`, so inconsistent counts can never produce a negative
 * input cost.
 *
 * @param usage - Token counts from provider response
 * @param pricing - Per-model pricing (USD per 1M tokens)
 * @returns Cost in USD
 */
export function computeCost(usage, pricing) {
    const TOKENS_PER_PRICING_UNIT = 1_000_000;
    // NOTE(review): `toolTokens` is billed here as the cached part of the
    // prompt — confirm that this field really carries cached-input tokens.
    const reportedCached = usage.toolTokens ?? 0;
    const cached = Math.min(Math.max(reportedCached, 0), Math.max(usage.promptTokens, 0));
    const uncachedPrompt = Math.max(usage.promptTokens - cached, 0);
    const cachedRate = pricing.cachedInput !== undefined ? pricing.cachedInput : pricing.input;
    const inputCost = (uncachedPrompt * pricing.input) / TOKENS_PER_PRICING_UNIT;
    const cachedCost = (cached * cachedRate) / TOKENS_PER_PRICING_UNIT;
    const outputCost = (usage.completionTokens * pricing.output) / TOKENS_PER_PRICING_UNIT;
    return inputCost + cachedCost + outputCost;
}
65
+ /**
66
+ * Look up pricing for a model ID.
67
+ *
68
+ * Tries exact match first, then falls back to prefix matching
69
+ * (e.g., "openai:chat:gpt-4o-2024-11-20" matches "openai:chat:gpt-4o").
70
+ */
71
+ export function lookupPricing(modelId, customPricing) {
72
+ // 1. Exact match in custom pricing
73
+ if (customPricing?.[modelId])
74
+ return customPricing[modelId];
75
+ // 2. Exact match in defaults
76
+ if (DEFAULT_PRICING[modelId])
77
+ return DEFAULT_PRICING[modelId];
78
+ // 3. Prefix match in custom pricing
79
+ if (customPricing) {
80
+ for (const [key, pricing] of Object.entries(customPricing)) {
81
+ if (modelId.startsWith(key))
82
+ return pricing;
83
+ }
84
+ }
85
+ // 4. Prefix match in defaults
86
+ for (const [key, pricing] of Object.entries(DEFAULT_PRICING)) {
87
+ if (modelId.startsWith(key))
88
+ return pricing;
89
+ }
90
+ return undefined;
91
+ }
92
+ /**
93
+ * Estimate cost for a pipeline run before execution.
94
+ *
95
+ * Uses task count, estimated tokens per task complexity, and model pricing.
96
+ */
97
+ export function estimateRunCost(taskCount, modelIds, budget, customPricing) {
98
+ // Rough token estimates per task (empirical averages)
99
+ const AVG_PROMPT_TOKENS = 2000;
100
+ const AVG_COMPLETION_TOKENS = 1500;
101
+ const perModel = modelIds.map((modelId) => {
102
+ const pricing = lookupPricing(modelId, customPricing);
103
+ if (!pricing) {
104
+ return { modelId, estimatedUSD: 0 };
105
+ }
106
+ const estimatedUSD = computeCost({
107
+ promptTokens: AVG_PROMPT_TOKENS * taskCount,
108
+ completionTokens: AVG_COMPLETION_TOKENS * taskCount,
109
+ totalTokens: (AVG_PROMPT_TOKENS + AVG_COMPLETION_TOKENS) * taskCount,
110
+ }, pricing);
111
+ return { modelId, estimatedUSD };
112
+ });
113
+ const totalUSD = perModel.reduce((sum, m) => sum + m.estimatedUSD, 0);
114
+ return {
115
+ totalUSD,
116
+ perModel,
117
+ exceedsWarning: budget?.perRun ? totalUSD >= budget.perRun.warn : false,
118
+ exceedsStop: budget?.perRun ? totalUSD >= budget.perRun.stop : false,
119
+ };
120
+ }
121
/**
 * Check if current spend exceeds budget thresholds.
 *
 * The stop limit is evaluated before the warn threshold, and both comparisons
 * are inclusive (`>=`). When the budget has no entry for `level`, the spend is
 * always allowed and no limit is reported.
 *
 * @param currentUSD - Spend so far in USD
 * @param budget - Budget configuration with optional `perRun` / `perTask` thresholds
 * @param level - Which threshold pair to check
 * @returns Whether to proceed, plus a warning and the limit that was hit (if any)
 */
export function checkBudget(currentUSD, budget, level) {
    const limits = budget[level];
    const stopReached = Boolean(limits) && currentUSD >= limits.stop;
    if (stopReached) {
        const warning = `Budget exceeded: $${currentUSD.toFixed(4)} >= $${limits.stop} (${level} stop limit)`;
        return { proceed: false, warning, currentUSD, limitUSD: limits.stop };
    }
    const warnReached = Boolean(limits) && currentUSD >= limits.warn;
    if (warnReached) {
        const warning = `Budget warning: $${currentUSD.toFixed(4)} >= $${limits.warn} (${level} warn threshold)`;
        return { proceed: true, warning, currentUSD, limitUSD: limits.warn };
    }
    // Below both thresholds, or no thresholds configured for this level.
    return { proceed: true, currentUSD };
}
@@ -0,0 +1,14 @@
1
+ /**
2
+ * Telemetry — observability infrastructure for evaluation traces.
3
+ *
4
+ * Captures tool calls, token usage, cost, and timing for every evaluation.
5
+ * Full traces go to blob storage; sanitized summaries to Content Lake.
6
+ *
7
+ * @see docs/exec-plans/architecture-overhaul/phase-6-observability.md
8
+ * @see docs/design-docs/architecture-overhaul/observability-telemetry.md
9
+ */
10
+ export { collectTrace, mergeTraces, type ProviderResponse, type RawToolCall, type TraceCollectorOptions, } from "./trace-collector.js";
11
+ export { classifyToolCall, classifyToolCalls } from "./tool-classifier.js";
12
+ export { checkBudget, computeCost, estimateRunCost, lookupPricing, type ActualCost, type BudgetCheckResult, type BudgetConfig, type CostEstimate, type ModelPricing, } from "./cost-tracker.js";
13
+ export { extractTraceSummary, LocalTraceStore, type TraceSummary, type TraceStore, type TraceStoreResult, } from "./trace-store.js";
14
+ export { createRedactionConfig, DEFAULT_REDACTION_RULES, redactTrace, type RedactionConfig, type RedactionResult, type RedactionRule, } from "./redactor.js";
@@ -0,0 +1,19 @@
1
+ /**
2
+ * Telemetry — observability infrastructure for evaluation traces.
3
+ *
4
+ * Captures tool calls, token usage, cost, and timing for every evaluation.
5
+ * Full traces go to blob storage; sanitized summaries to Content Lake.
6
+ *
7
+ * @see docs/exec-plans/architecture-overhaul/phase-6-observability.md
8
+ * @see docs/design-docs/architecture-overhaul/observability-telemetry.md
9
+ */
10
+ // Trace collection
11
+ export { collectTrace, mergeTraces, } from "./trace-collector.js";
12
+ // Tool call classification
13
+ export { classifyToolCall, classifyToolCalls } from "./tool-classifier.js";
14
+ // Cost tracking
15
+ export { checkBudget, computeCost, estimateRunCost, lookupPricing, } from "./cost-tracker.js";
16
+ // Trace storage
17
+ export { extractTraceSummary, LocalTraceStore, } from "./trace-store.js";
18
+ // Redaction
19
+ export { createRedactionConfig, DEFAULT_REDACTION_RULES, redactTrace, } from "./redactor.js";
@@ -0,0 +1,58 @@
1
+ /**
2
+ * Redaction pipeline — strips sensitive data from traces before storage.
3
+ *
4
+ * Applied before ANY storage (both blob and Content Lake). Configurable
5
+ * patterns handle Bearer tokens, API keys, Sanity tokens, and other
6
+ * common secret formats.
7
+ *
8
+ * Principles:
9
+ * 1. Redact before store — sensitive data never reaches storage
10
+ * 2. Configurable patterns — teams can add project-specific rules
11
+ * 3. Truncation for cost — large outputs truncated to max bytes
12
+ * 4. No PII by default — tasks shouldn't contain PII, this is a safety net
13
+ *
14
+ * @see docs/design-docs/architecture-overhaul/observability-telemetry.md
15
+ */
16
+ import type { EvalTrace } from "../../../_vendor/ailf-core/index.d.ts";
17
+ /** A single redaction rule */
18
+ export interface RedactionRule {
19
+ /** Rule name (for logging) */
20
+ name: string;
21
+ /** Regex pattern to match */
22
+ pattern: RegExp;
23
+ /** Replacement string (use $1, $2 for capture groups) */
24
+ replacement: string;
25
+ }
26
+ /** Redaction configuration */
27
+ export interface RedactionConfig {
28
+ /** Regex-based substitution rules */
29
+ rules: RedactionRule[];
30
+ /** Fields to omit entirely from stored traces */
31
+ omitFields: string[];
32
+ /** Maximum tool call output size in bytes */
33
+ maxOutputBytes: number;
34
+ }
35
+ /** Result of redaction */
36
+ export interface RedactionResult {
37
+ /** Redacted trace */
38
+ trace: EvalTrace;
39
+ /** Number of redactions applied */
40
+ redactionCount: number;
41
+ /** Which rules fired */
42
+ rulesApplied: string[];
43
+ }
44
+ /** Built-in redaction rules for common secret patterns */
45
+ export declare const DEFAULT_REDACTION_RULES: RedactionRule[];
46
+ /**
47
+ * Create a default redaction config.
48
+ *
49
+ * @param overrides - Custom rules or settings to merge
50
+ */
51
+ export declare function createRedactionConfig(overrides?: Partial<RedactionConfig>): RedactionConfig;
52
+ /**
53
+ * Apply redaction to an evaluation trace.
54
+ *
55
+ * Processes tool call inputs and outputs, event data, and search terms.
56
+ * Returns a new trace (does not mutate the original).
57
+ */
58
+ export declare function redactTrace(trace: EvalTrace, config?: RedactionConfig): RedactionResult;