@sanity/ailf 0.5.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (377) hide show
  1. package/README.md +0 -1
  2. package/config/features.ts +23 -0
  3. package/config/models.ts +95 -0
  4. package/config/prompts.ts +16 -0
  5. package/config/rubrics.ts +225 -0
  6. package/config/schedules.ts +47 -0
  7. package/config/sinks.ts +37 -0
  8. package/config/sources.ts +21 -0
  9. package/config/thresholds.ts +61 -0
  10. package/dist/_vendor/ailf-core/config-helpers.d.ts +171 -0
  11. package/dist/_vendor/ailf-core/config-helpers.js +170 -0
  12. package/dist/_vendor/ailf-core/env-helper.d.ts +35 -0
  13. package/dist/_vendor/ailf-core/env-helper.js +45 -0
  14. package/dist/_vendor/ailf-core/examples/index.d.ts +16 -0
  15. package/dist/_vendor/ailf-core/examples/index.js +25 -0
  16. package/dist/_vendor/ailf-core/index.d.ts +3 -0
  17. package/dist/_vendor/ailf-core/index.js +5 -0
  18. package/dist/_vendor/ailf-core/ports/context.d.ts +17 -2
  19. package/dist/_vendor/ailf-core/ports/doc-fetcher.d.ts +2 -2
  20. package/dist/_vendor/ailf-core/ports/index.d.ts +2 -1
  21. package/dist/_vendor/ailf-core/ports/mode-handler.d.ts +129 -0
  22. package/dist/_vendor/ailf-core/ports/mode-handler.js +19 -0
  23. package/dist/_vendor/ailf-core/ports/task-source.d.ts +16 -122
  24. package/dist/_vendor/ailf-core/ports/task-source.js +7 -7
  25. package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +8 -2
  26. package/dist/_vendor/ailf-core/schemas/eval-config.js +17 -2
  27. package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +9 -3
  28. package/dist/_vendor/ailf-core/schemas/pipeline-request.js +8 -1
  29. package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +14 -31
  30. package/dist/_vendor/ailf-core/schemas/pipeline.js +17 -9
  31. package/dist/_vendor/ailf-core/schemas/schedules.d.ts +14 -4
  32. package/dist/_vendor/ailf-core/schemas/schedules.js +6 -2
  33. package/dist/_vendor/ailf-core/schemas/sinks.d.ts +1 -1
  34. package/dist/_vendor/ailf-core/services/comparison-formatters.js +57 -19
  35. package/dist/_vendor/ailf-core/services/index.d.ts +2 -1
  36. package/dist/_vendor/ailf-core/services/index.js +2 -1
  37. package/dist/_vendor/ailf-core/services/scoring-engine.d.ts +153 -0
  38. package/dist/_vendor/ailf-core/services/scoring-engine.js +237 -0
  39. package/dist/_vendor/ailf-core/services/scoring.d.ts +15 -2
  40. package/dist/_vendor/ailf-core/services/scoring.js +25 -15
  41. package/dist/_vendor/ailf-core/types/branded-ids.d.ts +137 -0
  42. package/dist/_vendor/ailf-core/types/branded-ids.js +136 -0
  43. package/dist/_vendor/ailf-core/types/eval-mode-config.d.ts +150 -0
  44. package/dist/_vendor/ailf-core/types/eval-mode-config.js +24 -0
  45. package/dist/_vendor/ailf-core/types/generalized-task.d.ts +332 -0
  46. package/dist/_vendor/ailf-core/types/generalized-task.js +13 -0
  47. package/dist/_vendor/ailf-core/types/index.d.ts +45 -83
  48. package/dist/_vendor/ailf-core/types/index.js +8 -1
  49. package/dist/_vendor/ailf-core/types/plugin-registry.d.ts +257 -0
  50. package/dist/_vendor/ailf-core/types/plugin-registry.js +185 -0
  51. package/dist/_vendor/ailf-core/types/storage-schema.d.ts +199 -0
  52. package/dist/_vendor/ailf-core/types/storage-schema.js +39 -0
  53. package/dist/_vendor/ailf-core/types/task-graph.d.ts +86 -0
  54. package/dist/_vendor/ailf-core/types/task-graph.js +20 -0
  55. package/dist/_vendor/ailf-core/types/trace.d.ts +118 -0
  56. package/dist/_vendor/ailf-core/types/trace.js +18 -0
  57. package/dist/_vendor/ailf-core/types/variable-envelope.d.ts +80 -0
  58. package/dist/_vendor/ailf-core/types/variable-envelope.js +16 -0
  59. package/dist/_vendor/ailf-shared/dimension-names.d.ts +5 -18
  60. package/dist/_vendor/ailf-shared/dimension-names.js +6 -24
  61. package/dist/_vendor/ailf-shared/eval-modes.d.ts +38 -6
  62. package/dist/_vendor/ailf-shared/eval-modes.js +26 -2
  63. package/dist/_vendor/ailf-shared/index.d.ts +0 -1
  64. package/dist/_vendor/ailf-shared/index.js +0 -1
  65. package/dist/adapters/api-client/build-request.js +14 -13
  66. package/dist/adapters/config-sources/file-config-adapter.d.ts +20 -11
  67. package/dist/adapters/config-sources/file-config-adapter.js +39 -12
  68. package/dist/adapters/config-sources/index.d.ts +2 -0
  69. package/dist/adapters/config-sources/index.js +1 -0
  70. package/dist/adapters/config-sources/ts-config-loader.d.ts +59 -0
  71. package/dist/adapters/config-sources/ts-config-loader.js +141 -0
  72. package/dist/adapters/doc-fetchers/sanity-doc-fetcher.d.ts +3 -2
  73. package/dist/adapters/doc-fetchers/sanity-doc-fetcher.js +7 -2
  74. package/dist/adapters/task-sources/composite-task-source.d.ts +3 -3
  75. package/dist/adapters/task-sources/composite-task-source.js +1 -1
  76. package/dist/adapters/task-sources/content-lake-task-source.d.ts +7 -6
  77. package/dist/adapters/task-sources/content-lake-task-source.js +35 -39
  78. package/dist/adapters/task-sources/index.d.ts +3 -2
  79. package/dist/adapters/task-sources/index.js +3 -2
  80. package/dist/adapters/task-sources/repo-schemas.d.ts +218 -16
  81. package/dist/adapters/task-sources/repo-schemas.js +227 -19
  82. package/dist/adapters/task-sources/repo-task-source.d.ts +16 -12
  83. package/dist/adapters/task-sources/repo-task-source.js +92 -80
  84. package/dist/adapters/task-sources/repo-validation.d.ts +36 -5
  85. package/dist/adapters/task-sources/repo-validation.js +126 -5
  86. package/dist/adapters/task-sources/task-file-loader.d.ts +64 -0
  87. package/dist/adapters/task-sources/task-file-loader.js +83 -0
  88. package/dist/adapters/task-sources/yaml-task-source.d.ts +6 -6
  89. package/dist/adapters/task-sources/yaml-task-source.js +19 -16
  90. package/dist/cli.js +0 -2
  91. package/dist/commands/baseline.js +4 -1
  92. package/dist/commands/calculate-scores.js +1 -1
  93. package/dist/commands/coverage-audit.js +9 -1
  94. package/dist/commands/explain-handler.js +25 -23
  95. package/dist/commands/fetch-docs.js +3 -2
  96. package/dist/commands/generate-configs.js +1 -1
  97. package/dist/commands/init.d.ts +6 -4
  98. package/dist/commands/init.js +302 -23
  99. package/dist/commands/interactive.js +11 -7
  100. package/dist/commands/pipeline-action.d.ts +2 -0
  101. package/dist/commands/pipeline-action.js +16 -6
  102. package/dist/commands/pipeline.d.ts +1 -0
  103. package/dist/commands/pipeline.js +4 -2
  104. package/dist/commands/pr-comment.js +1 -1
  105. package/dist/commands/publish.js +2 -2
  106. package/dist/commands/readiness-report.js +13 -6
  107. package/dist/commands/validate-tasks.d.ts +2 -2
  108. package/dist/commands/validate-tasks.js +26 -15
  109. package/dist/composition-root.d.ts +13 -1
  110. package/dist/composition-root.js +99 -4
  111. package/dist/index.d.ts +41 -0
  112. package/dist/index.js +48 -0
  113. package/dist/orchestration/build-app-context.js +1 -0
  114. package/dist/orchestration/build-step-sequence.js +28 -8
  115. package/dist/orchestration/steps/calculate-scores-step.js +24 -11
  116. package/dist/orchestration/steps/fetch-docs-step.js +8 -7
  117. package/dist/orchestration/steps/gap-analysis-step.js +8 -7
  118. package/dist/orchestration/steps/generate-configs-step.d.ts +16 -3
  119. package/dist/orchestration/steps/generate-configs-step.js +261 -51
  120. package/dist/orchestration/steps/grader-consistency-step.js +7 -4
  121. package/dist/orchestration/steps/mirror-repo-tasks-step.js +1 -1
  122. package/dist/orchestration/steps/readiness-step.js +5 -6
  123. package/dist/orchestration/steps/run-eval-step.d.ts +1 -2
  124. package/dist/orchestration/steps/run-eval-step.js +8 -7
  125. package/dist/pipeline/cache.d.ts +1 -1
  126. package/dist/pipeline/cache.js +36 -8
  127. package/dist/pipeline/calculate-scores.d.ts +2 -4
  128. package/dist/pipeline/calculate-scores.js +43 -113
  129. package/dist/pipeline/checks.js +2 -2
  130. package/dist/pipeline/compare.js +8 -8
  131. package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.d.ts +10 -0
  132. package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js +288 -0
  133. package/dist/pipeline/compiler/__tests__/assertion-mapper.test.d.ts +9 -0
  134. package/dist/pipeline/compiler/__tests__/assertion-mapper.test.js +145 -0
  135. package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.d.ts +10 -0
  136. package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.js +314 -0
  137. package/dist/pipeline/compiler/__tests__/literacy-handler.test.d.ts +10 -0
  138. package/dist/pipeline/compiler/__tests__/literacy-handler.test.js +486 -0
  139. package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.d.ts +10 -0
  140. package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +392 -0
  141. package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.d.ts +9 -0
  142. package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.js +333 -0
  143. package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.d.ts +12 -0
  144. package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.js +210 -0
  145. package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.d.ts +7 -0
  146. package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.js +404 -0
  147. package/dist/pipeline/compiler/__tests__/scoring-bridge.test.d.ts +10 -0
  148. package/dist/pipeline/compiler/__tests__/scoring-bridge.test.js +184 -0
  149. package/dist/pipeline/compiler/__tests__/task-graph-builder.test.d.ts +8 -0
  150. package/dist/pipeline/compiler/__tests__/task-graph-builder.test.js +301 -0
  151. package/dist/pipeline/compiler/__tests__/telemetry.test.d.ts +9 -0
  152. package/dist/pipeline/compiler/__tests__/telemetry.test.js +503 -0
  153. package/dist/pipeline/compiler/assertion-mapper.d.ts +58 -0
  154. package/dist/pipeline/compiler/assertion-mapper.js +175 -0
  155. package/dist/pipeline/compiler/compiler-to-yaml.d.ts +51 -0
  156. package/dist/pipeline/compiler/compiler-to-yaml.js +222 -0
  157. package/dist/pipeline/compiler/config-loader.d.ts +56 -0
  158. package/dist/pipeline/compiler/config-loader.js +111 -0
  159. package/dist/pipeline/compiler/fixture-resolver.d.ts +41 -0
  160. package/dist/pipeline/compiler/fixture-resolver.js +113 -0
  161. package/dist/pipeline/compiler/hash.d.ts +11 -0
  162. package/dist/pipeline/compiler/hash.js +18 -0
  163. package/dist/pipeline/compiler/ignore-fields.d.ts +53 -0
  164. package/dist/pipeline/compiler/ignore-fields.js +113 -0
  165. package/dist/pipeline/compiler/index.d.ts +29 -0
  166. package/dist/pipeline/compiler/index.js +45 -0
  167. package/dist/pipeline/compiler/literacy-bridge.d.ts +102 -0
  168. package/dist/pipeline/compiler/literacy-bridge.js +172 -0
  169. package/dist/pipeline/compiler/mode-bases/agent-harness.d.ts +10 -0
  170. package/dist/pipeline/compiler/mode-bases/agent-harness.js +21 -0
  171. package/dist/pipeline/compiler/mode-bases/index.d.ts +4 -0
  172. package/dist/pipeline/compiler/mode-bases/index.js +4 -0
  173. package/dist/pipeline/compiler/mode-bases/knowledge-probe.d.ts +10 -0
  174. package/dist/pipeline/compiler/mode-bases/knowledge-probe.js +22 -0
  175. package/dist/pipeline/compiler/mode-bases/literacy.d.ts +12 -0
  176. package/dist/pipeline/compiler/mode-bases/literacy.js +78 -0
  177. package/dist/pipeline/compiler/mode-bases/mcp-server.d.ts +10 -0
  178. package/dist/pipeline/compiler/mode-bases/mcp-server.js +70 -0
  179. package/dist/pipeline/compiler/mode-handlers/__fixtures__/agent-harness-example-tasks.d.ts +14 -0
  180. package/dist/pipeline/compiler/mode-handlers/__fixtures__/agent-harness-example-tasks.js +152 -0
  181. package/dist/pipeline/compiler/mode-handlers/__fixtures__/knowledge-probe-example-tasks.d.ts +32 -0
  182. package/dist/pipeline/compiler/mode-handlers/__fixtures__/knowledge-probe-example-tasks.js +176 -0
  183. package/dist/pipeline/compiler/mode-handlers/__fixtures__/mcp-example-tasks.d.ts +49 -0
  184. package/dist/pipeline/compiler/mode-handlers/__fixtures__/mcp-example-tasks.js +259 -0
  185. package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.d.ts +43 -0
  186. package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.js +187 -0
  187. package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.d.ts +19 -0
  188. package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.js +138 -0
  189. package/dist/pipeline/compiler/mode-handlers/agent-harness/index.d.ts +16 -0
  190. package/dist/pipeline/compiler/mode-handlers/agent-harness/index.js +43 -0
  191. package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.d.ts +9 -0
  192. package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.js +29 -0
  193. package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.d.ts +12 -0
  194. package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.js +82 -0
  195. package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.d.ts +4 -0
  196. package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.js +19 -0
  197. package/dist/pipeline/compiler/mode-handlers/agent-harness/types.d.ts +49 -0
  198. package/dist/pipeline/compiler/mode-handlers/agent-harness/types.js +4 -0
  199. package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.d.ts +9 -0
  200. package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.js +16 -0
  201. package/dist/pipeline/compiler/mode-handlers/index.d.ts +15 -0
  202. package/dist/pipeline/compiler/mode-handlers/index.js +19 -0
  203. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.d.ts +16 -0
  204. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.js +61 -0
  205. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.d.ts +18 -0
  206. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.js +112 -0
  207. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.d.ts +26 -0
  208. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.js +49 -0
  209. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.d.ts +9 -0
  210. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.js +28 -0
  211. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.d.ts +44 -0
  212. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.js +4 -0
  213. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.d.ts +9 -0
  214. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.js +24 -0
  215. package/dist/pipeline/compiler/mode-handlers/literacy/assertions.d.ts +18 -0
  216. package/dist/pipeline/compiler/mode-handlers/literacy/assertions.js +118 -0
  217. package/dist/pipeline/compiler/mode-handlers/literacy/compiler.d.ts +14 -0
  218. package/dist/pipeline/compiler/mode-handlers/literacy/compiler.js +105 -0
  219. package/dist/pipeline/compiler/mode-handlers/literacy/index.d.ts +11 -0
  220. package/dist/pipeline/compiler/mode-handlers/literacy/index.js +38 -0
  221. package/dist/pipeline/compiler/mode-handlers/literacy/prompts.d.ts +9 -0
  222. package/dist/pipeline/compiler/mode-handlers/literacy/prompts.js +74 -0
  223. package/dist/pipeline/compiler/mode-handlers/literacy/types.d.ts +41 -0
  224. package/dist/pipeline/compiler/mode-handlers/literacy/types.js +4 -0
  225. package/dist/pipeline/compiler/mode-handlers/literacy/validation.d.ts +12 -0
  226. package/dist/pipeline/compiler/mode-handlers/literacy/validation.js +28 -0
  227. package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.d.ts +42 -0
  228. package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.js +334 -0
  229. package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.d.ts +19 -0
  230. package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.js +100 -0
  231. package/dist/pipeline/compiler/mode-handlers/mcp-server/index.d.ts +27 -0
  232. package/dist/pipeline/compiler/mode-handlers/mcp-server/index.js +54 -0
  233. package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.d.ts +8 -0
  234. package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.js +28 -0
  235. package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.d.ts +28 -0
  236. package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.js +104 -0
  237. package/dist/pipeline/compiler/mode-handlers/mcp-server/types.d.ts +37 -0
  238. package/dist/pipeline/compiler/mode-handlers/mcp-server/types.js +4 -0
  239. package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.d.ts +9 -0
  240. package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.js +43 -0
  241. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.d.ts +33 -0
  242. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.js +174 -0
  243. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.d.ts +19 -0
  244. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.js +95 -0
  245. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.d.ts +19 -0
  246. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.js +172 -0
  247. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.d.ts +14 -0
  248. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.js +16 -0
  249. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.d.ts +93 -0
  250. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.js +4 -0
  251. package/dist/pipeline/compiler/preset-loader.d.ts +22 -0
  252. package/dist/pipeline/compiler/preset-loader.js +99 -0
  253. package/dist/pipeline/compiler/presets/index.d.ts +9 -0
  254. package/dist/pipeline/compiler/presets/index.js +8 -0
  255. package/dist/pipeline/compiler/presets/sanity-literacy.d.ts +42 -0
  256. package/dist/pipeline/compiler/presets/sanity-literacy.js +208 -0
  257. package/dist/pipeline/compiler/promptfoo-compiler.d.ts +96 -0
  258. package/dist/pipeline/compiler/promptfoo-compiler.js +230 -0
  259. package/dist/pipeline/compiler/provider-assembler.d.ts +39 -0
  260. package/dist/pipeline/compiler/provider-assembler.js +137 -0
  261. package/dist/pipeline/compiler/sandbox/docker-sandbox.d.ts +21 -0
  262. package/dist/pipeline/compiler/sandbox/docker-sandbox.js +136 -0
  263. package/dist/pipeline/compiler/sandbox/fixture-provisioner.d.ts +69 -0
  264. package/dist/pipeline/compiler/sandbox/fixture-provisioner.js +189 -0
  265. package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.d.ts +20 -0
  266. package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.js +114 -0
  267. package/dist/pipeline/compiler/sandbox/index.d.ts +10 -0
  268. package/dist/pipeline/compiler/sandbox/index.js +11 -0
  269. package/dist/pipeline/compiler/sandbox/sandbox-selector.d.ts +35 -0
  270. package/dist/pipeline/compiler/sandbox/sandbox-selector.js +86 -0
  271. package/dist/pipeline/compiler/sandbox/sandbox-strategy.d.ts +81 -0
  272. package/dist/pipeline/compiler/sandbox/sandbox-strategy.js +15 -0
  273. package/dist/pipeline/compiler/sandbox/tempdir-sandbox.d.ts +20 -0
  274. package/dist/pipeline/compiler/sandbox/tempdir-sandbox.js +74 -0
  275. package/dist/pipeline/compiler/scoring-bridge.d.ts +49 -0
  276. package/dist/pipeline/compiler/scoring-bridge.js +114 -0
  277. package/dist/pipeline/compiler/task-graph-builder.d.ts +54 -0
  278. package/dist/pipeline/compiler/task-graph-builder.js +291 -0
  279. package/dist/pipeline/compiler/telemetry/cost-tracker.d.ts +90 -0
  280. package/dist/pipeline/compiler/telemetry/cost-tracker.js +146 -0
  281. package/dist/pipeline/compiler/telemetry/index.d.ts +14 -0
  282. package/dist/pipeline/compiler/telemetry/index.js +19 -0
  283. package/dist/pipeline/compiler/telemetry/redactor.d.ts +58 -0
  284. package/dist/pipeline/compiler/telemetry/redactor.js +222 -0
  285. package/dist/pipeline/compiler/telemetry/tool-classifier.d.ts +32 -0
  286. package/dist/pipeline/compiler/telemetry/tool-classifier.js +120 -0
  287. package/dist/pipeline/compiler/telemetry/trace-collector.d.ts +75 -0
  288. package/dist/pipeline/compiler/telemetry/trace-collector.js +297 -0
  289. package/dist/pipeline/compiler/telemetry/trace-store.d.ts +78 -0
  290. package/dist/pipeline/compiler/telemetry/trace-store.js +85 -0
  291. package/dist/pipeline/compiler/variable-resolver.d.ts +46 -0
  292. package/dist/pipeline/compiler/variable-resolver.js +115 -0
  293. package/dist/pipeline/coverage-audit.d.ts +15 -5
  294. package/dist/pipeline/coverage-audit.js +41 -22
  295. package/dist/pipeline/eval-constants.d.ts +16 -6
  296. package/dist/pipeline/eval-constants.js +25 -4
  297. package/dist/pipeline/eval-fingerprint.d.ts +2 -2
  298. package/dist/pipeline/eval-fingerprint.js +8 -9
  299. package/dist/pipeline/expand-tasks.d.ts +19 -10
  300. package/dist/pipeline/expand-tasks.js +34 -28
  301. package/dist/pipeline/gap-analysis.d.ts +1 -1
  302. package/dist/pipeline/gap-analysis.js +2 -2
  303. package/dist/pipeline/generate-configs.d.ts +22 -4
  304. package/dist/pipeline/generate-configs.js +53 -24
  305. package/dist/pipeline/grader-api.d.ts +3 -3
  306. package/dist/pipeline/grader-api.js +5 -12
  307. package/dist/pipeline/grader-compare-runner.js +20 -27
  308. package/dist/pipeline/grader-comparison.d.ts +4 -8
  309. package/dist/pipeline/grader-comparison.js +11 -17
  310. package/dist/pipeline/grader-consistency-runner.d.ts +2 -3
  311. package/dist/pipeline/grader-consistency-runner.js +16 -20
  312. package/dist/pipeline/grader-consistency.d.ts +6 -10
  313. package/dist/pipeline/grader-consistency.js +13 -32
  314. package/dist/pipeline/grader-sensitivity-runner.js +7 -5
  315. package/dist/pipeline/grader-sensitivity.d.ts +2 -6
  316. package/dist/pipeline/grader-sensitivity.js +10 -10
  317. package/dist/pipeline/grader-validate-runner.js +7 -5
  318. package/dist/pipeline/grader-validation.d.ts +2 -6
  319. package/dist/pipeline/grader-validation.js +14 -22
  320. package/dist/pipeline/map-request-to-config.js +7 -1
  321. package/dist/pipeline/mirror-repo-tasks.d.ts +13 -13
  322. package/dist/pipeline/mirror-repo-tasks.js +22 -21
  323. package/dist/pipeline/normalize-mode.d.ts +49 -0
  324. package/dist/pipeline/normalize-mode.js +64 -0
  325. package/dist/pipeline/plan.d.ts +5 -2
  326. package/dist/pipeline/plan.js +134 -78
  327. package/dist/pipeline/pr-comment.js +2 -0
  328. package/dist/pipeline/profile-resolution.d.ts +22 -14
  329. package/dist/pipeline/profile-resolution.js +41 -19
  330. package/dist/pipeline/provenance.d.ts +2 -2
  331. package/dist/pipeline/provenance.js +12 -17
  332. package/dist/pipeline/release-report.js +4 -4
  333. package/dist/pipeline/repo-threshold-evaluator.d.ts +1 -1
  334. package/dist/pipeline/repo-threshold-evaluator.js +1 -1
  335. package/dist/pipeline/rubric-loader.d.ts +20 -0
  336. package/dist/pipeline/rubric-loader.js +37 -0
  337. package/dist/pipeline/validate.d.ts +4 -4
  338. package/dist/pipeline/validate.js +64 -53
  339. package/dist/schedules/loader.js +18 -8
  340. package/dist/scripts/migrate-task-mode.d.ts +24 -0
  341. package/dist/scripts/migrate-task-mode.js +85 -0
  342. package/dist/scripts/migrate-tasks-to-content-lake.js +11 -10
  343. package/dist/scripts/validate-task-sources.d.ts +1 -1
  344. package/dist/scripts/validate-task-sources.js +15 -15
  345. package/dist/sinks/loader.js +5 -7
  346. package/dist/sources.d.ts +7 -7
  347. package/dist/sources.js +22 -24
  348. package/dist/webhook/dispatch.js +2 -1
  349. package/package.json +15 -4
  350. package/tasks/knowledge-probe/define-type-api.task.ts +55 -0
  351. package/tasks/knowledge-probe/groq-projections.task.ts +59 -0
  352. package/tasks/literacy/frameworks.task.ts +128 -0
  353. package/tasks/literacy/functions.task.ts +69 -0
  354. package/tasks/literacy/groq.task.ts +258 -0
  355. package/tasks/literacy/nextjs-live.task.ts +75 -0
  356. package/tasks/literacy/studio-setup.task.ts +131 -0
  357. package/tasks/literacy/visual-editing.task.ts +146 -0
  358. package/config/features.yaml +0 -116
  359. package/config/models.yaml +0 -116
  360. package/config/prompts.yaml +0 -75
  361. package/config/rubrics.yaml +0 -81
  362. package/config/schedules.yaml +0 -43
  363. package/config/sinks.yaml +0 -54
  364. package/config/sources.yaml +0 -51
  365. package/config/thresholds.yaml +0 -49
  366. package/dist/_vendor/ailf-tasks/cli.d.ts +0 -8
  367. package/dist/_vendor/ailf-tasks/cli.js +0 -61
  368. package/dist/_vendor/ailf-tasks/index.d.ts +0 -13
  369. package/dist/_vendor/ailf-tasks/index.js +0 -16
  370. package/dist/_vendor/ailf-tasks/parser.d.ts +0 -27
  371. package/dist/_vendor/ailf-tasks/parser.js +0 -73
  372. package/dist/_vendor/ailf-tasks/schemas.d.ts +0 -198
  373. package/dist/_vendor/ailf-tasks/schemas.js +0 -180
  374. package/dist/_vendor/ailf-tasks/validation.d.ts +0 -47
  375. package/dist/_vendor/ailf-tasks/validation.js +0 -162
  376. package/dist/agent-observer/test-imports.d.ts +0 -7
  377. package/dist/agent-observer/test-imports.js +0 -185
@@ -0,0 +1,237 @@
1
+ /**
2
+ * 4-tier scoring engine — unified scoring across all evaluation modes.
3
+ *
4
+ * Tier 1: Assertion-level (atomic pass/fail + optional numeric score)
5
+ * Tier 2: Dimension-level (aggregated per scoring dimension)
6
+ * Tier 3: Task-level (weighted composite of dimensions)
7
+ * Tier 4: Suite/Area-level (aggregated across tasks)
8
+ *
9
+ * This engine is mode-agnostic — it works for literacy, MCP server,
10
+ * agent harness, knowledge probe, and custom modes.
11
+ *
12
+ * @see docs/design-docs/architecture-overhaul/scoring-rubrics-assertions.md
13
+ */
14
/**
 * Aggregate assertion results into per-dimension scores (Tier 1 → Tier 2).
 *
 * Assertions are grouped by their `dimension` property (a falsy dimension is
 * grouped under "uncategorized"). Each group is scored by `aggregateScores`
 * with the configured strategy, and the result is sorted by dimension id
 * using `localeCompare`.
 *
 * @param {Array<object>} assertions - Assertion results. This function reads
 *   `dimension` and `pass` from each entry and passes the grouped entries to
 *   `aggregateScores`.
 * @param {object} [options]
 * @param {string} [options.defaultAggregation="weighted-mean"] - Strategy
 *   applied to every dimension.
 * @param {Record<string, string>} [options.dimensionLabels] - Display labels
 *   keyed by dimension id; a dimension without a label uses its id.
 * @returns {Array<object>} One entry per dimension with `dimensionId`,
 *   `label`, `score`, `assertionCount`, `passCount`, `aggregation` and the
 *   grouped `assertions`.
 */
export function aggregateDimensions(assertions, options) {
    const defaultAgg = options?.defaultAggregation ?? "weighted-mean";
    const labels = options?.dimensionLabels ?? {};
    // Group by dimension, preserving first-seen order within each group.
    const groups = new Map();
    for (const a of assertions) {
        const dim = a.dimension || "uncategorized";
        const existing = groups.get(dim);
        if (existing) {
            existing.push(a);
        }
        else {
            groups.set(dim, [a]);
        }
    }
    const dimensions = [];
    for (const [dimId, dimAssertions] of groups) {
        // Only own properties count as labels: dimension ids are arbitrary
        // strings, so an id such as "constructor" or "toString" must not pick
        // up a member inherited from Object.prototype.
        const ownLabel = Object.prototype.hasOwnProperty.call(labels, dimId)
            ? labels[dimId]
            : undefined;
        dimensions.push({
            dimensionId: dimId,
            label: ownLabel ?? dimId,
            score: aggregateScores(dimAssertions, defaultAgg),
            assertionCount: dimAssertions.length,
            passCount: dimAssertions.filter((a) => a.pass).length,
            aggregation: defaultAgg,
            assertions: dimAssertions,
        });
    }
    return dimensions.sort((a, b) => a.dimensionId.localeCompare(b.dimensionId));
}
50
/**
 * Compute a weighted task score from dimension scores (Tier 2 → Tier 3).
 *
 * Each dimension contributes `score * weight`, where the weight is looked up
 * in `options.weights` by dimension id. Only own, finite, positive weights
 * are used; anything else (missing key, inherited property, NaN, negative)
 * counts as weight 0 and the dimension is skipped. The weighted sum is
 * divided by the total weight when that total differs from 1 by more than
 * 0.001. When no dimension carries weight the score is 0 and a warning is
 * attached.
 *
 * @param {Array<{dimensionId: string, score: number}>} dimensions - Dimension scores.
 * @param {object} options
 * @param {string} options.taskId - Task identifier, echoed in the result and warnings.
 * @param {Record<string, number>} options.weights - Weight per dimension id.
 * @param {number} [options.threshold=0.5] - Minimum score for `passesThreshold`.
 * @param {string} [options.area] - Included in the result only when truthy.
 * @param {string} [options.weightSource="default"] - Echoed in the result.
 * @returns {object} Result with `taskId`, optional `area`, `score`,
 *   `dimensions`, `weights`, `weightSource`, `passesThreshold`, `threshold`
 *   and, when any were raised, `warnings`.
 */
export function computeTaskScore(dimensions, options) {
    const { weights, taskId } = options;
    const threshold = options.threshold ?? 0.5;
    const warnings = [];
    let weightedSum = 0;
    let totalWeight = 0;
    for (const dim of dimensions) {
        // Own-property check: a dimension id such as "constructor" must not
        // resolve to a member inherited from Object.prototype.
        const rawWeight = Object.prototype.hasOwnProperty.call(weights, dim.dimensionId)
            ? weights[dim.dimensionId]
            : 0;
        const weight = Number.isFinite(rawWeight) && rawWeight > 0 ? rawWeight : 0;
        // Skip unweighted dimensions entirely so a NaN score on a dimension
        // that does not count cannot poison the sum (NaN * 0 is NaN).
        if (weight === 0) {
            continue;
        }
        weightedSum += dim.score * weight;
        totalWeight += weight;
    }
    // Warn when no dimensions match any weight key — likely misconfiguration
    if (totalWeight === 0 && dimensions.length > 0) {
        const dimIds = dimensions.map((d) => d.dimensionId).join(", ");
        const weightKeys = Object.keys(weights).join(", ");
        warnings.push(`Task "${taskId}": no dimensions matched weight keys. ` +
            `Dimensions: [${dimIds}], weights: [${weightKeys}]. Score will be 0.`);
    }
    // Normalize only when the weights do not already sum to (approximately) 1;
    // a zero total yields 0 rather than dividing by zero.
    let score = 0;
    if (totalWeight > 0) {
        score = Math.abs(totalWeight - 1.0) > 0.001
            ? weightedSum / totalWeight
            : weightedSum;
    }
    return {
        taskId,
        ...(options.area ? { area: options.area } : {}),
        score,
        dimensions,
        weights,
        weightSource: options.weightSource ?? "default",
        passesThreshold: score >= threshold,
        threshold,
        ...(warnings.length > 0 ? { warnings } : {}),
    };
}
94
/**
 * Aggregate task scores into area scores (Tier 3 → Tier 4).
 *
 * Tasks are grouped by `task.area`; when that is null/undefined the area is
 * derived from the task id via `extractArea`. Each area's score is the plain
 * (unweighted) mean of its task scores. The result is sorted by area id
 * using `localeCompare`.
 *
 * @param {Array<object>} tasks - Task scores; reads `area`, `taskId`, `score`
 *   and `passesThreshold` from each entry.
 * @param {Record<string, number>} [previousScores] - Earlier area scores keyed
 *   by area id, used to compute `delta`.
 * @returns {Array<object>} One entry per area with `areaId`, `score`,
 *   `taskCount`, `passingTaskCount`, `tasks` and `delta` (current minus
 *   previous score, or null when no previous score exists for the area).
 */
export function aggregateAreas(tasks, previousScores) {
    // Group tasks by area (explicit metadata first, task id prefix otherwise).
    const groups = new Map();
    for (const task of tasks) {
        const area = task.area ?? extractArea(task.taskId);
        const existing = groups.get(area);
        if (existing) {
            existing.push(task);
        }
        else {
            groups.set(area, [task]);
        }
    }
    const areas = [];
    for (const [areaId, areaTasks] of groups) {
        // A group is only created when its first task is inserted, so it is
        // never empty and the division below cannot divide by zero.
        const score = areaTasks.reduce((sum, t) => sum + t.score, 0) / areaTasks.length;
        // Own-property check: an area named e.g. "constructor" must not pick
        // up a member inherited from Object.prototype as its previous score.
        const hasPrevious = previousScores != null &&
            Object.prototype.hasOwnProperty.call(previousScores, areaId);
        const previousScore = hasPrevious ? previousScores[areaId] ?? null : null;
        areas.push({
            areaId,
            score,
            taskCount: areaTasks.length,
            passingTaskCount: areaTasks.filter((t) => t.passesThreshold).length,
            tasks: areaTasks,
            delta: previousScore !== null ? score - previousScore : null,
        });
    }
    return areas.sort((a, b) => a.areaId.localeCompare(b.areaId));
}
127
+ // ---------------------------------------------------------------------------
128
+ // Score normalization
129
+ // ---------------------------------------------------------------------------
130
/**
 * Normalize an assertion score to the [0, 1] range.
 *
 * Different assertion types produce scores in different ranges:
 * - Boolean (contains, contains-all, contains-any, equals, is-json, regex):
 *   any positive value becomes 1, everything else 0.
 * - LLM graded (g-eval, llm-rubric, model-graded-closedqa,
 *   model-graded-factuality): values above 1 are treated as a 0-100 scale
 *   and divided by 100; the result is then clamped.
 * - similar and any other (custom) type: clamped as-is.
 *
 * A score that is NaN (or coerces to NaN) normalizes to 0 so it can never
 * leak into downstream averages.
 *
 * @param {number} rawScore - Score as reported by the assertion.
 * @param {string} assertionType - Assertion type identifier.
 * @returns {number} A value in [0, 1].
 */
export function normalizeScore(rawScore, assertionType) {
    const value = Number(rawScore);
    if (Number.isNaN(value)) {
        return 0;
    }
    const clampUnit = (v) => Math.max(0, Math.min(1, v));
    switch (assertionType) {
        case "g-eval":
        case "llm-rubric":
        case "model-graded-closedqa":
        case "model-graded-factuality":
            // LLM rubrics typically return 0-100; clamp afterwards so
            // out-of-range grader output (e.g. 150 or -5) stays in [0, 1].
            return clampUnit(value > 1 ? value / 100 : value);
        case "contains":
        case "contains-all":
        case "contains-any":
        case "equals":
        case "is-json":
        case "regex":
            // Boolean assertions: 0 or 1
            return value > 0 ? 1 : 0;
        case "similar":
        default:
            // Similarity and custom assertions: clamp to [0, 1]
            return clampUnit(value);
    }
}
163
/**
 * Compute an ensemble score from multiple grader outputs.
 *
 * Returns `{ score: 0, agreement: 0 }` for an empty list and
 * `{ score: scores[0], agreement: 1 }` for a single score. Otherwise the
 * scores are combined with the requested aggregation and `agreement` is
 * `1 - population standard deviation`, floored at 0. The standard deviation
 * is not rescaled; for scores confined to [0, 1] it cannot exceed 0.5.
 *
 * @param {number[]} scores - Individual grader scores.
 * @param {"mean" | "median" | "max"} [aggregation="mean"] - How to combine them.
 * @returns {{score: number, agreement: number}}
 * @throws {RangeError} When two or more scores are given with an
 *   unrecognized aggregation (previously this silently produced an
 *   undefined score).
 */
export function computeEnsembleScore(scores, aggregation = "mean") {
    if (scores.length === 0)
        return { score: 0, agreement: 0 };
    if (scores.length === 1)
        return { score: scores[0], agreement: 1 };
    // The mean feeds both the "mean" aggregation and the agreement metric.
    const mean = scores.reduce((a, b) => a + b, 0) / scores.length;
    let score;
    switch (aggregation) {
        case "mean":
            score = mean;
            break;
        case "median": {
            // Sort a copy so the caller's array is left untouched.
            const sorted = [...scores].sort((a, b) => a - b);
            const mid = Math.floor(sorted.length / 2);
            score =
                sorted.length % 2 === 0
                    ? (sorted[mid - 1] + sorted[mid]) / 2
                    : sorted[mid];
            break;
        }
        case "max":
            score = Math.max(...scores);
            break;
        default:
            throw new RangeError(`Unknown ensemble aggregation "${aggregation}" (expected "mean", "median" or "max")`);
    }
    const variance = scores.reduce((sum, s) => sum + (s - mean) ** 2, 0) / scores.length;
    const agreement = Math.max(0, 1 - Math.sqrt(variance));
    return { score, agreement };
}
196
+ // ---------------------------------------------------------------------------
197
+ // Helpers
198
+ // ---------------------------------------------------------------------------
199
/**
 * Collapse a group of assertion results into one score.
 *
 * Only assertions carrying a real numeric score (a number that is not NaN)
 * take part in the aggregation. When none do, the group's pass rate is used
 * instead (0 for an empty group).
 *
 * @param {Array<object>} assertions - Assertion results; reads `score`,
 *   `weight` and `pass`.
 * @param {"mean" | "weighted-mean" | "min" | "max"} strategy - Aggregation to apply.
 * @returns {number} The aggregated score.
 * @throws {RangeError} When scored assertions exist but `strategy` is not
 *   one of the supported values (previously this returned undefined).
 */
function aggregateScores(assertions, strategy) {
    // Null, undefined and NaN scores all mean "no numeric score" here;
    // letting any of them through would turn the aggregate into NaN.
    const scored = assertions.filter((a) => typeof a.score === "number" && !Number.isNaN(a.score));
    if (scored.length === 0) {
        // Fall back to pass rate
        return assertions.length > 0
            ? assertions.filter((a) => a.pass).length / assertions.length
            : 0;
    }
    const plainMean = () => scored.reduce((sum, a) => sum + a.score, 0) / scored.length;
    switch (strategy) {
        case "mean":
            return plainMean();
        case "weighted-mean": {
            const totalWeight = scored.reduce((sum, a) => sum + a.weight, 0);
            // Weights that sum to zero, or are missing/non-numeric (the sum is
            // then not a finite number), cannot be used as a divisor: fall
            // back to the unweighted mean.
            if (!Number.isFinite(totalWeight) || totalWeight === 0) {
                return plainMean();
            }
            return scored.reduce((sum, a) => sum + a.score * a.weight, 0) / totalWeight;
        }
        case "min":
            return Math.min(...scored.map((a) => a.score));
        case "max":
            return Math.max(...scored.map((a) => a.score));
        default:
            throw new RangeError(`Unknown aggregation strategy "${strategy}" (expected "mean", "weighted-mean", "min" or "max")`);
    }
}
225
/**
 * Extract the area name from a task ID.
 *
 * Uses the text before the first hyphen (e.g., "groq-blog-queries" → "groq").
 * This works for single-word areas but fails for multi-word areas
 * (e.g., "content-lake-queries" → "content" instead of "content-lake").
 *
 * Returns "general" when the prefix is empty or when `taskId` is not a
 * string, so a task without a usable id is still grouped instead of
 * throwing.
 *
 * TODO: Use explicit area metadata from task definitions instead of parsing taskId.
 *
 * @param {string} taskId - Task identifier.
 * @returns {string} The derived area name, or "general".
 */
function extractArea(taskId) {
    if (typeof taskId !== "string") {
        return "general";
    }
    const hyphenAt = taskId.indexOf("-");
    const prefix = hyphenAt === -1 ? taskId : taskId.slice(0, hyphenAt);
    return prefix || "general";
}
@@ -9,13 +9,26 @@
9
9
  */
10
10
  import type { FeatureScore } from "../types/index.js";
11
11
  import type { ActualScoreEntry, ComponentResult, TestResult, UrlMetadata } from "../types/scoring-input.js";
12
+ /**
13
+ * Extract dimension names from a scoring profile's weight map.
14
+ *
15
+ * Scoring profiles (defined in config/rubrics.ts) map dimension names
16
+ * to numeric weights. This function returns those dimension names so
17
+ * callers can work with dynamic dimensions instead of hardcoded ones.
18
+ */
19
+ export declare function extractDimensions(profile: Record<string, number>): string[];
12
20
  /**
13
21
  * Classify a grading component into a scoring dimension.
14
22
  *
15
23
  * Prefers structured metadata (Approach 5) over heuristic string matching.
16
- * Returns null if the component doesn't map to a known dimension.
24
+ * Returns the dimension as a kebab-case string, or null if the component
25
+ * doesn't map to any dimension.
26
+ *
27
+ * Returns `string | null` so non-literacy scoring profiles (MCP, agent,
28
+ * knowledge-probe) can define arbitrary dimension names in metadata
29
+ * without requiring changes here.
17
30
  */
18
- export declare function classifyRubric(component: ComponentResult): "codeCorrectness" | "docCoverage" | "taskCompletion" | null;
31
+ export declare function classifyRubric(component: ComponentResult): string | null;
19
32
  /**
20
33
  * Detect the feature area from a test description string.
21
34
  *
@@ -8,40 +8,50 @@
8
8
  * the Ports & Adapters migration (Phase 4e).
9
9
  */
10
10
  // ---------------------------------------------------------------------------
11
+ // Dimension extraction
12
+ // ---------------------------------------------------------------------------
13
/**
 * List the dimension names present in a scoring profile.
 *
 * A scoring profile is a weight map keyed by dimension name; this returns
 * the map's own enumerable keys so callers can iterate whatever dimensions
 * the profile declares rather than a hardcoded set.
 *
 * @param profile - Object mapping dimension name → numeric weight.
 * @returns The dimension names, in the object's key order.
 */
export function extractDimensions(profile) {
    const dimensionNames = Object.keys(profile);
    return dimensionNames;
}
23
+ // ---------------------------------------------------------------------------
11
24
  // Rubric classification
12
25
  // ---------------------------------------------------------------------------
13
26
/**
 * Classify a grading component into a scoring dimension.
 *
 * Structured metadata wins: when `assertion.metadata.dimension` is a
 * non-empty string it is returned verbatim, so any dimension name is valid
 * and non-literacy scoring profiles can pass through names such as
 * 'input-validation' without changes here.
 *
 * Otherwise the assertion's `value` is matched heuristically
 * (case-insensitive substring search, kept for backward compatibility).
 * A `value` that is not a string cannot be matched and yields `null`
 * instead of throwing.
 *
 * @param component - Grading component; only `assertion.metadata.dimension`
 *   and `assertion.value` are read.
 * @returns The dimension as a kebab-case string, or `null` if the component
 *   doesn't map to any dimension.
 */
export function classifyRubric(component) {
    const assertion = component.assertion;
    // Prefer structured metadata — guard the type so the `string | null`
    // contract holds even if metadata carries a non-string value.
    const dimension = assertion?.metadata?.dimension;
    if (typeof dimension === "string" && dimension !== "") {
        return dimension;
    }
    // Fallback: heuristic name matching (for backward compatibility)
    const rawValue = assertion?.value;
    if (typeof rawValue !== "string") {
        return null;
    }
    const value = rawValue.toLowerCase();
    if (value.includes("task completion")) {
        return "task-completion";
    }
    if (value.includes("code correctness")) {
        return "code-correctness";
    }
    if (value.includes("documentation coverage") ||
        value.includes("hallucinate")) {
        return "doc-coverage";
    }
    return null;
}
@@ -0,0 +1,137 @@
1
+ /**
2
+ * Branded ID types — nominal typing for entity identifiers.
3
+ *
4
+ * All entity IDs use branded types to prevent accidental misuse.
5
+ * A `TaskId` cannot be passed where a `RunId` is expected, even
6
+ * though both are strings at runtime.
7
+ *
8
+ * Constructor functions validate format and return `Result<T, E>` —
9
+ * parse-don't-validate at the boundary, then pass branded values
10
+ * through the pipeline.
11
+ *
12
+ * The `Brand` utility and `Result` type are defined here as the
13
+ * foundation. Existing branded types in the codebase (`ReportId`,
14
+ * `ISOTimestamp`) use inline branding — those will be migrated to
15
+ * use this utility in Phase 7.
16
+ *
17
+ * @see docs/design-docs/architecture-overhaul/domain-model.md (canonical)
18
+ * @see docs/design-docs/parse-dont-validate.md (design principle)
19
+ */
20
+ /** Unique symbol for nominal type branding */
21
+ declare const __brand: unique symbol;
22
+ /**
23
+ * Brand a base type `T` with a nominal tag `B`.
24
+ *
25
+ * At runtime, branded values are identical to their base type.
26
+ * At compile time, `Brand<string, "TaskId">` is incompatible with
27
+ * `Brand<string, "RunId">` — preventing accidental ID swaps.
28
+ */
29
+ export type Brand<T, B extends string> = T & {
30
+ readonly [__brand]: B;
31
+ };
32
+ /** Unique identifier for an evaluation task */
33
+ export type TaskId = Brand<string, "TaskId">;
34
+ /** URL-safe slug for a task (derived from title) */
35
+ export type TaskSlug = Brand<string, "TaskSlug">;
36
+ /** Unique identifier for an evaluation suite */
37
+ export type SuiteId = Brand<string, "SuiteId">;
38
+ /** Unique identifier for an evaluation run */
39
+ export type RunId = Brand<string, "RunId">;
40
+ /** Content-addressable fingerprint for a run's inputs */
41
+ export type RunFingerprint = Brand<string, "RunFingerprint">;
42
+ /** Unique identifier for a single task × provider result */
43
+ export type ResultId = Brand<string, "ResultId">;
44
+ /** Unique identifier for a trace (observability record) */
45
+ export type TraceId = Brand<string, "TraceId">;
46
+ /**
47
+ * Unique identifier for a published report (UUID v7).
48
+ *
49
+ * Note: An existing `ReportId` branded type is defined in
50
+ * `packages/core/src/types/index.ts` using inline branding.
51
+ * This definition uses the `Brand` utility for consistency.
52
+ * Phase 7 will unify them.
53
+ */
54
+ export type NewReportId = Brand<string, "ReportId">;
55
+ /** Unique identifier for a provider (LLM, MCP server, agent harness) */
56
+ export type ProviderId = Brand<string, "ProviderId">;
57
+ /** Unique identifier for a prompt template */
58
+ export type PromptId = Brand<string, "PromptId">;
59
+ /** Unique identifier for a rubric scoring template */
60
+ export type RubricId = Brand<string, "RubricId">;
61
+ /** Unique identifier for a fixture (test data) */
62
+ export type FixtureId = Brand<string, "FixtureId">;
63
+ /** Unique identifier for a build artifact */
64
+ export type ArtifactId = Brand<string, "ArtifactId">;
65
+ /**
66
+ * A success result containing a value.
67
+ */
68
+ export interface Ok<T> {
69
+ readonly ok: true;
70
+ readonly value: T;
71
+ }
72
+ /**
73
+ * A failure result containing an error.
74
+ */
75
+ export interface Err<E> {
76
+ readonly ok: false;
77
+ readonly error: E;
78
+ }
79
+ /** Discriminated union for parse results — parse-don't-validate pattern */
80
+ export type Result<T, E> = Ok<T> | Err<E>;
81
+ /** Construct a success result */
82
+ export declare function ok<T>(value: T): Ok<T>;
83
+ /** Construct a failure result */
84
+ export declare function err<E>(error: E): Err<E>;
85
+ /** Error returned when an ID string fails format validation */
86
+ export interface IdValidationError {
87
+ /** Error code identifying the specific validation failure */
88
+ code: string;
89
+ /** The raw input that failed validation */
90
+ raw: string;
91
+ /** Human-readable error message */
92
+ message: string;
93
+ }
94
+ /**
95
+ * Parse a raw string into a `TaskId`.
96
+ *
97
+ * Valid format: 1–128 lowercase alphanumeric characters or hyphens, starting with a letter or digit.
98
+ * Examples: `"groq-projection-basics"`, `"mcp-server-tools-list"`
99
+ */
100
+ export declare function taskId(raw: string): Result<TaskId, IdValidationError>;
101
+ /**
102
+ * Parse a raw string into a `RunId`.
103
+ *
104
+ * Valid format: `run_` prefix followed by at least 8 alphanumeric characters.
105
+ */
106
+ export declare function runId(raw: string): Result<RunId, IdValidationError>;
107
+ /**
108
+ * Parse a raw string into a `SuiteId`.
109
+ *
110
+ * Valid format: `suite_` prefix followed by at least 4 alphanumeric characters.
111
+ */
112
+ export declare function suiteId(raw: string): Result<SuiteId, IdValidationError>;
113
+ /**
114
+ * Parse a raw string into a `ResultId`.
115
+ *
116
+ * Valid format: `res_` prefix followed by at least 8 alphanumeric characters.
117
+ */
118
+ export declare function resultId(raw: string): Result<ResultId, IdValidationError>;
119
+ /**
120
+ * Parse a raw string into a `TraceId`.
121
+ *
122
+ * Valid format: `trace_` prefix followed by at least 8 alphanumeric characters.
123
+ */
124
+ export declare function traceId(raw: string): Result<TraceId, IdValidationError>;
125
+ /**
126
+ * Parse a raw string into a `ProviderId`.
127
+ *
128
+ * Valid format: 1–128 characters drawn from letters, digits, colons, dots, underscores, and hyphens, starting with a letter or digit (e.g., `"openai:chat:gpt-4o"`).
129
+ */
130
+ export declare function providerId(raw: string): Result<ProviderId, IdValidationError>;
131
+ /**
132
+ * Parse a raw string into a `FixtureId`.
133
+ *
134
+ * Valid format: 1–128 lowercase alphanumeric characters or hyphens, starting with a letter or digit.
135
+ */
136
+ export declare function fixtureId(raw: string): Result<FixtureId, IdValidationError>;
137
+ export {};
@@ -0,0 +1,136 @@
1
+ /**
2
+ * Branded ID types — nominal typing for entity identifiers.
3
+ *
4
+ * All entity IDs use branded types to prevent accidental misuse.
5
+ * A `TaskId` cannot be passed where a `RunId` is expected, even
6
+ * though both are strings at runtime.
7
+ *
8
+ * Constructor functions validate format and return `Result<T, E>` —
9
+ * parse-don't-validate at the boundary, then pass branded values
10
+ * through the pipeline.
11
+ *
12
+ * The `Brand` utility and `Result` type are defined here as the
13
+ * foundation. Existing branded types in the codebase (`ReportId`,
14
+ * `ISOTimestamp`) use inline branding — those will be migrated to
15
+ * use this utility in Phase 7.
16
+ *
17
+ * @see docs/design-docs/architecture-overhaul/domain-model.md (canonical)
18
+ * @see docs/design-docs/parse-dont-validate.md (design principle)
19
+ */
20
/**
 * Build the success variant of a result.
 *
 * @param value - Payload to carry.
 * @returns An object with `ok: true` and the given `value`.
 */
export function ok(value) {
    const success = { ok: true, value: value };
    return success;
}
24
/**
 * Build the failure variant of a result.
 *
 * @param error - Error payload to carry.
 * @returns An object with `ok: false` and the given `error`.
 */
export function err(error) {
    const failure = { ok: false, error: error };
    return failure;
}
28
+ // ---------------------------------------------------------------------------
29
+ // Constructor functions — parse-don't-validate
30
+ // ---------------------------------------------------------------------------
31
/**
 * Parse a raw string into a `TaskId`.
 *
 * Valid format: 1–128 characters drawn from lowercase letters, digits, and
 * hyphens; the first character must be a letter or digit.
 * Examples: `"groq-projection-basics"`, `"mcp-server-tools-list"`
 *
 * Non-string input (e.g. `undefined`, `null`, a number) is reported as a
 * validation failure instead of raising a `TypeError`, so callers at an
 * untyped boundary still receive a result object.
 *
 * @param raw - Candidate identifier.
 * @returns `ok(raw)` on a match; otherwise `err(...)` with code
 *   `"INVALID_TASK_ID"`.
 */
export function taskId(raw) {
    if (typeof raw === "string" && /^[a-z0-9][a-z0-9-]{0,127}$/.test(raw)) {
        return ok(raw);
    }
    // Stringify so the error payload stays printable for non-string input;
    // for string input this is the input itself.
    const shown = String(raw);
    return err({
        code: "INVALID_TASK_ID",
        raw: shown,
        message: `Invalid TaskId "${shown}": must be 1–128 lowercase alphanumeric characters or hyphens, starting with a letter or digit`,
    });
}
47
/**
 * Parse a raw string into a `RunId`.
 *
 * Valid format: `run_` prefix followed by at least 8 alphanumeric
 * characters (either case).
 *
 * Non-string input (e.g. `undefined`, `null`, a number) is reported as a
 * validation failure instead of raising a `TypeError`.
 *
 * @param raw - Candidate identifier.
 * @returns `ok(raw)` on a match; otherwise `err(...)` with code
 *   `"INVALID_RUN_ID"`.
 */
export function runId(raw) {
    if (typeof raw === "string" && /^run_[a-zA-Z0-9]{8,}$/.test(raw)) {
        return ok(raw);
    }
    // Stringify so the error payload stays printable for non-string input.
    const shown = String(raw);
    return err({
        code: "INVALID_RUN_ID",
        raw: shown,
        message: `Invalid RunId "${shown}": must match run_[a-zA-Z0-9]{8,}`,
    });
}
62
/**
 * Parse a raw string into a `SuiteId`.
 *
 * Valid format: `suite_` prefix followed by at least 4 alphanumeric
 * characters (either case).
 *
 * Non-string input (e.g. `undefined`, `null`, a number) is reported as a
 * validation failure instead of raising a `TypeError`.
 *
 * @param raw - Candidate identifier.
 * @returns `ok(raw)` on a match; otherwise `err(...)` with code
 *   `"INVALID_SUITE_ID"`.
 */
export function suiteId(raw) {
    if (typeof raw === "string" && /^suite_[a-zA-Z0-9]{4,}$/.test(raw)) {
        return ok(raw);
    }
    // Stringify so the error payload stays printable for non-string input.
    const shown = String(raw);
    return err({
        code: "INVALID_SUITE_ID",
        raw: shown,
        message: `Invalid SuiteId "${shown}": must match suite_[a-zA-Z0-9]{4,}`,
    });
}
77
/**
 * Parse a raw string into a `ResultId`.
 *
 * Valid format: `res_` prefix followed by at least 8 alphanumeric
 * characters (either case).
 *
 * Non-string input (e.g. `undefined`, `null`, a number) is reported as a
 * validation failure instead of raising a `TypeError`.
 *
 * @param raw - Candidate identifier.
 * @returns `ok(raw)` on a match; otherwise `err(...)` with code
 *   `"INVALID_RESULT_ID"`.
 */
export function resultId(raw) {
    if (typeof raw === "string" && /^res_[a-zA-Z0-9]{8,}$/.test(raw)) {
        return ok(raw);
    }
    // Stringify so the error payload stays printable for non-string input.
    const shown = String(raw);
    return err({
        code: "INVALID_RESULT_ID",
        raw: shown,
        message: `Invalid ResultId "${shown}": must match res_[a-zA-Z0-9]{8,}`,
    });
}
92
/**
 * Parse a raw string into a `TraceId`.
 *
 * Valid format: `trace_` prefix followed by at least 8 alphanumeric
 * characters (either case).
 *
 * Non-string input (e.g. `undefined`, `null`, a number) is reported as a
 * validation failure instead of raising a `TypeError`.
 *
 * @param raw - Candidate identifier.
 * @returns `ok(raw)` on a match; otherwise `err(...)` with code
 *   `"INVALID_TRACE_ID"`.
 */
export function traceId(raw) {
    if (typeof raw === "string" && /^trace_[a-zA-Z0-9]{8,}$/.test(raw)) {
        return ok(raw);
    }
    // Stringify so the error payload stays printable for non-string input.
    const shown = String(raw);
    return err({
        code: "INVALID_TRACE_ID",
        raw: shown,
        message: `Invalid TraceId "${shown}": must match trace_[a-zA-Z0-9]{8,}`,
    });
}
107
/**
 * Parse a raw string into a `ProviderId`.
 *
 * Valid format: 1–128 characters drawn from letters, digits, colons, dots,
 * underscores, and hyphens; the first character must be a letter or digit.
 * Typically colon-separated segments (e.g., `"openai:chat:gpt-4o"`).
 *
 * Non-string input (e.g. `undefined`, `null`, a number) is reported as a
 * validation failure instead of raising a `TypeError`.
 *
 * @param raw - Candidate identifier.
 * @returns `ok(raw)` on a match; otherwise `err(...)` with code
 *   `"INVALID_PROVIDER_ID"`.
 */
export function providerId(raw) {
    if (typeof raw === "string" && /^[a-zA-Z0-9][a-zA-Z0-9:._-]{0,127}$/.test(raw)) {
        return ok(raw);
    }
    // Stringify so the error payload stays printable for non-string input.
    const shown = String(raw);
    return err({
        code: "INVALID_PROVIDER_ID",
        raw: shown,
        message: `Invalid ProviderId "${shown}": must be 1–128 alphanumeric characters, colons, dots, underscores, or hyphens`,
    });
}
122
/**
 * Parse a raw string into a `FixtureId`.
 *
 * Valid format: 1–128 characters drawn from lowercase letters, digits, and
 * hyphens; the first character must be a letter or digit.
 *
 * Non-string input (e.g. `undefined`, `null`, a number) is reported as a
 * validation failure instead of raising a `TypeError`.
 *
 * @param raw - Candidate identifier.
 * @returns `ok(raw)` on a match; otherwise `err(...)` with code
 *   `"INVALID_FIXTURE_ID"`.
 */
export function fixtureId(raw) {
    if (typeof raw === "string" && /^[a-z0-9][a-z0-9-]{0,127}$/.test(raw)) {
        return ok(raw);
    }
    // Stringify so the error payload stays printable for non-string input.
    const shown = String(raw);
    return err({
        code: "INVALID_FIXTURE_ID",
        raw: shown,
        message: `Invalid FixtureId "${shown}": must be 1–128 lowercase alphanumeric characters or hyphens`,
    });
}