@amodalai/core 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (463) hide show
  1. package/LICENSE +21 -0
  2. package/dist/.last_build +0 -0
  3. package/dist/src/amodal-config.d.ts +194 -0
  4. package/dist/src/amodal-config.js +326 -0
  5. package/dist/src/amodal-config.js.map +1 -0
  6. package/dist/src/audit/audit-logger.d.ts +52 -0
  7. package/dist/src/audit/audit-logger.js +137 -0
  8. package/dist/src/audit/audit-logger.js.map +1 -0
  9. package/dist/src/audit/audit-outputs.d.ts +34 -0
  10. package/dist/src/audit/audit-outputs.js +73 -0
  11. package/dist/src/audit/audit-outputs.js.map +1 -0
  12. package/dist/src/audit/audit-redact.d.ts +14 -0
  13. package/dist/src/audit/audit-redact.js +55 -0
  14. package/dist/src/audit/audit-redact.js.map +1 -0
  15. package/dist/src/audit/audit-types.d.ts +122 -0
  16. package/dist/src/audit/audit-types.js +64 -0
  17. package/dist/src/audit/audit-types.js.map +1 -0
  18. package/dist/src/audit/index.d.ts +10 -0
  19. package/dist/src/audit/index.js +10 -0
  20. package/dist/src/audit/index.js.map +1 -0
  21. package/dist/src/eval/eval-cost.d.ts +33 -0
  22. package/dist/src/eval/eval-cost.js +73 -0
  23. package/dist/src/eval/eval-cost.js.map +1 -0
  24. package/dist/src/eval/eval-diff.d.ts +11 -0
  25. package/dist/src/eval/eval-diff.js +97 -0
  26. package/dist/src/eval/eval-diff.js.map +1 -0
  27. package/dist/src/eval/eval-formatter.d.ts +23 -0
  28. package/dist/src/eval/eval-formatter.js +221 -0
  29. package/dist/src/eval/eval-formatter.js.map +1 -0
  30. package/dist/src/eval/eval-judge.d.ts +26 -0
  31. package/dist/src/eval/eval-judge.js +76 -0
  32. package/dist/src/eval/eval-judge.js.map +1 -0
  33. package/dist/src/eval/eval-run-builder.d.ts +25 -0
  34. package/dist/src/eval/eval-run-builder.js +78 -0
  35. package/dist/src/eval/eval-run-builder.js.map +1 -0
  36. package/dist/src/eval/eval-runner.d.ts +36 -0
  37. package/dist/src/eval/eval-runner.js +92 -0
  38. package/dist/src/eval/eval-runner.js.map +1 -0
  39. package/dist/src/eval/eval-session-provider.d.ts +40 -0
  40. package/dist/src/eval/eval-session-provider.js +46 -0
  41. package/dist/src/eval/eval-session-provider.js.map +1 -0
  42. package/dist/src/eval/eval-types.d.ts +146 -0
  43. package/dist/src/eval/eval-types.js +7 -0
  44. package/dist/src/eval/eval-types.js.map +1 -0
  45. package/dist/src/eval/experiment-runner.d.ts +16 -0
  46. package/dist/src/eval/experiment-runner.js +73 -0
  47. package/dist/src/eval/experiment-runner.js.map +1 -0
  48. package/dist/src/eval/experiment-types.d.ts +56 -0
  49. package/dist/src/eval/experiment-types.js +7 -0
  50. package/dist/src/eval/experiment-types.js.map +1 -0
  51. package/dist/src/eval/index.d.ts +22 -0
  52. package/dist/src/eval/index.js +18 -0
  53. package/dist/src/eval/index.js.map +1 -0
  54. package/dist/src/eval/multi-model-runner.d.ts +42 -0
  55. package/dist/src/eval/multi-model-runner.js +70 -0
  56. package/dist/src/eval/multi-model-runner.js.map +1 -0
  57. package/dist/src/eval/platform-eval-client.d.ts +105 -0
  58. package/dist/src/eval/platform-eval-client.js +155 -0
  59. package/dist/src/eval/platform-eval-client.js.map +1 -0
  60. package/dist/src/index.d.ts +41 -0
  61. package/dist/src/index.js +68 -0
  62. package/dist/src/index.js.map +1 -0
  63. package/dist/src/knowledge/index.d.ts +11 -0
  64. package/dist/src/knowledge/index.js +12 -0
  65. package/dist/src/knowledge/index.js.map +1 -0
  66. package/dist/src/knowledge/kb-formatter.d.ts +15 -0
  67. package/dist/src/knowledge/kb-formatter.js +78 -0
  68. package/dist/src/knowledge/kb-formatter.js.map +1 -0
  69. package/dist/src/knowledge/kb-index.d.ts +27 -0
  70. package/dist/src/knowledge/kb-index.js +66 -0
  71. package/dist/src/knowledge/kb-index.js.map +1 -0
  72. package/dist/src/knowledge/kb-types.d.ts +54 -0
  73. package/dist/src/knowledge/kb-types.js +7 -0
  74. package/dist/src/knowledge/kb-types.js.map +1 -0
  75. package/dist/src/knowledge/knowledge-store.d.ts +56 -0
  76. package/dist/src/knowledge/knowledge-store.js +141 -0
  77. package/dist/src/knowledge/knowledge-store.js.map +1 -0
  78. package/dist/src/knowledge/load-knowledge.d.ts +30 -0
  79. package/dist/src/knowledge/load-knowledge.js +161 -0
  80. package/dist/src/knowledge/load-knowledge.js.map +1 -0
  81. package/dist/src/knowledge/propose-kb-update.d.ts +32 -0
  82. package/dist/src/knowledge/propose-kb-update.js +148 -0
  83. package/dist/src/knowledge/propose-kb-update.js.map +1 -0
  84. package/dist/src/knowledge/propose-knowledge.d.ts +34 -0
  85. package/dist/src/knowledge/propose-knowledge.js +163 -0
  86. package/dist/src/knowledge/propose-knowledge.js.map +1 -0
  87. package/dist/src/mcp/index.d.ts +7 -0
  88. package/dist/src/mcp/index.js +7 -0
  89. package/dist/src/mcp/index.js.map +1 -0
  90. package/dist/src/mcp/mcp-manager.d.ts +96 -0
  91. package/dist/src/mcp/mcp-manager.js +192 -0
  92. package/dist/src/mcp/mcp-manager.js.map +1 -0
  93. package/dist/src/packages/config-deps.d.ts +20 -0
  94. package/dist/src/packages/config-deps.js +77 -0
  95. package/dist/src/packages/config-deps.js.map +1 -0
  96. package/dist/src/packages/env-file.d.ts +28 -0
  97. package/dist/src/packages/env-file.js +143 -0
  98. package/dist/src/packages/env-file.js.map +1 -0
  99. package/dist/src/packages/frontmatter.d.ts +28 -0
  100. package/dist/src/packages/frontmatter.js +77 -0
  101. package/dist/src/packages/frontmatter.js.map +1 -0
  102. package/dist/src/packages/index.d.ts +16 -0
  103. package/dist/src/packages/index.js +17 -0
  104. package/dist/src/packages/index.js.map +1 -0
  105. package/dist/src/packages/lock-file.d.ts +35 -0
  106. package/dist/src/packages/lock-file.js +117 -0
  107. package/dist/src/packages/lock-file.js.map +1 -0
  108. package/dist/src/packages/manifest-reader.d.ts +19 -0
  109. package/dist/src/packages/manifest-reader.js +91 -0
  110. package/dist/src/packages/manifest-reader.js.map +1 -0
  111. package/dist/src/packages/merge-engine.d.ts +63 -0
  112. package/dist/src/packages/merge-engine.js +357 -0
  113. package/dist/src/packages/merge-engine.js.map +1 -0
  114. package/dist/src/packages/npm-context.d.ts +56 -0
  115. package/dist/src/packages/npm-context.js +235 -0
  116. package/dist/src/packages/npm-context.js.map +1 -0
  117. package/dist/src/packages/npm-registry.d.ts +35 -0
  118. package/dist/src/packages/npm-registry.js +107 -0
  119. package/dist/src/packages/npm-registry.js.map +1 -0
  120. package/dist/src/packages/package-error.d.ts +16 -0
  121. package/dist/src/packages/package-error.js +17 -0
  122. package/dist/src/packages/package-error.js.map +1 -0
  123. package/dist/src/packages/package-types.d.ts +477 -0
  124. package/dist/src/packages/package-types.js +156 -0
  125. package/dist/src/packages/package-types.js.map +1 -0
  126. package/dist/src/packages/resolver.d.ts +41 -0
  127. package/dist/src/packages/resolver.js +353 -0
  128. package/dist/src/packages/resolver.js.map +1 -0
  129. package/dist/src/platform/config-builder.d.ts +24 -0
  130. package/dist/src/platform/config-builder.js +70 -0
  131. package/dist/src/platform/config-builder.js.map +1 -0
  132. package/dist/src/platform/index.d.ts +8 -0
  133. package/dist/src/platform/index.js +9 -0
  134. package/dist/src/platform/index.js.map +1 -0
  135. package/dist/src/platform/platform-client.d.ts +160 -0
  136. package/dist/src/platform/platform-client.js +486 -0
  137. package/dist/src/platform/platform-client.js.map +1 -0
  138. package/dist/src/platform/platform-types.d.ts +81 -0
  139. package/dist/src/platform/platform-types.js +18 -0
  140. package/dist/src/platform/platform-types.js.map +1 -0
  141. package/dist/src/providers/content-generator/google-to-llm.d.ts +87 -0
  142. package/dist/src/providers/content-generator/google-to-llm.js +226 -0
  143. package/dist/src/providers/content-generator/google-to-llm.js.map +1 -0
  144. package/dist/src/providers/content-generator/index.d.ts +10 -0
  145. package/dist/src/providers/content-generator/index.js +9 -0
  146. package/dist/src/providers/content-generator/index.js.map +1 -0
  147. package/dist/src/providers/content-generator/llm-to-google.d.ts +59 -0
  148. package/dist/src/providers/content-generator/llm-to-google.js +178 -0
  149. package/dist/src/providers/content-generator/llm-to-google.js.map +1 -0
  150. package/dist/src/providers/content-generator/multi-provider-content-generator.d.ts +61 -0
  151. package/dist/src/providers/content-generator/multi-provider-content-generator.js +144 -0
  152. package/dist/src/providers/content-generator/multi-provider-content-generator.js.map +1 -0
  153. package/dist/src/providers/runtime/anthropic-provider.d.ts +18 -0
  154. package/dist/src/providers/runtime/anthropic-provider.js +253 -0
  155. package/dist/src/providers/runtime/anthropic-provider.js.map +1 -0
  156. package/dist/src/providers/runtime/azure-provider.d.ts +25 -0
  157. package/dist/src/providers/runtime/azure-provider.js +206 -0
  158. package/dist/src/providers/runtime/azure-provider.js.map +1 -0
  159. package/dist/src/providers/runtime/bedrock-provider.d.ts +22 -0
  160. package/dist/src/providers/runtime/bedrock-provider.js +276 -0
  161. package/dist/src/providers/runtime/bedrock-provider.js.map +1 -0
  162. package/dist/src/providers/runtime/failover-provider.d.ts +30 -0
  163. package/dist/src/providers/runtime/failover-provider.js +124 -0
  164. package/dist/src/providers/runtime/failover-provider.js.map +1 -0
  165. package/dist/src/providers/runtime/google-provider.d.ts +17 -0
  166. package/dist/src/providers/runtime/google-provider.js +239 -0
  167. package/dist/src/providers/runtime/google-provider.js.map +1 -0
  168. package/dist/src/providers/runtime/index.d.ts +16 -0
  169. package/dist/src/providers/runtime/index.js +16 -0
  170. package/dist/src/providers/runtime/index.js.map +1 -0
  171. package/dist/src/providers/runtime/openai-provider.d.ts +21 -0
  172. package/dist/src/providers/runtime/openai-provider.js +266 -0
  173. package/dist/src/providers/runtime/openai-provider.js.map +1 -0
  174. package/dist/src/providers/runtime/provider-errors.d.ts +39 -0
  175. package/dist/src/providers/runtime/provider-errors.js +50 -0
  176. package/dist/src/providers/runtime/provider-errors.js.map +1 -0
  177. package/dist/src/providers/runtime/provider-factory.d.ts +19 -0
  178. package/dist/src/providers/runtime/provider-factory.js +45 -0
  179. package/dist/src/providers/runtime/provider-factory.js.map +1 -0
  180. package/dist/src/providers/runtime/runtime-provider-types.d.ts +63 -0
  181. package/dist/src/providers/runtime/runtime-provider-types.js +7 -0
  182. package/dist/src/providers/runtime/runtime-provider-types.js.map +1 -0
  183. package/dist/src/providers/runtime/streaming-types.d.ts +40 -0
  184. package/dist/src/providers/runtime/streaming-types.js +7 -0
  185. package/dist/src/providers/runtime/streaming-types.js.map +1 -0
  186. package/dist/src/repo/config-schema.d.ts +238 -0
  187. package/dist/src/repo/config-schema.js +155 -0
  188. package/dist/src/repo/config-schema.js.map +1 -0
  189. package/dist/src/repo/connection-schemas.d.ts +449 -0
  190. package/dist/src/repo/connection-schemas.js +109 -0
  191. package/dist/src/repo/connection-schemas.js.map +1 -0
  192. package/dist/src/repo/connection-types.d.ts +29 -0
  193. package/dist/src/repo/connection-types.js +7 -0
  194. package/dist/src/repo/connection-types.js.map +1 -0
  195. package/dist/src/repo/drift-detector.d.ts +26 -0
  196. package/dist/src/repo/drift-detector.js +66 -0
  197. package/dist/src/repo/drift-detector.js.map +1 -0
  198. package/dist/src/repo/graphql-drift-detector.d.ts +27 -0
  199. package/dist/src/repo/graphql-drift-detector.js +66 -0
  200. package/dist/src/repo/graphql-drift-detector.js.map +1 -0
  201. package/dist/src/repo/graphql-parser.d.ts +30 -0
  202. package/dist/src/repo/graphql-parser.js +125 -0
  203. package/dist/src/repo/graphql-parser.js.map +1 -0
  204. package/dist/src/repo/graphql-surface-parser.d.ts +20 -0
  205. package/dist/src/repo/graphql-surface-parser.js +74 -0
  206. package/dist/src/repo/graphql-surface-parser.js.map +1 -0
  207. package/dist/src/repo/index.d.ts +30 -0
  208. package/dist/src/repo/index.js +29 -0
  209. package/dist/src/repo/index.js.map +1 -0
  210. package/dist/src/repo/local-reader.d.ts +10 -0
  211. package/dist/src/repo/local-reader.js +299 -0
  212. package/dist/src/repo/local-reader.js.map +1 -0
  213. package/dist/src/repo/openapi-parser.d.ts +35 -0
  214. package/dist/src/repo/openapi-parser.js +93 -0
  215. package/dist/src/repo/openapi-parser.js.map +1 -0
  216. package/dist/src/repo/parsers.d.ts +91 -0
  217. package/dist/src/repo/parsers.js +454 -0
  218. package/dist/src/repo/parsers.js.map +1 -0
  219. package/dist/src/repo/platform-reader.d.ts +10 -0
  220. package/dist/src/repo/platform-reader.js +206 -0
  221. package/dist/src/repo/platform-reader.js.map +1 -0
  222. package/dist/src/repo/repo-loader.d.ts +14 -0
  223. package/dist/src/repo/repo-loader.js +25 -0
  224. package/dist/src/repo/repo-loader.js.map +1 -0
  225. package/dist/src/repo/repo-types.d.ts +159 -0
  226. package/dist/src/repo/repo-types.js +17 -0
  227. package/dist/src/repo/repo-types.js.map +1 -0
  228. package/dist/src/repo/spec-syncer.d.ts +30 -0
  229. package/dist/src/repo/spec-syncer.js +85 -0
  230. package/dist/src/repo/spec-syncer.js.map +1 -0
  231. package/dist/src/repo/store-loader.d.ts +19 -0
  232. package/dist/src/repo/store-loader.js +94 -0
  233. package/dist/src/repo/store-loader.js.map +1 -0
  234. package/dist/src/repo/store-schemas.d.ts +313 -0
  235. package/dist/src/repo/store-schemas.js +103 -0
  236. package/dist/src/repo/store-schemas.js.map +1 -0
  237. package/dist/src/repo/store-tool-schema.d.ts +29 -0
  238. package/dist/src/repo/store-tool-schema.js +103 -0
  239. package/dist/src/repo/store-tool-schema.js.map +1 -0
  240. package/dist/src/repo/store-types.d.ts +91 -0
  241. package/dist/src/repo/store-types.js +7 -0
  242. package/dist/src/repo/store-types.js.map +1 -0
  243. package/dist/src/repo/surface-parser.d.ts +17 -0
  244. package/dist/src/repo/surface-parser.js +75 -0
  245. package/dist/src/repo/surface-parser.js.map +1 -0
  246. package/dist/src/repo/tool-loader.d.ts +33 -0
  247. package/dist/src/repo/tool-loader.js +240 -0
  248. package/dist/src/repo/tool-loader.js.map +1 -0
  249. package/dist/src/repo/tool-types.d.ts +205 -0
  250. package/dist/src/repo/tool-types.js +61 -0
  251. package/dist/src/repo/tool-types.js.map +1 -0
  252. package/dist/src/roles/index.d.ts +7 -0
  253. package/dist/src/roles/index.js +8 -0
  254. package/dist/src/roles/index.js.map +1 -0
  255. package/dist/src/roles/role-filter.d.ts +33 -0
  256. package/dist/src/roles/role-filter.js +55 -0
  257. package/dist/src/roles/role-filter.js.map +1 -0
  258. package/dist/src/roles/role-types.d.ts +76 -0
  259. package/dist/src/roles/role-types.js +38 -0
  260. package/dist/src/roles/role-types.js.map +1 -0
  261. package/dist/src/runtime/connection-bridge.d.ts +19 -0
  262. package/dist/src/runtime/connection-bridge.js +103 -0
  263. package/dist/src/runtime/connection-bridge.js.map +1 -0
  264. package/dist/src/runtime/context-compiler.d.ts +35 -0
  265. package/dist/src/runtime/context-compiler.js +183 -0
  266. package/dist/src/runtime/context-compiler.js.map +1 -0
  267. package/dist/src/runtime/default-prompt.d.ts +28 -0
  268. package/dist/src/runtime/default-prompt.js +71 -0
  269. package/dist/src/runtime/default-prompt.js.map +1 -0
  270. package/dist/src/runtime/explore-tool.d.ts +96 -0
  271. package/dist/src/runtime/explore-tool.js +111 -0
  272. package/dist/src/runtime/explore-tool.js.map +1 -0
  273. package/dist/src/runtime/index.d.ts +26 -0
  274. package/dist/src/runtime/index.js +19 -0
  275. package/dist/src/runtime/index.js.map +1 -0
  276. package/dist/src/runtime/output-pipeline.d.ts +62 -0
  277. package/dist/src/runtime/output-pipeline.js +69 -0
  278. package/dist/src/runtime/output-pipeline.js.map +1 -0
  279. package/dist/src/runtime/plan-mode.d.ts +39 -0
  280. package/dist/src/runtime/plan-mode.js +81 -0
  281. package/dist/src/runtime/plan-mode.js.map +1 -0
  282. package/dist/src/runtime/preference-client.d.ts +39 -0
  283. package/dist/src/runtime/preference-client.js +70 -0
  284. package/dist/src/runtime/preference-client.js.map +1 -0
  285. package/dist/src/runtime/preference-detector.d.ts +22 -0
  286. package/dist/src/runtime/preference-detector.js +95 -0
  287. package/dist/src/runtime/preference-detector.js.map +1 -0
  288. package/dist/src/runtime/request-integration.d.ts +18 -0
  289. package/dist/src/runtime/request-integration.js +36 -0
  290. package/dist/src/runtime/request-integration.js.map +1 -0
  291. package/dist/src/runtime/runtime-types.d.ts +48 -0
  292. package/dist/src/runtime/runtime-types.js +7 -0
  293. package/dist/src/runtime/runtime-types.js.map +1 -0
  294. package/dist/src/runtime/session-setup.d.ts +53 -0
  295. package/dist/src/runtime/session-setup.js +90 -0
  296. package/dist/src/runtime/session-setup.js.map +1 -0
  297. package/dist/src/runtime/telemetry-client.d.ts +39 -0
  298. package/dist/src/runtime/telemetry-client.js +87 -0
  299. package/dist/src/runtime/telemetry-client.js.map +1 -0
  300. package/dist/src/runtime/telemetry-hooks.d.ts +47 -0
  301. package/dist/src/runtime/telemetry-hooks.js +115 -0
  302. package/dist/src/runtime/telemetry-hooks.js.map +1 -0
  303. package/dist/src/runtime/token-allocator.d.ts +34 -0
  304. package/dist/src/runtime/token-allocator.js +86 -0
  305. package/dist/src/runtime/token-allocator.js.map +1 -0
  306. package/dist/src/runtime/user-context.d.ts +49 -0
  307. package/dist/src/runtime/user-context.js +135 -0
  308. package/dist/src/runtime/user-context.js.map +1 -0
  309. package/dist/src/sdk.d.ts +57 -0
  310. package/dist/src/sdk.js +377 -0
  311. package/dist/src/sdk.js.map +1 -0
  312. package/dist/src/security/action-gate.d.ts +23 -0
  313. package/dist/src/security/action-gate.js +78 -0
  314. package/dist/src/security/action-gate.js.map +1 -0
  315. package/dist/src/security/field-scrubber.d.ts +27 -0
  316. package/dist/src/security/field-scrubber.js +152 -0
  317. package/dist/src/security/field-scrubber.js.map +1 -0
  318. package/dist/src/security/index.d.ts +14 -0
  319. package/dist/src/security/index.js +15 -0
  320. package/dist/src/security/index.js.map +1 -0
  321. package/dist/src/security/leak-detector.d.ts +23 -0
  322. package/dist/src/security/leak-detector.js +51 -0
  323. package/dist/src/security/leak-detector.js.map +1 -0
  324. package/dist/src/security/output-guard.d.ts +33 -0
  325. package/dist/src/security/output-guard.js +118 -0
  326. package/dist/src/security/output-guard.js.map +1 -0
  327. package/dist/src/security/pattern-scanner.d.ts +19 -0
  328. package/dist/src/security/pattern-scanner.js +66 -0
  329. package/dist/src/security/pattern-scanner.js.map +1 -0
  330. package/dist/src/security/scope-checker.d.ts +27 -0
  331. package/dist/src/security/scope-checker.js +52 -0
  332. package/dist/src/security/scope-checker.js.map +1 -0
  333. package/dist/src/security/scrub-tracker.d.ts +21 -0
  334. package/dist/src/security/scrub-tracker.js +39 -0
  335. package/dist/src/security/scrub-tracker.js.map +1 -0
  336. package/dist/src/security/security-types.d.ts +69 -0
  337. package/dist/src/security/security-types.js +17 -0
  338. package/dist/src/security/security-types.js.map +1 -0
  339. package/dist/src/security/threshold-evaluator.d.ts +13 -0
  340. package/dist/src/security/threshold-evaluator.js +44 -0
  341. package/dist/src/security/threshold-evaluator.js.map +1 -0
  342. package/dist/src/snapshot/index.d.ts +8 -0
  343. package/dist/src/snapshot/index.js +9 -0
  344. package/dist/src/snapshot/index.js.map +1 -0
  345. package/dist/src/snapshot/snapshot-builder.d.ts +37 -0
  346. package/dist/src/snapshot/snapshot-builder.js +152 -0
  347. package/dist/src/snapshot/snapshot-builder.js.map +1 -0
  348. package/dist/src/snapshot/snapshot-loader.d.ts +29 -0
  349. package/dist/src/snapshot/snapshot-loader.js +188 -0
  350. package/dist/src/snapshot/snapshot-loader.js.map +1 -0
  351. package/dist/src/snapshot/snapshot-types.d.ts +1940 -0
  352. package/dist/src/snapshot/snapshot-types.js +145 -0
  353. package/dist/src/snapshot/snapshot-types.js.map +1 -0
  354. package/dist/src/stores/index.d.ts +6 -0
  355. package/dist/src/stores/index.js +7 -0
  356. package/dist/src/stores/index.js.map +1 -0
  357. package/dist/src/stores/store-backend.d.ts +135 -0
  358. package/dist/src/stores/store-backend.js +7 -0
  359. package/dist/src/stores/store-backend.js.map +1 -0
  360. package/dist/src/templates/connections.d.ts +14 -0
  361. package/dist/src/templates/connections.js +7 -0
  362. package/dist/src/templates/connections.js.map +1 -0
  363. package/dist/src/templates/index.d.ts +7 -0
  364. package/dist/src/templates/index.js +8 -0
  365. package/dist/src/templates/index.js.map +1 -0
  366. package/dist/src/templates/template-resolver.d.ts +45 -0
  367. package/dist/src/templates/template-resolver.js +121 -0
  368. package/dist/src/templates/template-resolver.js.map +1 -0
  369. package/dist/src/tool-context.d.ts +33 -0
  370. package/dist/src/tool-context.js +7 -0
  371. package/dist/src/tool-context.js.map +1 -0
  372. package/dist/src/tool-registration.d.ts +14 -0
  373. package/dist/src/tool-registration.js +51 -0
  374. package/dist/src/tool-registration.js.map +1 -0
  375. package/dist/src/tools/amodal-tool-errors.d.ts +24 -0
  376. package/dist/src/tools/amodal-tool-errors.js +28 -0
  377. package/dist/src/tools/amodal-tool-errors.js.map +1 -0
  378. package/dist/src/tools/amodal-tool-names.d.ts +14 -0
  379. package/dist/src/tools/amodal-tool-names.js +15 -0
  380. package/dist/src/tools/amodal-tool-names.js.map +1 -0
  381. package/dist/src/tools/chain-tool-registry.d.ts +20 -0
  382. package/dist/src/tools/chain-tool-registry.js +49 -0
  383. package/dist/src/tools/chain-tool-registry.js.map +1 -0
  384. package/dist/src/tools/chain-tool-types.d.ts +190 -0
  385. package/dist/src/tools/chain-tool-types.js +50 -0
  386. package/dist/src/tools/chain-tool-types.js.map +1 -0
  387. package/dist/src/tools/chain-tool.d.ts +34 -0
  388. package/dist/src/tools/chain-tool.js +294 -0
  389. package/dist/src/tools/chain-tool.js.map +1 -0
  390. package/dist/src/tools/custom-tool-registrar.d.ts +8 -0
  391. package/dist/src/tools/custom-tool-registrar.js +10 -0
  392. package/dist/src/tools/custom-tool-registrar.js.map +1 -0
  393. package/dist/src/tools/definitions/amodal-tools.d.ts +9 -0
  394. package/dist/src/tools/definitions/amodal-tools.js +192 -0
  395. package/dist/src/tools/definitions/amodal-tools.js.map +1 -0
  396. package/dist/src/tools/function-tool-registry.d.ts +22 -0
  397. package/dist/src/tools/function-tool-registry.js +45 -0
  398. package/dist/src/tools/function-tool-registry.js.map +1 -0
  399. package/dist/src/tools/function-tool-types.d.ts +76 -0
  400. package/dist/src/tools/function-tool-types.js +27 -0
  401. package/dist/src/tools/function-tool-types.js.map +1 -0
  402. package/dist/src/tools/function-tool.d.ts +34 -0
  403. package/dist/src/tools/function-tool.js +97 -0
  404. package/dist/src/tools/function-tool.js.map +1 -0
  405. package/dist/src/tools/http-tool-registry.d.ts +20 -0
  406. package/dist/src/tools/http-tool-registry.js +34 -0
  407. package/dist/src/tools/http-tool-registry.js.map +1 -0
  408. package/dist/src/tools/http-tool-types.d.ts +92 -0
  409. package/dist/src/tools/http-tool-types.js +44 -0
  410. package/dist/src/tools/http-tool-types.js.map +1 -0
  411. package/dist/src/tools/http-tool.d.ts +32 -0
  412. package/dist/src/tools/http-tool.js +176 -0
  413. package/dist/src/tools/http-tool.js.map +1 -0
  414. package/dist/src/tools/merge-template.d.ts +33 -0
  415. package/dist/src/tools/merge-template.js +65 -0
  416. package/dist/src/tools/merge-template.js.map +1 -0
  417. package/dist/src/tools/request-tool-types.d.ts +46 -0
  418. package/dist/src/tools/request-tool-types.js +20 -0
  419. package/dist/src/tools/request-tool-types.js.map +1 -0
  420. package/dist/src/tools/request-tool.d.ts +29 -0
  421. package/dist/src/tools/request-tool.js +268 -0
  422. package/dist/src/tools/request-tool.js.map +1 -0
  423. package/dist/src/tools/store-query-tool.d.ts +29 -0
  424. package/dist/src/tools/store-query-tool.js +82 -0
  425. package/dist/src/tools/store-query-tool.js.map +1 -0
  426. package/dist/src/tools/store-write-tool.d.ts +26 -0
  427. package/dist/src/tools/store-write-tool.js +84 -0
  428. package/dist/src/tools/store-write-tool.js.map +1 -0
  429. package/dist/src/tools/tool-definition-types.d.ts +21 -0
  430. package/dist/src/tools/tool-definition-types.js +7 -0
  431. package/dist/src/tools/tool-definition-types.js.map +1 -0
  432. package/dist/src/tools/tool-utils.d.ts +29 -0
  433. package/dist/src/tools/tool-utils.js +66 -0
  434. package/dist/src/tools/tool-utils.js.map +1 -0
  435. package/dist/src/versions/bundle-loader.d.ts +37 -0
  436. package/dist/src/versions/bundle-loader.js +99 -0
  437. package/dist/src/versions/bundle-loader.js.map +1 -0
  438. package/dist/src/versions/dependency-manager.d.ts +54 -0
  439. package/dist/src/versions/dependency-manager.js +132 -0
  440. package/dist/src/versions/dependency-manager.js.map +1 -0
  441. package/dist/src/versions/handler-loader.d.ts +27 -0
  442. package/dist/src/versions/handler-loader.js +62 -0
  443. package/dist/src/versions/handler-loader.js.map +1 -0
  444. package/dist/src/versions/index.d.ts +10 -0
  445. package/dist/src/versions/index.js +11 -0
  446. package/dist/src/versions/index.js.map +1 -0
  447. package/dist/src/versions/version-bundle-types.d.ts +1278 -0
  448. package/dist/src/versions/version-bundle-types.js +207 -0
  449. package/dist/src/versions/version-bundle-types.js.map +1 -0
  450. package/dist/src/versions/version-manager.d.ts +89 -0
  451. package/dist/src/versions/version-manager.js +124 -0
  452. package/dist/src/versions/version-manager.js.map +1 -0
  453. package/dist/src/widgets/index.d.ts +8 -0
  454. package/dist/src/widgets/index.js +8 -0
  455. package/dist/src/widgets/index.js.map +1 -0
  456. package/dist/src/widgets/present-tool.d.ts +21 -0
  457. package/dist/src/widgets/present-tool.js +107 -0
  458. package/dist/src/widgets/present-tool.js.map +1 -0
  459. package/dist/src/widgets/widget-types.d.ts +78 -0
  460. package/dist/src/widgets/widget-types.js +28 -0
  461. package/dist/src/widgets/widget-types.js.map +1 -0
  462. package/dist/tsconfig.tsbuildinfo +1 -0
  463. package/package.json +50 -0
@@ -0,0 +1,11 @@
1
+ /**
2
+ * @license
3
+ * Copyright 2026 Amodal Labs, Inc.
4
+ * SPDX-License-Identifier: MIT
5
+ */
6
+ import type { EvalSuiteResult, EvalDiff } from './eval-types.js';
7
+ /**
8
+ * Compare current eval results against a baseline.
9
+ * Returns a diff for each eval case showing regressions, improvements, etc.
10
+ */
11
+ export declare function diffEvalResults(current: EvalSuiteResult, baseline: EvalSuiteResult): EvalDiff[];
@@ -0,0 +1,97 @@
1
+ /**
2
+ * @license
3
+ * Copyright 2026 Amodal Labs, Inc.
4
+ * SPDX-License-Identifier: MIT
5
+ */
6
/**
 * Compare current eval results against a baseline.
 *
 * Results are matched by `eval.name`. When a suite contains several results
 * with the same name, the last one wins (they are collapsed through a Map).
 * One diff entry is returned per distinct name, current names first, then
 * names that only exist in the baseline.
 *
 * Eval-level status:
 * - `'new'`       – present only in `current`
 * - `'removed'`   – present only in `baseline`
 * - `'improved'`  – baseline failed and current passes, or overall pass/fail is
 *                   the same and at least one assertion improved while none
 *                   regressed
 * - `'regressed'` – baseline passed and current fails, or overall pass/fail is
 *                   the same and at least one assertion regressed
 * - `'unchanged'` – everything else. Assertions that were merely added or
 *                   removed do not, on their own, count as an improvement;
 *                   they remain visible in `assertionChanges`.
 *
 * @param current  Suite result for the run under test (`results[]` is read).
 * @param baseline Suite result to compare against (`results[]` is read).
 * @returns One diff object per eval name with `evalName`, `status`,
 *          `currentPassed`, `baselinePassed` and `assertionChanges`.
 */
export function diffEvalResults(current, baseline) {
    const currentMap = new Map(current.results.map((r) => [r.eval.name, r]));
    const baselineMap = new Map(baseline.results.map((r) => [r.eval.name, r]));
    const allNames = new Set([...currentMap.keys(), ...baselineMap.keys()]);
    const diffs = [];
    for (const name of allNames) {
        const cur = currentMap.get(name);
        const base = baselineMap.get(name);
        if (cur && !base) {
            // Eval exists only in the current run: every assertion is new.
            diffs.push({
                evalName: name,
                status: 'new',
                currentPassed: cur.passed,
                baselinePassed: null,
                assertionChanges: cur.assertions.map((a) => ({
                    text: a.text,
                    currentPassed: a.passed,
                    baselinePassed: null,
                    status: 'new',
                })),
            });
            continue;
        }
        if (!cur && base) {
            // Eval exists only in the baseline: every assertion was removed.
            diffs.push({
                evalName: name,
                status: 'removed',
                currentPassed: null,
                baselinePassed: base.passed,
                assertionChanges: base.assertions.map((a) => ({
                    text: a.text,
                    currentPassed: null,
                    baselinePassed: a.passed,
                    status: 'removed',
                })),
            });
            continue;
        }
        if (cur && base) {
            const assertionChanges = diffAssertions(cur.assertions, base.assertions);
            let status = 'unchanged';
            if (cur.passed && !base.passed) {
                status = 'improved';
            }
            else if (!cur.passed && base.passed) {
                status = 'regressed';
            }
            else if (assertionChanges.some((a) => a.status === 'regressed')) {
                // Same overall pass/fail; any assertion regression dominates.
                status = 'regressed';
            }
            else if (assertionChanges.some((a) => a.status === 'improved')) {
                // Only a real false -> true transition counts as an improvement.
                // Added or removed assertions alone must not be reported as one.
                status = 'improved';
            }
            diffs.push({
                evalName: name,
                status,
                currentPassed: cur.passed,
                baselinePassed: base.passed,
                assertionChanges,
            });
        }
    }
    return diffs;
}

/**
 * Diff two assertion lists, matching assertions by their `text`.
 *
 * Baseline texts are emitted first, followed by texts that only appear in the
 * current list. A text missing from one side is reported with `null` for that
 * side's pass flag.
 *
 * @param current  Assertions from the current run (`{ text, passed }` items).
 * @param baseline Assertions from the baseline run (`{ text, passed }` items).
 * @returns One `{ text, currentPassed, baselinePassed, status }` entry per
 *          distinct assertion text; `status` is `'new'`, `'removed'`,
 *          `'improved'`, `'regressed'` or `'unchanged'`.
 */
function diffAssertions(current, baseline) {
    const baseMap = new Map(baseline.map((a) => [a.text, a.passed]));
    const curMap = new Map(current.map((a) => [a.text, a.passed]));
    const allTexts = new Set([...baseMap.keys(), ...curMap.keys()]);
    const result = [];
    for (const text of allTexts) {
        // `null` marks "assertion not present on this side".
        const curPassed = curMap.get(text) ?? null;
        const basePassed = baseMap.get(text) ?? null;
        let status = 'unchanged';
        if (curPassed !== null && basePassed === null) {
            status = 'new';
        }
        else if (curPassed === null && basePassed !== null) {
            status = 'removed';
        }
        else if (curPassed === true && basePassed === false) {
            status = 'improved';
        }
        else if (curPassed === false && basePassed === true) {
            status = 'regressed';
        }
        result.push({ text, currentPassed: curPassed, baselinePassed: basePassed, status });
    }
    return result;
}
97
+ //# sourceMappingURL=eval-diff.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"eval-diff.js","sourceRoot":"","sources":["../../../src/eval/eval-diff.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAIH;;;GAGG;AACH,MAAM,UAAU,eAAe,CAC7B,OAAwB,EACxB,QAAyB;IAEzB,MAAM,UAAU,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;IACzE,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;IAC3E,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,CAAC,GAAG,UAAU,CAAC,IAAI,EAAE,EAAE,GAAG,WAAW,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC;IACxE,MAAM,KAAK,GAAe,EAAE,CAAC;IAE7B,KAAK,MAAM,IAAI,IAAI,QAAQ,EAAE,CAAC;QAC5B,MAAM,GAAG,GAAG,UAAU,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;QACjC,MAAM,IAAI,GAAG,WAAW,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;QAEnC,IAAI,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;YACjB,KAAK,CAAC,IAAI,CAAC;gBACT,QAAQ,EAAE,IAAI;gBACd,MAAM,EAAE,KAAK;gBACb,aAAa,EAAE,GAAG,CAAC,MAAM;gBACzB,cAAc,EAAE,IAAI;gBACpB,gBAAgB,EAAE,GAAG,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;oBAC3C,IAAI,EAAE,CAAC,CAAC,IAAI;oBACZ,aAAa,EAAE,CAAC,CAAC,MAAM;oBACvB,cAAc,EAAE,IAAI;oBACpB,MAAM,EAAE,KAAuB;iBAChC,CAAC,CAAC;aACJ,CAAC,CAAC;YACH,SAAS;QACX,CAAC;QAED,IAAI,CAAC,GAAG,IAAI,IAAI,EAAE,CAAC;YACjB,KAAK,CAAC,IAAI,CAAC;gBACT,QAAQ,EAAE,IAAI;gBACd,MAAM,EAAE,SAAS;gBACjB,aAAa,EAAE,IAAI;gBACnB,cAAc,EAAE,IAAI,CAAC,MAAM;gBAC3B,gBAAgB,EAAE,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;oBAC5C,IAAI,EAAE,CAAC,CAAC,IAAI;oBACZ,aAAa,EAAE,IAAI;oBACnB,cAAc,EAAE,CAAC,CAAC,MAAM;oBACxB,MAAM,EAAE,SAA2B;iBACpC,CAAC,CAAC;aACJ,CAAC,CAAC;YACH,SAAS;QACX,CAAC;QAED,IAAI,GAAG,IAAI,IAAI,EAAE,CAAC;YAChB,MAAM,gBAAgB,GAAG,cAAc,CAAC,GAAG,CAAC,UAAU,EAAE,IAAI,CAAC,UAAU,CAAC,CAAC;YACzE,IAAI,MAAM,GAAmB,WAAW,CAAC;YAEzC,IAAI,GAAG,CAAC,MAAM,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC;gBAC/B,MAAM,GAAG,UAAU,CAAC;YACtB,CAAC;iBAAM,IAAI,CAAC,GAAG,CAAC,MAAM,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;gBACtC,MAAM,GAAG,WAAW,CAAC;YACvB,CAAC;iBAAM,IAAI,gBAAgB,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,
MAAM,KAAK,WAAW,CAAC,EAAE,CAAC;gBAClE,qDAAqD;gBACrD,MAAM,GAAG,gBAAgB,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,WAAW,CAAC,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,UAAU,CAAC;YAC7F,CAAC;YAED,KAAK,CAAC,IAAI,CAAC;gBACT,QAAQ,EAAE,IAAI;gBACd,MAAM;gBACN,aAAa,EAAE,GAAG,CAAC,MAAM;gBACzB,cAAc,EAAE,IAAI,CAAC,MAAM;gBAC3B,gBAAgB;aACjB,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,OAAO,KAAK,CAAC;AACf,CAAC;AAED,SAAS,cAAc,CACrB,OAA+C,EAC/C,QAAgD;IAEhD,MAAM,OAAO,GAAG,IAAI,GAAG,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;IACjE,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;IAC/D,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,CAAC,GAAG,OAAO,CAAC,IAAI,EAAE,EAAE,GAAG,MAAM,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC;IAChE,MAAM,MAAM,GAAiH,EAAE,CAAC;IAEhI,KAAK,MAAM,IAAI,IAAI,QAAQ,EAAE,CAAC;QAC5B,MAAM,SAAS,GAAG,MAAM,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,IAAI,CAAC;QAC3C,MAAM,UAAU,GAAG,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,IAAI,CAAC;QAE7C,IAAI,MAAM,GAAmB,WAAW,CAAC;QACzC,IAAI,SAAS,KAAK,IAAI,IAAI,UAAU,KAAK,IAAI,EAAE,CAAC;YAC9C,MAAM,GAAG,KAAK,CAAC;QACjB,CAAC;aAAM,IAAI,SAAS,KAAK,IAAI,IAAI,UAAU,KAAK,IAAI,EAAE,CAAC;YACrD,MAAM,GAAG,SAAS,CAAC;QACrB,CAAC;aAAM,IAAI,SAAS,KAAK,IAAI,IAAI,UAAU,KAAK,KAAK,EAAE,CAAC;YACtD,MAAM,GAAG,UAAU,CAAC;QACtB,CAAC;aAAM,IAAI,SAAS,KAAK,KAAK,IAAI,UAAU,KAAK,IAAI,EAAE,CAAC;YACtD,MAAM,GAAG,WAAW,CAAC;QACvB,CAAC;QAED,MAAM,CAAC,IAAI,CAAC,EAAC,IAAI,EAAE,aAAa,EAAE,SAAS,EAAE,cAAc,EAAE,UAAU,EAAE,MAAM,EAAC,CAAC,CAAC;IACpF,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC"}
@@ -0,0 +1,23 @@
1
+ /**
2
+ * @license
3
+ * Copyright 2026 Amodal Labs, Inc.
4
+ * SPDX-License-Identifier: MIT
5
+ */
6
+ import type { EvalSuiteResult, EvalDiff, EvalModelInfo } from './eval-types.js';
7
+ /**
8
+ * Format eval results as a terminal table with token usage and cost.
+ *
+ * Tokens and Cost columns are included only when at least one result carries cost data.
+ * Each failed eval is followed by one detail line per assertion (with its reason, if any).
+ * The table ends with totals, assertion accuracy and, when the suite has a total cost,
+ * token and cost totals.
+ *
+ * @param result - Suite result to render.
+ * @param model - Optional model info; when given, a `Model: provider/model` line precedes the table.
+ * @returns The table as a single newline-joined string.
9
+ */
10
+ export declare function formatEvalTable(result: EvalSuiteResult, model?: EvalModelInfo): string;
11
+ /**
12
+ * Format eval diff as a terminal table.
+ *
+ * One row per diff: eval name, status icon and status, then the current and baseline
+ * outcome (`PASS`, `FAIL`, or `-` when the corresponding passed flag is `null`).
+ * A summary line counts the `regressed`, `improved` and `unchanged` entries.
+ *
+ * @param diffs - Diffs to render, in display order.
+ * @returns The table as a single newline-joined string.
13
+ */
14
+ export declare function formatDiffTable(diffs: EvalDiff[]): string;
15
+ /**
16
+ * Format a side-by-side comparison of two eval suite results.
17
+ * Shows model, accuracy, tokens, cost, and duration for each eval.
+ *
+ * The baseline column is printed first (left), the current column second (right).
+ * Rows are driven by `diffs`; each eval is looked up by name in both suites and a side
+ * that lacks the eval (or its cost data) shows `-`.
+ *
+ * @param current - Suite result shown in the right-hand column.
+ * @param baseline - Suite result shown in the left-hand column.
+ * @param diffs - Diff entries that determine which evals are listed and their status icon.
+ * @returns The table as a single newline-joined string.
18
+ */
19
+ export declare function formatComparisonTable(current: EvalSuiteResult, baseline: EvalSuiteResult, diffs: EvalDiff[]): string;
20
+ /**
21
+ * Format eval results as CI-friendly markdown.
+ *
+ * Emits an `## Eval Results` heading, a results table (with Tokens and Cost columns only
+ * when at least one result carries cost data) and a bold summary line.
+ *
+ * @param result - Suite result to render.
+ * @param diffs - Optional diffs; when non-empty a `### Diff vs Baseline` table is appended,
+ * followed by a regression notice if any entry is `regressed`.
+ * @param model - Optional model info; when given, a `**Model:**` line follows the heading.
+ * @returns The markdown as a single newline-joined string.
22
+ */
23
+ export declare function formatEvalMarkdown(result: EvalSuiteResult, diffs?: EvalDiff[], model?: EvalModelInfo): string;
@@ -0,0 +1,221 @@
1
+ /**
2
+ * @license
3
+ * Copyright 2026 Amodal Labs, Inc.
4
+ * SPDX-License-Identifier: MIT
5
+ */
6
+ import { formatCostMicros } from './eval-cost.js';
7
/**
 * Format eval results as a terminal table with token usage and cost.
 *
 * Layout:
 * - an optional `Model: provider/model` line when `model` is given;
 * - a header and rule; Tokens and Cost columns are included only when at least
 *   one result carries cost data;
 * - one row per eval, followed by per-assertion detail lines for failed evals;
 * - summary lines: totals, assertion accuracy and, when `result.totalCost` is
 *   set, token and cost totals.
 *
 * In a table that has Tokens/Cost columns, a result without cost data shows `-`
 * in both columns so that its duration stays under the Duration heading.
 *
 * @param result - suite result; reads `results`, `totalPassed`, `totalFailed`,
 *   `totalDurationMs` and the optional `totalCost`
 * @param model - optional model info (`provider`, `model`) for the heading line
 * @returns the table as a single newline-joined string
 */
export function formatEvalTable(result, model) {
    const lines = [];
    const nameWidth = Math.max(20, ...result.results.map((r) => r.eval.name.length));
    const hasUsage = result.results.some((r) => r.cost);
    const rule = '-'.repeat(hasUsage ? nameWidth + 70 : nameWidth + 40);
    // Header
    if (model) {
        lines.push('');
        lines.push(`Model: ${model.provider}/${model.model}`);
    }
    lines.push('');
    if (hasUsage) {
        lines.push(`${'Eval'.padEnd(nameWidth)} Status Assertions Tokens Cost Duration`);
    }
    else {
        lines.push(`${'Eval'.padEnd(nameWidth)} Status Assertions Duration`);
    }
    lines.push(rule);
    for (const r of result.results) {
        const status = r.passed ? 'PASS' : 'FAIL';
        const passedCount = r.assertions.filter((a) => a.passed).length;
        const totalCount = r.assertions.length;
        const duration = `${r.durationMs}ms`;
        if (hasUsage) {
            // The layout is chosen per table, not per row: a result without cost data
            // gets placeholders instead of silently switching to the narrower layout.
            const tokens = r.cost ? r.cost.totalTokens.toLocaleString() : '-';
            const cost = r.cost ? formatCostMicros(r.cost.estimatedCostMicros) : '-';
            lines.push(`${r.eval.name.padEnd(nameWidth)} ${status.padEnd(7)} ${`${passedCount}/${totalCount}`.padEnd(12)}${tokens.padEnd(13)}${cost.padEnd(11)}${duration}`);
        }
        else {
            lines.push(`${r.eval.name.padEnd(nameWidth)} ${status.padEnd(7)} ${passedCount}/${totalCount}`.padEnd(nameWidth + 20) +
                ` ${duration}`);
        }
        // Show per-assertion detail for failed evals
        if (!r.passed) {
            for (const a of r.assertions) {
                const icon = a.passed ? ' ✓' : ' ✗';
                const reason = a.reason ? ` — ${a.reason}` : '';
                lines.push(`${icon} ${a.negated ? 'NOT ' : ''}${a.text}${reason}`);
            }
        }
    }
    lines.push(rule);
    // Summary lines
    const totalAssertions = result.results.reduce((n, r) => n + r.assertions.length, 0);
    const passedAssertions = result.results.reduce((n, r) => n + r.assertions.filter((a) => a.passed).length, 0);
    // Guard the division so a suite without assertions reports 0% rather than NaN.
    const accuracy = totalAssertions > 0 ? ((passedAssertions / totalAssertions) * 100).toFixed(0) : '0';
    lines.push(`Total: ${result.totalPassed} passed, ${result.totalFailed} failed (${result.totalDurationMs}ms)`);
    lines.push(`Accuracy: ${passedAssertions}/${totalAssertions} assertions (${accuracy}%)`);
    if (result.totalCost) {
        const cost = formatCostMicros(result.totalCost.estimatedCostMicros);
        lines.push(`Tokens: ${result.totalCost.totalTokens.toLocaleString()} (${result.totalCost.inputTokens.toLocaleString()} in / ${result.totalCost.outputTokens.toLocaleString()} out)`);
        lines.push(`Cost: ${cost}`);
    }
    lines.push('');
    return lines.join('\n');
}
66
/**
 * Render a list of eval diffs as a plain-text terminal table.
 *
 * Each row shows the eval name, a status icon with the status text, and the
 * current and baseline outcome (`PASS`, `FAIL`, or `-` when the passed flag is
 * `null`). A closing summary counts regressed, improved and unchanged entries.
 *
 * @param diffs - diff entries, rendered in the order given
 * @returns the table as a single newline-joined string
 */
export function formatDiffTable(diffs) {
    const nameWidth = Math.max(20, ...diffs.map((d) => d.evalName.length));
    const rule = '-'.repeat(nameWidth + 40);
    const out = ['', `${'Eval'.padEnd(nameWidth)} Status Current Baseline`, rule];
    // Tallies for the summary line, gathered while the rows are produced.
    let regressed = 0;
    let improved = 0;
    let unchanged = 0;
    for (const d of diffs) {
        let current = '-';
        if (d.currentPassed !== null) {
            current = d.currentPassed ? 'PASS' : 'FAIL';
        }
        let baseline = '-';
        if (d.baselinePassed !== null) {
            baseline = d.baselinePassed ? 'PASS' : 'FAIL';
        }
        out.push(`${d.evalName.padEnd(nameWidth)} ${statusIcon(d.status)} ${d.status.padEnd(10)} ${current.padEnd(7)} ${baseline}`);
        if (d.status === 'regressed') {
            regressed += 1;
        }
        else if (d.status === 'improved') {
            improved += 1;
        }
        else if (d.status === 'unchanged') {
            unchanged += 1;
        }
    }
    out.push(rule, `Summary: ${regressed} regressed, ${improved} improved, ${unchanged} unchanged`, '');
    return out.join('\n');
}
89
/**
 * Render two eval suite results side by side as a plain-text table.
 *
 * The baseline suite occupies the left column and the current suite the right
 * column. For every entry in `diffs` three rows are printed: verdict with
 * assertion counts, tokens with cost, and duration. A totals section follows
 * with eval counts, assertion accuracy, optional token/cost totals (when either
 * suite has a total cost) and total duration. A side that lacks an eval, or
 * lacks cost data, shows `-`.
 *
 * @param current - suite result for the right-hand column
 * @param baseline - suite result for the left-hand column
 * @param diffs - diff entries selecting which evals are listed and their icon
 * @returns the table as a single newline-joined string
 */
export function formatComparisonTable(current, baseline, diffs) {
    let currentModel = 'current';
    if (current.model) {
        currentModel = `${current.model.provider}/${current.model.model}`;
    }
    let baselineModel = 'baseline';
    if (baseline.model) {
        baselineModel = `${baseline.model.provider}/${baseline.model.model}`;
    }
    const nameWidth = Math.max(20, ...diffs.map((d) => d.evalName.length));
    const labelWidth = nameWidth + 4;
    const colWidth = Math.max(30, baselineModel.length + 4, currentModel.length + 4);
    const sep = '-'.repeat(labelWidth + colWidth * 2);
    // The table is collected as entries: a [label, baseline cell, current cell]
    // triple for a padded row, or a one-element array for a line printed verbatim.
    const table = [['', baselineModel, currentModel], [sep]];
    // Per-eval rows
    const baseMap = new Map(baseline.results.map((r) => [r.eval.name, r]));
    const curMap = new Map(current.results.map((r) => [r.eval.name, r]));
    for (const d of diffs) {
        // Index 0 is the baseline side, index 1 the current side.
        const sides = [baseMap.get(d.evalName), curMap.get(d.evalName)];
        const verdicts = sides.map((r) => r
            ? `${r.passed ? 'PASS' : 'FAIL'} ${r.assertions.filter((a) => a.passed).length}/${r.assertions.length}`
            : '- -');
        const usage = sides.map((r) => r?.cost
            ? `${r.cost.totalTokens.toLocaleString()} tok ${formatCostMicros(r.cost.estimatedCostMicros)}`
            : '- tok -');
        const durations = sides.map((r) => (r ? `${(r.durationMs / 1000).toFixed(1)}s` : '-'));
        table.push([`${statusIcon(d.status)} ${d.evalName}`, verdicts[0], verdicts[1]]);
        table.push(['', usage[0], usage[1]]);
        table.push(['', durations[0], durations[1]]);
    }
    table.push([sep]);
    // Totals, computed the same way for both suites (baseline first).
    const totals = [baseline, current].map((suite) => {
        let assertionTotal = 0;
        let assertionPassed = 0;
        for (const r of suite.results) {
            assertionTotal += r.assertions.length;
            assertionPassed += r.assertions.filter((a) => a.passed).length;
        }
        const accuracy = assertionTotal > 0 ? ((assertionPassed / assertionTotal) * 100).toFixed(0) : '0';
        return {
            evals: `${suite.totalPassed}/${suite.results.length} passed`,
            accuracy: `${accuracy}% (${assertionPassed}/${assertionTotal})`,
            tokens: suite.totalCost ? suite.totalCost.totalTokens.toLocaleString() : '-',
            cost: suite.totalCost ? formatCostMicros(suite.totalCost.estimatedCostMicros) : '-',
            duration: `${(suite.totalDurationMs / 1000).toFixed(1)}s`,
        };
    });
    const base = totals[0];
    const cur = totals[1];
    table.push(['Evals', base.evals, cur.evals]);
    table.push(['Accuracy', base.accuracy, cur.accuracy]);
    if (baseline.totalCost || current.totalCost) {
        table.push(['Tokens', base.tokens, cur.tokens]);
        table.push(['Cost', base.cost, cur.cost]);
    }
    table.push(['Duration', base.duration, cur.duration]);
    const rendered = table.map((cells) => cells.length === 1
        ? cells[0]
        : cells[0].padEnd(labelWidth) + cells[1].padEnd(colWidth) + cells[2].padEnd(colWidth));
    return ['', ...rendered, ''].join('\n');
}
153
/**
 * Format eval results as CI-friendly markdown.
 *
 * Output, in order:
 * - an `## Eval Results` heading and an optional `**Model:**` line;
 * - a results table; Tokens and Cost columns are included only when at least
 *   one result carries cost data;
 * - a bold summary line and, when `result.totalCost` is set, a token/cost line;
 * - when `diffs` is non-empty, a `### Diff vs Baseline` table and a regression
 *   notice if any entry is `regressed`.
 *
 * Every row of the results table has the same number of cells as its header:
 * in a table with Tokens/Cost columns, a result without cost data shows `-` in
 * both columns instead of dropping them.
 *
 * @param result - suite result; reads `results`, `totalPassed`, `totalFailed`,
 *   `totalDurationMs` and the optional `totalCost`
 * @param diffs - optional diff entries for the baseline section
 * @param model - optional model info (`provider`, `model`)
 * @returns the markdown as a single newline-joined string (no trailing newline)
 */
export function formatEvalMarkdown(result, diffs, model) {
    const lines = [];
    lines.push('## Eval Results');
    if (model) {
        lines.push(`**Model:** ${model.provider}/${model.model}`);
    }
    lines.push('');
    const hasUsage = result.results.some((r) => r.cost);
    if (hasUsage) {
        lines.push(`| Eval | Status | Assertions | Tokens | Cost | Duration |`);
        lines.push(`|------|--------|------------|--------|------|----------|`);
    }
    else {
        lines.push(`| Eval | Status | Assertions | Duration |`);
        lines.push(`|------|--------|------------|----------|`);
    }
    for (const r of result.results) {
        const status = r.passed ? 'PASS' : 'FAIL';
        const passedCount = r.assertions.filter((a) => a.passed).length;
        if (hasUsage) {
            // Keep six cells per row even when this result has no cost data; a short
            // row would put the duration under the Tokens heading.
            const tokens = r.cost ? r.cost.totalTokens.toLocaleString() : '-';
            const cost = r.cost ? formatCostMicros(r.cost.estimatedCostMicros) : '-';
            lines.push(`| ${r.eval.name} | ${status} | ${passedCount}/${r.assertions.length} | ${tokens} | ${cost} | ${r.durationMs}ms |`);
        }
        else {
            lines.push(`| ${r.eval.name} | ${status} | ${passedCount}/${r.assertions.length} | ${r.durationMs}ms |`);
        }
    }
    const totalAssertions = result.results.reduce((n, r) => n + r.assertions.length, 0);
    const passedAssertions = result.results.reduce((n, r) => n + r.assertions.filter((a) => a.passed).length, 0);
    // Guard the division so a suite without assertions reports 0% rather than NaN.
    const accuracy = totalAssertions > 0 ? ((passedAssertions / totalAssertions) * 100).toFixed(0) : '0';
    lines.push('');
    lines.push(`**${result.totalPassed} passed, ${result.totalFailed} failed** (${result.totalDurationMs}ms) — **${accuracy}% accuracy** (${passedAssertions}/${totalAssertions})`);
    if (result.totalCost) {
        const cost = formatCostMicros(result.totalCost.estimatedCostMicros);
        lines.push(`**Tokens:** ${result.totalCost.totalTokens.toLocaleString()} — **Cost:** ${cost}`);
    }
    if (diffs && diffs.length > 0) {
        lines.push('');
        lines.push('### Diff vs Baseline');
        lines.push('');
        lines.push('| Eval | Status | Current | Baseline |');
        lines.push('|------|--------|---------|----------|');
        for (const d of diffs) {
            const icon = statusIcon(d.status);
            // A null passed flag means that side has no result for this eval.
            const current = d.currentPassed === null ? '-' : d.currentPassed ? 'PASS' : 'FAIL';
            const baseline = d.baselinePassed === null ? '-' : d.baselinePassed ? 'PASS' : 'FAIL';
            lines.push(`| ${d.evalName} | ${icon} ${d.status} | ${current} | ${baseline} |`);
        }
        const regressed = diffs.filter((d) => d.status === 'regressed').length;
        if (regressed > 0) {
            lines.push('');
            lines.push(`**${regressed} regression(s) detected**`);
        }
    }
    return lines.join('\n');
}
212
/**
 * Map a diff status to the one-character marker printed next to it.
 *
 * `improved` → `+`, `regressed` → `!`, `new` → `*`, `removed` → `-`;
 * every other value yields `=`.
 */
function statusIcon(status) {
    if (status === 'improved') {
        return '+';
    }
    if (status === 'regressed') {
        return '!';
    }
    if (status === 'new') {
        return '*';
    }
    if (status === 'removed') {
        return '-';
    }
    return '=';
}
221
+ //# sourceMappingURL=eval-formatter.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"eval-formatter.js","sourceRoot":"","sources":["../../../src/eval/eval-formatter.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAGH,OAAO,EAAC,gBAAgB,EAAC,MAAM,gBAAgB,CAAC;AAEhD;;GAEG;AACH,MAAM,UAAU,eAAe,CAAC,MAAuB,EAAE,KAAqB;IAC5E,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,CAAC,EAAE,EAAE,GAAG,MAAM,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC;IACjF,MAAM,QAAQ,GAAG,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;IAEpD,SAAS;IACT,IAAI,KAAK,EAAE,CAAC;QACV,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACf,KAAK,CAAC,IAAI,CAAC,UAAU,KAAK,CAAC,QAAQ,IAAI,KAAK,CAAC,KAAK,EAAE,CAAC,CAAC;IACxD,CAAC;IACD,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAEf,IAAI,QAAQ,EAAE,CAAC;QACb,KAAK,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC,MAAM,CAAC,SAAS,CAAC,yDAAyD,CAAC,CAAC;QACjG,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,SAAS,GAAG,EAAE,CAAC,CAAC,CAAC;IACzC,CAAC;SAAM,CAAC;QACN,KAAK,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC,MAAM,CAAC,SAAS,CAAC,iCAAiC,CAAC,CAAC;QACzE,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,SAAS,GAAG,EAAE,CAAC,CAAC,CAAC;IACzC,CAAC;IAED,KAAK,MAAM,CAAC,IAAI,MAAM,CAAC,OAAO,EAAE,CAAC;QAC/B,MAAM,MAAM,GAAG,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC;QAC1C,MAAM,WAAW,GAAG,CAAC,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC;QAChE,MAAM,UAAU,GAAG,CAAC,CAAC,UAAU,CAAC,MAAM,CAAC;QACvC,MAAM,QAAQ,GAAG,GAAG,CAAC,CAAC,UAAU,IAAI,CAAC;QAErC,IAAI,QAAQ,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC;YACvB,MAAM,MAAM,GAAG,GAAG,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,cAAc,EAAE,EAAE,CAAC;YACxD,MAAM,IAAI,GAAG,gBAAgB,CAAC,CAAC,CAAC,IAAI,CAAC,mBAAmB,CAAC,CAAC;YAC1D,KAAK,CAAC,IAAI,CACR,GAAG,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,KAAK,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,KAAK,GAAG,WAAW,IAAI,UAAU,EAAE,CAAC,MAAM,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,CAAC,EAAE,CAAC,GAAG,QAAQ,EAAE,CACvJ,CAAC;QACJ,CAAC;aAAM,CAAC;YACN,KAAK,CAAC,IAAI,CACR,GAAG,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,SA
AS,CAAC,KAAK,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,KAAK,WAAW,IAAI,UAAU,EAAE,CAAC,MAAM,CAAC,SAAS,GAAG,EAAE,CAAC;gBAC5G,YAAY,QAAQ,EAAE,CACvB,CAAC;QACJ,CAAC;QAED,6CAA6C;QAC7C,IAAI,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC;YACd,KAAK,MAAM,CAAC,IAAI,CAAC,CAAC,UAAU,EAAE,CAAC;gBAC7B,MAAM,IAAI,GAAG,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC;gBACtC,MAAM,MAAM,GAAG,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;gBAChD,KAAK,CAAC,IAAI,CAAC,GAAG,IAAI,IAAI,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC,IAAI,GAAG,MAAM,EAAE,CAAC,CAAC;YACrE,CAAC;QACH,CAAC;IACH,CAAC;IAED,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,SAAS,GAAG,EAAE,CAAC,CAAC,CAAC,SAAS,GAAG,EAAE,CAAC,CAAC,CAAC;IAEnE,eAAe;IACf,MAAM,eAAe,GAAG,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,UAAU,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;IACpF,MAAM,gBAAgB,GAAG,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;IAC7G,MAAM,QAAQ,GAAG,eAAe,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,gBAAgB,GAAG,eAAe,CAAC,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC;IACrG,KAAK,CAAC,IAAI,CAAC,UAAU,MAAM,CAAC,WAAW,YAAY,MAAM,CAAC,WAAW,YAAY,MAAM,CAAC,eAAe,KAAK,CAAC,CAAC;IAC9G,KAAK,CAAC,IAAI,CAAC,aAAa,gBAAgB,IAAI,eAAe,gBAAgB,QAAQ,IAAI,CAAC,CAAC;IAEzF,IAAI,MAAM,CAAC,SAAS,EAAE,CAAC;QACrB,MAAM,IAAI,GAAG,gBAAgB,CAAC,MAAM,CAAC,SAAS,CAAC,mBAAmB,CAAC,CAAC;QACpE,KAAK,CAAC,IAAI,CAAC,WAAW,MAAM,CAAC,SAAS,CAAC,WAAW,CAAC,cAAc,EAAE,KAAK,MAAM,CAAC,SAAS,CAAC,WAAW,CAAC,cAAc,EAAE,SAAS,MAAM,CAAC,SAAS,CAAC,YAAY,CAAC,cAAc,EAAE,OAAO,CAAC,CAAC;QACrL,KAAK,CAAC,IAAI,CAAC,SAAS,IAAI,EAAE,CAAC,CAAC;IAC9B,CAAC;IAED,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAEf,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,eAAe,CAAC,KAAiB;IAC/C,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,CAAC,EAAE,EAAE,GAAG,KAAK,CAAC,GAAG,CAAC,CA
AC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC;IAEvE,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IACf,KAAK,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC,MAAM,CAAC,SAAS,CAAC,kCAAkC,CAAC,CAAC;IAC1E,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,SAAS,GAAG,EAAE,CAAC,CAAC,CAAC;IAEvC,KAAK,MAAM,CAAC,IAAI,KAAK,EAAE,CAAC;QACtB,MAAM,IAAI,GAAG,UAAU,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;QAClC,MAAM,OAAO,GAAG,CAAC,CAAC,aAAa,KAAK,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC;QACnF,MAAM,QAAQ,GAAG,CAAC,CAAC,cAAc,KAAK,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,cAAc,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC;QAEtF,KAAK,CAAC,IAAI,CACR,GAAG,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,SAAS,CAAC,KAAK,IAAI,IAAI,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,EAAE,CAAC,KAAK,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,KAAK,QAAQ,EAAE,CACrG,CAAC;IACJ,CAAC;IAED,MAAM,SAAS,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,WAAW,CAAC,CAAC,MAAM,CAAC;IACvE,MAAM,QAAQ,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,UAAU,CAAC,CAAC,MAAM,CAAC;IACrE,MAAM,SAAS,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,WAAW,CAAC,CAAC,MAAM,CAAC;IAEvE,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,SAAS,GAAG,EAAE,CAAC,CAAC,CAAC;IACvC,KAAK,CAAC,IAAI,CAAC,YAAY,SAAS,eAAe,QAAQ,cAAc,SAAS,YAAY,CAAC,CAAC;IAC5F,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAEf,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,qBAAqB,CACnC,OAAwB,EACxB,QAAyB,EACzB,KAAiB;IAEjB,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,MAAM,YAAY,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,GAAG,OAAO,CAAC,KAAK,CAAC,QAAQ,IAAI,OAAO,CAAC,KAAK,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC;IACpG,MAAM,aAAa,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,GAAG,QAAQ,CAAC,KAAK,CAAC,QAAQ,IAAI,QAAQ,CAAC,KAAK,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,UAAU,CAAC;IACzG,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,CAAC,EAAE,EAAE,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC;IACvE,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,CAAC,EAAE,EAAE,aAAa,CAAC,MAAM,G
AAG,CAAC,EAAE,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IACjF,MAAM,GAAG,GAAG,GAAG,CAAC,MAAM,CAAC,SAAS,GAAG,CAAC,GAAG,QAAQ,GAAG,CAAC,CAAC,CAAC;IAErD,SAAS;IACT,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IACf,KAAK,CAAC,IAAI,CAAC,GAAG,EAAE,CAAC,MAAM,CAAC,SAAS,GAAG,CAAC,CAAC,GAAG,aAAa,CAAC,MAAM,CAAC,QAAQ,CAAC,GAAG,YAAY,CAAC,MAAM,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC;IAC3G,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IAEhB,gBAAgB;IAChB,MAAM,OAAO,GAAG,IAAI,GAAG,CAAC,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;IACvE,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;IAErE,KAAK,MAAM,CAAC,IAAI,KAAK,EAAE,CAAC;QACtB,MAAM,IAAI,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC;QACrC,MAAM,GAAG,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC;QACnC,MAAM,IAAI,GAAG,UAAU,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;QAElC,MAAM,cAAc,GAAG,IAAI,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,MAAM,IAAI,IAAI,CAAC,UAAU,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC;QAClH,MAAM,aAAa,GAAG,GAAG,CAAC,CAAC,CAAC,GAAG,GAAG,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,MAAM,IAAI,GAAG,CAAC,UAAU,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC;QAC9G,MAAM,UAAU,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC;QAChE,MAAM,SAAS,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC;QAC7D,MAAM,UAAU,GAAG,IAAI,EAAE,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,WAAW,CAAC,cAAc,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC;QAC7E,MAAM,SAAS,GAAG,GAAG,EAAE,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,IAAI,CAAC,WAAW,CAAC,cAAc,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC;QAC1E,MAAM,QAAQ,GAAG,IAAI,EAAE,IAAI,CAAC,CAAC,CAAC,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,mBAAmB,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC;QACpF,MAAM,OAAO,GAAG,GAAG,EAAE,IAAI,CAAC,CAAC,CAAC,gBAAgB,CAAC,GAAG,CAAC,IAAI,
CAAC,mBAAmB,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC;QACjF,MAAM,YAAY,GAAG,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,IAAI,CAAC,UAAU,GAAG,IAAI,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC;QAC5E,MAAM,WAAW,GAAG,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,GAAG,CAAC,UAAU,GAAG,IAAI,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC;QAEzE,KAAK,CAAC,IAAI,CAAC,GAAG,GAAG,IAAI,IAAI,CAAC,CAAC,QAAQ,EAAE,CAAC,MAAM,CAAC,SAAS,GAAG,CAAC,CAAC,GAAG,GAAG,UAAU,IAAI,cAAc,EAAE,CAAC,MAAM,CAAC,QAAQ,CAAC,GAAG,GAAG,SAAS,IAAI,aAAa,EAAE,CAAC,MAAM,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC;QACvK,KAAK,CAAC,IAAI,CAAC,GAAG,EAAE,CAAC,MAAM,CAAC,SAAS,GAAG,CAAC,CAAC,GAAG,GAAG,UAAU,QAAQ,QAAQ,EAAE,CAAC,MAAM,CAAC,QAAQ,CAAC,GAAG,GAAG,SAAS,QAAQ,OAAO,EAAE,CAAC,MAAM,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC;QAC9I,KAAK,CAAC,IAAI,CAAC,GAAG,EAAE,CAAC,MAAM,CAAC,SAAS,GAAG,CAAC,CAAC,GAAG,YAAY,CAAC,MAAM,CAAC,QAAQ,CAAC,GAAG,WAAW,CAAC,MAAM,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC;IAC3G,CAAC;IAED,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IAEhB,SAAS;IACT,MAAM,SAAS,GAAG,QAAQ,CAAC,OAAO,CAAC,MAAM,CAAC;IAC1C,MAAM,QAAQ,GAAG,OAAO,CAAC,OAAO,CAAC,MAAM,CAAC;IACxC,MAAM,UAAU,GAAG,QAAQ,CAAC,WAAW,CAAC;IACxC,MAAM,SAAS,GAAG,OAAO,CAAC,WAAW,CAAC;IACtC,MAAM,YAAY,GAAG,QAAQ,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,UAAU,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;IACnF,MAAM,aAAa,GAAG,QAAQ,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;IAC5G,MAAM,WAAW,GAAG,OAAO,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,UAAU,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;IACjF,MAAM,YAAY,GAAG,OAAO,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;IAC1G,MAAM,YAAY,GAAG,YAAY,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,aAAa,GAAG,YAAY,CAAC,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC;IAChG,MAAM,WAAW,GAAG,WAAW,GAAG,CA
AC,CAAC,CAAC,CAAC,CAAC,CAAC,YAAY,GAAG,WAAW,CAAC,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC;IAE5F,KAAK,CAAC,IAAI,CAAC,GAAG,OAAO,CAAC,MAAM,CAAC,SAAS,GAAG,CAAC,CAAC,GAAG,GAAG,UAAU,IAAI,SAAS,SAAS,CAAC,MAAM,CAAC,QAAQ,CAAC,GAAG,GAAG,SAAS,IAAI,QAAQ,SAAS,CAAC,MAAM,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC;IAC3J,KAAK,CAAC,IAAI,CAAC,GAAG,UAAU,CAAC,MAAM,CAAC,SAAS,GAAG,CAAC,CAAC,GAAG,GAAG,YAAY,MAAM,aAAa,IAAI,YAAY,GAAG,CAAC,MAAM,CAAC,QAAQ,CAAC,GAAG,GAAG,WAAW,MAAM,YAAY,IAAI,WAAW,GAAG,CAAC,MAAM,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC;IAEjM,IAAI,QAAQ,CAAC,SAAS,IAAI,OAAO,CAAC,SAAS,EAAE,CAAC;QAC5C,MAAM,OAAO,GAAG,QAAQ,CAAC,SAAS,CAAC,CAAC,CAAC,QAAQ,CAAC,SAAS,CAAC,WAAW,CAAC,cAAc,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC;QAC3F,MAAM,MAAM,GAAG,OAAO,CAAC,SAAS,CAAC,CAAC,CAAC,OAAO,CAAC,SAAS,CAAC,WAAW,CAAC,cAAc,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC;QACxF,MAAM,aAAa,GAAG,QAAQ,CAAC,SAAS,CAAC,CAAC,CAAC,gBAAgB,CAAC,QAAQ,CAAC,SAAS,CAAC,mBAAmB,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC;QAC1G,MAAM,YAAY,GAAG,OAAO,CAAC,SAAS,CAAC,CAAC,CAAC,gBAAgB,CAAC,OAAO,CAAC,SAAS,CAAC,mBAAmB,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC;QACvG,KAAK,CAAC,IAAI,CAAC,GAAG,QAAQ,CAAC,MAAM,CAAC,SAAS,GAAG,CAAC,CAAC,GAAG,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,GAAG,MAAM,CAAC,MAAM,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC;QACrG,KAAK,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC,MAAM,CAAC,SAAS,GAAG,CAAC,CAAC,GAAG,aAAa,CAAC,MAAM,CAAC,QAAQ,CAAC,GAAG,YAAY,CAAC,MAAM,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC;IACjH,CAAC;IAED,MAAM,YAAY,GAAG,GAAG,CAAC,QAAQ,CAAC,eAAe,GAAG,IAAI,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC;IACxE,MAAM,WAAW,GAAG,GAAG,CAAC,OAAO,CAAC,eAAe,GAAG,IAAI,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC;IACtE,KAAK,CAAC,IAAI,CAAC,GAAG,UAAU,CAAC,MAAM,CAAC,SAAS,GAAG,CAAC,CAAC,GAAG,YAAY,CAAC,MAAM,CAAC,QAAQ,CAAC,GAAG,WAAW,CAAC,MAAM,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC;IACjH,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAEf,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,kBAAkB,CAAC,MAAuB,EAAE,KAAkB,EAAE,KAAqB;IACnG,MAAM,KAAK,GAAa,EAAE,CAAC;IAE3B,KAAK,CAAC,IAAI,CAAC,iBAAiB,CAAC,CAAC;IAC9B,IAAI,KAAK,EAAE,CAAC;QACV,KAAK,CAAC
,IAAI,CAAC,cAAc,KAAK,CAAC,QAAQ,IAAI,KAAK,CAAC,KAAK,EAAE,CAAC,CAAC;IAC5D,CAAC;IACD,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAEf,MAAM,QAAQ,GAAG,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;IACpD,IAAI,QAAQ,EAAE,CAAC;QACb,KAAK,CAAC,IAAI,CAAC,2DAA2D,CAAC,CAAC;QACxE,KAAK,CAAC,IAAI,CAAC,2DAA2D,CAAC,CAAC;IAC1E,CAAC;SAAM,CAAC;QACN,KAAK,CAAC,IAAI,CAAC,2CAA2C,CAAC,CAAC;QACxD,KAAK,CAAC,IAAI,CAAC,2CAA2C,CAAC,CAAC;IAC1D,CAAC;IAED,KAAK,MAAM,CAAC,IAAI,MAAM,CAAC,OAAO,EAAE,CAAC;QAC/B,MAAM,MAAM,GAAG,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC;QAC1C,MAAM,WAAW,GAAG,CAAC,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC;QAChE,IAAI,QAAQ,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC;YACvB,MAAM,IAAI,GAAG,gBAAgB,CAAC,CAAC,CAAC,IAAI,CAAC,mBAAmB,CAAC,CAAC;YAC1D,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,IAAI,MAAM,MAAM,MAAM,WAAW,IAAI,CAAC,CAAC,UAAU,CAAC,MAAM,MAAM,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,cAAc,EAAE,MAAM,IAAI,MAAM,CAAC,CAAC,UAAU,MAAM,CAAC,CAAC;QAC9J,CAAC;aAAM,CAAC;YACN,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,IAAI,MAAM,MAAM,MAAM,WAAW,IAAI,CAAC,CAAC,UAAU,CAAC,MAAM,MAAM,CAAC,CAAC,UAAU,MAAM,CAAC,CAAC;QAC3G,CAAC;IACH,CAAC;IAED,MAAM,eAAe,GAAG,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,UAAU,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;IACpF,MAAM,gBAAgB,GAAG,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;IAC7G,MAAM,QAAQ,GAAG,eAAe,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,gBAAgB,GAAG,eAAe,CAAC,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC;IAErG,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IACf,KAAK,CAAC,IAAI,CAAC,KAAK,MAAM,CAAC,WAAW,YAAY,MAAM,CAAC,WAAW,cAAc,MAAM,CAAC,eAAe,WAAW,QAAQ,iBAAiB,gBAAgB,IAAI,eAAe,GAAG,CAAC,CAAC;IAEhL,IAAI,MAAM,CAAC,SAAS,EAAE,CAAC;QACrB,MAAM,IAAI,GAAG,gBAAgB,CAAC,MAAM,CAAC,SAAS,CAAC,mBAAmB,CAAC,CAAC;QACpE,KAAK,CAAC,IAAI,CAAC,eAAe,MAAM,CAAC,SAAS,C
AAC,WAAW,CAAC,cAAc,EAAE,gBAAgB,IAAI,EAAE,CAAC,CAAC;IACjG,CAAC;IAED,IAAI,KAAK,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC9B,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACf,KAAK,CAAC,IAAI,CAAC,sBAAsB,CAAC,CAAC;QACnC,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACf,KAAK,CAAC,IAAI,CAAC,wCAAwC,CAAC,CAAC;QACrD,KAAK,CAAC,IAAI,CAAC,wCAAwC,CAAC,CAAC;QAErD,KAAK,MAAM,CAAC,IAAI,KAAK,EAAE,CAAC;YACtB,MAAM,IAAI,GAAG,UAAU,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;YAClC,MAAM,OAAO,GAAG,CAAC,CAAC,aAAa,KAAK,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC;YACnF,MAAM,QAAQ,GAAG,CAAC,CAAC,cAAc,KAAK,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,cAAc,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC;YACtF,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,QAAQ,MAAM,IAAI,IAAI,CAAC,CAAC,MAAM,MAAM,OAAO,MAAM,QAAQ,IAAI,CAAC,CAAC;QACnF,CAAC;QAED,MAAM,SAAS,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,WAAW,CAAC,CAAC,MAAM,CAAC;QACvE,IAAI,SAAS,GAAG,CAAC,EAAE,CAAC;YAClB,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YACf,KAAK,CAAC,IAAI,CAAC,KAAK,SAAS,2BAA2B,CAAC,CAAC;QACxD,CAAC;IACH,CAAC;IAED,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC;AAED,SAAS,UAAU,CAAC,MAAc;IAChC,QAAQ,MAAM,EAAE,CAAC;QACf,KAAK,UAAU,CAAC,CAAC,OAAO,GAAG,CAAC;QAC5B,KAAK,WAAW,CAAC,CAAC,OAAO,GAAG,CAAC;QAC7B,KAAK,KAAK,CAAC,CAAC,OAAO,GAAG,CAAC;QACvB,KAAK,SAAS,CAAC,CAAC,OAAO,GAAG,CAAC;QAC3B,OAAO,CAAC,CAAC,OAAO,GAAG,CAAC;IACtB,CAAC;AACH,CAAC"}
@@ -0,0 +1,26 @@
1
+ /**
2
+ * @license
3
+ * Copyright 2026 Amodal Labs, Inc.
4
+ * SPDX-License-Identifier: MIT
5
+ */
6
+ import type { AssertionResult } from './eval-types.js';
7
+ /**
8
+ * Provider interface for LLM-based judgment.
+ *
+ * `judge` receives the complete judge prompt and resolves with the model's raw text reply.
+ * The prompt built by `judgeAssertion` asks for a single line of the form
+ * `PASS: <brief reason>` or `FAIL: <brief reason>`.
9
+ */
10
+ export interface JudgeProvider {
11
+ judge(prompt: string): Promise<string>;
12
+ }
13
+ /**
14
+ * Judge whether a response satisfies an assertion using an LLM.
+ *
+ * The returned promise resolves with a failed result (`passed: false` plus an explanatory
+ * `reason`) when the judge reply cannot be parsed or the provider throws.
+ *
+ * @param response - Assistant response text to evaluate.
+ * @param assertion - Assertion text; `negated` phrases it as "should NOT" in the prompt.
+ * @param provider - Judge used to evaluate the prompt.
15
+ */
16
+ export declare function judgeAssertion(response: string, assertion: {
17
+ text: string;
18
+ negated: boolean;
19
+ }, provider: JudgeProvider): Promise<AssertionResult>;
20
+ /**
21
+ * Judge all assertions for a response.
+ *
+ * Assertions are judged one at a time, in order; the results array has the same order.
+ *
+ * @param response - Assistant response text to evaluate.
+ * @param assertions - Assertions to check against the response.
+ * @param provider - Judge used for every assertion.
22
+ */
23
+ export declare function judgeAllAssertions(response: string, assertions: Array<{
24
+ text: string;
25
+ negated: boolean;
26
+ }>, provider: JudgeProvider): Promise<AssertionResult[]>;
@@ -0,0 +1,76 @@
1
+ /**
2
+ * @license
3
+ * Copyright 2026 Amodal Labs, Inc.
4
+ * SPDX-License-Identifier: MIT
5
+ */
6
+ /**
7
+ * Judge whether a response satisfies an assertion using an LLM.
8
+ */
9
+ export async function judgeAssertion(response, assertion, provider) {
10
+ const direction = assertion.negated ? 'should NOT' : 'should';
11
+ const prompt = [
12
+ 'You are an eval judge. Determine if the assistant response satisfies the assertion.',
13
+ '',
14
+ '## Assistant Response',
15
+ response,
16
+ '',
17
+ '## Assertion',
18
+ `The response ${direction}: ${assertion.text}`,
19
+ '',
20
+ '## Instructions',
21
+ 'Reply with exactly one line in the format:',
22
+ 'PASS: <brief reason>',
23
+ 'or',
24
+ 'FAIL: <brief reason>',
25
+ '',
26
+ 'Nothing else.',
27
+ ].join('\n');
28
+ try {
29
+ const raw = await provider.judge(prompt);
30
+ const line = raw.trim().split('\n')[0];
31
+ const passMatch = /^PASS:\s*(.*)$/i.exec(line);
32
+ const failMatch = /^FAIL:\s*(.*)$/i.exec(line);
33
+ if (passMatch) {
34
+ return {
35
+ text: assertion.text,
36
+ negated: assertion.negated,
37
+ passed: true,
38
+ reason: passMatch[1].trim(),
39
+ };
40
+ }
41
+ if (failMatch) {
42
+ return {
43
+ text: assertion.text,
44
+ negated: assertion.negated,
45
+ passed: false,
46
+ reason: failMatch[1].trim(),
47
+ };
48
+ }
49
+ // Couldn't parse — treat as failure
50
+ return {
51
+ text: assertion.text,
52
+ negated: assertion.negated,
53
+ passed: false,
54
+ reason: `Judge response unparseable: ${line}`,
55
+ };
56
+ }
57
+ catch (err) {
58
+ return {
59
+ text: assertion.text,
60
+ negated: assertion.negated,
61
+ passed: false,
62
+ reason: `Judge error: ${err instanceof Error ? err.message : String(err)}`,
63
+ };
64
+ }
65
+ }
66
/**
 * Judge every assertion against the same response.
 *
 * Assertions are evaluated strictly one after another (each judge call is
 * awaited before the next starts) and the results keep the input order.
 *
 * @param response - assistant response text under evaluation
 * @param assertions - array of `{text, negated}` assertions
 * @param provider - judge passed through to `judgeAssertion`
 * @returns one result per assertion, in input order
 */
export async function judgeAllAssertions(response, assertions, provider) {
    const verdicts = [];
    for (let i = 0; i < assertions.length; i += 1) {
        const verdict = await judgeAssertion(response, assertions[i], provider);
        verdicts.push(verdict);
    }
    return verdicts;
}
76
+ //# sourceMappingURL=eval-judge.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"eval-judge.js","sourceRoot":"","sources":["../../../src/eval/eval-judge.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAWH;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,cAAc,CAClC,QAAgB,EAChB,SAA2C,EAC3C,QAAuB;IAEvB,MAAM,SAAS,GAAG,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,QAAQ,CAAC;IAC9D,MAAM,MAAM,GAAG;QACb,qFAAqF;QACrF,EAAE;QACF,uBAAuB;QACvB,QAAQ;QACR,EAAE;QACF,cAAc;QACd,gBAAgB,SAAS,KAAK,SAAS,CAAC,IAAI,EAAE;QAC9C,EAAE;QACF,iBAAiB;QACjB,4CAA4C;QAC5C,sBAAsB;QACtB,IAAI;QACJ,sBAAsB;QACtB,EAAE;QACF,eAAe;KAChB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAEb,IAAI,CAAC;QACH,MAAM,GAAG,GAAG,MAAM,QAAQ,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;QACzC,MAAM,IAAI,GAAG,GAAG,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC;QACvC,MAAM,SAAS,GAAG,iBAAiB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC/C,MAAM,SAAS,GAAG,iBAAiB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAE/C,IAAI,SAAS,EAAE,CAAC;YACd,OAAO;gBACL,IAAI,EAAE,SAAS,CAAC,IAAI;gBACpB,OAAO,EAAE,SAAS,CAAC,OAAO;gBAC1B,MAAM,EAAE,IAAI;gBACZ,MAAM,EAAE,SAAS,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE;aAC5B,CAAC;QACJ,CAAC;QAED,IAAI,SAAS,EAAE,CAAC;YACd,OAAO;gBACL,IAAI,EAAE,SAAS,CAAC,IAAI;gBACpB,OAAO,EAAE,SAAS,CAAC,OAAO;gBAC1B,MAAM,EAAE,KAAK;gBACb,MAAM,EAAE,SAAS,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE;aAC5B,CAAC;QACJ,CAAC;QAED,oCAAoC;QACpC,OAAO;YACL,IAAI,EAAE,SAAS,CAAC,IAAI;YACpB,OAAO,EAAE,SAAS,CAAC,OAAO;YAC1B,MAAM,EAAE,KAAK;YACb,MAAM,EAAE,+BAA+B,IAAI,EAAE;SAC9C,CAAC;IACJ,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,OAAO;YACL,IAAI,EAAE,SAAS,CAAC,IAAI;YACpB,OAAO,EAAE,SAAS,CAAC,OAAO;YAC1B,MAAM,EAAE,KAAK;YACb,MAAM,EAAE,gBAAgB,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE;SAC3E,CAAC;IACJ,CAAC;AACH,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,kBAAkB,CACtC,QAAgB,EAChB,UAAmD,EACnD,QAAuB;IAEvB,MAAM,OAAO,GAAsB,EAAE,CAAC;IACtC,KAAK,MAAM,SAAS,IAAI,UAAU,EAAE,CAAC;QACnC,OAAO,CAAC,IAAI,CAAC,MAAM,cAAc,CAAC,QAAQ,EAAE,SAAS,EAAE,QAAQ,CAAC,CAAC,CAAC;IACpE,CAAC;IACD,OAAO,OAAO,CAAC;AACjB,CAAC"}
@@ -0,0 +1,25 @@
1
+ /**
2
+ * @license
3
+ * Copyright 2026 Amodal Labs, Inc.
4
+ * SPDX-License-Identifier: MIT
5
+ */
6
+ import type { EvalModelInfo, EvalRunRecord, EvalRunComparison, EvalSuiteResult, EvalTrendPoint } from './eval-types.js';
7
+ /**
8
+ * Build an EvalRunRecord from a suite result and model info; `options` supplies the identifiers and metadata copied onto the record.
9
+ */
10
+ export declare function buildEvalRun(suite: EvalSuiteResult, model: EvalModelInfo, options: {
11
+ id?: string; // record id; the implementation generates a random UUID when omitted
12
+ orgId: string; // the only required option; copied to the record unchanged
13
+ appId?: string; // copied to the record unchanged (may be undefined)
14
+ gitSha?: string; // when omitted, the implementation falls back to suite.gitSha
15
+ label?: string; // copied to the record unchanged (may be undefined)
16
+ triggeredBy?: 'manual' | 'ci' | 'automation'; // the implementation defaults this to 'manual'
17
+ }): EvalRunRecord;
18
+ /**
19
+ * Compare two eval runs, producing diffs and cost/quality deltas. Numeric deltas are computed as runB minus runA, so runA acts as the baseline.
20
+ */
21
+ export declare function compareRuns(runA: EvalRunRecord, runB: EvalRunRecord): EvalRunComparison;
22
+ /**
23
+ * Extract trend points from a list of eval runs for charting. Produces exactly one point per run, in the same order as `runs` (no sorting or filtering).
24
+ */
25
+ export declare function buildTrendPoints(runs: EvalRunRecord[]): EvalTrendPoint[];
@@ -0,0 +1,78 @@
1
+ /**
2
+ * @license
3
+ * Copyright 2026 Amodal Labs, Inc.
4
+ * SPDX-License-Identifier: MIT
5
+ */
6
+ import { diffEvalResults } from './eval-diff.js';
7
+ import { aggregateRunCost } from './eval-cost.js';
8
+ /**
9
+ * Build an EvalRunRecord from a suite result and model info. `options` supplies ids and metadata; missing id, gitSha and triggeredBy are filled with defaults below.
10
+ */
11
+ export function buildEvalRun(suite, model, options) {
12
+ const perCaseCosts = suite.results.map((r) => r.cost ?? { inputTokens: 0, outputTokens: 0, totalTokens: 0, estimatedCostMicros: 0 }); // one entry per result, in suite order; a result without `cost` gets an all-zero entry
13
+ const totalCost = suite.totalCost ?? aggregateRunCost(perCaseCosts); // prefer the total already on the suite; otherwise hand the per-case entries to aggregateRunCost
14
+ return {
15
+ id: options.id ?? crypto.randomUUID(), // NOTE(review): uses the global `crypto` (no import in this file) — confirm every supported runtime exposes it
16
+ orgId: options.orgId,
17
+ appId: options.appId,
18
+ model,
19
+ suite,
20
+ perCaseCosts,
21
+ totalCost,
22
+ gitSha: options.gitSha ?? suite.gitSha, // an explicit option wins over the SHA recorded on the suite
23
+ label: options.label,
24
+ triggeredBy: options.triggeredBy ?? 'manual', // runs are labelled 'manual' unless the caller says otherwise
25
+ createdAt: suite.timestamp, // reuses the suite's timestamp rather than reading the clock here
26
+ };
27
+ }
28
+ /**
29
+ * Compare two eval runs, producing diffs and cost/quality deltas. Every numeric delta is runB minus runA, so runA acts as the baseline.
30
+ */
31
+ export function compareRuns(runA, runB) {
32
+ const diff = diffEvalResults(runA.suite, runB.suite); // result-level diff is delegated to eval-diff.js
33
+ const totalA = runA.suite.results.length;
34
+ const totalB = runB.suite.results.length;
35
+ const passRateA = totalA > 0 ? runA.suite.totalPassed / totalA : 0; // totalPassed / result count; 0 for a run with no results, avoiding a division by zero
36
+ const passRateB = totalB > 0 ? runB.suite.totalPassed / totalB : 0;
37
+ const maxLen = Math.max(runA.perCaseCosts.length, runB.perCaseCosts.length); // cover every index present in either run
38
+ const perCaseDeltas = [];
39
+ for (let i = 0; i < maxLen; i++) { // NOTE(review): costs are paired by array index, not by case identity — confirm both runs list cases in the same order
40
+ const costA = runA.perCaseCosts[i]?.estimatedCostMicros ?? 0; // an index missing from the shorter run counts as cost 0
41
+ const costB = runB.perCaseCosts[i]?.estimatedCostMicros ?? 0;
42
+ perCaseDeltas.push(costB - costA);
43
+ }
44
+ return {
45
+ runA,
46
+ runB,
47
+ diff,
48
+ costDelta: {
49
+ totalMicros: runB.totalCost.estimatedCostMicros - runA.totalCost.estimatedCostMicros, // taken from the stored run totals, not summed from perCaseDeltas
50
+ perCase: perCaseDeltas,
51
+ },
52
+ qualityDelta: {
53
+ passRateDelta: passRateB - passRateA, // difference of two fractions (not percentage points)
54
+ durationDeltaMs: runB.suite.totalDurationMs - runA.suite.totalDurationMs,
55
+ },
56
+ };
57
+ }
58
+ /**
59
+ * Extract trend points from a list of eval runs for charting. Produces exactly one point per run, in input order (no sorting or filtering).
60
+ */
61
+ export function buildTrendPoints(runs) {
62
+ return runs.map((run) => {
63
+ const total = run.suite.results.length;
64
+ const passRate = total > 0 ? run.suite.totalPassed / total : 0; // fraction of results that passed; 0 for a run with no results
65
+ const avgDurationMs = total > 0 ? run.suite.totalDurationMs / total : 0; // suite duration divided by result count; 0 for a run with no results
66
+ return {
67
+ runId: run.id,
68
+ label: run.label,
69
+ gitSha: run.gitSha,
70
+ model: run.model,
71
+ passRate,
72
+ totalCostMicros: run.totalCost.estimatedCostMicros,
73
+ avgDurationMs,
74
+ timestamp: run.createdAt, // the point's timestamp is the run's createdAt value
75
+ };
76
+ });
77
+ }
78
+ //# sourceMappingURL=eval-run-builder.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"eval-run-builder.js","sourceRoot":"","sources":["../../../src/eval/eval-run-builder.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAUH,OAAO,EAAC,eAAe,EAAC,MAAM,gBAAgB,CAAC;AAC/C,OAAO,EAAC,gBAAgB,EAAC,MAAM,gBAAgB,CAAC;AAEhD;;GAEG;AACH,MAAM,UAAU,YAAY,CAC1B,KAAsB,EACtB,KAAoB,EACpB,OAOC;IAED,MAAM,YAAY,GAAmB,KAAK,CAAC,OAAO,CAAC,GAAG,CACpD,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,IAAI,EAAC,WAAW,EAAE,CAAC,EAAE,YAAY,EAAE,CAAC,EAAE,WAAW,EAAE,CAAC,EAAE,mBAAmB,EAAE,CAAC,EAAC,CAC3F,CAAC;IAEF,MAAM,SAAS,GAAG,KAAK,CAAC,SAAS,IAAI,gBAAgB,CAAC,YAAY,CAAC,CAAC;IAEpE,OAAO;QACL,EAAE,EAAE,OAAO,CAAC,EAAE,IAAI,MAAM,CAAC,UAAU,EAAE;QACrC,KAAK,EAAE,OAAO,CAAC,KAAK;QACpB,KAAK,EAAE,OAAO,CAAC,KAAK;QACpB,KAAK;QACL,KAAK;QACL,YAAY;QACZ,SAAS;QACT,MAAM,EAAE,OAAO,CAAC,MAAM,IAAI,KAAK,CAAC,MAAM;QACtC,KAAK,EAAE,OAAO,CAAC,KAAK;QACpB,WAAW,EAAE,OAAO,CAAC,WAAW,IAAI,QAAQ;QAC5C,SAAS,EAAE,KAAK,CAAC,SAAS;KAC3B,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,WAAW,CAAC,IAAmB,EAAE,IAAmB;IAClE,MAAM,IAAI,GAAG,eAAe,CAAC,IAAI,CAAC,KAAK,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC;IAErD,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC;IACzC,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC;IACzC,MAAM,SAAS,GAAG,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,WAAW,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;IACnE,MAAM,SAAS,GAAG,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,WAAW,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;IAEnE,MAAM,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,YAAY,CAAC,MAAM,EAAE,IAAI,CAAC,YAAY,CAAC,MAAM,CAAC,CAAC;IAC5E,MAAM,aAAa,GAAa,EAAE,CAAC;IACnC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAChC,MAAM,KAAK,GAAG,IAAI,CAAC,YAAY,CAAC,CAAC,CAAC,EAAE,mBAAmB,IAAI,CAAC,CAAC;QAC7D,MAAM,KAAK,GAAG,IAAI,CAAC,YAAY,CAAC,CAAC,CAAC,EAAE,mBAAmB,IAAI,CAAC,CAAC;QAC7D,aAAa,CAAC,IAAI,CAAC,KAAK,GAAG,KAAK,CAAC,CAAC;IACpC,CAAC;IAED,OAAO;QACL,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,SAAS,EAAE;YACT,WAAW,EAAE,IAAI,CAAC,SAAS,CAAC,mBAAmB,GAAG,IAAI,CAAC,SAAS,CAAC,mBAAmB;YACpF,OAAO,EAAE,aAAa;SACvB;QACD,YAAY,EAAE;YACZ,aAAa,EAAE,SAA
S,GAAG,SAAS;YACpC,eAAe,EAAE,IAAI,CAAC,KAAK,CAAC,eAAe,GAAG,IAAI,CAAC,KAAK,CAAC,eAAe;SACzE;KACF,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,gBAAgB,CAAC,IAAqB;IACpD,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE;QACtB,MAAM,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC;QACvC,MAAM,QAAQ,GAAG,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,KAAK,CAAC,WAAW,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;QAC/D,MAAM,aAAa,GAAG,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,KAAK,CAAC,eAAe,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;QAExE,OAAO;YACL,KAAK,EAAE,GAAG,CAAC,EAAE;YACb,KAAK,EAAE,GAAG,CAAC,KAAK;YAChB,MAAM,EAAE,GAAG,CAAC,MAAM;YAClB,KAAK,EAAE,GAAG,CAAC,KAAK;YAChB,QAAQ;YACR,eAAe,EAAE,GAAG,CAAC,SAAS,CAAC,mBAAmB;YAClD,aAAa;YACb,SAAS,EAAE,GAAG,CAAC,SAAS;SACzB,CAAC;IACJ,CAAC,CAAC,CAAC;AACL,CAAC"}
@@ -0,0 +1,36 @@
1
+ /**
2
+ * @license
3
+ * Copyright 2026 Amodal Labs, Inc.
4
+ * SPDX-License-Identifier: MIT
5
+ */
6
+ import type { AmodalRepo } from '../repo/repo-types.js';
7
+ import type { EvalSuiteResult, EvalProgress, EvalModelInfo } from './eval-types.js';
8
+ import type { JudgeProvider } from './eval-judge.js';
9
+ /**
10
+ * Provider interface for running a query against the agent. Implementations resolve with the response string, the tool calls made, and optional token usage.
11
+ */
12
+ export interface EvalQueryProvider {
13
+ query(message: string, tenantId?: string): Promise<{ // NOTE(review): how tenantId scopes the query is not visible in this declaration — confirm with the implementation
14
+ response: string; // presumably the agent's text reply — confirm
15
+ toolCalls: Array<{ // required field; each entry is a tool name plus its parameters
16
+ name: string;
17
+ parameters: Record<string, unknown>; // values are untyped here; narrow before use
18
+ }>;
19
+ usage?: { // optional: a provider may omit token counts
20
+ inputTokens: number;
21
+ outputTokens: number;
22
+ };
23
+ }>;
24
+ }
25
+ export interface EvalRunnerOptions { // options accepted by runEvalSuite; only the two providers are required
26
+ filter?: string; // NOTE(review): presumably selects which evals run; matching rules are not visible in this declaration — confirm
27
+ queryProvider: EvalQueryProvider; // required
28
+ judgeProvider: JudgeProvider; // required; the type comes from eval-judge.js
29
+ gitSha?: string; // optional
30
+ model?: EvalModelInfo; // optional
31
+ }
32
+ /**
33
+ * Run the eval suite from a loaded repo.
34
+ * Yields progress events and returns the full suite result. The result is the generator's return value (the `value` of the final `done: true` step), so a plain `for await` loop sees only the progress events.
35
+ */
36
+ export declare function runEvalSuite(repo: AmodalRepo, options: EvalRunnerOptions): AsyncGenerator<EvalProgress, EvalSuiteResult>;