@oscharko-dev/keiko 0.1.0-beta.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (450) hide show
  1. package/LICENSE +202 -0
  2. package/NOTICE +7 -0
  3. package/README.md +621 -0
  4. package/TRADEMARKS.md +41 -0
  5. package/dist/audit/aggregate.d.ts +5 -0
  6. package/dist/audit/aggregate.js +25 -0
  7. package/dist/audit/build.d.ts +2 -0
  8. package/dist/audit/build.js +224 -0
  9. package/dist/audit/errors.d.ts +25 -0
  10. package/dist/audit/errors.js +39 -0
  11. package/dist/audit/index-api.d.ts +14 -0
  12. package/dist/audit/index-api.js +131 -0
  13. package/dist/audit/index.d.ts +12 -0
  14. package/dist/audit/index.js +17 -0
  15. package/dist/audit/persist.d.ts +8 -0
  16. package/dist/audit/persist.js +40 -0
  17. package/dist/audit/redaction.d.ts +3 -0
  18. package/dist/audit/redaction.js +61 -0
  19. package/dist/audit/report.d.ts +18 -0
  20. package/dist/audit/report.js +50 -0
  21. package/dist/audit/retention.d.ts +3 -0
  22. package/dist/audit/retention.js +95 -0
  23. package/dist/audit/runid.d.ts +1 -0
  24. package/dist/audit/runid.js +29 -0
  25. package/dist/audit/side-file.d.ts +12 -0
  26. package/dist/audit/side-file.js +82 -0
  27. package/dist/audit/store.d.ts +12 -0
  28. package/dist/audit/store.js +198 -0
  29. package/dist/audit/types.d.ts +188 -0
  30. package/dist/audit/types.js +8 -0
  31. package/dist/audit/workflow-evidence.d.ts +27 -0
  32. package/dist/audit/workflow-evidence.js +145 -0
  33. package/dist/cli/context.d.ts +2 -0
  34. package/dist/cli/context.js +102 -0
  35. package/dist/cli/evaluate.d.ts +7 -0
  36. package/dist/cli/evaluate.js +207 -0
  37. package/dist/cli/evidence.d.ts +8 -0
  38. package/dist/cli/evidence.js +88 -0
  39. package/dist/cli/gateway-config.d.ts +10 -0
  40. package/dist/cli/gateway-config.js +12 -0
  41. package/dist/cli/gen-tests.d.ts +7 -0
  42. package/dist/cli/gen-tests.js +208 -0
  43. package/dist/cli/index.d.ts +2 -0
  44. package/dist/cli/index.js +14 -0
  45. package/dist/cli/investigate.d.ts +8 -0
  46. package/dist/cli/investigate.js +242 -0
  47. package/dist/cli/models.d.ts +3 -0
  48. package/dist/cli/models.js +64 -0
  49. package/dist/cli/run.d.ts +7 -0
  50. package/dist/cli/run.js +187 -0
  51. package/dist/cli/runner.d.ts +6 -0
  52. package/dist/cli/runner.js +83 -0
  53. package/dist/cli/ui.d.ts +31 -0
  54. package/dist/cli/ui.js +240 -0
  55. package/dist/cli/verify.d.ts +2 -0
  56. package/dist/cli/verify.js +103 -0
  57. package/dist/evaluations/fixtures/bug-investigation/happy-path.d.ts +2 -0
  58. package/dist/evaluations/fixtures/bug-investigation/happy-path.js +66 -0
  59. package/dist/evaluations/fixtures/bug-investigation/investigation-only.d.ts +2 -0
  60. package/dist/evaluations/fixtures/bug-investigation/investigation-only.js +39 -0
  61. package/dist/evaluations/fixtures/bug-investigation/unsafe-action.d.ts +2 -0
  62. package/dist/evaluations/fixtures/bug-investigation/unsafe-action.js +37 -0
  63. package/dist/evaluations/fixtures/index.d.ts +7 -0
  64. package/dist/evaluations/fixtures/index.js +35 -0
  65. package/dist/evaluations/fixtures/support.d.ts +5 -0
  66. package/dist/evaluations/fixtures/support.js +42 -0
  67. package/dist/evaluations/fixtures/unit-tests/happy-path.d.ts +2 -0
  68. package/dist/evaluations/fixtures/unit-tests/happy-path.js +40 -0
  69. package/dist/evaluations/fixtures/unit-tests/retry-then-accept.d.ts +2 -0
  70. package/dist/evaluations/fixtures/unit-tests/retry-then-accept.js +39 -0
  71. package/dist/evaluations/fixtures/unit-tests/unsafe-action.d.ts +2 -0
  72. package/dist/evaluations/fixtures/unit-tests/unsafe-action.js +32 -0
  73. package/dist/evaluations/index.d.ts +12 -0
  74. package/dist/evaluations/index.js +12 -0
  75. package/dist/evaluations/manifest-check.d.ts +1 -0
  76. package/dist/evaluations/manifest-check.js +48 -0
  77. package/dist/evaluations/model-provider.d.ts +12 -0
  78. package/dist/evaluations/model-provider.js +26 -0
  79. package/dist/evaluations/render.d.ts +2 -0
  80. package/dist/evaluations/render.js +59 -0
  81. package/dist/evaluations/runner-support.d.ts +27 -0
  82. package/dist/evaluations/runner-support.js +163 -0
  83. package/dist/evaluations/runner.d.ts +20 -0
  84. package/dist/evaluations/runner.js +174 -0
  85. package/dist/evaluations/scorer.d.ts +14 -0
  86. package/dist/evaluations/scorer.js +131 -0
  87. package/dist/evaluations/scripted-model.d.ts +6 -0
  88. package/dist/evaluations/scripted-model.js +26 -0
  89. package/dist/evaluations/surface-parity.d.ts +2 -0
  90. package/dist/evaluations/surface-parity.js +184 -0
  91. package/dist/evaluations/types.d.ts +74 -0
  92. package/dist/evaluations/types.js +16 -0
  93. package/dist/gateway/capabilities.d.ts +11 -0
  94. package/dist/gateway/capabilities.data.d.ts +2 -0
  95. package/dist/gateway/capabilities.data.js +203 -0
  96. package/dist/gateway/capabilities.js +41 -0
  97. package/dist/gateway/config.d.ts +15 -0
  98. package/dist/gateway/config.js +154 -0
  99. package/dist/gateway/errors.d.ts +72 -0
  100. package/dist/gateway/errors.js +82 -0
  101. package/dist/gateway/gateway.d.ts +19 -0
  102. package/dist/gateway/gateway.js +94 -0
  103. package/dist/gateway/index.d.ts +10 -0
  104. package/dist/gateway/index.js +11 -0
  105. package/dist/gateway/model-selection.d.ts +9 -0
  106. package/dist/gateway/model-selection.js +36 -0
  107. package/dist/gateway/normalize.d.ts +7 -0
  108. package/dist/gateway/normalize.js +93 -0
  109. package/dist/gateway/openai-adapter.d.ts +20 -0
  110. package/dist/gateway/openai-adapter.js +263 -0
  111. package/dist/gateway/redaction.d.ts +1 -0
  112. package/dist/gateway/redaction.js +51 -0
  113. package/dist/gateway/resilience.d.ts +24 -0
  114. package/dist/gateway/resilience.js +166 -0
  115. package/dist/gateway/types.d.ts +108 -0
  116. package/dist/gateway/types.js +2 -0
  117. package/dist/harness/adapters.d.ts +23 -0
  118. package/dist/harness/adapters.js +38 -0
  119. package/dist/harness/context.d.ts +33 -0
  120. package/dist/harness/context.js +21 -0
  121. package/dist/harness/emitter.d.ts +15 -0
  122. package/dist/harness/emitter.js +72 -0
  123. package/dist/harness/errors.d.ts +21 -0
  124. package/dist/harness/errors.js +39 -0
  125. package/dist/harness/executor.d.ts +3 -0
  126. package/dist/harness/executor.js +211 -0
  127. package/dist/harness/fingerprint.d.ts +6 -0
  128. package/dist/harness/fingerprint.js +43 -0
  129. package/dist/harness/index.d.ts +9 -0
  130. package/dist/harness/index.js +13 -0
  131. package/dist/harness/loop.d.ts +3 -0
  132. package/dist/harness/loop.js +159 -0
  133. package/dist/harness/patcher.d.ts +4 -0
  134. package/dist/harness/patcher.js +49 -0
  135. package/dist/harness/planner.d.ts +3 -0
  136. package/dist/harness/planner.js +21 -0
  137. package/dist/harness/ports.d.ts +61 -0
  138. package/dist/harness/ports.js +4 -0
  139. package/dist/harness/session.d.ts +25 -0
  140. package/dist/harness/session.js +116 -0
  141. package/dist/harness/sinks.d.ts +30 -0
  142. package/dist/harness/sinks.js +72 -0
  143. package/dist/harness/tasks/explain-plan.d.ts +3 -0
  144. package/dist/harness/tasks/explain-plan.js +29 -0
  145. package/dist/harness/tasks/generate-unit-tests.d.ts +3 -0
  146. package/dist/harness/tasks/generate-unit-tests.js +28 -0
  147. package/dist/harness/tasks/investigate-bug.d.ts +3 -0
  148. package/dist/harness/tasks/investigate-bug.js +31 -0
  149. package/dist/harness/tasks/policy.d.ts +11 -0
  150. package/dist/harness/tasks/policy.js +22 -0
  151. package/dist/harness/tasks/verify.d.ts +3 -0
  152. package/dist/harness/tasks/verify.js +16 -0
  153. package/dist/harness/types.d.ts +270 -0
  154. package/dist/harness/types.js +33 -0
  155. package/dist/index.d.ts +11 -0
  156. package/dist/index.js +36 -0
  157. package/dist/sdk/index.d.ts +9 -0
  158. package/dist/sdk/index.js +37 -0
  159. package/dist/sdk/run-agent.d.ts +16 -0
  160. package/dist/sdk/run-agent.js +56 -0
  161. package/dist/tools/browser/cdp-client.d.ts +35 -0
  162. package/dist/tools/browser/cdp-client.js +218 -0
  163. package/dist/tools/browser/errors.d.ts +25 -0
  164. package/dist/tools/browser/errors.js +55 -0
  165. package/dist/tools/browser/index.d.ts +5 -0
  166. package/dist/tools/browser/index.js +6 -0
  167. package/dist/tools/browser/session.d.ts +44 -0
  168. package/dist/tools/browser/session.js +748 -0
  169. package/dist/tools/browser/types.d.ts +48 -0
  170. package/dist/tools/browser/types.js +2 -0
  171. package/dist/tools/browser/validators.d.ts +5 -0
  172. package/dist/tools/browser/validators.js +97 -0
  173. package/dist/tools/errors.d.ts +59 -0
  174. package/dist/tools/errors.js +94 -0
  175. package/dist/tools/exec.d.ts +42 -0
  176. package/dist/tools/exec.js +327 -0
  177. package/dist/tools/index.d.ts +11 -0
  178. package/dist/tools/index.js +14 -0
  179. package/dist/tools/patch-content.d.ts +10 -0
  180. package/dist/tools/patch-content.js +126 -0
  181. package/dist/tools/patch-normalize.d.ts +1 -0
  182. package/dist/tools/patch-normalize.js +80 -0
  183. package/dist/tools/patch-parse.d.ts +8 -0
  184. package/dist/tools/patch-parse.js +201 -0
  185. package/dist/tools/patch.d.ts +18 -0
  186. package/dist/tools/patch.js +403 -0
  187. package/dist/tools/registry.d.ts +36 -0
  188. package/dist/tools/registry.js +231 -0
  189. package/dist/tools/sandbox.d.ts +8 -0
  190. package/dist/tools/sandbox.js +121 -0
  191. package/dist/tools/schemas.d.ts +2 -0
  192. package/dist/tools/schemas.js +51 -0
  193. package/dist/tools/terminal-policy.d.ts +9 -0
  194. package/dist/tools/terminal-policy.js +313 -0
  195. package/dist/tools/types.d.ts +99 -0
  196. package/dist/tools/types.js +103 -0
  197. package/dist/tools/writer.d.ts +7 -0
  198. package/dist/tools/writer.js +20 -0
  199. package/dist/ui/browser.d.ts +10 -0
  200. package/dist/ui/browser.js +231 -0
  201. package/dist/ui/chat-handlers.d.ts +4 -0
  202. package/dist/ui/chat-handlers.js +281 -0
  203. package/dist/ui/csp-hashes.json +17 -0
  204. package/dist/ui/csp.d.ts +2 -0
  205. package/dist/ui/csp.js +66 -0
  206. package/dist/ui/deps.d.ts +34 -0
  207. package/dist/ui/deps.js +137 -0
  208. package/dist/ui/evidence.d.ts +27 -0
  209. package/dist/ui/evidence.js +142 -0
  210. package/dist/ui/files-deny.d.ts +2 -0
  211. package/dist/ui/files-deny.js +12 -0
  212. package/dist/ui/files.d.ts +65 -0
  213. package/dist/ui/files.js +492 -0
  214. package/dist/ui/headers.d.ts +2 -0
  215. package/dist/ui/headers.js +21 -0
  216. package/dist/ui/host-check.d.ts +2 -0
  217. package/dist/ui/host-check.js +58 -0
  218. package/dist/ui/index.d.ts +20 -0
  219. package/dist/ui/index.js +23 -0
  220. package/dist/ui/load-csp.d.ts +1 -0
  221. package/dist/ui/load-csp.js +28 -0
  222. package/dist/ui/read-handlers.d.ts +8 -0
  223. package/dist/ui/read-handlers.js +247 -0
  224. package/dist/ui/routes.d.ts +36 -0
  225. package/dist/ui/routes.js +129 -0
  226. package/dist/ui/run-engine.d.ts +20 -0
  227. package/dist/ui/run-engine.js +345 -0
  228. package/dist/ui/run-handlers.d.ts +8 -0
  229. package/dist/ui/run-handlers.js +431 -0
  230. package/dist/ui/run-request.d.ts +13 -0
  231. package/dist/ui/run-request.js +219 -0
  232. package/dist/ui/runs.d.ts +43 -0
  233. package/dist/ui/runs.js +92 -0
  234. package/dist/ui/server.d.ts +11 -0
  235. package/dist/ui/server.js +143 -0
  236. package/dist/ui/sink.d.ts +27 -0
  237. package/dist/ui/sink.js +80 -0
  238. package/dist/ui/sse.d.ts +7 -0
  239. package/dist/ui/sse.js +27 -0
  240. package/dist/ui/static/404.html +1 -0
  241. package/dist/ui/static/_next/static/ca-A01hy9W98aRvMZKdAw/_buildManifest.js +1 -0
  242. package/dist/ui/static/_next/static/ca-A01hy9W98aRvMZKdAw/_ssgManifest.js +1 -0
  243. package/dist/ui/static/_next/static/chunks/255-d47fd57964443afe.js +1 -0
  244. package/dist/ui/static/_next/static/chunks/4-be1fef693af8e088.js +1 -0
  245. package/dist/ui/static/_next/static/chunks/4bd1b696-c023c6e3521b1417.js +1 -0
  246. package/dist/ui/static/_next/static/chunks/app/_not-found/page-75825b09bcecad97.js +1 -0
  247. package/dist/ui/static/_next/static/chunks/app/launch/page-9c86a13c29884245.js +1 -0
  248. package/dist/ui/static/_next/static/chunks/app/layout-bdea63fe87947d50.js +1 -0
  249. package/dist/ui/static/_next/static/chunks/app/page-4168c12c68b7a853.js +1 -0
  250. package/dist/ui/static/_next/static/chunks/framework-a6e0b7e30f98059a.js +1 -0
  251. package/dist/ui/static/_next/static/chunks/main-778a50aebff02192.js +1 -0
  252. package/dist/ui/static/_next/static/chunks/main-app-30679af7240d63e9.js +1 -0
  253. package/dist/ui/static/_next/static/chunks/pages/_app-7d307437aca18ad4.js +1 -0
  254. package/dist/ui/static/_next/static/chunks/pages/_error-cb2a52f75f2162e2.js +1 -0
  255. package/dist/ui/static/_next/static/chunks/polyfills-42372ed130431b0a.js +1 -0
  256. package/dist/ui/static/_next/static/chunks/webpack-4a462cecab786e93.js +1 -0
  257. package/dist/ui/static/_next/static/css/be7cb54d5c5673b6.css +1 -0
  258. package/dist/ui/static/assets/editors/goland.svg +35 -0
  259. package/dist/ui/static/assets/editors/intellij.svg +39 -0
  260. package/dist/ui/static/assets/editors/pycharm.svg +58 -0
  261. package/dist/ui/static/assets/editors/rustrover.svg +19 -0
  262. package/dist/ui/static/assets/editors/vscode.svg +1 -0
  263. package/dist/ui/static/assets/editors/webstorm.svg +21 -0
  264. package/dist/ui/static/assets/icons/anthropic.svg +1 -0
  265. package/dist/ui/static/assets/icons/brave.svg +1 -0
  266. package/dist/ui/static/assets/icons/css3.svg +1 -0
  267. package/dist/ui/static/assets/icons/docker.svg +1 -0
  268. package/dist/ui/static/assets/icons/git.svg +1 -0
  269. package/dist/ui/static/assets/icons/github.svg +1 -0
  270. package/dist/ui/static/assets/icons/go.svg +1 -0
  271. package/dist/ui/static/assets/icons/gradle.svg +1 -0
  272. package/dist/ui/static/assets/icons/grafana.svg +1 -0
  273. package/dist/ui/static/assets/icons/graphql.svg +1 -0
  274. package/dist/ui/static/assets/icons/html5.svg +1 -0
  275. package/dist/ui/static/assets/icons/image.svg +1 -0
  276. package/dist/ui/static/assets/icons/java.svg +1 -0
  277. package/dist/ui/static/assets/icons/javascript.svg +1 -0
  278. package/dist/ui/static/assets/icons/json.svg +1 -0
  279. package/dist/ui/static/assets/icons/kafka.svg +1 -0
  280. package/dist/ui/static/assets/icons/kubernetes.svg +1 -0
  281. package/dist/ui/static/assets/icons/linear.svg +1 -0
  282. package/dist/ui/static/assets/icons/markdown.svg +1 -0
  283. package/dist/ui/static/assets/icons/nginx.svg +1 -0
  284. package/dist/ui/static/assets/icons/nodejs.svg +1 -0
  285. package/dist/ui/static/assets/icons/notion.svg +1 -0
  286. package/dist/ui/static/assets/icons/openai.svg +1 -0
  287. package/dist/ui/static/assets/icons/playwright.svg +1 -0
  288. package/dist/ui/static/assets/icons/postgresql.svg +1 -0
  289. package/dist/ui/static/assets/icons/prometheus.svg +1 -0
  290. package/dist/ui/static/assets/icons/properties.svg +1 -0
  291. package/dist/ui/static/assets/icons/puppeteer.svg +1 -0
  292. package/dist/ui/static/assets/icons/python.svg +1 -0
  293. package/dist/ui/static/assets/icons/react.svg +1 -0
  294. package/dist/ui/static/assets/icons/redis.svg +1 -0
  295. package/dist/ui/static/assets/icons/rust.svg +1 -0
  296. package/dist/ui/static/assets/icons/sentry.svg +1 -0
  297. package/dist/ui/static/assets/icons/slack.svg +1 -0
  298. package/dist/ui/static/assets/icons/spring.svg +1 -0
  299. package/dist/ui/static/assets/icons/typescript.svg +1 -0
  300. package/dist/ui/static/assets/icons/upstash.svg +1 -0
  301. package/dist/ui/static/assets/icons/yaml.svg +1 -0
  302. package/dist/ui/static/assets/keiko-logo.svg +10 -0
  303. package/dist/ui/static/index.html +1 -0
  304. package/dist/ui/static/index.txt +19 -0
  305. package/dist/ui/static/keiko-logo.svg +10 -0
  306. package/dist/ui/static/launch.html +1 -0
  307. package/dist/ui/static/launch.txt +19 -0
  308. package/dist/ui/static.d.ts +3 -0
  309. package/dist/ui/static.js +72 -0
  310. package/dist/ui/store/chats.d.ts +14 -0
  311. package/dist/ui/store/chats.js +110 -0
  312. package/dist/ui/store/db.d.ts +6 -0
  313. package/dist/ui/store/db.js +182 -0
  314. package/dist/ui/store/errors.d.ts +12 -0
  315. package/dist/ui/store/errors.js +30 -0
  316. package/dist/ui/store/index.d.ts +6 -0
  317. package/dist/ui/store/index.js +6 -0
  318. package/dist/ui/store/messages.d.ts +5 -0
  319. package/dist/ui/store/messages.js +137 -0
  320. package/dist/ui/store/paths.d.ts +4 -0
  321. package/dist/ui/store/paths.js +69 -0
  322. package/dist/ui/store/projects.d.ts +7 -0
  323. package/dist/ui/store/projects.js +61 -0
  324. package/dist/ui/store/schema.d.ts +3 -0
  325. package/dist/ui/store/schema.js +77 -0
  326. package/dist/ui/store/types.d.ts +80 -0
  327. package/dist/ui/store/types.js +3 -0
  328. package/dist/ui/store/validation.d.ts +4 -0
  329. package/dist/ui/store/validation.js +72 -0
  330. package/dist/ui/store-handlers.d.ts +16 -0
  331. package/dist/ui/store-handlers.js +465 -0
  332. package/dist/ui/terminal-errors.d.ts +21 -0
  333. package/dist/ui/terminal-errors.js +45 -0
  334. package/dist/ui/terminal-evidence.d.ts +20 -0
  335. package/dist/ui/terminal-evidence.js +65 -0
  336. package/dist/ui/terminal-routes.d.ts +9 -0
  337. package/dist/ui/terminal-routes.js +219 -0
  338. package/dist/ui/terminal.d.ts +67 -0
  339. package/dist/ui/terminal.js +835 -0
  340. package/dist/verification/classify.d.ts +10 -0
  341. package/dist/verification/classify.js +53 -0
  342. package/dist/verification/detect.d.ts +4 -0
  343. package/dist/verification/detect.js +81 -0
  344. package/dist/verification/errors.d.ts +11 -0
  345. package/dist/verification/errors.js +21 -0
  346. package/dist/verification/index.d.ts +17 -0
  347. package/dist/verification/index.js +13 -0
  348. package/dist/verification/limits.d.ts +3 -0
  349. package/dist/verification/limits.js +40 -0
  350. package/dist/verification/monitor.d.ts +4 -0
  351. package/dist/verification/monitor.js +58 -0
  352. package/dist/verification/orchestrator.d.ts +16 -0
  353. package/dist/verification/orchestrator.js +363 -0
  354. package/dist/verification/plan.d.ts +9 -0
  355. package/dist/verification/plan.js +125 -0
  356. package/dist/verification/summary.d.ts +40 -0
  357. package/dist/verification/summary.js +67 -0
  358. package/dist/verification/types.d.ts +63 -0
  359. package/dist/verification/types.js +13 -0
  360. package/dist/workflows/bug-investigation/context.d.ts +7 -0
  361. package/dist/workflows/bug-investigation/context.js +119 -0
  362. package/dist/workflows/bug-investigation/descriptor.d.ts +3 -0
  363. package/dist/workflows/bug-investigation/descriptor.js +46 -0
  364. package/dist/workflows/bug-investigation/emit.d.ts +12 -0
  365. package/dist/workflows/bug-investigation/emit.js +35 -0
  366. package/dist/workflows/bug-investigation/events.d.ts +81 -0
  367. package/dist/workflows/bug-investigation/events.js +9 -0
  368. package/dist/workflows/bug-investigation/failure-parse.d.ts +3 -0
  369. package/dist/workflows/bug-investigation/failure-parse.js +154 -0
  370. package/dist/workflows/bug-investigation/guard.d.ts +2 -0
  371. package/dist/workflows/bug-investigation/guard.js +69 -0
  372. package/dist/workflows/bug-investigation/index.d.ts +7 -0
  373. package/dist/workflows/bug-investigation/index.js +13 -0
  374. package/dist/workflows/bug-investigation/internal.d.ts +37 -0
  375. package/dist/workflows/bug-investigation/internal.js +64 -0
  376. package/dist/workflows/bug-investigation/model-loop.d.ts +4 -0
  377. package/dist/workflows/bug-investigation/model-loop.js +223 -0
  378. package/dist/workflows/bug-investigation/parse.d.ts +3 -0
  379. package/dist/workflows/bug-investigation/parse.js +123 -0
  380. package/dist/workflows/bug-investigation/prompt.d.ts +4 -0
  381. package/dist/workflows/bug-investigation/prompt.js +107 -0
  382. package/dist/workflows/bug-investigation/report.d.ts +23 -0
  383. package/dist/workflows/bug-investigation/report.js +151 -0
  384. package/dist/workflows/bug-investigation/stages.d.ts +13 -0
  385. package/dist/workflows/bug-investigation/stages.js +242 -0
  386. package/dist/workflows/bug-investigation/types.d.ts +91 -0
  387. package/dist/workflows/bug-investigation/types.js +14 -0
  388. package/dist/workflows/bug-investigation/verify-stage.d.ts +10 -0
  389. package/dist/workflows/bug-investigation/verify-stage.js +91 -0
  390. package/dist/workflows/bug-investigation/workflow.d.ts +2 -0
  391. package/dist/workflows/bug-investigation/workflow.js +74 -0
  392. package/dist/workflows/descriptor.d.ts +20 -0
  393. package/dist/workflows/descriptor.js +8 -0
  394. package/dist/workflows/index.d.ts +3 -0
  395. package/dist/workflows/index.js +2 -0
  396. package/dist/workflows/unit-tests/context.d.ts +7 -0
  397. package/dist/workflows/unit-tests/context.js +129 -0
  398. package/dist/workflows/unit-tests/conventions.d.ts +4 -0
  399. package/dist/workflows/unit-tests/conventions.js +87 -0
  400. package/dist/workflows/unit-tests/descriptor.d.ts +4 -0
  401. package/dist/workflows/unit-tests/descriptor.js +43 -0
  402. package/dist/workflows/unit-tests/emit.d.ts +12 -0
  403. package/dist/workflows/unit-tests/emit.js +35 -0
  404. package/dist/workflows/unit-tests/events.d.ts +78 -0
  405. package/dist/workflows/unit-tests/events.js +7 -0
  406. package/dist/workflows/unit-tests/index.d.ts +6 -0
  407. package/dist/workflows/unit-tests/index.js +10 -0
  408. package/dist/workflows/unit-tests/internal.d.ts +35 -0
  409. package/dist/workflows/unit-tests/internal.js +43 -0
  410. package/dist/workflows/unit-tests/model-loop.d.ts +4 -0
  411. package/dist/workflows/unit-tests/model-loop.js +95 -0
  412. package/dist/workflows/unit-tests/parse.d.ts +6 -0
  413. package/dist/workflows/unit-tests/parse.js +68 -0
  414. package/dist/workflows/unit-tests/prompt.d.ts +4 -0
  415. package/dist/workflows/unit-tests/prompt.js +71 -0
  416. package/dist/workflows/unit-tests/report.d.ts +21 -0
  417. package/dist/workflows/unit-tests/report.js +90 -0
  418. package/dist/workflows/unit-tests/stages.d.ts +9 -0
  419. package/dist/workflows/unit-tests/stages.js +155 -0
  420. package/dist/workflows/unit-tests/types.d.ts +70 -0
  421. package/dist/workflows/unit-tests/types.js +11 -0
  422. package/dist/workflows/unit-tests/verify-stage.d.ts +9 -0
  423. package/dist/workflows/unit-tests/verify-stage.js +56 -0
  424. package/dist/workflows/unit-tests/workflow.d.ts +2 -0
  425. package/dist/workflows/unit-tests/workflow.js +58 -0
  426. package/dist/workspace/contextPack.d.ts +9 -0
  427. package/dist/workspace/contextPack.js +94 -0
  428. package/dist/workspace/detect.d.ts +3 -0
  429. package/dist/workspace/detect.js +135 -0
  430. package/dist/workspace/discovery.d.ts +9 -0
  431. package/dist/workspace/discovery.js +167 -0
  432. package/dist/workspace/errors.d.ts +39 -0
  433. package/dist/workspace/errors.js +66 -0
  434. package/dist/workspace/fs.d.ts +21 -0
  435. package/dist/workspace/fs.js +36 -0
  436. package/dist/workspace/ignore.d.ts +14 -0
  437. package/dist/workspace/ignore.js +176 -0
  438. package/dist/workspace/index.d.ts +11 -0
  439. package/dist/workspace/index.js +13 -0
  440. package/dist/workspace/paths.d.ts +2 -0
  441. package/dist/workspace/paths.js +38 -0
  442. package/dist/workspace/realpath.d.ts +7 -0
  443. package/dist/workspace/realpath.js +72 -0
  444. package/dist/workspace/retrieval.d.ts +9 -0
  445. package/dist/workspace/retrieval.js +74 -0
  446. package/dist/workspace/summary.d.ts +3 -0
  447. package/dist/workspace/summary.js +54 -0
  448. package/dist/workspace/types.d.ts +103 -0
  449. package/dist/workspace/types.js +27 -0
  450. package/package.json +58 -0
@@ -0,0 +1,184 @@
1
+ // Surface-parity checks (ADR-0012 D7). A pure, no-model assertion that the four surfaces for each
2
+ // workflow — UI descriptor, CLI flags, SDK exports, and the UI RunRequest shape — present consistent
3
+ // contracts. It is NOT a scored dimension: it is a fixed structural invariant of the codebase, so it
4
+ // has its own scorecard section and its own test file. A parity failure is a hard blocker that causes
5
+ // `keiko evaluate` to exit 1 regardless of dimension scores.
6
+ import { runGenTestsCli } from "../cli/gen-tests.js";
7
+ import { runInvestigateCli } from "../cli/investigate.js";
8
+ import { BUG_INVESTIGATION_WORKFLOW_DESCRIPTOR, UNIT_TEST_WORKFLOW_DESCRIPTOR, } from "../workflows/index.js";
9
// Descriptor-surface expectations consumed by checkDescriptor: one entry per workflow kind, pairing
// the workflow descriptor under test with the input names that descriptor must declare as required.
const DESCRIPTOR_EXPECTATIONS = [
    {
        kind: "unit-tests",
        descriptor: UNIT_TEST_WORKFLOW_DESCRIPTOR,
        requiredInputs: ["target", "modelId"],
    },
    {
        kind: "bug-investigation",
        descriptor: BUG_INVESTIGATION_WORKFLOW_DESCRIPTOR,
        requiredInputs: ["report", "modelId"],
    },
];
21
// SDK-surface expectations consumed by checkSdkExports: for each workflow kind, the name of the
// function export and the name of the descriptor export that the SDK barrel must provide.
const SDK_EXPORT_EXPECTATIONS = [
    {
        kind: "unit-tests",
        functionExport: "generateUnitTests",
        descriptorExport: "UNIT_TEST_WORKFLOW_DESCRIPTOR",
    },
    {
        kind: "bug-investigation",
        functionExport: "investigateBug",
        descriptorExport: "BUG_INVESTIGATION_WORKFLOW_DESCRIPTOR",
    },
];
33
// RunRequest-surface expectations consumed by checkRunRequestShapes: for each workflow kind, the
// workflowId the UI sends and a minimal `input` payload used to probe the request parser.
const RUN_REQUEST_EXPECTATIONS = [
    {
        kind: "unit-tests",
        workflowId: "unit-test-generation",
        input: {
            workspaceRoot: "/tmp/keiko-surface-parity",
            target: { kind: "file", filePath: "src/example.ts" },
        },
    },
    {
        kind: "bug-investigation",
        workflowId: "bug-investigation",
        input: {
            workspaceRoot: "/tmp/keiko-surface-parity",
            report: { description: "example failure" },
        },
    },
];
51
// Returns true only for non-null, non-array values whose typeof is "object".
function isRecord(value) {
    if (value === null || typeof value !== "object") {
        return false;
    }
    // Arrays report typeof "object" too, so they are excluded explicitly.
    return !Array.isArray(value);
}
54
// Validates one workflow descriptor against its expectation and returns a single
// "descriptor-inputs" check result. The first violated rule wins, in this order:
//   1. every name in `requiredInputs` is declared in `descriptor.inputs` with `required` truthy;
//   2. an optional `limits` input of type "object" exists and `defaultLimits` is a non-empty record;
//   3. `supportsDryRun` and `supportsApply` are both truthy.
function checkDescriptor(expectation) {
    const { kind, descriptor, requiredInputs } = expectation;
    const check = "descriptor-inputs";
    const declaresRequired = (name) => descriptor.inputs.some((input) => input.name === name && input.required);
    const missing = requiredInputs.filter((name) => !declaresRequired(name));
    if (missing.length > 0) {
        return failed(check, kind, `missing required inputs: ${missing.join(", ")}`);
    }
    const hasLimitsInput = descriptor.inputs.some((input) => input.name === "limits" && input.type === "object" && !input.required);
    const hasDefaultLimits = isRecord(descriptor.defaultLimits) && Object.keys(descriptor.defaultLimits).length > 0;
    if (!(hasLimitsInput && hasDefaultLimits)) {
        return failed(check, kind, "descriptor must expose optional limits input and non-empty defaultLimits");
    }
    if (!(descriptor.supportsDryRun && descriptor.supportsApply)) {
        return failed(check, kind, "supportsDryRun/supportsApply not both true");
    }
    return passed(check, kind);
}
71
// Invokes a CLI handler with ["--help"], an in-memory io sink and an empty env, and returns
// everything the handler wrote to `out` and `err` before this function returned, concatenated in
// write order.
//
// `run` is called as run(args, io, env) and may return a promise. The return value here is
// synchronous: output written after the handler first suspends is NOT included.
function captureCliHelp(run) {
    const chunks = [];
    const record = (text) => {
        chunks.push(text);
    };
    const io = { out: record, err: record };
    // The handlers print their usage string synchronously before any async work when --help fails to
    // parse as a real invocation, so the captured chunks already contain the flag names we assert.
    // The handler's eventual outcome is irrelevant to this check, but the promise must still be
    // observed: an unobserved rejection would surface as an unhandled rejection and could terminate
    // the process instead of letting the flag assertions report the problem.
    Promise.resolve(run(["--help"], io, {})).catch(() => undefined);
    return chunks.join("");
}
82
// Captures the --help output of the gen-tests and investigate CLI handlers and checks each one for
// the flags its workflow must expose. Resolves to one "cli-flags" check result per workflow kind,
// unit-tests first.
//
// captureCliHelp returns its text synchronously, so only output written before a handler first
// suspends is inspected; the `await Promise.resolve()` below yields one microtask tick and does not
// add to the captured text.
async function checkCliFlags() {
    const genTestsHelp = captureCliHelp((args, io, env) => runGenTestsCli(args, io, env, {}));
    const investigateHelp = captureCliHelp((args, io, env) => runInvestigateCli(args, io, env, {}));
    await Promise.resolve();
    const unitTests = {
        kind: "unit-tests",
        help: genTestsHelp,
        requiredTokens: ["--file", "--dir", "--changed", "--model", "--apply"],
    };
    const bugInvestigation = {
        kind: "bug-investigation",
        help: investigateHelp,
        requiredTokens: [
            "--description",
            "--output",
            "--output-file",
            "--stack",
            "--stack-file",
            "--file",
            "--model",
            "--apply",
        ],
    };
    return [checkCliExpectation(unitTests), checkCliExpectation(bugInvestigation)];
}
109
// Checks one captured help text and returns a single "cli-flags" check result.
//
// Fails when any flag in `requiredTokens` is absent from `help`, or when `help` does not contain
// the phrase "dry-run by default" (case-insensitive). Missing flags are reported first.
//
// Flags are matched as whole tokens: an occurrence only counts when it is not preceded or followed
// by a flag character (letter, digit, "_" or "-"). Plain substring matching would let
// "--output-file" satisfy "--output" and "--file", and "--stack-file" satisfy "--stack" and
// "--file", so those flags could vanish from the help without the check noticing.
function checkCliExpectation(expectation) {
    const { kind, help, requiredTokens } = expectation;
    const isFlagChar = (ch) => ch !== undefined && /[A-Za-z0-9_-]/.test(ch);
    const mentionsFlag = (flag) => {
        if (flag.length === 0) {
            // An empty token is trivially present; this also keeps the scan below from looping forever.
            return true;
        }
        for (let at = help.indexOf(flag); at !== -1; at = help.indexOf(flag, at + 1)) {
            if (!isFlagChar(help[at - 1]) && !isFlagChar(help[at + flag.length])) {
                return true;
            }
        }
        return false;
    };
    const missing = requiredTokens.filter((flag) => !mentionsFlag(flag));
    const hasDryRunDefault = help.toLowerCase().includes("dry-run by default");
    if (missing.length > 0) {
        return failed("cli-flags", kind, `help missing flags: ${missing.join(", ")}`);
    }
    if (!hasDryRunDefault) {
        return failed("cli-flags", kind, "help does not state dry-run by default");
    }
    return passed("cli-flags", kind);
}
120
// The SDK named exports each workflow must surface. A dynamic import breaks the load-time cycle the
// static import would create (the SDK barrel re-exports this evaluation module).
//
// Resolves to one "sdk-exports" check result per entry in SDK_EXPORT_EXPECTATIONS. An entry passes
// when the SDK module exposes `functionExport` as a function and `descriptorExport` as a non-null
// object; otherwise the failure reason lists the missing names, function export first.
async function checkSdkExports() {
    const sdk = await import("../sdk/index.js");
    return SDK_EXPORT_EXPECTATIONS.map((expectation) => {
        const { kind, functionExport, descriptorExport } = expectation;
        const descriptorValue = sdk[descriptorExport];
        const missing = [];
        if (typeof sdk[functionExport] !== "function") {
            missing.push(functionExport);
        }
        // typeof null is "object", so null has to be rejected separately.
        if (typeof descriptorValue !== "object" || descriptorValue === null) {
            missing.push(descriptorExport);
        }
        if (missing.length === 0) {
            return passed("sdk-exports", kind);
        }
        return failed("sdk-exports", kind, `missing SDK exports: ${missing.join(", ")}`);
    });
}
139
// The UI RunRequest carries the minimum fields the BFF needs to invoke either workflow. The compile-
// time guarantee is enforced by the TypeScript check; this is the runtime shape assertion (D7 d).
// Composer-launched workflow runs must also carry the selected local project context.
//
// For each entry in RUN_REQUEST_EXPECTATIONS a probe request is serialised to JSON and passed to
// parseRunRequest. Resolves to one "run-request-shape" check result per entry, failing when:
//   - the parser returns a value with a `code` property (its `message` becomes the reason);
//   - any of kind / modelId / apply / input / limits is absent from the parsed value;
//   - `kind` differs from the expected workflow kind, `modelId` is not a string, `apply` is truthy,
//     or `input` / `limits` is not a plain record.
//
// NOTE(review): the probe sends `apply: true`, yet a truthy `parsed.apply` is reported as a
// mismatch. Presumably parseRunRequest is expected to normalise `apply` to false for this request —
// confirm against ../ui/run-request.js before changing either side.
async function checkRunRequestShapes() {
    const { parseRunRequest } = await import("../ui/run-request.js");
    const check = "run-request-shape";
    const requiredFields = ["kind", "modelId", "apply", "input", "limits"];
    return RUN_REQUEST_EXPECTATIONS.map((expectation) => {
        const { kind, workflowId, input } = expectation;
        const body = JSON.stringify({
            workflowId,
            modelId: "m",
            input,
            apply: true,
            limits: { maxPromptBytes: 1 },
        });
        const parsed = parseRunRequest(body);
        if ("code" in parsed) {
            return failed(check, kind, parsed.message);
        }
        const missing = requiredFields.filter((field) => !(field in parsed));
        if (missing.length > 0) {
            return failed(check, kind, `RunRequest missing fields: ${missing.join(", ")}`);
        }
        const shapeOk = parsed.kind === kind &&
            typeof parsed.modelId === "string" &&
            !parsed.apply &&
            isRecord(parsed.input) &&
            isRecord(parsed.limits);
        if (!shapeOk) {
            return failed(check, kind, "RunRequest field types mismatch");
        }
        return passed(check, kind);
    });
}
170
// Builds a successful check result for the given check name and workflow kind.
function passed(check, kind) {
    const result = { check, workflowKind: kind, passed: true };
    return result;
}
173
// Builds a failing check result for the given check name and workflow kind, carrying `reason` as
// the explanation of what was violated.
function failed(check, kind, reason) {
    const result = { check, workflowKind: kind, passed: false, reason };
    return result;
}
176
// Runs every surface-parity check and resolves to `{ allPassed, checks }`.
//
// `checks` lists the individual results in a fixed order: descriptor checks, then CLI flag checks,
// then SDK export checks, then RunRequest shape checks. `allPassed` is true only when every entry
// in `checks` has `passed` set. The three asynchronous groups are awaited one after another.
export async function checkSurfaceParity() {
    const descriptorChecks = DESCRIPTOR_EXPECTATIONS.map(checkDescriptor);
    const cliChecks = await checkCliFlags();
    const sdkChecks = await checkSdkExports();
    const runRequestChecks = await checkRunRequestShapes();
    const checks = [...descriptorChecks, ...cliChecks, ...sdkChecks, ...runRequestChecks];
    return { allPassed: checks.every((check) => check.passed), checks };
}
@@ -0,0 +1,74 @@
1
+ import type { NormalizedResponse } from "../gateway/types.js";
2
/** Name of one scored evaluation dimension. */
export type EvaluationDimension = "task-completion" | "patch-correctness" | "test-pass-rate" | "verification-completeness" | "patch-size" | "audit-completeness" | "unsafe-action-rejection";
/** Read-only list of evaluation dimension names; only the declaration lives here, the value is provided by the implementation module. */
export declare const EVALUATION_DIMENSIONS: readonly EvaluationDimension[];
/**
 * Expected-outcome description attached to a fixture.
 * NOTE(review): how each field is compared with a run is decided by the harness, not by this declaration.
 */
export interface FixtureOracle {
    /** Status values listed as expected for the fixture. */
    readonly expectedStatuses: readonly string[];
    /** Whether a patch is expected. */
    readonly expectPatch: boolean;
    /** Whether a verification skip is expected. */
    readonly expectVerificationSkip: boolean;
    /** Upper bound on changed files; presumably a count, confirm with the harness. */
    readonly maxExpectedChangedFiles: number;
    /** Upper bound on patch size in bytes. */
    readonly maxExpectedPatchBytes: number;
}
/** Identifier of a workflow kind. */
export type WorkflowKind = "unit-tests" | "bug-investigation";
/** One evaluation fixture: workspace content, workflow input, a mock transcript and the oracle. */
export interface EvaluationFixture {
    readonly name: string;
    readonly workflowKind: WorkflowKind;
    /** String-keyed map of workspace files; presumably path to file content (TODO confirm). */
    readonly workspaceFiles: Record<string, string>;
    /** Free-form input record for the workflow. */
    readonly workflowInput: Record<string, unknown>;
    /** Optional apply flag; may be omitted or explicitly undefined. */
    readonly apply?: boolean | undefined;
    /** Ordered transcript entries; each is either a normalized response or an Error. */
    readonly mockTranscript: readonly (NormalizedResponse | Error)[];
    /** Set of dimensions associated with this fixture. */
    readonly dimensions: ReadonlySet<EvaluationDimension>;
    readonly oracle: FixtureOracle;
}
/** Outcome of scoring a single dimension. */
export type DimensionOutcome = "pass" | "fail" | "not-applicable";
/** Outcome of one dimension, with an optional free-text reason. */
export interface DimensionResult {
    readonly dimension: EvaluationDimension;
    readonly outcome: DimensionOutcome;
    readonly reason?: string | undefined;
}
/** Result of running one fixture. */
export interface FixtureRunResult {
    readonly fixtureName: string;
    readonly workflowKind: WorkflowKind;
    /** Duration in milliseconds. */
    readonly durationMs: number;
    readonly dimensionResults: readonly DimensionResult[];
    /** Free-form report record; its schema is not described by this declaration. */
    readonly report: Record<string, unknown>;
}
/** Per-dimension tally of outcomes. */
export interface ScorecardEntry {
    readonly dimension: EvaluationDimension;
    readonly passCount: number;
    readonly failCount: number;
    readonly notApplicableCount: number;
    /** Pass rate, or null; presumably null when no pass/fail outcome exists (TODO confirm). */
    readonly passRate: number | null;
}
/** Result of a single surface-parity check for one workflow kind. */
export interface SurfaceParityCheckResult {
    /** Name of the check. */
    readonly check: string;
    readonly workflowKind: WorkflowKind;
    readonly passed: boolean;
    /** Optional explanation. */
    readonly reason?: string | undefined;
}
/** Aggregate of all surface-parity checks. */
export interface SurfaceParityResult {
    readonly allPassed: boolean;
    readonly checks: readonly SurfaceParityCheckResult[];
}
/** Context recorded for a live run. */
export interface LiveRunContext {
    readonly modelId: string;
    readonly configDescriptor: string;
    readonly evidenceRefs: readonly string[];
}
/** Scorecard schema version; typed as the string literal "1". */
export declare const EVAL_SCORECARD_SCHEMA_VERSION: "1";
/** Headline numbers and flags of a scorecard. */
export interface ScorecardSummary {
    readonly totalFixtures: number;
    readonly fullyPassedFixtures: number;
    readonly safetyGatePassed: boolean;
    readonly pilotReadyIndicator: boolean;
}
/** Complete evaluation scorecard. */
export interface EvalScorecard {
    /** Always the type of EVAL_SCORECARD_SCHEMA_VERSION. */
    readonly schemaVersion: typeof EVAL_SCORECARD_SCHEMA_VERSION;
    /** Evaluation timestamp as a string; the format is not fixed by this declaration. */
    readonly evaluatedAt: string;
    readonly mode: EvaluationMode;
    /** Optional live-run context. */
    readonly liveRunContext?: LiveRunContext | undefined;
    readonly dimensions: readonly ScorecardEntry[];
    readonly surfaceParity: SurfaceParityResult;
    readonly fixtureResults: readonly FixtureRunResult[];
    readonly summary: ScorecardSummary;
}
/** Evaluation mode. */
export type EvaluationMode = "offline" | "live";
@@ -0,0 +1,16 @@
1
+ // All evaluation interfaces and type aliases for the Wave 1 evaluation harness (ADR-0012 D6/D8/D11).
2
+ // No runtime logic lives here beyond the frozen schema-version constant the type layer exposes as a
3
+ // value. `readonly` everywhere; optional props are `| undefined` because exactOptionalPropertyTypes
4
+ // is on. Every scorecard shape is plain JSON-serializable so the harness can emit it to stdout/file.
5
// The seven scored dimensions in their canonical scorecard order. A scorecard always carries one
// ScorecardEntry per name here, even when no fixture exercises the dimension (all not-applicable).
// The array order is the scorecard order, so append or reorder with care.
export const EVALUATION_DIMENSIONS = [
    "task-completion",
    "patch-correctness",
    "test-pass-rate",
    "verification-completeness",
    "patch-size",
    "audit-completeness",
    "unsafe-action-rejection",
];
// Schema version constant for the evaluation scorecard; the value is the string "1".
export const EVAL_SCORECARD_SCHEMA_VERSION = "1";
@@ -0,0 +1,11 @@
1
+ import type { ModelCapability, ModelKind } from "./types.js";
2
/** Read-only list of all known model capabilities. */
export declare const CAPABILITY_REGISTRY: readonly ModelCapability[];
/** Requirements for selecting a model; every field is optional and may be explicitly undefined. */
export interface CapabilityQuery {
    /** Model kind to match. */
    readonly kind?: ModelKind | undefined;
    /** Tool-calling requirement. */
    readonly toolCalling?: boolean | undefined;
    /** Structured-output requirement. */
    readonly structuredOutput?: boolean | undefined;
    /** Minimum context window the model must offer. */
    readonly minContextWindow?: number | undefined;
}
/** Looks up a capability by model id; returns undefined when no entry is found. */
export declare function findCapability(modelId: string): ModelCapability | undefined;
/** Returns the read-only list of capabilities. */
export declare function listCapabilities(): readonly ModelCapability[];
/** Returns a capability selected for the query, or undefined when none is returned. */
export declare function selectCheapest(query: CapabilityQuery): ModelCapability | undefined;
@@ -0,0 +1,2 @@
1
+ import type { ModelCapability } from "./types.js";
2
/** Read-only list of model capability entries; only the declaration lives here. */
export declare const CAPABILITY_DATA: readonly ModelCapability[];
@@ -0,0 +1,203 @@
1
// Raw capability registry data for the Wave 1 model portfolio plus locally deployed Azure models.
// All numeric and class values are documented assumptions based on public model cards and provider
// documentation as of 2026-05-28 unless the entry names a live Azure deployment; [assumption] marks
// figures the customer may override via config when authoritative deployment numbers are available.
//
// Entry order is significant: the registry breaks ties on cost class by declaration order (first
// declared wins). Non-chat entries (ocr-vision, embedding) carry 0 for maxOutputTokens and false
// for toolCalling, structuredOutput and streaming.
export const CAPABILITY_DATA = [
    {
        id: "Qwen3-Coder-480B-A35B-Instruct-FP8",
        kind: "chat",
        contextWindow: 128_000, // [assumption]
        maxOutputTokens: 8_192, // [assumption]
        toolCalling: true,
        structuredOutput: true,
        streaming: true,
        costClass: "high",
        latencyClass: "slow",
        throughputHint: "~40 tok/s [assumption]",
        preferredUseCases: ["Large-codebase refactor", "Cross-file analysis"],
        knownLimitations: ["Very high VRAM; slow for interactive use"],
    },
    {
        id: "Qwen/Qwen3-Coder-Next-FP8",
        kind: "chat",
        contextWindow: 128_000, // [assumption]
        maxOutputTokens: 8_192, // [assumption]
        toolCalling: true,
        structuredOutput: true,
        streaming: true,
        costClass: "high",
        latencyClass: "slow",
        throughputHint: "~40 tok/s [assumption]",
        preferredUseCases: ["Deep code synthesis requiring maximum reasoning depth"],
        knownLimitations: [
            "Same VRAM/latency constraints as Qwen3-Coder-480B; treat as next-generation upgrade path",
        ],
    },
    {
        id: "Devstral-2-123B-Instruct-2512",
        kind: "chat",
        contextWindow: 128_000, // [assumption]
        maxOutputTokens: 8_192, // [assumption]
        toolCalling: true,
        structuredOutput: true,
        streaming: true,
        costClass: "high",
        latencyClass: "standard",
        throughputHint: "~80 tok/s [assumption]",
        preferredUseCases: ["Agentic code completion", "Multi-step software engineering"],
        knownLimitations: [
            "123B scale; requires dedicated GPU allocation; not suitable for high-QPS workloads",
        ],
    },
    {
        id: "gpt-oss-120b",
        kind: "chat",
        contextWindow: 128_000, // [assumption]
        maxOutputTokens: 8_192, // [assumption]
        toolCalling: true,
        structuredOutput: true,
        streaming: true,
        costClass: "high",
        latencyClass: "standard",
        throughputHint: "~80 tok/s [assumption]",
        preferredUseCases: ["General-purpose coding", "Code review", "Explanation"],
        knownLimitations: [
            "Customer-hosted OSS model; endpoint reliability depends on customer infrastructure",
        ],
    },
    {
        id: "mistral-large-3",
        kind: "chat",
        contextWindow: 128_000, // Azure deployment Mistral-Large-3, Swedish Central.
        maxOutputTokens: 8_192, // [assumption]
        toolCalling: true,
        structuredOutput: true, // [assumption]
        streaming: true,
        costClass: "high",
        latencyClass: "standard",
        throughputHint: "20 RPM / 20k TPM on current Visual Studio subscription quota",
        preferredUseCases: ["Alternative coding agent", "Large-context explanation", "Review"],
        knownLimitations: [
            "Current subscription quota caps this deployment at 20 capacity units without quota increase",
        ],
    },
    {
        id: "llama-4-maverick-vision",
        kind: "chat",
        contextWindow: 128_000, // Azure deployment Llama-4-Maverick-17B-128E-Instruct-FP8.
        maxOutputTokens: 8_192, // [assumption]
        toolCalling: true,
        structuredOutput: false, // [assumption]
        streaming: true,
        costClass: "high",
        latencyClass: "standard",
        throughputHint: "20 RPM / 20k TPM on current Visual Studio subscription quota",
        preferredUseCases: ["Alternative agent model", "Vision-capable review path", "Explanation"],
        knownLimitations: [
            "Current subscription quota caps this deployment at 20 capacity units without quota increase",
            "Structured output reliability must be verified before routing patch-producing workflows",
        ],
    },
    {
        id: "Mistral-Small-3.1-24B-Instruct-2503",
        kind: "chat",
        contextWindow: 128_000,
        maxOutputTokens: 8_192, // [assumption]
        toolCalling: true,
        structuredOutput: true,
        streaming: true,
        costClass: "medium",
        latencyClass: "fast",
        throughputHint: "~150 tok/s [assumption]",
        preferredUseCases: ["Interactive code assist", "Quick edits", "Low-latency agent steps"],
        knownLimitations: ["Smaller model; may require multi-turn for complex reasoning"],
    },
    {
        id: "Qwen2.5-Coder-7B-Instruct",
        kind: "chat",
        contextWindow: 128_000,
        maxOutputTokens: 4_096, // [assumption]
        toolCalling: true,
        structuredOutput: false, // [assumption]
        streaming: true,
        costClass: "low",
        latencyClass: "fast",
        throughputHint: "~200 tok/s [assumption]",
        preferredUseCases: [
            "Inline completion",
            "Snippet generation",
            "High-throughput batch coding tasks",
        ],
        knownLimitations: [
            "Limited structured-output reliability; context degradation beyond 64K tokens observed in benchmarks [assumption]",
        ],
    },
    {
        id: "gemma-4-31b-it",
        kind: "chat",
        contextWindow: 128_000, // [assumption]
        maxOutputTokens: 8_192, // [assumption]
        toolCalling: true,
        structuredOutput: true,
        streaming: true,
        costClass: "medium",
        latencyClass: "standard",
        throughputHint: "~120 tok/s [assumption]",
        preferredUseCases: ["Document summarisation", "Code explanation", "Regulated-context Q&A"],
        knownLimitations: [
            "Instruction-tuned variant; verify function-calling reliability against customer endpoint",
        ],
    },
    {
        id: "dotsocr",
        kind: "ocr-vision",
        contextWindow: 0,
        maxOutputTokens: 0,
        toolCalling: false,
        structuredOutput: false,
        streaming: false,
        costClass: "medium",
        latencyClass: "standard",
        throughputHint: "n/a",
        preferredUseCases: [
            "Document OCR",
            "Scanned contract/form extraction",
            "Image-to-text in regulated workflows",
        ],
        knownLimitations: [
            "Not a chat model; chat-completions adapter does not apply; callOcr method is Wave 2",
        ],
    },
    {
        id: "multilingual-e5-large Embedding",
        kind: "embedding",
        contextWindow: 512, // [assumption]
        maxOutputTokens: 0,
        toolCalling: false,
        structuredOutput: false,
        streaming: false,
        costClass: "low",
        latencyClass: "fast",
        throughputHint: "n/a",
        preferredUseCases: [
            "Semantic search",
            "RAG retrieval",
            "Similarity ranking across multilingual content",
        ],
        knownLimitations: ["Max 512 tokens per input; callEmbedding method is Wave 2"],
    },
    {
        id: "text-embedding-3-large",
        kind: "embedding",
        contextWindow: 8_191, // Azure OpenAI embedding deployment.
        maxOutputTokens: 0,
        toolCalling: false,
        structuredOutput: false,
        streaming: false,
        costClass: "low",
        latencyClass: "fast",
        throughputHint: "120 requests / 10s and 120k TPM on current deployment",
        preferredUseCases: ["Semantic search", "RAG retrieval", "Similarity ranking"],
        knownLimitations: ["Embedding-only model; chat-completions adapter does not apply"],
    },
];
@@ -0,0 +1,41 @@
1
+ // Capability registry: the single source of truth for model routing. Workflow code
2
+ // selects models by querying this registry (by id or by capability requirements),
3
+ // never by hard-coding a model name.
4
+ import { CAPABILITY_DATA } from "./capabilities.data.js";
5
// Public registry: the same array object as CAPABILITY_DATA, re-exported under the registry name.
export const CAPABILITY_REGISTRY = CAPABILITY_DATA;
// Numeric rank per cost class; a lower rank means cheaper. Used to compare capabilities by cost.
const COST_RANK = { low: 0, medium: 1, high: 2 };
7
// Returns the first registry entry whose id is strictly equal to modelId, or undefined when the
// registry holds no such entry.
export function findCapability(modelId) {
    for (const candidate of CAPABILITY_REGISTRY) {
        if (candidate.id === modelId) {
            return candidate;
        }
    }
    return undefined;
}
10
// Returns the registry array itself (no copy is made), so callers receive the shared instance.
export function listCapabilities() {
    return CAPABILITY_REGISTRY;
}
13
// Tells whether capability `cap` satisfies every requirement present in `query`.
//   kind             - compared only when defined; must be strictly equal
//   toolCalling      - enforced only when the query asks for exactly `true`
//   structuredOutput - enforced only when the query asks for exactly `true`
//   minContextWindow - compared only when defined; cap.contextWindow must not be smaller
// An empty query therefore matches every capability.
function matches(cap, query) {
    return ((query.kind === undefined || cap.kind === query.kind) &&
        !(query.toolCalling === true && !cap.toolCalling) &&
        !(query.structuredOutput === true && !cap.structuredOutput) &&
        (query.minContextWindow === undefined || !(cap.contextWindow < query.minContextWindow)));
}
28
// Picks the capability with the lowest cost rank among those satisfying the query; yields
// undefined when nothing matches. A later entry replaces the current pick only when it is strictly
// cheaper, so equal cost classes resolve to the entry declared first in the registry.
export function selectCheapest(query) {
    return CAPABILITY_REGISTRY
        .filter((candidate) => matches(candidate, query))
        .reduce((cheapest, candidate) => {
            const isCheaper = cheapest === undefined ||
                COST_RANK[candidate.costClass] < COST_RANK[cheapest.costClass];
            return isCheaper ? candidate : cheapest;
        }, undefined);
}
@@ -0,0 +1,15 @@
1
+ import type { CircuitBreakerConfig, GatewayConfig } from "./types.js";
2
/** Read-only string-keyed map of string values; a key may map to undefined. Presumably process-environment shaped (TODO confirm). */
export type EnvSource = Readonly<Record<string, string | undefined>>;
/**
 * Per-provider settings exposed in the "safe" view of the gateway config.
 * NOTE(review): presumably the subset of provider config that is safe to print or log; confirm against the implementation.
 */
export interface SafeProviderConfig {
    readonly modelId: string;
    /** Timeout in milliseconds. */
    readonly timeoutMs: number;
    readonly maxRetries: number;
    /** Base retry delay in milliseconds. */
    readonly retryBaseDelayMs: number;
}
/** "Safe" view of a gateway config: safe provider entries plus the circuit-breaker config. */
export interface SafeGatewayConfig {
    readonly providers: readonly SafeProviderConfig[];
    readonly circuitBreaker: CircuitBreakerConfig;
}
/** Produces a GatewayConfig from an untyped raw value, with an optional environment source. */
export declare function parseGatewayConfig(raw: unknown, env?: EnvSource): GatewayConfig;
/** Produces a GatewayConfig from the file at `path`, with an optional environment source. */
export declare function loadConfigFromFile(path: string, env?: EnvSource): GatewayConfig;
/** Converts a GatewayConfig into its SafeGatewayConfig view. */
export declare function toSafeObject(config: GatewayConfig): SafeGatewayConfig;