@oscharko-dev/keiko 0.1.0-beta.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (450) hide show
  1. package/LICENSE +202 -0
  2. package/NOTICE +7 -0
  3. package/README.md +621 -0
  4. package/TRADEMARKS.md +41 -0
  5. package/dist/audit/aggregate.d.ts +5 -0
  6. package/dist/audit/aggregate.js +25 -0
  7. package/dist/audit/build.d.ts +2 -0
  8. package/dist/audit/build.js +224 -0
  9. package/dist/audit/errors.d.ts +25 -0
  10. package/dist/audit/errors.js +39 -0
  11. package/dist/audit/index-api.d.ts +14 -0
  12. package/dist/audit/index-api.js +131 -0
  13. package/dist/audit/index.d.ts +12 -0
  14. package/dist/audit/index.js +17 -0
  15. package/dist/audit/persist.d.ts +8 -0
  16. package/dist/audit/persist.js +40 -0
  17. package/dist/audit/redaction.d.ts +3 -0
  18. package/dist/audit/redaction.js +61 -0
  19. package/dist/audit/report.d.ts +18 -0
  20. package/dist/audit/report.js +50 -0
  21. package/dist/audit/retention.d.ts +3 -0
  22. package/dist/audit/retention.js +95 -0
  23. package/dist/audit/runid.d.ts +1 -0
  24. package/dist/audit/runid.js +29 -0
  25. package/dist/audit/side-file.d.ts +12 -0
  26. package/dist/audit/side-file.js +82 -0
  27. package/dist/audit/store.d.ts +12 -0
  28. package/dist/audit/store.js +198 -0
  29. package/dist/audit/types.d.ts +188 -0
  30. package/dist/audit/types.js +8 -0
  31. package/dist/audit/workflow-evidence.d.ts +27 -0
  32. package/dist/audit/workflow-evidence.js +145 -0
  33. package/dist/cli/context.d.ts +2 -0
  34. package/dist/cli/context.js +102 -0
  35. package/dist/cli/evaluate.d.ts +7 -0
  36. package/dist/cli/evaluate.js +207 -0
  37. package/dist/cli/evidence.d.ts +8 -0
  38. package/dist/cli/evidence.js +88 -0
  39. package/dist/cli/gateway-config.d.ts +10 -0
  40. package/dist/cli/gateway-config.js +12 -0
  41. package/dist/cli/gen-tests.d.ts +7 -0
  42. package/dist/cli/gen-tests.js +208 -0
  43. package/dist/cli/index.d.ts +2 -0
  44. package/dist/cli/index.js +14 -0
  45. package/dist/cli/investigate.d.ts +8 -0
  46. package/dist/cli/investigate.js +242 -0
  47. package/dist/cli/models.d.ts +3 -0
  48. package/dist/cli/models.js +64 -0
  49. package/dist/cli/run.d.ts +7 -0
  50. package/dist/cli/run.js +187 -0
  51. package/dist/cli/runner.d.ts +6 -0
  52. package/dist/cli/runner.js +83 -0
  53. package/dist/cli/ui.d.ts +31 -0
  54. package/dist/cli/ui.js +240 -0
  55. package/dist/cli/verify.d.ts +2 -0
  56. package/dist/cli/verify.js +103 -0
  57. package/dist/evaluations/fixtures/bug-investigation/happy-path.d.ts +2 -0
  58. package/dist/evaluations/fixtures/bug-investigation/happy-path.js +66 -0
  59. package/dist/evaluations/fixtures/bug-investigation/investigation-only.d.ts +2 -0
  60. package/dist/evaluations/fixtures/bug-investigation/investigation-only.js +39 -0
  61. package/dist/evaluations/fixtures/bug-investigation/unsafe-action.d.ts +2 -0
  62. package/dist/evaluations/fixtures/bug-investigation/unsafe-action.js +37 -0
  63. package/dist/evaluations/fixtures/index.d.ts +7 -0
  64. package/dist/evaluations/fixtures/index.js +35 -0
  65. package/dist/evaluations/fixtures/support.d.ts +5 -0
  66. package/dist/evaluations/fixtures/support.js +42 -0
  67. package/dist/evaluations/fixtures/unit-tests/happy-path.d.ts +2 -0
  68. package/dist/evaluations/fixtures/unit-tests/happy-path.js +40 -0
  69. package/dist/evaluations/fixtures/unit-tests/retry-then-accept.d.ts +2 -0
  70. package/dist/evaluations/fixtures/unit-tests/retry-then-accept.js +39 -0
  71. package/dist/evaluations/fixtures/unit-tests/unsafe-action.d.ts +2 -0
  72. package/dist/evaluations/fixtures/unit-tests/unsafe-action.js +32 -0
  73. package/dist/evaluations/index.d.ts +12 -0
  74. package/dist/evaluations/index.js +12 -0
  75. package/dist/evaluations/manifest-check.d.ts +1 -0
  76. package/dist/evaluations/manifest-check.js +48 -0
  77. package/dist/evaluations/model-provider.d.ts +12 -0
  78. package/dist/evaluations/model-provider.js +26 -0
  79. package/dist/evaluations/render.d.ts +2 -0
  80. package/dist/evaluations/render.js +59 -0
  81. package/dist/evaluations/runner-support.d.ts +27 -0
  82. package/dist/evaluations/runner-support.js +163 -0
  83. package/dist/evaluations/runner.d.ts +20 -0
  84. package/dist/evaluations/runner.js +174 -0
  85. package/dist/evaluations/scorer.d.ts +14 -0
  86. package/dist/evaluations/scorer.js +131 -0
  87. package/dist/evaluations/scripted-model.d.ts +6 -0
  88. package/dist/evaluations/scripted-model.js +26 -0
  89. package/dist/evaluations/surface-parity.d.ts +2 -0
  90. package/dist/evaluations/surface-parity.js +184 -0
  91. package/dist/evaluations/types.d.ts +74 -0
  92. package/dist/evaluations/types.js +16 -0
  93. package/dist/gateway/capabilities.d.ts +11 -0
  94. package/dist/gateway/capabilities.data.d.ts +2 -0
  95. package/dist/gateway/capabilities.data.js +203 -0
  96. package/dist/gateway/capabilities.js +41 -0
  97. package/dist/gateway/config.d.ts +15 -0
  98. package/dist/gateway/config.js +154 -0
  99. package/dist/gateway/errors.d.ts +72 -0
  100. package/dist/gateway/errors.js +82 -0
  101. package/dist/gateway/gateway.d.ts +19 -0
  102. package/dist/gateway/gateway.js +94 -0
  103. package/dist/gateway/index.d.ts +10 -0
  104. package/dist/gateway/index.js +11 -0
  105. package/dist/gateway/model-selection.d.ts +9 -0
  106. package/dist/gateway/model-selection.js +36 -0
  107. package/dist/gateway/normalize.d.ts +7 -0
  108. package/dist/gateway/normalize.js +93 -0
  109. package/dist/gateway/openai-adapter.d.ts +20 -0
  110. package/dist/gateway/openai-adapter.js +263 -0
  111. package/dist/gateway/redaction.d.ts +1 -0
  112. package/dist/gateway/redaction.js +51 -0
  113. package/dist/gateway/resilience.d.ts +24 -0
  114. package/dist/gateway/resilience.js +166 -0
  115. package/dist/gateway/types.d.ts +108 -0
  116. package/dist/gateway/types.js +2 -0
  117. package/dist/harness/adapters.d.ts +23 -0
  118. package/dist/harness/adapters.js +38 -0
  119. package/dist/harness/context.d.ts +33 -0
  120. package/dist/harness/context.js +21 -0
  121. package/dist/harness/emitter.d.ts +15 -0
  122. package/dist/harness/emitter.js +72 -0
  123. package/dist/harness/errors.d.ts +21 -0
  124. package/dist/harness/errors.js +39 -0
  125. package/dist/harness/executor.d.ts +3 -0
  126. package/dist/harness/executor.js +211 -0
  127. package/dist/harness/fingerprint.d.ts +6 -0
  128. package/dist/harness/fingerprint.js +43 -0
  129. package/dist/harness/index.d.ts +9 -0
  130. package/dist/harness/index.js +13 -0
  131. package/dist/harness/loop.d.ts +3 -0
  132. package/dist/harness/loop.js +159 -0
  133. package/dist/harness/patcher.d.ts +4 -0
  134. package/dist/harness/patcher.js +49 -0
  135. package/dist/harness/planner.d.ts +3 -0
  136. package/dist/harness/planner.js +21 -0
  137. package/dist/harness/ports.d.ts +61 -0
  138. package/dist/harness/ports.js +4 -0
  139. package/dist/harness/session.d.ts +25 -0
  140. package/dist/harness/session.js +116 -0
  141. package/dist/harness/sinks.d.ts +30 -0
  142. package/dist/harness/sinks.js +72 -0
  143. package/dist/harness/tasks/explain-plan.d.ts +3 -0
  144. package/dist/harness/tasks/explain-plan.js +29 -0
  145. package/dist/harness/tasks/generate-unit-tests.d.ts +3 -0
  146. package/dist/harness/tasks/generate-unit-tests.js +28 -0
  147. package/dist/harness/tasks/investigate-bug.d.ts +3 -0
  148. package/dist/harness/tasks/investigate-bug.js +31 -0
  149. package/dist/harness/tasks/policy.d.ts +11 -0
  150. package/dist/harness/tasks/policy.js +22 -0
  151. package/dist/harness/tasks/verify.d.ts +3 -0
  152. package/dist/harness/tasks/verify.js +16 -0
  153. package/dist/harness/types.d.ts +270 -0
  154. package/dist/harness/types.js +33 -0
  155. package/dist/index.d.ts +11 -0
  156. package/dist/index.js +36 -0
  157. package/dist/sdk/index.d.ts +9 -0
  158. package/dist/sdk/index.js +37 -0
  159. package/dist/sdk/run-agent.d.ts +16 -0
  160. package/dist/sdk/run-agent.js +56 -0
  161. package/dist/tools/browser/cdp-client.d.ts +35 -0
  162. package/dist/tools/browser/cdp-client.js +218 -0
  163. package/dist/tools/browser/errors.d.ts +25 -0
  164. package/dist/tools/browser/errors.js +55 -0
  165. package/dist/tools/browser/index.d.ts +5 -0
  166. package/dist/tools/browser/index.js +6 -0
  167. package/dist/tools/browser/session.d.ts +44 -0
  168. package/dist/tools/browser/session.js +748 -0
  169. package/dist/tools/browser/types.d.ts +48 -0
  170. package/dist/tools/browser/types.js +2 -0
  171. package/dist/tools/browser/validators.d.ts +5 -0
  172. package/dist/tools/browser/validators.js +97 -0
  173. package/dist/tools/errors.d.ts +59 -0
  174. package/dist/tools/errors.js +94 -0
  175. package/dist/tools/exec.d.ts +42 -0
  176. package/dist/tools/exec.js +327 -0
  177. package/dist/tools/index.d.ts +11 -0
  178. package/dist/tools/index.js +14 -0
  179. package/dist/tools/patch-content.d.ts +10 -0
  180. package/dist/tools/patch-content.js +126 -0
  181. package/dist/tools/patch-normalize.d.ts +1 -0
  182. package/dist/tools/patch-normalize.js +80 -0
  183. package/dist/tools/patch-parse.d.ts +8 -0
  184. package/dist/tools/patch-parse.js +201 -0
  185. package/dist/tools/patch.d.ts +18 -0
  186. package/dist/tools/patch.js +403 -0
  187. package/dist/tools/registry.d.ts +36 -0
  188. package/dist/tools/registry.js +231 -0
  189. package/dist/tools/sandbox.d.ts +8 -0
  190. package/dist/tools/sandbox.js +121 -0
  191. package/dist/tools/schemas.d.ts +2 -0
  192. package/dist/tools/schemas.js +51 -0
  193. package/dist/tools/terminal-policy.d.ts +9 -0
  194. package/dist/tools/terminal-policy.js +313 -0
  195. package/dist/tools/types.d.ts +99 -0
  196. package/dist/tools/types.js +103 -0
  197. package/dist/tools/writer.d.ts +7 -0
  198. package/dist/tools/writer.js +20 -0
  199. package/dist/ui/browser.d.ts +10 -0
  200. package/dist/ui/browser.js +231 -0
  201. package/dist/ui/chat-handlers.d.ts +4 -0
  202. package/dist/ui/chat-handlers.js +281 -0
  203. package/dist/ui/csp-hashes.json +17 -0
  204. package/dist/ui/csp.d.ts +2 -0
  205. package/dist/ui/csp.js +66 -0
  206. package/dist/ui/deps.d.ts +34 -0
  207. package/dist/ui/deps.js +137 -0
  208. package/dist/ui/evidence.d.ts +27 -0
  209. package/dist/ui/evidence.js +142 -0
  210. package/dist/ui/files-deny.d.ts +2 -0
  211. package/dist/ui/files-deny.js +12 -0
  212. package/dist/ui/files.d.ts +65 -0
  213. package/dist/ui/files.js +492 -0
  214. package/dist/ui/headers.d.ts +2 -0
  215. package/dist/ui/headers.js +21 -0
  216. package/dist/ui/host-check.d.ts +2 -0
  217. package/dist/ui/host-check.js +58 -0
  218. package/dist/ui/index.d.ts +20 -0
  219. package/dist/ui/index.js +23 -0
  220. package/dist/ui/load-csp.d.ts +1 -0
  221. package/dist/ui/load-csp.js +28 -0
  222. package/dist/ui/read-handlers.d.ts +8 -0
  223. package/dist/ui/read-handlers.js +247 -0
  224. package/dist/ui/routes.d.ts +36 -0
  225. package/dist/ui/routes.js +129 -0
  226. package/dist/ui/run-engine.d.ts +20 -0
  227. package/dist/ui/run-engine.js +345 -0
  228. package/dist/ui/run-handlers.d.ts +8 -0
  229. package/dist/ui/run-handlers.js +431 -0
  230. package/dist/ui/run-request.d.ts +13 -0
  231. package/dist/ui/run-request.js +219 -0
  232. package/dist/ui/runs.d.ts +43 -0
  233. package/dist/ui/runs.js +92 -0
  234. package/dist/ui/server.d.ts +11 -0
  235. package/dist/ui/server.js +143 -0
  236. package/dist/ui/sink.d.ts +27 -0
  237. package/dist/ui/sink.js +80 -0
  238. package/dist/ui/sse.d.ts +7 -0
  239. package/dist/ui/sse.js +27 -0
  240. package/dist/ui/static/404.html +1 -0
  241. package/dist/ui/static/_next/static/ca-A01hy9W98aRvMZKdAw/_buildManifest.js +1 -0
  242. package/dist/ui/static/_next/static/ca-A01hy9W98aRvMZKdAw/_ssgManifest.js +1 -0
  243. package/dist/ui/static/_next/static/chunks/255-d47fd57964443afe.js +1 -0
  244. package/dist/ui/static/_next/static/chunks/4-be1fef693af8e088.js +1 -0
  245. package/dist/ui/static/_next/static/chunks/4bd1b696-c023c6e3521b1417.js +1 -0
  246. package/dist/ui/static/_next/static/chunks/app/_not-found/page-75825b09bcecad97.js +1 -0
  247. package/dist/ui/static/_next/static/chunks/app/launch/page-9c86a13c29884245.js +1 -0
  248. package/dist/ui/static/_next/static/chunks/app/layout-bdea63fe87947d50.js +1 -0
  249. package/dist/ui/static/_next/static/chunks/app/page-4168c12c68b7a853.js +1 -0
  250. package/dist/ui/static/_next/static/chunks/framework-a6e0b7e30f98059a.js +1 -0
  251. package/dist/ui/static/_next/static/chunks/main-778a50aebff02192.js +1 -0
  252. package/dist/ui/static/_next/static/chunks/main-app-30679af7240d63e9.js +1 -0
  253. package/dist/ui/static/_next/static/chunks/pages/_app-7d307437aca18ad4.js +1 -0
  254. package/dist/ui/static/_next/static/chunks/pages/_error-cb2a52f75f2162e2.js +1 -0
  255. package/dist/ui/static/_next/static/chunks/polyfills-42372ed130431b0a.js +1 -0
  256. package/dist/ui/static/_next/static/chunks/webpack-4a462cecab786e93.js +1 -0
  257. package/dist/ui/static/_next/static/css/be7cb54d5c5673b6.css +1 -0
  258. package/dist/ui/static/assets/editors/goland.svg +35 -0
  259. package/dist/ui/static/assets/editors/intellij.svg +39 -0
  260. package/dist/ui/static/assets/editors/pycharm.svg +58 -0
  261. package/dist/ui/static/assets/editors/rustrover.svg +19 -0
  262. package/dist/ui/static/assets/editors/vscode.svg +1 -0
  263. package/dist/ui/static/assets/editors/webstorm.svg +21 -0
  264. package/dist/ui/static/assets/icons/anthropic.svg +1 -0
  265. package/dist/ui/static/assets/icons/brave.svg +1 -0
  266. package/dist/ui/static/assets/icons/css3.svg +1 -0
  267. package/dist/ui/static/assets/icons/docker.svg +1 -0
  268. package/dist/ui/static/assets/icons/git.svg +1 -0
  269. package/dist/ui/static/assets/icons/github.svg +1 -0
  270. package/dist/ui/static/assets/icons/go.svg +1 -0
  271. package/dist/ui/static/assets/icons/gradle.svg +1 -0
  272. package/dist/ui/static/assets/icons/grafana.svg +1 -0
  273. package/dist/ui/static/assets/icons/graphql.svg +1 -0
  274. package/dist/ui/static/assets/icons/html5.svg +1 -0
  275. package/dist/ui/static/assets/icons/image.svg +1 -0
  276. package/dist/ui/static/assets/icons/java.svg +1 -0
  277. package/dist/ui/static/assets/icons/javascript.svg +1 -0
  278. package/dist/ui/static/assets/icons/json.svg +1 -0
  279. package/dist/ui/static/assets/icons/kafka.svg +1 -0
  280. package/dist/ui/static/assets/icons/kubernetes.svg +1 -0
  281. package/dist/ui/static/assets/icons/linear.svg +1 -0
  282. package/dist/ui/static/assets/icons/markdown.svg +1 -0
  283. package/dist/ui/static/assets/icons/nginx.svg +1 -0
  284. package/dist/ui/static/assets/icons/nodejs.svg +1 -0
  285. package/dist/ui/static/assets/icons/notion.svg +1 -0
  286. package/dist/ui/static/assets/icons/openai.svg +1 -0
  287. package/dist/ui/static/assets/icons/playwright.svg +1 -0
  288. package/dist/ui/static/assets/icons/postgresql.svg +1 -0
  289. package/dist/ui/static/assets/icons/prometheus.svg +1 -0
  290. package/dist/ui/static/assets/icons/properties.svg +1 -0
  291. package/dist/ui/static/assets/icons/puppeteer.svg +1 -0
  292. package/dist/ui/static/assets/icons/python.svg +1 -0
  293. package/dist/ui/static/assets/icons/react.svg +1 -0
  294. package/dist/ui/static/assets/icons/redis.svg +1 -0
  295. package/dist/ui/static/assets/icons/rust.svg +1 -0
  296. package/dist/ui/static/assets/icons/sentry.svg +1 -0
  297. package/dist/ui/static/assets/icons/slack.svg +1 -0
  298. package/dist/ui/static/assets/icons/spring.svg +1 -0
  299. package/dist/ui/static/assets/icons/typescript.svg +1 -0
  300. package/dist/ui/static/assets/icons/upstash.svg +1 -0
  301. package/dist/ui/static/assets/icons/yaml.svg +1 -0
  302. package/dist/ui/static/assets/keiko-logo.svg +10 -0
  303. package/dist/ui/static/index.html +1 -0
  304. package/dist/ui/static/index.txt +19 -0
  305. package/dist/ui/static/keiko-logo.svg +10 -0
  306. package/dist/ui/static/launch.html +1 -0
  307. package/dist/ui/static/launch.txt +19 -0
  308. package/dist/ui/static.d.ts +3 -0
  309. package/dist/ui/static.js +72 -0
  310. package/dist/ui/store/chats.d.ts +14 -0
  311. package/dist/ui/store/chats.js +110 -0
  312. package/dist/ui/store/db.d.ts +6 -0
  313. package/dist/ui/store/db.js +182 -0
  314. package/dist/ui/store/errors.d.ts +12 -0
  315. package/dist/ui/store/errors.js +30 -0
  316. package/dist/ui/store/index.d.ts +6 -0
  317. package/dist/ui/store/index.js +6 -0
  318. package/dist/ui/store/messages.d.ts +5 -0
  319. package/dist/ui/store/messages.js +137 -0
  320. package/dist/ui/store/paths.d.ts +4 -0
  321. package/dist/ui/store/paths.js +69 -0
  322. package/dist/ui/store/projects.d.ts +7 -0
  323. package/dist/ui/store/projects.js +61 -0
  324. package/dist/ui/store/schema.d.ts +3 -0
  325. package/dist/ui/store/schema.js +77 -0
  326. package/dist/ui/store/types.d.ts +80 -0
  327. package/dist/ui/store/types.js +3 -0
  328. package/dist/ui/store/validation.d.ts +4 -0
  329. package/dist/ui/store/validation.js +72 -0
  330. package/dist/ui/store-handlers.d.ts +16 -0
  331. package/dist/ui/store-handlers.js +465 -0
  332. package/dist/ui/terminal-errors.d.ts +21 -0
  333. package/dist/ui/terminal-errors.js +45 -0
  334. package/dist/ui/terminal-evidence.d.ts +20 -0
  335. package/dist/ui/terminal-evidence.js +65 -0
  336. package/dist/ui/terminal-routes.d.ts +9 -0
  337. package/dist/ui/terminal-routes.js +219 -0
  338. package/dist/ui/terminal.d.ts +67 -0
  339. package/dist/ui/terminal.js +835 -0
  340. package/dist/verification/classify.d.ts +10 -0
  341. package/dist/verification/classify.js +53 -0
  342. package/dist/verification/detect.d.ts +4 -0
  343. package/dist/verification/detect.js +81 -0
  344. package/dist/verification/errors.d.ts +11 -0
  345. package/dist/verification/errors.js +21 -0
  346. package/dist/verification/index.d.ts +17 -0
  347. package/dist/verification/index.js +13 -0
  348. package/dist/verification/limits.d.ts +3 -0
  349. package/dist/verification/limits.js +40 -0
  350. package/dist/verification/monitor.d.ts +4 -0
  351. package/dist/verification/monitor.js +58 -0
  352. package/dist/verification/orchestrator.d.ts +16 -0
  353. package/dist/verification/orchestrator.js +363 -0
  354. package/dist/verification/plan.d.ts +9 -0
  355. package/dist/verification/plan.js +125 -0
  356. package/dist/verification/summary.d.ts +40 -0
  357. package/dist/verification/summary.js +67 -0
  358. package/dist/verification/types.d.ts +63 -0
  359. package/dist/verification/types.js +13 -0
  360. package/dist/workflows/bug-investigation/context.d.ts +7 -0
  361. package/dist/workflows/bug-investigation/context.js +119 -0
  362. package/dist/workflows/bug-investigation/descriptor.d.ts +3 -0
  363. package/dist/workflows/bug-investigation/descriptor.js +46 -0
  364. package/dist/workflows/bug-investigation/emit.d.ts +12 -0
  365. package/dist/workflows/bug-investigation/emit.js +35 -0
  366. package/dist/workflows/bug-investigation/events.d.ts +81 -0
  367. package/dist/workflows/bug-investigation/events.js +9 -0
  368. package/dist/workflows/bug-investigation/failure-parse.d.ts +3 -0
  369. package/dist/workflows/bug-investigation/failure-parse.js +154 -0
  370. package/dist/workflows/bug-investigation/guard.d.ts +2 -0
  371. package/dist/workflows/bug-investigation/guard.js +69 -0
  372. package/dist/workflows/bug-investigation/index.d.ts +7 -0
  373. package/dist/workflows/bug-investigation/index.js +13 -0
  374. package/dist/workflows/bug-investigation/internal.d.ts +37 -0
  375. package/dist/workflows/bug-investigation/internal.js +64 -0
  376. package/dist/workflows/bug-investigation/model-loop.d.ts +4 -0
  377. package/dist/workflows/bug-investigation/model-loop.js +223 -0
  378. package/dist/workflows/bug-investigation/parse.d.ts +3 -0
  379. package/dist/workflows/bug-investigation/parse.js +123 -0
  380. package/dist/workflows/bug-investigation/prompt.d.ts +4 -0
  381. package/dist/workflows/bug-investigation/prompt.js +107 -0
  382. package/dist/workflows/bug-investigation/report.d.ts +23 -0
  383. package/dist/workflows/bug-investigation/report.js +151 -0
  384. package/dist/workflows/bug-investigation/stages.d.ts +13 -0
  385. package/dist/workflows/bug-investigation/stages.js +242 -0
  386. package/dist/workflows/bug-investigation/types.d.ts +91 -0
  387. package/dist/workflows/bug-investigation/types.js +14 -0
  388. package/dist/workflows/bug-investigation/verify-stage.d.ts +10 -0
  389. package/dist/workflows/bug-investigation/verify-stage.js +91 -0
  390. package/dist/workflows/bug-investigation/workflow.d.ts +2 -0
  391. package/dist/workflows/bug-investigation/workflow.js +74 -0
  392. package/dist/workflows/descriptor.d.ts +20 -0
  393. package/dist/workflows/descriptor.js +8 -0
  394. package/dist/workflows/index.d.ts +3 -0
  395. package/dist/workflows/index.js +2 -0
  396. package/dist/workflows/unit-tests/context.d.ts +7 -0
  397. package/dist/workflows/unit-tests/context.js +129 -0
  398. package/dist/workflows/unit-tests/conventions.d.ts +4 -0
  399. package/dist/workflows/unit-tests/conventions.js +87 -0
  400. package/dist/workflows/unit-tests/descriptor.d.ts +4 -0
  401. package/dist/workflows/unit-tests/descriptor.js +43 -0
  402. package/dist/workflows/unit-tests/emit.d.ts +12 -0
  403. package/dist/workflows/unit-tests/emit.js +35 -0
  404. package/dist/workflows/unit-tests/events.d.ts +78 -0
  405. package/dist/workflows/unit-tests/events.js +7 -0
  406. package/dist/workflows/unit-tests/index.d.ts +6 -0
  407. package/dist/workflows/unit-tests/index.js +10 -0
  408. package/dist/workflows/unit-tests/internal.d.ts +35 -0
  409. package/dist/workflows/unit-tests/internal.js +43 -0
  410. package/dist/workflows/unit-tests/model-loop.d.ts +4 -0
  411. package/dist/workflows/unit-tests/model-loop.js +95 -0
  412. package/dist/workflows/unit-tests/parse.d.ts +6 -0
  413. package/dist/workflows/unit-tests/parse.js +68 -0
  414. package/dist/workflows/unit-tests/prompt.d.ts +4 -0
  415. package/dist/workflows/unit-tests/prompt.js +71 -0
  416. package/dist/workflows/unit-tests/report.d.ts +21 -0
  417. package/dist/workflows/unit-tests/report.js +90 -0
  418. package/dist/workflows/unit-tests/stages.d.ts +9 -0
  419. package/dist/workflows/unit-tests/stages.js +155 -0
  420. package/dist/workflows/unit-tests/types.d.ts +70 -0
  421. package/dist/workflows/unit-tests/types.js +11 -0
  422. package/dist/workflows/unit-tests/verify-stage.d.ts +9 -0
  423. package/dist/workflows/unit-tests/verify-stage.js +56 -0
  424. package/dist/workflows/unit-tests/workflow.d.ts +2 -0
  425. package/dist/workflows/unit-tests/workflow.js +58 -0
  426. package/dist/workspace/contextPack.d.ts +9 -0
  427. package/dist/workspace/contextPack.js +94 -0
  428. package/dist/workspace/detect.d.ts +3 -0
  429. package/dist/workspace/detect.js +135 -0
  430. package/dist/workspace/discovery.d.ts +9 -0
  431. package/dist/workspace/discovery.js +167 -0
  432. package/dist/workspace/errors.d.ts +39 -0
  433. package/dist/workspace/errors.js +66 -0
  434. package/dist/workspace/fs.d.ts +21 -0
  435. package/dist/workspace/fs.js +36 -0
  436. package/dist/workspace/ignore.d.ts +14 -0
  437. package/dist/workspace/ignore.js +176 -0
  438. package/dist/workspace/index.d.ts +11 -0
  439. package/dist/workspace/index.js +13 -0
  440. package/dist/workspace/paths.d.ts +2 -0
  441. package/dist/workspace/paths.js +38 -0
  442. package/dist/workspace/realpath.d.ts +7 -0
  443. package/dist/workspace/realpath.js +72 -0
  444. package/dist/workspace/retrieval.d.ts +9 -0
  445. package/dist/workspace/retrieval.js +74 -0
  446. package/dist/workspace/summary.d.ts +3 -0
  447. package/dist/workspace/summary.js +54 -0
  448. package/dist/workspace/types.d.ts +103 -0
  449. package/dist/workspace/types.js +27 -0
  450. package/package.json +58 -0
@@ -0,0 +1,33 @@
1
+ import type { ChatMessage } from "../gateway/types.js";
2
+ import type { Clock } from "../gateway/types.js";
3
+ import { Emitter } from "./emitter.js";
4
+ import type { NormalizedResponse } from "../gateway/types.js";
5
+ import type { ModelPort, ToolPort } from "./ports.js";
6
+ import type { TaskPlan } from "./tasks/policy.js";
7
+ import type { HarnessFailure, HarnessLimits, HarnessStateName, RunCounters, TaskType } from "./types.js";
8
/**
 * Mutable run state shared across the state handlers.
 *
 * The readonly members are fixed for the lifetime of a run; the remaining members are
 * written by the handlers as the run progresses.
 */
export interface RunContext {
    // --- Ports and collaborators read by the handlers ---
    readonly model: ModelPort;
    readonly tools: ToolPort;
    readonly emitter: Emitter;
    readonly clock: Clock;
    readonly signal: AbortSignal;

    // --- Fixed run parameters ---
    readonly limits: HarnessLimits;
    readonly modelId: string;
    readonly taskType: TaskType;
    readonly plan: TaskPlan;
    // NOTE(review): presumably a `clock.now()` reading taken when the run began — confirm.
    readonly startedAt: number;
    /** Counter record; the reference is fixed, its fields are mutated by the handlers. */
    readonly counters: RunCounters;

    // --- Message accumulator and most recent model response ---
    messages: ChatMessage[];
    lastResponse: NormalizedResponse | undefined;

    // --- Terminal-outcome carriers; `undefined` until a handler sets them ---
    patchDiff: string | undefined;
    report: string | undefined;
    failure: HarnessFailure | undefined;
    cancelReason: string | undefined;
    cancelledAtState: HarnessStateName | undefined;
}
28
/** Result of a state handler: the next state to enter and why. */
export interface StateStep {
    /** Name of the state the run moves to next. */
    readonly to: HarnessStateName;
    /** Human-readable explanation of the transition. */
    readonly reason: string;
}
32
/** Returns a fresh counter record with every counter set to zero. */
export declare function newCounters(): RunCounters;

/** Returns the UTF-8 byte length of the JSON-serialised message array. */
export declare function contextBytes(messages: readonly ChatMessage[]): number;
@@ -0,0 +1,21 @@
1
+ // Mutable run state shared across the state handlers. Created once per run by the loop;
2
+ // handlers read ports/limits/clock and mutate counters, the message accumulator, and the
3
+ // terminal-outcome carriers (patchDiff, report, failure). Keeping this in one place lets
4
+ // each handler file stay small and free of cross-handler imports.
5
+ import { Emitter } from "./emitter.js";
6
+ export function newCounters() {
7
+ return {
8
+ iterations: 0,
9
+ modelCalls: 0,
10
+ toolCalls: 0,
11
+ commandExecutions: 0,
12
+ failureAttempts: 0,
13
+ browserNavigations: 0,
14
+ };
15
+ }
16
// Context-size proxy with zero dependencies (ADR-0004 D3): the UTF-8 byte length of the
// serialised message array. Counting tokens would need a model-specific tokeniser;
// counting bytes does not.
const utf8 = new TextEncoder();

/**
 * Measures how large the conversation is once serialised.
 *
 * @param messages the message array to measure.
 * @returns number of bytes in the UTF-8 encoding of `JSON.stringify(messages)`.
 */
export function contextBytes(messages) {
    const serialised = JSON.stringify(messages);
    const bytes = utf8.encode(serialised);
    return bytes.length;
}
@@ -0,0 +1,15 @@
1
+ import type { Clock } from "../gateway/types.js";
2
+ import type { EventSink } from "./ports.js";
3
+ import type { HarnessEvent } from "./types.js";
4
/** Fields stamped onto every event by the emitter rather than supplied by the caller. */
type IdentityField = "schemaVersion" | "runId" | "fingerprint" | "seq" | "ts";
/** Applies `Omit` to each member of a union separately, so the union stays discriminated. */
type DistributiveOmit<T, K extends PropertyKey> = T extends unknown ? Omit<T, K> : never;
/** A harness event without its identity fields: what a caller passes to `Emitter.emit`. */
type EventBody = DistributiveOmit<HarnessEvent, IdentityField>;
6
/**
 * Stamps each event body with the run identity, a sequence number and a clock timestamp,
 * then forwards the event to the sinks.
 */
export declare class Emitter {
    private readonly sinks;
    private readonly clock;
    private readonly runId;
    private readonly fingerprint;
    private seq;
    /**
     * @param sinks destinations that receive every emitted event.
     * @param clock time source for the `ts` field.
     * @param runId run identifier stamped on every event.
     * @param fingerprint fingerprint stamped on every event.
     */
    constructor(sinks: readonly EventSink[], clock: Clock, runId: string, fingerprint: string);
    /** Stamps `body` with the identity fields and delivers it to the sinks. */
    emit(body: EventBody): void;
}
15
+ export {};
@@ -0,0 +1,72 @@
1
+ // Stamps every harness event with run identity, a monotonic seq, and a clock timestamp,
2
+ // then forwards it to the sink. SENSITIVE fields (rationale, modelResponse, diff) are
3
+ // redacted before forwarding UNLESS the sink retains raw content for replay (ADR-0004 D6).
4
+ import { redact } from "../gateway/redaction.js";
5
/**
 * Returns a copy of a `reasoning:trace` event with `rationale` redacted and, when it is
 * present, `modelResponse` redacted too. The input event is not modified.
 */
function redactReasoningTrace(event) {
    const scrubbed = { ...event, rationale: redact(event.rationale) };
    if (event.modelResponse !== undefined) {
        scrubbed.modelResponse = redact(event.modelResponse);
    }
    return scrubbed;
}
12
/**
 * Returns a copy of a `run:completed` event with `report` redacted and, when it is
 * present, `patchDiff` redacted too.
 *
 * WHY: report and patchDiff carry full model output; known secret formats must stay out
 * of every sink that does not explicitly retain raw content for replay.
 */
function redactRunCompleted(event) {
    const scrubbed = { ...event, report: redact(event.report) };
    if (event.patchDiff !== undefined) {
        scrubbed.patchDiff = redact(event.patchDiff);
    }
    return scrubbed;
}
21
/**
 * Returns a copy of a `run:failed` event whose nested `failure` record has its `message`
 * redacted and, when present, its `detail` redacted. Other failure fields are copied as-is.
 */
function redactRunFailed(event) {
    const original = event.failure;
    const failure = { ...original, message: redact(original.message) };
    if (original.detail !== undefined) {
        failure.detail = redact(original.detail);
    }
    return { ...event, failure };
}
29
// Shared by the two "*:call:failed" event types, which both carry a `message` field.
const redactMessageField = (event) => ({ ...event, message: redact(event.message) });

// Event type -> function producing a redacted copy of that event. Event types without an
// entry here have no dedicated redactor.
const REDACTORS = {
    "reasoning:trace": redactReasoningTrace,
    "patch:proposed": (event) => ({ ...event, diff: redact(event.diff) }),
    "model:call:failed": redactMessageField,
    "tool:call:failed": redactMessageField,
    "verification:result": (event) => ({ ...event, detail: redact(event.detail) }),
    "run:completed": redactRunCompleted,
    "run:cancelled": (event) => {
        // A cancellation without a reason has nothing to redact; pass it through untouched.
        if (event.reason === undefined) {
            return event;
        }
        return { ...event, reason: redact(event.reason) };
    },
    "run:failed": redactRunFailed,
};
39
+ function redactSensitive(event) {
40
+ const redactor = REDACTORS[event.type];
41
+ return redactor === undefined ? event : redactor(event);
42
+ }
43
+ export class Emitter {
44
+ sinks;
45
+ clock;
46
+ runId;
47
+ fingerprint;
48
+ seq = 0;
49
+ // Fans every event out to all sinks. Each sink receives raw SENSITIVE fields only if it
50
+ // declares `retainsRawContent`; otherwise it receives a redacted copy.
51
+ constructor(sinks, clock, runId, fingerprint) {
52
+ this.sinks = sinks;
53
+ this.clock = clock;
54
+ this.runId = runId;
55
+ this.fingerprint = fingerprint;
56
+ }
57
+ emit(body) {
58
+ this.seq += 1;
59
+ const event = {
60
+ schemaVersion: "1",
61
+ runId: this.runId,
62
+ fingerprint: this.fingerprint,
63
+ seq: this.seq,
64
+ ts: this.clock.now(),
65
+ ...body,
66
+ };
67
+ const redacted = redactSensitive(event);
68
+ for (const sink of this.sinks) {
69
+ sink.emit(sink.retainsRawContent === true ? event : redacted);
70
+ }
71
+ }
72
+ }
@@ -0,0 +1,21 @@
1
+ import { HARNESS_CODES, type HarnessCode, type HarnessFailure } from "./types.js";
2
+ export { HARNESS_CODES };
3
+ export type { HarnessCode };
4
/**
 * Base class of the harness error taxonomy. Every concrete error exposes a stable `code`
 * discriminant for callers to switch on.
 */
export declare abstract class HarnessError extends Error {
    /** Stable machine-readable discriminant. */
    abstract readonly code: HarnessCode;
    /**
     * @param message error text.
     * @param secrets optional extra values forwarded to redaction alongside `message`.
     */
    constructor(message: string, secrets?: readonly string[]);
}
8
/** Error whose `code` is supplied by the caller at construction time. */
export declare class LimitExceededError extends HarnessError {
    readonly code: HarnessCode;
    /**
     * @param code the category of the breached limit.
     * @param message error text.
     * @param secrets optional extra values forwarded to redaction alongside `message`.
     */
    constructor(code: HarnessCode, message: string, secrets?: readonly string[]);
}
12
/** Harness error whose `code` is fixed to "HARNESS_MODEL_ERROR". */
export declare class HarnessModelError extends HarnessError {
    readonly code: "HARNESS_MODEL_ERROR";
}
15
/** Harness error whose `code` is fixed to "HARNESS_TOOL_ERROR". */
export declare class HarnessToolError extends HarnessError {
    readonly code: "HARNESS_TOOL_ERROR";
}
18
/** Harness error whose `code` is fixed to "HARNESS_INTERNAL". */
export declare class HarnessInternalError extends HarnessError {
    readonly code: "HARNESS_INTERNAL";
}
21
/**
 * Builds a machine-readable failure record.
 *
 * @param category failure category code.
 * @param message failure text.
 * @param detail optional extra detail; omitted from the record when not supplied.
 */
export declare function toFailure(category: HarnessCode, message: string, detail?: string): HarnessFailure;
@@ -0,0 +1,39 @@
1
+ // Harness error taxonomy, mirroring the gateway pattern (ADR-0003). Errors carry a
2
+ // stable `code` discriminant; callers switch on `code`, never parse `message`.
3
+ // Messages are redacted at construction so they are always safe to log.
4
+ import { redact } from "../gateway/redaction.js";
5
+ import { HARNESS_CODES } from "./types.js";
6
+ export { HARNESS_CODES };
7
/**
 * Base class for harness errors. The message is passed through `redact` before it reaches
 * `Error`, so the stored `message` is always the redacted text.
 */
export class HarnessError extends Error {
    /**
     * @param message error text.
     * @param secrets extra values handed to `redact` together with the message (default: none).
     */
    constructor(message, secrets = []) {
        const safeMessage = redact(message, secrets);
        super(safeMessage);
        // `new.target` is the class actually being constructed, so subclasses report
        // their own name without overriding anything.
        this.name = new.target.name;
    }
}
13
/**
 * Raised when a configured safety limit is breached. The precise category travels on
 * `code` so the loop can map it onto a `limit-exceeded` terminal state with a typed failure.
 */
export class LimitExceededError extends HarnessError {
    code;
    /**
     * @param code category of the breached limit.
     * @param message error text; redacted by the base class.
     * @param secrets extra values handed to redaction (default: none).
     */
    constructor(code, message, secrets = []) {
        super(message, secrets);
        this.code = code;
    }
}
22
/** Raised for a non-recoverable model-port error once retries are exhausted. */
export class HarnessModelError extends HarnessError {
    code = HARNESS_CODES.MODEL_ERROR;
}
26
/** Raised for a non-recoverable tool-port error. */
export class HarnessToolError extends HarnessError {
    code = HARNESS_CODES.TOOL_ERROR;
}
30
/**
 * Raised for an unexpected harness-internal invariant violation, e.g. an explain-plan task
 * receiving a tool_calls finishReason, which the read-only path forbids.
 */
export class HarnessInternalError extends HarnessError {
    code = HARNESS_CODES.INTERNAL;
}
35
/**
 * Builds the machine-readable failure record carried on the run result and the
 * `run:failed` event.
 *
 * @param category failure category code.
 * @param message failure text.
 * @param detail optional extra detail. SENSITIVE: must be redacted before persistence.
 * @returns `{ category, message }`, plus `detail` only when one was supplied.
 */
export function toFailure(category, message, detail) {
    const failure = { category, message };
    if (detail === undefined) {
        return failure;
    }
    return { ...failure, detail };
}
@@ -0,0 +1,3 @@
1
+ import { type RunContext, type StateStep } from "./context.js";
2
+ export declare function handleModelCall(ctx: RunContext): Promise<StateStep>;
3
+ export declare function handleToolCall(ctx: RunContext): Promise<StateStep>;
@@ -0,0 +1,211 @@
1
+ // Handlers for the model-call and tool-call states. The harness — not the model — owns
2
+ // control flow: it inspects finishReason and toolCalls and decides the next state. A model
3
+ // response is never executed as an instruction (ADR-0004 D1).
4
+ import { CancelledError, GatewayError } from "../gateway/errors.js";
5
+ import { ToolError } from "../tools/errors.js";
6
+ import { WorkspaceError } from "../workspace/errors.js";
7
+ import { contextBytes } from "./context.js";
8
+ import { HARNESS_CODES, toFailure } from "./errors.js";
9
+ const RUN_COMMAND_TOOL = "run_command";
10
// Picks the error code reported on `tool:call:failed`: ToolError and
// WorkspaceError instances contribute their own `code`; anything else is
// reported under the generic "TOOL_ERROR".
function toolFailureCode(error) {
    const carriesCode = error instanceof ToolError || error instanceof WorkspaceError;
    return carriesCode ? error.code : "TOOL_ERROR";
}
16
+ function buildRequest(ctx) {
17
+ const tools = ctx.plan.allowsTools ? ctx.tools.listTools() : undefined;
18
+ return tools === undefined
19
+ ? { modelId: ctx.modelId, messages: ctx.messages }
20
+ : { modelId: ctx.modelId, messages: ctx.messages, tools };
21
+ }
22
// Chooses the state that follows a successful model call. A `tool_calls` finish
// reason leads to the tool-call state when the plan allows tools and is an
// internal failure otherwise; any other finish reason ends the model turn and
// goes to patch-proposal or reporting depending on whether the plan allows a patch.
function routeAfterModel(ctx, response) {
    if (response.finishReason !== "tool_calls") {
        return ctx.plan.allowsPatch
            ? { to: "patch-proposal", reason: "model produced final content; assembling patch" }
            : { to: "reporting", reason: "model produced final content; read-only task" };
    }
    if (ctx.plan.allowsTools) {
        return { to: "tool-call", reason: "model requested tool calls" };
    }
    ctx.failure = toFailure(HARNESS_CODES.INTERNAL, "model requested tool calls on a read-only task type");
    return { to: "failed", reason: "tool_calls finishReason forbidden for this task type" };
}
35
// Maps an error thrown by the model port onto the next state.
//  - Abort (signal aborted or CancelledError): `limit-exceeded` when a wall-time
//    failure is already recorded on the context, otherwise `cancelled`. No
//    `model:call:failed` event is emitted on this path.
//  - Otherwise a `model:call:failed` event is emitted, then:
//      non-retryable error      -> `failed` with a MODEL_ERROR failure record
//      retryable, budget spent  -> `limit-exceeded` (LIMIT_FAILURE_ATTEMPTS)
//      retryable, budget left   -> back to `planning`
function onModelError(ctx, error) {
    const aborted = ctx.signal.aborted || error instanceof CancelledError;
    if (aborted) {
        const wallTimeHit = ctx.failure?.category === HARNESS_CODES.LIMIT_WALL_TIME;
        return wallTimeHit
            ? { to: "limit-exceeded", reason: "maxWallTimeMs exceeded during model call" }
            : { to: "cancelled", reason: "abort detected during model call" };
    }

    const fromGateway = error instanceof GatewayError;
    const errorCode = fromGateway ? error.code : "UNKNOWN";
    const message = error instanceof Error ? error.message : "model call failed";
    ctx.emitter.emit({ type: "model:call:failed", modelId: ctx.modelId, errorCode, message });

    // Only gateway errors that flag themselves as retryable get another attempt.
    if (!(fromGateway && error.retryable)) {
        ctx.failure = toFailure(HARNESS_CODES.MODEL_ERROR, message);
        return { to: "failed", reason: "non-retryable model error" };
    }

    ctx.counters.failureAttempts += 1;
    if (ctx.counters.failureAttempts >= ctx.limits.maxFailureAttempts) {
        ctx.failure = toFailure(HARNESS_CODES.LIMIT_FAILURE_ATTEMPTS, "max failure attempts reached");
        return { to: "limit-exceeded", reason: "maxFailureAttempts exceeded" };
    }
    return { to: "planning", reason: "retryable model error; re-planning" };
}
57
// Handler for the model-call state. Counts the call, announces it, invokes the
// model port with the run's abort signal, and on success emits the completion and
// reasoning-trace events, appends the assistant message to the conversation,
// remembers the response, and lets routeAfterModel choose the next state. Any
// error thrown while building or sending the request is routed via onModelError.
export async function handleModelCall(ctx) {
    ctx.counters.modelCalls += 1;
    const startedEvent = {
        type: "model:call:started",
        modelId: ctx.modelId,
        messageCount: ctx.messages.length,
        contextBytes: contextBytes(ctx.messages),
    };
    ctx.emitter.emit(startedEvent);

    let reply;
    try {
        // buildRequest stays inside the try so a failing tool listing is treated
        // like any other model-call error.
        reply = await ctx.model.call(buildRequest(ctx), ctx.signal);
    }
    catch (cause) {
        return onModelError(ctx, cause);
    }

    const completedEvent = {
        type: "model:call:completed",
        modelId: ctx.modelId,
        finishReason: reply.finishReason,
        toolCallCount: reply.toolCalls.length,
        usage: {
            requestId: reply.usage.requestId,
            promptTokens: reply.usage.promptTokens,
            completionTokens: reply.usage.completionTokens,
            latencyMs: reply.usage.latencyMs,
        },
    };
    ctx.emitter.emit(completedEvent);

    const traceEvent = {
        type: "reasoning:trace",
        phase: "model-call",
        rationale: "evaluated model response and selected next state",
        modelResponse: reply.content,
    };
    ctx.emitter.emit(traceEvent);

    ctx.messages = [...ctx.messages, assistantMessage(reply)];
    ctx.lastResponse = reply;
    return routeAfterModel(ctx, reply);
}
94
+ function assistantMessage(response) {
95
+ return response.toolCalls.length === 0
96
+ ? { role: "assistant", content: response.content }
97
+ : { role: "assistant", content: response.content, toolCalls: response.toolCalls };
98
+ }
99
+ // S-M1: emits the redacted audit event matching a tool's metadata, in addition to
100
+ // tool:call:completed, so the issue #10 ledger sees THAT a command ran / a patch applied — never
101
+ // the args, stdout, or file paths. No-op when the tool returned no metadata (read-only tools).
102
+ function emitToolMetadata(ctx, metadata, durationMs) {
103
+ if (metadata === undefined) {
104
+ return;
105
+ }
106
+ if (metadata.kind === "command") {
107
+ ctx.emitter.emit({
108
+ type: "sandbox:configured",
109
+ envAllowlist: metadata.sandbox.envAllowlist,
110
+ network: metadata.sandbox.network,
111
+ maxOutputBytes: metadata.sandbox.maxOutputBytes,
112
+ timeoutMs: metadata.sandbox.timeoutMs,
113
+ terminationGraceMs: metadata.sandbox.terminationGraceMs,
114
+ cwdRequested: metadata.sandbox.cwdRequested,
115
+ });
116
+ ctx.emitter.emit({
117
+ type: "command:executed",
118
+ executable: metadata.executable,
119
+ argCount: metadata.argCount,
120
+ exitCode: metadata.exitCode,
121
+ timedOut: metadata.timedOut,
122
+ durationMs,
123
+ });
124
+ return;
125
+ }
126
+ ctx.emitter.emit({
127
+ type: "patch:applied",
128
+ changedFiles: metadata.changedFiles,
129
+ created: metadata.created,
130
+ deleted: metadata.deleted,
131
+ });
132
+ }
133
// Chooses the step for an abort observed around a tool call: when a wall-time
// failure is already recorded on the context the run ends as `limit-exceeded`,
// otherwise it ends as `cancelled` with the caller's reason.
function abortStep(ctx, reason) {
    const wallTimeHit = ctx.failure?.category === HARNESS_CODES.LIMIT_WALL_TIME;
    return wallTimeHit
        ? { to: "limit-exceeded", reason: "maxWallTimeMs exceeded during tool call" }
        : { to: "cancelled", reason };
}
139
// Records the command-execution limit failure on the context and returns the
// matching `limit-exceeded` step.
function commandBudgetExceeded(ctx) {
    const failure = toFailure(HARNESS_CODES.LIMIT_COMMAND_EXEC, "command-execution budget exhausted");
    ctx.failure = failure;
    return { to: "limit-exceeded", reason: "maxCommandExecutions exceeded" };
}
143
// Records a context-size limit failure (quoting the measured byte count and the
// configured ceiling) and returns the matching `limit-exceeded` step.
function toolOutputBudgetExceeded(ctx, bytes) {
    const ceiling = String(ctx.limits.maxContextBytes);
    const message = "context " + String(bytes) + " bytes exceeds limit " + ceiling;
    ctx.failure = toFailure(HARNESS_CODES.LIMIT_CONTEXT_SIZE, message);
    return { to: "limit-exceeded", reason: "maxContextBytes exceeded after tool output" };
}
147
// Distinguishes a state step from a tool message: only a step has a `to` key
// (own or inherited, same as the `in` operator).
function isStateStep(value) {
    return Reflect.has(value, "to");
}
150
// Executes a single tool call through the tool port.
// Returns the `tool` message to feed back to the model on success, or a state
// step when the call failed: an abort is routed through abortStep, any other
// error becomes `failed` with a TOOL_ERROR failure record.
// The tool-call counter is bumped before execution; the command-execution
// counter only when the result reports `commandExecuted === true`.
async function runOneTool(ctx, call) {
    const { id: toolCallId, name: toolName } = call;
    ctx.counters.toolCalls += 1;
    ctx.emitter.emit({ type: "tool:call:started", toolName, toolCallId });
    try {
        const outcome = await ctx.tools.execute({
            toolCallId,
            toolName,
            arguments: call.arguments,
            signal: ctx.signal,
        });
        if (outcome.commandExecuted === true) {
            ctx.counters.commandExecutions += 1;
        }
        ctx.emitter.emit({
            type: "tool:call:completed",
            toolName,
            toolCallId,
            durationMs: outcome.durationMs,
        });
        emitToolMetadata(ctx, outcome.metadata, outcome.durationMs);
        return { role: "tool", content: outcome.output, toolCallId };
    }
    catch (cause) {
        const message = cause instanceof Error ? cause.message : "tool execution failed";
        // The failure event is emitted for aborts as well, before the abort check.
        ctx.emitter.emit({
            type: "tool:call:failed",
            toolName,
            toolCallId,
            errorCode: toolFailureCode(cause),
            message,
        });
        const aborted = ctx.signal.aborted || cause instanceof CancelledError;
        if (aborted) {
            return abortStep(ctx, "abort detected during tool call");
        }
        ctx.failure = toFailure(HARNESS_CODES.TOOL_ERROR, message);
        return { to: "failed", reason: "tool execution failed" };
    }
}
188
// Handler for the tool-call state. Runs the tool calls of the last model
// response one after another. Before each call it honours an abort and, for
// `run_command` calls, the command-execution budget; after each call it checks
// that the conversation plus all collected tool output still fits the context
// budget. Tool messages are appended to the conversation only when every call
// succeeded; any early exit returns the terminal step and leaves ctx.messages
// untouched.
export async function handleToolCall(ctx) {
    const pendingCalls = ctx.lastResponse?.toolCalls ?? [];
    const toolMessages = [];
    for (const call of pendingCalls) {
        if (ctx.signal.aborted) {
            return abortStep(ctx, "abort detected before tool call");
        }
        const isCommand = call.name === RUN_COMMAND_TOOL;
        if (isCommand && ctx.counters.commandExecutions >= ctx.limits.maxCommandExecutions) {
            return commandBudgetExceeded(ctx);
        }
        const outcome = await runOneTool(ctx, call);
        if (isStateStep(outcome)) {
            return outcome;
        }
        // Measure the context as it would look with this output included.
        const projected = [...ctx.messages, ...toolMessages, outcome];
        const bytes = contextBytes(projected);
        if (bytes > ctx.limits.maxContextBytes) {
            return toolOutputBudgetExceeded(ctx, bytes);
        }
        toolMessages.push(outcome);
    }
    ctx.messages = [...ctx.messages, ...toolMessages];
    return { to: "model-call", reason: "tool results fed back to model" };
}
@@ -0,0 +1,6 @@
1
+ import type { Fingerprinter, FingerprintInput, IdSource } from "./ports.js";
2
+ export declare function canonicalise(value: unknown): string;
3
+ export declare function configFingerprint(input: FingerprintInput): string;
4
+ export declare const defaultFingerprinter: Fingerprinter;
5
+ export declare const defaultIdSource: IdSource;
6
+ export declare function counterIdSource(): IdSource;
@@ -0,0 +1,43 @@
1
+ // Deterministic run-ID and configuration-fingerprint sources. Production uses
2
+ // node:crypto (randomUUID, SHA-256); tests inject a counter so IDs are fixed and
3
+ // runs are reproducible for replay (ADR-0004 D7).
4
+ import { createHash, randomUUID } from "node:crypto";
5
// True for values that JSON.stringify cannot represent and therefore drops from
// objects (and renders as `null` inside arrays): undefined, functions, symbols.
function isJsonOmitted(value) {
    return value === undefined || typeof value === "function" || typeof value === "symbol";
}
// Canonical JSON: object keys sorted recursively (by UTF-16 code unit), array
// order preserved, and unrepresentable values (undefined, functions, symbols)
// handled as JSON.stringify does — omitted from objects, `null` inside arrays.
// Two structurally equal configs thus serialise to byte-identical strings
// regardless of key insertion order.
//
// Always returns a string: an unrepresentable value at the top level yields
// "null". Previously functions and symbols leaked the literal text `undefined`
// (in objects) or an empty slot (in arrays) into the output, which made e.g.
// `[symbol]` indistinguishable from `[]`.
// Values JSON.stringify rejects (such as BigInt) still throw.
export function canonicalise(value) {
    if (isJsonOmitted(value)) {
        return "null";
    }
    if (value === null || typeof value !== "object") {
        return JSON.stringify(value);
    }
    if (Array.isArray(value)) {
        return `[${value.map((item) => canonicalise(item)).join(",")}]`;
    }
    const entries = Object.entries(value)
        .filter(([, v]) => !isJsonOmitted(v))
        .sort(([a], [b]) => (a < b ? -1 : a > b ? 1 : 0))
        .map(([key, v]) => `${JSON.stringify(key)}:${canonicalise(v)}`);
    return `{${entries.join(",")}}`;
}
24
// Fingerprints a configuration: the input is serialised with `canonicalise`
// and the UTF-8 bytes of that string are hashed with SHA-256; the digest is
// returned as a hex string.
export function configFingerprint(input) {
    const hasher = createHash("sha256");
    hasher.update(canonicalise(input), "utf8");
    return hasher.digest("hex");
}
28
// Default fingerprinter: `compute` is `configFingerprint` itself.
export const defaultFingerprinter = {
    compute: configFingerprint,
};
31
// Default run-ID source: every call returns a fresh `randomUUID()` value.
export const defaultIdSource = {
    newRunId() {
        return randomUUID();
    },
};
34
// Test/replay helper. Each source returned here owns a private counter and
// hands out "run-1", "run-2", … in order, so run IDs are fixed and reproducible.
export function counterIdSource() {
    let issued = 0;
    const newRunId = () => {
        issued += 1;
        return `run-${String(issued)}`;
    };
    return { newRunId };
}
@@ -0,0 +1,9 @@
1
+ export { createSession, HARNESS_VERSION, type AgentConfig, type AgentSession, type HarnessDeps, } from "./session.js";
2
+ export { createSession as runAgent } from "./session.js";
3
+ export { DEFAULT_LIMITS, HARNESS_CODES, TERMINAL_STATES, type ExplainPlanInput, type GenerateUnitTestsInput, type HarnessCode, type HarnessEvent, type HarnessFailure, type HarnessLimits, type HarnessStateName, type InvestigateBugInput, type ModelCallCompletedEvent, type ModelCallFailedEvent, type ModelCallStartedEvent, type PatchProposedEvent, type ReasoningTraceEvent, type RunCancelledEvent, type RunCompletedEvent, type RunCounters, type RunFailedEvent, type RunManifest, type RunOutcome, type RunResult, type RunStartedEvent, type StateTransition, type StateTransitionEvent, type TaskInput, type TaskType, type TerminalState, type ToolCallCompletedEvent, type ToolCallFailedEvent, type ToolCallStartedEvent, type VerificationResultEvent, } from "./types.js";
4
+ export { HarnessError, HarnessInternalError, HarnessModelError, HarnessToolError, LimitExceededError, toFailure, } from "./errors.js";
5
+ export type { EventSink, Fingerprinter, FingerprintInput, IdSource, ModelPort, ToolCallRequest, ToolCallResult, ToolPort, } from "./ports.js";
6
+ export { DryRunToolPort, GatewayModelPort, type ChatModel, type RecordedToolCall, } from "./adapters.js";
7
+ export { CliEventSink, MemoryEventSink, type EventWriter, type ManifestSeed } from "./sinks.js";
8
+ export { canonicalise, configFingerprint, counterIdSource, defaultFingerprinter, defaultIdSource, } from "./fingerprint.js";
9
+ export { resolveTaskPlan, type TaskPlan } from "./tasks/policy.js";
@@ -0,0 +1,13 @@
1
+ // Public barrel for the agent harness: the session API, all ports/adapters/sinks, the
2
+ // task types, the event schema, the limit/error taxonomy, and the deterministic ID and
3
+ // fingerprint sources. Downstream issues (#6 tools, #10 audit, #13 UI) depend only on
4
+ // these typed seams (ADR-0004 D2).
5
+ export { createSession, HARNESS_VERSION, } from "./session.js";
6
+ // runAgent is the ergonomic SDK alias of createSession; both start a bounded run.
7
+ export { createSession as runAgent } from "./session.js";
8
+ export { DEFAULT_LIMITS, HARNESS_CODES, TERMINAL_STATES, } from "./types.js";
9
+ export { HarnessError, HarnessInternalError, HarnessModelError, HarnessToolError, LimitExceededError, toFailure, } from "./errors.js";
10
+ export { DryRunToolPort, GatewayModelPort, } from "./adapters.js";
11
+ export { CliEventSink, MemoryEventSink } from "./sinks.js";
12
+ export { canonicalise, configFingerprint, counterIdSource, defaultFingerprinter, defaultIdSource, } from "./fingerprint.js";
13
+ export { resolveTaskPlan } from "./tasks/policy.js";
@@ -0,0 +1,3 @@
1
+ import { type RunContext } from "./context.js";
2
+ import { type RunOutcome } from "./types.js";
3
+ export declare function runLoop(ctx: RunContext): Promise<RunOutcome>;
@@ -0,0 +1,159 @@
1
+ // The state-machine driver. The harness owns all control flow: it checks abort and limit
2
+ // guards at the top of the loop and before each port call, dispatches the current state to
3
+ // its handler, and emits a state:transition before every change (ADR-0004 D1, D3, D4).
4
+ import { HARNESS_CODES, toFailure } from "./errors.js";
5
+ import { contextBytes } from "./context.js";
6
+ import { handleModelCall, handleToolCall } from "./executor.js";
7
+ import { handlePatchProposal, handleReporting, handleVerification } from "./patcher.js";
8
+ import { handleContextSelection, handlePlanning } from "./planner.js";
9
+ import { TERMINAL_STATES } from "./types.js";
10
+ const MAX_LOOP_STEPS = 10_000; // absolute safety net; bounded states make this unreachable.
11
// Builds the step that moves the run into the `cancelled` terminal state.
function abortStep(reason) {
    const to = "cancelled";
    return { to, reason };
}
14
// Wall-time gate. Compares the time elapsed since `ctx.startedAt` (per the
// injected clock) with `maxWallTimeMs`; when the budget is exceeded it records a
// LIMIT_WALL_TIME failure and returns the `limit-exceeded` step, otherwise null.
function checkWallTime(ctx) {
    const elapsedMs = ctx.clock.now() - ctx.startedAt;
    if (elapsedMs > ctx.limits.maxWallTimeMs) {
        ctx.failure = toFailure(HARNESS_CODES.LIMIT_WALL_TIME, "wall-time budget exhausted");
        return { to: "limit-exceeded", reason: "maxWallTimeMs exceeded" };
    }
    return null;
}
21
// Guards evaluated on entry to `planning`: the run-wide wall-time gate first,
// then the iteration budget. Returns the `limit-exceeded` step (with the failure
// recorded on the context) or null when both budgets still have room.
function checkLoopLimits(ctx) {
    const overWallTime = checkWallTime(ctx);
    if (overWallTime !== null) {
        return overWallTime;
    }
    const iterationsSpent = ctx.counters.iterations >= ctx.limits.maxIterations;
    if (iterationsSpent) {
        ctx.failure = toFailure(HARNESS_CODES.LIMIT_ITERATIONS, "iteration budget exhausted");
        return { to: "limit-exceeded", reason: "maxIterations exceeded" };
    }
    return null;
}
34
// Guards evaluated at every entry to `model-call` — so they also bound calls
// that follow a tool-call, not only the first call after context selection.
// Checks the model-call count first, then the byte size of the conversation.
function checkModelCallLimits(ctx) {
    const { counters, limits } = ctx;
    if (counters.modelCalls >= limits.maxModelCalls) {
        ctx.failure = toFailure(HARNESS_CODES.LIMIT_MODEL_CALLS, "model-call budget exhausted");
        return { to: "limit-exceeded", reason: "maxModelCalls exceeded" };
    }
    const bytes = contextBytes(ctx.messages);
    if (bytes > limits.maxContextBytes) {
        const message = `context ${String(bytes)} bytes exceeds limit ${String(limits.maxContextBytes)}`;
        ctx.failure = toFailure(HARNESS_CODES.LIMIT_CONTEXT_SIZE, message);
        return { to: "limit-exceeded", reason: "maxContextBytes exceeded" };
    }
    return null;
}
48
// Guards run before entering any state other than `planning`. Order: wall-time
// budget, then abort, then the call-count limits of the state about to consume
// the bounded resource (`model-call` or `tool-call`). Returns the step to take
// instead of entering the state, or null to proceed.
function checkEntryGuards(ctx, state) {
    const overWallTime = checkWallTime(ctx);
    if (overWallTime !== null) {
        return overWallTime;
    }
    if (ctx.signal.aborted) {
        return abortStep("abort detected before state entry");
    }
    switch (state) {
        case "model-call":
            return checkModelCallLimits(ctx);
        case "tool-call":
            return checkToolLimits(ctx);
        default:
            return null;
    }
}
66
// Guards evaluated on entry to `tool-call`. The tool-call budget is checked
// against the calls already made plus every call pending in the last model
// response, so a batch that would overshoot is rejected before any of it runs.
// NOTE(review): the command-execution gate below applies to the whole state, so
// once that budget is spent it also stops batches containing no command call —
// confirm this is intended.
function checkToolLimits(ctx) {
    const queued = ctx.lastResponse?.toolCalls.length ?? 0;
    const projectedToolCalls = ctx.counters.toolCalls + queued;
    if (projectedToolCalls > ctx.limits.maxToolCalls) {
        ctx.failure = toFailure(HARNESS_CODES.LIMIT_TOOL_CALLS, "tool-call budget exhausted");
        return { to: "limit-exceeded", reason: "maxToolCalls exceeded" };
    }
    const commandsSpent = ctx.counters.commandExecutions >= ctx.limits.maxCommandExecutions;
    if (commandsSpent) {
        ctx.failure = toFailure(HARNESS_CODES.LIMIT_COMMAND_EXEC, "command-execution budget exhausted");
        return { to: "limit-exceeded", reason: "maxCommandExecutions exceeded" };
    }
    return null;
}
78
// Runs the handler for the current state and resolves to the step it returns.
// Entering `planning` counts as one iteration. A state without a handler is an
// internal error and yields a `failed` step.
async function dispatch(ctx, state) {
    if (state === "planning") {
        ctx.counters.iterations += 1;
        return handlePlanning(ctx);
    }
    if (state === "context-selection") {
        return handleContextSelection(ctx);
    }
    if (state === "model-call") {
        return handleModelCall(ctx);
    }
    if (state === "tool-call") {
        return handleToolCall(ctx);
    }
    if (state === "patch-proposal") {
        return handlePatchProposal(ctx);
    }
    if (state === "verification") {
        return handleVerification(ctx);
    }
    if (state === "reporting") {
        return handleReporting(ctx);
    }
    ctx.failure = toFailure(HARNESS_CODES.INTERNAL, `no handler for state ${state}`);
    return { to: "failed", reason: "internal: unhandled state" };
}
100
+ function transition(ctx, from, step) {
101
+ if (step.to === "cancelled") {
102
+ ctx.cancelledAtState = from;
103
+ }
104
+ ctx.emitter.emit({ type: "state:transition", from, to: step.to, reason: step.reason });
105
+ return step.to;
106
+ }
107
// Emits the single run-level event that matches the terminal state:
//  - completed               -> run:completed (report, plus patchDiff when set)
//  - cancelled               -> run:cancelled (state it happened in, plus reason when set)
//  - failed / limit-exceeded -> run:failed; if no failure was recorded, an
//                               INTERNAL one is synthesised and stored on ctx
// Any other state emits nothing.
function emitTerminal(ctx, state) {
    switch (state) {
        case "completed": {
            const patchPart = ctx.patchDiff === undefined ? {} : { patchDiff: ctx.patchDiff };
            ctx.emitter.emit({
                type: "run:completed",
                report: ctx.report ?? "no model output",
                ...patchPart,
            });
            return;
        }
        case "cancelled": {
            const reasonPart = ctx.cancelReason === undefined ? {} : { reason: ctx.cancelReason };
            ctx.emitter.emit({
                type: "run:cancelled",
                atState: ctx.cancelledAtState ?? state,
                ...reasonPart,
            });
            return;
        }
        case "failed":
        case "limit-exceeded": {
            const failure = ctx.failure ?? toFailure(HARNESS_CODES.INTERNAL, "run failed without a failure record");
            ctx.failure = failure;
            ctx.emitter.emit({ type: "run:failed", failure, atState: state });
            return;
        }
        default:
            return;
    }
}
130
// Drives the state machine from `intake` to a terminal state and returns that
// state as the run outcome.
// Each pass: honour an abort; run the guards for the state about to be entered
// (a tripped guard consumes the pass and redirects the machine); otherwise
// dispatch the state and take the step it returns — unless the wall-time budget
// ran out during the dispatch, in which case the wall-time step wins.
// MAX_LOOP_STEPS caps the number of passes; running into it is an internal failure.
export async function runLoop(ctx) {
    let state = transition(ctx, "intake", { to: "planning", reason: "task validated" });
    let passes = 0;
    while (passes < MAX_LOOP_STEPS && !TERMINAL_STATES.has(state)) {
        passes += 1;
        if (ctx.signal.aborted) {
            state = transition(ctx, state, abortStep("abort detected at top of loop"));
            break;
        }
        const guard = state === "planning" ? checkLoopLimits(ctx) : checkEntryGuards(ctx, state);
        if (guard !== null) {
            state = transition(ctx, state, guard);
            continue;
        }
        const dispatched = await dispatch(ctx, state);
        const overWallTime = checkWallTime(ctx);
        state = transition(ctx, state, overWallTime ?? dispatched);
    }
    const reachedTerminal = TERMINAL_STATES.has(state);
    if (!reachedTerminal) {
        ctx.failure = toFailure(HARNESS_CODES.INTERNAL, "state-machine safety step limit exceeded");
        state = transition(ctx, state, {
            to: "failed",
            reason: "internal: state-machine step limit exceeded",
        });
    }
    emitTerminal(ctx, state);
    return state;
}
@@ -0,0 +1,4 @@
1
+ import type { RunContext, StateStep } from "./context.js";
2
+ export declare function handlePatchProposal(ctx: RunContext): StateStep;
3
+ export declare function handleVerification(ctx: RunContext): StateStep;
4
+ export declare function handleReporting(ctx: RunContext): StateStep;