lorenz 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (630) hide show
  1. package/LICENSE +201 -0
  2. package/NOTICE +13 -0
  3. package/README.md +774 -0
  4. package/RELEASE-MANIFEST.json +211 -0
  5. package/apps/cli/bin/lorenz.js +25 -0
  6. package/apps/cli/dist/bin/cli.d.ts +3 -0
  7. package/apps/cli/dist/bin/cli.d.ts.map +1 -0
  8. package/apps/cli/dist/bin/cli.js +4 -0
  9. package/apps/cli/dist/bin/cli.js.map +1 -0
  10. package/apps/cli/dist/daemon.d.ts +76 -0
  11. package/apps/cli/dist/daemon.d.ts.map +1 -0
  12. package/apps/cli/dist/daemon.js +189 -0
  13. package/apps/cli/dist/daemon.js.map +1 -0
  14. package/apps/cli/dist/doctor.d.ts +40 -0
  15. package/apps/cli/dist/doctor.d.ts.map +1 -0
  16. package/apps/cli/dist/doctor.js +590 -0
  17. package/apps/cli/dist/doctor.js.map +1 -0
  18. package/apps/cli/dist/index.d.ts +32 -0
  19. package/apps/cli/dist/index.d.ts.map +1 -0
  20. package/apps/cli/dist/index.js +26 -0
  21. package/apps/cli/dist/index.js.map +1 -0
  22. package/apps/cli/dist/main.d.ts +40 -0
  23. package/apps/cli/dist/main.d.ts.map +1 -0
  24. package/apps/cli/dist/main.js +259 -0
  25. package/apps/cli/dist/main.js.map +1 -0
  26. package/apps/cli/dist/runs.d.ts +31 -0
  27. package/apps/cli/dist/runs.d.ts.map +1 -0
  28. package/apps/cli/dist/runs.js +281 -0
  29. package/apps/cli/dist/runs.js.map +1 -0
  30. package/apps/cli/dist/workerDriverLoader.d.ts +64 -0
  31. package/apps/cli/dist/workerDriverLoader.d.ts.map +1 -0
  32. package/apps/cli/dist/workerDriverLoader.js +211 -0
  33. package/apps/cli/dist/workerDriverLoader.js.map +1 -0
  34. package/apps/cli/package.json +57 -0
  35. package/apps/symphony-dashboard/dist/assets/index-B3owF3jd.css +1 -0
  36. package/apps/symphony-dashboard/dist/assets/index-DQ6XlL0d.js +227 -0
  37. package/apps/symphony-dashboard/dist/index.html +18 -0
  38. package/bin/lorenz +16 -0
  39. package/extensions/docker-worker/dist/index.d.ts +92 -0
  40. package/extensions/docker-worker/dist/index.d.ts.map +1 -0
  41. package/extensions/docker-worker/dist/index.js +283 -0
  42. package/extensions/docker-worker/dist/index.js.map +1 -0
  43. package/extensions/docker-worker/package.json +14 -0
  44. package/extensions/jira-tracker/dist/client.d.ts +50 -0
  45. package/extensions/jira-tracker/dist/client.d.ts.map +1 -0
  46. package/extensions/jira-tracker/dist/client.js +619 -0
  47. package/extensions/jira-tracker/dist/client.js.map +1 -0
  48. package/extensions/jira-tracker/dist/index.d.ts +5 -0
  49. package/extensions/jira-tracker/dist/index.d.ts.map +1 -0
  50. package/extensions/jira-tracker/dist/index.js +5 -0
  51. package/extensions/jira-tracker/dist/index.js.map +1 -0
  52. package/extensions/jira-tracker/dist/options.d.ts +38 -0
  53. package/extensions/jira-tracker/dist/options.d.ts.map +1 -0
  54. package/extensions/jira-tracker/dist/options.js +61 -0
  55. package/extensions/jira-tracker/dist/options.js.map +1 -0
  56. package/extensions/jira-tracker/dist/provider.d.ts +6 -0
  57. package/extensions/jira-tracker/dist/provider.d.ts.map +1 -0
  58. package/extensions/jira-tracker/dist/provider.js +178 -0
  59. package/extensions/jira-tracker/dist/provider.js.map +1 -0
  60. package/extensions/jira-tracker/dist/register.d.ts +10 -0
  61. package/extensions/jira-tracker/dist/register.d.ts.map +1 -0
  62. package/extensions/jira-tracker/dist/register.js +15 -0
  63. package/extensions/jira-tracker/dist/register.js.map +1 -0
  64. package/extensions/jira-tracker/package.json +16 -0
  65. package/extensions/linear-tracker/dist/client.d.ts +82 -0
  66. package/extensions/linear-tracker/dist/client.d.ts.map +1 -0
  67. package/extensions/linear-tracker/dist/client.js +622 -0
  68. package/extensions/linear-tracker/dist/client.js.map +1 -0
  69. package/extensions/linear-tracker/dist/index.d.ts +8 -0
  70. package/extensions/linear-tracker/dist/index.d.ts.map +1 -0
  71. package/extensions/linear-tracker/dist/index.js +7 -0
  72. package/extensions/linear-tracker/dist/index.js.map +1 -0
  73. package/extensions/linear-tracker/dist/options.d.ts +32 -0
  74. package/extensions/linear-tracker/dist/options.d.ts.map +1 -0
  75. package/extensions/linear-tracker/dist/options.js +59 -0
  76. package/extensions/linear-tracker/dist/options.js.map +1 -0
  77. package/extensions/linear-tracker/dist/provider.d.ts +4 -0
  78. package/extensions/linear-tracker/dist/provider.d.ts.map +1 -0
  79. package/extensions/linear-tracker/dist/provider.js +58 -0
  80. package/extensions/linear-tracker/dist/provider.js.map +1 -0
  81. package/extensions/linear-tracker/dist/register.d.ts +11 -0
  82. package/extensions/linear-tracker/dist/register.d.ts.map +1 -0
  83. package/extensions/linear-tracker/dist/register.js +19 -0
  84. package/extensions/linear-tracker/dist/register.js.map +1 -0
  85. package/extensions/linear-tracker/dist/toolOps.d.ts +8 -0
  86. package/extensions/linear-tracker/dist/toolOps.d.ts.map +1 -0
  87. package/extensions/linear-tracker/dist/toolOps.js +160 -0
  88. package/extensions/linear-tracker/dist/toolOps.js.map +1 -0
  89. package/extensions/linear-tracker/dist/tools.d.ts +7 -0
  90. package/extensions/linear-tracker/dist/tools.d.ts.map +1 -0
  91. package/extensions/linear-tracker/dist/tools.js +210 -0
  92. package/extensions/linear-tracker/dist/tools.js.map +1 -0
  93. package/extensions/linear-tracker/package.json +18 -0
  94. package/extensions/local-tracker/dist/boardStore.d.ts +116 -0
  95. package/extensions/local-tracker/dist/boardStore.d.ts.map +1 -0
  96. package/extensions/local-tracker/dist/boardStore.js +475 -0
  97. package/extensions/local-tracker/dist/boardStore.js.map +1 -0
  98. package/extensions/local-tracker/dist/client.d.ts +14 -0
  99. package/extensions/local-tracker/dist/client.d.ts.map +1 -0
  100. package/extensions/local-tracker/dist/client.js +27 -0
  101. package/extensions/local-tracker/dist/client.js.map +1 -0
  102. package/extensions/local-tracker/dist/index.d.ts +7 -0
  103. package/extensions/local-tracker/dist/index.d.ts.map +1 -0
  104. package/extensions/local-tracker/dist/index.js +7 -0
  105. package/extensions/local-tracker/dist/index.js.map +1 -0
  106. package/extensions/local-tracker/dist/options.d.ts +31 -0
  107. package/extensions/local-tracker/dist/options.d.ts.map +1 -0
  108. package/extensions/local-tracker/dist/options.js +69 -0
  109. package/extensions/local-tracker/dist/options.js.map +1 -0
  110. package/extensions/local-tracker/dist/provider.d.ts +9 -0
  111. package/extensions/local-tracker/dist/provider.d.ts.map +1 -0
  112. package/extensions/local-tracker/dist/provider.js +35 -0
  113. package/extensions/local-tracker/dist/provider.js.map +1 -0
  114. package/extensions/local-tracker/dist/register.d.ts +11 -0
  115. package/extensions/local-tracker/dist/register.d.ts.map +1 -0
  116. package/extensions/local-tracker/dist/register.js +19 -0
  117. package/extensions/local-tracker/dist/register.js.map +1 -0
  118. package/extensions/local-tracker/dist/resolveBoardDir.d.ts +24 -0
  119. package/extensions/local-tracker/dist/resolveBoardDir.d.ts.map +1 -0
  120. package/extensions/local-tracker/dist/resolveBoardDir.js +39 -0
  121. package/extensions/local-tracker/dist/resolveBoardDir.js.map +1 -0
  122. package/extensions/local-tracker/dist/toolOps.d.ts +9 -0
  123. package/extensions/local-tracker/dist/toolOps.d.ts.map +1 -0
  124. package/extensions/local-tracker/dist/toolOps.js +86 -0
  125. package/extensions/local-tracker/dist/toolOps.js.map +1 -0
  126. package/extensions/local-tracker/dist/tools.d.ts +7 -0
  127. package/extensions/local-tracker/dist/tools.d.ts.map +1 -0
  128. package/extensions/local-tracker/dist/tools.js +170 -0
  129. package/extensions/local-tracker/dist/tools.js.map +1 -0
  130. package/extensions/local-tracker/package.json +18 -0
  131. package/extensions/memory-tracker/dist/index.d.ts +24 -0
  132. package/extensions/memory-tracker/dist/index.d.ts.map +1 -0
  133. package/extensions/memory-tracker/dist/index.js +110 -0
  134. package/extensions/memory-tracker/dist/index.js.map +1 -0
  135. package/extensions/memory-tracker/package.json +16 -0
  136. package/extensions/slack-tracker/dist/client.d.ts +88 -0
  137. package/extensions/slack-tracker/dist/client.d.ts.map +1 -0
  138. package/extensions/slack-tracker/dist/client.js +246 -0
  139. package/extensions/slack-tracker/dist/client.js.map +1 -0
  140. package/extensions/slack-tracker/dist/inMemoryTransport.d.ts +42 -0
  141. package/extensions/slack-tracker/dist/inMemoryTransport.d.ts.map +1 -0
  142. package/extensions/slack-tracker/dist/inMemoryTransport.js +104 -0
  143. package/extensions/slack-tracker/dist/inMemoryTransport.js.map +1 -0
  144. package/extensions/slack-tracker/dist/index.d.ts +15 -0
  145. package/extensions/slack-tracker/dist/index.d.ts.map +1 -0
  146. package/extensions/slack-tracker/dist/index.js +11 -0
  147. package/extensions/slack-tracker/dist/index.js.map +1 -0
  148. package/extensions/slack-tracker/dist/mapping.d.ts +27 -0
  149. package/extensions/slack-tracker/dist/mapping.d.ts.map +1 -0
  150. package/extensions/slack-tracker/dist/mapping.js +109 -0
  151. package/extensions/slack-tracker/dist/mapping.js.map +1 -0
  152. package/extensions/slack-tracker/dist/operations.d.ts +41 -0
  153. package/extensions/slack-tracker/dist/operations.d.ts.map +1 -0
  154. package/extensions/slack-tracker/dist/operations.js +97 -0
  155. package/extensions/slack-tracker/dist/operations.js.map +1 -0
  156. package/extensions/slack-tracker/dist/options.d.ts +30 -0
  157. package/extensions/slack-tracker/dist/options.d.ts.map +1 -0
  158. package/extensions/slack-tracker/dist/options.js +49 -0
  159. package/extensions/slack-tracker/dist/options.js.map +1 -0
  160. package/extensions/slack-tracker/dist/provider.d.ts +9 -0
  161. package/extensions/slack-tracker/dist/provider.d.ts.map +1 -0
  162. package/extensions/slack-tracker/dist/provider.js +74 -0
  163. package/extensions/slack-tracker/dist/provider.js.map +1 -0
  164. package/extensions/slack-tracker/dist/register.d.ts +11 -0
  165. package/extensions/slack-tracker/dist/register.d.ts.map +1 -0
  166. package/extensions/slack-tracker/dist/register.js +19 -0
  167. package/extensions/slack-tracker/dist/register.js.map +1 -0
  168. package/extensions/slack-tracker/dist/threadState.d.ts +52 -0
  169. package/extensions/slack-tracker/dist/threadState.d.ts.map +1 -0
  170. package/extensions/slack-tracker/dist/threadState.js +192 -0
  171. package/extensions/slack-tracker/dist/threadState.js.map +1 -0
  172. package/extensions/slack-tracker/dist/toolOps.d.ts +13 -0
  173. package/extensions/slack-tracker/dist/toolOps.d.ts.map +1 -0
  174. package/extensions/slack-tracker/dist/toolOps.js +76 -0
  175. package/extensions/slack-tracker/dist/toolOps.js.map +1 -0
  176. package/extensions/slack-tracker/dist/tools.d.ts +8 -0
  177. package/extensions/slack-tracker/dist/tools.d.ts.map +1 -0
  178. package/extensions/slack-tracker/dist/tools.js +266 -0
  179. package/extensions/slack-tracker/dist/tools.js.map +1 -0
  180. package/extensions/slack-tracker/dist/transport.d.ts +63 -0
  181. package/extensions/slack-tracker/dist/transport.d.ts.map +1 -0
  182. package/extensions/slack-tracker/dist/transport.js +2 -0
  183. package/extensions/slack-tracker/dist/transport.js.map +1 -0
  184. package/extensions/slack-tracker/dist/webTransport.d.ts +44 -0
  185. package/extensions/slack-tracker/dist/webTransport.d.ts.map +1 -0
  186. package/extensions/slack-tracker/dist/webTransport.js +402 -0
  187. package/extensions/slack-tracker/dist/webTransport.js.map +1 -0
  188. package/extensions/slack-tracker/package.json +17 -0
  189. package/package.json +89 -0
  190. package/packages/acp/dist/childProcess.d.ts +4 -0
  191. package/packages/acp/dist/childProcess.d.ts.map +1 -0
  192. package/packages/acp/dist/childProcess.js +33 -0
  193. package/packages/acp/dist/childProcess.js.map +1 -0
  194. package/packages/acp/dist/index.d.ts +70 -0
  195. package/packages/acp/dist/index.d.ts.map +1 -0
  196. package/packages/acp/dist/index.js +701 -0
  197. package/packages/acp/dist/index.js.map +1 -0
  198. package/packages/acp/dist/options.d.ts +24 -0
  199. package/packages/acp/dist/options.d.ts.map +1 -0
  200. package/packages/acp/dist/options.js +92 -0
  201. package/packages/acp/dist/options.js.map +1 -0
  202. package/packages/acp/dist/toml.d.ts +2 -0
  203. package/packages/acp/dist/toml.d.ts.map +1 -0
  204. package/packages/acp/dist/toml.js +51 -0
  205. package/packages/acp/dist/toml.js.map +1 -0
  206. package/packages/acp/package.json +24 -0
  207. package/packages/agent-runner/dist/index.d.ts +58 -0
  208. package/packages/agent-runner/dist/index.d.ts.map +1 -0
  209. package/packages/agent-runner/dist/index.js +288 -0
  210. package/packages/agent-runner/dist/index.js.map +1 -0
  211. package/packages/agent-runner/package.json +19 -0
  212. package/packages/agent-sdk/dist/index.d.ts +2 -0
  213. package/packages/agent-sdk/dist/index.d.ts.map +1 -0
  214. package/packages/agent-sdk/dist/index.js +2 -0
  215. package/packages/agent-sdk/dist/index.js.map +1 -0
  216. package/packages/agent-sdk/dist/provider.d.ts +66 -0
  217. package/packages/agent-sdk/dist/provider.d.ts.map +1 -0
  218. package/packages/agent-sdk/dist/provider.js +38 -0
  219. package/packages/agent-sdk/dist/provider.js.map +1 -0
  220. package/packages/agent-sdk/package.json +14 -0
  221. package/packages/cli-kit/dist/index.d.ts +20 -0
  222. package/packages/cli-kit/dist/index.d.ts.map +1 -0
  223. package/packages/cli-kit/dist/index.js +72 -0
  224. package/packages/cli-kit/dist/index.js.map +1 -0
  225. package/packages/cli-kit/package.json +14 -0
  226. package/packages/config/dist/aliases.d.ts +10 -0
  227. package/packages/config/dist/aliases.d.ts.map +1 -0
  228. package/packages/config/dist/aliases.js +153 -0
  229. package/packages/config/dist/aliases.js.map +1 -0
  230. package/packages/config/dist/defaults.d.ts +12 -0
  231. package/packages/config/dist/defaults.d.ts.map +1 -0
  232. package/packages/config/dist/defaults.js +78 -0
  233. package/packages/config/dist/defaults.js.map +1 -0
  234. package/packages/config/dist/errors.d.ts +3 -0
  235. package/packages/config/dist/errors.d.ts.map +1 -0
  236. package/packages/config/dist/errors.js +56 -0
  237. package/packages/config/dist/errors.js.map +1 -0
  238. package/packages/config/dist/index.d.ts +5 -0
  239. package/packages/config/dist/index.d.ts.map +1 -0
  240. package/packages/config/dist/index.js +4 -0
  241. package/packages/config/dist/index.js.map +1 -0
  242. package/packages/config/dist/leaf-utils.d.ts +3 -0
  243. package/packages/config/dist/leaf-utils.d.ts.map +1 -0
  244. package/packages/config/dist/leaf-utils.js +9 -0
  245. package/packages/config/dist/leaf-utils.js.map +1 -0
  246. package/packages/config/dist/parse.d.ts +11 -0
  247. package/packages/config/dist/parse.d.ts.map +1 -0
  248. package/packages/config/dist/parse.js +821 -0
  249. package/packages/config/dist/parse.js.map +1 -0
  250. package/packages/config/dist/schemas.d.ts +214 -0
  251. package/packages/config/dist/schemas.d.ts.map +1 -0
  252. package/packages/config/dist/schemas.js +248 -0
  253. package/packages/config/dist/schemas.js.map +1 -0
  254. package/packages/config/package.json +19 -0
  255. package/packages/dispatch/dist/index.d.ts +22 -0
  256. package/packages/dispatch/dist/index.d.ts.map +1 -0
  257. package/packages/dispatch/dist/index.js +117 -0
  258. package/packages/dispatch/dist/index.js.map +1 -0
  259. package/packages/dispatch/package.json +16 -0
  260. package/packages/dispatch-coordinator/dist/coordinator.d.ts +158 -0
  261. package/packages/dispatch-coordinator/dist/coordinator.d.ts.map +1 -0
  262. package/packages/dispatch-coordinator/dist/coordinator.js +529 -0
  263. package/packages/dispatch-coordinator/dist/coordinator.js.map +1 -0
  264. package/packages/dispatch-coordinator/dist/gate.d.ts +24 -0
  265. package/packages/dispatch-coordinator/dist/gate.d.ts.map +1 -0
  266. package/packages/dispatch-coordinator/dist/gate.js +47 -0
  267. package/packages/dispatch-coordinator/dist/gate.js.map +1 -0
  268. package/packages/dispatch-coordinator/dist/index.d.ts +6 -0
  269. package/packages/dispatch-coordinator/dist/index.d.ts.map +1 -0
  270. package/packages/dispatch-coordinator/dist/index.js +16 -0
  271. package/packages/dispatch-coordinator/dist/index.js.map +1 -0
  272. package/packages/dispatch-coordinator/dist/mcpEndpointManager.d.ts +28 -0
  273. package/packages/dispatch-coordinator/dist/mcpEndpointManager.d.ts.map +1 -0
  274. package/packages/dispatch-coordinator/dist/mcpEndpointManager.js +54 -0
  275. package/packages/dispatch-coordinator/dist/mcpEndpointManager.js.map +1 -0
  276. package/packages/dispatch-coordinator/dist/nullEndpointManager.d.ts +18 -0
  277. package/packages/dispatch-coordinator/dist/nullEndpointManager.d.ts.map +1 -0
  278. package/packages/dispatch-coordinator/dist/nullEndpointManager.js +40 -0
  279. package/packages/dispatch-coordinator/dist/nullEndpointManager.js.map +1 -0
  280. package/packages/dispatch-coordinator/dist/types.d.ts +119 -0
  281. package/packages/dispatch-coordinator/dist/types.d.ts.map +1 -0
  282. package/packages/dispatch-coordinator/dist/types.js +17 -0
  283. package/packages/dispatch-coordinator/dist/types.js.map +1 -0
  284. package/packages/dispatch-coordinator/package.json +16 -0
  285. package/packages/domain/dist/index.d.ts +775 -0
  286. package/packages/domain/dist/index.d.ts.map +1 -0
  287. package/packages/domain/dist/index.js +124 -0
  288. package/packages/domain/dist/index.js.map +1 -0
  289. package/packages/domain/package.json +14 -0
  290. package/packages/humanize/dist/index.d.ts +4 -0
  291. package/packages/humanize/dist/index.d.ts.map +1 -0
  292. package/packages/humanize/dist/index.js +347 -0
  293. package/packages/humanize/dist/index.js.map +1 -0
  294. package/packages/humanize/package.json +11 -0
  295. package/packages/issue/dist/index.d.ts +7 -0
  296. package/packages/issue/dist/index.d.ts.map +1 -0
  297. package/packages/issue/dist/index.js +147 -0
  298. package/packages/issue/dist/index.js.map +1 -0
  299. package/packages/issue/package.json +14 -0
  300. package/packages/log-file/dist/index.d.ts +10 -0
  301. package/packages/log-file/dist/index.d.ts.map +1 -0
  302. package/packages/log-file/dist/index.js +200 -0
  303. package/packages/log-file/dist/index.js.map +1 -0
  304. package/packages/log-file/package.json +15 -0
  305. package/packages/mcp/dist/agentEndpoint.d.ts +31 -0
  306. package/packages/mcp/dist/agentEndpoint.d.ts.map +1 -0
  307. package/packages/mcp/dist/agentEndpoint.js +270 -0
  308. package/packages/mcp/dist/agentEndpoint.js.map +1 -0
  309. package/packages/mcp/dist/auth.d.ts +7 -0
  310. package/packages/mcp/dist/auth.d.ts.map +1 -0
  311. package/packages/mcp/dist/auth.js +48 -0
  312. package/packages/mcp/dist/auth.js.map +1 -0
  313. package/packages/mcp/dist/filter.d.ts +70 -0
  314. package/packages/mcp/dist/filter.d.ts.map +1 -0
  315. package/packages/mcp/dist/filter.js +231 -0
  316. package/packages/mcp/dist/filter.js.map +1 -0
  317. package/packages/mcp/dist/index.d.ts +7 -0
  318. package/packages/mcp/dist/index.d.ts.map +1 -0
  319. package/packages/mcp/dist/index.js +5 -0
  320. package/packages/mcp/dist/index.js.map +1 -0
  321. package/packages/mcp/dist/server.d.ts +31 -0
  322. package/packages/mcp/dist/server.d.ts.map +1 -0
  323. package/packages/mcp/dist/server.js +176 -0
  324. package/packages/mcp/dist/server.js.map +1 -0
  325. package/packages/mcp/dist/tools/linear.d.ts +5 -0
  326. package/packages/mcp/dist/tools/linear.d.ts.map +1 -0
  327. package/packages/mcp/dist/tools/linear.js +192 -0
  328. package/packages/mcp/dist/tools/linear.js.map +1 -0
  329. package/packages/mcp/dist/tools/local.d.ts +5 -0
  330. package/packages/mcp/dist/tools/local.d.ts.map +1 -0
  331. package/packages/mcp/dist/tools/local.js +161 -0
  332. package/packages/mcp/dist/tools/local.js.map +1 -0
  333. package/packages/mcp/dist/tools/result.d.ts +5 -0
  334. package/packages/mcp/dist/tools/result.d.ts.map +1 -0
  335. package/packages/mcp/dist/tools/result.js +15 -0
  336. package/packages/mcp/dist/tools/result.js.map +1 -0
  337. package/packages/mcp/dist/tools.d.ts +14 -0
  338. package/packages/mcp/dist/tools.d.ts.map +1 -0
  339. package/packages/mcp/dist/tools.js +58 -0
  340. package/packages/mcp/dist/tools.js.map +1 -0
  341. package/packages/mcp/package.json +20 -0
  342. package/packages/orchestrator/dist/index.d.ts +171 -0
  343. package/packages/orchestrator/dist/index.d.ts.map +1 -0
  344. package/packages/orchestrator/dist/index.js +524 -0
  345. package/packages/orchestrator/dist/index.js.map +1 -0
  346. package/packages/orchestrator/package.json +18 -0
  347. package/packages/policies/dist/index.d.ts +11 -0
  348. package/packages/policies/dist/index.d.ts.map +1 -0
  349. package/packages/policies/dist/index.js +6 -0
  350. package/packages/policies/dist/index.js.map +1 -0
  351. package/packages/policies/dist/reconciliation.d.ts +5 -0
  352. package/packages/policies/dist/reconciliation.d.ts.map +1 -0
  353. package/packages/policies/dist/reconciliation.js +17 -0
  354. package/packages/policies/dist/reconciliation.js.map +1 -0
  355. package/packages/policies/dist/resume.d.ts +14 -0
  356. package/packages/policies/dist/resume.d.ts.map +1 -0
  357. package/packages/policies/dist/resume.js +7 -0
  358. package/packages/policies/dist/resume.js.map +1 -0
  359. package/packages/policies/dist/retry.d.ts +4 -0
  360. package/packages/policies/dist/retry.d.ts.map +1 -0
  361. package/packages/policies/dist/retry.js +7 -0
  362. package/packages/policies/dist/retry.js.map +1 -0
  363. package/packages/policies/dist/stopReason.d.ts +4 -0
  364. package/packages/policies/dist/stopReason.d.ts.map +1 -0
  365. package/packages/policies/dist/stopReason.js +11 -0
  366. package/packages/policies/dist/stopReason.js.map +1 -0
  367. package/packages/policies/dist/usage.d.ts +14 -0
  368. package/packages/policies/dist/usage.d.ts.map +1 -0
  369. package/packages/policies/dist/usage.js +38 -0
  370. package/packages/policies/dist/usage.js.map +1 -0
  371. package/packages/policies/dist/workerHost.d.ts +8 -0
  372. package/packages/policies/dist/workerHost.d.ts.map +1 -0
  373. package/packages/policies/dist/workerHost.js +20 -0
  374. package/packages/policies/dist/workerHost.js.map +1 -0
  375. package/packages/policies/package.json +21 -0
  376. package/packages/presenter/dist/index.d.ts +81 -0
  377. package/packages/presenter/dist/index.d.ts.map +1 -0
  378. package/packages/presenter/dist/index.js +421 -0
  379. package/packages/presenter/dist/index.js.map +1 -0
  380. package/packages/presenter/package.json +16 -0
  381. package/packages/projections/dist/index.d.ts +10 -0
  382. package/packages/projections/dist/index.d.ts.map +1 -0
  383. package/packages/projections/dist/index.js +30 -0
  384. package/packages/projections/dist/index.js.map +1 -0
  385. package/packages/projections/package.json +15 -0
  386. package/packages/prompt/dist/index.d.ts +9 -0
  387. package/packages/prompt/dist/index.d.ts.map +1 -0
  388. package/packages/prompt/dist/index.js +71 -0
  389. package/packages/prompt/dist/index.js.map +1 -0
  390. package/packages/prompt/package.json +16 -0
  391. package/packages/retry-scheduler/dist/index.d.ts +12 -0
  392. package/packages/retry-scheduler/dist/index.d.ts.map +1 -0
  393. package/packages/retry-scheduler/dist/index.js +39 -0
  394. package/packages/retry-scheduler/dist/index.js.map +1 -0
  395. package/packages/retry-scheduler/package.json +15 -0
  396. package/packages/runtime/dist/index.d.ts +157 -0
  397. package/packages/runtime/dist/index.d.ts.map +1 -0
  398. package/packages/runtime/dist/index.js +1074 -0
  399. package/packages/runtime/dist/index.js.map +1 -0
  400. package/packages/runtime/package.json +26 -0
  401. package/packages/runtime-events/dist/index.d.ts +110 -0
  402. package/packages/runtime-events/dist/index.d.ts.map +1 -0
  403. package/packages/runtime-events/dist/index.js +25 -0
  404. package/packages/runtime-events/dist/index.js.map +1 -0
  405. package/packages/runtime-events/package.json +14 -0
  406. package/packages/server/dist/index.d.ts +25 -0
  407. package/packages/server/dist/index.d.ts.map +1 -0
  408. package/packages/server/dist/index.js +213 -0
  409. package/packages/server/dist/index.js.map +1 -0
  410. package/packages/server/dist/issue-store.d.ts +26 -0
  411. package/packages/server/dist/issue-store.d.ts.map +1 -0
  412. package/packages/server/dist/issue-store.js +88 -0
  413. package/packages/server/dist/issue-store.js.map +1 -0
  414. package/packages/server/dist/path-params.d.ts +6 -0
  415. package/packages/server/dist/path-params.d.ts.map +1 -0
  416. package/packages/server/dist/path-params.js +15 -0
  417. package/packages/server/dist/path-params.js.map +1 -0
  418. package/packages/server/dist/source.d.ts +12 -0
  419. package/packages/server/dist/source.d.ts.map +1 -0
  420. package/packages/server/dist/source.js +2 -0
  421. package/packages/server/dist/source.js.map +1 -0
  422. package/packages/server/dist/trace-routes.d.ts +21 -0
  423. package/packages/server/dist/trace-routes.d.ts.map +1 -0
  424. package/packages/server/dist/trace-routes.js +66 -0
  425. package/packages/server/dist/trace-routes.js.map +1 -0
  426. package/packages/server/dist/ws.d.ts +18 -0
  427. package/packages/server/dist/ws.d.ts.map +1 -0
  428. package/packages/server/dist/ws.js +168 -0
  429. package/packages/server/dist/ws.js.map +1 -0
  430. package/packages/server/package.json +22 -0
  431. package/packages/ssh/dist/index.d.ts +33 -0
  432. package/packages/ssh/dist/index.d.ts.map +1 -0
  433. package/packages/ssh/dist/index.js +281 -0
  434. package/packages/ssh/dist/index.js.map +1 -0
  435. package/packages/ssh/package.json +15 -0
  436. package/packages/static-worker/dist/index.d.ts +73 -0
  437. package/packages/static-worker/dist/index.d.ts.map +1 -0
  438. package/packages/static-worker/dist/index.js +150 -0
  439. package/packages/static-worker/dist/index.js.map +1 -0
  440. package/packages/static-worker/package.json +14 -0
  441. package/packages/tool-sdk/dist/filter.d.ts +70 -0
  442. package/packages/tool-sdk/dist/filter.d.ts.map +1 -0
  443. package/packages/tool-sdk/dist/filter.js +231 -0
  444. package/packages/tool-sdk/dist/filter.js.map +1 -0
  445. package/packages/tool-sdk/dist/index.d.ts +6 -0
  446. package/packages/tool-sdk/dist/index.d.ts.map +1 -0
  447. package/packages/tool-sdk/dist/index.js +4 -0
  448. package/packages/tool-sdk/dist/index.js.map +1 -0
  449. package/packages/tool-sdk/dist/provider.d.ts +51 -0
  450. package/packages/tool-sdk/dist/provider.d.ts.map +1 -0
  451. package/packages/tool-sdk/dist/provider.js +2 -0
  452. package/packages/tool-sdk/dist/provider.js.map +1 -0
  453. package/packages/tool-sdk/dist/registry.d.ts +35 -0
  454. package/packages/tool-sdk/dist/registry.d.ts.map +1 -0
  455. package/packages/tool-sdk/dist/registry.js +85 -0
  456. package/packages/tool-sdk/dist/registry.js.map +1 -0
  457. package/packages/tool-sdk/dist/result.d.ts +5 -0
  458. package/packages/tool-sdk/dist/result.d.ts.map +1 -0
  459. package/packages/tool-sdk/dist/result.js +15 -0
  460. package/packages/tool-sdk/dist/result.js.map +1 -0
  461. package/packages/tool-sdk/package.json +14 -0
  462. package/packages/traceviz-emitter/dist/index.d.ts +19 -0
  463. package/packages/traceviz-emitter/dist/index.d.ts.map +1 -0
  464. package/packages/traceviz-emitter/dist/index.js +97 -0
  465. package/packages/traceviz-emitter/dist/index.js.map +1 -0
  466. package/packages/traceviz-emitter/package.json +17 -0
  467. package/packages/traceviz-server/dist/index.d.ts +14 -0
  468. package/packages/traceviz-server/dist/index.d.ts.map +1 -0
  469. package/packages/traceviz-server/dist/index.js +10 -0
  470. package/packages/traceviz-server/dist/index.js.map +1 -0
  471. package/packages/traceviz-server/dist/models/api.d.ts +51 -0
  472. package/packages/traceviz-server/dist/models/api.d.ts.map +1 -0
  473. package/packages/traceviz-server/dist/models/api.js +5 -0
  474. package/packages/traceviz-server/dist/models/api.js.map +1 -0
  475. package/packages/traceviz-server/dist/models/display-events.d.ts +58 -0
  476. package/packages/traceviz-server/dist/models/display-events.d.ts.map +1 -0
  477. package/packages/traceviz-server/dist/models/display-events.js +6 -0
  478. package/packages/traceviz-server/dist/models/display-events.js.map +1 -0
  479. package/packages/traceviz-server/dist/parser.d.ts +14 -0
  480. package/packages/traceviz-server/dist/parser.d.ts.map +1 -0
  481. package/packages/traceviz-server/dist/parser.js +363 -0
  482. package/packages/traceviz-server/dist/parser.js.map +1 -0
  483. package/packages/traceviz-server/dist/stats.d.ts +7 -0
  484. package/packages/traceviz-server/dist/stats.d.ts.map +1 -0
  485. package/packages/traceviz-server/dist/stats.js +81 -0
  486. package/packages/traceviz-server/dist/stats.js.map +1 -0
  487. package/packages/traceviz-server/dist/watcher.d.ts +54 -0
  488. package/packages/traceviz-server/dist/watcher.d.ts.map +1 -0
  489. package/packages/traceviz-server/dist/watcher.js +368 -0
  490. package/packages/traceviz-server/dist/watcher.js.map +1 -0
  491. package/packages/traceviz-server/package.json +16 -0
  492. package/packages/tracker-sdk/dist/index.d.ts +5 -0
  493. package/packages/tracker-sdk/dist/index.d.ts.map +1 -0
  494. package/packages/tracker-sdk/dist/index.js +4 -0
  495. package/packages/tracker-sdk/dist/index.js.map +1 -0
  496. package/packages/tracker-sdk/dist/options.d.ts +20 -0
  497. package/packages/tracker-sdk/dist/options.d.ts.map +1 -0
  498. package/packages/tracker-sdk/dist/options.js +46 -0
  499. package/packages/tracker-sdk/dist/options.js.map +1 -0
  500. package/packages/tracker-sdk/dist/provider.d.ts +104 -0
  501. package/packages/tracker-sdk/dist/provider.d.ts.map +1 -0
  502. package/packages/tracker-sdk/dist/provider.js +2 -0
  503. package/packages/tracker-sdk/dist/provider.js.map +1 -0
  504. package/packages/tracker-sdk/dist/registry.d.ts +26 -0
  505. package/packages/tracker-sdk/dist/registry.d.ts.map +1 -0
  506. package/packages/tracker-sdk/dist/registry.js +52 -0
  507. package/packages/tracker-sdk/dist/registry.js.map +1 -0
  508. package/packages/tracker-sdk/dist/toolPack.d.ts +10 -0
  509. package/packages/tracker-sdk/dist/toolPack.d.ts.map +1 -0
  510. package/packages/tracker-sdk/dist/toolPack.js +185 -0
  511. package/packages/tracker-sdk/dist/toolPack.js.map +1 -0
  512. package/packages/tracker-sdk/package.json +15 -0
  513. package/packages/tui/dist/index.d.ts +35 -0
  514. package/packages/tui/dist/index.d.ts.map +1 -0
  515. package/packages/tui/dist/index.js +354 -0
  516. package/packages/tui/dist/index.js.map +1 -0
  517. package/packages/tui/package.json +18 -0
  518. package/packages/worker-host-pool/dist/index.d.ts +33 -0
  519. package/packages/worker-host-pool/dist/index.d.ts.map +1 -0
  520. package/packages/worker-host-pool/dist/index.js +311 -0
  521. package/packages/worker-host-pool/dist/index.js.map +1 -0
  522. package/packages/worker-host-pool/package.json +14 -0
  523. package/packages/worker-pool/dist/index.d.ts +6 -0
  524. package/packages/worker-pool/dist/index.d.ts.map +1 -0
  525. package/packages/worker-pool/dist/index.js +15 -0
  526. package/packages/worker-pool/dist/index.js.map +1 -0
  527. package/packages/worker-pool/dist/lease.d.ts +36 -0
  528. package/packages/worker-pool/dist/lease.d.ts.map +1 -0
  529. package/packages/worker-pool/dist/lease.js +53 -0
  530. package/packages/worker-pool/dist/lease.js.map +1 -0
  531. package/packages/worker-pool/dist/ledger.d.ts +51 -0
  532. package/packages/worker-pool/dist/ledger.d.ts.map +1 -0
  533. package/packages/worker-pool/dist/ledger.js +165 -0
  534. package/packages/worker-pool/dist/ledger.js.map +1 -0
  535. package/packages/worker-pool/dist/mutex.d.ts +10 -0
  536. package/packages/worker-pool/dist/mutex.d.ts.map +1 -0
  537. package/packages/worker-pool/dist/mutex.js +22 -0
  538. package/packages/worker-pool/dist/mutex.js.map +1 -0
  539. package/packages/worker-pool/dist/pool.d.ts +33 -0
  540. package/packages/worker-pool/dist/pool.d.ts.map +1 -0
  541. package/packages/worker-pool/dist/pool.js +1727 -0
  542. package/packages/worker-pool/dist/pool.js.map +1 -0
  543. package/packages/worker-pool/dist/reaper.d.ts +94 -0
  544. package/packages/worker-pool/dist/reaper.d.ts.map +1 -0
  545. package/packages/worker-pool/dist/reaper.js +295 -0
  546. package/packages/worker-pool/dist/reaper.js.map +1 -0
  547. package/packages/worker-pool/dist/types.d.ts +249 -0
  548. package/packages/worker-pool/dist/types.d.ts.map +1 -0
  549. package/packages/worker-pool/dist/types.js +2 -0
  550. package/packages/worker-pool/dist/types.js.map +1 -0
  551. package/packages/worker-pool/package.json +16 -0
  552. package/packages/worker-sdk/dist/conformance.d.ts +64 -0
  553. package/packages/worker-sdk/dist/conformance.d.ts.map +1 -0
  554. package/packages/worker-sdk/dist/conformance.js +109 -0
  555. package/packages/worker-sdk/dist/conformance.js.map +1 -0
  556. package/packages/worker-sdk/dist/fake.d.ts +76 -0
  557. package/packages/worker-sdk/dist/fake.d.ts.map +1 -0
  558. package/packages/worker-sdk/dist/fake.js +142 -0
  559. package/packages/worker-sdk/dist/fake.js.map +1 -0
  560. package/packages/worker-sdk/dist/index.d.ts +5 -0
  561. package/packages/worker-sdk/dist/index.d.ts.map +1 -0
  562. package/packages/worker-sdk/dist/index.js +10 -0
  563. package/packages/worker-sdk/dist/index.js.map +1 -0
  564. package/packages/worker-sdk/dist/module.d.ts +46 -0
  565. package/packages/worker-sdk/dist/module.d.ts.map +1 -0
  566. package/packages/worker-sdk/dist/module.js +59 -0
  567. package/packages/worker-sdk/dist/module.js.map +1 -0
  568. package/packages/worker-sdk/dist/registry.d.ts +24 -0
  569. package/packages/worker-sdk/dist/registry.d.ts.map +1 -0
  570. package/packages/worker-sdk/dist/registry.js +49 -0
  571. package/packages/worker-sdk/dist/registry.js.map +1 -0
  572. package/packages/worker-sdk/dist/types.d.ts +138 -0
  573. package/packages/worker-sdk/dist/types.d.ts.map +1 -0
  574. package/packages/worker-sdk/dist/types.js +21 -0
  575. package/packages/worker-sdk/dist/types.js.map +1 -0
  576. package/packages/worker-sdk/package.json +15 -0
  577. package/packages/workflow/dist/index.d.ts +33 -0
  578. package/packages/workflow/dist/index.d.ts.map +1 -0
  579. package/packages/workflow/dist/index.js +125 -0
  580. package/packages/workflow/dist/index.js.map +1 -0
  581. package/packages/workflow/package.json +19 -0
  582. package/packages/workspace/dist/index.d.ts +70 -0
  583. package/packages/workspace/dist/index.d.ts.map +1 -0
  584. package/packages/workspace/dist/index.js +1016 -0
  585. package/packages/workspace/dist/index.js.map +1 -0
  586. package/packages/workspace/package.json +17 -0
  587. package/runtime-deps/anthropic-claude-agent-sdk/LICENSE.md +1 -0
  588. package/runtime-deps/anthropic-claude-agent-sdk/README.md +65 -0
  589. package/runtime-deps/anthropic-claude-agent-sdk/agentSdkTypes.d.ts +1 -0
  590. package/runtime-deps/anthropic-claude-agent-sdk/assistant.d.ts +135 -0
  591. package/runtime-deps/anthropic-claude-agent-sdk/assistant.mjs +190 -0
  592. package/runtime-deps/anthropic-claude-agent-sdk/bridge.d.ts +231 -0
  593. package/runtime-deps/anthropic-claude-agent-sdk/bridge.mjs +168 -0
  594. package/runtime-deps/anthropic-claude-agent-sdk/browser-sdk.d.ts +53 -0
  595. package/runtime-deps/anthropic-claude-agent-sdk/browser-sdk.js +93 -0
  596. package/runtime-deps/anthropic-claude-agent-sdk/extractFromBunfs.d.ts +1 -0
  597. package/runtime-deps/anthropic-claude-agent-sdk/extractFromBunfs.js +156 -0
  598. package/runtime-deps/anthropic-claude-agent-sdk/manifest.json +47 -0
  599. package/runtime-deps/anthropic-claude-agent-sdk/manifest.zst.json +55 -0
  600. package/runtime-deps/anthropic-claude-agent-sdk/node_modules/.bin/anthropic-ai-sdk +21 -0
  601. package/runtime-deps/anthropic-claude-agent-sdk/package.json +81 -0
  602. package/runtime-deps/anthropic-claude-agent-sdk/sdk-tools.d.ts +3170 -0
  603. package/runtime-deps/anthropic-claude-agent-sdk/sdk.d.ts +6000 -0
  604. package/runtime-deps/anthropic-claude-agent-sdk/sdk.mjs +119 -0
  605. package/runtime-deps/openai-codex/README.md +60 -0
  606. package/runtime-deps/openai-codex/bin/codex.js +229 -0
  607. package/runtime-deps/openai-codex/bin/rg +79 -0
  608. package/runtime-deps/openai-codex/package.json +22 -0
  609. package/vendor/claude-agent-acp/dist/acp-agent.d.ts +239 -0
  610. package/vendor/claude-agent-acp/dist/acp-agent.d.ts.map +1 -0
  611. package/vendor/claude-agent-acp/dist/acp-agent.js +2693 -0
  612. package/vendor/claude-agent-acp/dist/bundle.js +41230 -0
  613. package/vendor/claude-agent-acp/dist/index.d.ts +3 -0
  614. package/vendor/claude-agent-acp/dist/index.d.ts.map +1 -0
  615. package/vendor/claude-agent-acp/dist/index.js +67 -0
  616. package/vendor/claude-agent-acp/dist/lib.d.ts +6 -0
  617. package/vendor/claude-agent-acp/dist/lib.d.ts.map +1 -0
  618. package/vendor/claude-agent-acp/dist/lib.js +5 -0
  619. package/vendor/claude-agent-acp/dist/settings.d.ts +68 -0
  620. package/vendor/claude-agent-acp/dist/settings.d.ts.map +1 -0
  621. package/vendor/claude-agent-acp/dist/settings.js +182 -0
  622. package/vendor/claude-agent-acp/dist/tools.d.ts +103 -0
  623. package/vendor/claude-agent-acp/dist/tools.d.ts.map +1 -0
  624. package/vendor/claude-agent-acp/dist/tools.js +713 -0
  625. package/vendor/claude-agent-acp/dist/utils.d.ts +16 -0
  626. package/vendor/claude-agent-acp/dist/utils.d.ts.map +1 -0
  627. package/vendor/claude-agent-acp/dist/utils.js +83 -0
  628. package/vendor/claude-agent-acp/package.json +23 -0
  629. package/vendor/codex-acp/dist/index.js +21280 -0
  630. package/vendor/codex-acp/package.json +17 -0
@@ -0,0 +1,1727 @@
1
+ import { randomUUID } from "node:crypto";
2
+ import { defaultWorkerDriverRegistry, POOL_OWNED_LABEL, } from "@lorenz/worker-sdk";
3
+ import { runSsh } from "@lorenz/ssh";
4
+ import { createLedger } from "./ledger.js";
5
+ import { createLease } from "./lease.js";
6
+ import { createMutex } from "./mutex.js";
7
+ import { runReaperTick } from "./reaper.js";
8
/**
 * Looks up the factory for the configured driver kind and builds the driver.
 *
 * The registry comes from `deps.drivers` when supplied, otherwise from the
 * default worker-driver registry. The factory receives the operator's
 * `driverOptions` (an empty object when none are configured) plus a narrow
 * dependency bag: the clock, the event logger and the pool-owned `runSsh`.
 * Drivers therefore only ever see the injected `runSsh`, never the ssh
 * package itself.
 *
 * Nothing is caught here: whatever `registry.require` throws for an unknown
 * kind, and whatever the factory throws for unusable options, propagates to
 * the caller so construction fails loudly.
 *
 * @param settings Pool settings; reads `driver` and `driverOptions`.
 * @param deps Pool dependencies; reads `drivers`, `clock` and `logEvent`.
 * @returns The driver instance produced by the factory.
 */
function resolveDriver(settings, deps) {
    const registry = deps.drivers ?? defaultWorkerDriverRegistry;
    const driverFactory = registry.require(settings.driver);
    const { clock, logEvent } = deps;
    const options = settings.driverOptions ?? {};
    return driverFactory.create(options, { clock, logEvent, runSsh });
}
26
+ /**
27
+ * Attempt budget for the authoritative `driver.list()` call made by
28
+ * {@link WorkerPoolImpl.hydrate}. A transient driver failure must not be mistaken for a
29
+ * successful (empty) startup, so the list is tried up to this many times, with a short
30
+ * clock-driven backoff in between, before the pool gives up. (Consumed by hydrate, which is outside this excerpt.)
31
+ */
32
+ const HYDRATE_LIST_ATTEMPTS = 3;
33
+ /** Base backoff in milliseconds between hydrate `list()` tries; scaled by the attempt number, so the wait grows with each retry. */
34
+ const HYDRATE_LIST_BACKOFF_MS = 50;
35
+ /**
36
+ * Readiness-probe budget: a freshly provisioned worker is probed for SSH readiness up to
37
+ * this many times before a grow / warm top-up gives up on it (a cold cloud worker's sshd may
38
+ * lag the provision return). An already-up host (static-ssh) or the fake driver probes ok on
39
+ * the first attempt, so the retries only engage for a genuinely cold worker.
40
+ */
41
+ const PROBE_READY_ATTEMPTS = 3;
42
+ /** Base backoff in milliseconds between readiness probes; scaled by the attempt number. */
43
+ const PROBE_READY_BACKOFF_MS = 50;
44
/**
 * Formats a Date as its UTC calendar day, i.e. the first ten characters of its
 * ISO-8601 string (`YYYY-MM-DD` for ordinary four-digit years). The pool uses
 * this key to decide when the daily spend accumulator rolls over.
 *
 * @param now A valid Date; an invalid Date makes `toISOString` throw RangeError.
 * @returns The day key string.
 */
function utcDayKey(now) {
    const isoTimestamp = now.toISOString();
    return isoTimestamp.substring(0, 10);
}
48
/**
 * Reports whether a worker state counts as live for capacity/spend accounting.
 * Every state is live except the three teardown states `DESTROYED`,
 * `DESTROYING` and `DRAINING`.
 *
 * @param state The worker's state string.
 * @returns `true` when the state is not one of the teardown states.
 */
function isLive(state) {
    switch (state) {
        case "DESTROYED":
        case "DESTROYING":
        case "DRAINING":
            return false;
        default:
            return true;
    }
}
52
/**
 * Reports whether a worker record can take one more lease right now.
 *
 * A record qualifies only when all three hold: it is not flagged
 * `markedForDestroy`, its state is `WARM_IDLE` or `LEASED`, and its `inFlight`
 * count is still below the per-machine slot limit.
 *
 * @param record Inventory record; reads `markedForDestroy`, `state`, `inFlight`.
 * @param slotsPerMachine Maximum concurrent leases a single worker may carry.
 * @returns `true` when a fresh lease may be stamped on the record.
 */
function isLeasable(record, slotsPerMachine) {
    const servesLeases = record.state === "WARM_IDLE" || record.state === "LEASED";
    return !record.markedForDestroy && servesLeases && record.inFlight < slotsPerMachine;
}
60
+ /**
61
+ * The embedded warm worker pool. A long-lived, reload-surviving singleton that
62
+ * produces each run's `workerHost`. It owns the synchronous select-and-stamp
63
+ * path, RESERVATION-based single-flight growth, the FIFO waiter queue, spend
64
+ * accounting, `maxInFlight`, `maxWorkersPerIssue`, sticky affinity, the recurring
65
+ * reaper timer, and the awaitable `reconcile`/`hydrate`/`drain`/`snapshot`
66
+ * surface. `reconcile` diffs prev-vs-next settings (resize toward min/max,
67
+ * deferring shrink to the reaper oldest-idle-first, never reconstructing the
68
+ * object and never destroying a leased worker synchronously); `hydrate` re-adopts
69
+ * survivors from `driver.list()` + the ledger and drops orphan rows; `drain`
70
+ * rejects new acquires then force-destroys every worker so no paid cloud worker leaks.
71
+ */
72
+ class WorkerPoolImpl {
73
+ settings;
74
+ // The authoritative in-memory inventory, keyed on the pool's idempotency key.
75
+ inventory = new Map();
76
+ // One async mutex per worker so a release and a reaper tick can never both mutate
77
+ // the same record's `inFlight`/state (the reaper-vs-release race fix).
78
+ workerMutexes = new Map();
79
+ // FIFO queue of blocked acquires. A freed worker wakes the oldest compatible
80
+ // waiter first, providing basic fairness.
81
+ waiters = [];
82
+ // Callbacks the pool fires INSIDE the per-worker mutex immediately before it
83
+ // destroys a machine (the single `recycle` chokepoint), so the dispatch
84
+ // coordinator can fail any still-open RunSlot bound to that worker CLEANLY before
85
+ // the host dies (the recycle-vs-endpoint ordering invariant). Each callback is
86
+ // invoked at most once per worker teardown and its errors are swallowed so a
87
+ // misbehaving listener can never block the destroy it precedes.
88
+ recyclingCallbacks = [];
89
+ // Callbacks fired AFTER a waiter wake-up pass whenever capacity is still
90
+ // leasable (see onCapacityAvailable). The runtime registers its poll nudge
91
+ // here; errors are swallowed so a misbehaving listener can never break the
92
+ // settle/reconcile path that freed the capacity.
93
+ capacityAvailableCallbacks = [];
94
+ // Synchronous capacity reservation taken BEFORE any provision await, so two
95
+ // concurrent growth decisions cannot both allocate past `max`. Incremented in
96
+ // the same synchronous tick the growth is decided; released on settle/reject.
97
+ reservedProvisions = 0;
98
+ // Per-issue grow reservations taken synchronously the instant a grow for an
99
+ // issue is decided (and before its provision await), so two concurrent grows
100
+ // for the SAME issue cannot both slip past `maxWorkersPerIssue` while neither has
101
+ // landed in inventory yet. Counted alongside `leaseIssues` in the issue caps;
102
+ // decremented in `grow`'s finally.
103
+ reservedProvisionsByIssue = new Map();
104
+ // Process-lifetime + daily worker-second accumulators. `dayKey` rolls on UTC day
105
+ // change. The daily total is seeded from the ledger sidecar on hydrate (T10).
106
+ workerSecondsUsed = 0;
107
+ dailyWorkerSecondsUsed = 0;
108
+ dayKey;
109
+ // Monotonic sequence for deterministic worker ids (so the fake driver's
110
+ // idempotency key and the test assertions are reproducible).
111
+ workerSeq = 0;
112
+ // Once true the pool rejects new acquires and force-destroys all workers. Set by
113
+ // `drain`; never cleared (drain is terminal for the process).
114
+ draining = false;
115
+ drainPromise = null;
116
+ // Monotonic drain generation. Captured at the start of each `runDrain`; the
117
+ // deadline barrier and the force-destroy loop bail (without destroying live
118
+ // workers) when the epoch they captured no longer matches `drainEpoch` OR
119
+ // `draining` has been cleared. A reconcile RE-ENABLE bumps this epoch so an
120
+ // orphaned drain parked on its deadline cannot force-destroy the workers a
121
+ // re-enabled (now-live) pool just grew.
122
+ drainEpoch = 0;
123
+ // Resolved by `onLeaseSettle` the moment `inFlight` reaches zero while draining,
124
+ // so `drain` proceeds without busy-polling the clock (which a fake clock never
125
+ // advances). Raced against a deadline timer inside `runDrain`.
126
+ notifyDrained = null;
127
+ driver;
128
+ // Monotonic driver generation, bumped by `swapDriver` on every driver
129
+ // hot-reload. A grow / warm-provision CAPTURES this (and `this.driver`) BEFORE
130
+ // its provision await; if the generation has advanced by the time provision
131
+ // returns, a swap happened DURING the await, so the new worker was provisioned on the
132
+ // now-stale driver. The pool then records its origin as the CAPTURED driver
133
+ // (so recycle destroys it on the backend that actually created it) and marks it
134
+ // for destroy (it cannot serve the live driver). Without this, a worker provisioned
135
+ // on driver A but inserted after a swap to B would be recorded under B with no
136
+ // origin, so recycle/destroy routes to B and A's paid machine leaks.
137
+ driverGeneration = 0;
138
+ ledger;
139
+ clock;
140
+ logEvent;
141
+ leaseClock;
142
+ // The recurring reaper timer. Re-armed at the end of each tick so the single
143
+ // serial pass runs at the configured cadence. Detached via `unref?.()` so it
144
+ // never keeps the process alive (systemClock.setTimeout never unrefs on its
145
+ // own). Cleared on drain so a stopped pool issues no further ticks.
146
+ reaperTimer = null;
147
+ reaperStopped = false;
148
+ // True once `hydrate()` has completed at least once. The constructor arms the
149
+ // reaper before `hydrate()` runs, so until the first hydrate re-adopts the
150
+ // labeled survivors from `driver.list()`, the reaper's destroy-unknown branch
151
+ // must stay inert or it would reap the pool's own survivors on restart.
152
+ hydrated = false;
153
+ reaperInternals;
154
+ // The deps used to resolve the driver in the ctor. Retained so `swapDriver`
155
+ // can re-run `resolveDriver` (and rebuild the ledger gate) in place on a
156
+ // driver hot-reload WITHOUT reconstructing the pool singleton.
157
+ deps;
158
+ constructor(settings, deps) {
159
+ this.settings = settings;
160
+ this.deps = deps;
161
+ this.clock = deps.clock;
162
+ this.logEvent = deps.logEvent;
163
+ this.driver = resolveDriver(settings, deps);
164
+ this.ledger = createLedger({
165
+ ledgerPath: deps.ledgerPath ?? "",
166
+ clock: deps.clock,
167
+ usesLedger: this.driver.capabilities.usesLedger && deps.ledgerPath !== undefined,
168
+ });
169
+ // The lease/heartbeat clock works in milliseconds while the ClockPort yields
170
+ // a Date; adapt once so leases see a plain numeric clock.
171
+ this.leaseClock = { now: () => this.clock.now().getTime() };
172
+ this.dayKey = utcDayKey(this.clock.now());
173
+ // The narrow seam the reaper drives over. Every primitive routes back through
174
+ // the pool's per-worker mutex so a reaper tick and a lease release can never both
175
+ // touch the same `inFlight`.
176
+ this.reaperInternals = {
177
+ settings: this.settings,
178
+ driver: this.driver,
179
+ poolOwnedLabel: POOL_OWNED_LABEL,
180
+ now: () => this.leaseClock.now(),
181
+ inventory: this.inventory,
182
+ mutexFor: (workerId) => this.mutexFor(workerId),
183
+ liveWorkerCount: () => this.liveWorkerCount(),
184
+ // In-process invariant: a lease is settled exactly once, only in `runClaim`'s
185
+ // finally (release/fail), so an UN-settled in-flight lease always implies an
186
+ // active run. The reaper therefore treats every in-flight lease as alive and
187
+ // never force-returns a LEASED worker from the live pool (that would kill a
188
+ // legitimate long single-turn run that emits no heartbeat). Orphan recovery
189
+ // after a process restart is handled separately by `hydrate`, which re-adopts
190
+ // only the survivors `driver.list()` still shows and drops orphan rows.
191
+ isRunActive: () => true,
192
+ hydrated: () => this.hydrated,
193
+ hasGrowthBudget: () => this.hasGrowthHeadroom(),
194
+ destroyWorker: async (record, reason) => this.recycle(record, reason),
195
+ provisionWarm: async () => this.provisionWarm(),
196
+ logEvent: this.logEvent,
197
+ wakeWaiters: () => this.wakeWaiters(),
198
+ };
199
+ // Single serial recurring reaper timer, detached so it never keeps the
200
+ // process alive. The tick re-arms itself at the configured cadence.
201
+ this.scheduleReaper();
202
+ }
203
+ // --- public API ---------------------------------------------------------
204
+ async acquire(req) {
205
+ if (!this.settings.enabled || this.draining) {
206
+ return { status: "no_capacity", reason: "pool_disabled" };
207
+ }
208
+ this.rollDayKeyIfNeeded();
209
+ // Spend gate: once worker-seconds (total or daily) are exhausted the pool runs
210
+ // nothing further, even reusing a warm worker, until the cap resets.
211
+ if (this.workerSecondsExhausted()) {
212
+ return { status: "no_capacity", reason: "spend_cap" };
213
+ }
214
+ // 1) Synchronous select-and-stamp over a free/under-capacity worker. No await
215
+ // between selecting the record and stamping it, so two concurrent acquires
216
+ // can never grab the same slot.
217
+ const selected = this.selectAndStamp(req);
218
+ if (selected) {
219
+ return { status: "leased", lease: selected };
220
+ }
221
+ // 2) Grow under the reservation, if capacity and spend allow.
222
+ if (this.canGrow(req)) {
223
+ const grown = await this.grow(req);
224
+ if (grown.status === "leased")
225
+ return grown;
226
+ // A growth that failed for capacity/spend reasons falls through to the
227
+ // waiter queue; a driver_error with nothing to wait on is returned.
228
+ if (grown.status === "no_capacity" && grown.reason === "driver_error") {
229
+ return grown;
230
+ }
231
+ }
232
+ else if (this.blockedBySpendCap()) {
233
+ // A worker could not be selected and growth is barred specifically by a spend
234
+ // cap (concurrent workers). Surface spend_cap now rather than holding the
235
+ // poll thread on a waiter the budget can never satisfy.
236
+ return { status: "no_capacity", reason: "spend_cap" };
237
+ }
238
+ // 3) Block on the FIFO waiter queue until a worker frees, the timeout fires, or
239
+ // the request is aborted.
240
+ return this.waitForCapacity(req);
241
+ }
242
+ canAcquire() {
243
+ if (!this.settings.enabled || this.draining)
244
+ return false;
245
+ this.rollDayKeyIfNeeded();
246
+ if (this.workerSecondsExhausted())
247
+ return false;
248
+ // A warm/under-capacity worker is immediately leasable.
249
+ for (const record of this.inventory.values()) {
250
+ if (isLeasable(record, this.settings.slotsPerMachine))
251
+ return true;
252
+ }
253
+ // Otherwise capacity exists only if the pool can still grow a worker.
254
+ return this.hasGrowthHeadroom();
255
+ }
256
+ /**
257
+ * Whether the pool currently governs worker-host capacity. A config reload can disable the pool
258
+ * (which drains it to zero) without tearing down the orchestrator's lifetime capacity probe; the
259
+ * probe reads this so a disabled pool falls through to static/local execution instead of
260
+ * permanently blocking dispatch. Mirrors `settings.enabled` (swapped in by `reconcile`).
261
+ */
262
+ isEnabled() {
263
+ return this.settings.enabled;
264
+ }
265
+ /**
266
+ * Diffs prev-vs-next settings on a config hot-reload and reconciles the live
267
+ * pool WITHOUT being reconstructed (the singleton survives every reload):
268
+ *
269
+ * - `enabled true -> false`: drain to zero (paid workers must not linger).
270
+ * - `enabled false -> true`: grow from zero toward the warm/min target.
271
+ * - lowering `max` (or any live overshoot of the new `max`): defer the shrink
272
+ * to the reaper, marking the OLDEST-IDLE excess workers `markedForDestroy`
273
+ * (the reaper reaps a flagged idle worker on its next tick, and a flagged
274
+ * LEASED worker is recycled the instant its last lease returns). Leased workers
275
+ * are NEVER destroyed synchronously here.
276
+ * - raising `min`/`warm`: top up toward the new target within the spend budget.
277
+ *
278
+ * Settings are swapped in first so every subsequent acquire / reaper tick reads
279
+ * the latest knobs (the reaper re-syncs `internals.settings` each tick anyway).
280
+ */
281
+ reconcile(next) {
282
+ const prev = this.settings;
283
+ if (!next.enabled) {
284
+ // Disabling the pool drains it to zero, so it needs NO (re)built driver:
285
+ // SKIP the swap entirely. A disable reload that ALSO points at an unavailable
286
+ // driver (or drops the static-ssh hosts so construction would throw) must
287
+ // still disable + drain - never throw inside `swapDriver` and strand the
288
+ // live pool enabled with paid workers still running. The drain tears every worker
289
+ // down on the driver that PROVISIONED it (its origin), not the new one.
290
+ this.settings = next;
291
+ this.reaperInternals.settings = next;
292
+ void this.drain({ deadlineMs: next.drainDeadlineMs });
293
+ return;
294
+ }
295
+ // Finding #1: rebuild the driver in place BEFORE the settings swap when the
296
+ // driver construction actually changed (a new kind or deep-changed
297
+ // driverOptions). A same-driver reconcile skips the swap (no rebuild),
298
+ // keeping the singleton's resolved driver object stable. Once the coordinator
299
+ // exists it will drive `swapDriver`; until then `reconcile` drives it directly.
300
+ if (driverConstructionChanged(prev, next)) {
301
+ this.swapDriver(next);
302
+ }
303
+ this.settings = next;
304
+ this.reaperInternals.settings = next;
305
+ // A re-enabled pool (false -> true) starts from zero; the grow-toward-target
306
+ // path below covers it (a disabled pool was drained to zero, so live==0). The
307
+ // prior disable set `draining`/`reaperStopped` via `drain`; a re-enable must
308
+ // clear them (and re-arm the reaper) or the pool stays permanently dead -
309
+ // every acquire short-circuits on `draining` and no reaper top-up ever runs.
310
+ if (!prev.enabled) {
311
+ this.draining = false;
312
+ this.drainPromise = null;
313
+ this.notifyDrained = null;
314
+ // Invalidate any drain still parked on its deadline barrier. Its captured
315
+ // epoch is now stale, so its force-destroy loop will bail instead of
316
+ // tearing down the workers this re-enable is about to grow.
317
+ this.drainEpoch += 1;
318
+ if (this.reaperStopped) {
319
+ this.reaperStopped = false;
320
+ this.scheduleReaper();
321
+ }
322
+ }
323
+ // Defer any shrink toward a lowered `max` to the reaper, oldest-idle first.
324
+ this.markExcessForShrink();
325
+ // Grow toward the (possibly raised) warm/min target within the spend budget.
326
+ void this.growTowardTarget();
327
+ }
328
+ /**
329
+ * Rebuilds the resolved driver IN PLACE on a driver hot-reload, without
330
+ * reconstructing the pool singleton (Finding #1). The pool's ctor resolved the
331
+ * driver once, but `reconcile` previously only swapped settings, so a reload
332
+ * that changed `driver`/`driverOptions` left every acquire still routed to
333
+ * the stale driver object.
334
+ *
335
+ * TRANSACTIONAL: every step that can THROW (resolving the new driver and
336
+ * constructing its ledger) runs FIRST, into locals, BEFORE any record or
337
+ * `this.driver` is mutated. A failed reload (driver unavailable / invalid
338
+ * driverOptions) therefore throws having mutated NOTHING, matching the
339
+ * runtime's rollback to the last-good settings: marking last-good workers for
340
+ * destroy and THEN throwing would let `onLeaseSettle`/the reaper drain healthy
341
+ * warm/paid capacity after a REJECTED reload (Codex iter-6 HIGH). Once resolve
342
+ * succeeds (the commit point), the remaining steps cannot throw. `swapDriver`:
343
+ *
344
+ * 1. CAPTURES `originDriver` on EVERY existing record BEFORE reassigning, so
345
+ * each surviving worker remembers the backend that PROVISIONED it. This is the
346
+ * no-orphaned-paid-worker invariant: an in-flight lease that settles AFTER the
347
+ * swap routes `recycle`'s `destroy` to its ORIGINAL backend (below), not the
348
+ * new `this.driver`. A record that already carries an `originDriver` (a
349
+ * prior swap) keeps it (the true origin), so repeated swaps never lose it.
350
+ * 2. flags every old-driver worker `markedForDestroy` and recycles each IDLE one
351
+ * immediately (under its per-worker mutex) against its ORIGINAL backend, so no
352
+ * paid worker is orphaned and the new driver's `list()` reconcile never sees a
353
+ * stale old worker it does not own. A still-LEASED old worker keeps the flag and is
354
+ * recycled on its ORIGINAL backend the instant its last lease settles
355
+ * (`onLeaseSettle` -> `recycle`, which routes to `originDriver`).
356
+ * 3. commits the pre-resolved driver (`this.driver = newDriver`).
357
+ * 4. re-threads `reaperInternals.driver` to the new driver so the recurring
358
+ * reaper's `list()` reconcile / probe / top-up drive the new backend.
359
+ * 5. rebuilds the ledger `usesLedger` gate against the new driver's
360
+ * capabilities (e.g. non-ledger -> ledger) WITHOUT reconstructing the spend
361
+ * accumulators, which live on the pool and are untouched.
362
+ *
363
+ * Called by `reconcile` only when {@link driverConstructionChanged} is true.
364
+ */
365
+ swapDriver(next) {
366
+ // TRANSACTIONAL: do ALL throwing work (resolveDriver, and constructing the
367
+ // new ledger) into LOCALS BEFORE mutating ANY record or `this.driver`. A
368
+ // failed reload (driver unavailable / invalid driverOptions) must throw
369
+ // having mutated NOTHING, so the runtime's transactional rollback to the
370
+ // last-good settings is matched by an UNTOUCHED inventory: marking workers for
371
+ // destroy before this throws would let `onLeaseSettle` recycle healthy
372
+ // in-flight leases and the reaper reap idle workers, draining warm/paid capacity
373
+ // after a REJECTED reload. (Codex iter-6 HIGH.)
374
+ const newDriver = resolveDriver(next, this.deps);
375
+ const newLedger = createLedger({
376
+ ledgerPath: this.deps.ledgerPath ?? "",
377
+ clock: this.deps.clock,
378
+ usesLedger: newDriver.capabilities.usesLedger && this.deps.ledgerPath !== undefined,
379
+ });
380
+ // --- COMMIT POINT: resolve succeeded, so from here NOTHING throws. ---------
381
+ // 1) Capture the origin driver on every existing record BEFORE reassigning
382
+ // `this.driver`, and flag each for drain so it is recycled on its origin.
383
+ const idleToRecycle = [];
384
+ for (const record of this.inventory.values()) {
385
+ record.originDriver = record.originDriver ?? this.driver;
386
+ record.markedForDestroy = true;
387
+ // An idle (un-leased) old-driver worker cannot serve the new driver and the
388
+ // new driver's list() will not own it, so recycle it now against its origin
389
+ // rather than deferring to a reaper that would otherwise drop it un-destroyed.
390
+ if (isLive(record.state) && record.inFlight === 0 && record.state !== "DESTROYING") {
391
+ idleToRecycle.push(record);
392
+ }
393
+ }
394
+ // 3) Commit the pre-resolved driver in place, and bump the driver
395
+ // generation so any in-flight grow / warm-provision that captured the PRIOR
396
+ // generation before its provision await detects the swap when it returns
397
+ // (and records its worker's origin as the captured driver).
398
+ this.driver = newDriver;
399
+ this.driverGeneration += 1;
400
+ // 4) Re-thread the reaper's driver so its list()/probe/top-up drive the new
401
+ // backend (the reaper reads `reaperInternals.driver`, not `this.driver`).
402
+ this.reaperInternals.driver = this.driver;
403
+ // 5) Commit the pre-built ledger gate (rebuilt against the new driver's
404
+ // `usesLedger` capability). The pool's spend accumulators are unaffected
405
+ // (they live on the pool, not the ledger object).
406
+ this.ledger = newLedger;
407
+ // 2 (deferred async, fire-and-forget like reconcile's grow/drain): recycle each
408
+ // idle old-driver worker on its ORIGINAL backend under its per-worker mutex, then
409
+ // wake any waiters so the freed capacity refills from the NEW driver.
410
+ if (idleToRecycle.length > 0) {
411
+ void (async () => {
412
+ for (const record of idleToRecycle) {
413
+ await this.mutexFor(record.workerId).runExclusive(async () => {
414
+ if (record.inFlight !== 0)
415
+ return; // a lease landed first; settle recycles it
416
+ await this.recycle(record, "shrink");
417
+ });
418
+ }
419
+ this.wakeWaiters();
420
+ })();
421
+ }
422
+ }
423
+ /**
424
+ * Registers a callback the pool fires INSIDE the per-worker mutex immediately
425
+ * before it destroys a machine. Every teardown path routes through the single
426
+ * {@link recycle} chokepoint, so the callback fires exactly once per worker just
427
+ * before `driver.destroy`. The dispatch coordinator registers a callback here
428
+ * to fail any still-open RunSlot on the recycled worker CLEANLY before the host
429
+ * dies (the recycle-vs-endpoint ordering invariant). A callback error is
430
+ * swallowed so a misbehaving listener can never block the teardown it precedes.
431
+ */
432
+ onMachineRecycling(cb) {
433
+ this.recyclingCallbacks.push(cb);
434
+ }
435
+ /**
436
+ * Registers a callback fired whenever a capacity-freeing event leaves the pool
437
+ * leasable (see {@link WorkerPool.onCapacityAvailable}). Fired at the end of every
438
+ * waiter wake-up pass - a lease settle, a reconcile grow, a reaper top-up -
439
+ * AFTER the FIFO waiters had first claim on the freed worker, and only when
440
+ * `canAcquire()` still holds, so a drained/disabled/spend-capped pool never
441
+ * notifies.
442
+ */
443
+ onCapacityAvailable(cb) {
444
+ this.capacityAvailableCallbacks.push(cb);
445
+ }
446
+ /** Notifies every {@link onCapacityAvailable} listener; errors are swallowed. */
447
+ notifyCapacityAvailable() {
448
+ if (this.capacityAvailableCallbacks.length === 0)
449
+ return;
450
+ if (!this.canAcquire())
451
+ return;
452
+ for (const cb of this.capacityAvailableCallbacks) {
453
+ try {
454
+ cb();
455
+ }
456
+ catch (error) {
457
+ this.logEvent({
458
+ event: "worker_pool_capacity_callback_failed",
459
+ error: errorMessage(error),
460
+ });
461
+ }
462
+ }
463
+ }
464
+ /**
465
+ * Notifies every registered {@link onMachineRecycling} callback that `workerId` is
466
+ * about to be destroyed. Called once at the top of {@link recycle} (inside the
467
+ * per-worker mutex, before `driver.destroy`). Each callback's error is caught and
468
+ * logged so one bad listener can never block the teardown or starve the others.
469
+ */
470
+ notifyMachineRecycling(workerId) {
471
+ for (const cb of this.recyclingCallbacks) {
472
+ try {
473
+ cb(workerId);
474
+ }
475
+ catch (error) {
476
+ this.logEvent({
477
+ event: "worker_pool_recycling_callback_failed",
478
+ workerId,
479
+ error: errorMessage(error),
480
+ });
481
+ }
482
+ }
483
+ }
484
+ /**
485
+ * Flags the OLDEST excess workers for destruction when the live count exceeds the
486
+ * current `max`. Idle workers are preferred (so a shrink frees capacity without
487
+ * disturbing a run) and ordered oldest-idle-first; only when no idle worker remains
488
+ * does it fall back to flagging a LEASED worker, which is recycled on lease return
489
+ * (never destroyed synchronously). The actual teardown is the reaper's job.
490
+ */
491
+ markExcessForShrink() {
492
+ const max = this.settings.max;
493
+ const live = [...this.inventory.values()].filter((record) => isLive(record.state) && !record.markedForDestroy);
494
+ let excess = live.length - max;
495
+ if (excess <= 0)
496
+ return;
497
+ // Oldest-idle first: idle workers before leased, each group oldest-idle-first.
498
+ const ordered = [...live].sort((a, b) => {
499
+ const aIdle = a.state === "WARM_IDLE" && a.inFlight === 0 ? 0 : 1;
500
+ const bIdle = b.state === "WARM_IDLE" && b.inFlight === 0 ? 0 : 1;
501
+ if (aIdle !== bIdle)
502
+ return aIdle - bIdle;
503
+ return a.lastIdleAtMs - b.lastIdleAtMs;
504
+ });
505
+ for (const record of ordered) {
506
+ if (excess <= 0)
507
+ break;
508
+ record.markedForDestroy = true;
509
+ excess -= 1;
510
+ }
511
+ }
512
+ /**
513
+ * Provisions warm workers one at a time toward the higher of `min`/`warm`, within
514
+ * the `max` ceiling and the spend budget (the reservation inside `provisionWarm`
515
+ * enforces both). Fire-and-forget from `reconcile` so a reload never blocks; a
516
+ * failed provision is logged and swallowed inside `provisionWarm` and retried by
517
+ * the recurring reaper top-up.
518
+ */
519
+ async growTowardTarget() {
520
+ const target = Math.max(this.settings.min, this.settings.warm);
521
+ let attempts = Math.max(0, target - (this.liveWorkerCount() + this.reservedProvisions));
522
+ while (attempts > 0 && this.liveWorkerCount() + this.reservedProvisions < target) {
523
+ if (!this.hasGrowthHeadroom())
524
+ break;
525
+ await this.provisionWarm();
526
+ attempts -= 1;
527
+ }
528
+ this.wakeWaiters();
529
+ }
530
+ /**
531
+ * Re-adopts survivors on daemon startup so a restart does not leak the workers a
532
+ * prior process created. The reconcile is authoritative on `driver.list()`:
533
+ *
534
+ * 1. Seed the daily spend accumulator from the `spend.json` sidecar so a
535
+ * restart within the same UTC day carries the daily total (a day boundary
536
+ * resets it). The sidecar is the source of truth for spend, not inventory.
537
+ * 2. Re-adopt every worker `driver.list()` still shows that carries the
538
+ * pool-owned label into inventory as WARM_IDLE (a fresh process has no
539
+ * active runs, so a survivor is idle: `inFlight=0`, `leaseId=null`). An
540
+ * unlabeled instance is never adopted (it is not ours).
541
+ * 3. Force-return orphan ledger rows: a row whose worker the authoritative list
542
+ * no longer shows is a worker that vanished while the run owning it is gone,
543
+ * so the row is dropped from the ledger (no phantom inventory survives).
544
+ *
545
+ * Idempotent: a worker already in inventory (e.g. a second hydrate) is left alone.
546
+ */
547
+ async hydrate() {
548
+ const spend = await this.ledger.loadDailySpend();
549
+ this.dayKey = spend.dayKey;
550
+ this.dailyWorkerSecondsUsed = spend.workerSecondsToday;
551
+ // The ledger replay is advisory; driver.list() is authoritative. A transient
552
+ // list() failure must not wipe inventory, so the re-adopt below only runs once a
553
+ // BOUNDED retry of list() (short clock-driven backoff) finally succeeds.
554
+ const rows = await this.ledger.load();
555
+ const listed = await this.listForHydrate();
556
+ if (listed === null) {
557
+ // list() never recovered. For a driver that owns no paid survivors
558
+ // (non-ledger, non-ephemeral fake / static-ssh) the logged skip is tolerable:
559
+ // there is nothing to leak, so startup proceeds and the reaper reconciles a
560
+ // later tick. `hydrated` deliberately stays false so the reaper's
561
+ // destroy-unknown gate remains closed until a list() actually succeeds.
562
+ return;
563
+ }
564
+ const listedById = new Map();
565
+ for (const descriptor of listed)
566
+ listedById.set(descriptor.workerId, descriptor);
567
+ // Re-adopt every labeled-ours survivor the list still shows. A fresh process
568
+ // holds no active runs, so each survivor is re-adopted idle (no lease).
569
+ const now = this.leaseClock.now();
570
+ for (const descriptor of listed) {
571
+ if (this.inventory.has(descriptor.workerId))
572
+ continue;
573
+ if (!descriptor.labels.includes(POOL_OWNED_LABEL))
574
+ continue;
575
+ this.inventory.set(descriptor.workerId, {
576
+ workerId: descriptor.workerId,
577
+ workerHost: descriptor.workerHost,
578
+ driverRef: descriptor.driverRef,
579
+ state: "WARM_IDLE",
580
+ labels: [...descriptor.labels],
581
+ createdAtMs: descriptor.createdAtMs,
582
+ leaseId: null,
583
+ inFlight: 0,
584
+ lastIdleAtMs: now,
585
+ lastHeartbeatMs: now,
586
+ workerSecondsUsed: 0,
587
+ markedForDestroy: false,
588
+ affinityKey: null,
589
+ metadata: { ...descriptor.metadata },
590
+ leaseIssues: new Map(),
591
+ });
592
+ }
593
+ // Reconcile every ledger row against the authoritative list:
594
+ // - row whose worker list() still shows: kept (its survivor was re-adopted above).
595
+ // - PROVISIONAL row with no matching instance YOUNGER than ttlMs: kept. The
596
+ // prior process crashed mid-provision (the worker may exist at the driver but
597
+ // not yet be list-visible under eventual consistency), so the recoverable
598
+ // write-ahead row is retained for a later tick / re-hydrate to correlate.
599
+ // - any other row with no matching instance (active row whose worker vanished, or
600
+ // a provisional row older than ttlMs that never materialized): dropped so no
601
+ // phantom inventory / dead write-ahead row survives the restart.
602
+ const ttlMs = this.settings.ttlMs;
603
+ for (const row of rows) {
604
+ if (listedById.has(row.workerId))
605
+ continue;
606
+ if (row.status === "provisional" && now - row.createdAtMs < ttlMs) {
607
+ // A still-recent provisional row: the worker may be in flight / not yet listed.
608
+ continue;
609
+ }
610
+ this.logEvent({ event: "worker_pool_hydrate_orphan_dropped", workerId: row.workerId });
611
+ await this.ledger.delete(row.workerId);
612
+ }
613
+ // Advance the id sequence past any adopted `worker-<n>` survivor so the next
614
+ // grow / warm-provision cannot RE-MINT an id a survivor already owns. Without
615
+ // this, `workerSeq` (which inits at 0) would mint `worker-0` again after adopting a
616
+ // higher-numbered survivor and, once it cycled back through that suffix, stamp
617
+ // a SECOND lease onto a live survivor. Non-numeric ids (e.g. a custom label)
618
+ // carry no numeric suffix and are ignored when computing the high-water mark.
619
+ this.advanceWorkerSeqPastAdopted();
620
+ // The first successful hydrate has now re-adopted every labeled survivor, so
621
+ // the reaper's destroy-unknown reconcile may resume: any labeled-but-unknown
622
+ // survivor a later tick sees is now a genuine leaked orphan, not one this
623
+ // hydrate had simply not adopted yet.
624
+ this.hydrated = true;
625
+ }
626
+ /**
627
+ * Bounded-retry wrapper around `driver.list()` for {@link hydrate}. The
628
+ * authoritative startup reconcile MUST NOT treat a transient `list()` outage as a
629
+ * successful (empty) startup, because a paid (usesLedger / ephemeral) driver may
630
+ * have real survivors a prior process provisioned: swallowing the failure would
631
+ * leave those workers neither adopted (so they never serve a lease) nor reaped (the
632
+ * destroy-unknown gate stays closed because {@link hydrated} never flips) nor
633
+ * visible to drain - unmanaged paid workers leaking past restart.
634
+ *
635
+ * - Retries `list()` up to {@link HYDRATE_LIST_ATTEMPTS} times with a short
636
+ * clock-driven backoff between attempts, returning the descriptors on the first
637
+ * success (the common case: a brief driver blip recovers within a retry).
638
+ * - If every attempt fails AND the driver owns real survivors
639
+ * (`capabilities.usesLedger` or `capabilities.ephemeral`), THROWS
640
+ * `worker_pool_hydrate_failed` so the daemon's `await workerPool.hydrate()` fails
641
+ * startup LOUDLY instead of running blind over unmanaged paid machines.
642
+ * - If every attempt fails for a NON-paid driver (fake / static-ssh: no paid
643
+ * survivors to leak), returns `null` so the caller logs the skip and proceeds
644
+ * with startup, leaving `hydrated` false (reaper destroy-unknown gate closed)
645
+ * until a later `list()` succeeds.
646
+ */
647
+ async listForHydrate() {
648
+ let lastError;
649
+ for (let attempt = 1; attempt <= HYDRATE_LIST_ATTEMPTS; attempt += 1) {
650
+ try {
651
+ return await this.driver.list();
652
+ }
653
+ catch (error) {
654
+ lastError = error;
655
+ this.logEvent({
656
+ event: "worker_pool_hydrate_list_failed",
657
+ attempt,
658
+ maxAttempts: HYDRATE_LIST_ATTEMPTS,
659
+ error: errorMessage(error),
660
+ });
661
+ if (attempt < HYDRATE_LIST_ATTEMPTS) {
662
+ await this.sleep(HYDRATE_LIST_BACKOFF_MS * attempt);
663
+ }
664
+ }
665
+ }
666
+ const caps = this.driver.capabilities;
667
+ if (caps.usesLedger || caps.ephemeral) {
668
+ // A paid driver with potential real survivors: fail startup loud rather than
669
+ // run with unmanaged paid workers that are invisible to adopt / reap / drain.
670
+ this.logEvent({
671
+ event: "worker_pool_hydrate_failed",
672
+ attempts: HYDRATE_LIST_ATTEMPTS,
673
+ error: errorMessage(lastError),
674
+ });
675
+ throw new Error(`worker_pool_hydrate_failed: driver.list() failed after ${HYDRATE_LIST_ATTEMPTS} attempts: ${errorMessage(lastError)}`);
676
+ }
677
+ // A non-paid driver owns no survivors to leak: tolerate the skip.
678
+ return null;
679
+ }
680
+ /** Resolves after `delayMs` via the injected clock (used for hydrate backoff). */
681
+ async sleep(delayMs) {
682
+ await new Promise((resolve) => {
683
+ const handle = this.clock.setTimeout(resolve, delayMs);
684
+ handle.unref?.();
685
+ });
686
+ }
687
+ /**
688
+ * Probes a freshly-provisioned worker until it reports SSH-ready or the bounded
689
+ * attempt budget is spent, enforcing the warm-up contract that a worker is
690
+ * "reachable before it is leased". `provision` returning does NOT guarantee sshd is
691
+ * up on a cold cloud worker (a container driver may only have resolved the published
692
+ * port; a cloud driver may boot asynchronously), so leasing it immediately would hand
693
+ * an unready host to the runner - failing the first run, poisoning the lease, and
694
+ * destroying an otherwise-healthy worker. An already-up host (static-ssh) and the fake
695
+ * probe ok on the first attempt, so this is a single round-trip on the cold path.
696
+ * Probe faults are treated as not-ready (never thrown). Returns false when the worker
697
+ * never becomes ready; the caller destroys it.
698
+ */
699
+ async probeUntilReady(descriptor, driver) {
700
+ let lastReason = "not_ready";
701
+ for (let attempt = 1; attempt <= PROBE_READY_ATTEMPTS; attempt += 1) {
702
+ try {
703
+ const health = await driver.probe(descriptor, {
704
+ timeoutMs: this.settings.acquireTimeoutMs,
705
+ });
706
+ if (health.ok)
707
+ return true;
708
+ lastReason = health.reason;
709
+ }
710
+ catch (error) {
711
+ lastReason = errorMessage(error);
712
+ }
713
+ if (attempt < PROBE_READY_ATTEMPTS)
714
+ await this.sleep(PROBE_READY_BACKOFF_MS * attempt);
715
+ }
716
+ this.logEvent({
717
+ event: "worker_pool_worker_unready",
718
+ workerId: descriptor.workerId,
719
+ reason: lastReason,
720
+ });
721
+ return false;
722
+ }
723
+ /**
724
+ * Bumps `workerSeq` to one past the highest numeric suffix among the `worker-<n>` ids
725
+ * currently in inventory. Ids that do not match `worker-<n>` (non-numeric suffix)
726
+ * are skipped. Never lowers the sequence.
727
+ */
728
+ advanceWorkerSeqPastAdopted() {
729
+ let maxSuffix = -1;
730
+ for (const workerId of this.inventory.keys()) {
731
+ const match = /^worker-(\d+)$/.exec(workerId);
732
+ if (!match)
733
+ continue;
734
+ const suffix = Number.parseInt(match[1], 10);
735
+ if (Number.isFinite(suffix) && suffix > maxSuffix)
736
+ maxSuffix = suffix;
737
+ }
738
+ if (maxSuffix + 1 > this.workerSeq)
739
+ this.workerSeq = maxSuffix + 1;
740
+ }
741
+ async drain(opts) {
742
+ if (this.drainPromise)
743
+ return this.drainPromise;
744
+ this.draining = true;
745
+ this.drainEpoch += 1;
746
+ this.drainPromise = this.runDrain(opts, this.drainEpoch);
747
+ return this.drainPromise;
748
+ }
749
+ snapshot() {
750
+ let warmIdle = 0;
751
+ let leased = 0;
752
+ let provisioning = 0;
753
+ let degraded = 0;
754
+ let inFlight = 0;
755
+ let concurrentWorkers = 0;
756
+ const workers = [];
757
+ for (const record of this.inventory.values()) {
758
+ inFlight += record.inFlight;
759
+ if (isLive(record.state))
760
+ concurrentWorkers += 1;
761
+ switch (record.state) {
762
+ case "WARM_IDLE":
763
+ warmIdle += 1;
764
+ break;
765
+ case "LEASED":
766
+ leased += 1;
767
+ break;
768
+ case "PROVISIONING":
769
+ case "WARMING":
770
+ provisioning += 1;
771
+ break;
772
+ case "DEGRADED":
773
+ degraded += 1;
774
+ break;
775
+ default:
776
+ break;
777
+ }
778
+ workers.push({
779
+ workerId: record.workerId,
780
+ workerHost: record.workerHost,
781
+ state: record.state,
782
+ inFlight: record.inFlight,
783
+ markedForDestroy: record.markedForDestroy,
784
+ });
785
+ }
786
+ return {
787
+ enabled: this.settings.enabled,
788
+ driver: this.settings.driver,
789
+ total: this.inventory.size,
790
+ warmIdle,
791
+ leased,
792
+ provisioning,
793
+ degraded,
794
+ inFlight,
795
+ spend: {
796
+ concurrentWorkers,
797
+ workerSecondsUsed: this.workerSecondsUsed,
798
+ dailyWorkerSecondsUsed: this.dailyWorkerSecondsUsed,
799
+ dayKey: this.dayKey,
800
+ },
801
+ workers,
802
+ };
803
+ }
804
+ // --- selection / stamping ----------------------------------------------
805
+ /**
806
+ * Synchronously picks a leasable worker and stamps a lease on it WITHOUT any
807
+ * await in between. Honors sticky affinity (prefer the same worker a retry ran
808
+ * on) and the per-issue fairness cap. Returns null when no in-inventory worker is
809
+ * leasable for this request.
810
+ */
811
+ selectAndStamp(req) {
812
+ const record = this.pickRecord(req);
813
+ if (!record)
814
+ return null;
815
+ return this.stamp(record, req);
816
+ }
817
+ /** Chooses the best leasable record for a request (affinity first). */
818
+ pickRecord(req) {
819
+ const slotsPerMachine = this.settings.slotsPerMachine;
820
+ // Affinity: if a prior workerHost is named and that worker is still leasable,
821
+ // re-land on it so resume continuity holds across a retry.
822
+ if (req.affinityKey) {
823
+ for (const record of this.inventory.values()) {
824
+ if (record.workerHost === req.affinityKey && isLeasable(record, slotsPerMachine)) {
825
+ if (this.issueWouldExceedCap(req, record))
826
+ return null;
827
+ return record;
828
+ }
829
+ }
830
+ }
831
+ // Otherwise prefer an idle worker, then any under-capacity worker (slotsPerMachine>1).
832
+ let underCapacity = null;
833
+ for (const record of this.inventory.values()) {
834
+ if (!isLeasable(record, slotsPerMachine))
835
+ continue;
836
+ if (this.issueWouldExceedCap(req, record))
837
+ continue;
838
+ if (record.state === "WARM_IDLE" && record.inFlight === 0) {
839
+ return record;
840
+ }
841
+ if (underCapacity === null)
842
+ underCapacity = record;
843
+ }
844
+ return underCapacity;
845
+ }
846
+ /** Stamps a lease on a record (sets leaseId, ++inFlight, LEASED). Synchronous. */
847
+ stamp(record, req) {
848
+ const leaseId = record.leaseId ?? randomUUID();
849
+ record.leaseId = leaseId;
850
+ record.inFlight += 1;
851
+ record.state = "LEASED";
852
+ record.affinityKey = record.workerHost;
853
+ if (!record.leaseIssues)
854
+ record.leaseIssues = new Map();
855
+ record.leaseIssues.set(req.issueId, (record.leaseIssues.get(req.issueId) ?? 0) + 1);
856
+ const acquiredAtMs = this.leaseClock.now();
857
+ // Track this lease's acquire time so a drain that force-destroys the worker while
858
+ // it is still LEASED can accrue the in-flight window (the normal settle path
859
+ // removes this entry in `onLeaseSettle`).
860
+ if (!record.liveLeaseAcquiredMs)
861
+ record.liveLeaseAcquiredMs = [];
862
+ record.liveLeaseAcquiredMs.push(acquiredAtMs);
863
+ record.lastHeartbeatMs = acquiredAtMs;
864
+ const expiresAtMs = record.createdAtMs + this.settings.ttlMs;
865
+ return createLease({
866
+ leaseId,
867
+ record,
868
+ mutex: this.mutexFor(record.workerId),
869
+ clock: this.leaseClock,
870
+ acquiredAtMs,
871
+ expiresAtMs,
872
+ // Bill this lease from ITS OWN acquire time so a long heartbeating run is
873
+ // charged the full window (heartbeats only stamp staleness, never reset the
874
+ // bill) and two overlapping leases on one worker each accrue their own window.
875
+ onSettle: async (rec, outcome, reason) => this.onLeaseSettle(rec, req.issueId, acquiredAtMs, outcome, reason),
876
+ });
877
+ }
878
+ /** True when leasing one more worker for this issue would exceed maxWorkersPerIssue. */
879
+ issueWouldExceedCap(req, candidate) {
880
+ const cap = this.settings.maxWorkersPerIssue;
881
+ if (cap === undefined)
882
+ return false;
883
+ // Reusing a worker the issue already holds does not consume a new slot.
884
+ if ((candidate.leaseIssues?.get(req.issueId) ?? 0) > 0)
885
+ return false;
886
+ let held = this.reservedProvisionsByIssue.get(req.issueId) ?? 0;
887
+ for (const record of this.inventory.values()) {
888
+ if (record.workerId === candidate.workerId)
889
+ continue;
890
+ if ((record.leaseIssues?.get(req.issueId) ?? 0) > 0)
891
+ held += 1;
892
+ }
893
+ return held >= cap;
894
+ }
895
+ // --- growth (reservation-based single flight) --------------------------
896
+ /** Whether a request may attempt to grow a new worker right now. */
897
+ canGrow(req) {
898
+ if (!this.hasGrowthHeadroom())
899
+ return false;
900
+ return !this.issueAtGrowthCap(req);
901
+ }
902
+ /** Capacity headroom under `max` and the concurrent-worker spend cap. */
903
+ hasGrowthHeadroom() {
904
+ const live = this.liveWorkerCount() + this.reservedProvisions;
905
+ if (live >= this.settings.max)
906
+ return false;
907
+ const concurrentCap = this.settings.spend?.maxConcurrentWorkers;
908
+ if (concurrentCap !== undefined && live >= concurrentCap)
909
+ return false;
910
+ return true;
911
+ }
912
+ /**
913
+ * Whether growth is barred specifically by the concurrent-worker spend cap (live
914
+ * workers at the cap while still under `max`). Lets `acquire` distinguish a
915
+ * budget refusal (`spend_cap`, returned now) from a transient `max` saturation
916
+ * (which waits on the FIFO queue).
917
+ */
918
+ blockedBySpendCap() {
919
+ const concurrentCap = this.settings.spend?.maxConcurrentWorkers;
920
+ if (concurrentCap === undefined)
921
+ return false;
922
+ const live = this.liveWorkerCount() + this.reservedProvisions;
923
+ return live >= concurrentCap && live < this.settings.max;
924
+ }
925
+ /** Whether the issue already holds its maxWorkersPerIssue, so it cannot grow. */
926
+ issueAtGrowthCap(req) {
927
+ const cap = this.settings.maxWorkersPerIssue;
928
+ if (cap === undefined)
929
+ return false;
930
+ return this.issueLeaseCount(req.issueId) >= cap;
931
+ }
932
+ /**
933
+ * Count of workers attributed to an issue for cap purposes: workers whose inventory
934
+ * row already carries the issue PLUS any in-flight grows reserved for it (a grow
935
+ * decided but whose provision has not yet landed in inventory). Counting the
936
+ * reservation is what makes two concurrent same-issue grows respect the cap.
937
+ */
938
+ issueLeaseCount(issueId) {
939
+ let held = this.reservedProvisionsByIssue.get(issueId) ?? 0;
940
+ for (const record of this.inventory.values()) {
941
+ if ((record.leaseIssues?.get(issueId) ?? 0) > 0)
942
+ held += 1;
943
+ }
944
+ return held;
945
+ }
946
+ /**
947
+ * Grows one worker under the synchronous reservation. The reservation is taken
948
+ * BEFORE the provision await so a concurrent growth decision sees it and cannot
949
+ * exceed `max`; it is released on settle/reject. A successful provision is
950
+ * stamped and leased immediately.
951
+ */
952
+ async grow(req) {
953
+ // Reserve synchronously, then re-validate (a racing reservation may have
954
+ // just consumed the last slot in this same tick).
955
+ this.reservedProvisions += 1;
956
+ if (this.liveWorkerCount() + this.reservedProvisions > this.settings.max) {
957
+ this.reservedProvisions -= 1;
958
+ return { status: "no_capacity", reason: "spend_cap" };
959
+ }
960
+ // Also reserve the per-issue slot synchronously so a concurrent grow for the
961
+ // SAME issue sees this in-flight grow and cannot itself slip past the cap
962
+ // before this provision has landed in inventory. The reservation is included
963
+ // in the issue cap counts and released in the finally below.
964
+ this.reserveIssueProvision(req.issueId);
965
+ const workerId = `worker-${this.workerSeq++}`;
966
+ const labels = [POOL_OWNED_LABEL, ...req.labels];
967
+ // Capture the driver that will actually run this provision (and its
968
+ // generation) BEFORE the await, so a swapDriver racing the provision cannot
969
+ // misattribute the resulting worker: the record's origin is stamped to THIS
970
+ // driver so recycle destroys it on the backend that created it.
971
+ const originDriver = this.driver;
972
+ const originGeneration = this.driverGeneration;
973
+ try {
974
+ // Write-ahead: flush a provisional ledger row BEFORE the provision await so a
975
+ // crash mid-provision leaves a recoverable record (reconciled by hydrate
976
+ // against driver.list()). Inert for non-cloud drivers.
977
+ await this.writeProvisionalRow(workerId, labels);
978
+ const descriptor = await originDriver.provision({
979
+ workerId,
980
+ affinityKey: req.affinityKey ?? null,
981
+ // Stamp the pool-owned label alongside the request labels so a leaked
982
+ // worker (crash between provision and inventory write) is recognized as ours
983
+ // by the reaper's `list()` reconcile and can be destroyed.
984
+ labels,
985
+ timeoutMs: req.timeoutMs,
986
+ ...(req.signal ? { signal: req.signal } : {}),
987
+ ...(this.settings.driverOptions ? { driverOptions: this.settings.driverOptions } : {}),
988
+ });
989
+ // Correlate: upsert the provisional row with the real driverRef/workerHost
990
+ // now the driver has returned, completing the write-ahead correlate.
991
+ await this.correlateRow(descriptor);
992
+ // A swapDriver may have run WHILE this provision was in flight, so the worker
993
+ // was created on the now-stale `originDriver`, not the live `this.driver`.
994
+ const swappedDuringProvision = this.driverGeneration !== originGeneration;
995
+ // Readiness gate: never lease a worker that is not yet SSH-reachable (the
996
+ // "reachable before leased" contract). Probe it on the driver that created it
997
+ // BEFORE it enters inventory, so a concurrent acquire cannot grab a not-yet-ready
998
+ // worker and an unready cold worker is destroyed + reported as no-capacity rather than
999
+ // handed to the runner (which would fail, poison the lease, and churn a healthy
1000
+ // worker). Inert for an already-up host / the fake (probes ok on the first try).
1001
+ if (!(await this.probeUntilReady(descriptor, originDriver))) {
1002
+ await this.destroyDescriptor(descriptor, "unhealthy", originDriver);
1003
+ return { status: "no_capacity", reason: "driver_error" };
1004
+ }
1005
+ // The pool may have started draining (or been disabled) WHILE this provision OR
1006
+ // the readiness probe was in flight. runDrain snapshotted inventory before the
1007
+ // worker existed, so adding it now would leak a paid worker past a completed drain.
1008
+ // Destroy it instead of stamping it in - on the ORIGIN driver that created it.
1009
+ if (this.draining || !this.settings.enabled) {
1010
+ await this.destroyDescriptor(descriptor, "drain", originDriver);
1011
+ return { status: "no_capacity", reason: "pool_disabled" };
1012
+ }
1013
+ const record = {
1014
+ workerId: descriptor.workerId,
1015
+ workerHost: descriptor.workerHost,
1016
+ driverRef: descriptor.driverRef,
1017
+ state: "WARM_IDLE",
1018
+ labels: [...descriptor.labels],
1019
+ createdAtMs: descriptor.createdAtMs,
1020
+ leaseId: null,
1021
+ inFlight: 0,
1022
+ lastIdleAtMs: this.leaseClock.now(),
1023
+ lastHeartbeatMs: this.leaseClock.now(),
1024
+ workerSecondsUsed: 0,
1025
+ // A swap during the provision means this worker was created on a now-stale
1026
+ // driver; flag it for destroy so the reaper / settle recycles it (it
1027
+ // cannot serve the live driver and the new driver's list() will not own
1028
+ // it). A no-swap grow leaves this false (byte-identical default).
1029
+ markedForDestroy: swappedDuringProvision,
1030
+ affinityKey: null,
1031
+ metadata: { ...descriptor.metadata },
1032
+ leaseIssues: new Map(),
1033
+ // Record the backend that actually provisioned this worker so recycle destroys
1034
+ // it there. Only set when a swap happened during the await; an un-swapped
1035
+ // grow leaves it undefined so recycle falls back to `this.driver`
1036
+ // (byte-identical to the prior default path).
1037
+ ...(swappedDuringProvision ? { originDriver } : {}),
1038
+ };
1039
+ this.inventory.set(record.workerId, record);
1040
+ const lease = this.stamp(record, req);
1041
+ return { status: "leased", lease };
1042
+ }
1043
+ catch (error) {
1044
+ this.logEvent({
1045
+ event: "worker_pool_provision_failed",
1046
+ workerId,
1047
+ error: errorMessage(error),
1048
+ });
1049
+ // The provision rejected: drop the write-ahead provisional row so a failed
1050
+ // grow leaves no dangling row a later hydrate would have to reap.
1051
+ await this.ledger.delete(workerId);
1052
+ return { status: "no_capacity", reason: "driver_error" };
1053
+ }
1054
+ finally {
1055
+ // Release the reservations on settle OR reject so a failed provision never
1056
+ // permanently blocks future growth.
1057
+ this.reservedProvisions -= 1;
1058
+ this.releaseIssueProvision(req.issueId);
1059
+ }
1060
+ }
1061
+ // --- waiter queue -------------------------------------------------------
1062
+ /**
1063
+ * Parks a blocked acquire on the FIFO queue. Resolves to a lease when a worker
1064
+ * frees, or to `no_capacity:acquire_timeout` when the timeout fires or the
1065
+ * request is aborted. The abort path resolves promptly so the poll thread is
1066
+ * never held to the full timeout.
1067
+ */
1068
+ async waitForCapacity(req) {
1069
+ return new Promise((resolve) => {
1070
+ const waiter = {
1071
+ req,
1072
+ settled: false,
1073
+ resolve,
1074
+ timer: this.clock.setTimeout(() => {
1075
+ this.settleWaiter(waiter, { status: "no_capacity", reason: "acquire_timeout" });
1076
+ }, req.timeoutMs),
1077
+ cleanupAbort: null,
1078
+ };
1079
+ waiter.timer.unref?.();
1080
+ if (req.signal) {
1081
+ if (req.signal.aborted) {
1082
+ this.settleWaiter(waiter, { status: "no_capacity", reason: "acquire_timeout" });
1083
+ return;
1084
+ }
1085
+ const onAbort = () => {
1086
+ this.settleWaiter(waiter, { status: "no_capacity", reason: "acquire_timeout" });
1087
+ };
1088
+ req.signal.addEventListener("abort", onAbort, { once: true });
1089
+ waiter.cleanupAbort = () => req.signal?.removeEventListener("abort", onAbort);
1090
+ }
1091
+ this.waiters.push(waiter);
1092
+ });
1093
+ }
1094
+ /** Resolves a waiter exactly once and tears down its timer/abort listener. */
1095
+ settleWaiter(waiter, result) {
1096
+ if (waiter.settled)
1097
+ return;
1098
+ waiter.settled = true;
1099
+ this.clock.clearTimeout(waiter.timer);
1100
+ waiter.cleanupAbort?.();
1101
+ const index = this.waiters.indexOf(waiter);
1102
+ if (index !== -1)
1103
+ this.waiters.splice(index, 1);
1104
+ waiter.resolve(result);
1105
+ }
1106
+ /**
1107
+ * Wakes the oldest waiter that can now be satisfied by a freed/grown worker. Runs
1108
+ * after any event that frees capacity (release/reconcile). Each woken waiter
1109
+ * synchronously re-runs select-and-stamp so it cannot be starved by a fresh
1110
+ * acquire racing in.
1111
+ */
1112
+ wakeWaiters() {
1113
+ // Iterate a snapshot; settleWaiter mutates the live array.
1114
+ for (const waiter of [...this.waiters]) {
1115
+ if (waiter.settled)
1116
+ continue;
1117
+ if (this.draining || !this.settings.enabled) {
1118
+ this.settleWaiter(waiter, { status: "no_capacity", reason: "pool_disabled" });
1119
+ continue;
1120
+ }
1121
+ const lease = this.selectAndStamp(waiter.req);
1122
+ if (lease) {
1123
+ this.settleWaiter(waiter, { status: "leased", lease });
1124
+ }
1125
+ }
1126
+ // The FIFO waiters had first claim on the freed capacity; whatever remains
1127
+ // leasable is announced so the runtime can nudge its poll (a waiter that
1128
+ // consumed the only worker leaves canAcquire() false and suppresses this).
1129
+ this.notifyCapacityAvailable();
1130
+ }
1131
+ // --- lease settlement / spend accounting -------------------------------
1132
+ /**
1133
+ * Pool-side accounting run INSIDE the per-worker mutex when a lease settles.
1134
+ * Decrements `inFlight`, accrues worker-seconds, and either returns the worker to
1135
+ * WARM_IDLE (healthy) or recycles it (poison / markedForDestroy). The single
1136
+ * destroy here is serialized by the per-worker mutex so a reaper tick observing
1137
+ * the same `inFlight->0` cannot double-destroy.
1138
+ */
1139
+ async onLeaseSettle(record, issueId, leaseAcquiredMs, outcome, _reason) {
1140
+ // Roll the day key BEFORE accruing into `dailyWorkerSecondsUsed`. A lease
1141
+ // acquired late on day N but released into day N+1 must bill day N+1 (the day
1142
+ // it settled), not day N's already-counted window. Without this roll the
1143
+ // accumulator stays keyed on the stale day, the daily cap is bypassed across
1144
+ // the midnight boundary, and the persisted `spend.json` (whose `recordDaily`
1145
+ // and `flushDaily` writes key on the live UTC day) diverges from memory.
1146
+ this.rollDayKeyIfNeeded();
1147
+ record.inFlight -= 1;
1148
+ // Decrement THIS issue's lease refcount on the worker; only forget the issue once
1149
+ // its LAST lease here settles. A plain delete-per-settle would drop the issue
1150
+ // while a co-resident sibling slot (`slotsPerMachine>1`) still occupies the worker,
1151
+ // letting `maxWorkersPerIssue` be bypassed.
1152
+ if (record.leaseIssues) {
1153
+ const remaining = (record.leaseIssues.get(issueId) ?? 0) - 1;
1154
+ if (remaining > 0)
1155
+ record.leaseIssues.set(issueId, remaining);
1156
+ else
1157
+ record.leaseIssues.delete(issueId);
1158
+ }
1159
+ // Drop THIS lease's tracked acquire time so a later drain does not re-bill its
1160
+ // (now settled) window. Remove a single matching entry (overlapping leases may
1161
+ // share an acquire timestamp).
1162
+ if (record.liveLeaseAcquiredMs) {
1163
+ const index = record.liveLeaseAcquiredMs.indexOf(leaseAcquiredMs);
1164
+ if (index !== -1)
1165
+ record.liveLeaseAcquiredMs.splice(index, 1);
1166
+ }
1167
+ // Accrue wall-clock worker-seconds for THIS lease window: acquire time to now.
1168
+ // Billing from the lease's own acquire timestamp (not `lastHeartbeatMs`) means
1169
+ // a long heartbeating run is charged its full window and two overlapping leases
1170
+ // on one worker (maxInFlight>1) each accrue their own window. `lastHeartbeatMs`
1171
+ // stays purely a staleness stamp for the reaper's orphan detection.
1172
+ const now = this.leaseClock.now();
1173
+ const elapsedSeconds = Math.max(0, (now - leaseAcquiredMs) / 1000);
1174
+ record.workerSecondsUsed += elapsedSeconds;
1175
+ this.workerSecondsUsed += elapsedSeconds;
1176
+ this.dailyWorkerSecondsUsed += elapsedSeconds;
1177
+ void this.ledger
1178
+ .recordDailyWorkerSeconds(elapsedSeconds)
1179
+ .catch((error) => this.logEvent({ event: "worker_pool_ledger_write_failed", error: errorMessage(error) }));
1180
+ // Remember a poison outcome even when a co-resident sibling lease is still in
1181
+ // flight (`slotsPerMachine>1`). Flag the worker for destroy NOW so it cannot serve a
1182
+ // fresh lease (isLeasable rejects markedForDestroy) and so the LAST sibling to
1183
+ // settle recycles it instead of returning a known-bad worker to WARM_IDLE. With the
1184
+ // default `slotsPerMachine=1` inFlight is already 0 here, so this is inert.
1185
+ if (outcome === "poison")
1186
+ record.markedForDestroy = true;
1187
+ if (record.inFlight > 0) {
1188
+ // Other leases still hold this worker (slotsPerMachine>1); leave it LEASED until
1189
+ // the last one settles, which then recycles it if poisoned or reaper-flagged.
1190
+ return;
1191
+ }
1192
+ record.leaseId = null;
1193
+ if (outcome === "poison" || record.markedForDestroy) {
1194
+ // A poisoned or reaper-flagged worker is recycled the instant its last lease
1195
+ // returns; the per-worker mutex (this callback runs inside it) serializes the
1196
+ // single destroy so a reaper tick cannot double-destroy / underflow.
1197
+ await this.recycle(record, "failed");
1198
+ }
1199
+ else if (this.draining) {
1200
+ // During drain the worker is left in inventory for runDrain to force-destroy;
1201
+ // here we only mark it idle so the drain barrier can observe inFlight->0.
1202
+ record.state = "WARM_IDLE";
1203
+ record.lastIdleAtMs = now;
1204
+ }
1205
+ else {
1206
+ record.state = "WARM_IDLE";
1207
+ record.lastIdleAtMs = now;
1208
+ record.affinityKey = record.workerHost;
1209
+ }
1210
+ // Wake the drain barrier once nothing is in flight anymore.
1211
+ if (this.draining && this.totalInFlight() === 0)
1212
+ this.notifyDrained?.();
1213
+ this.wakeWaiters();
1214
+ }
1215
+ /**
1216
+ * Accrues the in-flight worker-seconds window of every outstanding lease on a worker
1217
+ * that is about to be force-destroyed mid-lease (the drain path). Each tracked
1218
+ * acquire timestamp is billed from acquire-to-now into the process / daily / worker
1219
+ * accumulators and the persisted sidecar, then cleared so the late no-op release
1220
+ * never double-bills. The day key is rolled first so a window that straddled UTC
1221
+ * midnight bills the day it settled. Must be called inside the per-worker mutex.
1222
+ */
1223
+ accrueInFlightWindows(record) {
1224
+ const live = record.liveLeaseAcquiredMs;
1225
+ if (!live || live.length === 0)
1226
+ return;
1227
+ this.rollDayKeyIfNeeded();
1228
+ const now = this.leaseClock.now();
1229
+ for (const acquiredMs of live) {
1230
+ const elapsedSeconds = Math.max(0, (now - acquiredMs) / 1000);
1231
+ record.workerSecondsUsed += elapsedSeconds;
1232
+ this.workerSecondsUsed += elapsedSeconds;
1233
+ this.dailyWorkerSecondsUsed += elapsedSeconds;
1234
+ void this.ledger
1235
+ .recordDailyWorkerSeconds(elapsedSeconds)
1236
+ .catch((error) => this.logEvent({ event: "worker_pool_ledger_write_failed", error: errorMessage(error) }));
1237
+ }
1238
+ record.liveLeaseAcquiredMs = [];
1239
+ record.inFlight = 0;
1240
+ }
1241
+ /**
1242
+ * Destroys a worker and removes it from inventory. Idempotent: a worker already
1243
+ * DESTROYED/removed is left alone. Must be called inside the per-worker mutex (or
1244
+ * during a single-threaded drain) so it runs exactly once per worker.
1245
+ */
1246
+ async recycle(record, reason) {
1247
+ if (record.state === "DESTROYED" || record.state === "DESTROYING")
1248
+ return;
1249
+ record.state = "DESTROYING";
1250
+ // Recycle-vs-endpoint ordering invariant: fire the recycling callbacks INSIDE
1251
+ // the per-worker mutex (we are inside it here) BEFORE `driver.destroy`, so the
1252
+ // coordinator can fail any still-open RunSlot bound to this worker cleanly (close
1253
+ // its endpoint, settle, deregister) before the host is torn out from under it.
1254
+ // The state is already flipped to DESTROYING above so this fires exactly once.
1255
+ this.notifyMachineRecycling(record.workerId);
1256
+ try {
1257
+ // Destroy against the worker's ORIGINAL driver when a swap captured one, so an
1258
+ // in-flight lease settling AFTER a driver hot-reload tears its worker down on
1259
+ // the backend that PROVISIONED it (never the new `this.driver`) and a paid
1260
+ // worker is never orphaned. Workers provisioned under the live driver carry no
1261
+ // `originDriver` and fall back to `this.driver` (byte-identical default).
1262
+ const driver = record.originDriver ?? this.driver;
1263
+ await driver.destroy({
1264
+ workerId: record.workerId,
1265
+ workerHost: record.workerHost,
1266
+ driverRef: record.driverRef,
1267
+ createdAtMs: record.createdAtMs,
1268
+ labels: record.labels,
1269
+ metadata: record.metadata,
1270
+ }, { timeoutMs: this.settings.acquireTimeoutMs, reason });
1271
+ }
1272
+ catch (error) {
1273
+ this.logEvent({
1274
+ event: "worker_pool_destroy_failed",
1275
+ workerId: record.workerId,
1276
+ error: errorMessage(error),
1277
+ });
1278
+ // The backend worker may still be running and billing. Dropping it from inventory
1279
+ // + ledger here would forget a PAID machine with no retry (a silent leak).
1280
+ // Instead keep it tracked but non-leasable (markedForDestroy) and put it back
1281
+ // in a reaper-retryable idle state: the serial reaper re-attempts the teardown
1282
+ // each tick (a flagged worker is reaped even below `min`), and across a restart
1283
+ // `hydrate` re-adopts it from the surviving ledger row and retries the destroy.
1284
+ record.markedForDestroy = true;
1285
+ record.leaseId = null;
1286
+ record.state = "WARM_IDLE";
1287
+ record.lastIdleAtMs = this.leaseClock.now();
1288
+ return;
1289
+ }
1290
+ record.state = "DESTROYED";
1291
+ void this.ledger
1292
+ .delete(record.workerId)
1293
+ .catch((error) => this.logEvent({ event: "worker_pool_ledger_write_failed", error: errorMessage(error) }));
1294
+ this.inventory.delete(record.workerId);
1295
+ this.workerMutexes.delete(record.workerId);
1296
+ }
1297
+ // --- reaper -------------------------------------------------------------
1298
+ /**
1299
+ * Arms (or re-arms) the single recurring reaper timer. The handle is detached
1300
+ * via `unref?.()` so it never keeps the process alive; the tick re-arms itself
1301
+ * at the end so the pass runs serially at the configured cadence. A stopped
1302
+ * pool (drained) arms nothing.
1303
+ */
1304
+ scheduleReaper() {
1305
+ if (this.reaperStopped)
1306
+ return;
1307
+ const handle = this.clock.setTimeout(() => {
1308
+ void this.driveReaper();
1309
+ }, this.settings.reapIntervalMs);
1310
+ handle.unref?.();
1311
+ this.reaperTimer = handle;
1312
+ }
1313
+ /**
1314
+ * Runs one serial reaper pass, then re-arms the timer. The in-progress guard
1315
+ * lives in `runReaperTick`, so even an unusually slow tick (a hung probe) can
1316
+ * never overlap with the next scheduled fire. The internals are re-synced to
1317
+ * the live settings each tick since `reconcile` swaps the whole settings object.
1318
+ */
1319
+ async driveReaper() {
1320
+ this.reaperTimer = null;
1321
+ if (this.reaperStopped || this.draining)
1322
+ return;
1323
+ this.reaperInternals.settings = this.settings;
1324
+ try {
1325
+ await runReaperTick(this.reaperInternals);
1326
+ }
1327
+ catch (error) {
1328
+ this.logEvent({ event: "worker_pool_reaper_failed", error: errorMessage(error) });
1329
+ }
1330
+ finally {
1331
+ this.scheduleReaper();
1332
+ }
1333
+ }
1334
+ /** Stops the recurring reaper timer (terminal; called on drain). */
1335
+ stopReaper() {
1336
+ this.reaperStopped = true;
1337
+ if (this.reaperTimer) {
1338
+ this.clock.clearTimeout(this.reaperTimer);
1339
+ this.reaperTimer = null;
1340
+ }
1341
+ }
1342
+ /**
1343
+ * Provisions one warm worker toward the min/warm target (driven by the reaper's
1344
+ * top-up). Goes through the same reservation as `grow` so a concurrent acquire
1345
+ * cannot push the live count past `max`, but the worker is left WARM_IDLE (no
1346
+ * lease stamped) so the next acquire can claim it. Failures are logged and
1347
+ * swallowed so a single bad provision never stalls the reaper.
1348
+ */
1349
+ async provisionWarm() {
1350
+ this.reservedProvisions += 1;
1351
+ if (this.liveWorkerCount() + this.reservedProvisions > this.settings.max) {
1352
+ this.reservedProvisions -= 1;
1353
+ return;
1354
+ }
1355
+ const workerId = `worker-${this.workerSeq++}`;
1356
+ const labels = [POOL_OWNED_LABEL];
1357
+ // Capture the driver that will run this warm provision (and its generation)
1358
+ // BEFORE the await so a swapDriver racing the provision cannot misattribute the
1359
+ // worker (same no-orphan invariant as `grow`).
1360
+ const originDriver = this.driver;
1361
+ const originGeneration = this.driverGeneration;
1362
+ try {
1363
+ // Write-ahead the provisional row BEFORE provision (recoverable mid-provision
1364
+ // crash), then correlate after the driver returns. Inert for non-cloud.
1365
+ await this.writeProvisionalRow(workerId, labels);
1366
+ const descriptor = await originDriver.provision({
1367
+ workerId,
1368
+ affinityKey: null,
1369
+ labels,
1370
+ timeoutMs: this.settings.acquireTimeoutMs,
1371
+ ...(this.settings.driverOptions ? { driverOptions: this.settings.driverOptions } : {}),
1372
+ });
1373
+ await this.correlateRow(descriptor);
1374
+ // A swapDriver may have run WHILE this warm provision was in flight, so the
1375
+ // worker was created on the now-stale `originDriver`, not `this.driver`.
1376
+ const swappedDuringProvision = this.driverGeneration !== originGeneration;
1377
+ // Readiness gate (same "reachable before leased" contract as `grow`): a warm worker
1378
+ // must be SSH-reachable BEFORE it becomes WARM_IDLE and leasable, so an acquire
1379
+ // never grabs a not-yet-ready top-up worker. A worker that never becomes ready is
1380
+ // destroyed and skipped (the reaper re-tops-up); inert for an already-up host.
1381
+ if (!(await this.probeUntilReady(descriptor, originDriver))) {
1382
+ await this.destroyDescriptor(descriptor, "unhealthy", originDriver);
1383
+ return;
1384
+ }
1385
+ // A drain (or disable) may have begun WHILE this warm provision OR the readiness
1386
+ // probe was in flight; runDrain snapshotted inventory before the worker existed, so
1387
+ // adding it now would leak a paid worker past a completed drain. Destroy it instead -
1388
+ // on the ORIGIN driver that created it.
1389
+ if (this.draining || !this.settings.enabled) {
1390
+ await this.destroyDescriptor(descriptor, "drain", originDriver);
1391
+ return;
1392
+ }
1393
+ const now = this.leaseClock.now();
1394
+ const record = {
1395
+ workerId: descriptor.workerId,
1396
+ workerHost: descriptor.workerHost,
1397
+ driverRef: descriptor.driverRef,
1398
+ state: "WARM_IDLE",
1399
+ labels: [...descriptor.labels],
1400
+ createdAtMs: descriptor.createdAtMs,
1401
+ leaseId: null,
1402
+ inFlight: 0,
1403
+ lastIdleAtMs: now,
1404
+ lastHeartbeatMs: now,
1405
+ workerSecondsUsed: 0,
1406
+ // A swap during the provision means this warm worker was created on a stale
1407
+ // driver; flag it for destroy (it cannot serve the live driver).
1408
+ markedForDestroy: swappedDuringProvision,
1409
+ affinityKey: null,
1410
+ metadata: { ...descriptor.metadata },
1411
+ leaseIssues: new Map(),
1412
+ // Record the backend that actually provisioned this worker (only on a swap; an
1413
+ // un-swapped warm provision leaves it undefined -> falls back to
1414
+ // `this.driver`, byte-identical to the prior path).
1415
+ ...(swappedDuringProvision ? { originDriver } : {}),
1416
+ };
1417
+ this.inventory.set(record.workerId, record);
1418
+ if (swappedDuringProvision) {
1419
+ // This idle warm worker was provisioned on a now-stale driver, so it cannot
1420
+ // serve the live driver AND the new driver's list() will not own it
1421
+ // (the reaper's list-reconcile would otherwise DROP the record without
1422
+ // tearing the worker down, orphaning a paid machine on the old backend).
1423
+ // Recycle it NOW on its captured origin (under its per-worker mutex, exactly as
1424
+ // swapDriver recycles old-driver idle workers) so the destroy is
1425
+ // deterministic and routed to the backend that created it.
1426
+ await this.mutexFor(record.workerId).runExclusive(async () => {
1427
+ if (record.inFlight !== 0)
1428
+ return; // a lease landed first; settle recycles it
1429
+ await this.recycle(record, "shrink");
1430
+ });
1431
+ }
1432
+ }
1433
+ catch (error) {
1434
+ this.logEvent({
1435
+ event: "worker_pool_warm_provision_failed",
1436
+ workerId,
1437
+ error: errorMessage(error),
1438
+ });
1439
+ // Drop the write-ahead provisional row for a failed warm provision so no
1440
+ // dangling row outlives the attempt.
1441
+ await this.ledger.delete(workerId);
1442
+ }
1443
+ finally {
1444
+ this.reservedProvisions -= 1;
1445
+ }
1446
+ }
1447
+ // --- drain --------------------------------------------------------------
1448
+ /**
1449
+ * Flips DRAINING, rejects new acquires, waits for in-flight leases up to the
1450
+ * deadline, then force-destroys ALL workers (held or not) so no paid cloud worker
1451
+ * leaks past process exit.
1452
+ */
1453
+ async runDrain(opts, epoch) {
1454
+ // Stop the recurring reaper so a draining pool issues no further ticks.
1455
+ this.stopReaper();
1456
+ // Reject every parked waiter immediately.
1457
+ for (const waiter of [...this.waiters]) {
1458
+ this.settleWaiter(waiter, { status: "no_capacity", reason: "pool_disabled" });
1459
+ }
1460
+ // Wait for in-flight leases to settle, bounded by the deadline. Event-driven:
1461
+ // `onLeaseSettle` resolves `notifyDrained` once `inFlight` hits zero, and a
1462
+ // deadline timer (real or fake-clock) resolves the race otherwise. Either way
1463
+ // we then force-destroy every remaining worker (held or not) so no worker leaks.
1464
+ if (this.totalInFlight() > 0 && !opts.signal?.aborted) {
1465
+ await new Promise((resolve) => {
1466
+ let done = false;
1467
+ const finish = () => {
1468
+ if (done)
1469
+ return;
1470
+ done = true;
1471
+ // Only the OWNING drain clears the shared barrier hook; a re-enable
1472
+ // may have already nulled/replaced it, so guard the assignment.
1473
+ if (this.drainEpoch === epoch)
1474
+ this.notifyDrained = null;
1475
+ this.clock.clearTimeout(timer);
1476
+ if (onAbort && opts.signal)
1477
+ opts.signal.removeEventListener("abort", onAbort);
1478
+ resolve();
1479
+ };
1480
+ const timer = this.clock.setTimeout(finish, opts.deadlineMs);
1481
+ timer.unref?.();
1482
+ this.notifyDrained = finish;
1483
+ const onAbort = opts.signal ? finish : null;
1484
+ if (onAbort && opts.signal)
1485
+ opts.signal.addEventListener("abort", onAbort, { once: true });
1486
+ });
1487
+ }
1488
+ // Bail if this drain has been superseded. A reconcile re-enable (false->true)
1489
+ // clears `draining`, bumps `drainEpoch`, and grows fresh workers; an orphaned
1490
+ // drain whose deadline fired AFTER that re-enable must NOT force-destroy the
1491
+ // now-LIVE pool's workers. Still flush the daily total below so a superseded
1492
+ // drain does not drop the spend it observed.
1493
+ if (this.drainEpoch !== epoch || !this.draining) {
1494
+ await this.flushDailySpendForDrain();
1495
+ return;
1496
+ }
1497
+ // Force-destroy every remaining worker, held lease or not (the leak fix). Each
1498
+ // destroy runs inside that worker's mutex (consistent with every other teardown)
1499
+ // so a late `onLeaseSettle` — which while draining would flip the worker back to
1500
+ // WARM_IDLE — cannot interleave with the in-progress recycle and resurrect a
1501
+ // worker mid-destroy. The settle either runs fully before the destroy (and is
1502
+ // then overwritten to DESTROYED) or fully after (and no-ops on the DESTROYED worker).
1503
+ for (const record of [...this.inventory.values()]) {
1504
+ // Re-check the epoch each iteration: a re-enable racing the loop must stop
1505
+ // it from destroying any further workers the now-live pool depends on.
1506
+ if (this.drainEpoch !== epoch || !this.draining)
1507
+ break;
1508
+ await this.mutexFor(record.workerId).runExclusive(async () => {
1509
+ // A worker still LEASED at the deadline never ran `onLeaseSettle` for its
1510
+ // outstanding leases, and the late `release()` will no-op on the DESTROYED
1511
+ // guard, so accrue each outstanding lease's in-flight window HERE (inside
1512
+ // the per-worker mutex) before the force-destroy. Without this the spend is
1513
+ // under-counted and the persisted sidecar drops the window across restart.
1514
+ this.accrueInFlightWindows(record);
1515
+ await this.recycle(record, "drain");
1516
+ });
1517
+ }
1518
+ await this.flushDailySpendForDrain();
1519
+ }
1520
+ /**
1521
+ * Durably flushes the authoritative in-memory daily total at the end of a
1522
+ * drain. The hot path records each delta fire-and-forget
1523
+ * (`void recordDailyWorkerSeconds`), so a crash could lose the last unpersisted
1524
+ * deltas; a clean drain SETS the absolute total here (serialized after any
1525
+ * pending additive write) so the persisted sidecar matches the in-memory total
1526
+ * a restart will seed from. The day key is rolled first so a flush that lands
1527
+ * after a UTC-midnight crossing writes the new day's accumulator.
1528
+ */
1529
+ async flushDailySpendForDrain() {
1530
+ this.rollDayKeyIfNeeded();
1531
+ await this.ledger.flushDailyWorkerSeconds(this.dailyWorkerSecondsUsed);
1532
+ }
1533
+ // --- helpers ------------------------------------------------------------
1534
+ mutexFor(workerId) {
1535
+ let mutex = this.workerMutexes.get(workerId);
1536
+ if (!mutex) {
1537
+ mutex = createMutex();
1538
+ this.workerMutexes.set(workerId, mutex);
1539
+ }
1540
+ return mutex;
1541
+ }
1542
+ /** Reserves one per-issue grow slot (counted in the issue caps until released). */
1543
+ reserveIssueProvision(issueId) {
1544
+ this.reservedProvisionsByIssue.set(issueId, (this.reservedProvisionsByIssue.get(issueId) ?? 0) + 1);
1545
+ }
1546
+ /** Releases a previously reserved per-issue grow slot. */
1547
+ releaseIssueProvision(issueId) {
1548
+ const next = (this.reservedProvisionsByIssue.get(issueId) ?? 0) - 1;
1549
+ if (next <= 0)
1550
+ this.reservedProvisionsByIssue.delete(issueId);
1551
+ else
1552
+ this.reservedProvisionsByIssue.set(issueId, next);
1553
+ }
1554
+ /**
1555
+ * Writes the write-ahead provisional ledger row for a worker BEFORE its provision is
1556
+ * awaited. The row carries the workerId + the pool-owned label but no driverRef /
1557
+ * workerHost yet (the driver has not returned), so a crash between provision
1558
+ * and the inventory write leaves a recoverable record on disk. Inert (zero fs
1559
+ * I/O) for non-cloud drivers (the ledger is a no-op when `usesLedger` is false).
1560
+ */
1561
+ async writeProvisionalRow(workerId, labels) {
1562
+ const now = this.leaseClock.now();
1563
+ const row = {
1564
+ workerId,
1565
+ driverRef: null,
1566
+ workerHost: null,
1567
+ labels: [...labels],
1568
+ status: "provisional",
1569
+ createdAtMs: now,
1570
+ updatedAtMs: now,
1571
+ };
1572
+ await this.ledger.upsert(row);
1573
+ }
1574
+ /**
1575
+ * Upserts the CORRELATED active ledger row for a worker AFTER its provision returns,
1576
+ * stamping the real driverRef / workerHost over the earlier provisional row
1577
+ * (same workerId, so it is replaced, not appended). Completes the write-ahead
1578
+ * correlate. Inert for non-cloud drivers.
1579
+ */
1580
+ async correlateRow(descriptor) {
1581
+ const now = this.leaseClock.now();
1582
+ const row = {
1583
+ workerId: descriptor.workerId,
1584
+ driverRef: descriptor.driverRef,
1585
+ workerHost: descriptor.workerHost,
1586
+ labels: [...descriptor.labels],
1587
+ status: "active",
1588
+ createdAtMs: descriptor.createdAtMs,
1589
+ updatedAtMs: now,
1590
+ };
1591
+ await this.ledger.upsert(row);
1592
+ }
1593
+ /**
1594
+ * Destroys a driver descriptor that was created but never entered inventory
1595
+ * (e.g. a worker provisioned while the pool started draining). Best-effort: a
1596
+ * failure is logged and swallowed so the caller can still bail. The optional
1597
+ * `driver` override destroys the worker on the backend that ACTUALLY provisioned it
1598
+ * (the captured origin) when a swap raced the provision; it defaults to the live
1599
+ * `this.driver` (byte-identical to the prior single-driver path).
1600
+ */
1601
+ async destroyDescriptor(descriptor, reason, driver = this.driver) {
1602
+ try {
1603
+ await driver.destroy({
1604
+ workerId: descriptor.workerId,
1605
+ workerHost: descriptor.workerHost,
1606
+ driverRef: descriptor.driverRef,
1607
+ createdAtMs: descriptor.createdAtMs,
1608
+ labels: descriptor.labels,
1609
+ metadata: descriptor.metadata,
1610
+ }, { timeoutMs: this.settings.acquireTimeoutMs, reason });
1611
+ }
1612
+ catch (error) {
1613
+ // Keep the write-ahead ledger row on failure: the backend worker may still be
1614
+ // running, and the surviving row lets `hydrate` re-adopt it after a restart and
1615
+ // retry teardown instead of silently leaking a paid worker. (Byte-identical to the
1616
+ // prior swallow except the row is no longer dropped when destroy did not run.)
1617
+ this.logEvent({
1618
+ event: "worker_pool_destroy_failed",
1619
+ workerId: descriptor.workerId,
1620
+ error: errorMessage(error),
1621
+ });
1622
+ return;
1623
+ }
1624
+ void this.ledger
1625
+ .delete(descriptor.workerId)
1626
+ .catch((error) => this.logEvent({ event: "worker_pool_ledger_write_failed", error: errorMessage(error) }));
1627
+ }
1628
+ liveWorkerCount() {
1629
+ let count = 0;
1630
+ for (const record of this.inventory.values()) {
1631
+ if (isLive(record.state))
1632
+ count += 1;
1633
+ }
1634
+ return count;
1635
+ }
1636
+ totalInFlight() {
1637
+ let total = 0;
1638
+ for (const record of this.inventory.values())
1639
+ total += record.inFlight;
1640
+ return total;
1641
+ }
1642
+ rollDayKeyIfNeeded() {
1643
+ const today = utcDayKey(this.clock.now());
1644
+ if (today !== this.dayKey) {
1645
+ this.dayKey = today;
1646
+ this.dailyWorkerSecondsUsed = 0;
1647
+ }
1648
+ }
1649
+ workerSecondsExhausted() {
1650
+ const spend = this.settings.spend;
1651
+ if (!spend)
1652
+ return false;
1653
+ if (spend.maxWorkerSeconds !== undefined && this.workerSecondsUsed >= spend.maxWorkerSeconds) {
1654
+ return true;
1655
+ }
1656
+ if (spend.dailyWorkerSeconds !== undefined &&
1657
+ this.dailyWorkerSecondsUsed >= spend.dailyWorkerSeconds) {
1658
+ return true;
1659
+ }
1660
+ return false;
1661
+ }
1662
+ }
1663
/**
 * Turns an arbitrary thrown value into a string for structured logs.
 *
 * @param {unknown} error The caught value.
 * @returns {string} `error.message` for `Error` instances, `String(error)` otherwise.
 */
function errorMessage(error) {
    if (error instanceof Error) {
        return error.message;
    }
    return String(error);
}
1667
/**
 * Tells whether two settings objects would construct a different driver: true
 * when the `driver` kind differs or when `driverOptions` are not structurally
 * equal. The options are only compared when the kind is the same.
 *
 * @param {{ driver: unknown, driverOptions?: unknown }} prev Previous settings.
 * @param {{ driver: unknown, driverOptions?: unknown }} next Incoming settings.
 * @returns {boolean} True when the driver must be rebuilt.
 */
function driverConstructionChanged(prev, next) {
    const kindChanged = prev.driver !== next.driver;
    return kindChanged || !deepEqual(prev.driverOptions, next.driverOptions);
}
1679
/**
 * Structural equality for JSON-shaped values (primitives, arrays, plain objects).
 *
 * Identical values (`===`) are equal. Otherwise both sides must be non-null
 * objects of the same kind (both arrays or both non-arrays). Arrays are compared
 * by length and element by element; objects by own enumerable key count, key
 * presence on the other side, and value, recursively.
 *
 * @param {unknown} a Left value.
 * @param {unknown} b Right value.
 * @returns {boolean} True when the two values are structurally equal.
 */
function deepEqual(a, b) {
    if (a === b) {
        return true;
    }
    const bothObjects = a !== null && b !== null && typeof a === "object" && typeof b === "object";
    if (!bothObjects) {
        return false;
    }
    const leftIsList = Array.isArray(a);
    if (leftIsList !== Array.isArray(b)) {
        return false;
    }
    if (leftIsList) {
        if (a.length !== b.length) {
            return false;
        }
        // `entries()` visits every index (holes included), like an index loop.
        for (const [index, item] of a.entries()) {
            if (!deepEqual(item, b[index])) {
                return false;
            }
        }
        return true;
    }
    const leftKeys = Object.keys(a);
    if (leftKeys.length !== Object.keys(b).length) {
        return false;
    }
    return leftKeys.every((key) => Object.prototype.hasOwnProperty.call(b, key) && deepEqual(a[key], b[key]));
}
1716
/**
 * Public factory for a worker pool: builds a `WorkerPoolImpl` from the given
 * settings and dependencies and returns it. All driver resolution and ledger
 * wiring happens inside the implementation's constructor, so an invalid
 * configuration surfaces as whatever that constructor throws.
 *
 * @param {object} settings Pool settings handed to the implementation.
 * @param {object} deps Injected dependencies handed to the implementation.
 * @returns {WorkerPoolImpl} The constructed pool.
 */
export function createWorkerPool(settings, deps) {
    const pool = new WorkerPoolImpl(settings, deps);
    return pool;
}
1727
+ //# sourceMappingURL=pool.js.map