lorenz 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +201 -0
- package/NOTICE +13 -0
- package/README.md +774 -0
- package/RELEASE-MANIFEST.json +211 -0
- package/apps/cli/bin/lorenz.js +25 -0
- package/apps/cli/dist/bin/cli.d.ts +3 -0
- package/apps/cli/dist/bin/cli.d.ts.map +1 -0
- package/apps/cli/dist/bin/cli.js +4 -0
- package/apps/cli/dist/bin/cli.js.map +1 -0
- package/apps/cli/dist/daemon.d.ts +76 -0
- package/apps/cli/dist/daemon.d.ts.map +1 -0
- package/apps/cli/dist/daemon.js +189 -0
- package/apps/cli/dist/daemon.js.map +1 -0
- package/apps/cli/dist/doctor.d.ts +40 -0
- package/apps/cli/dist/doctor.d.ts.map +1 -0
- package/apps/cli/dist/doctor.js +590 -0
- package/apps/cli/dist/doctor.js.map +1 -0
- package/apps/cli/dist/index.d.ts +32 -0
- package/apps/cli/dist/index.d.ts.map +1 -0
- package/apps/cli/dist/index.js +26 -0
- package/apps/cli/dist/index.js.map +1 -0
- package/apps/cli/dist/main.d.ts +40 -0
- package/apps/cli/dist/main.d.ts.map +1 -0
- package/apps/cli/dist/main.js +259 -0
- package/apps/cli/dist/main.js.map +1 -0
- package/apps/cli/dist/runs.d.ts +31 -0
- package/apps/cli/dist/runs.d.ts.map +1 -0
- package/apps/cli/dist/runs.js +281 -0
- package/apps/cli/dist/runs.js.map +1 -0
- package/apps/cli/dist/workerDriverLoader.d.ts +64 -0
- package/apps/cli/dist/workerDriverLoader.d.ts.map +1 -0
- package/apps/cli/dist/workerDriverLoader.js +211 -0
- package/apps/cli/dist/workerDriverLoader.js.map +1 -0
- package/apps/cli/package.json +57 -0
- package/apps/symphony-dashboard/dist/assets/index-B3owF3jd.css +1 -0
- package/apps/symphony-dashboard/dist/assets/index-DQ6XlL0d.js +227 -0
- package/apps/symphony-dashboard/dist/index.html +18 -0
- package/bin/lorenz +16 -0
- package/extensions/docker-worker/dist/index.d.ts +92 -0
- package/extensions/docker-worker/dist/index.d.ts.map +1 -0
- package/extensions/docker-worker/dist/index.js +283 -0
- package/extensions/docker-worker/dist/index.js.map +1 -0
- package/extensions/docker-worker/package.json +14 -0
- package/extensions/jira-tracker/dist/client.d.ts +50 -0
- package/extensions/jira-tracker/dist/client.d.ts.map +1 -0
- package/extensions/jira-tracker/dist/client.js +619 -0
- package/extensions/jira-tracker/dist/client.js.map +1 -0
- package/extensions/jira-tracker/dist/index.d.ts +5 -0
- package/extensions/jira-tracker/dist/index.d.ts.map +1 -0
- package/extensions/jira-tracker/dist/index.js +5 -0
- package/extensions/jira-tracker/dist/index.js.map +1 -0
- package/extensions/jira-tracker/dist/options.d.ts +38 -0
- package/extensions/jira-tracker/dist/options.d.ts.map +1 -0
- package/extensions/jira-tracker/dist/options.js +61 -0
- package/extensions/jira-tracker/dist/options.js.map +1 -0
- package/extensions/jira-tracker/dist/provider.d.ts +6 -0
- package/extensions/jira-tracker/dist/provider.d.ts.map +1 -0
- package/extensions/jira-tracker/dist/provider.js +178 -0
- package/extensions/jira-tracker/dist/provider.js.map +1 -0
- package/extensions/jira-tracker/dist/register.d.ts +10 -0
- package/extensions/jira-tracker/dist/register.d.ts.map +1 -0
- package/extensions/jira-tracker/dist/register.js +15 -0
- package/extensions/jira-tracker/dist/register.js.map +1 -0
- package/extensions/jira-tracker/package.json +16 -0
- package/extensions/linear-tracker/dist/client.d.ts +82 -0
- package/extensions/linear-tracker/dist/client.d.ts.map +1 -0
- package/extensions/linear-tracker/dist/client.js +622 -0
- package/extensions/linear-tracker/dist/client.js.map +1 -0
- package/extensions/linear-tracker/dist/index.d.ts +8 -0
- package/extensions/linear-tracker/dist/index.d.ts.map +1 -0
- package/extensions/linear-tracker/dist/index.js +7 -0
- package/extensions/linear-tracker/dist/index.js.map +1 -0
- package/extensions/linear-tracker/dist/options.d.ts +32 -0
- package/extensions/linear-tracker/dist/options.d.ts.map +1 -0
- package/extensions/linear-tracker/dist/options.js +59 -0
- package/extensions/linear-tracker/dist/options.js.map +1 -0
- package/extensions/linear-tracker/dist/provider.d.ts +4 -0
- package/extensions/linear-tracker/dist/provider.d.ts.map +1 -0
- package/extensions/linear-tracker/dist/provider.js +58 -0
- package/extensions/linear-tracker/dist/provider.js.map +1 -0
- package/extensions/linear-tracker/dist/register.d.ts +11 -0
- package/extensions/linear-tracker/dist/register.d.ts.map +1 -0
- package/extensions/linear-tracker/dist/register.js +19 -0
- package/extensions/linear-tracker/dist/register.js.map +1 -0
- package/extensions/linear-tracker/dist/toolOps.d.ts +8 -0
- package/extensions/linear-tracker/dist/toolOps.d.ts.map +1 -0
- package/extensions/linear-tracker/dist/toolOps.js +160 -0
- package/extensions/linear-tracker/dist/toolOps.js.map +1 -0
- package/extensions/linear-tracker/dist/tools.d.ts +7 -0
- package/extensions/linear-tracker/dist/tools.d.ts.map +1 -0
- package/extensions/linear-tracker/dist/tools.js +210 -0
- package/extensions/linear-tracker/dist/tools.js.map +1 -0
- package/extensions/linear-tracker/package.json +18 -0
- package/extensions/local-tracker/dist/boardStore.d.ts +116 -0
- package/extensions/local-tracker/dist/boardStore.d.ts.map +1 -0
- package/extensions/local-tracker/dist/boardStore.js +475 -0
- package/extensions/local-tracker/dist/boardStore.js.map +1 -0
- package/extensions/local-tracker/dist/client.d.ts +14 -0
- package/extensions/local-tracker/dist/client.d.ts.map +1 -0
- package/extensions/local-tracker/dist/client.js +27 -0
- package/extensions/local-tracker/dist/client.js.map +1 -0
- package/extensions/local-tracker/dist/index.d.ts +7 -0
- package/extensions/local-tracker/dist/index.d.ts.map +1 -0
- package/extensions/local-tracker/dist/index.js +7 -0
- package/extensions/local-tracker/dist/index.js.map +1 -0
- package/extensions/local-tracker/dist/options.d.ts +31 -0
- package/extensions/local-tracker/dist/options.d.ts.map +1 -0
- package/extensions/local-tracker/dist/options.js +69 -0
- package/extensions/local-tracker/dist/options.js.map +1 -0
- package/extensions/local-tracker/dist/provider.d.ts +9 -0
- package/extensions/local-tracker/dist/provider.d.ts.map +1 -0
- package/extensions/local-tracker/dist/provider.js +35 -0
- package/extensions/local-tracker/dist/provider.js.map +1 -0
- package/extensions/local-tracker/dist/register.d.ts +11 -0
- package/extensions/local-tracker/dist/register.d.ts.map +1 -0
- package/extensions/local-tracker/dist/register.js +19 -0
- package/extensions/local-tracker/dist/register.js.map +1 -0
- package/extensions/local-tracker/dist/resolveBoardDir.d.ts +24 -0
- package/extensions/local-tracker/dist/resolveBoardDir.d.ts.map +1 -0
- package/extensions/local-tracker/dist/resolveBoardDir.js +39 -0
- package/extensions/local-tracker/dist/resolveBoardDir.js.map +1 -0
- package/extensions/local-tracker/dist/toolOps.d.ts +9 -0
- package/extensions/local-tracker/dist/toolOps.d.ts.map +1 -0
- package/extensions/local-tracker/dist/toolOps.js +86 -0
- package/extensions/local-tracker/dist/toolOps.js.map +1 -0
- package/extensions/local-tracker/dist/tools.d.ts +7 -0
- package/extensions/local-tracker/dist/tools.d.ts.map +1 -0
- package/extensions/local-tracker/dist/tools.js +170 -0
- package/extensions/local-tracker/dist/tools.js.map +1 -0
- package/extensions/local-tracker/package.json +18 -0
- package/extensions/memory-tracker/dist/index.d.ts +24 -0
- package/extensions/memory-tracker/dist/index.d.ts.map +1 -0
- package/extensions/memory-tracker/dist/index.js +110 -0
- package/extensions/memory-tracker/dist/index.js.map +1 -0
- package/extensions/memory-tracker/package.json +16 -0
- package/extensions/slack-tracker/dist/client.d.ts +88 -0
- package/extensions/slack-tracker/dist/client.d.ts.map +1 -0
- package/extensions/slack-tracker/dist/client.js +246 -0
- package/extensions/slack-tracker/dist/client.js.map +1 -0
- package/extensions/slack-tracker/dist/inMemoryTransport.d.ts +42 -0
- package/extensions/slack-tracker/dist/inMemoryTransport.d.ts.map +1 -0
- package/extensions/slack-tracker/dist/inMemoryTransport.js +104 -0
- package/extensions/slack-tracker/dist/inMemoryTransport.js.map +1 -0
- package/extensions/slack-tracker/dist/index.d.ts +15 -0
- package/extensions/slack-tracker/dist/index.d.ts.map +1 -0
- package/extensions/slack-tracker/dist/index.js +11 -0
- package/extensions/slack-tracker/dist/index.js.map +1 -0
- package/extensions/slack-tracker/dist/mapping.d.ts +27 -0
- package/extensions/slack-tracker/dist/mapping.d.ts.map +1 -0
- package/extensions/slack-tracker/dist/mapping.js +109 -0
- package/extensions/slack-tracker/dist/mapping.js.map +1 -0
- package/extensions/slack-tracker/dist/operations.d.ts +41 -0
- package/extensions/slack-tracker/dist/operations.d.ts.map +1 -0
- package/extensions/slack-tracker/dist/operations.js +97 -0
- package/extensions/slack-tracker/dist/operations.js.map +1 -0
- package/extensions/slack-tracker/dist/options.d.ts +30 -0
- package/extensions/slack-tracker/dist/options.d.ts.map +1 -0
- package/extensions/slack-tracker/dist/options.js +49 -0
- package/extensions/slack-tracker/dist/options.js.map +1 -0
- package/extensions/slack-tracker/dist/provider.d.ts +9 -0
- package/extensions/slack-tracker/dist/provider.d.ts.map +1 -0
- package/extensions/slack-tracker/dist/provider.js +74 -0
- package/extensions/slack-tracker/dist/provider.js.map +1 -0
- package/extensions/slack-tracker/dist/register.d.ts +11 -0
- package/extensions/slack-tracker/dist/register.d.ts.map +1 -0
- package/extensions/slack-tracker/dist/register.js +19 -0
- package/extensions/slack-tracker/dist/register.js.map +1 -0
- package/extensions/slack-tracker/dist/threadState.d.ts +52 -0
- package/extensions/slack-tracker/dist/threadState.d.ts.map +1 -0
- package/extensions/slack-tracker/dist/threadState.js +192 -0
- package/extensions/slack-tracker/dist/threadState.js.map +1 -0
- package/extensions/slack-tracker/dist/toolOps.d.ts +13 -0
- package/extensions/slack-tracker/dist/toolOps.d.ts.map +1 -0
- package/extensions/slack-tracker/dist/toolOps.js +76 -0
- package/extensions/slack-tracker/dist/toolOps.js.map +1 -0
- package/extensions/slack-tracker/dist/tools.d.ts +8 -0
- package/extensions/slack-tracker/dist/tools.d.ts.map +1 -0
- package/extensions/slack-tracker/dist/tools.js +266 -0
- package/extensions/slack-tracker/dist/tools.js.map +1 -0
- package/extensions/slack-tracker/dist/transport.d.ts +63 -0
- package/extensions/slack-tracker/dist/transport.d.ts.map +1 -0
- package/extensions/slack-tracker/dist/transport.js +2 -0
- package/extensions/slack-tracker/dist/transport.js.map +1 -0
- package/extensions/slack-tracker/dist/webTransport.d.ts +44 -0
- package/extensions/slack-tracker/dist/webTransport.d.ts.map +1 -0
- package/extensions/slack-tracker/dist/webTransport.js +402 -0
- package/extensions/slack-tracker/dist/webTransport.js.map +1 -0
- package/extensions/slack-tracker/package.json +17 -0
- package/package.json +89 -0
- package/packages/acp/dist/childProcess.d.ts +4 -0
- package/packages/acp/dist/childProcess.d.ts.map +1 -0
- package/packages/acp/dist/childProcess.js +33 -0
- package/packages/acp/dist/childProcess.js.map +1 -0
- package/packages/acp/dist/index.d.ts +70 -0
- package/packages/acp/dist/index.d.ts.map +1 -0
- package/packages/acp/dist/index.js +701 -0
- package/packages/acp/dist/index.js.map +1 -0
- package/packages/acp/dist/options.d.ts +24 -0
- package/packages/acp/dist/options.d.ts.map +1 -0
- package/packages/acp/dist/options.js +92 -0
- package/packages/acp/dist/options.js.map +1 -0
- package/packages/acp/dist/toml.d.ts +2 -0
- package/packages/acp/dist/toml.d.ts.map +1 -0
- package/packages/acp/dist/toml.js +51 -0
- package/packages/acp/dist/toml.js.map +1 -0
- package/packages/acp/package.json +24 -0
- package/packages/agent-runner/dist/index.d.ts +58 -0
- package/packages/agent-runner/dist/index.d.ts.map +1 -0
- package/packages/agent-runner/dist/index.js +288 -0
- package/packages/agent-runner/dist/index.js.map +1 -0
- package/packages/agent-runner/package.json +19 -0
- package/packages/agent-sdk/dist/index.d.ts +2 -0
- package/packages/agent-sdk/dist/index.d.ts.map +1 -0
- package/packages/agent-sdk/dist/index.js +2 -0
- package/packages/agent-sdk/dist/index.js.map +1 -0
- package/packages/agent-sdk/dist/provider.d.ts +66 -0
- package/packages/agent-sdk/dist/provider.d.ts.map +1 -0
- package/packages/agent-sdk/dist/provider.js +38 -0
- package/packages/agent-sdk/dist/provider.js.map +1 -0
- package/packages/agent-sdk/package.json +14 -0
- package/packages/cli-kit/dist/index.d.ts +20 -0
- package/packages/cli-kit/dist/index.d.ts.map +1 -0
- package/packages/cli-kit/dist/index.js +72 -0
- package/packages/cli-kit/dist/index.js.map +1 -0
- package/packages/cli-kit/package.json +14 -0
- package/packages/config/dist/aliases.d.ts +10 -0
- package/packages/config/dist/aliases.d.ts.map +1 -0
- package/packages/config/dist/aliases.js +153 -0
- package/packages/config/dist/aliases.js.map +1 -0
- package/packages/config/dist/defaults.d.ts +12 -0
- package/packages/config/dist/defaults.d.ts.map +1 -0
- package/packages/config/dist/defaults.js +78 -0
- package/packages/config/dist/defaults.js.map +1 -0
- package/packages/config/dist/errors.d.ts +3 -0
- package/packages/config/dist/errors.d.ts.map +1 -0
- package/packages/config/dist/errors.js +56 -0
- package/packages/config/dist/errors.js.map +1 -0
- package/packages/config/dist/index.d.ts +5 -0
- package/packages/config/dist/index.d.ts.map +1 -0
- package/packages/config/dist/index.js +4 -0
- package/packages/config/dist/index.js.map +1 -0
- package/packages/config/dist/leaf-utils.d.ts +3 -0
- package/packages/config/dist/leaf-utils.d.ts.map +1 -0
- package/packages/config/dist/leaf-utils.js +9 -0
- package/packages/config/dist/leaf-utils.js.map +1 -0
- package/packages/config/dist/parse.d.ts +11 -0
- package/packages/config/dist/parse.d.ts.map +1 -0
- package/packages/config/dist/parse.js +821 -0
- package/packages/config/dist/parse.js.map +1 -0
- package/packages/config/dist/schemas.d.ts +214 -0
- package/packages/config/dist/schemas.d.ts.map +1 -0
- package/packages/config/dist/schemas.js +248 -0
- package/packages/config/dist/schemas.js.map +1 -0
- package/packages/config/package.json +19 -0
- package/packages/dispatch/dist/index.d.ts +22 -0
- package/packages/dispatch/dist/index.d.ts.map +1 -0
- package/packages/dispatch/dist/index.js +117 -0
- package/packages/dispatch/dist/index.js.map +1 -0
- package/packages/dispatch/package.json +16 -0
- package/packages/dispatch-coordinator/dist/coordinator.d.ts +158 -0
- package/packages/dispatch-coordinator/dist/coordinator.d.ts.map +1 -0
- package/packages/dispatch-coordinator/dist/coordinator.js +529 -0
- package/packages/dispatch-coordinator/dist/coordinator.js.map +1 -0
- package/packages/dispatch-coordinator/dist/gate.d.ts +24 -0
- package/packages/dispatch-coordinator/dist/gate.d.ts.map +1 -0
- package/packages/dispatch-coordinator/dist/gate.js +47 -0
- package/packages/dispatch-coordinator/dist/gate.js.map +1 -0
- package/packages/dispatch-coordinator/dist/index.d.ts +6 -0
- package/packages/dispatch-coordinator/dist/index.d.ts.map +1 -0
- package/packages/dispatch-coordinator/dist/index.js +16 -0
- package/packages/dispatch-coordinator/dist/index.js.map +1 -0
- package/packages/dispatch-coordinator/dist/mcpEndpointManager.d.ts +28 -0
- package/packages/dispatch-coordinator/dist/mcpEndpointManager.d.ts.map +1 -0
- package/packages/dispatch-coordinator/dist/mcpEndpointManager.js +54 -0
- package/packages/dispatch-coordinator/dist/mcpEndpointManager.js.map +1 -0
- package/packages/dispatch-coordinator/dist/nullEndpointManager.d.ts +18 -0
- package/packages/dispatch-coordinator/dist/nullEndpointManager.d.ts.map +1 -0
- package/packages/dispatch-coordinator/dist/nullEndpointManager.js +40 -0
- package/packages/dispatch-coordinator/dist/nullEndpointManager.js.map +1 -0
- package/packages/dispatch-coordinator/dist/types.d.ts +119 -0
- package/packages/dispatch-coordinator/dist/types.d.ts.map +1 -0
- package/packages/dispatch-coordinator/dist/types.js +17 -0
- package/packages/dispatch-coordinator/dist/types.js.map +1 -0
- package/packages/dispatch-coordinator/package.json +16 -0
- package/packages/domain/dist/index.d.ts +775 -0
- package/packages/domain/dist/index.d.ts.map +1 -0
- package/packages/domain/dist/index.js +124 -0
- package/packages/domain/dist/index.js.map +1 -0
- package/packages/domain/package.json +14 -0
- package/packages/humanize/dist/index.d.ts +4 -0
- package/packages/humanize/dist/index.d.ts.map +1 -0
- package/packages/humanize/dist/index.js +347 -0
- package/packages/humanize/dist/index.js.map +1 -0
- package/packages/humanize/package.json +11 -0
- package/packages/issue/dist/index.d.ts +7 -0
- package/packages/issue/dist/index.d.ts.map +1 -0
- package/packages/issue/dist/index.js +147 -0
- package/packages/issue/dist/index.js.map +1 -0
- package/packages/issue/package.json +14 -0
- package/packages/log-file/dist/index.d.ts +10 -0
- package/packages/log-file/dist/index.d.ts.map +1 -0
- package/packages/log-file/dist/index.js +200 -0
- package/packages/log-file/dist/index.js.map +1 -0
- package/packages/log-file/package.json +15 -0
- package/packages/mcp/dist/agentEndpoint.d.ts +31 -0
- package/packages/mcp/dist/agentEndpoint.d.ts.map +1 -0
- package/packages/mcp/dist/agentEndpoint.js +270 -0
- package/packages/mcp/dist/agentEndpoint.js.map +1 -0
- package/packages/mcp/dist/auth.d.ts +7 -0
- package/packages/mcp/dist/auth.d.ts.map +1 -0
- package/packages/mcp/dist/auth.js +48 -0
- package/packages/mcp/dist/auth.js.map +1 -0
- package/packages/mcp/dist/filter.d.ts +70 -0
- package/packages/mcp/dist/filter.d.ts.map +1 -0
- package/packages/mcp/dist/filter.js +231 -0
- package/packages/mcp/dist/filter.js.map +1 -0
- package/packages/mcp/dist/index.d.ts +7 -0
- package/packages/mcp/dist/index.d.ts.map +1 -0
- package/packages/mcp/dist/index.js +5 -0
- package/packages/mcp/dist/index.js.map +1 -0
- package/packages/mcp/dist/server.d.ts +31 -0
- package/packages/mcp/dist/server.d.ts.map +1 -0
- package/packages/mcp/dist/server.js +176 -0
- package/packages/mcp/dist/server.js.map +1 -0
- package/packages/mcp/dist/tools/linear.d.ts +5 -0
- package/packages/mcp/dist/tools/linear.d.ts.map +1 -0
- package/packages/mcp/dist/tools/linear.js +192 -0
- package/packages/mcp/dist/tools/linear.js.map +1 -0
- package/packages/mcp/dist/tools/local.d.ts +5 -0
- package/packages/mcp/dist/tools/local.d.ts.map +1 -0
- package/packages/mcp/dist/tools/local.js +161 -0
- package/packages/mcp/dist/tools/local.js.map +1 -0
- package/packages/mcp/dist/tools/result.d.ts +5 -0
- package/packages/mcp/dist/tools/result.d.ts.map +1 -0
- package/packages/mcp/dist/tools/result.js +15 -0
- package/packages/mcp/dist/tools/result.js.map +1 -0
- package/packages/mcp/dist/tools.d.ts +14 -0
- package/packages/mcp/dist/tools.d.ts.map +1 -0
- package/packages/mcp/dist/tools.js +58 -0
- package/packages/mcp/dist/tools.js.map +1 -0
- package/packages/mcp/package.json +20 -0
- package/packages/orchestrator/dist/index.d.ts +171 -0
- package/packages/orchestrator/dist/index.d.ts.map +1 -0
- package/packages/orchestrator/dist/index.js +524 -0
- package/packages/orchestrator/dist/index.js.map +1 -0
- package/packages/orchestrator/package.json +18 -0
- package/packages/policies/dist/index.d.ts +11 -0
- package/packages/policies/dist/index.d.ts.map +1 -0
- package/packages/policies/dist/index.js +6 -0
- package/packages/policies/dist/index.js.map +1 -0
- package/packages/policies/dist/reconciliation.d.ts +5 -0
- package/packages/policies/dist/reconciliation.d.ts.map +1 -0
- package/packages/policies/dist/reconciliation.js +17 -0
- package/packages/policies/dist/reconciliation.js.map +1 -0
- package/packages/policies/dist/resume.d.ts +14 -0
- package/packages/policies/dist/resume.d.ts.map +1 -0
- package/packages/policies/dist/resume.js +7 -0
- package/packages/policies/dist/resume.js.map +1 -0
- package/packages/policies/dist/retry.d.ts +4 -0
- package/packages/policies/dist/retry.d.ts.map +1 -0
- package/packages/policies/dist/retry.js +7 -0
- package/packages/policies/dist/retry.js.map +1 -0
- package/packages/policies/dist/stopReason.d.ts +4 -0
- package/packages/policies/dist/stopReason.d.ts.map +1 -0
- package/packages/policies/dist/stopReason.js +11 -0
- package/packages/policies/dist/stopReason.js.map +1 -0
- package/packages/policies/dist/usage.d.ts +14 -0
- package/packages/policies/dist/usage.d.ts.map +1 -0
- package/packages/policies/dist/usage.js +38 -0
- package/packages/policies/dist/usage.js.map +1 -0
- package/packages/policies/dist/workerHost.d.ts +8 -0
- package/packages/policies/dist/workerHost.d.ts.map +1 -0
- package/packages/policies/dist/workerHost.js +20 -0
- package/packages/policies/dist/workerHost.js.map +1 -0
- package/packages/policies/package.json +21 -0
- package/packages/presenter/dist/index.d.ts +81 -0
- package/packages/presenter/dist/index.d.ts.map +1 -0
- package/packages/presenter/dist/index.js +421 -0
- package/packages/presenter/dist/index.js.map +1 -0
- package/packages/presenter/package.json +16 -0
- package/packages/projections/dist/index.d.ts +10 -0
- package/packages/projections/dist/index.d.ts.map +1 -0
- package/packages/projections/dist/index.js +30 -0
- package/packages/projections/dist/index.js.map +1 -0
- package/packages/projections/package.json +15 -0
- package/packages/prompt/dist/index.d.ts +9 -0
- package/packages/prompt/dist/index.d.ts.map +1 -0
- package/packages/prompt/dist/index.js +71 -0
- package/packages/prompt/dist/index.js.map +1 -0
- package/packages/prompt/package.json +16 -0
- package/packages/retry-scheduler/dist/index.d.ts +12 -0
- package/packages/retry-scheduler/dist/index.d.ts.map +1 -0
- package/packages/retry-scheduler/dist/index.js +39 -0
- package/packages/retry-scheduler/dist/index.js.map +1 -0
- package/packages/retry-scheduler/package.json +15 -0
- package/packages/runtime/dist/index.d.ts +157 -0
- package/packages/runtime/dist/index.d.ts.map +1 -0
- package/packages/runtime/dist/index.js +1074 -0
- package/packages/runtime/dist/index.js.map +1 -0
- package/packages/runtime/package.json +26 -0
- package/packages/runtime-events/dist/index.d.ts +110 -0
- package/packages/runtime-events/dist/index.d.ts.map +1 -0
- package/packages/runtime-events/dist/index.js +25 -0
- package/packages/runtime-events/dist/index.js.map +1 -0
- package/packages/runtime-events/package.json +14 -0
- package/packages/server/dist/index.d.ts +25 -0
- package/packages/server/dist/index.d.ts.map +1 -0
- package/packages/server/dist/index.js +213 -0
- package/packages/server/dist/index.js.map +1 -0
- package/packages/server/dist/issue-store.d.ts +26 -0
- package/packages/server/dist/issue-store.d.ts.map +1 -0
- package/packages/server/dist/issue-store.js +88 -0
- package/packages/server/dist/issue-store.js.map +1 -0
- package/packages/server/dist/path-params.d.ts +6 -0
- package/packages/server/dist/path-params.d.ts.map +1 -0
- package/packages/server/dist/path-params.js +15 -0
- package/packages/server/dist/path-params.js.map +1 -0
- package/packages/server/dist/source.d.ts +12 -0
- package/packages/server/dist/source.d.ts.map +1 -0
- package/packages/server/dist/source.js +2 -0
- package/packages/server/dist/source.js.map +1 -0
- package/packages/server/dist/trace-routes.d.ts +21 -0
- package/packages/server/dist/trace-routes.d.ts.map +1 -0
- package/packages/server/dist/trace-routes.js +66 -0
- package/packages/server/dist/trace-routes.js.map +1 -0
- package/packages/server/dist/ws.d.ts +18 -0
- package/packages/server/dist/ws.d.ts.map +1 -0
- package/packages/server/dist/ws.js +168 -0
- package/packages/server/dist/ws.js.map +1 -0
- package/packages/server/package.json +22 -0
- package/packages/ssh/dist/index.d.ts +33 -0
- package/packages/ssh/dist/index.d.ts.map +1 -0
- package/packages/ssh/dist/index.js +281 -0
- package/packages/ssh/dist/index.js.map +1 -0
- package/packages/ssh/package.json +15 -0
- package/packages/static-worker/dist/index.d.ts +73 -0
- package/packages/static-worker/dist/index.d.ts.map +1 -0
- package/packages/static-worker/dist/index.js +150 -0
- package/packages/static-worker/dist/index.js.map +1 -0
- package/packages/static-worker/package.json +14 -0
- package/packages/tool-sdk/dist/filter.d.ts +70 -0
- package/packages/tool-sdk/dist/filter.d.ts.map +1 -0
- package/packages/tool-sdk/dist/filter.js +231 -0
- package/packages/tool-sdk/dist/filter.js.map +1 -0
- package/packages/tool-sdk/dist/index.d.ts +6 -0
- package/packages/tool-sdk/dist/index.d.ts.map +1 -0
- package/packages/tool-sdk/dist/index.js +4 -0
- package/packages/tool-sdk/dist/index.js.map +1 -0
- package/packages/tool-sdk/dist/provider.d.ts +51 -0
- package/packages/tool-sdk/dist/provider.d.ts.map +1 -0
- package/packages/tool-sdk/dist/provider.js +2 -0
- package/packages/tool-sdk/dist/provider.js.map +1 -0
- package/packages/tool-sdk/dist/registry.d.ts +35 -0
- package/packages/tool-sdk/dist/registry.d.ts.map +1 -0
- package/packages/tool-sdk/dist/registry.js +85 -0
- package/packages/tool-sdk/dist/registry.js.map +1 -0
- package/packages/tool-sdk/dist/result.d.ts +5 -0
- package/packages/tool-sdk/dist/result.d.ts.map +1 -0
- package/packages/tool-sdk/dist/result.js +15 -0
- package/packages/tool-sdk/dist/result.js.map +1 -0
- package/packages/tool-sdk/package.json +14 -0
- package/packages/traceviz-emitter/dist/index.d.ts +19 -0
- package/packages/traceviz-emitter/dist/index.d.ts.map +1 -0
- package/packages/traceviz-emitter/dist/index.js +97 -0
- package/packages/traceviz-emitter/dist/index.js.map +1 -0
- package/packages/traceviz-emitter/package.json +17 -0
- package/packages/traceviz-server/dist/index.d.ts +14 -0
- package/packages/traceviz-server/dist/index.d.ts.map +1 -0
- package/packages/traceviz-server/dist/index.js +10 -0
- package/packages/traceviz-server/dist/index.js.map +1 -0
- package/packages/traceviz-server/dist/models/api.d.ts +51 -0
- package/packages/traceviz-server/dist/models/api.d.ts.map +1 -0
- package/packages/traceviz-server/dist/models/api.js +5 -0
- package/packages/traceviz-server/dist/models/api.js.map +1 -0
- package/packages/traceviz-server/dist/models/display-events.d.ts +58 -0
- package/packages/traceviz-server/dist/models/display-events.d.ts.map +1 -0
- package/packages/traceviz-server/dist/models/display-events.js +6 -0
- package/packages/traceviz-server/dist/models/display-events.js.map +1 -0
- package/packages/traceviz-server/dist/parser.d.ts +14 -0
- package/packages/traceviz-server/dist/parser.d.ts.map +1 -0
- package/packages/traceviz-server/dist/parser.js +363 -0
- package/packages/traceviz-server/dist/parser.js.map +1 -0
- package/packages/traceviz-server/dist/stats.d.ts +7 -0
- package/packages/traceviz-server/dist/stats.d.ts.map +1 -0
- package/packages/traceviz-server/dist/stats.js +81 -0
- package/packages/traceviz-server/dist/stats.js.map +1 -0
- package/packages/traceviz-server/dist/watcher.d.ts +54 -0
- package/packages/traceviz-server/dist/watcher.d.ts.map +1 -0
- package/packages/traceviz-server/dist/watcher.js +368 -0
- package/packages/traceviz-server/dist/watcher.js.map +1 -0
- package/packages/traceviz-server/package.json +16 -0
- package/packages/tracker-sdk/dist/index.d.ts +5 -0
- package/packages/tracker-sdk/dist/index.d.ts.map +1 -0
- package/packages/tracker-sdk/dist/index.js +4 -0
- package/packages/tracker-sdk/dist/index.js.map +1 -0
- package/packages/tracker-sdk/dist/options.d.ts +20 -0
- package/packages/tracker-sdk/dist/options.d.ts.map +1 -0
- package/packages/tracker-sdk/dist/options.js +46 -0
- package/packages/tracker-sdk/dist/options.js.map +1 -0
- package/packages/tracker-sdk/dist/provider.d.ts +104 -0
- package/packages/tracker-sdk/dist/provider.d.ts.map +1 -0
- package/packages/tracker-sdk/dist/provider.js +2 -0
- package/packages/tracker-sdk/dist/provider.js.map +1 -0
- package/packages/tracker-sdk/dist/registry.d.ts +26 -0
- package/packages/tracker-sdk/dist/registry.d.ts.map +1 -0
- package/packages/tracker-sdk/dist/registry.js +52 -0
- package/packages/tracker-sdk/dist/registry.js.map +1 -0
- package/packages/tracker-sdk/dist/toolPack.d.ts +10 -0
- package/packages/tracker-sdk/dist/toolPack.d.ts.map +1 -0
- package/packages/tracker-sdk/dist/toolPack.js +185 -0
- package/packages/tracker-sdk/dist/toolPack.js.map +1 -0
- package/packages/tracker-sdk/package.json +15 -0
- package/packages/tui/dist/index.d.ts +35 -0
- package/packages/tui/dist/index.d.ts.map +1 -0
- package/packages/tui/dist/index.js +354 -0
- package/packages/tui/dist/index.js.map +1 -0
- package/packages/tui/package.json +18 -0
- package/packages/worker-host-pool/dist/index.d.ts +33 -0
- package/packages/worker-host-pool/dist/index.d.ts.map +1 -0
- package/packages/worker-host-pool/dist/index.js +311 -0
- package/packages/worker-host-pool/dist/index.js.map +1 -0
- package/packages/worker-host-pool/package.json +14 -0
- package/packages/worker-pool/dist/index.d.ts +6 -0
- package/packages/worker-pool/dist/index.d.ts.map +1 -0
- package/packages/worker-pool/dist/index.js +15 -0
- package/packages/worker-pool/dist/index.js.map +1 -0
- package/packages/worker-pool/dist/lease.d.ts +36 -0
- package/packages/worker-pool/dist/lease.d.ts.map +1 -0
- package/packages/worker-pool/dist/lease.js +53 -0
- package/packages/worker-pool/dist/lease.js.map +1 -0
- package/packages/worker-pool/dist/ledger.d.ts +51 -0
- package/packages/worker-pool/dist/ledger.d.ts.map +1 -0
- package/packages/worker-pool/dist/ledger.js +165 -0
- package/packages/worker-pool/dist/ledger.js.map +1 -0
- package/packages/worker-pool/dist/mutex.d.ts +10 -0
- package/packages/worker-pool/dist/mutex.d.ts.map +1 -0
- package/packages/worker-pool/dist/mutex.js +22 -0
- package/packages/worker-pool/dist/mutex.js.map +1 -0
- package/packages/worker-pool/dist/pool.d.ts +33 -0
- package/packages/worker-pool/dist/pool.d.ts.map +1 -0
- package/packages/worker-pool/dist/pool.js +1727 -0
- package/packages/worker-pool/dist/pool.js.map +1 -0
- package/packages/worker-pool/dist/reaper.d.ts +94 -0
- package/packages/worker-pool/dist/reaper.d.ts.map +1 -0
- package/packages/worker-pool/dist/reaper.js +295 -0
- package/packages/worker-pool/dist/reaper.js.map +1 -0
- package/packages/worker-pool/dist/types.d.ts +249 -0
- package/packages/worker-pool/dist/types.d.ts.map +1 -0
- package/packages/worker-pool/dist/types.js +2 -0
- package/packages/worker-pool/dist/types.js.map +1 -0
- package/packages/worker-pool/package.json +16 -0
- package/packages/worker-sdk/dist/conformance.d.ts +64 -0
- package/packages/worker-sdk/dist/conformance.d.ts.map +1 -0
- package/packages/worker-sdk/dist/conformance.js +109 -0
- package/packages/worker-sdk/dist/conformance.js.map +1 -0
- package/packages/worker-sdk/dist/fake.d.ts +76 -0
- package/packages/worker-sdk/dist/fake.d.ts.map +1 -0
- package/packages/worker-sdk/dist/fake.js +142 -0
- package/packages/worker-sdk/dist/fake.js.map +1 -0
- package/packages/worker-sdk/dist/index.d.ts +5 -0
- package/packages/worker-sdk/dist/index.d.ts.map +1 -0
- package/packages/worker-sdk/dist/index.js +10 -0
- package/packages/worker-sdk/dist/index.js.map +1 -0
- package/packages/worker-sdk/dist/module.d.ts +46 -0
- package/packages/worker-sdk/dist/module.d.ts.map +1 -0
- package/packages/worker-sdk/dist/module.js +59 -0
- package/packages/worker-sdk/dist/module.js.map +1 -0
- package/packages/worker-sdk/dist/registry.d.ts +24 -0
- package/packages/worker-sdk/dist/registry.d.ts.map +1 -0
- package/packages/worker-sdk/dist/registry.js +49 -0
- package/packages/worker-sdk/dist/registry.js.map +1 -0
- package/packages/worker-sdk/dist/types.d.ts +138 -0
- package/packages/worker-sdk/dist/types.d.ts.map +1 -0
- package/packages/worker-sdk/dist/types.js +21 -0
- package/packages/worker-sdk/dist/types.js.map +1 -0
- package/packages/worker-sdk/package.json +15 -0
- package/packages/workflow/dist/index.d.ts +33 -0
- package/packages/workflow/dist/index.d.ts.map +1 -0
- package/packages/workflow/dist/index.js +125 -0
- package/packages/workflow/dist/index.js.map +1 -0
- package/packages/workflow/package.json +19 -0
- package/packages/workspace/dist/index.d.ts +70 -0
- package/packages/workspace/dist/index.d.ts.map +1 -0
- package/packages/workspace/dist/index.js +1016 -0
- package/packages/workspace/dist/index.js.map +1 -0
- package/packages/workspace/package.json +17 -0
- package/runtime-deps/anthropic-claude-agent-sdk/LICENSE.md +1 -0
- package/runtime-deps/anthropic-claude-agent-sdk/README.md +65 -0
- package/runtime-deps/anthropic-claude-agent-sdk/agentSdkTypes.d.ts +1 -0
- package/runtime-deps/anthropic-claude-agent-sdk/assistant.d.ts +135 -0
- package/runtime-deps/anthropic-claude-agent-sdk/assistant.mjs +190 -0
- package/runtime-deps/anthropic-claude-agent-sdk/bridge.d.ts +231 -0
- package/runtime-deps/anthropic-claude-agent-sdk/bridge.mjs +168 -0
- package/runtime-deps/anthropic-claude-agent-sdk/browser-sdk.d.ts +53 -0
- package/runtime-deps/anthropic-claude-agent-sdk/browser-sdk.js +93 -0
- package/runtime-deps/anthropic-claude-agent-sdk/extractFromBunfs.d.ts +1 -0
- package/runtime-deps/anthropic-claude-agent-sdk/extractFromBunfs.js +156 -0
- package/runtime-deps/anthropic-claude-agent-sdk/manifest.json +47 -0
- package/runtime-deps/anthropic-claude-agent-sdk/manifest.zst.json +55 -0
- package/runtime-deps/anthropic-claude-agent-sdk/node_modules/.bin/anthropic-ai-sdk +21 -0
- package/runtime-deps/anthropic-claude-agent-sdk/package.json +81 -0
- package/runtime-deps/anthropic-claude-agent-sdk/sdk-tools.d.ts +3170 -0
- package/runtime-deps/anthropic-claude-agent-sdk/sdk.d.ts +6000 -0
- package/runtime-deps/anthropic-claude-agent-sdk/sdk.mjs +119 -0
- package/runtime-deps/openai-codex/README.md +60 -0
- package/runtime-deps/openai-codex/bin/codex.js +229 -0
- package/runtime-deps/openai-codex/bin/rg +79 -0
- package/runtime-deps/openai-codex/package.json +22 -0
- package/vendor/claude-agent-acp/dist/acp-agent.d.ts +239 -0
- package/vendor/claude-agent-acp/dist/acp-agent.d.ts.map +1 -0
- package/vendor/claude-agent-acp/dist/acp-agent.js +2693 -0
- package/vendor/claude-agent-acp/dist/bundle.js +41230 -0
- package/vendor/claude-agent-acp/dist/index.d.ts +3 -0
- package/vendor/claude-agent-acp/dist/index.d.ts.map +1 -0
- package/vendor/claude-agent-acp/dist/index.js +67 -0
- package/vendor/claude-agent-acp/dist/lib.d.ts +6 -0
- package/vendor/claude-agent-acp/dist/lib.d.ts.map +1 -0
- package/vendor/claude-agent-acp/dist/lib.js +5 -0
- package/vendor/claude-agent-acp/dist/settings.d.ts +68 -0
- package/vendor/claude-agent-acp/dist/settings.d.ts.map +1 -0
- package/vendor/claude-agent-acp/dist/settings.js +182 -0
- package/vendor/claude-agent-acp/dist/tools.d.ts +103 -0
- package/vendor/claude-agent-acp/dist/tools.d.ts.map +1 -0
- package/vendor/claude-agent-acp/dist/tools.js +713 -0
- package/vendor/claude-agent-acp/dist/utils.d.ts +16 -0
- package/vendor/claude-agent-acp/dist/utils.d.ts.map +1 -0
- package/vendor/claude-agent-acp/dist/utils.js +83 -0
- package/vendor/claude-agent-acp/package.json +23 -0
- package/vendor/codex-acp/dist/index.js +21280 -0
- package/vendor/codex-acp/package.json +17 -0
|
@@ -0,0 +1,1727 @@
|
|
|
1
|
+
import { randomUUID } from "node:crypto";
|
|
2
|
+
import { defaultWorkerDriverRegistry, POOL_OWNED_LABEL, } from "@lorenz/worker-sdk";
|
|
3
|
+
import { runSsh } from "@lorenz/ssh";
|
|
4
|
+
import { createLedger } from "./ledger.js";
|
|
5
|
+
import { createLease } from "./lease.js";
|
|
6
|
+
import { createMutex } from "./mutex.js";
|
|
7
|
+
import { runReaperTick } from "./reaper.js";
|
|
8
|
+
/**
|
|
9
|
+
* Resolves the configured driver kind through the registry and constructs the
|
|
10
|
+
* driver from the operator's `driverOptions`. The pool is the engine boundary
|
|
11
|
+
* that owns the real ssh dependency: drivers only ever see the injected
|
|
12
|
+
* {@link DriverDeps.runSsh}, never `@lorenz/ssh` itself. Throws the registry's
|
|
13
|
+
* `worker_pool_driver_unavailable` error for an unregistered kind (so the daemon
|
|
14
|
+
* fails loud at startup), and surfaces the factory's own validation error for
|
|
15
|
+
* unusable `driverOptions` at the same fail-loud construction point.
|
|
16
|
+
*/
|
|
17
|
+
function resolveDriver(settings, deps) {
    // Prefer an injected registry (tests / embedders); otherwise use the SDK default.
    const registry = deps.drivers ?? defaultWorkerDriverRegistry;
    // Look up the factory for the configured driver kind before touching anything else.
    const factory = registry.require(settings.driver);
    const { clock, logEvent } = deps;
    // The driver only ever receives the ssh runner by injection, alongside the
    // pool's clock and event logger.
    return factory.create(settings.driverOptions ?? {}, { clock, logEvent, runSsh });
}
|
|
26
|
+
/**
|
|
27
|
+
* Bounded retry budget for the authoritative `driver.list()` call on
|
|
28
|
+
* {@link WorkerPoolImpl.hydrate}. A transient driver blip must not be mistaken for a
|
|
29
|
+
* successful (empty) startup, so the list is re-attempted this many times with a
|
|
30
|
+
* short clock-driven backoff before the pool gives up.
|
|
31
|
+
*/
|
|
32
|
+
const HYDRATE_LIST_ATTEMPTS = 3;
/**
 * Base backoff, in milliseconds, between hydrate `list()` retries; multiplied by
 * the attempt number. NOTE(review): the retry loop itself is outside this view -
 * confirm the multiplier against `listForHydrate`.
 */
const HYDRATE_LIST_BACKOFF_MS = 50;
/**
 * Maximum number of SSH-readiness probes made against a freshly provisioned
 * worker before a grow / warm top-up gives up on it (a cold cloud worker's sshd
 * may lag the provision return). An already-up host (static-ssh) or the fake
 * driver probes ok on the first attempt, so the retry only engages for a
 * genuinely cold worker.
 */
const PROBE_READY_ATTEMPTS = 3;
/** Base backoff, in milliseconds, between readiness probes; multiplied by the attempt number. */
const PROBE_READY_BACKOFF_MS = 50;
|
|
44
|
+
/** UTC calendar-day key (YYYY-MM-DD) used to roll the daily spend accumulator. */
|
|
45
|
+
function utcDayKey(now) {
    // An ISO-8601 timestamp is always rendered in UTC; its first ten characters
    // are the calendar date (YYYY-MM-DD).
    const isoTimestamp = now.toISOString();
    return isoTimestamp.substring(0, 10);
}
|
|
48
|
+
/** Worker states that count as live for capacity/spend accounting. */
|
|
49
|
+
function isLive(state) {
    // Anything that is being torn down (or already gone) no longer counts
    // toward capacity/spend; every other state does.
    switch (state) {
        case "DESTROYED":
        case "DESTROYING":
        case "DRAINING":
            return false;
        default:
            return true;
    }
}
|
|
52
|
+
/** A worker that can serve a fresh lease (idle, healthy, not slated for teardown). */
|
|
53
|
+
function isLeasable(record, slotsPerMachine) {
    // A worker slated for teardown never takes new work.
    if (record.markedForDestroy) {
        return false;
    }
    // Only idle-warm or already-leased workers can host a lease.
    const servesLeases = record.state === "WARM_IDLE" || record.state === "LEASED";
    if (!servesLeases) {
        return false;
    }
    // There must still be a free slot on the machine.
    return record.inFlight < slotsPerMachine;
}
|
|
60
|
+
/**
|
|
61
|
+
* The embedded warm worker pool. A long-lived, reload-surviving singleton that
|
|
62
|
+
* produces each run's `workerHost`. It owns the synchronous select-and-stamp
|
|
63
|
+
* path, RESERVATION-based single-flight growth, the FIFO waiter queue, spend
|
|
64
|
+
* accounting, `maxInFlight`, `maxWorkersPerIssue`, sticky affinity, the recurring
|
|
65
|
+
* reaper timer, and the awaitable `reconcile`/`hydrate`/`drain`/`snapshot`
|
|
66
|
+
* surface. `reconcile` diffs prev-vs-next settings (resize toward min/max,
|
|
67
|
+
* deferring shrink to the reaper oldest-idle-first, never reconstructing the
|
|
68
|
+
* object and never destroying a leased worker synchronously); `hydrate` re-adopts
|
|
69
|
+
* survivors from `driver.list()` + the ledger and drops orphan rows; `drain`
|
|
70
|
+
* rejects new acquires then force-destroys every worker so no paid cloud worker leaks.
|
|
71
|
+
*/
|
|
72
|
+
class WorkerPoolImpl {
|
|
73
|
+
    // The current pool settings. Replaced wholesale by `reconcile` on every
    // config hot-reload.
    settings;
    // The authoritative in-memory inventory, keyed on the pool's idempotency key.
    inventory = new Map();
    // One async mutex per worker so a release and a reaper tick can never both mutate
    // the same record's `inFlight`/state (the reaper-vs-release race fix).
    workerMutexes = new Map();
    // FIFO queue of blocked acquires. A freed worker wakes the oldest compatible
    // waiter first, providing basic fairness.
    waiters = [];
    // Callbacks the pool fires INSIDE the per-worker mutex immediately before it
    // destroys a machine (the single `recycle` chokepoint), so the dispatch
    // coordinator can fail any still-open RunSlot bound to that worker CLEANLY before
    // the host dies (the recycle-vs-endpoint ordering invariant). Each callback is
    // invoked at most once per worker teardown and its errors are swallowed so a
    // misbehaving listener can never block the destroy it precedes.
    recyclingCallbacks = [];
    // Callbacks fired AFTER a waiter wake-up pass whenever capacity is still
    // leasable (see onCapacityAvailable). The runtime registers its poll nudge
    // here; errors are swallowed so a misbehaving listener can never break the
    // settle/reconcile path that freed the capacity.
    capacityAvailableCallbacks = [];
    // Synchronous capacity reservation taken BEFORE any provision await, so two
    // concurrent growth decisions cannot both allocate past `max`. Incremented in
    // the same synchronous tick the growth is decided; released on settle/reject.
    reservedProvisions = 0;
    // Per-issue grow reservations taken synchronously the instant a grow for an
    // issue is decided (and before its provision await), so two concurrent grows
    // for the SAME issue cannot both slip past `maxWorkersPerIssue` while neither has
    // landed in inventory yet. Counted alongside `leaseIssues` in the issue caps;
    // decremented in `grow`'s finally.
    reservedProvisionsByIssue = new Map();
    // Process-lifetime + daily worker-second accumulators. `dayKey` rolls on UTC day
    // change. The daily total is seeded from the ledger sidecar on hydrate (T10).
    workerSecondsUsed = 0;
    dailyWorkerSecondsUsed = 0;
    // UTC calendar-day key (YYYY-MM-DD) the daily accumulator belongs to; set from
    // the clock in the constructor.
    dayKey;
    // Monotonic sequence for deterministic worker ids (so the fake driver's
    // idempotency key and the test assertions are reproducible).
    workerSeq = 0;
    // Once true the pool rejects new acquires and force-destroys all workers. Set by
    // `drain`; cleared ONLY by a `reconcile` re-enable (enabled false -> true), which
    // also resets `drainPromise` / `notifyDrained` and bumps `drainEpoch`.
    draining = false;
    // NOTE(review): presumably the promise of the drain currently in flight (so
    // repeated `drain` calls share it) - confirm against `drain`. Reset to null
    // by a `reconcile` re-enable.
    drainPromise = null;
    // Monotonic drain generation. Captured at the start of each `runDrain`; the
    // deadline barrier and the force-destroy loop bail (without destroying live
    // workers) when the epoch they captured no longer matches `drainEpoch` OR
    // `draining` has been cleared. A reconcile RE-ENABLE bumps this epoch so an
    // orphaned drain parked on its deadline cannot force-destroy the workers a
    // re-enabled (now-live) pool just grew.
    drainEpoch = 0;
    // Resolved by `onLeaseSettle` the moment `inFlight` reaches zero while draining,
    // so `drain` proceeds without busy-polling the clock (which a fake clock never
    // advances). Raced against a deadline timer inside `runDrain`.
    notifyDrained = null;
    // The live worker driver. Resolved once in the constructor via `resolveDriver`
    // and replaced in place by `swapDriver` on a driver hot-reload.
    driver;
    // Monotonic driver generation, bumped by `swapDriver` on every driver
    // hot-reload. A grow / warm-provision CAPTURES this (and `this.driver`) BEFORE
    // its provision await; if the generation has advanced by the time provision
    // returns, a swap happened DURING the await, so the new worker was provisioned on the
    // now-stale driver. The pool then records its origin as the CAPTURED driver
    // (so recycle destroys it on the backend that actually created it) and marks it
    // for destroy (it cannot serve the live driver). Without this, a worker provisioned
    // on driver A but inserted after a swap to B would be recorded under B with no
    // origin, so recycle/destroy routes to B and A's paid machine leaks.
    driverGeneration = 0;
    // The ledger built by `createLedger`; its `usesLedger` gate follows the live
    // driver's capabilities and is rebuilt by `swapDriver`.
    ledger;
    // The injected clock (`now()` yields a Date) and structured event logger.
    clock;
    logEvent;
    // Millisecond adapter over `clock` (`now()` yields a number) used by the
    // lease/heartbeat code.
    leaseClock;
    // The recurring reaper timer. Re-armed at the end of each tick so the single
    // serial pass runs at the configured cadence. Detached via `unref?.()` so it
    // never keeps the process alive (systemClock.setTimeout never unrefs on its
    // own). Cleared on drain so a stopped pool issues no further ticks.
    reaperTimer = null;
    reaperStopped = false;
    // True once `hydrate()` has completed at least once. The constructor arms the
    // reaper before `hydrate()` runs, so until the first hydrate re-adopts the
    // labeled survivors from `driver.list()`, the reaper's destroy-unknown branch
    // must stay inert or it would reap the pool's own survivors on restart.
    hydrated = false;
    // The narrow seam object handed to the reaper (built in the constructor).
    reaperInternals;
    // The deps used to resolve the driver in the ctor. Retained so `swapDriver`
    // can re-run `resolveDriver` (and rebuild the ledger gate) in place on a
    // driver hot-reload WITHOUT reconstructing the pool singleton.
    deps;
|
|
158
|
+
constructor(settings, deps) {
|
|
159
|
+
this.settings = settings;
|
|
160
|
+
this.deps = deps;
|
|
161
|
+
this.clock = deps.clock;
|
|
162
|
+
this.logEvent = deps.logEvent;
|
|
163
|
+
this.driver = resolveDriver(settings, deps);
|
|
164
|
+
this.ledger = createLedger({
|
|
165
|
+
ledgerPath: deps.ledgerPath ?? "",
|
|
166
|
+
clock: deps.clock,
|
|
167
|
+
usesLedger: this.driver.capabilities.usesLedger && deps.ledgerPath !== undefined,
|
|
168
|
+
});
|
|
169
|
+
// The lease/heartbeat clock works in milliseconds while the ClockPort yields
|
|
170
|
+
// a Date; adapt once so leases see a plain numeric clock.
|
|
171
|
+
this.leaseClock = { now: () => this.clock.now().getTime() };
|
|
172
|
+
this.dayKey = utcDayKey(this.clock.now());
|
|
173
|
+
// The narrow seam the reaper drives over. Every primitive routes back through
|
|
174
|
+
// the pool's per-worker mutex so a reaper tick and a lease release can never both
|
|
175
|
+
// touch the same `inFlight`.
|
|
176
|
+
this.reaperInternals = {
|
|
177
|
+
settings: this.settings,
|
|
178
|
+
driver: this.driver,
|
|
179
|
+
poolOwnedLabel: POOL_OWNED_LABEL,
|
|
180
|
+
now: () => this.leaseClock.now(),
|
|
181
|
+
inventory: this.inventory,
|
|
182
|
+
mutexFor: (workerId) => this.mutexFor(workerId),
|
|
183
|
+
liveWorkerCount: () => this.liveWorkerCount(),
|
|
184
|
+
// In-process invariant: a lease is settled exactly once, only in `runClaim`'s
|
|
185
|
+
// finally (release/fail), so an UN-settled in-flight lease always implies an
|
|
186
|
+
// active run. The reaper therefore treats every in-flight lease as alive and
|
|
187
|
+
// never force-returns a LEASED worker from the live pool (that would kill a
|
|
188
|
+
// legitimate long single-turn run that emits no heartbeat). Orphan recovery
|
|
189
|
+
// after a process restart is handled separately by `hydrate`, which re-adopts
|
|
190
|
+
// only the survivors `driver.list()` still shows and drops orphan rows.
|
|
191
|
+
isRunActive: () => true,
|
|
192
|
+
hydrated: () => this.hydrated,
|
|
193
|
+
hasGrowthBudget: () => this.hasGrowthHeadroom(),
|
|
194
|
+
destroyWorker: async (record, reason) => this.recycle(record, reason),
|
|
195
|
+
provisionWarm: async () => this.provisionWarm(),
|
|
196
|
+
logEvent: this.logEvent,
|
|
197
|
+
wakeWaiters: () => this.wakeWaiters(),
|
|
198
|
+
};
|
|
199
|
+
// Single serial recurring reaper timer, detached so it never keeps the
|
|
200
|
+
// process alive. The tick re-arms itself at the configured cadence.
|
|
201
|
+
this.scheduleReaper();
|
|
202
|
+
}
|
|
203
|
+
// --- public API ---------------------------------------------------------
|
|
204
|
+
async acquire(req) {
|
|
205
|
+
if (!this.settings.enabled || this.draining) {
|
|
206
|
+
return { status: "no_capacity", reason: "pool_disabled" };
|
|
207
|
+
}
|
|
208
|
+
this.rollDayKeyIfNeeded();
|
|
209
|
+
// Spend gate: once worker-seconds (total or daily) are exhausted the pool runs
|
|
210
|
+
// nothing further, even reusing a warm worker, until the cap resets.
|
|
211
|
+
if (this.workerSecondsExhausted()) {
|
|
212
|
+
return { status: "no_capacity", reason: "spend_cap" };
|
|
213
|
+
}
|
|
214
|
+
// 1) Synchronous select-and-stamp over a free/under-capacity worker. No await
|
|
215
|
+
// between selecting the record and stamping it, so two concurrent acquires
|
|
216
|
+
// can never grab the same slot.
|
|
217
|
+
const selected = this.selectAndStamp(req);
|
|
218
|
+
if (selected) {
|
|
219
|
+
return { status: "leased", lease: selected };
|
|
220
|
+
}
|
|
221
|
+
// 2) Grow under the reservation, if capacity and spend allow.
|
|
222
|
+
if (this.canGrow(req)) {
|
|
223
|
+
const grown = await this.grow(req);
|
|
224
|
+
if (grown.status === "leased")
|
|
225
|
+
return grown;
|
|
226
|
+
// A growth that failed for capacity/spend reasons falls through to the
|
|
227
|
+
// waiter queue; a driver_error with nothing to wait on is returned.
|
|
228
|
+
if (grown.status === "no_capacity" && grown.reason === "driver_error") {
|
|
229
|
+
return grown;
|
|
230
|
+
}
|
|
231
|
+
}
|
|
232
|
+
else if (this.blockedBySpendCap()) {
|
|
233
|
+
// A worker could not be selected and growth is barred specifically by a spend
|
|
234
|
+
// cap (concurrent workers). Surface spend_cap now rather than holding the
|
|
235
|
+
// poll thread on a waiter the budget can never satisfy.
|
|
236
|
+
return { status: "no_capacity", reason: "spend_cap" };
|
|
237
|
+
}
|
|
238
|
+
// 3) Block on the FIFO waiter queue until a worker frees, the timeout fires, or
|
|
239
|
+
// the request is aborted.
|
|
240
|
+
return this.waitForCapacity(req);
|
|
241
|
+
}
|
|
242
|
+
canAcquire() {
|
|
243
|
+
if (!this.settings.enabled || this.draining)
|
|
244
|
+
return false;
|
|
245
|
+
this.rollDayKeyIfNeeded();
|
|
246
|
+
if (this.workerSecondsExhausted())
|
|
247
|
+
return false;
|
|
248
|
+
// A warm/under-capacity worker is immediately leasable.
|
|
249
|
+
for (const record of this.inventory.values()) {
|
|
250
|
+
if (isLeasable(record, this.settings.slotsPerMachine))
|
|
251
|
+
return true;
|
|
252
|
+
}
|
|
253
|
+
// Otherwise capacity exists only if the pool can still grow a worker.
|
|
254
|
+
return this.hasGrowthHeadroom();
|
|
255
|
+
}
|
|
256
|
+
/**
|
|
257
|
+
* Whether the pool currently governs worker-host capacity. A config reload can disable the pool
|
|
258
|
+
* (which drains it to zero) without tearing down the orchestrator's lifetime capacity probe; the
|
|
259
|
+
* probe reads this so a disabled pool falls through to static/local execution instead of
|
|
260
|
+
* permanently blocking dispatch. Mirrors `settings.enabled` (swapped in by `reconcile`).
|
|
261
|
+
*/
|
|
262
|
+
isEnabled() {
|
|
263
|
+
return this.settings.enabled;
|
|
264
|
+
}
|
|
265
|
+
/**
|
|
266
|
+
* Diffs prev-vs-next settings on a config hot-reload and reconciles the live
|
|
267
|
+
* pool WITHOUT being reconstructed (the singleton survives every reload):
|
|
268
|
+
*
|
|
269
|
+
* - `enabled true -> false`: drain to zero (paid workers must not linger).
|
|
270
|
+
* - `enabled false -> true`: grow from zero toward the warm/min target.
|
|
271
|
+
* - lowering `max` (or any live overshoot of the new `max`): defer the shrink
|
|
272
|
+
* to the reaper, marking the OLDEST-IDLE excess workers `markedForDestroy`
|
|
273
|
+
* (the reaper reaps a flagged idle worker on its next tick, and a flagged
|
|
274
|
+
* LEASED worker is recycled the instant its last lease returns). Leased workers
|
|
275
|
+
* are NEVER destroyed synchronously here.
|
|
276
|
+
* - raising `min`/`warm`: top up toward the new target within the spend budget.
|
|
277
|
+
*
|
|
278
|
+
* Settings are swapped in first so every subsequent acquire / reaper tick reads
|
|
279
|
+
* the latest knobs (the reaper re-syncs `internals.settings` each tick anyway).
|
|
280
|
+
*/
|
|
281
|
+
reconcile(next) {
|
|
282
|
+
const prev = this.settings;
|
|
283
|
+
if (!next.enabled) {
|
|
284
|
+
// Disabling the pool drains it to zero, so it needs NO (re)built driver:
|
|
285
|
+
// SKIP the swap entirely. A disable reload that ALSO points at an unavailable
|
|
286
|
+
// driver (or drops the static-ssh hosts so construction would throw) must
|
|
287
|
+
// still disable + drain - never throw inside `swapDriver` and strand the
|
|
288
|
+
// live pool enabled with paid workers still running. The drain tears every worker
|
|
289
|
+
// down on the driver that PROVISIONED it (its origin), not the new one.
|
|
290
|
+
this.settings = next;
|
|
291
|
+
this.reaperInternals.settings = next;
|
|
292
|
+
void this.drain({ deadlineMs: next.drainDeadlineMs });
|
|
293
|
+
return;
|
|
294
|
+
}
|
|
295
|
+
// Finding #1: rebuild the driver in place BEFORE the settings swap when the
|
|
296
|
+
// driver construction actually changed (a new kind or deep-changed
|
|
297
|
+
// driverOptions). A same-driver reconcile skips the swap (no rebuild),
|
|
298
|
+
// keeping the singleton's resolved driver object stable. Once the coordinator
|
|
299
|
+
// exists it will drive `swapDriver`; until then `reconcile` drives it directly.
|
|
300
|
+
if (driverConstructionChanged(prev, next)) {
|
|
301
|
+
this.swapDriver(next);
|
|
302
|
+
}
|
|
303
|
+
this.settings = next;
|
|
304
|
+
this.reaperInternals.settings = next;
|
|
305
|
+
// A re-enabled pool (false -> true) starts from zero; the grow-toward-target
|
|
306
|
+
// path below covers it (a disabled pool was drained to zero, so live==0). The
|
|
307
|
+
// prior disable set `draining`/`reaperStopped` via `drain`; a re-enable must
|
|
308
|
+
// clear them (and re-arm the reaper) or the pool stays permanently dead -
|
|
309
|
+
// every acquire short-circuits on `draining` and no reaper top-up ever runs.
|
|
310
|
+
if (!prev.enabled) {
|
|
311
|
+
this.draining = false;
|
|
312
|
+
this.drainPromise = null;
|
|
313
|
+
this.notifyDrained = null;
|
|
314
|
+
// Invalidate any drain still parked on its deadline barrier. Its captured
|
|
315
|
+
// epoch is now stale, so its force-destroy loop will bail instead of
|
|
316
|
+
// tearing down the workers this re-enable is about to grow.
|
|
317
|
+
this.drainEpoch += 1;
|
|
318
|
+
if (this.reaperStopped) {
|
|
319
|
+
this.reaperStopped = false;
|
|
320
|
+
this.scheduleReaper();
|
|
321
|
+
}
|
|
322
|
+
}
|
|
323
|
+
// Defer any shrink toward a lowered `max` to the reaper, oldest-idle first.
|
|
324
|
+
this.markExcessForShrink();
|
|
325
|
+
// Grow toward the (possibly raised) warm/min target within the spend budget.
|
|
326
|
+
void this.growTowardTarget();
|
|
327
|
+
}
|
|
328
|
+
/**
|
|
329
|
+
* Rebuilds the resolved driver IN PLACE on a driver hot-reload, without
|
|
330
|
+
* reconstructing the pool singleton (Finding #1). The pool's ctor resolved the
|
|
331
|
+
* driver once, but `reconcile` previously only swapped settings, so a reload
|
|
332
|
+
* that changed `driver`/`driverOptions` left every acquire still routed to
|
|
333
|
+
* the stale driver object.
|
|
334
|
+
*
|
|
335
|
+
* TRANSACTIONAL: every step that can THROW (resolving the new driver and
|
|
336
|
+
* constructing its ledger) runs FIRST, into locals, BEFORE any record or
|
|
337
|
+
* `this.driver` is mutated. A failed reload (driver unavailable / invalid
|
|
338
|
+
* driverOptions) therefore throws having mutated NOTHING, matching the
|
|
339
|
+
* runtime's rollback to the last-good settings: marking last-good workers for
|
|
340
|
+
* destroy and THEN throwing would let `onLeaseSettle`/the reaper drain healthy
|
|
341
|
+
* warm/paid capacity after a REJECTED reload (Codex iter-6 HIGH). Once resolve
|
|
342
|
+
* succeeds (the commit point), the remaining steps cannot throw. `swapDriver`:
|
|
343
|
+
*
|
|
344
|
+
* 1. CAPTURES `originDriver` on EVERY existing record BEFORE reassigning, so
|
|
345
|
+
* each surviving worker remembers the backend that PROVISIONED it. This is the
|
|
346
|
+
* no-orphaned-paid-worker invariant: an in-flight lease that settles AFTER the
|
|
347
|
+
* swap routes `recycle`'s `destroy` to its ORIGINAL backend (below), not the
|
|
348
|
+
* new `this.driver`. A record that already carries an `originDriver` (a
|
|
349
|
+
* prior swap) keeps it (the true origin), so repeated swaps never lose it.
|
|
350
|
+
* 2. flags every old-driver worker `markedForDestroy` and recycles each IDLE one
|
|
351
|
+
* immediately (under its per-worker mutex) against its ORIGINAL backend, so no
|
|
352
|
+
* paid worker is orphaned and the new driver's `list()` reconcile never sees a
|
|
353
|
+
* stale old worker it does not own. A still-LEASED old worker keeps the flag and is
|
|
354
|
+
* recycled on its ORIGINAL backend the instant its last lease settles
|
|
355
|
+
* (`onLeaseSettle` -> `recycle`, which routes to `originDriver`).
|
|
356
|
+
* 3. commits the pre-resolved driver (`this.driver = newDriver`).
|
|
357
|
+
* 4. re-threads `reaperInternals.driver` to the new driver so the recurring
|
|
358
|
+
* reaper's `list()` reconcile / probe / top-up drive the new backend.
|
|
359
|
+
* 5. rebuilds the ledger `usesLedger` gate against the new driver's
|
|
360
|
+
* capabilities (e.g. non-ledger -> ledger) WITHOUT reconstructing the spend
|
|
361
|
+
* accumulators, which live on the pool and are untouched.
|
|
362
|
+
*
|
|
363
|
+
* Called by `reconcile` only when {@link driverConstructionChanged} is true.
|
|
364
|
+
*/
|
|
365
|
+
swapDriver(next) {
|
|
366
|
+
// TRANSACTIONAL: do ALL throwing work (resolveDriver, and constructing the
|
|
367
|
+
// new ledger) into LOCALS BEFORE mutating ANY record or `this.driver`. A
|
|
368
|
+
// failed reload (driver unavailable / invalid driverOptions) must throw
|
|
369
|
+
// having mutated NOTHING, so the runtime's transactional rollback to the
|
|
370
|
+
// last-good settings is matched by an UNTOUCHED inventory: marking workers for
|
|
371
|
+
// destroy before this throws would let `onLeaseSettle` recycle healthy
|
|
372
|
+
// in-flight leases and the reaper reap idle workers, draining warm/paid capacity
|
|
373
|
+
// after a REJECTED reload. (Codex iter-6 HIGH.)
|
|
374
|
+
const newDriver = resolveDriver(next, this.deps);
|
|
375
|
+
const newLedger = createLedger({
|
|
376
|
+
ledgerPath: this.deps.ledgerPath ?? "",
|
|
377
|
+
clock: this.deps.clock,
|
|
378
|
+
usesLedger: newDriver.capabilities.usesLedger && this.deps.ledgerPath !== undefined,
|
|
379
|
+
});
|
|
380
|
+
// --- COMMIT POINT: resolve succeeded, so from here NOTHING throws. ---------
|
|
381
|
+
// 1) Capture the origin driver on every existing record BEFORE reassigning
|
|
382
|
+
// `this.driver`, and flag each for drain so it is recycled on its origin.
|
|
383
|
+
const idleToRecycle = [];
|
|
384
|
+
for (const record of this.inventory.values()) {
|
|
385
|
+
record.originDriver = record.originDriver ?? this.driver;
|
|
386
|
+
record.markedForDestroy = true;
|
|
387
|
+
// An idle (un-leased) old-driver worker cannot serve the new driver and the
|
|
388
|
+
// new driver's list() will not own it, so recycle it now against its origin
|
|
389
|
+
// rather than deferring to a reaper that would otherwise drop it un-destroyed.
|
|
390
|
+
if (isLive(record.state) && record.inFlight === 0 && record.state !== "DESTROYING") {
|
|
391
|
+
idleToRecycle.push(record);
|
|
392
|
+
}
|
|
393
|
+
}
|
|
394
|
+
// 3) Commit the pre-resolved driver in place, and bump the driver
|
|
395
|
+
// generation so any in-flight grow / warm-provision that captured the PRIOR
|
|
396
|
+
// generation before its provision await detects the swap when it returns
|
|
397
|
+
// (and records its worker's origin as the captured driver).
|
|
398
|
+
this.driver = newDriver;
|
|
399
|
+
this.driverGeneration += 1;
|
|
400
|
+
// 4) Re-thread the reaper's driver so its list()/probe/top-up drive the new
|
|
401
|
+
// backend (the reaper reads `reaperInternals.driver`, not `this.driver`).
|
|
402
|
+
this.reaperInternals.driver = this.driver;
|
|
403
|
+
// 5) Commit the pre-built ledger gate (rebuilt against the new driver's
|
|
404
|
+
// `usesLedger` capability). The pool's spend accumulators are unaffected
|
|
405
|
+
// (they live on the pool, not the ledger object).
|
|
406
|
+
this.ledger = newLedger;
|
|
407
|
+
// 2 (deferred async, fire-and-forget like reconcile's grow/drain): recycle each
|
|
408
|
+
// idle old-driver worker on its ORIGINAL backend under its per-worker mutex, then
|
|
409
|
+
// wake any waiters so the freed capacity refills from the NEW driver.
|
|
410
|
+
if (idleToRecycle.length > 0) {
|
|
411
|
+
void (async () => {
|
|
412
|
+
for (const record of idleToRecycle) {
|
|
413
|
+
await this.mutexFor(record.workerId).runExclusive(async () => {
|
|
414
|
+
if (record.inFlight !== 0)
|
|
415
|
+
return; // a lease landed first; settle recycles it
|
|
416
|
+
await this.recycle(record, "shrink");
|
|
417
|
+
});
|
|
418
|
+
}
|
|
419
|
+
this.wakeWaiters();
|
|
420
|
+
})();
|
|
421
|
+
}
|
|
422
|
+
}
|
|
423
|
+
/**
|
|
424
|
+
* Registers a callback the pool fires INSIDE the per-worker mutex immediately
|
|
425
|
+
* before it destroys a machine. Every teardown path routes through the single
|
|
426
|
+
* {@link recycle} chokepoint, so the callback fires exactly once per worker just
|
|
427
|
+
* before `driver.destroy`. The dispatch coordinator registers a callback here
|
|
428
|
+
* to fail any still-open RunSlot on the recycled worker CLEANLY before the host
|
|
429
|
+
* dies (the recycle-vs-endpoint ordering invariant). A callback error is
|
|
430
|
+
* swallowed so a misbehaving listener can never block the teardown it precedes.
|
|
431
|
+
*/
|
|
432
|
+
onMachineRecycling(cb) {
|
|
433
|
+
this.recyclingCallbacks.push(cb);
|
|
434
|
+
}
|
|
435
|
+
/**
|
|
436
|
+
* Registers a callback fired whenever a capacity-freeing event leaves the pool
|
|
437
|
+
* leasable (see {@link WorkerPool.onCapacityAvailable}). Fired at the end of every
|
|
438
|
+
* waiter wake-up pass - a lease settle, a reconcile grow, a reaper top-up -
|
|
439
|
+
* AFTER the FIFO waiters had first claim on the freed worker, and only when
|
|
440
|
+
* `canAcquire()` still holds, so a drained/disabled/spend-capped pool never
|
|
441
|
+
* notifies.
|
|
442
|
+
*/
|
|
443
|
+
onCapacityAvailable(cb) {
|
|
444
|
+
this.capacityAvailableCallbacks.push(cb);
|
|
445
|
+
}
|
|
446
|
+
/** Notifies every {@link onCapacityAvailable} listener; errors are swallowed. */
|
|
447
|
+
notifyCapacityAvailable() {
|
|
448
|
+
if (this.capacityAvailableCallbacks.length === 0)
|
|
449
|
+
return;
|
|
450
|
+
if (!this.canAcquire())
|
|
451
|
+
return;
|
|
452
|
+
for (const cb of this.capacityAvailableCallbacks) {
|
|
453
|
+
try {
|
|
454
|
+
cb();
|
|
455
|
+
}
|
|
456
|
+
catch (error) {
|
|
457
|
+
this.logEvent({
|
|
458
|
+
event: "worker_pool_capacity_callback_failed",
|
|
459
|
+
error: errorMessage(error),
|
|
460
|
+
});
|
|
461
|
+
}
|
|
462
|
+
}
|
|
463
|
+
}
|
|
464
|
+
/**
|
|
465
|
+
* Notifies every registered {@link onMachineRecycling} callback that `workerId` is
|
|
466
|
+
* about to be destroyed. Called once at the top of {@link recycle} (inside the
|
|
467
|
+
* per-worker mutex, before `driver.destroy`). Each callback's error is caught and
|
|
468
|
+
* logged so one bad listener can never block the teardown or starve the others.
|
|
469
|
+
*/
|
|
470
|
+
notifyMachineRecycling(workerId) {
|
|
471
|
+
for (const cb of this.recyclingCallbacks) {
|
|
472
|
+
try {
|
|
473
|
+
cb(workerId);
|
|
474
|
+
}
|
|
475
|
+
catch (error) {
|
|
476
|
+
this.logEvent({
|
|
477
|
+
event: "worker_pool_recycling_callback_failed",
|
|
478
|
+
workerId,
|
|
479
|
+
error: errorMessage(error),
|
|
480
|
+
});
|
|
481
|
+
}
|
|
482
|
+
}
|
|
483
|
+
}
|
|
484
|
+
/**
|
|
485
|
+
* Flags the OLDEST excess workers for destruction when the live count exceeds the
|
|
486
|
+
* current `max`. Idle workers are preferred (so a shrink frees capacity without
|
|
487
|
+
* disturbing a run) and ordered oldest-idle-first; only when no idle worker remains
|
|
488
|
+
* does it fall back to flagging a LEASED worker, which is recycled on lease return
|
|
489
|
+
* (never destroyed synchronously). The actual teardown is the reaper's job.
|
|
490
|
+
*/
|
|
491
|
+
markExcessForShrink() {
|
|
492
|
+
const max = this.settings.max;
|
|
493
|
+
const live = [...this.inventory.values()].filter((record) => isLive(record.state) && !record.markedForDestroy);
|
|
494
|
+
let excess = live.length - max;
|
|
495
|
+
if (excess <= 0)
|
|
496
|
+
return;
|
|
497
|
+
// Oldest-idle first: idle workers before leased, each group oldest-idle-first.
|
|
498
|
+
const ordered = [...live].sort((a, b) => {
|
|
499
|
+
const aIdle = a.state === "WARM_IDLE" && a.inFlight === 0 ? 0 : 1;
|
|
500
|
+
const bIdle = b.state === "WARM_IDLE" && b.inFlight === 0 ? 0 : 1;
|
|
501
|
+
if (aIdle !== bIdle)
|
|
502
|
+
return aIdle - bIdle;
|
|
503
|
+
return a.lastIdleAtMs - b.lastIdleAtMs;
|
|
504
|
+
});
|
|
505
|
+
for (const record of ordered) {
|
|
506
|
+
if (excess <= 0)
|
|
507
|
+
break;
|
|
508
|
+
record.markedForDestroy = true;
|
|
509
|
+
excess -= 1;
|
|
510
|
+
}
|
|
511
|
+
}
|
|
512
|
+
/**
|
|
513
|
+
* Provisions warm workers one at a time toward the higher of `min`/`warm`, within
|
|
514
|
+
* the `max` ceiling and the spend budget (the reservation inside `provisionWarm`
|
|
515
|
+
* enforces both). Fire-and-forget from `reconcile` so a reload never blocks; a
|
|
516
|
+
* failed provision is logged and swallowed inside `provisionWarm` and retried by
|
|
517
|
+
* the recurring reaper top-up.
|
|
518
|
+
*/
|
|
519
|
+
async growTowardTarget() {
|
|
520
|
+
const target = Math.max(this.settings.min, this.settings.warm);
|
|
521
|
+
let attempts = Math.max(0, target - (this.liveWorkerCount() + this.reservedProvisions));
|
|
522
|
+
while (attempts > 0 && this.liveWorkerCount() + this.reservedProvisions < target) {
|
|
523
|
+
if (!this.hasGrowthHeadroom())
|
|
524
|
+
break;
|
|
525
|
+
await this.provisionWarm();
|
|
526
|
+
attempts -= 1;
|
|
527
|
+
}
|
|
528
|
+
this.wakeWaiters();
|
|
529
|
+
}
|
|
530
|
+
/**
|
|
531
|
+
* Re-adopts survivors on daemon startup so a restart does not leak the workers a
|
|
532
|
+
* prior process created. The reconcile is authoritative on `driver.list()`:
|
|
533
|
+
*
|
|
534
|
+
* 1. Seed the daily spend accumulator from the `spend.json` sidecar so a
|
|
535
|
+
* restart within the same UTC day carries the daily total (a day boundary
|
|
536
|
+
* resets it). The sidecar is the source of truth for spend, not inventory.
|
|
537
|
+
* 2. Re-adopt every worker `driver.list()` still shows that carries the
|
|
538
|
+
* pool-owned label into inventory as WARM_IDLE (a fresh process has no
|
|
539
|
+
* active runs, so a survivor is idle: `inFlight=0`, `leaseId=null`). An
|
|
540
|
+
* unlabeled instance is never adopted (it is not ours).
|
|
541
|
+
* 3. Force-return orphan ledger rows: a row whose worker the authoritative list
|
|
542
|
+
* no longer shows is a worker that vanished while the run owning it is gone,
|
|
543
|
+
* so the row is dropped from the ledger (no phantom inventory survives).
|
|
544
|
+
*
|
|
545
|
+
* Idempotent: a worker already in inventory (e.g. a second hydrate) is left alone.
|
|
546
|
+
*/
|
|
547
|
+
async hydrate() {
|
|
548
|
+
const spend = await this.ledger.loadDailySpend();
|
|
549
|
+
this.dayKey = spend.dayKey;
|
|
550
|
+
this.dailyWorkerSecondsUsed = spend.workerSecondsToday;
|
|
551
|
+
// The ledger replay is advisory; driver.list() is authoritative. A transient
|
|
552
|
+
// list() failure must not wipe inventory, so the re-adopt below only runs once a
|
|
553
|
+
// BOUNDED retry of list() (short clock-driven backoff) finally succeeds.
|
|
554
|
+
const rows = await this.ledger.load();
|
|
555
|
+
const listed = await this.listForHydrate();
|
|
556
|
+
if (listed === null) {
|
|
557
|
+
// list() never recovered. For a driver that owns no paid survivors
|
|
558
|
+
// (non-ledger, non-ephemeral fake / static-ssh) the logged skip is tolerable:
|
|
559
|
+
// there is nothing to leak, so startup proceeds and the reaper reconciles a
|
|
560
|
+
// later tick. `hydrated` deliberately stays false so the reaper's
|
|
561
|
+
// destroy-unknown gate remains closed until a list() actually succeeds.
|
|
562
|
+
return;
|
|
563
|
+
}
|
|
564
|
+
const listedById = new Map();
|
|
565
|
+
for (const descriptor of listed)
|
|
566
|
+
listedById.set(descriptor.workerId, descriptor);
|
|
567
|
+
// Re-adopt every labeled-ours survivor the list still shows. A fresh process
|
|
568
|
+
// holds no active runs, so each survivor is re-adopted idle (no lease).
|
|
569
|
+
const now = this.leaseClock.now();
|
|
570
|
+
for (const descriptor of listed) {
|
|
571
|
+
if (this.inventory.has(descriptor.workerId))
|
|
572
|
+
continue;
|
|
573
|
+
if (!descriptor.labels.includes(POOL_OWNED_LABEL))
|
|
574
|
+
continue;
|
|
575
|
+
this.inventory.set(descriptor.workerId, {
|
|
576
|
+
workerId: descriptor.workerId,
|
|
577
|
+
workerHost: descriptor.workerHost,
|
|
578
|
+
driverRef: descriptor.driverRef,
|
|
579
|
+
state: "WARM_IDLE",
|
|
580
|
+
labels: [...descriptor.labels],
|
|
581
|
+
createdAtMs: descriptor.createdAtMs,
|
|
582
|
+
leaseId: null,
|
|
583
|
+
inFlight: 0,
|
|
584
|
+
lastIdleAtMs: now,
|
|
585
|
+
lastHeartbeatMs: now,
|
|
586
|
+
workerSecondsUsed: 0,
|
|
587
|
+
markedForDestroy: false,
|
|
588
|
+
affinityKey: null,
|
|
589
|
+
metadata: { ...descriptor.metadata },
|
|
590
|
+
leaseIssues: new Map(),
|
|
591
|
+
});
|
|
592
|
+
}
|
|
593
|
+
// Reconcile every ledger row against the authoritative list:
|
|
594
|
+
// - row whose worker list() still shows: kept (its survivor was re-adopted above).
|
|
595
|
+
// - PROVISIONAL row with no matching instance YOUNGER than ttlMs: kept. The
|
|
596
|
+
// prior process crashed mid-provision (the worker may exist at the driver but
|
|
597
|
+
// not yet be list-visible under eventual consistency), so the recoverable
|
|
598
|
+
// write-ahead row is retained for a later tick / re-hydrate to correlate.
|
|
599
|
+
// - any other row with no matching instance (active row whose worker vanished, or
|
|
600
|
+
// a provisional row older than ttlMs that never materialized): dropped so no
|
|
601
|
+
// phantom inventory / dead write-ahead row survives the restart.
|
|
602
|
+
const ttlMs = this.settings.ttlMs;
|
|
603
|
+
for (const row of rows) {
|
|
604
|
+
if (listedById.has(row.workerId))
|
|
605
|
+
continue;
|
|
606
|
+
if (row.status === "provisional" && now - row.createdAtMs < ttlMs) {
|
|
607
|
+
// A still-recent provisional row: the worker may be in flight / not yet listed.
|
|
608
|
+
continue;
|
|
609
|
+
}
|
|
610
|
+
this.logEvent({ event: "worker_pool_hydrate_orphan_dropped", workerId: row.workerId });
|
|
611
|
+
await this.ledger.delete(row.workerId);
|
|
612
|
+
}
|
|
613
|
+
// Advance the id sequence past any adopted `worker-<n>` survivor so the next
|
|
614
|
+
// grow / warm-provision cannot RE-MINT an id a survivor already owns. Without
|
|
615
|
+
// this, `workerSeq` (which inits at 0) would mint `worker-0` again after adopting a
|
|
616
|
+
// higher-numbered survivor and, once it cycled back through that suffix, stamp
|
|
617
|
+
// a SECOND lease onto a live survivor. Non-numeric ids (e.g. a custom label)
|
|
618
|
+
// carry no numeric suffix and are ignored when computing the high-water mark.
|
|
619
|
+
this.advanceWorkerSeqPastAdopted();
|
|
620
|
+
// The first successful hydrate has now re-adopted every labeled survivor, so
|
|
621
|
+
// the reaper's destroy-unknown reconcile may resume: any labeled-but-unknown
|
|
622
|
+
// survivor a later tick sees is now a genuine leaked orphan, not one this
|
|
623
|
+
// hydrate had simply not adopted yet.
|
|
624
|
+
this.hydrated = true;
|
|
625
|
+
}
|
|
626
|
+
/**
|
|
627
|
+
* Bounded-retry wrapper around `driver.list()` for {@link hydrate}. The
|
|
628
|
+
* authoritative startup reconcile MUST NOT treat a transient `list()` outage as a
|
|
629
|
+
* successful (empty) startup, because a paid (usesLedger / ephemeral) driver may
|
|
630
|
+
* have real survivors a prior process provisioned: swallowing the failure would
|
|
631
|
+
* leave those workers neither adopted (so they never serve a lease) nor reaped (the
|
|
632
|
+
* destroy-unknown gate stays closed because {@link hydrated} never flips) nor
|
|
633
|
+
* visible to drain - unmanaged paid workers leaking past restart.
|
|
634
|
+
*
|
|
635
|
+
* - Retries `list()` up to {@link HYDRATE_LIST_ATTEMPTS} times with a short
|
|
636
|
+
* clock-driven backoff between attempts, returning the descriptors on the first
|
|
637
|
+
* success (the common case: a brief driver blip recovers within a retry).
|
|
638
|
+
* - If every attempt fails AND the driver owns real survivors
|
|
639
|
+
* (`capabilities.usesLedger` or `capabilities.ephemeral`), THROWS
|
|
640
|
+
* `worker_pool_hydrate_failed` so the daemon's `await workerPool.hydrate()` fails
|
|
641
|
+
* startup LOUDLY instead of running blind over unmanaged paid machines.
|
|
642
|
+
* - If every attempt fails for a NON-paid driver (fake / static-ssh: no paid
|
|
643
|
+
* survivors to leak), returns `null` so the caller logs the skip and proceeds
|
|
644
|
+
* with startup, leaving `hydrated` false (reaper destroy-unknown gate closed)
|
|
645
|
+
* until a later `list()` succeeds.
|
|
646
|
+
*/
|
|
647
|
+
async listForHydrate() {
|
|
648
|
+
let lastError;
|
|
649
|
+
for (let attempt = 1; attempt <= HYDRATE_LIST_ATTEMPTS; attempt += 1) {
|
|
650
|
+
try {
|
|
651
|
+
return await this.driver.list();
|
|
652
|
+
}
|
|
653
|
+
catch (error) {
|
|
654
|
+
lastError = error;
|
|
655
|
+
this.logEvent({
|
|
656
|
+
event: "worker_pool_hydrate_list_failed",
|
|
657
|
+
attempt,
|
|
658
|
+
maxAttempts: HYDRATE_LIST_ATTEMPTS,
|
|
659
|
+
error: errorMessage(error),
|
|
660
|
+
});
|
|
661
|
+
if (attempt < HYDRATE_LIST_ATTEMPTS) {
|
|
662
|
+
await this.sleep(HYDRATE_LIST_BACKOFF_MS * attempt);
|
|
663
|
+
}
|
|
664
|
+
}
|
|
665
|
+
}
|
|
666
|
+
const caps = this.driver.capabilities;
|
|
667
|
+
if (caps.usesLedger || caps.ephemeral) {
|
|
668
|
+
// A paid driver with potential real survivors: fail startup loud rather than
|
|
669
|
+
// run with unmanaged paid workers that are invisible to adopt / reap / drain.
|
|
670
|
+
this.logEvent({
|
|
671
|
+
event: "worker_pool_hydrate_failed",
|
|
672
|
+
attempts: HYDRATE_LIST_ATTEMPTS,
|
|
673
|
+
error: errorMessage(lastError),
|
|
674
|
+
});
|
|
675
|
+
throw new Error(`worker_pool_hydrate_failed: driver.list() failed after ${HYDRATE_LIST_ATTEMPTS} attempts: ${errorMessage(lastError)}`);
|
|
676
|
+
}
|
|
677
|
+
// A non-paid driver owns no survivors to leak: tolerate the skip.
|
|
678
|
+
return null;
|
|
679
|
+
}
|
|
680
|
+
/** Resolves after `delayMs` via the injected clock (used for hydrate backoff). */
|
|
681
|
+
async sleep(delayMs) {
|
|
682
|
+
await new Promise((resolve) => {
|
|
683
|
+
const handle = this.clock.setTimeout(resolve, delayMs);
|
|
684
|
+
handle.unref?.();
|
|
685
|
+
});
|
|
686
|
+
}
|
|
687
|
+
/**
|
|
688
|
+
* Probes a freshly-provisioned worker until it reports SSH-ready or the bounded
|
|
689
|
+
* attempt budget is spent, enforcing the warm-up contract that a worker is
|
|
690
|
+
* "reachable before it is leased". `provision` returning does NOT guarantee sshd is
|
|
691
|
+
* up on a cold cloud worker (a container driver may only have resolved the published
|
|
692
|
+
* port; a cloud driver may boot asynchronously), so leasing it immediately would hand
|
|
693
|
+
* an unready host to the runner - failing the first run, poisoning the lease, and
|
|
694
|
+
* destroying an otherwise-healthy worker. An already-up host (static-ssh) and the fake
|
|
695
|
+
* probe ok on the first attempt, so this is a single round-trip on the cold path.
|
|
696
|
+
* Probe faults are treated as not-ready (never thrown). Returns false when the worker
|
|
697
|
+
* never becomes ready; the caller destroys it.
|
|
698
|
+
*/
|
|
699
|
+
async probeUntilReady(descriptor, driver) {
|
|
700
|
+
let lastReason = "not_ready";
|
|
701
|
+
for (let attempt = 1; attempt <= PROBE_READY_ATTEMPTS; attempt += 1) {
|
|
702
|
+
try {
|
|
703
|
+
const health = await driver.probe(descriptor, {
|
|
704
|
+
timeoutMs: this.settings.acquireTimeoutMs,
|
|
705
|
+
});
|
|
706
|
+
if (health.ok)
|
|
707
|
+
return true;
|
|
708
|
+
lastReason = health.reason;
|
|
709
|
+
}
|
|
710
|
+
catch (error) {
|
|
711
|
+
lastReason = errorMessage(error);
|
|
712
|
+
}
|
|
713
|
+
if (attempt < PROBE_READY_ATTEMPTS)
|
|
714
|
+
await this.sleep(PROBE_READY_BACKOFF_MS * attempt);
|
|
715
|
+
}
|
|
716
|
+
this.logEvent({
|
|
717
|
+
event: "worker_pool_worker_unready",
|
|
718
|
+
workerId: descriptor.workerId,
|
|
719
|
+
reason: lastReason,
|
|
720
|
+
});
|
|
721
|
+
return false;
|
|
722
|
+
}
|
|
723
|
+
/**
|
|
724
|
+
* Bumps `workerSeq` to one past the highest numeric suffix among the `worker-<n>` ids
|
|
725
|
+
* currently in inventory. Ids that do not match `worker-<n>` (non-numeric suffix)
|
|
726
|
+
* are skipped. Never lowers the sequence.
|
|
727
|
+
*/
|
|
728
|
+
advanceWorkerSeqPastAdopted() {
|
|
729
|
+
let maxSuffix = -1;
|
|
730
|
+
for (const workerId of this.inventory.keys()) {
|
|
731
|
+
const match = /^worker-(\d+)$/.exec(workerId);
|
|
732
|
+
if (!match)
|
|
733
|
+
continue;
|
|
734
|
+
const suffix = Number.parseInt(match[1], 10);
|
|
735
|
+
if (Number.isFinite(suffix) && suffix > maxSuffix)
|
|
736
|
+
maxSuffix = suffix;
|
|
737
|
+
}
|
|
738
|
+
if (maxSuffix + 1 > this.workerSeq)
|
|
739
|
+
this.workerSeq = maxSuffix + 1;
|
|
740
|
+
}
|
|
741
|
+
async drain(opts) {
|
|
742
|
+
if (this.drainPromise)
|
|
743
|
+
return this.drainPromise;
|
|
744
|
+
this.draining = true;
|
|
745
|
+
this.drainEpoch += 1;
|
|
746
|
+
this.drainPromise = this.runDrain(opts, this.drainEpoch);
|
|
747
|
+
return this.drainPromise;
|
|
748
|
+
}
|
|
749
|
+
snapshot() {
|
|
750
|
+
let warmIdle = 0;
|
|
751
|
+
let leased = 0;
|
|
752
|
+
let provisioning = 0;
|
|
753
|
+
let degraded = 0;
|
|
754
|
+
let inFlight = 0;
|
|
755
|
+
let concurrentWorkers = 0;
|
|
756
|
+
const workers = [];
|
|
757
|
+
for (const record of this.inventory.values()) {
|
|
758
|
+
inFlight += record.inFlight;
|
|
759
|
+
if (isLive(record.state))
|
|
760
|
+
concurrentWorkers += 1;
|
|
761
|
+
switch (record.state) {
|
|
762
|
+
case "WARM_IDLE":
|
|
763
|
+
warmIdle += 1;
|
|
764
|
+
break;
|
|
765
|
+
case "LEASED":
|
|
766
|
+
leased += 1;
|
|
767
|
+
break;
|
|
768
|
+
case "PROVISIONING":
|
|
769
|
+
case "WARMING":
|
|
770
|
+
provisioning += 1;
|
|
771
|
+
break;
|
|
772
|
+
case "DEGRADED":
|
|
773
|
+
degraded += 1;
|
|
774
|
+
break;
|
|
775
|
+
default:
|
|
776
|
+
break;
|
|
777
|
+
}
|
|
778
|
+
workers.push({
|
|
779
|
+
workerId: record.workerId,
|
|
780
|
+
workerHost: record.workerHost,
|
|
781
|
+
state: record.state,
|
|
782
|
+
inFlight: record.inFlight,
|
|
783
|
+
markedForDestroy: record.markedForDestroy,
|
|
784
|
+
});
|
|
785
|
+
}
|
|
786
|
+
return {
|
|
787
|
+
enabled: this.settings.enabled,
|
|
788
|
+
driver: this.settings.driver,
|
|
789
|
+
total: this.inventory.size,
|
|
790
|
+
warmIdle,
|
|
791
|
+
leased,
|
|
792
|
+
provisioning,
|
|
793
|
+
degraded,
|
|
794
|
+
inFlight,
|
|
795
|
+
spend: {
|
|
796
|
+
concurrentWorkers,
|
|
797
|
+
workerSecondsUsed: this.workerSecondsUsed,
|
|
798
|
+
dailyWorkerSecondsUsed: this.dailyWorkerSecondsUsed,
|
|
799
|
+
dayKey: this.dayKey,
|
|
800
|
+
},
|
|
801
|
+
workers,
|
|
802
|
+
};
|
|
803
|
+
}
|
|
804
|
+
// --- selection / stamping ----------------------------------------------
|
|
805
|
+
/**
|
|
806
|
+
* Synchronously picks a leasable worker and stamps a lease on it WITHOUT any
|
|
807
|
+
* await in between. Honors sticky affinity (prefer the same worker a retry ran
|
|
808
|
+
* on) and the per-issue fairness cap. Returns null when no in-inventory worker is
|
|
809
|
+
* leasable for this request.
|
|
810
|
+
*/
|
|
811
|
+
selectAndStamp(req) {
|
|
812
|
+
const record = this.pickRecord(req);
|
|
813
|
+
if (!record)
|
|
814
|
+
return null;
|
|
815
|
+
return this.stamp(record, req);
|
|
816
|
+
}
|
|
817
|
+
/** Chooses the best leasable record for a request (affinity first). */
|
|
818
|
+
pickRecord(req) {
|
|
819
|
+
const slotsPerMachine = this.settings.slotsPerMachine;
|
|
820
|
+
// Affinity: if a prior workerHost is named and that worker is still leasable,
|
|
821
|
+
// re-land on it so resume continuity holds across a retry.
|
|
822
|
+
if (req.affinityKey) {
|
|
823
|
+
for (const record of this.inventory.values()) {
|
|
824
|
+
if (record.workerHost === req.affinityKey && isLeasable(record, slotsPerMachine)) {
|
|
825
|
+
if (this.issueWouldExceedCap(req, record))
|
|
826
|
+
return null;
|
|
827
|
+
return record;
|
|
828
|
+
}
|
|
829
|
+
}
|
|
830
|
+
}
|
|
831
|
+
// Otherwise prefer an idle worker, then any under-capacity worker (slotsPerMachine>1).
|
|
832
|
+
let underCapacity = null;
|
|
833
|
+
for (const record of this.inventory.values()) {
|
|
834
|
+
if (!isLeasable(record, slotsPerMachine))
|
|
835
|
+
continue;
|
|
836
|
+
if (this.issueWouldExceedCap(req, record))
|
|
837
|
+
continue;
|
|
838
|
+
if (record.state === "WARM_IDLE" && record.inFlight === 0) {
|
|
839
|
+
return record;
|
|
840
|
+
}
|
|
841
|
+
if (underCapacity === null)
|
|
842
|
+
underCapacity = record;
|
|
843
|
+
}
|
|
844
|
+
return underCapacity;
|
|
845
|
+
}
|
|
846
|
+
/** Stamps a lease on a record (sets leaseId, ++inFlight, LEASED). Synchronous. */
|
|
847
|
+
stamp(record, req) {
|
|
848
|
+
const leaseId = record.leaseId ?? randomUUID();
|
|
849
|
+
record.leaseId = leaseId;
|
|
850
|
+
record.inFlight += 1;
|
|
851
|
+
record.state = "LEASED";
|
|
852
|
+
record.affinityKey = record.workerHost;
|
|
853
|
+
if (!record.leaseIssues)
|
|
854
|
+
record.leaseIssues = new Map();
|
|
855
|
+
record.leaseIssues.set(req.issueId, (record.leaseIssues.get(req.issueId) ?? 0) + 1);
|
|
856
|
+
const acquiredAtMs = this.leaseClock.now();
|
|
857
|
+
// Track this lease's acquire time so a drain that force-destroys the worker while
|
|
858
|
+
// it is still LEASED can accrue the in-flight window (the normal settle path
|
|
859
|
+
// removes this entry in `onLeaseSettle`).
|
|
860
|
+
if (!record.liveLeaseAcquiredMs)
|
|
861
|
+
record.liveLeaseAcquiredMs = [];
|
|
862
|
+
record.liveLeaseAcquiredMs.push(acquiredAtMs);
|
|
863
|
+
record.lastHeartbeatMs = acquiredAtMs;
|
|
864
|
+
const expiresAtMs = record.createdAtMs + this.settings.ttlMs;
|
|
865
|
+
return createLease({
|
|
866
|
+
leaseId,
|
|
867
|
+
record,
|
|
868
|
+
mutex: this.mutexFor(record.workerId),
|
|
869
|
+
clock: this.leaseClock,
|
|
870
|
+
acquiredAtMs,
|
|
871
|
+
expiresAtMs,
|
|
872
|
+
// Bill this lease from ITS OWN acquire time so a long heartbeating run is
|
|
873
|
+
// charged the full window (heartbeats only stamp staleness, never reset the
|
|
874
|
+
// bill) and two overlapping leases on one worker each accrue their own window.
|
|
875
|
+
onSettle: async (rec, outcome, reason) => this.onLeaseSettle(rec, req.issueId, acquiredAtMs, outcome, reason),
|
|
876
|
+
});
|
|
877
|
+
}
|
|
878
|
+
/** True when leasing one more worker for this issue would exceed maxWorkersPerIssue. */
|
|
879
|
+
issueWouldExceedCap(req, candidate) {
|
|
880
|
+
const cap = this.settings.maxWorkersPerIssue;
|
|
881
|
+
if (cap === undefined)
|
|
882
|
+
return false;
|
|
883
|
+
// Reusing a worker the issue already holds does not consume a new slot.
|
|
884
|
+
if ((candidate.leaseIssues?.get(req.issueId) ?? 0) > 0)
|
|
885
|
+
return false;
|
|
886
|
+
let held = this.reservedProvisionsByIssue.get(req.issueId) ?? 0;
|
|
887
|
+
for (const record of this.inventory.values()) {
|
|
888
|
+
if (record.workerId === candidate.workerId)
|
|
889
|
+
continue;
|
|
890
|
+
if ((record.leaseIssues?.get(req.issueId) ?? 0) > 0)
|
|
891
|
+
held += 1;
|
|
892
|
+
}
|
|
893
|
+
return held >= cap;
|
|
894
|
+
}
|
|
895
|
+
// --- growth (reservation-based single flight) --------------------------
|
|
896
|
+
/** Whether a request may attempt to grow a new worker right now. */
|
|
897
|
+
canGrow(req) {
|
|
898
|
+
if (!this.hasGrowthHeadroom())
|
|
899
|
+
return false;
|
|
900
|
+
return !this.issueAtGrowthCap(req);
|
|
901
|
+
}
|
|
902
|
+
/** Capacity headroom under `max` and the concurrent-worker spend cap. */
|
|
903
|
+
hasGrowthHeadroom() {
|
|
904
|
+
const live = this.liveWorkerCount() + this.reservedProvisions;
|
|
905
|
+
if (live >= this.settings.max)
|
|
906
|
+
return false;
|
|
907
|
+
const concurrentCap = this.settings.spend?.maxConcurrentWorkers;
|
|
908
|
+
if (concurrentCap !== undefined && live >= concurrentCap)
|
|
909
|
+
return false;
|
|
910
|
+
return true;
|
|
911
|
+
}
|
|
912
|
+
/**
|
|
913
|
+
* Whether growth is barred specifically by the concurrent-worker spend cap (live
|
|
914
|
+
* workers at the cap while still under `max`). Lets `acquire` distinguish a
|
|
915
|
+
* budget refusal (`spend_cap`, returned now) from a transient `max` saturation
|
|
916
|
+
* (which waits on the FIFO queue).
|
|
917
|
+
*/
|
|
918
|
+
blockedBySpendCap() {
|
|
919
|
+
const concurrentCap = this.settings.spend?.maxConcurrentWorkers;
|
|
920
|
+
if (concurrentCap === undefined)
|
|
921
|
+
return false;
|
|
922
|
+
const live = this.liveWorkerCount() + this.reservedProvisions;
|
|
923
|
+
return live >= concurrentCap && live < this.settings.max;
|
|
924
|
+
}
|
|
925
|
+
/** Whether the issue already holds its maxWorkersPerIssue, so it cannot grow. */
|
|
926
|
+
issueAtGrowthCap(req) {
|
|
927
|
+
const cap = this.settings.maxWorkersPerIssue;
|
|
928
|
+
if (cap === undefined)
|
|
929
|
+
return false;
|
|
930
|
+
return this.issueLeaseCount(req.issueId) >= cap;
|
|
931
|
+
}
|
|
932
|
+
/**
|
|
933
|
+
* Count of workers attributed to an issue for cap purposes: workers whose inventory
|
|
934
|
+
* row already carries the issue PLUS any in-flight grows reserved for it (a grow
|
|
935
|
+
* decided but whose provision has not yet landed in inventory). Counting the
|
|
936
|
+
* reservation is what makes two concurrent same-issue grows respect the cap.
|
|
937
|
+
*/
|
|
938
|
+
issueLeaseCount(issueId) {
|
|
939
|
+
let held = this.reservedProvisionsByIssue.get(issueId) ?? 0;
|
|
940
|
+
for (const record of this.inventory.values()) {
|
|
941
|
+
if ((record.leaseIssues?.get(issueId) ?? 0) > 0)
|
|
942
|
+
held += 1;
|
|
943
|
+
}
|
|
944
|
+
return held;
|
|
945
|
+
}
|
|
946
|
+
/**
|
|
947
|
+
* Grows one worker under the synchronous reservation. The reservation is taken
|
|
948
|
+
* BEFORE the provision await so a concurrent growth decision sees it and cannot
|
|
949
|
+
* exceed `max`; it is released on settle/reject. A successful provision is
|
|
950
|
+
* stamped and leased immediately.
|
|
951
|
+
*/
|
|
952
|
+
async grow(req) {
|
|
953
|
+
// Reserve synchronously, then re-validate (a racing reservation may have
|
|
954
|
+
// just consumed the last slot in this same tick).
|
|
955
|
+
this.reservedProvisions += 1;
|
|
956
|
+
if (this.liveWorkerCount() + this.reservedProvisions > this.settings.max) {
|
|
957
|
+
this.reservedProvisions -= 1;
|
|
958
|
+
return { status: "no_capacity", reason: "spend_cap" };
|
|
959
|
+
}
|
|
960
|
+
// Also reserve the per-issue slot synchronously so a concurrent grow for the
|
|
961
|
+
// SAME issue sees this in-flight grow and cannot itself slip past the cap
|
|
962
|
+
// before this provision has landed in inventory. The reservation is included
|
|
963
|
+
// in the issue cap counts and released in the finally below.
|
|
964
|
+
this.reserveIssueProvision(req.issueId);
|
|
965
|
+
const workerId = `worker-${this.workerSeq++}`;
|
|
966
|
+
const labels = [POOL_OWNED_LABEL, ...req.labels];
|
|
967
|
+
// Capture the driver that will actually run this provision (and its
|
|
968
|
+
// generation) BEFORE the await, so a swapDriver racing the provision cannot
|
|
969
|
+
// misattribute the resulting worker: the record's origin is stamped to THIS
|
|
970
|
+
// driver so recycle destroys it on the backend that created it.
|
|
971
|
+
const originDriver = this.driver;
|
|
972
|
+
const originGeneration = this.driverGeneration;
|
|
973
|
+
try {
|
|
974
|
+
// Write-ahead: flush a provisional ledger row BEFORE the provision await so a
|
|
975
|
+
// crash mid-provision leaves a recoverable record (reconciled by hydrate
|
|
976
|
+
// against driver.list()). Inert for non-cloud drivers.
|
|
977
|
+
await this.writeProvisionalRow(workerId, labels);
|
|
978
|
+
const descriptor = await originDriver.provision({
|
|
979
|
+
workerId,
|
|
980
|
+
affinityKey: req.affinityKey ?? null,
|
|
981
|
+
// Stamp the pool-owned label alongside the request labels so a leaked
|
|
982
|
+
// worker (crash between provision and inventory write) is recognized as ours
|
|
983
|
+
// by the reaper's `list()` reconcile and can be destroyed.
|
|
984
|
+
labels,
|
|
985
|
+
timeoutMs: req.timeoutMs,
|
|
986
|
+
...(req.signal ? { signal: req.signal } : {}),
|
|
987
|
+
...(this.settings.driverOptions ? { driverOptions: this.settings.driverOptions } : {}),
|
|
988
|
+
});
|
|
989
|
+
// Correlate: upsert the provisional row with the real driverRef/workerHost
|
|
990
|
+
// now the driver has returned, completing the write-ahead correlate.
|
|
991
|
+
await this.correlateRow(descriptor);
|
|
992
|
+
// A swapDriver may have run WHILE this provision was in flight, so the worker
|
|
993
|
+
// was created on the now-stale `originDriver`, not the live `this.driver`.
|
|
994
|
+
const swappedDuringProvision = this.driverGeneration !== originGeneration;
|
|
995
|
+
// Readiness gate: never lease a worker that is not yet SSH-reachable (the
|
|
996
|
+
// "reachable before leased" contract). Probe it on the driver that created it
|
|
997
|
+
// BEFORE it enters inventory, so a concurrent acquire cannot grab a not-yet-ready
|
|
998
|
+
// worker and an unready cold worker is destroyed + reported as no-capacity rather than
|
|
999
|
+
// handed to the runner (which would fail, poison the lease, and churn a healthy
|
|
1000
|
+
// worker). Inert for an already-up host / the fake (probes ok on the first try).
|
|
1001
|
+
if (!(await this.probeUntilReady(descriptor, originDriver))) {
|
|
1002
|
+
await this.destroyDescriptor(descriptor, "unhealthy", originDriver);
|
|
1003
|
+
return { status: "no_capacity", reason: "driver_error" };
|
|
1004
|
+
}
|
|
1005
|
+
// The pool may have started draining (or been disabled) WHILE this provision OR
|
|
1006
|
+
// the readiness probe was in flight. runDrain snapshotted inventory before the
|
|
1007
|
+
// worker existed, so adding it now would leak a paid worker past a completed drain.
|
|
1008
|
+
// Destroy it instead of stamping it in - on the ORIGIN driver that created it.
|
|
1009
|
+
if (this.draining || !this.settings.enabled) {
|
|
1010
|
+
await this.destroyDescriptor(descriptor, "drain", originDriver);
|
|
1011
|
+
return { status: "no_capacity", reason: "pool_disabled" };
|
|
1012
|
+
}
|
|
1013
|
+
const record = {
|
|
1014
|
+
workerId: descriptor.workerId,
|
|
1015
|
+
workerHost: descriptor.workerHost,
|
|
1016
|
+
driverRef: descriptor.driverRef,
|
|
1017
|
+
state: "WARM_IDLE",
|
|
1018
|
+
labels: [...descriptor.labels],
|
|
1019
|
+
createdAtMs: descriptor.createdAtMs,
|
|
1020
|
+
leaseId: null,
|
|
1021
|
+
inFlight: 0,
|
|
1022
|
+
lastIdleAtMs: this.leaseClock.now(),
|
|
1023
|
+
lastHeartbeatMs: this.leaseClock.now(),
|
|
1024
|
+
workerSecondsUsed: 0,
|
|
1025
|
+
// A swap during the provision means this worker was created on a now-stale
|
|
1026
|
+
// driver; flag it for destroy so the reaper / settle recycles it (it
|
|
1027
|
+
// cannot serve the live driver and the new driver's list() will not own
|
|
1028
|
+
// it). A no-swap grow leaves this false (byte-identical default).
|
|
1029
|
+
markedForDestroy: swappedDuringProvision,
|
|
1030
|
+
affinityKey: null,
|
|
1031
|
+
metadata: { ...descriptor.metadata },
|
|
1032
|
+
leaseIssues: new Map(),
|
|
1033
|
+
// Record the backend that actually provisioned this worker so recycle destroys
|
|
1034
|
+
// it there. Only set when a swap happened during the await; an un-swapped
|
|
1035
|
+
// grow leaves it undefined so recycle falls back to `this.driver`
|
|
1036
|
+
// (byte-identical to the prior default path).
|
|
1037
|
+
...(swappedDuringProvision ? { originDriver } : {}),
|
|
1038
|
+
};
|
|
1039
|
+
this.inventory.set(record.workerId, record);
|
|
1040
|
+
const lease = this.stamp(record, req);
|
|
1041
|
+
return { status: "leased", lease };
|
|
1042
|
+
}
|
|
1043
|
+
catch (error) {
|
|
1044
|
+
this.logEvent({
|
|
1045
|
+
event: "worker_pool_provision_failed",
|
|
1046
|
+
workerId,
|
|
1047
|
+
error: errorMessage(error),
|
|
1048
|
+
});
|
|
1049
|
+
// The provision rejected: drop the write-ahead provisional row so a failed
|
|
1050
|
+
// grow leaves no dangling row a later hydrate would have to reap.
|
|
1051
|
+
await this.ledger.delete(workerId);
|
|
1052
|
+
return { status: "no_capacity", reason: "driver_error" };
|
|
1053
|
+
}
|
|
1054
|
+
finally {
|
|
1055
|
+
// Release the reservations on settle OR reject so a failed provision never
|
|
1056
|
+
// permanently blocks future growth.
|
|
1057
|
+
this.reservedProvisions -= 1;
|
|
1058
|
+
this.releaseIssueProvision(req.issueId);
|
|
1059
|
+
}
|
|
1060
|
+
}
|
|
1061
|
+
// --- waiter queue -------------------------------------------------------
|
|
1062
|
+
/**
|
|
1063
|
+
* Parks a blocked acquire on the FIFO queue. Resolves to a lease when a worker
|
|
1064
|
+
* frees, or to `no_capacity:acquire_timeout` when the timeout fires or the
|
|
1065
|
+
* request is aborted. The abort path resolves promptly so the poll thread is
|
|
1066
|
+
* never held to the full timeout.
|
|
1067
|
+
*/
|
|
1068
|
+
async waitForCapacity(req) {
|
|
1069
|
+
return new Promise((resolve) => {
|
|
1070
|
+
const waiter = {
|
|
1071
|
+
req,
|
|
1072
|
+
settled: false,
|
|
1073
|
+
resolve,
|
|
1074
|
+
timer: this.clock.setTimeout(() => {
|
|
1075
|
+
this.settleWaiter(waiter, { status: "no_capacity", reason: "acquire_timeout" });
|
|
1076
|
+
}, req.timeoutMs),
|
|
1077
|
+
cleanupAbort: null,
|
|
1078
|
+
};
|
|
1079
|
+
waiter.timer.unref?.();
|
|
1080
|
+
if (req.signal) {
|
|
1081
|
+
if (req.signal.aborted) {
|
|
1082
|
+
this.settleWaiter(waiter, { status: "no_capacity", reason: "acquire_timeout" });
|
|
1083
|
+
return;
|
|
1084
|
+
}
|
|
1085
|
+
const onAbort = () => {
|
|
1086
|
+
this.settleWaiter(waiter, { status: "no_capacity", reason: "acquire_timeout" });
|
|
1087
|
+
};
|
|
1088
|
+
req.signal.addEventListener("abort", onAbort, { once: true });
|
|
1089
|
+
waiter.cleanupAbort = () => req.signal?.removeEventListener("abort", onAbort);
|
|
1090
|
+
}
|
|
1091
|
+
this.waiters.push(waiter);
|
|
1092
|
+
});
|
|
1093
|
+
}
|
|
1094
|
+
/** Resolves a waiter exactly once and tears down its timer/abort listener. */
|
|
1095
|
+
settleWaiter(waiter, result) {
|
|
1096
|
+
if (waiter.settled)
|
|
1097
|
+
return;
|
|
1098
|
+
waiter.settled = true;
|
|
1099
|
+
this.clock.clearTimeout(waiter.timer);
|
|
1100
|
+
waiter.cleanupAbort?.();
|
|
1101
|
+
const index = this.waiters.indexOf(waiter);
|
|
1102
|
+
if (index !== -1)
|
|
1103
|
+
this.waiters.splice(index, 1);
|
|
1104
|
+
waiter.resolve(result);
|
|
1105
|
+
}
|
|
1106
|
+
/**
|
|
1107
|
+
* Wakes the oldest waiter that can now be satisfied by a freed/grown worker. Runs
|
|
1108
|
+
* after any event that frees capacity (release/reconcile). Each woken waiter
|
|
1109
|
+
* synchronously re-runs select-and-stamp so it cannot be starved by a fresh
|
|
1110
|
+
* acquire racing in.
|
|
1111
|
+
*/
|
|
1112
|
+
wakeWaiters() {
|
|
1113
|
+
// Iterate a snapshot; settleWaiter mutates the live array.
|
|
1114
|
+
for (const waiter of [...this.waiters]) {
|
|
1115
|
+
if (waiter.settled)
|
|
1116
|
+
continue;
|
|
1117
|
+
if (this.draining || !this.settings.enabled) {
|
|
1118
|
+
this.settleWaiter(waiter, { status: "no_capacity", reason: "pool_disabled" });
|
|
1119
|
+
continue;
|
|
1120
|
+
}
|
|
1121
|
+
const lease = this.selectAndStamp(waiter.req);
|
|
1122
|
+
if (lease) {
|
|
1123
|
+
this.settleWaiter(waiter, { status: "leased", lease });
|
|
1124
|
+
}
|
|
1125
|
+
}
|
|
1126
|
+
// The FIFO waiters had first claim on the freed capacity; whatever remains
|
|
1127
|
+
// leasable is announced so the runtime can nudge its poll (a waiter that
|
|
1128
|
+
// consumed the only worker leaves canAcquire() false and suppresses this).
|
|
1129
|
+
this.notifyCapacityAvailable();
|
|
1130
|
+
}
|
|
1131
|
+
// --- lease settlement / spend accounting -------------------------------
|
|
1132
|
+
/**
|
|
1133
|
+
* Pool-side accounting run INSIDE the per-worker mutex when a lease settles.
|
|
1134
|
+
* Decrements `inFlight`, accrues worker-seconds, and either returns the worker to
|
|
1135
|
+
* WARM_IDLE (healthy) or recycles it (poison / markedForDestroy). The single
|
|
1136
|
+
* destroy here is serialized by the per-worker mutex so a reaper tick observing
|
|
1137
|
+
* the same `inFlight->0` cannot double-destroy.
|
|
1138
|
+
*/
|
|
1139
|
+
async onLeaseSettle(record, issueId, leaseAcquiredMs, outcome, _reason) {
|
|
1140
|
+
// Roll the day key BEFORE accruing into `dailyWorkerSecondsUsed`. A lease
|
|
1141
|
+
// acquired late on day N but released into day N+1 must bill day N+1 (the day
|
|
1142
|
+
// it settled), not day N's already-counted window. Without this roll the
|
|
1143
|
+
// accumulator stays keyed on the stale day, the daily cap is bypassed across
|
|
1144
|
+
// the midnight boundary, and the persisted `spend.json` (whose `recordDaily`
|
|
1145
|
+
// and `flushDaily` writes key on the live UTC day) diverges from memory.
|
|
1146
|
+
this.rollDayKeyIfNeeded();
|
|
1147
|
+
record.inFlight -= 1;
|
|
1148
|
+
// Decrement THIS issue's lease refcount on the worker; only forget the issue once
|
|
1149
|
+
// its LAST lease here settles. A plain delete-per-settle would drop the issue
|
|
1150
|
+
// while a co-resident sibling slot (`slotsPerMachine>1`) still occupies the worker,
|
|
1151
|
+
// letting `maxWorkersPerIssue` be bypassed.
|
|
1152
|
+
if (record.leaseIssues) {
|
|
1153
|
+
const remaining = (record.leaseIssues.get(issueId) ?? 0) - 1;
|
|
1154
|
+
if (remaining > 0)
|
|
1155
|
+
record.leaseIssues.set(issueId, remaining);
|
|
1156
|
+
else
|
|
1157
|
+
record.leaseIssues.delete(issueId);
|
|
1158
|
+
}
|
|
1159
|
+
// Drop THIS lease's tracked acquire time so a later drain does not re-bill its
|
|
1160
|
+
// (now settled) window. Remove a single matching entry (overlapping leases may
|
|
1161
|
+
// share an acquire timestamp).
|
|
1162
|
+
if (record.liveLeaseAcquiredMs) {
|
|
1163
|
+
const index = record.liveLeaseAcquiredMs.indexOf(leaseAcquiredMs);
|
|
1164
|
+
if (index !== -1)
|
|
1165
|
+
record.liveLeaseAcquiredMs.splice(index, 1);
|
|
1166
|
+
}
|
|
1167
|
+
// Accrue wall-clock worker-seconds for THIS lease window: acquire time to now.
|
|
1168
|
+
// Billing from the lease's own acquire timestamp (not `lastHeartbeatMs`) means
|
|
1169
|
+
// a long heartbeating run is charged its full window and two overlapping leases
|
|
1170
|
+
// on one worker (maxInFlight>1) each accrue their own window. `lastHeartbeatMs`
|
|
1171
|
+
// stays purely a staleness stamp for the reaper's orphan detection.
|
|
1172
|
+
const now = this.leaseClock.now();
|
|
1173
|
+
const elapsedSeconds = Math.max(0, (now - leaseAcquiredMs) / 1000);
|
|
1174
|
+
record.workerSecondsUsed += elapsedSeconds;
|
|
1175
|
+
this.workerSecondsUsed += elapsedSeconds;
|
|
1176
|
+
this.dailyWorkerSecondsUsed += elapsedSeconds;
|
|
1177
|
+
void this.ledger
|
|
1178
|
+
.recordDailyWorkerSeconds(elapsedSeconds)
|
|
1179
|
+
.catch((error) => this.logEvent({ event: "worker_pool_ledger_write_failed", error: errorMessage(error) }));
|
|
1180
|
+
// Remember a poison outcome even when a co-resident sibling lease is still in
|
|
1181
|
+
// flight (`slotsPerMachine>1`). Flag the worker for destroy NOW so it cannot serve a
|
|
1182
|
+
// fresh lease (isLeasable rejects markedForDestroy) and so the LAST sibling to
|
|
1183
|
+
// settle recycles it instead of returning a known-bad worker to WARM_IDLE. With the
|
|
1184
|
+
// default `slotsPerMachine=1` inFlight is already 0 here, so this is inert.
|
|
1185
|
+
if (outcome === "poison")
|
|
1186
|
+
record.markedForDestroy = true;
|
|
1187
|
+
if (record.inFlight > 0) {
|
|
1188
|
+
// Other leases still hold this worker (slotsPerMachine>1); leave it LEASED until
|
|
1189
|
+
// the last one settles, which then recycles it if poisoned or reaper-flagged.
|
|
1190
|
+
return;
|
|
1191
|
+
}
|
|
1192
|
+
record.leaseId = null;
|
|
1193
|
+
if (outcome === "poison" || record.markedForDestroy) {
|
|
1194
|
+
// A poisoned or reaper-flagged worker is recycled the instant its last lease
|
|
1195
|
+
// returns; the per-worker mutex (this callback runs inside it) serializes the
|
|
1196
|
+
// single destroy so a reaper tick cannot double-destroy / underflow.
|
|
1197
|
+
await this.recycle(record, "failed");
|
|
1198
|
+
}
|
|
1199
|
+
else if (this.draining) {
|
|
1200
|
+
// During drain the worker is left in inventory for runDrain to force-destroy;
|
|
1201
|
+
// here we only mark it idle so the drain barrier can observe inFlight->0.
|
|
1202
|
+
record.state = "WARM_IDLE";
|
|
1203
|
+
record.lastIdleAtMs = now;
|
|
1204
|
+
}
|
|
1205
|
+
else {
|
|
1206
|
+
record.state = "WARM_IDLE";
|
|
1207
|
+
record.lastIdleAtMs = now;
|
|
1208
|
+
record.affinityKey = record.workerHost;
|
|
1209
|
+
}
|
|
1210
|
+
// Wake the drain barrier once nothing is in flight anymore.
|
|
1211
|
+
if (this.draining && this.totalInFlight() === 0)
|
|
1212
|
+
this.notifyDrained?.();
|
|
1213
|
+
this.wakeWaiters();
|
|
1214
|
+
}
|
|
1215
|
+
/**
|
|
1216
|
+
* Accrues the in-flight worker-seconds window of every outstanding lease on a worker
|
|
1217
|
+
* that is about to be force-destroyed mid-lease (the drain path). Each tracked
|
|
1218
|
+
* acquire timestamp is billed from acquire-to-now into the process / daily / worker
|
|
1219
|
+
* accumulators and the persisted sidecar, then cleared so the late no-op release
|
|
1220
|
+
* never double-bills. The day key is rolled first so a window that straddled UTC
|
|
1221
|
+
* midnight bills the day it settled. Must be called inside the per-worker mutex.
|
|
1222
|
+
*/
|
|
1223
|
+
accrueInFlightWindows(record) {
|
|
1224
|
+
const live = record.liveLeaseAcquiredMs;
|
|
1225
|
+
if (!live || live.length === 0)
|
|
1226
|
+
return;
|
|
1227
|
+
this.rollDayKeyIfNeeded();
|
|
1228
|
+
const now = this.leaseClock.now();
|
|
1229
|
+
for (const acquiredMs of live) {
|
|
1230
|
+
const elapsedSeconds = Math.max(0, (now - acquiredMs) / 1000);
|
|
1231
|
+
record.workerSecondsUsed += elapsedSeconds;
|
|
1232
|
+
this.workerSecondsUsed += elapsedSeconds;
|
|
1233
|
+
this.dailyWorkerSecondsUsed += elapsedSeconds;
|
|
1234
|
+
void this.ledger
|
|
1235
|
+
.recordDailyWorkerSeconds(elapsedSeconds)
|
|
1236
|
+
.catch((error) => this.logEvent({ event: "worker_pool_ledger_write_failed", error: errorMessage(error) }));
|
|
1237
|
+
}
|
|
1238
|
+
record.liveLeaseAcquiredMs = [];
|
|
1239
|
+
record.inFlight = 0;
|
|
1240
|
+
}
|
|
1241
|
+
/**
|
|
1242
|
+
* Destroys a worker and removes it from inventory. Idempotent: a worker already
|
|
1243
|
+
* DESTROYED/removed is left alone. Must be called inside the per-worker mutex (or
|
|
1244
|
+
* during a single-threaded drain) so it runs exactly once per worker.
|
|
1245
|
+
*/
|
|
1246
|
+
async recycle(record, reason) {
|
|
1247
|
+
if (record.state === "DESTROYED" || record.state === "DESTROYING")
|
|
1248
|
+
return;
|
|
1249
|
+
record.state = "DESTROYING";
|
|
1250
|
+
// Recycle-vs-endpoint ordering invariant: fire the recycling callbacks INSIDE
|
|
1251
|
+
// the per-worker mutex (we are inside it here) BEFORE `driver.destroy`, so the
|
|
1252
|
+
// coordinator can fail any still-open RunSlot bound to this worker cleanly (close
|
|
1253
|
+
// its endpoint, settle, deregister) before the host is torn out from under it.
|
|
1254
|
+
// The state is already flipped to DESTROYING above so this fires exactly once.
|
|
1255
|
+
this.notifyMachineRecycling(record.workerId);
|
|
1256
|
+
try {
|
|
1257
|
+
// Destroy against the worker's ORIGINAL driver when a swap captured one, so an
|
|
1258
|
+
// in-flight lease settling AFTER a driver hot-reload tears its worker down on
|
|
1259
|
+
// the backend that PROVISIONED it (never the new `this.driver`) and a paid
|
|
1260
|
+
// worker is never orphaned. Workers provisioned under the live driver carry no
|
|
1261
|
+
// `originDriver` and fall back to `this.driver` (byte-identical default).
|
|
1262
|
+
const driver = record.originDriver ?? this.driver;
|
|
1263
|
+
await driver.destroy({
|
|
1264
|
+
workerId: record.workerId,
|
|
1265
|
+
workerHost: record.workerHost,
|
|
1266
|
+
driverRef: record.driverRef,
|
|
1267
|
+
createdAtMs: record.createdAtMs,
|
|
1268
|
+
labels: record.labels,
|
|
1269
|
+
metadata: record.metadata,
|
|
1270
|
+
}, { timeoutMs: this.settings.acquireTimeoutMs, reason });
|
|
1271
|
+
}
|
|
1272
|
+
catch (error) {
|
|
1273
|
+
this.logEvent({
|
|
1274
|
+
event: "worker_pool_destroy_failed",
|
|
1275
|
+
workerId: record.workerId,
|
|
1276
|
+
error: errorMessage(error),
|
|
1277
|
+
});
|
|
1278
|
+
// The backend worker may still be running and billing. Dropping it from inventory
|
|
1279
|
+
// + ledger here would forget a PAID machine with no retry (a silent leak).
|
|
1280
|
+
// Instead keep it tracked but non-leasable (markedForDestroy) and put it back
|
|
1281
|
+
// in a reaper-retryable idle state: the serial reaper re-attempts the teardown
|
|
1282
|
+
// each tick (a flagged worker is reaped even below `min`), and across a restart
|
|
1283
|
+
// `hydrate` re-adopts it from the surviving ledger row and retries the destroy.
|
|
1284
|
+
record.markedForDestroy = true;
|
|
1285
|
+
record.leaseId = null;
|
|
1286
|
+
record.state = "WARM_IDLE";
|
|
1287
|
+
record.lastIdleAtMs = this.leaseClock.now();
|
|
1288
|
+
return;
|
|
1289
|
+
}
|
|
1290
|
+
record.state = "DESTROYED";
|
|
1291
|
+
void this.ledger
|
|
1292
|
+
.delete(record.workerId)
|
|
1293
|
+
.catch((error) => this.logEvent({ event: "worker_pool_ledger_write_failed", error: errorMessage(error) }));
|
|
1294
|
+
this.inventory.delete(record.workerId);
|
|
1295
|
+
this.workerMutexes.delete(record.workerId);
|
|
1296
|
+
}
|
|
1297
|
+
// --- reaper -------------------------------------------------------------
|
|
1298
|
+
/**
|
|
1299
|
+
* Arms (or re-arms) the single recurring reaper timer. The handle is detached
|
|
1300
|
+
* via `unref?.()` so it never keeps the process alive; the tick re-arms itself
|
|
1301
|
+
* at the end so the pass runs serially at the configured cadence. A stopped
|
|
1302
|
+
* pool (drained) arms nothing.
|
|
1303
|
+
*/
|
|
1304
|
+
scheduleReaper() {
|
|
1305
|
+
if (this.reaperStopped)
|
|
1306
|
+
return;
|
|
1307
|
+
const handle = this.clock.setTimeout(() => {
|
|
1308
|
+
void this.driveReaper();
|
|
1309
|
+
}, this.settings.reapIntervalMs);
|
|
1310
|
+
handle.unref?.();
|
|
1311
|
+
this.reaperTimer = handle;
|
|
1312
|
+
}
|
|
1313
|
+
/**
|
|
1314
|
+
* Runs one serial reaper pass, then re-arms the timer. The in-progress guard
|
|
1315
|
+
* lives in `runReaperTick`, so even an unusually slow tick (a hung probe) can
|
|
1316
|
+
* never overlap with the next scheduled fire. The internals are re-synced to
|
|
1317
|
+
* the live settings each tick since `reconcile` swaps the whole settings object.
|
|
1318
|
+
*/
|
|
1319
|
+
async driveReaper() {
|
|
1320
|
+
this.reaperTimer = null;
|
|
1321
|
+
if (this.reaperStopped || this.draining)
|
|
1322
|
+
return;
|
|
1323
|
+
this.reaperInternals.settings = this.settings;
|
|
1324
|
+
try {
|
|
1325
|
+
await runReaperTick(this.reaperInternals);
|
|
1326
|
+
}
|
|
1327
|
+
catch (error) {
|
|
1328
|
+
this.logEvent({ event: "worker_pool_reaper_failed", error: errorMessage(error) });
|
|
1329
|
+
}
|
|
1330
|
+
finally {
|
|
1331
|
+
this.scheduleReaper();
|
|
1332
|
+
}
|
|
1333
|
+
}
|
|
1334
|
+
/** Stops the recurring reaper timer (terminal; called on drain). */
|
|
1335
|
+
stopReaper() {
|
|
1336
|
+
this.reaperStopped = true;
|
|
1337
|
+
if (this.reaperTimer) {
|
|
1338
|
+
this.clock.clearTimeout(this.reaperTimer);
|
|
1339
|
+
this.reaperTimer = null;
|
|
1340
|
+
}
|
|
1341
|
+
}
|
|
1342
|
+
/**
|
|
1343
|
+
* Provisions one warm worker toward the min/warm target (driven by the reaper's
|
|
1344
|
+
* top-up). Goes through the same reservation as `grow` so a concurrent acquire
|
|
1345
|
+
* cannot push the live count past `max`, but the worker is left WARM_IDLE (no
|
|
1346
|
+
* lease stamped) so the next acquire can claim it. Failures are logged and
|
|
1347
|
+
* swallowed so a single bad provision never stalls the reaper.
|
|
1348
|
+
*/
|
|
1349
|
+
async provisionWarm() {
|
|
1350
|
+
this.reservedProvisions += 1;
|
|
1351
|
+
if (this.liveWorkerCount() + this.reservedProvisions > this.settings.max) {
|
|
1352
|
+
this.reservedProvisions -= 1;
|
|
1353
|
+
return;
|
|
1354
|
+
}
|
|
1355
|
+
const workerId = `worker-${this.workerSeq++}`;
|
|
1356
|
+
const labels = [POOL_OWNED_LABEL];
|
|
1357
|
+
// Capture the driver that will run this warm provision (and its generation)
|
|
1358
|
+
// BEFORE the await so a swapDriver racing the provision cannot misattribute the
|
|
1359
|
+
// worker (same no-orphan invariant as `grow`).
|
|
1360
|
+
const originDriver = this.driver;
|
|
1361
|
+
const originGeneration = this.driverGeneration;
|
|
1362
|
+
try {
|
|
1363
|
+
// Write-ahead the provisional row BEFORE provision (recoverable mid-provision
|
|
1364
|
+
// crash), then correlate after the driver returns. Inert for non-cloud.
|
|
1365
|
+
await this.writeProvisionalRow(workerId, labels);
|
|
1366
|
+
const descriptor = await originDriver.provision({
|
|
1367
|
+
workerId,
|
|
1368
|
+
affinityKey: null,
|
|
1369
|
+
labels,
|
|
1370
|
+
timeoutMs: this.settings.acquireTimeoutMs,
|
|
1371
|
+
...(this.settings.driverOptions ? { driverOptions: this.settings.driverOptions } : {}),
|
|
1372
|
+
});
|
|
1373
|
+
await this.correlateRow(descriptor);
|
|
1374
|
+
// A swapDriver may have run WHILE this warm provision was in flight, so the
|
|
1375
|
+
// worker was created on the now-stale `originDriver`, not `this.driver`.
|
|
1376
|
+
const swappedDuringProvision = this.driverGeneration !== originGeneration;
|
|
1377
|
+
// Readiness gate (same "reachable before leased" contract as `grow`): a warm worker
|
|
1378
|
+
// must be SSH-reachable BEFORE it becomes WARM_IDLE and leasable, so an acquire
|
|
1379
|
+
// never grabs a not-yet-ready top-up worker. A worker that never becomes ready is
|
|
1380
|
+
// destroyed and skipped (the reaper re-tops-up); inert for an already-up host.
|
|
1381
|
+
if (!(await this.probeUntilReady(descriptor, originDriver))) {
|
|
1382
|
+
await this.destroyDescriptor(descriptor, "unhealthy", originDriver);
|
|
1383
|
+
return;
|
|
1384
|
+
}
|
|
1385
|
+
// A drain (or disable) may have begun WHILE this warm provision OR the readiness
|
|
1386
|
+
// probe was in flight; runDrain snapshotted inventory before the worker existed, so
|
|
1387
|
+
// adding it now would leak a paid worker past a completed drain. Destroy it instead -
|
|
1388
|
+
// on the ORIGIN driver that created it.
|
|
1389
|
+
if (this.draining || !this.settings.enabled) {
|
|
1390
|
+
await this.destroyDescriptor(descriptor, "drain", originDriver);
|
|
1391
|
+
return;
|
|
1392
|
+
}
|
|
1393
|
+
const now = this.leaseClock.now();
|
|
1394
|
+
const record = {
|
|
1395
|
+
workerId: descriptor.workerId,
|
|
1396
|
+
workerHost: descriptor.workerHost,
|
|
1397
|
+
driverRef: descriptor.driverRef,
|
|
1398
|
+
state: "WARM_IDLE",
|
|
1399
|
+
labels: [...descriptor.labels],
|
|
1400
|
+
createdAtMs: descriptor.createdAtMs,
|
|
1401
|
+
leaseId: null,
|
|
1402
|
+
inFlight: 0,
|
|
1403
|
+
lastIdleAtMs: now,
|
|
1404
|
+
lastHeartbeatMs: now,
|
|
1405
|
+
workerSecondsUsed: 0,
|
|
1406
|
+
// A swap during the provision means this warm worker was created on a stale
|
|
1407
|
+
// driver; flag it for destroy (it cannot serve the live driver).
|
|
1408
|
+
markedForDestroy: swappedDuringProvision,
|
|
1409
|
+
affinityKey: null,
|
|
1410
|
+
metadata: { ...descriptor.metadata },
|
|
1411
|
+
leaseIssues: new Map(),
|
|
1412
|
+
// Record the backend that actually provisioned this worker (only on a swap; an
|
|
1413
|
+
// un-swapped warm provision leaves it undefined -> falls back to
|
|
1414
|
+
// `this.driver`, byte-identical to the prior path).
|
|
1415
|
+
...(swappedDuringProvision ? { originDriver } : {}),
|
|
1416
|
+
};
|
|
1417
|
+
this.inventory.set(record.workerId, record);
|
|
1418
|
+
if (swappedDuringProvision) {
|
|
1419
|
+
// This idle warm worker was provisioned on a now-stale driver, so it cannot
|
|
1420
|
+
// serve the live driver AND the new driver's list() will not own it
|
|
1421
|
+
// (the reaper's list-reconcile would otherwise DROP the record without
|
|
1422
|
+
// tearing the worker down, orphaning a paid machine on the old backend).
|
|
1423
|
+
// Recycle it NOW on its captured origin (under its per-worker mutex, exactly as
|
|
1424
|
+
// swapDriver recycles old-driver idle workers) so the destroy is
|
|
1425
|
+
// deterministic and routed to the backend that created it.
|
|
1426
|
+
await this.mutexFor(record.workerId).runExclusive(async () => {
|
|
1427
|
+
if (record.inFlight !== 0)
|
|
1428
|
+
return; // a lease landed first; settle recycles it
|
|
1429
|
+
await this.recycle(record, "shrink");
|
|
1430
|
+
});
|
|
1431
|
+
}
|
|
1432
|
+
}
|
|
1433
|
+
catch (error) {
|
|
1434
|
+
this.logEvent({
|
|
1435
|
+
event: "worker_pool_warm_provision_failed",
|
|
1436
|
+
workerId,
|
|
1437
|
+
error: errorMessage(error),
|
|
1438
|
+
});
|
|
1439
|
+
// Drop the write-ahead provisional row for a failed warm provision so no
|
|
1440
|
+
// dangling row outlives the attempt.
|
|
1441
|
+
await this.ledger.delete(workerId);
|
|
1442
|
+
}
|
|
1443
|
+
finally {
|
|
1444
|
+
this.reservedProvisions -= 1;
|
|
1445
|
+
}
|
|
1446
|
+
}
|
|
1447
|
+
// --- drain --------------------------------------------------------------
|
|
1448
|
+
/**
|
|
1449
|
+
* Flips DRAINING, rejects new acquires, waits for in-flight leases up to the
|
|
1450
|
+
* deadline, then force-destroys ALL workers (held or not) so no paid cloud worker
|
|
1451
|
+
* leaks past process exit.
|
|
1452
|
+
*/
|
|
1453
|
+
async runDrain(opts, epoch) {
|
|
1454
|
+
// Stop the recurring reaper so a draining pool issues no further ticks.
|
|
1455
|
+
this.stopReaper();
|
|
1456
|
+
// Reject every parked waiter immediately.
|
|
1457
|
+
for (const waiter of [...this.waiters]) {
|
|
1458
|
+
this.settleWaiter(waiter, { status: "no_capacity", reason: "pool_disabled" });
|
|
1459
|
+
}
|
|
1460
|
+
// Wait for in-flight leases to settle, bounded by the deadline. Event-driven:
|
|
1461
|
+
// `onLeaseSettle` resolves `notifyDrained` once `inFlight` hits zero, and a
|
|
1462
|
+
// deadline timer (real or fake-clock) resolves the race otherwise. Either way
|
|
1463
|
+
// we then force-destroy every remaining worker (held or not) so no worker leaks.
|
|
1464
|
+
if (this.totalInFlight() > 0 && !opts.signal?.aborted) {
|
|
1465
|
+
await new Promise((resolve) => {
|
|
1466
|
+
let done = false;
|
|
1467
|
+
const finish = () => {
|
|
1468
|
+
if (done)
|
|
1469
|
+
return;
|
|
1470
|
+
done = true;
|
|
1471
|
+
// Only the OWNING drain clears the shared barrier hook; a re-enable
|
|
1472
|
+
// may have already nulled/replaced it, so guard the assignment.
|
|
1473
|
+
if (this.drainEpoch === epoch)
|
|
1474
|
+
this.notifyDrained = null;
|
|
1475
|
+
this.clock.clearTimeout(timer);
|
|
1476
|
+
if (onAbort && opts.signal)
|
|
1477
|
+
opts.signal.removeEventListener("abort", onAbort);
|
|
1478
|
+
resolve();
|
|
1479
|
+
};
|
|
1480
|
+
const timer = this.clock.setTimeout(finish, opts.deadlineMs);
|
|
1481
|
+
timer.unref?.();
|
|
1482
|
+
this.notifyDrained = finish;
|
|
1483
|
+
const onAbort = opts.signal ? finish : null;
|
|
1484
|
+
if (onAbort && opts.signal)
|
|
1485
|
+
opts.signal.addEventListener("abort", onAbort, { once: true });
|
|
1486
|
+
});
|
|
1487
|
+
}
|
|
1488
|
+
// Bail if this drain has been superseded. A reconcile re-enable (false->true)
|
|
1489
|
+
// clears `draining`, bumps `drainEpoch`, and grows fresh workers; an orphaned
|
|
1490
|
+
// drain whose deadline fired AFTER that re-enable must NOT force-destroy the
|
|
1491
|
+
// now-LIVE pool's workers. Still flush the daily total below so a superseded
|
|
1492
|
+
// drain does not drop the spend it observed.
|
|
1493
|
+
if (this.drainEpoch !== epoch || !this.draining) {
|
|
1494
|
+
await this.flushDailySpendForDrain();
|
|
1495
|
+
return;
|
|
1496
|
+
}
|
|
1497
|
+
// Force-destroy every remaining worker, held lease or not (the leak fix). Each
|
|
1498
|
+
// destroy runs inside that worker's mutex (consistent with every other teardown)
|
|
1499
|
+
// so a late `onLeaseSettle` — which while draining would flip the worker back to
|
|
1500
|
+
// WARM_IDLE — cannot interleave with the in-progress recycle and resurrect a
|
|
1501
|
+
// worker mid-destroy. The settle either runs fully before the destroy (and is
|
|
1502
|
+
// then overwritten to DESTROYED) or fully after (and no-ops on the DESTROYED worker).
|
|
1503
|
+
for (const record of [...this.inventory.values()]) {
|
|
1504
|
+
// Re-check the epoch each iteration: a re-enable racing the loop must stop
|
|
1505
|
+
// it from destroying any further workers the now-live pool depends on.
|
|
1506
|
+
if (this.drainEpoch !== epoch || !this.draining)
|
|
1507
|
+
break;
|
|
1508
|
+
await this.mutexFor(record.workerId).runExclusive(async () => {
|
|
1509
|
+
// A worker still LEASED at the deadline never ran `onLeaseSettle` for its
|
|
1510
|
+
// outstanding leases, and the late `release()` will no-op on the DESTROYED
|
|
1511
|
+
// guard, so accrue each outstanding lease's in-flight window HERE (inside
|
|
1512
|
+
// the per-worker mutex) before the force-destroy. Without this the spend is
|
|
1513
|
+
// under-counted and the persisted sidecar drops the window across restart.
|
|
1514
|
+
this.accrueInFlightWindows(record);
|
|
1515
|
+
await this.recycle(record, "drain");
|
|
1516
|
+
});
|
|
1517
|
+
}
|
|
1518
|
+
await this.flushDailySpendForDrain();
|
|
1519
|
+
}
|
|
1520
|
+
/**
|
|
1521
|
+
* Durably flushes the authoritative in-memory daily total at the end of a
|
|
1522
|
+
* drain. The hot path records each delta fire-and-forget
|
|
1523
|
+
* (`void recordDailyWorkerSeconds`), so a crash could lose the last unpersisted
|
|
1524
|
+
* deltas; a clean drain SETS the absolute total here (serialized after any
|
|
1525
|
+
* pending additive write) so the persisted sidecar matches the in-memory total
|
|
1526
|
+
* a restart will seed from. The day key is rolled first so a flush that lands
|
|
1527
|
+
* after a UTC-midnight crossing writes the new day's accumulator.
|
|
1528
|
+
*/
|
|
1529
|
+
async flushDailySpendForDrain() {
|
|
1530
|
+
this.rollDayKeyIfNeeded();
|
|
1531
|
+
await this.ledger.flushDailyWorkerSeconds(this.dailyWorkerSecondsUsed);
|
|
1532
|
+
}
|
|
1533
|
+
// --- helpers ------------------------------------------------------------
|
|
1534
|
+
mutexFor(workerId) {
|
|
1535
|
+
let mutex = this.workerMutexes.get(workerId);
|
|
1536
|
+
if (!mutex) {
|
|
1537
|
+
mutex = createMutex();
|
|
1538
|
+
this.workerMutexes.set(workerId, mutex);
|
|
1539
|
+
}
|
|
1540
|
+
return mutex;
|
|
1541
|
+
}
|
|
1542
|
+
/** Reserves one per-issue grow slot (counted in the issue caps until released). */
|
|
1543
|
+
reserveIssueProvision(issueId) {
|
|
1544
|
+
this.reservedProvisionsByIssue.set(issueId, (this.reservedProvisionsByIssue.get(issueId) ?? 0) + 1);
|
|
1545
|
+
}
|
|
1546
|
+
/** Releases a previously reserved per-issue grow slot. */
|
|
1547
|
+
releaseIssueProvision(issueId) {
|
|
1548
|
+
const next = (this.reservedProvisionsByIssue.get(issueId) ?? 0) - 1;
|
|
1549
|
+
if (next <= 0)
|
|
1550
|
+
this.reservedProvisionsByIssue.delete(issueId);
|
|
1551
|
+
else
|
|
1552
|
+
this.reservedProvisionsByIssue.set(issueId, next);
|
|
1553
|
+
}
|
|
1554
|
+
/**
|
|
1555
|
+
* Writes the write-ahead provisional ledger row for a worker BEFORE its provision is
|
|
1556
|
+
* awaited. The row carries the workerId + the pool-owned label but no driverRef /
|
|
1557
|
+
* workerHost yet (the driver has not returned), so a crash between provision
|
|
1558
|
+
* and the inventory write leaves a recoverable record on disk. Inert (zero fs
|
|
1559
|
+
* I/O) for non-cloud drivers (the ledger is a no-op when `usesLedger` is false).
|
|
1560
|
+
*/
|
|
1561
|
+
async writeProvisionalRow(workerId, labels) {
|
|
1562
|
+
const now = this.leaseClock.now();
|
|
1563
|
+
const row = {
|
|
1564
|
+
workerId,
|
|
1565
|
+
driverRef: null,
|
|
1566
|
+
workerHost: null,
|
|
1567
|
+
labels: [...labels],
|
|
1568
|
+
status: "provisional",
|
|
1569
|
+
createdAtMs: now,
|
|
1570
|
+
updatedAtMs: now,
|
|
1571
|
+
};
|
|
1572
|
+
await this.ledger.upsert(row);
|
|
1573
|
+
}
|
|
1574
|
+
/**
|
|
1575
|
+
* Upserts the CORRELATED active ledger row for a worker AFTER its provision returns,
|
|
1576
|
+
* stamping the real driverRef / workerHost over the earlier provisional row
|
|
1577
|
+
* (same workerId, so it is replaced, not appended). Completes the write-ahead
|
|
1578
|
+
* correlate. Inert for non-cloud drivers.
|
|
1579
|
+
*/
|
|
1580
|
+
async correlateRow(descriptor) {
|
|
1581
|
+
const now = this.leaseClock.now();
|
|
1582
|
+
const row = {
|
|
1583
|
+
workerId: descriptor.workerId,
|
|
1584
|
+
driverRef: descriptor.driverRef,
|
|
1585
|
+
workerHost: descriptor.workerHost,
|
|
1586
|
+
labels: [...descriptor.labels],
|
|
1587
|
+
status: "active",
|
|
1588
|
+
createdAtMs: descriptor.createdAtMs,
|
|
1589
|
+
updatedAtMs: now,
|
|
1590
|
+
};
|
|
1591
|
+
await this.ledger.upsert(row);
|
|
1592
|
+
}
|
|
1593
|
+
/**
|
|
1594
|
+
* Destroys a driver descriptor that was created but never entered inventory
|
|
1595
|
+
* (e.g. a worker provisioned while the pool started draining). Best-effort: a
|
|
1596
|
+
* failure is logged and swallowed so the caller can still bail. The optional
|
|
1597
|
+
* `driver` override destroys the worker on the backend that ACTUALLY provisioned it
|
|
1598
|
+
* (the captured origin) when a swap raced the provision; it defaults to the live
|
|
1599
|
+
* `this.driver` (byte-identical to the prior single-driver path).
|
|
1600
|
+
*/
|
|
1601
|
+
async destroyDescriptor(descriptor, reason, driver = this.driver) {
|
|
1602
|
+
try {
|
|
1603
|
+
await driver.destroy({
|
|
1604
|
+
workerId: descriptor.workerId,
|
|
1605
|
+
workerHost: descriptor.workerHost,
|
|
1606
|
+
driverRef: descriptor.driverRef,
|
|
1607
|
+
createdAtMs: descriptor.createdAtMs,
|
|
1608
|
+
labels: descriptor.labels,
|
|
1609
|
+
metadata: descriptor.metadata,
|
|
1610
|
+
}, { timeoutMs: this.settings.acquireTimeoutMs, reason });
|
|
1611
|
+
}
|
|
1612
|
+
catch (error) {
|
|
1613
|
+
// Keep the write-ahead ledger row on failure: the backend worker may still be
|
|
1614
|
+
// running, and the surviving row lets `hydrate` re-adopt it after a restart and
|
|
1615
|
+
// retry teardown instead of silently leaking a paid worker. (Byte-identical to the
|
|
1616
|
+
// prior swallow except the row is no longer dropped when destroy did not run.)
|
|
1617
|
+
this.logEvent({
|
|
1618
|
+
event: "worker_pool_destroy_failed",
|
|
1619
|
+
workerId: descriptor.workerId,
|
|
1620
|
+
error: errorMessage(error),
|
|
1621
|
+
});
|
|
1622
|
+
return;
|
|
1623
|
+
}
|
|
1624
|
+
void this.ledger
|
|
1625
|
+
.delete(descriptor.workerId)
|
|
1626
|
+
.catch((error) => this.logEvent({ event: "worker_pool_ledger_write_failed", error: errorMessage(error) }));
|
|
1627
|
+
}
|
|
1628
|
+
liveWorkerCount() {
|
|
1629
|
+
let count = 0;
|
|
1630
|
+
for (const record of this.inventory.values()) {
|
|
1631
|
+
if (isLive(record.state))
|
|
1632
|
+
count += 1;
|
|
1633
|
+
}
|
|
1634
|
+
return count;
|
|
1635
|
+
}
|
|
1636
|
+
totalInFlight() {
|
|
1637
|
+
let total = 0;
|
|
1638
|
+
for (const record of this.inventory.values())
|
|
1639
|
+
total += record.inFlight;
|
|
1640
|
+
return total;
|
|
1641
|
+
}
|
|
1642
|
+
rollDayKeyIfNeeded() {
|
|
1643
|
+
const today = utcDayKey(this.clock.now());
|
|
1644
|
+
if (today !== this.dayKey) {
|
|
1645
|
+
this.dayKey = today;
|
|
1646
|
+
this.dailyWorkerSecondsUsed = 0;
|
|
1647
|
+
}
|
|
1648
|
+
}
|
|
1649
|
+
workerSecondsExhausted() {
|
|
1650
|
+
const spend = this.settings.spend;
|
|
1651
|
+
if (!spend)
|
|
1652
|
+
return false;
|
|
1653
|
+
if (spend.maxWorkerSeconds !== undefined && this.workerSecondsUsed >= spend.maxWorkerSeconds) {
|
|
1654
|
+
return true;
|
|
1655
|
+
}
|
|
1656
|
+
if (spend.dailyWorkerSeconds !== undefined &&
|
|
1657
|
+
this.dailyWorkerSecondsUsed >= spend.dailyWorkerSeconds) {
|
|
1658
|
+
return true;
|
|
1659
|
+
}
|
|
1660
|
+
return false;
|
|
1661
|
+
}
|
|
1662
|
+
}
|
|
1663
|
+
/**
 * Extracts a stable message from an unknown thrown value for structured logs.
 *
 * `Error` instances yield their `message`; anything else is stringified. This
 * helper runs inside catch / `.catch` handlers, so it must never throw itself:
 * values with no usable string conversion (e.g. `Object.create(null)` or an object
 * whose `toString` throws) fall back to their `Object.prototype.toString` tag.
 *
 * @param {unknown} error The thrown or rejected value.
 * @returns {string} A printable description of `error`.
 */
function errorMessage(error) {
    if (error instanceof Error) {
        return error.message;
    }
    try {
        return String(error);
    }
    catch {
        // String() throws when the value cannot be converted to a primitive.
        return Object.prototype.toString.call(error);
    }
}
|
|
1667
|
+
/**
 * Decides whether a reconcile changes the PROVIDER CONSTRUCTION, which gates the
 * in-place `swapDriver` rebuild (Finding #1).
 *
 * The answer is true when the driver `kind` differs OR the `driverOptions` differ
 * structurally (the two inputs `resolveDriver` consumes). A same-driver reconcile
 * (e.g. a `max`/`warm` resize) yields false, so the resolved driver object stays
 * stable and the rebuild is skipped.
 *
 * @param prev Settings before the reconcile.
 * @param next Settings after the reconcile.
 * @returns True when the driver must be rebuilt.
 */
function driverConstructionChanged(prev, next) {
    const sameKind = prev.driver === next.driver;
    // Options are only compared when the kind is unchanged.
    return !sameKind || !deepEqual(prev.driverOptions, next.driverOptions);
}
|
|
1679
|
+
/**
 * Structural deep-equality for JSON-shaped values (plain objects, arrays and
 * primitives). Sufficient for the swap gate because `driverOptions` is a
 * `Record<string, unknown>` of config-derived JSON values.
 *
 * Primitives are compared with `===`. Arrays must match in length and element by
 * element; an array never equals a non-array. Objects must have the same number of
 * own enumerable keys, and every key of `a` must be an own property of `b` with a
 * deep-equal value (key order is irrelevant).
 *
 * @param {unknown} a Left-hand value.
 * @param {unknown} b Right-hand value.
 * @returns {boolean} True when the two values are structurally equal.
 */
function deepEqual(a, b) {
    if (a === b) {
        return true;
    }
    const isObjectLike = (value) => value !== null && typeof value === "object";
    if (!isObjectLike(a) || !isObjectLike(b)) {
        return false;
    }
    const leftIsArray = Array.isArray(a);
    if (leftIsArray !== Array.isArray(b)) {
        return false;
    }
    if (leftIsArray) {
        if (a.length !== b.length) {
            return false;
        }
        // entries() visits every index (holes included), like a plain index loop.
        for (const [index, item] of a.entries()) {
            if (!deepEqual(item, b[index])) {
                return false;
            }
        }
        return true;
    }
    const leftKeys = Object.keys(a);
    if (leftKeys.length !== Object.keys(b).length) {
        return false;
    }
    const hasOwn = Object.prototype.hasOwnProperty;
    for (const key of leftKeys) {
        const matches = hasOwn.call(b, key) && deepEqual(a[key], b[key]);
        if (!matches) {
            return false;
        }
    }
    return true;
}
|
|
1716
|
+
/**
 * Constructs a {@link WorkerPool}.
 *
 * This is a thin factory: all work is done by the `WorkerPoolImpl` constructor.
 * As documented for this factory, construction resolves the driver for
 * `settings.driver` through `deps.drivers` (falling back to the process-wide
 * default registry) and throws `worker_pool_driver_unavailable` for an
 * unregistered kind so the daemon fails loud at startup. The write-ahead ledger is
 * wired only when the driver declares `usesLedger` AND a `ledgerPath` is supplied.
 * No workspace/hook deps are taken: the pool owns worker lifecycle only.
 *
 * @param settings Pool settings.
 * @param deps     Injected dependencies for the pool.
 * @returns The newly constructed pool.
 */
export function createWorkerPool(settings, deps) {
    const pool = new WorkerPoolImpl(settings, deps);
    return pool;
}
|
|
1727
|
+
//# sourceMappingURL=pool.js.map
|