agent-relay 1.2.3 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (200)
  1. package/.trajectories/agent-relay-322-324.md +17 -0
  2. package/.trajectories/completed/2026-01/traj_03zupyv1s7b9.json +49 -0
  3. package/.trajectories/completed/2026-01/traj_03zupyv1s7b9.md +31 -0
  4. package/.trajectories/completed/2026-01/traj_0zacdjl1g4ht.json +125 -0
  5. package/.trajectories/completed/2026-01/traj_0zacdjl1g4ht.md +62 -0
  6. package/.trajectories/completed/2026-01/traj_33iuy72sezbk.json +49 -0
  7. package/.trajectories/completed/2026-01/traj_33iuy72sezbk.md +31 -0
  8. package/.trajectories/completed/2026-01/traj_5ammh5qtvklq.json +77 -0
  9. package/.trajectories/completed/2026-01/traj_5ammh5qtvklq.md +42 -0
  10. package/.trajectories/completed/2026-01/traj_6mieijqyvaag.json +77 -0
  11. package/.trajectories/completed/2026-01/traj_6mieijqyvaag.md +42 -0
  12. package/.trajectories/completed/2026-01/traj_78ffm31jn3uk.json +77 -0
  13. package/.trajectories/completed/2026-01/traj_78ffm31jn3uk.md +42 -0
  14. package/.trajectories/completed/2026-01/traj_94gnp3k30goq.json +66 -0
  15. package/.trajectories/completed/2026-01/traj_94gnp3k30goq.md +36 -0
  16. package/.trajectories/completed/2026-01/traj_avqeghu6pz5a.json +40 -0
  17. package/.trajectories/completed/2026-01/traj_avqeghu6pz5a.md +22 -0
  18. package/.trajectories/completed/2026-01/traj_dcsp9s8y01ra.json +121 -0
  19. package/.trajectories/completed/2026-01/traj_dcsp9s8y01ra.md +29 -0
  20. package/.trajectories/completed/2026-01/traj_fhx9irlckht6.json +53 -0
  21. package/.trajectories/completed/2026-01/traj_fhx9irlckht6.md +32 -0
  22. package/.trajectories/completed/2026-01/traj_fqduidx3xbtp.json +101 -0
  23. package/.trajectories/completed/2026-01/traj_fqduidx3xbtp.md +52 -0
  24. package/.trajectories/completed/2026-01/traj_hf81ey93uz6t.json +49 -0
  25. package/.trajectories/completed/2026-01/traj_hf81ey93uz6t.md +31 -0
  26. package/.trajectories/completed/2026-01/traj_hfmki2jr9d4r.json +65 -0
  27. package/.trajectories/completed/2026-01/traj_hfmki2jr9d4r.md +37 -0
  28. package/.trajectories/completed/2026-01/traj_lq450ly148uw.json +49 -0
  29. package/.trajectories/completed/2026-01/traj_lq450ly148uw.md +31 -0
  30. package/.trajectories/completed/2026-01/traj_multi_server_arch.md +101 -0
  31. package/.trajectories/completed/2026-01/traj_psd9ob0j2ru3.json +27 -0
  32. package/.trajectories/completed/2026-01/traj_psd9ob0j2ru3.md +14 -0
  33. package/.trajectories/completed/2026-01/traj_ub8csuv3lcv4.json +53 -0
  34. package/.trajectories/completed/2026-01/traj_ub8csuv3lcv4.md +32 -0
  35. package/.trajectories/completed/2026-01/traj_uc29tlso8i9s.json +186 -0
  36. package/.trajectories/completed/2026-01/traj_uc29tlso8i9s.md +86 -0
  37. package/.trajectories/completed/2026-01/traj_ui9b4tqxoa7j.json +77 -0
  38. package/.trajectories/completed/2026-01/traj_ui9b4tqxoa7j.md +42 -0
  39. package/.trajectories/completed/2026-01/traj_v9dkdoxylyid.json +89 -0
  40. package/.trajectories/completed/2026-01/traj_v9dkdoxylyid.md +47 -0
  41. package/.trajectories/completed/2026-01/traj_xy9vifpqet80.json +65 -0
  42. package/.trajectories/completed/2026-01/traj_xy9vifpqet80.md +37 -0
  43. package/.trajectories/completed/2026-01/traj_y7aiwijyfmmv.json +49 -0
  44. package/.trajectories/completed/2026-01/traj_y7aiwijyfmmv.md +31 -0
  45. package/.trajectories/consolidate-settings-panel.md +24 -0
  46. package/.trajectories/gh-cli-user-token.md +26 -0
  47. package/.trajectories/index.json +155 -1
  48. package/deploy/workspace/codex.config.toml +15 -0
  49. package/deploy/workspace/entrypoint.sh +167 -7
  50. package/deploy/workspace/git-credential-relay +17 -2
  51. package/dist/bridge/spawner.d.ts +7 -0
  52. package/dist/bridge/spawner.js +40 -9
  53. package/dist/bridge/types.d.ts +2 -0
  54. package/dist/cli/index.js +210 -168
  55. package/dist/cloud/api/admin.d.ts +8 -0
  56. package/dist/cloud/api/admin.js +212 -0
  57. package/dist/cloud/api/auth.js +8 -0
  58. package/dist/cloud/api/billing.d.ts +0 -10
  59. package/dist/cloud/api/billing.js +248 -58
  60. package/dist/cloud/api/codex-auth-helper.d.ts +10 -4
  61. package/dist/cloud/api/codex-auth-helper.js +215 -8
  62. package/dist/cloud/api/coordinators.js +402 -0
  63. package/dist/cloud/api/daemons.js +15 -11
  64. package/dist/cloud/api/git.js +104 -17
  65. package/dist/cloud/api/github-app.js +42 -8
  66. package/dist/cloud/api/nango-auth.js +297 -16
  67. package/dist/cloud/api/onboarding.js +97 -33
  68. package/dist/cloud/api/providers.js +12 -16
  69. package/dist/cloud/api/repos.js +200 -124
  70. package/dist/cloud/api/test-helpers.js +40 -0
  71. package/dist/cloud/api/usage.js +13 -0
  72. package/dist/cloud/api/webhooks.js +1 -1
  73. package/dist/cloud/api/workspaces.d.ts +18 -0
  74. package/dist/cloud/api/workspaces.js +945 -15
  75. package/dist/cloud/config.d.ts +8 -0
  76. package/dist/cloud/config.js +15 -0
  77. package/dist/cloud/db/drizzle.d.ts +5 -2
  78. package/dist/cloud/db/drizzle.js +27 -20
  79. package/dist/cloud/db/schema.d.ts +19 -51
  80. package/dist/cloud/db/schema.js +5 -4
  81. package/dist/cloud/index.d.ts +0 -1
  82. package/dist/cloud/index.js +0 -1
  83. package/dist/cloud/provisioner/index.d.ts +93 -1
  84. package/dist/cloud/provisioner/index.js +608 -63
  85. package/dist/cloud/server.js +156 -16
  86. package/dist/cloud/services/compute-enforcement.d.ts +57 -0
  87. package/dist/cloud/services/compute-enforcement.js +175 -0
  88. package/dist/cloud/services/index.d.ts +2 -0
  89. package/dist/cloud/services/index.js +4 -0
  90. package/dist/cloud/services/intro-expiration.d.ts +55 -0
  91. package/dist/cloud/services/intro-expiration.js +211 -0
  92. package/dist/cloud/services/nango.d.ts +14 -0
  93. package/dist/cloud/services/nango.js +74 -14
  94. package/dist/cloud/services/ssh-security.d.ts +31 -0
  95. package/dist/cloud/services/ssh-security.js +63 -0
  96. package/dist/continuity/manager.d.ts +5 -0
  97. package/dist/continuity/manager.js +56 -2
  98. package/dist/daemon/api.d.ts +2 -0
  99. package/dist/daemon/api.js +214 -5
  100. package/dist/daemon/cli-auth.d.ts +13 -1
  101. package/dist/daemon/cli-auth.js +166 -47
  102. package/dist/daemon/connection.d.ts +7 -1
  103. package/dist/daemon/connection.js +15 -0
  104. package/dist/daemon/orchestrator.d.ts +2 -0
  105. package/dist/daemon/orchestrator.js +26 -0
  106. package/dist/daemon/repo-manager.d.ts +116 -0
  107. package/dist/daemon/repo-manager.js +384 -0
  108. package/dist/daemon/router.d.ts +60 -1
  109. package/dist/daemon/router.js +281 -20
  110. package/dist/daemon/user-directory.d.ts +111 -0
  111. package/dist/daemon/user-directory.js +233 -0
  112. package/dist/dashboard/out/404.html +1 -1
  113. package/dist/dashboard/out/_next/static/T1tgCqVWHFIkV7ClEtzD7/_ssgManifest.js +1 -0
  114. package/dist/dashboard/out/_next/static/chunks/532-bace199897eeab37.js +9 -0
  115. package/dist/dashboard/out/_next/static/chunks/766-b54f0853794b78c3.js +1 -0
  116. package/dist/dashboard/out/_next/static/chunks/83-b51836037078006c.js +1 -0
  117. package/dist/dashboard/out/_next/static/chunks/891-6cd50de1224f70bb.js +1 -0
  118. package/dist/dashboard/out/_next/static/chunks/899-bb19a9b3d9b39ea6.js +1 -0
  119. package/dist/dashboard/out/_next/static/chunks/app/app/onboarding/page-8939b0fc700f7eca.js +1 -0
  120. package/dist/dashboard/out/_next/static/chunks/app/app/page-5af1b6b439858aa6.js +1 -0
  121. package/dist/dashboard/out/_next/static/chunks/app/connect-repos/page-f45ecbc3e06134fc.js +1 -0
  122. package/dist/dashboard/out/_next/static/chunks/app/history/{page-abb9ab2d329f56e9.js → page-8c8bed33beb2bf1c.js} +1 -1
  123. package/dist/dashboard/out/_next/static/chunks/app/layout-2433bb48965f4333.js +1 -0
  124. package/dist/dashboard/out/_next/static/chunks/app/login/{page-c22d080201cbd9fb.js → page-16f3b49e55b1e0ed.js} +1 -1
  125. package/dist/dashboard/out/_next/static/chunks/app/metrics/page-ac39dc0cc3c26fa7.js +1 -0
  126. package/dist/dashboard/out/_next/static/chunks/app/{page-77e9c65420a06cfb.js → page-4a5938c18a11a654.js} +1 -1
  127. package/dist/dashboard/out/_next/static/chunks/app/pricing/page-982a7000fee44014.js +1 -0
  128. package/dist/dashboard/out/_next/static/chunks/app/providers/page-ac3a6ac433fd6001.js +1 -0
  129. package/dist/dashboard/out/_next/static/chunks/app/providers/setup/[provider]/page-09f9caae98a18c09.js +1 -0
  130. package/dist/dashboard/out/_next/static/chunks/app/signup/{page-68d34f50baa8ab6b.js → page-547dd0ca55ecd0ba.js} +1 -1
  131. package/dist/dashboard/out/_next/static/chunks/{main-ed4e1fb6f29c34cf.js → main-2ee6beb2ae96d210.js} +1 -1
  132. package/dist/dashboard/out/_next/static/chunks/{main-app-6e8e8d3ef4e0192a.js → main-app-5d692157a8eb1fd9.js} +1 -1
  133. package/dist/dashboard/out/_next/static/css/85d2af9c7ac74d62.css +1 -0
  134. package/dist/dashboard/out/_next/static/css/fe4b28883eeff359.css +1 -0
  135. package/dist/dashboard/out/app/onboarding.html +1 -1
  136. package/dist/dashboard/out/app/onboarding.txt +3 -3
  137. package/dist/dashboard/out/app.html +1 -1
  138. package/dist/dashboard/out/app.txt +3 -3
  139. package/dist/dashboard/out/apple-icon.png +0 -0
  140. package/dist/dashboard/out/connect-repos.html +1 -1
  141. package/dist/dashboard/out/connect-repos.txt +3 -3
  142. package/dist/dashboard/out/history.html +1 -1
  143. package/dist/dashboard/out/history.txt +3 -3
  144. package/dist/dashboard/out/index.html +1 -1
  145. package/dist/dashboard/out/index.txt +3 -3
  146. package/dist/dashboard/out/login.html +2 -2
  147. package/dist/dashboard/out/login.txt +3 -3
  148. package/dist/dashboard/out/metrics.html +1 -1
  149. package/dist/dashboard/out/metrics.txt +3 -3
  150. package/dist/dashboard/out/pricing.html +2 -2
  151. package/dist/dashboard/out/pricing.txt +3 -3
  152. package/dist/dashboard/out/providers/setup/claude.html +1 -0
  153. package/dist/dashboard/out/providers/setup/claude.txt +8 -0
  154. package/dist/dashboard/out/providers/setup/codex.html +1 -0
  155. package/dist/dashboard/out/providers/setup/codex.txt +8 -0
  156. package/dist/dashboard/out/providers.html +1 -1
  157. package/dist/dashboard/out/providers.txt +3 -3
  158. package/dist/dashboard/out/signup.html +2 -2
  159. package/dist/dashboard/out/signup.txt +3 -3
  160. package/dist/dashboard-server/server.js +316 -12
  161. package/dist/dashboard-server/user-bridge.d.ts +103 -0
  162. package/dist/dashboard-server/user-bridge.js +189 -0
  163. package/dist/protocol/channels.d.ts +205 -0
  164. package/dist/protocol/channels.js +154 -0
  165. package/dist/protocol/types.d.ts +13 -1
  166. package/dist/resiliency/provider-context.js +2 -0
  167. package/dist/shared/cli-auth-config.d.ts +19 -0
  168. package/dist/shared/cli-auth-config.js +58 -2
  169. package/dist/utils/agent-config.js +1 -1
  170. package/dist/wrapper/auth-detection.d.ts +49 -0
  171. package/dist/wrapper/auth-detection.js +192 -0
  172. package/dist/wrapper/base-wrapper.d.ts +153 -0
  173. package/dist/wrapper/base-wrapper.js +393 -0
  174. package/dist/wrapper/client.d.ts +7 -1
  175. package/dist/wrapper/client.js +3 -0
  176. package/dist/wrapper/index.d.ts +1 -0
  177. package/dist/wrapper/index.js +4 -3
  178. package/dist/wrapper/pty-wrapper.d.ts +62 -84
  179. package/dist/wrapper/pty-wrapper.js +154 -180
  180. package/dist/wrapper/tmux-wrapper.d.ts +41 -66
  181. package/dist/wrapper/tmux-wrapper.js +90 -134
  182. package/package.json +4 -2
  183. package/scripts/postinstall.js +11 -155
  184. package/scripts/test-interactive-terminal.sh +248 -0
  185. package/dist/cloud/vault/index.d.ts +0 -76
  186. package/dist/cloud/vault/index.js +0 -219
  187. package/dist/dashboard/out/_next/static/chunks/699-3b1cd6618a45d259.js +0 -1
  188. package/dist/dashboard/out/_next/static/chunks/724-2dae7627550ab88f.js +0 -9
  189. package/dist/dashboard/out/_next/static/chunks/766-1f2dd8cb7f766b0b.js +0 -1
  190. package/dist/dashboard/out/_next/static/chunks/app/app/onboarding/page-3fdfa60e53f2810d.js +0 -1
  191. package/dist/dashboard/out/_next/static/chunks/app/app/page-e6381e5a6e1fbcfd.js +0 -1
  192. package/dist/dashboard/out/_next/static/chunks/app/connect-repos/page-3538dfe0ffe984b8.js +0 -1
  193. package/dist/dashboard/out/_next/static/chunks/app/layout-c0d118c0f92d969c.js +0 -1
  194. package/dist/dashboard/out/_next/static/chunks/app/metrics/page-67a3e98d9a43a6ed.js +0 -1
  195. package/dist/dashboard/out/_next/static/chunks/app/pricing/page-b08ed1c34d14434a.js +0 -1
  196. package/dist/dashboard/out/_next/static/chunks/app/providers/page-e88bc117ef7671c3.js +0 -1
  197. package/dist/dashboard/out/_next/static/css/29852f26181969a0.css +0 -1
  198. package/dist/dashboard/out/_next/static/css/7c3ae9e8617d42a5.css +0 -1
  199. package/dist/dashboard/out/_next/static/wPgKJtcOmTFLpUncDg16A/_ssgManifest.js +0 -1
  200. package/dist/dashboard/out/_next/static/{wPgKJtcOmTFLpUncDg16A → T1tgCqVWHFIkV7ClEtzD7}/_buildManifest.js +0 -0
@@ -6,10 +6,11 @@
6
6
  import * as crypto from 'crypto';
7
7
  import { getConfig } from '../config.js';
8
8
  import { db } from '../db/index.js';
9
- import { vault } from '../vault/index.js';
10
9
  import { nangoService } from '../services/nango.js';
11
10
  import { canAutoScale, canScaleToTier, getResourceTierForPlan, } from '../services/planLimits.js';
11
+ import { deriveSshPassword } from '../services/ssh-security.js';
12
12
  const WORKSPACE_PORT = 3888;
13
+ const CODEX_OAUTH_PORT = 1455; // Codex CLI OAuth callback port - must be mapped for local dev
13
14
  const FETCH_TIMEOUT_MS = 10_000;
14
15
  const WORKSPACE_IMAGE = process.env.WORKSPACE_IMAGE || 'ghcr.io/agentworkforce/relay-workspace:latest';
15
16
  // In-memory tracker for provisioning progress (workspace ID -> progress)
@@ -70,20 +71,6 @@ async function getGithubAppTokenForUser(userId) {
70
71
  return null;
71
72
  }
72
73
  }
73
- async function loadCredentialToken(userId, provider) {
74
- try {
75
- const cred = await vault.getCredential(userId, provider);
76
- if (cred?.accessToken) {
77
- return cred.accessToken;
78
- }
79
- }
80
- catch (error) {
81
- console.warn(`Failed to decrypt ${provider} credential from vault; trying raw storage fallback`, error);
82
- const raw = await db.credentials.findByUserAndProvider(userId, provider);
83
- return raw?.accessToken ?? null;
84
- }
85
- return null;
86
- }
87
74
  async function wait(ms) {
88
75
  return new Promise((resolve) => setTimeout(resolve, ms));
89
76
  }
@@ -132,33 +119,46 @@ async function softHealthCheck(url) {
132
119
  */
133
120
  async function waitForMachineStarted(apiToken, appName, machineId, timeoutSeconds = 120) {
134
121
  console.log(`[provisioner] Waiting for machine ${machineId} to start (timeout: ${timeoutSeconds}s)...`);
135
- try {
136
- // Use Fly.io's /wait endpoint - blocks until machine reaches target state
137
- const res = await fetch(`https://api.machines.dev/v1/apps/${appName}/machines/${machineId}/wait?state=started&timeout=${timeoutSeconds}`, {
138
- headers: { Authorization: `Bearer ${apiToken}` },
139
- });
140
- if (res.ok) {
141
- console.log(`[provisioner] Machine ${machineId} is now started`);
142
- return;
143
- }
144
- // 408 = timeout, machine didn't reach state in time
145
- if (res.status === 408) {
146
- // Get current state for error message
147
- const stateRes = await fetch(`https://api.machines.dev/v1/apps/${appName}/machines/${machineId}`, { headers: { Authorization: `Bearer ${apiToken}` } });
148
- const machine = stateRes.ok ? (await stateRes.json()) : { state: 'unknown' };
149
- throw new Error(`Machine ${machineId} did not start within ${timeoutSeconds}s (last state: ${machine.state})`);
122
+ // Fly.io /wait endpoint has max timeout of 60s, so we need to loop for longer waits
123
+ const maxSingleWait = 60;
124
+ const startTime = Date.now();
125
+ const deadline = startTime + timeoutSeconds * 1000;
126
+ while (Date.now() < deadline) {
127
+ const remainingMs = deadline - Date.now();
128
+ const waitSeconds = Math.min(maxSingleWait, Math.ceil(remainingMs / 1000));
129
+ if (waitSeconds <= 0)
130
+ break;
131
+ try {
132
+ // Use Fly.io's /wait endpoint - blocks until machine reaches target state
133
+ // timeout is an integer in seconds (max 60)
134
+ const res = await fetch(`https://api.machines.dev/v1/apps/${appName}/machines/${machineId}/wait?state=started&timeout=${waitSeconds}`, {
135
+ headers: { Authorization: `Bearer ${apiToken}` },
136
+ });
137
+ if (res.ok) {
138
+ console.log(`[provisioner] Machine ${machineId} is now started`);
139
+ return;
140
+ }
141
+ // 408 = timeout, machine didn't reach state in time - try again if we have time
142
+ if (res.status === 408) {
143
+ console.log(`[provisioner] Machine ${machineId} not ready yet, continuing to wait...`);
144
+ continue;
145
+ }
146
+ // Other error
147
+ const errorText = await res.text();
148
+ throw new Error(`Wait for machine failed: ${res.status} ${errorText}`);
150
149
  }
151
- // Other error
152
- const errorText = await res.text();
153
- throw new Error(`Wait for machine failed: ${res.status} ${errorText}`);
154
- }
155
- catch (error) {
156
- if (error instanceof Error && error.message.includes('did not start')) {
157
- throw error;
150
+ catch (error) {
151
+ if (error instanceof Error && error.message.includes('Wait for machine failed')) {
152
+ throw error;
153
+ }
154
+ console.warn(`[provisioner] Error waiting for machine:`, error);
155
+ throw new Error(`Failed to wait for machine ${machineId}: ${error.message}`);
158
156
  }
159
- console.warn(`[provisioner] Error waiting for machine:`, error);
160
- throw new Error(`Failed to wait for machine ${machineId}: ${error.message}`);
161
157
  }
158
+ // Timeout reached - get current state for error message
159
+ const stateRes = await fetch(`https://api.machines.dev/v1/apps/${appName}/machines/${machineId}`, { headers: { Authorization: `Bearer ${apiToken}` } });
160
+ const machine = stateRes.ok ? (await stateRes.json()) : { state: 'unknown' };
161
+ throw new Error(`Machine ${machineId} did not start within ${timeoutSeconds}s (last state: ${machine.state})`);
162
162
  }
163
163
  /**
164
164
  * Wait for health check to pass (with DNS propagation time)
@@ -226,6 +226,8 @@ class FlyProvisioner {
226
226
  cloudApiUrl;
227
227
  sessionSecret;
228
228
  registryAuth;
229
+ snapshotRetentionDays;
230
+ volumeSizeGb;
229
231
  constructor() {
230
232
  const config = getConfig();
231
233
  if (!config.compute.fly) {
@@ -238,6 +240,9 @@ class FlyProvisioner {
238
240
  this.registryAuth = config.compute.fly.registryAuth;
239
241
  this.cloudApiUrl = config.publicUrl;
240
242
  this.sessionSecret = config.sessionSecret;
243
+ // Snapshot settings: default 14 days retention, 10GB volume
244
+ this.snapshotRetentionDays = Math.min(60, Math.max(1, config.compute.fly.snapshotRetentionDays ?? 14));
245
+ this.volumeSizeGb = config.compute.fly.volumeSizeGb ?? 10;
241
246
  }
242
247
  /**
243
248
  * Generate a workspace token for API authentication
@@ -249,6 +254,87 @@ class FlyProvisioner {
249
254
  .update(`workspace:${workspaceId}`)
250
255
  .digest('hex');
251
256
  }
257
+ /**
258
+ * Create a volume with automatic snapshot settings
259
+ * Fly.io takes daily snapshots automatically; we configure retention
260
+ */
261
+ async createVolume(appName) {
262
+ const volumeName = 'workspace_data';
263
+ console.log(`[fly] Creating volume ${volumeName} with ${this.snapshotRetentionDays}-day snapshot retention...`);
264
+ const response = await fetchWithRetry(`https://api.machines.dev/v1/apps/${appName}/volumes`, {
265
+ method: 'POST',
266
+ headers: {
267
+ Authorization: `Bearer ${this.apiToken}`,
268
+ 'Content-Type': 'application/json',
269
+ },
270
+ body: JSON.stringify({
271
+ name: volumeName,
272
+ region: this.region,
273
+ size_gb: this.volumeSizeGb,
274
+ // Enable automatic daily snapshots (default is true, but be explicit)
275
+ auto_backup_enabled: true,
276
+ // Retain snapshots for configured days (default 5, we use 14)
277
+ snapshot_retention: this.snapshotRetentionDays,
278
+ }),
279
+ });
280
+ if (!response.ok) {
281
+ const error = await response.text();
282
+ throw new Error(`Failed to create volume: ${error}`);
283
+ }
284
+ const volume = await response.json();
285
+ console.log(`[fly] Volume ${volume.id} created with auto-snapshots (${this.snapshotRetentionDays} days retention)`);
286
+ return volume;
287
+ }
288
+ /**
289
+ * Create an on-demand snapshot of a workspace volume
290
+ * Use before risky operations or as manual backup
291
+ */
292
+ async createSnapshot(appName, volumeId) {
293
+ console.log(`[fly] Creating on-demand snapshot for volume ${volumeId}...`);
294
+ const response = await fetchWithRetry(`https://api.machines.dev/v1/apps/${appName}/volumes/${volumeId}/snapshots`, {
295
+ method: 'POST',
296
+ headers: {
297
+ Authorization: `Bearer ${this.apiToken}`,
298
+ 'Content-Type': 'application/json',
299
+ },
300
+ });
301
+ if (!response.ok) {
302
+ const error = await response.text();
303
+ throw new Error(`Failed to create snapshot: ${error}`);
304
+ }
305
+ const snapshot = await response.json();
306
+ console.log(`[fly] Snapshot ${snapshot.id} created`);
307
+ return snapshot;
308
+ }
309
+ /**
310
+ * List snapshots for a workspace volume
311
+ */
312
+ async listSnapshots(appName, volumeId) {
313
+ const response = await fetchWithRetry(`https://api.machines.dev/v1/apps/${appName}/volumes/${volumeId}/snapshots`, {
314
+ headers: {
315
+ Authorization: `Bearer ${this.apiToken}`,
316
+ },
317
+ });
318
+ if (!response.ok) {
319
+ return [];
320
+ }
321
+ return await response.json();
322
+ }
323
+ /**
324
+ * Get volume info for a workspace
325
+ */
326
+ async getVolume(appName) {
327
+ const response = await fetchWithRetry(`https://api.machines.dev/v1/apps/${appName}/volumes`, {
328
+ headers: {
329
+ Authorization: `Bearer ${this.apiToken}`,
330
+ },
331
+ });
332
+ if (!response.ok) {
333
+ return null;
334
+ }
335
+ const volumes = await response.json();
336
+ return volumes.find(v => v.name === 'workspace_data') || null;
337
+ }
252
338
  async provision(workspace, credentials) {
253
339
  const appName = `ar-${workspace.id.substring(0, 8)}`;
254
340
  // Stage: Creating workspace
@@ -359,8 +445,33 @@ class FlyProvisioner {
359
445
  if (customHostname) {
360
446
  await this.allocateCertificate(appName, customHostname);
361
447
  }
362
- // Stage: Machine
448
+ // Stage: Machine (includes volume creation)
363
449
  updateProvisioningStage(workspace.id, 'machine');
450
+ // Create volume with automatic daily snapshots before machine
451
+ // Fly.io takes daily snapshots automatically; we configure retention
452
+ const volume = await this.createVolume(appName);
453
+ // Determine instance size based on user's plan
454
+ // Free tier: 1 CPU, 2GB (~$10/mo) - Claude needs 2GB minimum
455
+ // Paid tiers: 2 CPU, 2GB (~$15/mo)
456
+ // Introductory bonus: Free users get Pro-level resources for first 14 days
457
+ const user = await db.users.findById(workspace.userId);
458
+ const userPlan = user?.plan || 'free';
459
+ const isFreeTier = userPlan === 'free';
460
+ // Check if user is in introductory period (first 14 days)
461
+ const INTRO_PERIOD_DAYS = 14;
462
+ const userCreatedAt = user?.createdAt ? new Date(user.createdAt) : new Date();
463
+ const daysSinceSignup = (Date.now() - userCreatedAt.getTime()) / (1000 * 60 * 60 * 24);
464
+ const isIntroPeriod = isFreeTier && daysSinceSignup < INTRO_PERIOD_DAYS;
465
+ const guestConfig = {
466
+ cpu_kind: 'shared',
467
+ cpus: isIntroPeriod ? 2 : (isFreeTier ? 1 : 2), // Intro gets 2 CPUs like Pro
468
+ memory_mb: isIntroPeriod ? 4096 : 2048, // Intro gets 4GB like Pro
469
+ };
470
+ if (isIntroPeriod) {
471
+ const daysRemaining = Math.ceil(INTRO_PERIOD_DAYS - daysSinceSignup);
472
+ console.log(`[fly] Introductory bonus active (${daysRemaining} days remaining) - 2 CPU / 4GB`);
473
+ }
474
+ console.log(`[fly] Using ${guestConfig.cpus} CPU / ${guestConfig.memory_mb}MB for ${userPlan} plan`);
364
475
  // Create machine with auto-stop/start for cost optimization
365
476
  const machineResponse = await fetchWithRetry(`https://api.machines.dev/v1/apps/${appName}/machines`, {
366
477
  method: 'POST',
@@ -382,6 +493,7 @@ class FlyProvisioner {
382
493
  }),
383
494
  env: {
384
495
  WORKSPACE_ID: workspace.id,
496
+ WORKSPACE_OWNER_USER_ID: workspace.userId,
385
497
  SUPERVISOR_ENABLED: String(workspace.config.supervisorEnabled ?? false),
386
498
  MAX_AGENTS: String(workspace.config.maxAgents ?? 10),
387
499
  REPOSITORIES: (workspace.config.repositories ?? []).join(','),
@@ -391,6 +503,10 @@ class FlyProvisioner {
391
503
  // Git gateway configuration
392
504
  CLOUD_API_URL: this.cloudApiUrl,
393
505
  WORKSPACE_TOKEN: this.generateWorkspaceToken(workspace.id),
506
+ // SSH for CLI tunneling (Codex OAuth callback forwarding)
507
+ // Each workspace gets a unique password derived from its ID + secret salt
508
+ ENABLE_SSH: 'true',
509
+ SSH_PASSWORD: deriveSshPassword(workspace.id),
394
510
  },
395
511
  services: [
396
512
  {
@@ -421,6 +537,22 @@ class FlyProvisioner {
421
537
  hard_limit: 50,
422
538
  },
423
539
  },
540
+ // SSH service for CLI tunneling (Codex OAuth callback forwarding)
541
+ // Exposes port 2222 publicly for SSH connections from user's machine
542
+ {
543
+ ports: [
544
+ {
545
+ port: 2222,
546
+ handlers: [], // Empty handlers = raw TCP passthrough
547
+ },
548
+ ],
549
+ protocol: 'tcp',
550
+ internal_port: 2222,
551
+ // SSH connections should also wake the machine
552
+ auto_stop_machines: 'stop',
553
+ auto_start_machines: true,
554
+ min_machines_running: 0,
555
+ },
424
556
  ],
425
557
  checks: {
426
558
  health: {
@@ -432,13 +564,15 @@ class FlyProvisioner {
432
564
  grace_period: '10s',
433
565
  },
434
566
  },
435
- // Start with small tier (shared CPUs) - scales up based on plan
436
- // Free tier uses shared CPUs for cost efficiency
437
- guest: {
438
- cpu_kind: 'shared',
439
- cpus: 2,
440
- memory_mb: 2048,
441
- },
567
+ // Instance size based on plan - free tier gets smaller instance
568
+ guest: guestConfig,
569
+ // Mount the volume we created with snapshot settings
570
+ mounts: [
571
+ {
572
+ volume: volume.id,
573
+ path: '/data',
574
+ },
575
+ ],
442
576
  },
443
577
  }),
444
578
  });
@@ -535,13 +669,24 @@ class FlyProvisioner {
535
669
  }
536
670
  /**
537
671
  * Resize workspace - vertical scaling via Fly Machines API
672
+ * @param skipRestart - If true, config is saved but machine won't restart (changes apply on next start)
538
673
  */
539
- async resize(workspace, tier) {
540
- if (!workspace.computeId)
541
- return;
542
- const appName = `ar-${workspace.id.substring(0, 8)}`;
674
+ async resize(workspaceOrId, tier, skipRestart = false) {
675
+ const workspaceId = typeof workspaceOrId === 'string' ? workspaceOrId : workspaceOrId.id;
676
+ const computeId = typeof workspaceOrId === 'string' ? undefined : workspaceOrId.computeId;
677
+ // If passed just an ID, look up the workspace
678
+ let machineId = computeId;
679
+ if (!machineId) {
680
+ const workspace = await db.workspaces.findById(workspaceId);
681
+ if (!workspace?.computeId)
682
+ return;
683
+ machineId = workspace.computeId;
684
+ }
685
+ const appName = `ar-${workspaceId.substring(0, 8)}`;
543
686
  // Update machine configuration
544
- await fetchWithRetry(`https://api.machines.dev/v1/apps/${appName}/machines/${workspace.computeId}`, {
687
+ // If running: reboots with new specs (unless skip_launch: true)
688
+ // If stopped: config saved, applies on next start
689
+ await fetchWithRetry(`https://api.machines.dev/v1/apps/${appName}/machines/${machineId}`, {
545
690
  method: 'POST',
546
691
  headers: {
547
692
  Authorization: `Bearer ${this.apiToken}`,
@@ -559,9 +704,11 @@ class FlyProvisioner {
559
704
  MAX_AGENTS: String(tier.maxAgents),
560
705
  },
561
706
  },
707
+ skip_launch: skipRestart, // If true, don't restart - changes apply on next start
562
708
  }),
563
709
  });
564
- console.log(`[fly] Resized workspace ${workspace.id} to ${tier.name} (${tier.cpuCores} CPU, ${tier.memoryMb}MB RAM)`);
710
+ const restartNote = skipRestart ? ' (will apply on next restart)' : ' (restarting)';
711
+ console.log(`[fly] Resized workspace ${workspaceId.substring(0, 8)} to ${tier.name} (${tier.cpuCores} CPU, ${tier.memoryMb}MB RAM)${restartNote}`);
565
712
  }
566
713
  /**
567
714
  * Update the max agent limit for a workspace
@@ -615,6 +762,118 @@ class FlyProvisioner {
615
762
  return RESOURCE_TIERS.medium;
616
763
  return RESOURCE_TIERS.small;
617
764
  }
765
+ /**
766
+ * Update machine image without restarting
767
+ * Note: The machine needs to be restarted later to use the new image
768
+ */
769
+ async updateMachineImage(workspace, newImage) {
770
+ if (!workspace.computeId)
771
+ return;
772
+ const appName = `ar-${workspace.id.substring(0, 8)}`;
773
+ // Get current machine config first
774
+ const getResponse = await fetchWithRetry(`https://api.machines.dev/v1/apps/${appName}/machines/${workspace.computeId}`, {
775
+ headers: {
776
+ Authorization: `Bearer ${this.apiToken}`,
777
+ },
778
+ });
779
+ if (!getResponse.ok) {
780
+ throw new Error(`Failed to get machine config: ${await getResponse.text()}`);
781
+ }
782
+ const machine = await getResponse.json();
783
+ // Update the image in the config
784
+ const updatedConfig = {
785
+ ...machine.config,
786
+ image: newImage,
787
+ // Include registry auth if configured
788
+ ...(this.registryAuth && {
789
+ image_registry_auth: {
790
+ registry: 'ghcr.io',
791
+ username: this.registryAuth.username,
792
+ password: this.registryAuth.password,
793
+ },
794
+ }),
795
+ };
796
+ // Update machine with new image config (skip_launch keeps it in current state)
797
+ const updateResponse = await fetchWithRetry(`https://api.machines.dev/v1/apps/${appName}/machines/${workspace.computeId}?skip_launch=true`, {
798
+ method: 'POST',
799
+ headers: {
800
+ Authorization: `Bearer ${this.apiToken}`,
801
+ 'Content-Type': 'application/json',
802
+ },
803
+ body: JSON.stringify({ config: updatedConfig }),
804
+ });
805
+ if (!updateResponse.ok) {
806
+ throw new Error(`Failed to update machine image: ${await updateResponse.text()}`);
807
+ }
808
+ console.log(`[fly] Updated machine image for workspace ${workspace.id.substring(0, 8)} to ${newImage}`);
809
+ }
810
+ /**
811
+ * Check if workspace has active agents by querying the daemon
812
+ */
813
+ async checkActiveAgents(workspace) {
814
+ if (!workspace.publicUrl) {
815
+ return { hasActiveAgents: false, agentCount: 0, agents: [] };
816
+ }
817
+ try {
818
+ // Use internal Fly network URL if available (more reliable)
819
+ const appName = `ar-${workspace.id.substring(0, 8)}`;
820
+ const isOnFly = !!process.env.FLY_APP_NAME;
821
+ const baseUrl = isOnFly
822
+ ? `http://${appName}.internal:3888`
823
+ : workspace.publicUrl;
824
+ const controller = new AbortController();
825
+ const timer = setTimeout(() => controller.abort(), 10_000);
826
+ const response = await fetch(`${baseUrl}/api/agents`, {
827
+ method: 'GET',
828
+ headers: {
829
+ 'Accept': 'application/json',
830
+ },
831
+ signal: controller.signal,
832
+ });
833
+ clearTimeout(timer);
834
+ if (!response.ok) {
835
+ console.warn(`[fly] Failed to check agents for ${workspace.id.substring(0, 8)}: ${response.status}`);
836
+ return { hasActiveAgents: false, agentCount: 0, agents: [] };
837
+ }
838
+ const data = await response.json();
839
+ const agents = data.agents || [];
840
+ // Consider agents with 'active' or 'idle' activity state as active
841
+ // 'disconnected' agents are not active
842
+ const activeAgents = agents.filter(a => a.status === 'running' || a.activityState === 'active' || a.activityState === 'idle');
843
+ return {
844
+ hasActiveAgents: activeAgents.length > 0,
845
+ agentCount: activeAgents.length,
846
+ agents: agents.map(a => ({ name: a.name, status: a.status || a.activityState || 'unknown' })),
847
+ };
848
+ }
849
+ catch (error) {
850
+ // Workspace might be stopped or unreachable - treat as no active agents
851
+ console.warn(`[fly] Could not reach workspace ${workspace.id.substring(0, 8)} to check agents:`, error.message);
852
+ return { hasActiveAgents: false, agentCount: 0, agents: [] };
853
+ }
854
+ }
855
+ /**
856
+ * Get the current machine state
857
+ */
858
+ async getMachineState(workspace) {
859
+ if (!workspace.computeId)
860
+ return 'unknown';
861
+ const appName = `ar-${workspace.id.substring(0, 8)}`;
862
+ try {
863
+ const response = await fetchWithRetry(`https://api.machines.dev/v1/apps/${appName}/machines/${workspace.computeId}`, {
864
+ headers: {
865
+ Authorization: `Bearer ${this.apiToken}`,
866
+ },
867
+ });
868
+ if (!response.ok)
869
+ return 'unknown';
870
+ const machine = await response.json();
871
+ return machine.state;
872
+ }
873
+ catch {
874
+ return 'unknown';
875
+ }
876
+ }
618
877
  }
619
878
  /**
620
879
  * Railway provisioner
@@ -695,6 +954,7 @@ class RailwayProvisioner {
695
954
  // Set environment variables
696
955
  const envVars = {
697
956
  WORKSPACE_ID: workspace.id,
957
+ WORKSPACE_OWNER_USER_ID: workspace.userId,
698
958
  SUPERVISOR_ENABLED: String(workspace.config.supervisorEnabled ?? false),
699
959
  MAX_AGENTS: String(workspace.config.maxAgents ?? 10),
700
960
  REPOSITORIES: (workspace.config.repositories ?? []).join(','),
@@ -913,6 +1173,7 @@ class DockerProvisioner {
913
1173
  // Build environment variables
914
1174
  const envArgs = [
915
1175
  `-e WORKSPACE_ID=${workspace.id}`,
1176
+ `-e WORKSPACE_OWNER_USER_ID=${workspace.userId}`,
916
1177
  `-e SUPERVISOR_ENABLED=${workspace.config.supervisorEnabled ?? false}`,
917
1178
  `-e MAX_AGENTS=${workspace.config.maxAgents ?? 10}`,
918
1179
  `-e REPOSITORIES=${(workspace.config.repositories ?? []).join(',')}`,
@@ -932,6 +1193,9 @@ class DockerProvisioner {
932
1193
  // Run container
933
1194
  const { execSync } = await import('child_process');
934
1195
  const hostPort = 3000 + Math.floor(Math.random() * 1000);
1196
+ // SSH port for tunneling (Codex OAuth callback forwarding)
1197
+ // Derive from hostPort to avoid collisions: API port 3500 -> SSH port 22500
1198
+ const sshHostPort = 22000 + (hostPort - 3000);
935
1199
  // When running in Docker, connect to the same network for container-to-container communication
936
1200
  const runningInDocker = process.env.RUNNING_IN_DOCKER === 'true';
937
1201
  const networkArg = runningInDocker ? '--network agent-relay-dev' : '';
@@ -945,7 +1209,18 @@ class DockerProvisioner {
945
1209
  console.log('[provisioner] Dev mode: mounting local dist/ and docs/ folders into workspace container');
946
1210
  }
947
1211
  try {
948
- execSync(`docker run -d --user root --name ${containerName} ${networkArg} ${volumeArgs} -p ${hostPort}:${WORKSPACE_PORT} ${envArgs.join(' ')} ${WORKSPACE_IMAGE}`, { stdio: 'pipe' });
1212
+ // Map workspace API port and SSH port (for tunneling)
1213
+ // SSH is used by CLI to forward localhost:1455 to workspace container for Codex OAuth
1214
+ // Set CODEX_DIRECT_PORT=true to also map port 1455 directly (for debugging only)
1215
+ const directCodexPort = process.env.CODEX_DIRECT_PORT === 'true';
1216
+ const portMappings = directCodexPort
1217
+ ? `-p ${hostPort}:${WORKSPACE_PORT} -p ${sshHostPort}:2222 -p ${CODEX_OAUTH_PORT}:${CODEX_OAUTH_PORT}`
1218
+ : `-p ${hostPort}:${WORKSPACE_PORT} -p ${sshHostPort}:2222`;
1219
+ // Enable SSH in the container for tunneling
1220
+ // Each workspace gets a unique password derived from its ID + secret salt
1221
+ envArgs.push('-e ENABLE_SSH=true');
1222
+ envArgs.push(`-e SSH_PASSWORD=${deriveSshPassword(workspace.id)}`);
1223
+ execSync(`docker run -d --user root --name ${containerName} ${networkArg} ${volumeArgs} ${portMappings} ${envArgs.join(' ')} ${WORKSPACE_IMAGE}`, { stdio: 'pipe' });
949
1224
  const publicUrl = `http://localhost:${hostPort}`;
950
1225
  // Wait for container to be healthy before returning
951
1226
  // When running in Docker, use the internal container name for health check
@@ -956,6 +1231,7 @@ class DockerProvisioner {
956
1231
  return {
957
1232
  computeId: containerName,
958
1233
  publicUrl,
1234
+ sshPort: sshHostPort,
959
1235
  };
960
1236
  }
961
1237
  catch (error) {
@@ -1061,6 +1337,37 @@ export class WorkspaceProvisioner {
1061
1337
  });
1062
1338
  // Auto-accept the creator's membership
1063
1339
  await db.workspaceMembers.acceptInvite(workspace.id, config.userId);
1340
+ // Link repositories to this workspace
1341
+ // This enables auto-access for users with GitHub access to these repos
1342
+ for (const repoFullName of config.repositories) {
1343
+ try {
1344
+ // Find the user's repo record (may not exist if user didn't import it first)
1345
+ const userRepos = await db.repositories.findByUserId(config.userId);
1346
+ const repoRecord = userRepos.find(r => r.githubFullName.toLowerCase() === repoFullName.toLowerCase());
1347
+ if (repoRecord) {
1348
+ await db.repositories.assignToWorkspace(repoRecord.id, workspace.id);
1349
+ console.log(`[provisioner] Linked repo ${repoFullName} to workspace ${workspace.id.substring(0, 8)}`);
1350
+ }
1351
+ else {
1352
+ // Create a placeholder repo record if it doesn't exist
1353
+ // This ensures the repo is tracked for workspace access checks
1354
+ console.log(`[provisioner] Creating repo record for ${repoFullName}`);
1355
+ const newRepo = await db.repositories.upsert({
1356
+ userId: config.userId,
1357
+ githubFullName: repoFullName,
1358
+ githubId: 0, // Will be updated when actually synced
1359
+ defaultBranch: 'main',
1360
+ isPrivate: true, // Assume private, will be updated
1361
+ workspaceId: workspace.id,
1362
+ });
1363
+ console.log(`[provisioner] Created and linked repo ${repoFullName} (id: ${newRepo.id.substring(0, 8)})`);
1364
+ }
1365
+ }
1366
+ catch (err) {
1367
+ console.warn(`[provisioner] Failed to link repo ${repoFullName}:`, err);
1368
+ // Continue with other repos
1369
+ }
1370
+ }
1064
1371
  // Initialize stage tracking immediately
1065
1372
  updateProvisioningStage(workspace.id, 'creating');
1066
1373
  // Run provisioning in the background so frontend can poll for stages
@@ -1077,14 +1384,11 @@ export class WorkspaceProvisioner {
1077
1384
  * Run the actual provisioning work asynchronously
1078
1385
  */
1079
1386
  async runProvisioningAsync(workspace, config) {
1080
- // Get credentials
1387
+ // Build credentials map for workspace provisioning
1388
+ // Note: Provider tokens (Claude, Codex, etc.) are no longer stored centrally.
1389
+ // CLI tools authenticate directly on workspace instances.
1390
+ // Only GitHub App tokens are obtained from Nango for repository cloning.
1081
1391
  const credentials = new Map();
1082
- for (const provider of config.providers) {
1083
- const token = await loadCredentialToken(config.userId, provider);
1084
- if (token) {
1085
- credentials.set(provider, token);
1086
- }
1087
- }
1088
1392
  // GitHub token is required for cloning repositories
1089
1393
  // Use direct token if provided (for testing), otherwise get from Nango
1090
1394
  if (config.repositories.length > 0) {
@@ -1183,8 +1487,9 @@ export class WorkspaceProvisioner {
1183
1487
  }
1184
1488
  /**
1185
1489
  * Resize a workspace (vertical scaling)
1490
+ * @param skipRestart - If true, config is saved but machine won't restart (changes apply on next start)
1186
1491
  */
1187
- async resize(workspaceId, tier) {
1492
+ async resize(workspaceId, tier, skipRestart = false) {
1188
1493
  const workspace = await db.workspaces.findById(workspaceId);
1189
1494
  if (!workspace) {
1190
1495
  throw new Error('Workspace not found');
@@ -1192,7 +1497,7 @@ export class WorkspaceProvisioner {
1192
1497
  if (!this.provisioner.resize) {
1193
1498
  throw new Error('Resize not supported by current compute provider');
1194
1499
  }
1195
- await this.provisioner.resize(workspace, tier);
1500
+ await this.provisioner.resize(workspace, tier, skipRestart);
1196
1501
  // Update workspace config with new limits
1197
1502
  await db.workspaces.updateConfig(workspaceId, {
1198
1503
  ...workspace.config,
@@ -1306,6 +1611,246 @@ export class WorkspaceProvisioner {
1306
1611
  targetTier: recommendedTier.name,
1307
1612
  };
1308
1613
  }
1614
+ // ============================================================================
1615
+ // Snapshot Management
1616
+ // ============================================================================
1617
+ /**
1618
+ * Create an on-demand snapshot of a workspace's volume
1619
+ * Use before risky operations (e.g., major refactors, untrusted code execution)
1620
+ */
1621
+ async createSnapshot(workspaceId) {
1622
+ const workspace = await db.workspaces.findById(workspaceId);
1623
+ if (!workspace) {
1624
+ throw new Error('Workspace not found');
1625
+ }
1626
+ // Only Fly.io provisioner supports snapshots
1627
+ if (!(this.provisioner instanceof FlyProvisioner)) {
1628
+ console.warn('[provisioner] Snapshots only supported on Fly.io');
1629
+ return null;
1630
+ }
1631
+ const appName = `ar-${workspace.id.substring(0, 8)}`;
1632
+ const flyProvisioner = this.provisioner;
1633
+ // Get the volume
1634
+ const volume = await flyProvisioner.getVolume(appName);
1635
+ if (!volume) {
1636
+ throw new Error('No volume found for workspace');
1637
+ }
1638
+ // Create snapshot
1639
+ const snapshot = await flyProvisioner.createSnapshot(appName, volume.id);
1640
+ return { snapshotId: snapshot.id };
1641
+ }
1642
+ /**
1643
+ * List available snapshots for a workspace
1644
+ * Includes both automatic daily snapshots and on-demand snapshots
1645
+ */
1646
+ async listSnapshots(workspaceId) {
1647
+ const workspace = await db.workspaces.findById(workspaceId);
1648
+ if (!workspace) {
1649
+ throw new Error('Workspace not found');
1650
+ }
1651
+ // Only Fly.io provisioner supports snapshots
1652
+ if (!(this.provisioner instanceof FlyProvisioner)) {
1653
+ return [];
1654
+ }
1655
+ const appName = `ar-${workspace.id.substring(0, 8)}`;
1656
+ const flyProvisioner = this.provisioner;
1657
+ // Get the volume
1658
+ const volume = await flyProvisioner.getVolume(appName);
1659
+ if (!volume) {
1660
+ return [];
1661
+ }
1662
+ // List snapshots
1663
+ const snapshots = await flyProvisioner.listSnapshots(appName, volume.id);
1664
+ return snapshots.map(s => ({
1665
+ id: s.id,
1666
+ createdAt: s.created_at,
1667
+ sizeBytes: s.size,
1668
+ }));
1669
+ }
1670
+ /**
1671
+ * Get the volume ID for a workspace (needed for restore operations)
1672
+ */
1673
+ async getVolumeId(workspaceId) {
1674
+ const workspace = await db.workspaces.findById(workspaceId);
1675
+ if (!workspace) {
1676
+ throw new Error('Workspace not found');
1677
+ }
1678
+ if (!(this.provisioner instanceof FlyProvisioner)) {
1679
+ return null;
1680
+ }
1681
+ const appName = `ar-${workspace.id.substring(0, 8)}`;
1682
+ const flyProvisioner = this.provisioner;
1683
+ const volume = await flyProvisioner.getVolume(appName);
1684
+ return volume?.id || null;
1685
+ }
1686
+ // ============================================================================
1687
+ // Graceful Image Update
1688
+ // ============================================================================
1689
+ /**
1690
+ * Result of a graceful update attempt
1691
+ */
1692
+ static UpdateResult = {
1693
+ UPDATED: 'updated',
1694
+ UPDATED_PENDING_RESTART: 'updated_pending_restart',
1695
+ SKIPPED_ACTIVE_AGENTS: 'skipped_active_agents',
1696
+ SKIPPED_NOT_RUNNING: 'skipped_not_running',
1697
+ NOT_SUPPORTED: 'not_supported',
1698
+ ERROR: 'error',
1699
+ };
1700
+ /**
1701
+ * Gracefully update a single workspace's image
1702
+ *
1703
+ * Behavior:
1704
+ * - If workspace is stopped: Update config, will use new image on next wake
1705
+ * - If workspace is running with no agents: Update config and restart
1706
+ * - If workspace is running with active agents: Skip (or force if specified)
1707
+ *
1708
+ * @param workspaceId - Workspace to update
1709
+ * @param newImage - New Docker image to use
1710
+ * @param options - Update options
1711
+ * @returns Update result with details
1712
+ */
1713
+ async gracefulUpdateImage(workspaceId, newImage, options = {}) {
1714
+ const workspace = await db.workspaces.findById(workspaceId);
1715
+ if (!workspace) {
1716
+ return {
1717
+ result: WorkspaceProvisioner.UpdateResult.ERROR,
1718
+ workspaceId,
1719
+ error: 'Workspace not found',
1720
+ };
1721
+ }
1722
+ // Only Fly.io supports graceful updates
1723
+ if (!(this.provisioner instanceof FlyProvisioner)) {
1724
+ return {
1725
+ result: WorkspaceProvisioner.UpdateResult.NOT_SUPPORTED,
1726
+ workspaceId,
1727
+ error: 'Graceful updates only supported on Fly.io',
1728
+ };
1729
+ }
1730
+ const flyProvisioner = this.provisioner;
1731
+ try {
1732
+ // Check machine state
1733
+ const machineState = await flyProvisioner.getMachineState(workspace);
1734
+ if (machineState === 'stopped' || machineState === 'suspended') {
1735
+ // Machine is not running - safe to update, will apply on next wake
1736
+ await flyProvisioner.updateMachineImage(workspace, newImage);
1737
+ console.log(`[provisioner] Updated stopped workspace ${workspaceId.substring(0, 8)} to ${newImage}`);
1738
+ return {
1739
+ result: WorkspaceProvisioner.UpdateResult.UPDATED_PENDING_RESTART,
1740
+ workspaceId,
1741
+ machineState,
1742
+ };
1743
+ }
1744
+ if (machineState === 'started') {
1745
+ // Machine is running - check for active agents
1746
+ const agentCheck = await flyProvisioner.checkActiveAgents(workspace);
1747
+ if (agentCheck.hasActiveAgents && !options.force) {
1748
+ // Has active agents and not forcing - skip
1749
+ console.log(`[provisioner] Skipped workspace ${workspaceId.substring(0, 8)}: ${agentCheck.agentCount} active agents`);
1750
+ return {
1751
+ result: WorkspaceProvisioner.UpdateResult.SKIPPED_ACTIVE_AGENTS,
1752
+ workspaceId,
1753
+ machineState,
1754
+ agentCount: agentCheck.agentCount,
1755
+ agents: agentCheck.agents,
1756
+ };
1757
+ }
1758
+ // Update the image config
1759
+ await flyProvisioner.updateMachineImage(workspace, newImage);
1760
+ if (options.skipRestart) {
1761
+ // Config updated but not restarting - will apply on next restart/auto-stop-wake
1762
+ console.log(`[provisioner] Updated workspace ${workspaceId.substring(0, 8)} config (restart skipped)`);
1763
+ return {
1764
+ result: WorkspaceProvisioner.UpdateResult.UPDATED_PENDING_RESTART,
1765
+ workspaceId,
1766
+ machineState,
1767
+ agentCount: agentCheck.agentCount,
1768
+ agents: agentCheck.agents,
1769
+ };
1770
+ }
1771
+ // Restart to apply new image
1772
+ await flyProvisioner.restart(workspace);
1773
+ console.log(`[provisioner] Updated and restarted workspace ${workspaceId.substring(0, 8)}`);
1774
+ return {
1775
+ result: WorkspaceProvisioner.UpdateResult.UPDATED,
1776
+ workspaceId,
1777
+ machineState,
1778
+ agentCount: agentCheck.agentCount,
1779
+ };
1780
+ }
1781
+ // Unknown state
1782
+ return {
1783
+ result: WorkspaceProvisioner.UpdateResult.SKIPPED_NOT_RUNNING,
1784
+ workspaceId,
1785
+ machineState,
1786
+ };
1787
+ }
1788
+ catch (error) {
1789
+ console.error(`[provisioner] Error updating workspace ${workspaceId.substring(0, 8)}:`, error);
1790
+ return {
1791
+ result: WorkspaceProvisioner.UpdateResult.ERROR,
1792
+ workspaceId,
1793
+ error: error.message,
1794
+ };
1795
+ }
1796
+ }
1797
+ /**
1798
+ * Gracefully update all workspaces to a new image
1799
+ *
1800
+ * Processes workspaces in batches, respecting active agents unless forced.
1801
+ * Returns detailed results for each workspace.
1802
+ *
1803
+ * @param newImage - New Docker image to use
1804
+ * @param options - Update options
1805
+ * @returns Summary and per-workspace results
1806
+ */
1807
+ async gracefulUpdateAllImages(newImage, options = {}) {
1808
+ // Get all workspaces to update
1809
+ let workspaces;
1810
+ if (options.workspaceIds?.length) {
1811
+ // Specific workspaces
1812
+ workspaces = (await Promise.all(options.workspaceIds.map(id => db.workspaces.findById(id)))).filter((w) => w !== null);
1813
+ }
1814
+ else if (options.userIds?.length) {
1815
+ // Workspaces for specific users
1816
+ const allWorkspaces = await Promise.all(options.userIds.map(userId => db.workspaces.findByUserId(userId)));
1817
+ workspaces = allWorkspaces.flat();
1818
+ }
1819
+ else {
1820
+ // All workspaces - need to query by status to get running ones
1821
+ // For now, we'll get all workspaces from the provisioning provider
1822
+ workspaces = await db.workspaces.findAll();
1823
+ }
1824
+ // Filter to only Fly.io workspaces
1825
+ workspaces = workspaces.filter(w => w.computeProvider === 'fly' && w.computeId);
1826
+ console.log(`[provisioner] Starting graceful update of ${workspaces.length} workspaces to ${newImage}`);
1827
+ const batchSize = options.batchSize ?? 5;
1828
+ const results = [];
1829
+ // Process in batches
1830
+ for (let i = 0; i < workspaces.length; i += batchSize) {
1831
+ const batch = workspaces.slice(i, i + batchSize);
1832
+ const batchResults = await Promise.all(batch.map(workspace => this.gracefulUpdateImage(workspace.id, newImage, {
1833
+ force: options.force,
1834
+ skipRestart: options.skipRestart,
1835
+ })));
1836
+ results.push(...batchResults);
1837
+ // Small delay between batches to avoid overwhelming Fly API
1838
+ if (i + batchSize < workspaces.length) {
1839
+ await wait(1000);
1840
+ }
1841
+ }
1842
+ // Compute summary
1843
+ const summary = {
1844
+ total: results.length,
1845
+ updated: results.filter(r => r.result === WorkspaceProvisioner.UpdateResult.UPDATED).length,
1846
+ pendingRestart: results.filter(r => r.result === WorkspaceProvisioner.UpdateResult.UPDATED_PENDING_RESTART).length,
1847
+ skippedActiveAgents: results.filter(r => r.result === WorkspaceProvisioner.UpdateResult.SKIPPED_ACTIVE_AGENTS).length,
1848
+ skippedNotRunning: results.filter(r => r.result === WorkspaceProvisioner.UpdateResult.SKIPPED_NOT_RUNNING).length,
1849
+ errors: results.filter(r => r.result === WorkspaceProvisioner.UpdateResult.ERROR).length,
1850
+ };
1851
+ console.log(`[provisioner] Graceful update complete:`, summary);
1852
+ return { summary, results };
1853
+ }
1309
1854
  }
1310
1855
  // Singleton instance
1311
1856
  let _provisioner = null;