agent-relay 1.3.0 → 1.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (240) hide show
  1. package/.trajectories/active/traj_3yx9dy148mge.json +42 -0
  2. package/.trajectories/completed/2026-01/traj_1g7yx6qtg4ai.json +49 -0
  3. package/.trajectories/completed/2026-01/traj_1g7yx6qtg4ai.md +31 -0
  4. package/.trajectories/completed/2026-01/traj_4qwd4zmhfwp4.json +49 -0
  5. package/.trajectories/completed/2026-01/traj_4qwd4zmhfwp4.md +31 -0
  6. package/.trajectories/completed/2026-01/traj_6unwwmgyj5sq.json +109 -0
  7. package/.trajectories/completed/2026-01/traj_a0tqx8biw9c4.json +49 -0
  8. package/.trajectories/completed/2026-01/traj_a0tqx8biw9c4.md +31 -0
  9. package/.trajectories/completed/2026-01/traj_ax8uungxz2qh.json +66 -0
  10. package/.trajectories/completed/2026-01/traj_ax8uungxz2qh.md +36 -0
  11. package/.trajectories/completed/2026-01/traj_c9izbh2snpzf.json +49 -0
  12. package/.trajectories/completed/2026-01/traj_c9izbh2snpzf.md +31 -0
  13. package/.trajectories/completed/2026-01/traj_cpn70dw066nt.json +65 -0
  14. package/.trajectories/completed/2026-01/traj_cpn70dw066nt.md +37 -0
  15. package/.trajectories/completed/2026-01/traj_erglv2f8t9eh.json +36 -0
  16. package/.trajectories/completed/2026-01/traj_erglv2f8t9eh.md +21 -0
  17. package/.trajectories/completed/2026-01/traj_he75f24d1xfm.json +101 -0
  18. package/.trajectories/completed/2026-01/traj_he75f24d1xfm.md +52 -0
  19. package/.trajectories/completed/2026-01/traj_lgtodco7dp1n.json +61 -0
  20. package/.trajectories/completed/2026-01/traj_lgtodco7dp1n.md +36 -0
  21. package/.trajectories/completed/2026-01/traj_oszg9flv74pk.json +73 -0
  22. package/.trajectories/completed/2026-01/traj_oszg9flv74pk.md +41 -0
  23. package/.trajectories/completed/2026-01/traj_pulomd3y8cvj.json +77 -0
  24. package/.trajectories/completed/2026-01/traj_pulomd3y8cvj.md +42 -0
  25. package/.trajectories/completed/2026-01/traj_rsavt0jipi3c.json +109 -0
  26. package/.trajectories/completed/2026-01/traj_rsavt0jipi3c.md +56 -0
  27. package/.trajectories/completed/2026-01/traj_x721m1j9rzup.json +113 -0
  28. package/.trajectories/completed/2026-01/traj_x721m1j9rzup.md +57 -0
  29. package/.trajectories/completed/2026-01/traj_xjqvmep5ed3h.json +61 -0
  30. package/.trajectories/completed/2026-01/traj_xjqvmep5ed3h.md +36 -0
  31. package/.trajectories/completed/2026-01/traj_y7n6hfbf7dmg.json +49 -0
  32. package/.trajectories/completed/2026-01/traj_y7n6hfbf7dmg.md +31 -0
  33. package/.trajectories/completed/2026-01/traj_yvfkwnkdiso2.json +49 -0
  34. package/.trajectories/completed/2026-01/traj_yvfkwnkdiso2.md +31 -0
  35. package/.trajectories/index.json +140 -1
  36. package/TRAIL_GIT_AUTH_FIX.md +113 -0
  37. package/deploy/workspace/codex.config.toml +1 -1
  38. package/deploy/workspace/entrypoint.sh +20 -79
  39. package/deploy/workspace/gh-relay +156 -0
  40. package/deploy/workspace/git-credential-relay +5 -1
  41. package/dist/bridge/multi-project-client.js +13 -10
  42. package/dist/bridge/spawner.d.ts +2 -0
  43. package/dist/bridge/spawner.js +19 -1
  44. package/dist/bridge/types.d.ts +2 -0
  45. package/dist/cli/index.d.ts +1 -1
  46. package/dist/cli/index.js +115 -69
  47. package/dist/cloud/api/admin.js +16 -3
  48. package/dist/cloud/api/codex-auth-helper.js +28 -8
  49. package/dist/cloud/api/consensus.d.ts +13 -0
  50. package/dist/cloud/api/consensus.js +259 -0
  51. package/dist/cloud/api/daemons.js +205 -1
  52. package/dist/cloud/api/git.js +37 -7
  53. package/dist/cloud/api/onboarding.js +4 -1
  54. package/dist/cloud/api/provider-env.d.ts +5 -0
  55. package/dist/cloud/api/provider-env.js +27 -0
  56. package/dist/cloud/api/providers.js +2 -0
  57. package/dist/cloud/api/test-helpers.js +130 -0
  58. package/dist/cloud/api/workspaces.js +38 -3
  59. package/dist/cloud/db/bulk-ingest.d.ts +88 -0
  60. package/dist/cloud/db/bulk-ingest.js +268 -0
  61. package/dist/cloud/db/drizzle.d.ts +33 -0
  62. package/dist/cloud/db/drizzle.js +174 -2
  63. package/dist/cloud/db/index.d.ts +24 -5
  64. package/dist/cloud/db/index.js +19 -4
  65. package/dist/cloud/db/schema.d.ts +397 -3
  66. package/dist/cloud/db/schema.js +75 -1
  67. package/dist/cloud/provisioner/index.d.ts +8 -0
  68. package/dist/cloud/provisioner/index.js +256 -50
  69. package/dist/cloud/server.js +47 -3
  70. package/dist/cloud/services/index.d.ts +1 -0
  71. package/dist/cloud/services/index.js +2 -0
  72. package/dist/cloud/services/nango.d.ts +3 -4
  73. package/dist/cloud/services/nango.js +11 -33
  74. package/dist/cloud/services/workspace-keepalive.d.ts +76 -0
  75. package/dist/cloud/services/workspace-keepalive.js +234 -0
  76. package/dist/config/relay-config.d.ts +23 -0
  77. package/dist/config/relay-config.js +23 -0
  78. package/dist/daemon/agent-manager.d.ts +20 -1
  79. package/dist/daemon/agent-manager.js +47 -0
  80. package/dist/daemon/agent-registry.js +4 -4
  81. package/dist/daemon/agent-signing.d.ts +158 -0
  82. package/dist/daemon/agent-signing.js +523 -0
  83. package/dist/daemon/api.js +18 -1
  84. package/dist/daemon/cli-auth.d.ts +4 -1
  85. package/dist/daemon/cli-auth.js +55 -11
  86. package/dist/daemon/cloud-sync.d.ts +47 -1
  87. package/dist/daemon/cloud-sync.js +152 -3
  88. package/dist/daemon/connection.d.ts +28 -0
  89. package/dist/daemon/connection.js +98 -15
  90. package/dist/daemon/consensus-integration.d.ts +167 -0
  91. package/dist/daemon/consensus-integration.js +371 -0
  92. package/dist/daemon/consensus.d.ts +271 -0
  93. package/dist/daemon/consensus.js +632 -0
  94. package/dist/daemon/delivery-tracker.d.ts +34 -0
  95. package/dist/daemon/delivery-tracker.js +104 -0
  96. package/dist/daemon/enhanced-features.d.ts +118 -0
  97. package/dist/daemon/enhanced-features.js +178 -0
  98. package/dist/daemon/index.d.ts +4 -0
  99. package/dist/daemon/index.js +5 -0
  100. package/dist/daemon/rate-limiter.d.ts +68 -0
  101. package/dist/daemon/rate-limiter.js +130 -0
  102. package/dist/daemon/router.d.ts +18 -11
  103. package/dist/daemon/router.js +55 -111
  104. package/dist/daemon/server.d.ts +13 -1
  105. package/dist/daemon/server.js +71 -9
  106. package/dist/daemon/sync-queue.d.ts +116 -0
  107. package/dist/daemon/sync-queue.js +361 -0
  108. package/dist/health-worker-manager.d.ts +62 -0
  109. package/dist/health-worker-manager.js +144 -0
  110. package/dist/health-worker.d.ts +9 -0
  111. package/dist/health-worker.js +79 -0
  112. package/dist/index.d.ts +2 -1
  113. package/dist/index.js +5 -1
  114. package/dist/memory/context-compaction.d.ts +156 -0
  115. package/dist/memory/context-compaction.js +453 -0
  116. package/dist/memory/index.d.ts +1 -0
  117. package/dist/memory/index.js +1 -0
  118. package/dist/protocol/channels.js +4 -4
  119. package/dist/protocol/framing.d.ts +72 -10
  120. package/dist/protocol/framing.js +194 -25
  121. package/dist/storage/adapter.d.ts +8 -1
  122. package/dist/storage/adapter.js +11 -0
  123. package/dist/storage/batched-sqlite-adapter.d.ts +71 -0
  124. package/dist/storage/batched-sqlite-adapter.js +183 -0
  125. package/dist/storage/dead-letter-queue.d.ts +196 -0
  126. package/dist/storage/dead-letter-queue.js +427 -0
  127. package/dist/storage/dlq-adapter.d.ts +195 -0
  128. package/dist/storage/dlq-adapter.js +664 -0
  129. package/dist/trajectory/config.d.ts +32 -14
  130. package/dist/trajectory/config.js +38 -16
  131. package/dist/trajectory/integration.js +217 -64
  132. package/dist/utils/git-remote.d.ts +47 -0
  133. package/dist/utils/git-remote.js +125 -0
  134. package/dist/utils/id-generator.d.ts +35 -0
  135. package/dist/utils/id-generator.js +60 -0
  136. package/dist/utils/index.d.ts +1 -0
  137. package/dist/utils/index.js +1 -0
  138. package/dist/utils/precompiled-patterns.d.ts +110 -0
  139. package/dist/utils/precompiled-patterns.js +322 -0
  140. package/dist/wrapper/auth-detection.js +1 -1
  141. package/dist/wrapper/base-wrapper.d.ts +36 -0
  142. package/dist/wrapper/base-wrapper.js +48 -2
  143. package/dist/wrapper/client.d.ts +14 -4
  144. package/dist/wrapper/client.js +84 -31
  145. package/dist/wrapper/idle-detector.d.ts +102 -0
  146. package/dist/wrapper/idle-detector.js +279 -0
  147. package/dist/wrapper/parser.d.ts +4 -0
  148. package/dist/wrapper/parser.js +19 -1
  149. package/dist/wrapper/pty-wrapper.d.ts +7 -1
  150. package/dist/wrapper/pty-wrapper.js +51 -27
  151. package/dist/wrapper/tmux-wrapper.d.ts +12 -1
  152. package/dist/wrapper/tmux-wrapper.js +65 -17
  153. package/package.json +5 -5
  154. package/scripts/run-migrations.js +43 -0
  155. package/scripts/verify-schema.js +134 -0
  156. package/tests/benchmarks/protocol.bench.ts +310 -0
  157. package/dist/dashboard/out/404.html +0 -1
  158. package/dist/dashboard/out/_next/static/T1tgCqVWHFIkV7ClEtzD7/_buildManifest.js +0 -1
  159. package/dist/dashboard/out/_next/static/T1tgCqVWHFIkV7ClEtzD7/_ssgManifest.js +0 -1
  160. package/dist/dashboard/out/_next/static/chunks/116-2502180def231162.js +0 -1
  161. package/dist/dashboard/out/_next/static/chunks/117-f7b8ab0809342e77.js +0 -2
  162. package/dist/dashboard/out/_next/static/chunks/282-980c2eb8fff20123.js +0 -1
  163. package/dist/dashboard/out/_next/static/chunks/532-bace199897eeab37.js +0 -9
  164. package/dist/dashboard/out/_next/static/chunks/648-5cc6e1921389a58a.js +0 -1
  165. package/dist/dashboard/out/_next/static/chunks/766-b54f0853794b78c3.js +0 -1
  166. package/dist/dashboard/out/_next/static/chunks/83-b51836037078006c.js +0 -1
  167. package/dist/dashboard/out/_next/static/chunks/891-6cd50de1224f70bb.js +0 -1
  168. package/dist/dashboard/out/_next/static/chunks/899-bb19a9b3d9b39ea6.js +0 -1
  169. package/dist/dashboard/out/_next/static/chunks/app/_not-found/page-53b8a69f76db17d0.js +0 -1
  170. package/dist/dashboard/out/_next/static/chunks/app/app/onboarding/page-8939b0fc700f7eca.js +0 -1
  171. package/dist/dashboard/out/_next/static/chunks/app/app/page-5af1b6b439858aa6.js +0 -1
  172. package/dist/dashboard/out/_next/static/chunks/app/connect-repos/page-f45ecbc3e06134fc.js +0 -1
  173. package/dist/dashboard/out/_next/static/chunks/app/history/page-8c8bed33beb2bf1c.js +0 -1
  174. package/dist/dashboard/out/_next/static/chunks/app/layout-2433bb48965f4333.js +0 -1
  175. package/dist/dashboard/out/_next/static/chunks/app/login/page-16f3b49e55b1e0ed.js +0 -1
  176. package/dist/dashboard/out/_next/static/chunks/app/metrics/page-ac39dc0cc3c26fa7.js +0 -1
  177. package/dist/dashboard/out/_next/static/chunks/app/page-4a5938c18a11a654.js +0 -1
  178. package/dist/dashboard/out/_next/static/chunks/app/pricing/page-982a7000fee44014.js +0 -1
  179. package/dist/dashboard/out/_next/static/chunks/app/providers/page-ac3a6ac433fd6001.js +0 -1
  180. package/dist/dashboard/out/_next/static/chunks/app/providers/setup/[provider]/page-09f9caae98a18c09.js +0 -1
  181. package/dist/dashboard/out/_next/static/chunks/app/signup/page-547dd0ca55ecd0ba.js +0 -1
  182. package/dist/dashboard/out/_next/static/chunks/e868780c-48e5f147c90a3a41.js +0 -18
  183. package/dist/dashboard/out/_next/static/chunks/fd9d1056-609918ca7b6280bb.js +0 -1
  184. package/dist/dashboard/out/_next/static/chunks/framework-f66176bb897dc684.js +0 -1
  185. package/dist/dashboard/out/_next/static/chunks/main-2ee6beb2ae96d210.js +0 -1
  186. package/dist/dashboard/out/_next/static/chunks/main-app-5d692157a8eb1fd9.js +0 -1
  187. package/dist/dashboard/out/_next/static/chunks/pages/_app-72b849fbd24ac258.js +0 -1
  188. package/dist/dashboard/out/_next/static/chunks/pages/_error-7ba65e1336b92748.js +0 -1
  189. package/dist/dashboard/out/_next/static/chunks/polyfills-42372ed130431b0a.js +0 -1
  190. package/dist/dashboard/out/_next/static/chunks/webpack-1cdd8ed57114d5e1.js +0 -1
  191. package/dist/dashboard/out/_next/static/css/85d2af9c7ac74d62.css +0 -1
  192. package/dist/dashboard/out/_next/static/css/fe4b28883eeff359.css +0 -1
  193. package/dist/dashboard/out/alt-logos/agent-relay-logo-128.png +0 -0
  194. package/dist/dashboard/out/alt-logos/agent-relay-logo-256.png +0 -0
  195. package/dist/dashboard/out/alt-logos/agent-relay-logo-32.png +0 -0
  196. package/dist/dashboard/out/alt-logos/agent-relay-logo-512.png +0 -0
  197. package/dist/dashboard/out/alt-logos/agent-relay-logo-64.png +0 -0
  198. package/dist/dashboard/out/alt-logos/agent-relay-logo.svg +0 -45
  199. package/dist/dashboard/out/alt-logos/logo.svg +0 -38
  200. package/dist/dashboard/out/alt-logos/monogram-logo-128.png +0 -0
  201. package/dist/dashboard/out/alt-logos/monogram-logo-256.png +0 -0
  202. package/dist/dashboard/out/alt-logos/monogram-logo-32.png +0 -0
  203. package/dist/dashboard/out/alt-logos/monogram-logo-512.png +0 -0
  204. package/dist/dashboard/out/alt-logos/monogram-logo-64.png +0 -0
  205. package/dist/dashboard/out/alt-logos/monogram-logo.svg +0 -38
  206. package/dist/dashboard/out/app/onboarding.html +0 -1
  207. package/dist/dashboard/out/app/onboarding.txt +0 -7
  208. package/dist/dashboard/out/app.html +0 -1
  209. package/dist/dashboard/out/app.txt +0 -7
  210. package/dist/dashboard/out/apple-icon.png +0 -0
  211. package/dist/dashboard/out/connect-repos.html +0 -1
  212. package/dist/dashboard/out/connect-repos.txt +0 -7
  213. package/dist/dashboard/out/history.html +0 -1
  214. package/dist/dashboard/out/history.txt +0 -7
  215. package/dist/dashboard/out/index.html +0 -1
  216. package/dist/dashboard/out/index.txt +0 -7
  217. package/dist/dashboard/out/login.html +0 -6
  218. package/dist/dashboard/out/login.txt +0 -7
  219. package/dist/dashboard/out/metrics.html +0 -1
  220. package/dist/dashboard/out/metrics.txt +0 -7
  221. package/dist/dashboard/out/pricing.html +0 -13
  222. package/dist/dashboard/out/pricing.txt +0 -7
  223. package/dist/dashboard/out/providers/setup/claude.html +0 -1
  224. package/dist/dashboard/out/providers/setup/claude.txt +0 -8
  225. package/dist/dashboard/out/providers/setup/codex.html +0 -1
  226. package/dist/dashboard/out/providers/setup/codex.txt +0 -8
  227. package/dist/dashboard/out/providers.html +0 -1
  228. package/dist/dashboard/out/providers.txt +0 -7
  229. package/dist/dashboard/out/signup.html +0 -6
  230. package/dist/dashboard/out/signup.txt +0 -7
  231. package/dist/dashboard-server/metrics.d.ts +0 -105
  232. package/dist/dashboard-server/metrics.js +0 -193
  233. package/dist/dashboard-server/needs-attention.d.ts +0 -24
  234. package/dist/dashboard-server/needs-attention.js +0 -78
  235. package/dist/dashboard-server/server.d.ts +0 -15
  236. package/dist/dashboard-server/server.js +0 -3776
  237. package/dist/dashboard-server/start.d.ts +0 -6
  238. package/dist/dashboard-server/start.js +0 -13
  239. package/dist/dashboard-server/user-bridge.d.ts +0 -103
  240. package/dist/dashboard-server/user-bridge.js +0 -189
@@ -4,12 +4,48 @@
4
4
  * One-click provisioning for compute resources (Fly.io, Railway, Docker).
5
5
  */
6
6
  import * as crypto from 'crypto';
7
+ import { createHash } from 'node:crypto';
7
8
  import { getConfig } from '../config.js';
8
9
  import { db } from '../db/index.js';
9
10
  import { nangoService } from '../services/nango.js';
10
11
  import { canAutoScale, canScaleToTier, getResourceTierForPlan, } from '../services/planLimits.js';
11
12
  import { deriveSshPassword } from '../services/ssh-security.js';
13
+ // ============================================================================
14
+ // Daemon API Key Management
15
+ // ============================================================================
16
+ /**
17
+ * Generate a daemon API key in the format ar_live_<64 hex chars> (32 random bytes, hex-encoded)
18
+ */
19
+ function generateDaemonApiKey() {
20
+ const random = crypto.randomBytes(32).toString('hex');
21
+ return `ar_live_${random}`;
22
+ }
23
+ /**
24
+ * Hash an API key for secure storage
25
+ */
26
+ function hashApiKey(apiKey) {
27
+ return createHash('sha256').update(apiKey).digest('hex');
28
+ }
29
+ /**
30
+ * Create a linked daemon record for a workspace during provisioning
31
+ * @param preGeneratedApiKey - Pre-generated API key (if not provided, one will be generated)
32
+ */
33
+ async function createLinkedDaemon(userId, workspaceId, machineId, preGeneratedApiKey) {
34
+ const apiKey = preGeneratedApiKey ?? generateDaemonApiKey();
35
+ const apiKeyHash = hashApiKey(apiKey);
36
+ const daemon = await db.linkedDaemons.create({
37
+ userId,
38
+ workspaceId,
39
+ name: `auto-provisioned-${Date.now()}`,
40
+ machineId,
41
+ apiKeyHash,
42
+ status: 'offline',
43
+ });
44
+ return { daemonId: daemon.id, apiKey };
45
+ }
12
46
  const WORKSPACE_PORT = 3888;
47
+ const WORKSPACE_HEALTH_PORT = 3889; // Health check on separate thread - always responsive
48
+ const WORKSPACE_SSH_PORT = 3022;
13
49
  const CODEX_OAUTH_PORT = 1455; // Codex CLI OAuth callback port - must be mapped for local dev
14
50
  const FETCH_TIMEOUT_MS = 10_000;
15
51
  const WORKSPACE_IMAGE = process.env.WORKSPACE_IMAGE || 'ghcr.io/agentworkforce/relay-workspace:latest';
@@ -355,12 +391,15 @@ class FlyProvisioner {
355
391
  updateProvisioningStage(workspace.id, 'networking');
356
392
  // Allocate IPs for the app (required for public DNS)
357
393
  // Must use GraphQL API - Machines REST API doesn't support IP allocation
358
- // Shared IPv4 is free, IPv6 is free
394
+ // IMPORTANT: We use dedicated IPv4 ($2/mo) instead of shared because:
395
+ // - Shared IPv4 doesn't properly handle raw TCP on non-standard ports (like SSH on 3022)
396
+ // - SSH tunnel connections fail with "Connection closed by remote host" on shared IPs
397
+ // - Dedicated IPv4 is required for raw TCP services to work correctly
359
398
  console.log(`[fly] Allocating IPs for ${appName}...`);
360
399
  const allocateIP = async (type) => {
361
400
  try {
362
- // Map our type to Fly GraphQL enum
363
- const graphqlType = type === 'shared_v4' ? 'shared_v4' : 'v6';
401
+ // Map our type to Fly GraphQL enum (v4 = dedicated IPv4)
402
+ const graphqlType = type;
364
403
  const res = await fetchWithRetry('https://api.fly.io/graphql', {
365
404
  method: 'POST',
366
405
  headers: {
@@ -412,11 +451,11 @@ class FlyProvisioner {
412
451
  return false;
413
452
  }
414
453
  };
415
- const [sharedV4Result, v6Result] = await Promise.all([
416
- allocateIP('shared_v4'),
454
+ const [v4Result, v6Result] = await Promise.all([
455
+ allocateIP('v4'),
417
456
  allocateIP('v6'),
418
457
  ]);
419
- console.log(`[fly] IP allocation results: shared_v4=${sharedV4Result}, v6=${v6Result}`);
458
+ console.log(`[fly] IP allocation results: v4=${v4Result}, v6=${v6Result}`);
420
459
  // Stage: Secrets
421
460
  updateProvisioningStage(workspace.id, 'secrets');
422
461
  // Set secrets (provider credentials)
@@ -447,6 +486,9 @@ class FlyProvisioner {
447
486
  }
448
487
  // Stage: Machine (includes volume creation)
449
488
  updateProvisioningStage(workspace.id, 'machine');
489
+ // Generate API key for cloud message sync BEFORE creating the machine
490
+ // The key is set as an env var on the machine and stored hashed in linkedDaemons
491
+ const machineApiKey = generateDaemonApiKey();
450
492
  // Create volume with automatic daily snapshots before machine
451
493
  // Fly.io takes daily snapshots automatically; we configure retention
452
494
  const volume = await this.createVolume(appName);
@@ -500,13 +542,20 @@ class FlyProvisioner {
500
542
  PROVIDERS: (workspace.config.providers ?? []).join(','),
501
543
  PORT: String(WORKSPACE_PORT),
502
544
  AGENT_RELAY_DASHBOARD_PORT: String(WORKSPACE_PORT),
545
+ // Store repos on persistent volume (/data) so they survive container restarts
546
+ // Without this, repos are cloned to /workspace (ephemeral) and lost on restart
547
+ WORKSPACE_DIR: '/data/repos',
503
548
  // Git gateway configuration
504
549
  CLOUD_API_URL: this.cloudApiUrl,
505
550
  WORKSPACE_TOKEN: this.generateWorkspaceToken(workspace.id),
551
+ // Daemon API key for cloud message sync
552
+ // Auto-generated during provisioning, stored in linkedDaemons table
553
+ AGENT_RELAY_API_KEY: machineApiKey,
506
554
  // SSH for CLI tunneling (Codex OAuth callback forwarding)
507
555
  // Each workspace gets a unique password derived from its ID + secret salt
508
556
  ENABLE_SSH: 'true',
509
557
  SSH_PASSWORD: deriveSshPassword(workspace.id),
558
+ SSH_PORT: String(WORKSPACE_SSH_PORT),
510
559
  },
511
560
  services: [
512
561
  {
@@ -538,16 +587,16 @@ class FlyProvisioner {
538
587
  },
539
588
  },
540
589
  // SSH service for CLI tunneling (Codex OAuth callback forwarding)
541
- // Exposes port 2222 publicly for SSH connections from user's machine
590
+ // Exposes port 3022 publicly for SSH connections from user's machine
542
591
  {
543
592
  ports: [
544
593
  {
545
- port: 2222,
594
+ port: WORKSPACE_SSH_PORT,
546
595
  handlers: [], // Empty handlers = raw TCP passthrough
547
596
  },
548
597
  ],
549
598
  protocol: 'tcp',
550
- internal_port: 2222,
599
+ internal_port: WORKSPACE_SSH_PORT,
551
600
  // SSH connections should also wake the machine
552
601
  auto_stop_machines: 'stop',
553
602
  auto_start_machines: true,
@@ -557,11 +606,11 @@ class FlyProvisioner {
557
606
  checks: {
558
607
  health: {
559
608
  type: 'http',
560
- port: WORKSPACE_PORT,
609
+ port: WORKSPACE_HEALTH_PORT, // Health worker thread - responds even when main loop blocked
561
610
  path: '/health',
562
611
  interval: '30s',
563
- timeout: '5s',
564
- grace_period: '10s',
612
+ timeout: '10s', // Increased timeout for safety
613
+ grace_period: '30s', // Longer grace period for startup
565
614
  },
566
615
  },
567
616
  // Instance size based on plan - free tier gets smaller instance
@@ -581,6 +630,11 @@ class FlyProvisioner {
581
630
  throw new Error(`Failed to create Fly machine: ${error}`);
582
631
  }
583
632
  const machine = (await machineResponse.json());
633
+ // Create linked daemon for cloud message sync
634
+ // Pass the pre-generated API key so it matches what was set in the machine env vars
635
+ const { daemonId } = await createLinkedDaemon(workspace.userId, workspace.id, machine.id, // Use Fly machine ID as daemon's machine ID
636
+ machineApiKey);
637
+ console.log(`[fly] Created linked daemon ${daemonId.substring(0, 8)} for workspace ${workspace.id.substring(0, 8)}`);
584
638
  // Return custom domain URL if configured, otherwise default fly.dev
585
639
  const publicUrl = customHostname
586
640
  ? `https://${customHostname}`
@@ -807,50 +861,114 @@ class FlyProvisioner {
807
861
  }
808
862
  console.log(`[fly] Updated machine image for workspace ${workspace.id.substring(0, 8)} to ${newImage}`);
809
863
  }
864
+ /**
865
+ * Set secrets as environment variables for a workspace.
866
+ */
867
+ async setSecrets(workspace, secrets) {
868
+ if (!workspace.computeId || Object.keys(secrets).length === 0)
869
+ return;
870
+ const appName = `ar-${workspace.id.substring(0, 8)}`;
871
+ await fetchWithRetry(`https://api.machines.dev/v1/apps/${appName}/secrets`, {
872
+ method: 'POST',
873
+ headers: {
874
+ Authorization: `Bearer ${this.apiToken}`,
875
+ 'Content-Type': 'application/json',
876
+ },
877
+ body: JSON.stringify(secrets),
878
+ });
879
+ }
810
880
  /**
811
881
  * Check if workspace has active agents by querying the daemon
882
+ * Makes up to 3 attempts (waiting 2s, then 4s, between them) to handle machines that are starting up
812
883
  */
813
884
  async checkActiveAgents(workspace) {
814
885
  if (!workspace.publicUrl) {
815
- return { hasActiveAgents: false, agentCount: 0, agents: [] };
886
+ return { hasActiveAgents: false, agentCount: 0, agents: [], verified: true };
816
887
  }
817
- try {
818
- // Use internal Fly network URL if available (more reliable)
819
- const appName = `ar-${workspace.id.substring(0, 8)}`;
820
- const isOnFly = !!process.env.FLY_APP_NAME;
821
- const baseUrl = isOnFly
822
- ? `http://${appName}.internal:3888`
823
- : workspace.publicUrl;
824
- const controller = new AbortController();
825
- const timer = setTimeout(() => controller.abort(), 10_000);
826
- const response = await fetch(`${baseUrl}/api/agents`, {
827
- method: 'GET',
828
- headers: {
829
- 'Accept': 'application/json',
830
- },
831
- signal: controller.signal,
832
- });
833
- clearTimeout(timer);
834
- if (!response.ok) {
835
- console.warn(`[fly] Failed to check agents for ${workspace.id.substring(0, 8)}: ${response.status}`);
836
- return { hasActiveAgents: false, agentCount: 0, agents: [] };
888
+ // Use internal Fly network URL if available (more reliable)
889
+ const appName = `ar-${workspace.id.substring(0, 8)}`;
890
+ const isOnFly = !!process.env.FLY_APP_NAME;
891
+ const baseUrl = isOnFly
892
+ ? `http://${appName}.internal:3888`
893
+ : workspace.publicUrl;
894
+ const maxRetries = 3;
895
+ const retryDelays = [2000, 4000, 6000]; // 2s, 4s, 6s backoff
896
+ for (let attempt = 0; attempt < maxRetries; attempt++) {
897
+ try {
898
+ const controller = new AbortController();
899
+ const timer = setTimeout(() => controller.abort(), 10_000);
900
+ // Use /api/data endpoint which returns { agents: [...], ... }
901
+ // Note: /api/agents doesn't exist on the workspace dashboard-server
902
+ const response = await fetch(`${baseUrl}/api/data`, {
903
+ method: 'GET',
904
+ headers: {
905
+ 'Accept': 'application/json',
906
+ },
907
+ signal: controller.signal,
908
+ });
909
+ clearTimeout(timer);
910
+ if (!response.ok) {
911
+ console.warn(`[fly] Failed to check agents for ${workspace.id.substring(0, 8)}: HTTP ${response.status} (attempt ${attempt + 1}/${maxRetries})`);
912
+ if (attempt < maxRetries - 1) {
913
+ await new Promise(resolve => setTimeout(resolve, retryDelays[attempt]));
914
+ continue;
915
+ }
916
+ return { hasActiveAgents: false, agentCount: 0, agents: [], verified: false };
917
+ }
918
+ const data = await response.json();
919
+ const agents = data.agents || [];
920
+ // Diagnostic logging: capture raw agent data before filtering
921
+ if (agents.length > 0) {
922
+ console.log(`[fly] Workspace ${workspace.id.substring(0, 8)} raw agent data:`, agents.map(a => ({ name: a.name, status: a.status, activityState: a.activityState })));
923
+ }
924
+ // Treat any online agent as active unless explicitly disconnected/offline.
925
+ const activeAgents = agents.filter(a => {
926
+ const status = (a.status ?? '').toLowerCase();
927
+ const activityState = (a.activityState ?? '').toLowerCase();
928
+ const isProcessing = a.isProcessing === true;
929
+ if (activityState === 'active' || activityState === 'idle')
930
+ return true;
931
+ if (status && status !== 'disconnected' && status !== 'offline')
932
+ return true;
933
+ if (isProcessing)
934
+ return true;
935
+ return false;
936
+ });
937
+ // Log filtering results for diagnostics
938
+ if (agents.length > 0 && activeAgents.length !== agents.length) {
939
+ const filteredOut = agents.filter(a => {
940
+ const status = (a.status ?? '').toLowerCase();
941
+ const activityState = (a.activityState ?? '').toLowerCase();
942
+ const isProcessing = a.isProcessing === true;
943
+ if (activityState === 'active' || activityState === 'idle')
944
+ return false;
945
+ if (status && status !== 'disconnected' && status !== 'offline')
946
+ return false;
947
+ if (isProcessing)
948
+ return false;
949
+ return true;
950
+ });
951
+ console.log(`[fly] Workspace ${workspace.id.substring(0, 8)} filtered out agents:`, filteredOut.map(a => ({ name: a.name, status: a.status, activityState: a.activityState })));
952
+ }
953
+ return {
954
+ hasActiveAgents: activeAgents.length > 0,
955
+ agentCount: activeAgents.length,
956
+ agents: agents.map(a => ({ name: a.name, status: a.status || a.activityState || 'unknown' })),
957
+ verified: true,
958
+ };
959
+ }
960
+ catch (error) {
961
+ // Workspace might be stopped or unreachable - retry with backoff
962
+ console.warn(`[fly] Could not reach workspace ${workspace.id.substring(0, 8)} (attempt ${attempt + 1}/${maxRetries}):`, error.message);
963
+ if (attempt < maxRetries - 1) {
964
+ await new Promise(resolve => setTimeout(resolve, retryDelays[attempt]));
965
+ continue;
966
+ }
837
967
  }
838
- const data = await response.json();
839
- const agents = data.agents || [];
840
- // Consider agents with 'active' or 'idle' activity state as active
841
- // 'disconnected' agents are not active
842
- const activeAgents = agents.filter(a => a.status === 'running' || a.activityState === 'active' || a.activityState === 'idle');
843
- return {
844
- hasActiveAgents: activeAgents.length > 0,
845
- agentCount: activeAgents.length,
846
- agents: agents.map(a => ({ name: a.name, status: a.status || a.activityState || 'unknown' })),
847
- };
848
- }
849
- catch (error) {
850
- // Workspace might be stopped or unreachable - treat as no active agents
851
- console.warn(`[fly] Could not reach workspace ${workspace.id.substring(0, 8)} to check agents:`, error.message);
852
- return { hasActiveAgents: false, agentCount: 0, agents: [] };
853
968
  }
969
+ // All retries exhausted
970
+ console.warn(`[fly] Workspace ${workspace.id.substring(0, 8)} unreachable after ${maxRetries} attempts`);
971
+ return { hasActiveAgents: false, agentCount: 0, agents: [], verified: false };
854
972
  }
855
973
  /**
856
974
  * Get the current machine state
@@ -951,6 +1069,10 @@ class RailwayProvisioner {
951
1069
  });
952
1070
  const serviceData = await serviceResponse.json();
953
1071
  const serviceId = serviceData.data.serviceCreate.id;
1072
+ // Create linked daemon for cloud message sync
1073
+ // This generates an API key and registers the daemon in the linkedDaemons table
1074
+ const { daemonId, apiKey: railwayApiKey } = await createLinkedDaemon(workspace.userId, workspace.id, serviceId);
1075
+ console.log(`[railway] Created linked daemon ${daemonId.substring(0, 8)} for workspace ${workspace.id.substring(0, 8)}`);
954
1076
  // Set environment variables
955
1077
  const envVars = {
956
1078
  WORKSPACE_ID: workspace.id,
@@ -961,8 +1083,13 @@ class RailwayProvisioner {
961
1083
  PROVIDERS: (workspace.config.providers ?? []).join(','),
962
1084
  PORT: String(WORKSPACE_PORT),
963
1085
  AGENT_RELAY_DASHBOARD_PORT: String(WORKSPACE_PORT),
1086
+ // Store repos on persistent volume so they survive container restarts
1087
+ WORKSPACE_DIR: '/data/repos',
964
1088
  CLOUD_API_URL: this.cloudApiUrl,
965
1089
  WORKSPACE_TOKEN: this.generateWorkspaceToken(workspace.id),
1090
+ // Daemon API key for cloud message sync
1091
+ // Auto-generated during provisioning, stored in linkedDaemons table
1092
+ AGENT_RELAY_API_KEY: railwayApiKey,
966
1093
  };
967
1094
  for (const [provider, token] of credentials) {
968
1095
  envVars[`${provider.toUpperCase()}_TOKEN`] = token;
@@ -1113,6 +1240,37 @@ class RailwayProvisioner {
1113
1240
  }),
1114
1241
  });
1115
1242
  }
1243
+ async setEnvVars(workspace, envVars) {
1244
+ if (!workspace.computeId || Object.keys(envVars).length === 0)
1245
+ return;
1246
+ const linkedDaemons = await db.linkedDaemons.findByWorkspaceId(workspace.id);
1247
+ const serviceId = linkedDaemons[0]?.machineId;
1248
+ if (!serviceId) {
1249
+ console.warn(`[railway] No service ID found for workspace ${workspace.id}`);
1250
+ return;
1251
+ }
1252
+ await fetchWithRetry('https://backboard.railway.app/graphql/v2', {
1253
+ method: 'POST',
1254
+ headers: {
1255
+ Authorization: `Bearer ${this.apiToken}`,
1256
+ 'Content-Type': 'application/json',
1257
+ },
1258
+ body: JSON.stringify({
1259
+ query: `
1260
+ mutation SetVariables($input: VariableCollectionUpsertInput!) {
1261
+ variableCollectionUpsert(input: $input)
1262
+ }
1263
+ `,
1264
+ variables: {
1265
+ input: {
1266
+ projectId: workspace.computeId,
1267
+ serviceId,
1268
+ variables: envVars,
1269
+ },
1270
+ },
1271
+ }),
1272
+ });
1273
+ }
1116
1274
  }
1117
1275
  /**
1118
1276
  * Local Docker provisioner (for development/self-hosted)
@@ -1170,6 +1328,11 @@ class DockerProvisioner {
1170
1328
  }
1171
1329
  async provision(workspace, credentials) {
1172
1330
  const containerName = `ar-${workspace.id.substring(0, 8)}`;
1331
+ // Create linked daemon for cloud message sync
1332
+ // This generates an API key and registers the daemon in the linkedDaemons table
1333
+ // Use container name as daemon's machine ID (will be updated to actual container ID after creation)
1334
+ const { daemonId, apiKey: dockerApiKey } = await createLinkedDaemon(workspace.userId, workspace.id, containerName);
1335
+ console.log(`[docker] Created linked daemon ${daemonId.substring(0, 8)} for workspace ${workspace.id.substring(0, 8)}`);
1173
1336
  // Build environment variables
1174
1337
  const envArgs = [
1175
1338
  `-e WORKSPACE_ID=${workspace.id}`,
@@ -1180,8 +1343,13 @@ class DockerProvisioner {
1180
1343
  `-e PROVIDERS=${(workspace.config.providers ?? []).join(',')}`,
1181
1344
  `-e PORT=${WORKSPACE_PORT}`,
1182
1345
  `-e AGENT_RELAY_DASHBOARD_PORT=${WORKSPACE_PORT}`,
1346
+ // Store repos on persistent volume so they survive container restarts
1347
+ `-e WORKSPACE_DIR=/data/repos`,
1183
1348
  `-e CLOUD_API_URL=${this.cloudApiUrlForContainer}`,
1184
1349
  `-e WORKSPACE_TOKEN=${this.generateWorkspaceToken(workspace.id)}`,
1350
+ // Daemon API key for cloud message sync
1351
+ // Auto-generated during provisioning, stored in linkedDaemons table
1352
+ `-e AGENT_RELAY_API_KEY=${dockerApiKey}`,
1185
1353
  ];
1186
1354
  for (const [provider, token] of credentials) {
1187
1355
  envArgs.push(`-e ${provider.toUpperCase()}_TOKEN=${token}`);
@@ -1214,12 +1382,13 @@ class DockerProvisioner {
1214
1382
  // Set CODEX_DIRECT_PORT=true to also map port 1455 directly (for debugging only)
1215
1383
  const directCodexPort = process.env.CODEX_DIRECT_PORT === 'true';
1216
1384
  const portMappings = directCodexPort
1217
- ? `-p ${hostPort}:${WORKSPACE_PORT} -p ${sshHostPort}:2222 -p ${CODEX_OAUTH_PORT}:${CODEX_OAUTH_PORT}`
1218
- : `-p ${hostPort}:${WORKSPACE_PORT} -p ${sshHostPort}:2222`;
1385
+ ? `-p ${hostPort}:${WORKSPACE_PORT} -p ${sshHostPort}:${WORKSPACE_SSH_PORT} -p ${CODEX_OAUTH_PORT}:${CODEX_OAUTH_PORT}`
1386
+ : `-p ${hostPort}:${WORKSPACE_PORT} -p ${sshHostPort}:${WORKSPACE_SSH_PORT}`;
1219
1387
  // Enable SSH in the container for tunneling
1220
1388
  // Each workspace gets a unique password derived from its ID + secret salt
1221
1389
  envArgs.push('-e ENABLE_SSH=true');
1222
1390
  envArgs.push(`-e SSH_PASSWORD=${deriveSshPassword(workspace.id)}`);
1391
+ envArgs.push(`-e SSH_PORT=${WORKSPACE_SSH_PORT}`);
1223
1392
  execSync(`docker run -d --user root --name ${containerName} ${networkArg} ${volumeArgs} ${portMappings} ${envArgs.join(' ')} ${WORKSPACE_IMAGE}`, { stdio: 'pipe' });
1224
1393
  const publicUrl = `http://localhost:${hostPort}`;
1225
1394
  // Wait for container to be healthy before returning
@@ -1291,6 +1460,9 @@ class DockerProvisioner {
1291
1460
  throw new Error(`Failed to restart container: ${error}`);
1292
1461
  }
1293
1462
  }
1463
+ async setEnvVars(_workspace, _envVars) {
1464
+ console.warn('[docker] Updating environment variables for running containers is not supported.');
1465
+ }
1294
1466
  }
1295
1467
  /**
1296
1468
  * Main Workspace Provisioner
@@ -1473,6 +1645,25 @@ export class WorkspaceProvisioner {
1473
1645
  }
1474
1646
  await this.provisioner.restart(workspace);
1475
1647
  }
1648
+ /**
1649
+ * Update environment variables for a workspace instance.
1650
+ */
1651
+ async setWorkspaceEnvVars(workspace, envVars) {
1652
+ if (Object.keys(envVars).length === 0)
1653
+ return;
1654
+ if (this.provisioner instanceof FlyProvisioner) {
1655
+ await this.provisioner.setSecrets(workspace, envVars);
1656
+ return;
1657
+ }
1658
+ if (this.provisioner instanceof RailwayProvisioner) {
1659
+ await this.provisioner.setEnvVars(workspace, envVars);
1660
+ return;
1661
+ }
1662
+ if (this.provisioner instanceof DockerProvisioner) {
1663
+ await this.provisioner.setEnvVars(workspace, envVars);
1664
+ return;
1665
+ }
1666
+ }
1476
1667
  /**
1477
1668
  * Stop a workspace
1478
1669
  */
@@ -1693,6 +1884,7 @@ export class WorkspaceProvisioner {
1693
1884
  UPDATED: 'updated',
1694
1885
  UPDATED_PENDING_RESTART: 'updated_pending_restart',
1695
1886
  SKIPPED_ACTIVE_AGENTS: 'skipped_active_agents',
1887
+ SKIPPED_VERIFICATION_FAILED: 'skipped_verification_failed',
1696
1888
  SKIPPED_NOT_RUNNING: 'skipped_not_running',
1697
1889
  NOT_SUPPORTED: 'not_supported',
1698
1890
  ERROR: 'error',
@@ -1744,6 +1936,19 @@ export class WorkspaceProvisioner {
1744
1936
  if (machineState === 'started') {
1745
1937
  // Machine is running - check for active agents
1746
1938
  const agentCheck = await flyProvisioner.checkActiveAgents(workspace);
1939
+ // If we couldn't verify agent status and not forcing, skip to be safe
1940
+ // This is expected behavior for workspaces that are waking up from auto-stop
1941
+ // or experiencing temporary network issues - not an error condition
1942
+ if (!agentCheck.verified && !options.force) {
1943
+ console.log(`[provisioner] Skipped workspace ${workspaceId.substring(0, 8)}: workspace unreachable (will update on next restart)`);
1944
+ return {
1945
+ result: WorkspaceProvisioner.UpdateResult.SKIPPED_VERIFICATION_FAILED,
1946
+ workspaceId,
1947
+ machineState,
1948
+ // Use 'reason' instead of 'error' - this is expected behavior, not an error
1949
+ reason: 'Workspace unreachable - will update on next restart or when accessible',
1950
+ };
1951
+ }
1747
1952
  if (agentCheck.hasActiveAgents && !options.force) {
1748
1953
  // Has active agents and not forcing - skip
1749
1954
  console.log(`[provisioner] Skipped workspace ${workspaceId.substring(0, 8)}: ${agentCheck.agentCount} active agents`);
@@ -1845,6 +2050,7 @@ export class WorkspaceProvisioner {
1845
2050
  updated: results.filter(r => r.result === WorkspaceProvisioner.UpdateResult.UPDATED).length,
1846
2051
  pendingRestart: results.filter(r => r.result === WorkspaceProvisioner.UpdateResult.UPDATED_PENDING_RESTART).length,
1847
2052
  skippedActiveAgents: results.filter(r => r.result === WorkspaceProvisioner.UpdateResult.SKIPPED_ACTIVE_AGENTS).length,
2053
+ skippedVerificationFailed: results.filter(r => r.result === WorkspaceProvisioner.UpdateResult.SKIPPED_VERIFICATION_FAILED).length,
1848
2054
  skippedNotRunning: results.filter(r => r.result === WorkspaceProvisioner.UpdateResult.SKIPPED_NOT_RUNNING).length,
1849
2055
  errors: results.filter(r => r.result === WorkspaceProvisioner.UpdateResult.ERROR).length,
1850
2056
  };
@@ -15,7 +15,7 @@ import { RedisStore } from 'connect-redis';
15
15
  import { WebSocketServer, WebSocket } from 'ws';
16
16
  import { getConfig } from './config.js';
17
17
  import { runMigrations } from './db/index.js';
18
- import { getScalingOrchestrator, getComputeEnforcementService, getIntroExpirationService } from './services/index.js';
18
+ import { getScalingOrchestrator, getComputeEnforcementService, getIntroExpirationService, getWorkspaceKeepaliveService } from './services/index.js';
19
19
  const __filename = fileURLToPath(import.meta.url);
20
20
  const __dirname = path.dirname(__filename);
21
21
  // API routers
@@ -37,6 +37,7 @@ import { nangoAuthRouter } from './api/nango-auth.js';
37
37
  import { gitRouter } from './api/git.js';
38
38
  import { codexAuthHelperRouter } from './api/codex-auth-helper.js';
39
39
  import { adminRouter } from './api/admin.js';
40
+ import { consensusRouter } from './api/consensus.js';
40
41
  import { db } from './db/index.js';
41
42
  import { validateSshSecurityConfig } from './services/ssh-security.js';
42
43
  /**
@@ -176,17 +177,19 @@ export async function createServer() {
176
177
  });
177
178
  // Lightweight CSRF protection using session token
178
179
  const SAFE_METHODS = new Set(['GET', 'HEAD', 'OPTIONS']);
179
- // Paths exempt from CSRF (webhooks from external services, workspace proxy, local auth callbacks)
180
+ // Paths exempt from CSRF (webhooks from external services, workspace proxy, local auth callbacks, admin API)
180
181
  const CSRF_EXEMPT_PATHS = [
181
182
  '/api/webhooks/',
182
183
  '/api/auth/nango/webhook',
183
184
  '/api/auth/codex-helper/callback',
185
+ '/api/admin/', // Admin API uses X-Admin-Secret header auth
184
186
  ];
185
187
  // Additional pattern for workspace proxy routes (contains /proxy/)
186
188
  const isWorkspaceProxyRoute = (path) => /^\/api\/workspaces\/[^/]+\/proxy\//.test(path);
187
189
  app.use((req, res, next) => {
188
190
  // Skip CSRF for webhook endpoints and workspace proxy routes
189
- if (CSRF_EXEMPT_PATHS.some(path => req.path.startsWith(path)) || isWorkspaceProxyRoute(req.path)) {
191
+ const isExemptPath = CSRF_EXEMPT_PATHS.some(exemptPath => req.path.startsWith(exemptPath));
192
+ if (isExemptPath || isWorkspaceProxyRoute(req.path)) {
190
193
  return next();
191
194
  }
192
195
  if (!req.session)
@@ -212,6 +215,11 @@ export async function createServer() {
212
215
  if (authHeader?.startsWith('Bearer ')) {
213
216
  return next();
214
217
  }
218
+ // Skip CSRF for admin API key authenticated requests
219
+ const adminSecret = req.get('x-admin-secret');
220
+ if (adminSecret) {
221
+ return next();
222
+ }
215
223
  // Skip CSRF for test endpoints in non-production
216
224
  if (process.env.NODE_ENV !== 'production' && req.path.startsWith('/api/test/')) {
217
225
  return next();
@@ -247,6 +255,7 @@ export async function createServer() {
247
255
  // --- Routes with session auth ---
248
256
  app.use('/api/providers', providersRouter);
249
257
  app.use('/api/workspaces', workspacesRouter);
258
+ app.use('/api', consensusRouter); // Consensus API (nested under /api/workspaces/:id/consensus)
250
259
  app.use('/api/repos', reposRouter);
251
260
  app.use('/api/onboarding', onboardingRouter);
252
261
  app.use('/api/billing', billingRouter);
@@ -316,6 +325,8 @@ export async function createServer() {
316
325
  let scalingOrchestrator = null;
317
326
  let computeEnforcement = null;
318
327
  let introExpiration = null;
328
+ let workspaceKeepalive = null;
329
+ let daemonStaleCheckInterval = null;
319
330
  // Create HTTP server for WebSocket upgrade handling
320
331
  const httpServer = http.createServer(app);
321
332
  // ===== Presence WebSocket =====
@@ -644,7 +655,31 @@ export async function createServer() {
644
655
  catch (error) {
645
656
  console.warn('[cloud] Failed to start intro expiration:', error);
646
657
  }
658
+ // Start workspace keepalive service (pings workspaces with active agents)
659
+ // This prevents Fly.io from idling machines that have running Claude agents
660
+ try {
661
+ workspaceKeepalive = getWorkspaceKeepaliveService();
662
+ workspaceKeepalive.start();
663
+ console.log('[cloud] Workspace keepalive service started');
664
+ }
665
+ catch (error) {
666
+ console.warn('[cloud] Failed to start workspace keepalive:', error);
667
+ }
647
668
  }
669
+ // Start daemon stale check (mark daemons offline if no heartbeat for 2+ minutes)
670
+ // Runs every 60 seconds regardless of RELAY_CLOUD_ENABLED
671
+ daemonStaleCheckInterval = setInterval(async () => {
672
+ try {
673
+ const count = await db.linkedDaemons.markStale();
674
+ if (count > 0) {
675
+ console.log(`[cloud] Marked ${count} daemon(s) as offline (stale)`);
676
+ }
677
+ }
678
+ catch (error) {
679
+ console.error('[cloud] Failed to mark stale daemons:', error);
680
+ }
681
+ }, 60_000); // Every 60 seconds
682
+ console.log('[cloud] Daemon stale check started (60s interval)');
648
683
  return new Promise((resolve) => {
649
684
  server = httpServer.listen(config.port, () => {
650
685
  console.log(`Agent Relay Cloud running on port ${config.port}`);
@@ -667,6 +702,15 @@ export async function createServer() {
667
702
  if (introExpiration) {
668
703
  introExpiration.stop();
669
704
  }
705
+ // Stop workspace keepalive service
706
+ if (workspaceKeepalive) {
707
+ workspaceKeepalive.stop();
708
+ }
709
+ // Stop daemon stale check
710
+ if (daemonStaleCheckInterval) {
711
+ clearInterval(daemonStaleCheckInterval);
712
+ daemonStaleCheckInterval = null;
713
+ }
670
714
  // Close WebSocket server
671
715
  wssPresence.close();
672
716
  if (server) {
@@ -11,4 +11,5 @@ export { spawnCIFixAgent, notifyAgentOfCIFailure, completeFixAttempt, getFailure
11
11
  export { handleMention, handleIssueAssignment, getPendingMentions, getPendingIssueAssignments, processPendingMentions, processPendingIssueAssignments, KNOWN_AGENTS, isKnownAgent, } from './mention-handler.js';
12
12
  export { ComputeEnforcementService, ComputeEnforcementConfig, EnforcementResult, getComputeEnforcementService, createComputeEnforcementService, } from './compute-enforcement.js';
13
13
  export { IntroExpirationService, IntroExpirationConfig, IntroStatus, ExpirationResult as IntroExpirationResult, INTRO_PERIOD_DAYS, getIntroStatus, getIntroExpirationService, startIntroExpirationService, stopIntroExpirationService, } from './intro-expiration.js';
14
+ export { WorkspaceKeepaliveService, WorkspaceKeepaliveConfig, KeepaliveStats, getWorkspaceKeepaliveService, createWorkspaceKeepaliveService, } from './workspace-keepalive.js';
14
15
  //# sourceMappingURL=index.d.ts.map
@@ -16,4 +16,6 @@ export { handleMention, handleIssueAssignment, getPendingMentions, getPendingIss
16
16
  export { ComputeEnforcementService, getComputeEnforcementService, createComputeEnforcementService, } from './compute-enforcement.js';
17
17
  // Intro expiration (auto-resize after free tier intro period)
18
18
  export { IntroExpirationService, INTRO_PERIOD_DAYS, getIntroStatus, getIntroExpirationService, startIntroExpirationService, stopIntroExpirationService, } from './intro-expiration.js';
19
+ // Workspace keepalive (prevent Fly.io from idling machines with active agents)
20
+ export { WorkspaceKeepaliveService, getWorkspaceKeepaliveService, createWorkspaceKeepaliveService, } from './workspace-keepalive.js';
19
21
  //# sourceMappingURL=index.js.map
@@ -38,10 +38,9 @@ declare class NangoService {
38
38
  * This is the user-level token (not the installation token).
39
39
  * Use this for operations that require user context (e.g., gh CLI).
40
40
  *
41
- * The user token can be found in:
42
- * 1. getToken() without installation flag
43
- * 2. connection_config.access_token in github-app-oauth
44
- * 3. Separate 'github' user connection
41
+ * The user token is stored in connection_config.userCredentials.access_token
42
+ * by Nango's GitHub App OAuth flow. This is a gho_* or ghu_* token that
43
+ * works for both git operations and gh CLI commands.
45
44
  */
46
45
  getGithubUserOAuthToken(connectionId: string): Promise<string>;
47
46
  /**