@plosson/agentio 0.7.3 → 0.7.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@plosson/agentio",
3
- "version": "0.7.3",
3
+ "version": "0.7.4",
4
4
  "description": "CLI for LLM agents to interact with communication and tracking services",
5
5
  "type": "module",
6
6
  "license": "MIT",
@@ -39,6 +39,8 @@ interface FakeRunnerOptions {
39
39
  loggedIn?: boolean;
40
40
  existingApp?: SiteioApp | null;
41
41
  deployedApp?: SiteioApp | null;
42
+ /** Stdout returned by logsApp. Default: empty string. */
43
+ logsStdout?: string;
42
44
  failOn?:
43
45
  | 'isInstalled'
44
46
  | 'isLoggedIn'
@@ -99,6 +101,10 @@ function makeFakeRunner(opts: FakeRunnerOptions = {}): {
99
101
  if ('deployedApp' in opts) return opts.deployedApp ?? null;
100
102
  return { name, url: `https://${name}.siteio.example.com` };
101
103
  },
104
+ async logsApp(name, logOpts) {
105
+ calls.push({ method: 'logsApp', args: { name, opts: logOpts ?? null } });
106
+ return opts.logsStdout ?? '';
107
+ },
102
108
  };
103
109
 
104
110
  return { runner, calls };
@@ -116,6 +122,12 @@ interface FakeDepsOptions extends FakeRunnerOptions {
116
122
  dockerfile?: string;
117
123
  /** Value returned by detectGitOriginUrl. Default: null. */
118
124
  gitOriginUrl?: string | null;
125
+ /**
126
+ * Sequence of HTTP status codes (or nulls) `probeHealth` should return
127
+ * across successive polls. When omitted, every probe returns 200; once
128
+ * exhausted, further probes return null. Pass [] for an unreachable container.
129
+ */
130
+ healthProbeResponses?: Array<number | null>;
119
131
  }
120
132
 
121
133
  interface FakeDeps extends TeleportDeps {
@@ -126,6 +138,8 @@ interface FakeDeps extends TeleportDeps {
126
138
  warnLines: string[];
127
139
  tempFileWrites: { path: string; content: string }[];
128
140
  tempFileDeletes: string[];
141
+ healthProbeUrls: string[];
142
+ sleepCalls: number[];
129
143
  }
130
144
 
131
145
  function makeDeps(opts: FakeDepsOptions = {}): FakeDeps {
@@ -134,8 +148,11 @@ function makeDeps(opts: FakeDepsOptions = {}): FakeDeps {
134
148
  const warnLines: string[] = [];
135
149
  const tempFileWrites: { path: string; content: string }[] = [];
136
150
  const tempFileDeletes: string[] = [];
151
+ const healthProbeUrls: string[] = [];
152
+ const sleepCalls: number[] = [];
137
153
 
138
154
  let tempCounter = 0;
155
+ let healthProbeIdx = 0;
139
156
 
140
157
  const deps: FakeDeps = {
141
158
  calls,
@@ -143,6 +160,8 @@ function makeDeps(opts: FakeDepsOptions = {}): FakeDeps {
143
160
  warnLines,
144
161
  tempFileWrites,
145
162
  tempFileDeletes,
163
+ healthProbeUrls,
164
+ sleepCalls,
146
165
  runner,
147
166
  loadConfig: async () =>
148
167
  ({
@@ -167,6 +186,25 @@ function makeDeps(opts: FakeDepsOptions = {}): FakeDeps {
167
186
  },
168
187
  detectGitOriginUrl: async () =>
169
188
  'gitOriginUrl' in opts ? (opts.gitOriginUrl ?? null) : null,
189
+ probeHealth: async (url) => {
190
+ healthProbeUrls.push(url);
191
+ // Default behavior: 200 on the first probe so happy-path tests
192
+ // don't have to configure anything. Callers exercising timeouts
193
+ // pass `healthProbeResponses: []` or an explicit list.
194
+ if (opts.healthProbeResponses == null) return 200;
195
+ const list = opts.healthProbeResponses;
196
+ if (healthProbeIdx < list.length) {
197
+ return list[healthProbeIdx++] ?? null;
198
+ }
199
+ return null;
200
+ },
201
+ // Sleep is a no-op in tests — we don't want real time to pass.
202
+ // waitForHealth bounds its loop by attempt count (not wall clock),
203
+ // so the timeout path still terminates with this stub; each requested
204
+ // delay is recorded in sleepCalls for assertions.
205
+ sleep: async (ms) => {
206
+ sleepCalls.push(ms);
207
+ },
170
208
  log: (msg) => logLines.push(msg),
171
209
  warn: (msg) => warnLines.push(msg),
172
210
  };
@@ -1214,3 +1252,211 @@ describe('runTeleport — sync failure paths', () => {
1214
1252
  expect(methods).toContain('setApp');
1215
1253
  });
1216
1254
  });
1255
+
1256
+ /* ------------------------------------------------------------------ */
1257
+ /* waitForHealth (direct) */
1258
+ /* ------------------------------------------------------------------ */
1259
+
1260
+ describe('waitForHealth', () => {
1261
+ test('returns true on first 200 without sleeping', async () => {
1262
+ const { waitForHealth } = await import('./teleport');
1263
+ const probed: string[] = [];
1264
+ const sleeps: number[] = [];
1265
+ const logs: string[] = [];
1266
+ const ok = await waitForHealth(
1267
+ 'https://mcp.example.com',
1268
+ {
1269
+ probeHealth: async (u) => {
1270
+ probed.push(u);
1271
+ return 200;
1272
+ },
1273
+ sleep: async (ms) => {
1274
+ sleeps.push(ms);
1275
+ },
1276
+ log: (m) => logs.push(m),
1277
+ },
1278
+ { timeoutMs: 1000, intervalMs: 100 }
1279
+ );
1280
+ expect(ok).toBe(true);
1281
+ expect(probed).toEqual(['https://mcp.example.com/health']);
1282
+ expect(sleeps).toEqual([]); // no sleep after a first-attempt success
1283
+ expect(logs.join('\n')).toMatch(/responded 200 after 1 attempt/);
1284
+ });
1285
+
1286
+ test('returns true when 200 arrives after a few not-ready probes', async () => {
1287
+ const { waitForHealth } = await import('./teleport');
1288
+ const sequence: Array<number | null> = [null, 503, null, 200];
1289
+ let idx = 0;
1290
+ const sleeps: number[] = [];
1291
+ const ok = await waitForHealth(
1292
+ 'https://mcp.example.com/',
1293
+ {
1294
+ probeHealth: async () => sequence[idx++] ?? null,
1295
+ sleep: async (ms) => {
1296
+ sleeps.push(ms);
1297
+ },
1298
+ log: () => {},
1299
+ },
1300
+ { timeoutMs: 10_000, intervalMs: 100 }
1301
+ );
1302
+ expect(ok).toBe(true);
1303
+ expect(sleeps).toEqual([100, 100, 100]); // 3 sleeps before the 4th probe hit 200
1304
+ });
1305
+
1306
+ test('returns false when probe never hits 200 within the budget', async () => {
1307
+ const { waitForHealth } = await import('./teleport');
1308
+ let probeCount = 0;
1309
+ const sleeps: number[] = [];
1310
+ const ok = await waitForHealth(
1311
+ 'https://mcp.example.com',
1312
+ {
1313
+ probeHealth: async () => {
1314
+ probeCount++;
1315
+ return null;
1316
+ },
1317
+ sleep: async (ms) => {
1318
+ sleeps.push(ms);
1319
+ },
1320
+ log: () => {},
1321
+ },
1322
+ { timeoutMs: 500, intervalMs: 100 }
1323
+ );
1324
+ expect(ok).toBe(false);
1325
+ // timeout/interval = 5 attempts, 4 sleeps between them.
1326
+ expect(probeCount).toBe(5);
1327
+ expect(sleeps.length).toBe(4);
1328
+ });
1329
+
1330
+ test('strips trailing slash(es) from url before appending /health', async () => {
1331
+ const { waitForHealth } = await import('./teleport');
1332
+ const probed: string[] = [];
1333
+ await waitForHealth(
1334
+ 'https://mcp.example.com///',
1335
+ {
1336
+ probeHealth: async (u) => {
1337
+ probed.push(u);
1338
+ return 200;
1339
+ },
1340
+ sleep: async () => {},
1341
+ log: () => {},
1342
+ },
1343
+ { timeoutMs: 1000, intervalMs: 100 }
1344
+ );
1345
+ expect(probed[0]).toBe('https://mcp.example.com/health');
1346
+ });
1347
+ });
1348
+
1349
+ /* ------------------------------------------------------------------ */
1350
+ /* runTeleport — health-check surfacing */
1351
+ /* ------------------------------------------------------------------ */
1352
+
1353
+ describe('runTeleport — health check on deploy', () => {
1354
+ test('happy path probes /health at the deployed URL and does not fetch logs', async () => {
1355
+ const deps = makeDeps();
1356
+ await runTeleport({ name: 'mcp' }, deps);
1357
+ expect(deps.healthProbeUrls[0]).toBe('https://mcp.siteio.example.com/health');
1358
+ expect(deps.calls.map((c) => c.method)).not.toContain('logsApp');
1359
+ });
1360
+
1361
+ test('health never returns 200 → fetches logs, surfaces them via warn, throws CliError', async () => {
1362
+ const deps = makeDeps({
1363
+ healthProbeResponses: [], // always null → timeout path
1364
+ logsStdout: 'Error: EACCES: permission denied, mkdir /data/.config\n',
1365
+ });
1366
+ await expect(runTeleport({ name: 'mcp' }, deps)).rejects.toThrow(
1367
+ /\/health never returned 200/
1368
+ );
1369
+ // logs were fetched with the expected tail size
1370
+ const logsCall = deps.calls.find((c) => c.method === 'logsApp');
1371
+ expect(logsCall).toBeDefined();
1372
+ expect((logsCall!.args as { opts: { tail: number } }).opts.tail).toBeGreaterThan(0);
1373
+ // The log tail was surfaced to the user on stderr (deps.warn)
1374
+ expect(deps.warnLines.join('\n')).toContain('EACCES: permission denied');
1375
+ });
1376
+
1377
+ test('empty log stdout still produces a clear warning (no "undefined" output)', async () => {
1378
+ const deps = makeDeps({
1379
+ healthProbeResponses: [],
1380
+ logsStdout: '',
1381
+ });
1382
+ await expect(runTeleport({ name: 'mcp' }, deps)).rejects.toThrow();
1383
+ expect(deps.warnLines.join('\n')).toContain('(no logs returned by siteio)');
1384
+ });
1385
+
1386
+ test('siteio did not return a URL → health check is skipped with a warning, no throw', async () => {
1387
+ const deps = makeDeps({ deployedApp: { name: 'mcp' } }); // no url field
1388
+ const result = await runTeleport({ name: 'mcp' }, deps);
1389
+ expect(result.url).toBeUndefined();
1390
+ expect(deps.healthProbeUrls).toEqual([]);
1391
+ expect(deps.warnLines.join('\n')).toContain('Skipping health check');
1392
+ });
1393
+
1394
+ test('appInfo lacks url but findApp has it → falls back, still runs health check', async () => {
1395
+ // Mirrors real siteio behavior: `apps info --json` omits the
1396
+ // generated subdomain URL even though `apps list --json` surfaces
1397
+ // it. We fall back to findApp (which wraps `apps list`) so the
1398
+ // health check can still run.
1399
+ const deps = makeDeps({
1400
+ deployedApp: { name: 'mcp' }, // info: no url
1401
+ // existingApp is read by findApp on re-call — setting it supplies
1402
+ // the fallback URL.
1403
+ existingApp: { name: 'mcp', url: 'https://mcp.siteio.example.com' },
1404
+ });
1405
+ // But runTeleport's "create" path REFUSES if existingApp is found,
1406
+ // so we need to bypass that. Trick: set existingApp to null at call
1407
+ // time; we can't really do that here without extending the fixture.
1408
+ // Instead, emulate via a custom runner.
1409
+ let findAppCalls = 0;
1410
+ const deployInfo: SiteioApp = { name: 'mcp' }; // info returns no url
1411
+ const fallbackInfo: SiteioApp = {
1412
+ name: 'mcp',
1413
+ url: 'https://mcp.siteio.example.com',
1414
+ };
1415
+ deps.runner.findApp = async () => {
1416
+ findAppCalls++;
1417
+ // First call (preflight — "does app already exist?") must return null
1418
+ // so runTeleport proceeds with create. Second call (post-deploy URL
1419
+ // fallback) returns the populated URL.
1420
+ return findAppCalls === 1 ? null : fallbackInfo;
1421
+ };
1422
+ deps.runner.appInfo = async () => deployInfo;
1423
+ await runTeleport({ name: 'mcp' }, deps);
1424
+ expect(findAppCalls).toBe(2);
1425
+ expect(deps.healthProbeUrls[0]).toBe('https://mcp.siteio.example.com/health');
1426
+ });
1427
+ });
1428
+
1429
+ describe('runTeleport — health check on --sync', () => {
1430
+ test('sync happy path probes /health after restart', async () => {
1431
+ const deps = makeDeps({
1432
+ existingApp: { name: 'mcp', url: 'https://mcp.siteio.example.com' },
1433
+ deployedApp: {
1434
+ name: 'mcp',
1435
+ url: 'https://mcp.siteio.example.com',
1436
+ volumes: [`agentio-data-mcp:${DATA_VOLUME_PATH}`],
1437
+ },
1438
+ });
1439
+ await runTeleport({ name: 'mcp', sync: true }, deps);
1440
+ expect(deps.healthProbeUrls[0]).toBe('https://mcp.siteio.example.com/health');
1441
+ // No log fetch on a happy sync.
1442
+ expect(deps.calls.map((c) => c.method)).not.toContain('logsApp');
1443
+ });
1444
+
1445
+ test('sync health times out → logs fetched + thrown', async () => {
1446
+ const deps = makeDeps({
1447
+ existingApp: { name: 'mcp', url: 'https://mcp.siteio.example.com' },
1448
+ deployedApp: {
1449
+ name: 'mcp',
1450
+ url: 'https://mcp.siteio.example.com',
1451
+ volumes: [`agentio-data-mcp:${DATA_VOLUME_PATH}`],
1452
+ },
1453
+ healthProbeResponses: [],
1454
+ logsStdout: 'boom\n',
1455
+ });
1456
+ await expect(
1457
+ runTeleport({ name: 'mcp', sync: true }, deps)
1458
+ ).rejects.toThrow(/\/health never returned 200/);
1459
+ expect(deps.calls.map((c) => c.method)).toContain('logsApp');
1460
+ expect(deps.warnLines.join('\n')).toContain('boom');
1461
+ });
1462
+ });
@@ -133,10 +133,59 @@ export interface TeleportDeps {
133
133
  * siteio `--git` argument in git-mode.
134
134
  */
135
135
  detectGitOriginUrl: () => Promise<string | null>;
136
+ /**
137
+ * HTTP probe used by `waitForHealth`. Returns the status code (200 on
138
+ * a healthy server). Network errors are surfaced as `null` so the
139
+ * poller can treat them the same as a not-yet-ready container.
140
+ */
141
+ probeHealth: (url: string) => Promise<number | null>;
142
+ /** Resolved after `ms` milliseconds. Injected for testability. */
143
+ sleep: (ms: number) => Promise<void>;
136
144
  log: (msg: string) => void;
137
145
  warn: (msg: string) => void;
138
146
  }
139
147
 
148
+ /* ------------------------------------------------------------------ */
149
+ /* health polling */
150
+ /* ------------------------------------------------------------------ */
151
+
152
+ /** How long to wait for /health to return 200 before giving up. */
153
+ export const HEALTH_TIMEOUT_MS = 90_000;
154
+ /** Spacing between consecutive /health probes. */
155
+ export const HEALTH_INTERVAL_MS = 2_000;
156
+ /** Number of log lines to surface when the health check times out. */
157
+ export const HEALTH_FAILURE_LOG_TAIL = 50;
158
+
159
/**
 * Poll `${url}/health` until it returns 200 or the attempt budget is
 * exhausted. The budget is `ceil(timeoutMs / intervalMs)` attempts, with a
 * minimum of one, so the loop is bounded by attempt count rather than wall
 * clock. That lets tests stub `deps.sleep` to a no-op and still exercise
 * the timeout path without really waiting.
 *
 * @param url Base URL of the deployed app. Trailing slashes are stripped
 *   before `/health` is appended.
 * @param deps `probeHealth` performs a single probe (any result other
 *   than 200, including null, counts as "not ready"); `sleep` is awaited
 *   between attempts; `log` receives progress messages.
 * @param opts `timeoutMs` and `intervalMs` default to `HEALTH_TIMEOUT_MS`
 *   and `HEALTH_INTERVAL_MS`. A non-finite, zero or negative `intervalMs`
 *   also falls back to `HEALTH_INTERVAL_MS`, because dividing by it would
 *   otherwise produce an infinite (never-terminating) or NaN (zero-probe)
 *   attempt budget.
 * @returns true as soon as a probe returns 200; false once every attempt
 *   has been used.
 */
export async function waitForHealth(
  url: string,
  deps: Pick<TeleportDeps, 'probeHealth' | 'sleep' | 'log'>,
  opts: { timeoutMs?: number; intervalMs?: number } = {}
): Promise<boolean> {
  const timeoutMs = opts.timeoutMs ?? HEALTH_TIMEOUT_MS;
  const requestedIntervalMs = opts.intervalMs ?? HEALTH_INTERVAL_MS;
  // Guard the divisor: 0 would make the budget Infinity (or NaN for 0/0).
  const intervalMs =
    Number.isFinite(requestedIntervalMs) && requestedIntervalMs > 0
      ? requestedIntervalMs
      : HEALTH_INTERVAL_MS;
  const budget = Math.ceil(timeoutMs / intervalMs);
  // A NaN budget (NaN timeout) would skip the loop entirely; always probe
  // at least once so the caller gets a real answer.
  const maxAttempts = Number.isNaN(budget) ? 1 : Math.max(1, budget);
  const healthUrl = `${url.replace(/\/+$/, '')}/health`;
  deps.log(`Waiting for ${healthUrl} (up to ${Math.round(timeoutMs / 1000)}s)…`);
  for (let attempt = 1; attempt <= maxAttempts; attempt++) {
    const status = await deps.probeHealth(healthUrl);
    if (status === 200) {
      deps.log(`  /health responded 200 after ${attempt} attempt(s).`);
      return true;
    }
    // No point sleeping after the final attempt — we are about to give up.
    if (attempt < maxAttempts) {
      await deps.sleep(intervalMs);
    }
  }
  return false;
}
188
+
140
189
  export interface TeleportOptions {
141
190
  name: string;
142
191
  dockerfileOnly?: boolean;
@@ -300,8 +349,38 @@ async function runSync(
300
349
  await deps.runner.restartApp(opts.name);
301
350
 
302
351
  // We already fetched appInfo earlier for volume detection; reuse
303
- // its URL field rather than calling again.
304
- const url = typeof detail?.url === 'string' ? detail.url : undefined;
352
+ // its URL field rather than calling again. Same fallback as the
353
+ // full-teleport path: siteio's `apps info --json` omits the
354
+ // generated subdomain URL, so fall back to findApp if it's missing.
355
+ let url = typeof detail?.url === 'string' ? detail.url : undefined;
356
+ if (!url) {
357
+ const listed = await deps.runner.findApp(opts.name);
358
+ if (typeof listed?.url === 'string') url = listed.url;
359
+ }
360
+
361
+ // Same health-check / log-surface pattern as the full teleport path.
362
+ // A sync that breaks the container (bad env, corrupted config blob,
363
+ // volume backfill surprise) should fail loudly instead of silently
364
+ // leaving a crash-looping remote.
365
+ if (url) {
366
+ const healthy = await waitForHealth(url, deps);
367
+ if (!healthy) {
368
+ deps.warn(
369
+ `Container failed to report healthy after ${Math.round(HEALTH_TIMEOUT_MS / 1000)}s. Fetching logs…`
370
+ );
371
+ const logs = await deps.runner.logsApp(opts.name, {
372
+ tail: HEALTH_FAILURE_LOG_TAIL,
373
+ });
374
+ deps.warn('--- container logs (tail) ---');
375
+ deps.warn(logs.trim() || '(no logs returned by siteio)');
376
+ deps.warn('--- end logs ---');
377
+ throw new CliError(
378
+ 'API_ERROR',
379
+ `Sync to "${opts.name}" restarted the container but /health never returned 200`,
380
+ 'Inspect the logs above. The previous config is gone — the next sync (or a manual `siteio apps restart`) will still see the broken state until you fix the root cause.'
381
+ );
382
+ }
383
+ }
305
384
 
306
385
  deps.log('');
307
386
  deps.log('Sync complete!');
@@ -568,7 +647,47 @@ export async function runTeleport(
568
647
  // Try to surface the deployed URL. Non-fatal if siteio doesn't
569
648
  // give us one back.
570
649
  const info = await deps.runner.appInfo(opts.name);
571
- const url = typeof info?.url === 'string' ? info.url : undefined;
650
+ let url = typeof info?.url === 'string' ? info.url : undefined;
651
+ // siteio's `apps info --json` output omits the generated subdomain
652
+ // URL (domains: [] in the payload) even though the app is reachable
653
+ // at it. `apps list --json` DOES include the url field at the top
654
+ // level. Fall back to findApp so the post-deploy health check can
655
+ // still run even when siteio doesn't surface url in info.
656
+ if (!url) {
657
+ const listed = await deps.runner.findApp(opts.name);
658
+ if (typeof listed?.url === 'string') url = listed.url;
659
+ }
660
+
661
+ // Poll /health to CONFIRM the container actually came up. siteio's
662
+ // deploy returns success as soon as Docker starts the container, so
663
+ // a crash-loop (bad volume permissions, bad config, missing binary,
664
+ // etc.) looks like a successful deploy until the user probes it
665
+ // themselves. Surfacing logs on timeout is the fix.
666
+ if (url) {
667
+ const healthy = await waitForHealth(url, deps);
668
+ if (!healthy) {
669
+ deps.warn(
670
+ `Container failed to report healthy after ${Math.round(HEALTH_TIMEOUT_MS / 1000)}s. Fetching logs…`
671
+ );
672
+ const logs = await deps.runner.logsApp(opts.name, {
673
+ tail: HEALTH_FAILURE_LOG_TAIL,
674
+ });
675
+ deps.warn('--- container logs (tail) ---');
676
+ deps.warn(logs.trim() || '(no logs returned by siteio)');
677
+ deps.warn('--- end logs ---');
678
+ throw new CliError(
679
+ 'API_ERROR',
680
+ `Deploy "${opts.name}" started but /health never returned 200`,
681
+ 'Inspect the logs above. Common causes: permission errors on mounted volumes, missing env vars, binary not found for the container arch.'
682
+ );
683
+ }
684
+ } else {
685
+ deps.warn(
686
+ 'Skipping health check: siteio did not return a URL for this app. ' +
687
+ `Run \`siteio apps info ${opts.name}\` and curl <url>/health manually to verify.`
688
+ );
689
+ }
690
+
572
691
  const claudeCmd = url
573
692
  ? `claude mcp add --scope local --transport http agentio "${url}/mcp?services=rss"`
574
693
  : null;
@@ -634,6 +753,34 @@ async function defaultRemoveTempFile(path: string): Promise<void> {
634
753
  * Returns null if the cwd isn't a git repo, has no origin remote, or if
635
754
  * the git binary isn't on PATH.
636
755
  */
756
/**
 * Default health probe: issue a single GET against `url` and report the
 * HTTP status code. Any failure to complete the request (the original
 * notes DNS, connection refused and TLS errors; the per-attempt abort
 * below lands here too) yields null, so the poller treats it the same as
 * a container that is not ready yet and keeps retrying.
 */
async function defaultProbeHealth(url: string): Promise<number | null> {
  try {
    // Cap each attempt at 5s so one hung connection cannot consume the
    // whole polling budget. The original comment states that
    // AbortSignal.timeout is natively supported by Bun's fetch.
    const perAttemptSignal = AbortSignal.timeout(5_000);
    const response = await fetch(url, {
      signal: perAttemptSignal,
      method: 'GET',
    });
    // Only the status code matters, but read (and discard) the body so
    // the socket is released promptly; body-read errors are ignored.
    await response.text().catch(() => {});
    return response.status;
  } catch {
    return null;
  }
}
779
+
780
/** Real-timer sleep: the returned promise resolves after `ms` milliseconds. */
async function defaultSleep(ms: number): Promise<void> {
  await new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
783
+
637
784
  async function defaultDetectGitOriginUrl(): Promise<string | null> {
638
785
  try {
639
786
  const proc = Bun.spawn(['git', 'remote', 'get-url', 'origin'], {
@@ -722,6 +869,8 @@ export function registerTeleportCommand(parent: Command): void {
722
869
  writeTempFile: defaultWriteTempFile,
723
870
  removeTempFile: defaultRemoveTempFile,
724
871
  detectGitOriginUrl: defaultDetectGitOriginUrl,
872
+ probeHealth: defaultProbeHealth,
873
+ sleep: defaultSleep,
725
874
  log: (msg) => console.log(msg),
726
875
  warn: (msg) => console.error(msg),
727
876
  }
@@ -27,11 +27,12 @@ describe('generateTeleportDockerfile — structural invariants', () => {
27
27
  expect(df).toContain('FROM ubuntu:24.04');
28
28
  });
29
29
 
30
- test('installs ca-certificates, curl, tini', () => {
30
+ test('installs ca-certificates, curl, tini, gosu', () => {
31
31
  const df = generateTeleportDockerfile();
32
32
  expect(df).toContain('ca-certificates');
33
33
  expect(df).toContain('curl');
34
34
  expect(df).toContain('tini');
35
+ expect(df).toContain('gosu');
35
36
  });
36
37
 
37
38
  test('cleans up apt lists (image size hygiene)', () => {
@@ -43,7 +44,14 @@ describe('generateTeleportDockerfile — structural invariants', () => {
43
44
  const df = generateTeleportDockerfile();
44
45
  expect(df).toContain('groupadd -g 1001 agentio');
45
46
  expect(df).toContain('useradd -u 1001 -g agentio');
46
- expect(df).toContain('USER agentio');
47
+ });
48
+
49
+ test('does NOT set USER agentio (container runs as root until entrypoint drops privileges via gosu)', () => {
50
+ // The container boots as root so the CMD can chown /data (the
51
+ // persistent volume mounts as root:root on first boot). gosu then
52
+ // drops privileges before any user-input-processing code runs.
53
+ const df = generateTeleportDockerfile();
54
+ expect(df).not.toMatch(/^USER\s+agentio/m);
47
55
  });
48
56
 
49
57
  test('sets HOME, XDG_CONFIG_HOME, PATH for the non-root user', () => {
@@ -53,9 +61,15 @@ describe('generateTeleportDockerfile — structural invariants', () => {
53
61
  expect(df).toContain('ENV PATH="/home/agentio/bin:${PATH}"');
54
62
  });
55
63
 
56
- test('ensures /data and /home/agentio/bin are owned by agentio', () => {
64
+ test('ensures /home/agentio/bin is owned by agentio at build time', () => {
65
+ const df = generateTeleportDockerfile();
66
+ expect(df).toContain('chown -R agentio:agentio /home/agentio/bin');
67
+ });
68
+
69
+ test('chowns /data at container START (in CMD) — volume mount masks any build-time ownership', () => {
57
70
  const df = generateTeleportDockerfile();
58
- expect(df).toContain('chown -R agentio:agentio /data /home/agentio/bin');
71
+ const cmdLine = df.match(/CMD \[.*\]/)?.[0] ?? '';
72
+ expect(cmdLine).toContain('chown -R agentio:agentio /data');
59
73
  });
60
74
 
61
75
  test('never uses COPY or ADD (siteio inline Dockerfile constraint)', () => {
@@ -151,10 +165,10 @@ describe('generateTeleportDockerfile — port + healthcheck + entrypoint', () =>
151
165
  expect(df).toContain('ENTRYPOINT ["/usr/bin/tini", "--"]');
152
166
  });
153
167
 
154
- test('CMD runs config import THEN the server, via sh -c', () => {
168
+ test('CMD chowns /data, drops privileges via gosu, imports config, execs server', () => {
155
169
  const df = generateTeleportDockerfile();
156
170
  expect(df).toContain(
157
- 'CMD ["sh", "-c", "agentio config import && exec agentio server start --foreground --host 0.0.0.0 --port 9999"]'
171
+ 'CMD ["sh", "-c", "chown -R agentio:agentio /data && exec gosu agentio sh -c \'agentio config import && exec agentio server start --foreground --host 0.0.0.0 --port 9999\'"]'
158
172
  );
159
173
  });
160
174
 
@@ -164,6 +178,12 @@ describe('generateTeleportDockerfile — port + healthcheck + entrypoint', () =>
164
178
  expect(cmdLine).toContain('exec agentio server start');
165
179
  });
166
180
 
181
+ test('CMD uses `exec gosu agentio` so the server is NOT a child of the root shell (so tini sees the right PID)', () => {
182
+ const df = generateTeleportDockerfile();
183
+ const cmdLine = df.match(/CMD \[.*\]/)?.[0] ?? '';
184
+ expect(cmdLine).toContain('exec gosu agentio');
185
+ });
186
+
167
187
  test('CMD binds to 0.0.0.0 (required for Docker networking)', () => {
168
188
  const df = generateTeleportDockerfile();
169
189
  expect(df).toContain('--host 0.0.0.0');
@@ -171,13 +191,19 @@ describe('generateTeleportDockerfile — port + healthcheck + entrypoint', () =>
171
191
  });
172
192
 
173
193
  describe('generateTeleportDockerfile — security posture', () => {
174
- test('switches to non-root user BEFORE the CMD runs', () => {
194
+ test('drops to non-root BEFORE any user-input code runs (config import, server)', () => {
195
+ // The boot sequence in CMD is: root runs chown /data, then `exec gosu
196
+ // agentio` hands control to the agentio user. Only AFTER that does
197
+ // `agentio config import` (which decrypts AGENTIO_CONFIG) and the
198
+ // server ever run. Confirm the ordering inside the CMD string itself.
175
199
  const df = generateTeleportDockerfile();
176
- const userIdx = df.indexOf('USER agentio');
177
- const cmdIdx = df.search(/^CMD /m);
178
- expect(userIdx).toBeGreaterThan(-1);
179
- expect(cmdIdx).toBeGreaterThan(-1);
180
- expect(userIdx).toBeLessThan(cmdIdx);
200
+ const cmdLine = df.match(/CMD \[.*\]/)?.[0] ?? '';
201
+ const gosuIdx = cmdLine.indexOf('gosu agentio');
202
+ const importIdx = cmdLine.indexOf('agentio config import');
203
+ const serverIdx = cmdLine.indexOf('agentio server start');
204
+ expect(gosuIdx).toBeGreaterThan(-1);
205
+ expect(importIdx).toBeGreaterThan(gosuIdx);
206
+ expect(serverIdx).toBeGreaterThan(gosuIdx);
181
207
  });
182
208
 
183
209
  test('does not install sudo', () => {
@@ -52,16 +52,21 @@ FROM ubuntu:24.04
52
52
  # ca-certificates : HTTPS calls from the container
53
53
  # curl : release binary download + healthcheck
54
54
  # tini : proper PID 1 / signal handling
55
+ # gosu : drop privileges from root to the agentio user at
56
+ # container START (AFTER we've chowned the persistent
57
+ # /data volume, which Docker mounts as root-owned)
55
58
  RUN apt-get update && apt-get install -y --no-install-recommends \\
56
- ca-certificates curl tini \\
59
+ ca-certificates curl tini gosu \\
57
60
  && rm -rf /var/lib/apt/lists/*
58
61
 
59
- # Non-root user, home at /data so config.json + tokens.enc live in
60
- # a siteio-managed persistent volume path.
62
+ # Non-root user. /data will be a siteio-managed persistent volume mount,
63
+ # so its build-time ownership is meaningless — the volume masks whatever
64
+ # we chown here at image-build time. We fix ownership at container-START
65
+ # via the entrypoint script below (requires root, hence no USER directive).
61
66
  RUN groupadd -g 1001 agentio \\
62
67
  && useradd -u 1001 -g agentio -d /home/agentio -m agentio \\
63
68
  && mkdir -p /data /home/agentio/bin \\
64
- && chown -R agentio:agentio /data /home/agentio/bin
69
+ && chown -R agentio:agentio /home/agentio/bin
65
70
 
66
71
  # Fetch the agentio linux binary at BUILD time (not boot) so siteio
67
72
  # caches the layer and subsequent deploys reuse it unless --no-cache
@@ -80,7 +85,6 @@ RUN set -eux; \\
80
85
  chmod +x /home/agentio/bin/agentio; \\
81
86
  chown agentio:agentio /home/agentio/bin/agentio
82
87
 
83
- USER agentio
84
88
  ENV HOME=/data
85
89
  ENV XDG_CONFIG_HOME=/data
86
90
  ENV PATH="/home/agentio/bin:\${PATH}"
@@ -93,9 +97,12 @@ EXPOSE ${port}
93
97
  HEALTHCHECK --interval=30s --timeout=5s --start-period=30s --retries=3 \\
94
98
  CMD curl -sf http://localhost:${port}/health || exit 1
95
99
 
96
- # tini becomes PID 1 for signal handling. sh -c runs the import-then-
97
- # server pipeline so SIGTERM propagates correctly to the running server.
100
+ # tini becomes PID 1 for signal handling. The sh -c script runs as ROOT
101
+ # so it can chown the freshly-mounted /data volume (Docker mounts named
102
+ # volumes with root:root ownership on first boot, overriding any
103
+ # build-time chown), then drops to the agentio user via gosu and exec's
104
+ # the config-import + server pipeline so SIGTERM propagates correctly.
98
105
  ENTRYPOINT ["/usr/bin/tini", "--"]
99
- CMD ["sh", "-c", "agentio config import && exec agentio server start --foreground --host 0.0.0.0 --port ${port}"]
106
+ CMD ["sh", "-c", "chown -R agentio:agentio /data && exec gosu agentio sh -c 'agentio config import && exec agentio server start --foreground --host 0.0.0.0 --port ${port}'"]
100
107
  `;
101
108
  }
@@ -76,11 +76,12 @@ describe('docker/Dockerfile.teleport — structural invariants', () => {
76
76
  expect(df).toMatch(/FROM\s+ubuntu:24\.04\s*(?:\n|$)/);
77
77
  });
78
78
 
79
- test('stage 2 installs ca-certificates, curl, tini', () => {
79
+ test('stage 2 installs ca-certificates, curl, tini, gosu', () => {
80
80
  const df = loadDockerfile();
81
81
  expect(df).toContain('ca-certificates');
82
82
  expect(df).toContain('curl');
83
83
  expect(df).toContain('tini');
84
+ expect(df).toContain('gosu');
84
85
  });
85
86
 
86
87
  test('stage 2 cleans up apt lists', () => {
@@ -92,7 +93,15 @@ describe('docker/Dockerfile.teleport — structural invariants', () => {
92
93
  const df = loadDockerfile();
93
94
  expect(df).toContain('groupadd -g 1001 agentio');
94
95
  expect(df).toContain('useradd -u 1001 -g agentio');
95
- expect(df).toContain('USER agentio');
96
+ });
97
+
98
+ test('stage 2 does NOT set USER agentio (container runs as root until entrypoint drops privileges via gosu)', () => {
99
+ // Container boots as root so the CMD can chown /data (Docker mounts
100
+ // named volumes with root:root ownership on first boot, overriding
101
+ // any build-time chown). gosu drops privileges before any user-input
102
+ // code runs.
103
+ const df = loadDockerfile();
104
+ expect(df).not.toMatch(/^USER\s+agentio/m);
96
105
  });
97
106
 
98
107
  test('copies binary from stage 1 with --chown=agentio:agentio', () => {
@@ -133,10 +142,10 @@ describe('docker/Dockerfile.teleport — structural invariants', () => {
133
142
  expect(df).toContain('ENTRYPOINT ["/usr/bin/tini", "--"]');
134
143
  });
135
144
 
136
- test('CMD runs config import THEN the server, via sh -c with exec', () => {
145
+ test('CMD chowns /data, drops privileges via gosu, imports config, execs server', () => {
137
146
  const df = loadDockerfile();
138
147
  expect(df).toContain(
139
- 'CMD ["sh", "-c", "agentio config import && exec agentio server start --foreground --host 0.0.0.0 --port 9999"]'
148
+ 'CMD ["sh", "-c", "chown -R agentio:agentio /data && exec gosu agentio sh -c \'agentio config import && exec agentio server start --foreground --host 0.0.0.0 --port 9999\'"]'
140
149
  );
141
150
  });
142
151
 
@@ -148,14 +157,19 @@ describe('docker/Dockerfile.teleport — structural invariants', () => {
148
157
  });
149
158
 
150
159
  describe('docker/Dockerfile.teleport — security posture', () => {
151
- test('runtime stage switches to USER agentio before the CMD runs', () => {
160
+ test('drops to non-root BEFORE any user-input code runs (config import, server)', () => {
161
+ // The boot sequence in CMD is: root runs chown /data, then `exec gosu
162
+ // agentio` hands control to the agentio user. Only AFTER that does
163
+ // `agentio config import` (which decrypts AGENTIO_CONFIG) and the
164
+ // server ever run. Confirm the ordering inside the CMD string itself.
152
165
  const df = loadDockerfile();
153
- // Find the position of USER agentio and the FINAL CMD/ENTRYPOINT.
154
- const userIdx = df.lastIndexOf('USER agentio');
155
- const cmdIdx = df.search(/^CMD /m);
156
- expect(userIdx).toBeGreaterThan(-1);
157
- expect(cmdIdx).toBeGreaterThan(-1);
158
- expect(userIdx).toBeLessThan(cmdIdx);
166
+ const cmdLine = df.match(/CMD \[.*\]/m)?.[0] ?? '';
167
+ const gosuIdx = cmdLine.indexOf('gosu agentio');
168
+ const importIdx = cmdLine.indexOf('agentio config import');
169
+ const serverIdx = cmdLine.indexOf('agentio server start');
170
+ expect(gosuIdx).toBeGreaterThan(-1);
171
+ expect(importIdx).toBeGreaterThan(gosuIdx);
172
+ expect(serverIdx).toBeGreaterThan(gosuIdx);
159
173
  });
160
174
 
161
175
  test('does not install sudo', () => {
@@ -167,14 +181,4 @@ describe('docker/Dockerfile.teleport — security posture', () => {
167
181
  const df = loadDockerfile();
168
182
  expect(df).not.toContain('EXPOSE 22');
169
183
  });
170
-
171
- test('HEALTHCHECK runs under the agentio user (no root escalation)', () => {
172
- // Since USER agentio is set before HEALTHCHECK and CMD, they both
173
- // run as agentio. Verify by ordering: USER must come before the
174
- // HEALTHCHECK directive.
175
- const df = loadDockerfile();
176
- const userIdx = df.lastIndexOf('USER agentio');
177
- const hcIdx = df.indexOf('HEALTHCHECK');
178
- expect(userIdx).toBeLessThan(hcIdx);
179
- });
180
184
  });
@@ -718,3 +718,49 @@ describe('appInfo', () => {
718
718
  expect(info!.url).toBe('https://mcp.x.siteio.me');
719
719
  });
720
720
  });
721
+
722
+ /* ------------------------------------------------------------------ */
723
+ /* logsApp */
724
+ /* ------------------------------------------------------------------ */
725
+
726
+ describe('logsApp', () => {
727
+ test('emits exact argv without --tail when tail is omitted', async () => {
728
+ const { spawn, calls } = makeMockSpawn({
729
+ responses: [{ exitCode: 0, stdout: 'log line 1\nlog line 2\n', stderr: '' }],
730
+ });
731
+ const runner = createSiteioRunner(spawn);
732
+ const out = await runner.logsApp('mcp');
733
+ expect(out).toBe('log line 1\nlog line 2\n');
734
+ expect(calls[0].cmd).toEqual(['siteio', 'apps', 'logs', 'mcp']);
735
+ });
736
+
737
+ test('passes --tail N when provided', async () => {
738
+ const { spawn, calls } = makeMockSpawn({
739
+ responses: [{ exitCode: 0, stdout: 'tail output\n', stderr: '' }],
740
+ });
741
+ const runner = createSiteioRunner(spawn);
742
+ await runner.logsApp('mcp', { tail: 40 });
743
+ expect(calls[0].cmd).toEqual([
744
+ 'siteio',
745
+ 'apps',
746
+ 'logs',
747
+ 'mcp',
748
+ '--tail',
749
+ '40',
750
+ ]);
751
+ });
752
+
753
+ test('returns partial stdout without throwing even on non-zero exit (best-effort)', async () => {
754
+ // Logs are consumed inside an already-failing path (crash-loop
755
+ // surfacing) so we don't want them to raise and mask the original
756
+ // error. Verify graceful return.
757
+ const { spawn } = makeMockSpawn({
758
+ responses: [
759
+ { exitCode: 1, stdout: 'some partial output\n', stderr: 'agent offline' },
760
+ ],
761
+ });
762
+ const runner = createSiteioRunner(spawn);
763
+ const out = await runner.logsApp('mcp', { tail: 10 });
764
+ expect(out).toBe('some partial output\n');
765
+ });
766
+ });
@@ -99,6 +99,16 @@ export interface SiteioRunner {
99
99
  restartApp(name: string): Promise<void>;
100
100
  /** `siteio apps info <name> --json` — used to surface the deployed URL. */
101
101
  appInfo(name: string): Promise<SiteioApp | null>;
102
+ /**
103
+ * `siteio apps logs <name> [--tail N]` — tail of the container's
104
+ * stdout/stderr. Used by teleport to surface crash-loop output when
105
+ * /health never becomes responsive after a deploy.
106
+ *
107
+ * Returns the captured stdout. Callers that want stderr too should
108
+ * treat this as best-effort (siteio's own progress lines sometimes
109
+ * leak onto stderr, which we don't try to merge).
110
+ */
111
+ logsApp(name: string, opts?: { tail?: number }): Promise<string>;
102
112
  }
103
113
 
104
114
  /**
@@ -295,6 +305,19 @@ export function createSiteioRunner(
295
305
  }
296
306
  },
297
307
 
308
+ async logsApp(name, opts) {
309
+ const cmd = ['siteio', 'apps', 'logs', name];
310
+ if (opts?.tail != null) {
311
+ cmd.push('--tail', String(opts.tail));
312
+ }
313
+ const r = await spawn({ cmd });
314
+ // Logs are best-effort: if siteio errors (app doesn't exist yet,
315
+ // agent offline, etc), return whatever we got instead of throwing,
316
+ // so the caller — typically in an already-failing teleport path —
317
+ // can still surface something useful to the user.
318
+ return r.stdout;
319
+ },
320
+
298
321
  async appInfo(name) {
299
322
  const cmd = ['siteio', 'apps', 'info', name, '--json'];
300
323
  const r = await spawn({ cmd });