npm - @plosson/agentio - Versions diffs - 0.7.3 → 0.7.4 - Mend

@plosson/agentio 0.7.3 → 0.7.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

package/package.json +1 -1
package/src/commands/teleport.test.ts +246 -0
package/src/commands/teleport.ts +152 -3
package/src/server/dockerfile-gen.test.ts +38 -12
package/src/server/dockerfile-gen.ts +15 -8
package/src/server/dockerfile-teleport.test.ts +25 -21
package/src/server/siteio-runner.test.ts +46 -0
package/src/server/siteio-runner.ts +23 -0

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@plosson/agentio",
-  "version": "0.7.3",
+  "version": "0.7.4",
   "description": "CLI for LLM agents to interact with communication and tracking services",
   "type": "module",
   "license": "MIT",

package/src/commands/teleport.test.ts CHANGED Viewed

@@ -39,6 +39,8 @@ interface FakeRunnerOptions {
   loggedIn?: boolean;
   existingApp?: SiteioApp | null;
   deployedApp?: SiteioApp | null;
+  /** Stdout returned by logsApp. Default: empty string. */
+  logsStdout?: string;
   failOn?:
     | 'isInstalled'
     | 'isLoggedIn'
@@ -99,6 +101,10 @@ function makeFakeRunner(opts: FakeRunnerOptions = {}): {
       if ('deployedApp' in opts) return opts.deployedApp ?? null;
       return { name, url: `https://${name}.siteio.example.com` };
     },
+    async logsApp(name, logOpts) {
+      calls.push({ method: 'logsApp', args: { name, opts: logOpts ?? null } });
+      return opts.logsStdout ?? '';
+    },
   };
   return { runner, calls };
@@ -116,6 +122,12 @@ interface FakeDepsOptions extends FakeRunnerOptions {
   dockerfile?: string;
   /** Value returned by detectGitOriginUrl. Default: null. */
   gitOriginUrl?: string | null;
+  /**
+   * Sequence of HTTP status codes (or nulls) `probeHealth` should return
+   * across successive polls. When omitted, every probe returns a healthy
+   * 200. Once the list is exhausted, every further probe returns null,
+   * so pass [] to simulate an unreachable container.
+   */
+  healthProbeResponses?: Array<number | null>;
 }
 interface FakeDeps extends TeleportDeps {
@@ -126,6 +138,8 @@ interface FakeDeps extends TeleportDeps {
   warnLines: string[];
   tempFileWrites: { path: string; content: string }[];
   tempFileDeletes: string[];
+  healthProbeUrls: string[];
+  sleepCalls: number[];
 }
 function makeDeps(opts: FakeDepsOptions = {}): FakeDeps {
@@ -134,8 +148,11 @@ function makeDeps(opts: FakeDepsOptions = {}): FakeDeps {
   const warnLines: string[] = [];
   const tempFileWrites: { path: string; content: string }[] = [];
   const tempFileDeletes: string[] = [];
+  const healthProbeUrls: string[] = [];
+  const sleepCalls: number[] = [];
   let tempCounter = 0;
+  let healthProbeIdx = 0;
   const deps: FakeDeps = {
     calls,
@@ -143,6 +160,8 @@ function makeDeps(opts: FakeDepsOptions = {}): FakeDeps {
     warnLines,
     tempFileWrites,
     tempFileDeletes,
+    healthProbeUrls,
+    sleepCalls,
     runner,
     loadConfig: async () =>
       ({
@@ -167,6 +186,25 @@ function makeDeps(opts: FakeDepsOptions = {}): FakeDeps {
     },
     detectGitOriginUrl: async () =>
       'gitOriginUrl' in opts ? (opts.gitOriginUrl ?? null) : null,
+    probeHealth: async (url) => {
+      healthProbeUrls.push(url);
+      // Default behavior: 200 on the first probe so happy-path tests
+      // don't have to configure anything. Callers exercising timeouts
+      // pass `healthProbeResponses: []` or an explicit list.
+      if (opts.healthProbeResponses == null) return 200;
+      const list = opts.healthProbeResponses;
+      if (healthProbeIdx < list.length) {
+        return list[healthProbeIdx++] ?? null;
+      }
+      return null;
+    },
+    // Sleep is a no-op in tests — we don't want real time to pass.
+    // waitForHealth bounds its loop by attempt count
+    // (ceil(timeoutMs / intervalMs)), not by the wall clock, so the
+    // timeout path still terminates even though no time elapses here.
+    sleep: async (ms) => {
+      sleepCalls.push(ms);
+    },
     log: (msg) => logLines.push(msg),
     warn: (msg) => warnLines.push(msg),
   };
@@ -1214,3 +1252,211 @@ describe('runTeleport — sync failure paths', () => {
     expect(methods).toContain('setApp');
   });
 });
+/* ------------------------------------------------------------------ */
+/* waitForHealth (direct)                                             */
+/* ------------------------------------------------------------------ */
+describe('waitForHealth', () => {
+  test('returns true on first 200 without sleeping', async () => {
+    const { waitForHealth } = await import('./teleport');
+    const probed: string[] = [];
+    const sleeps: number[] = [];
+    const logs: string[] = [];
+    const ok = await waitForHealth(
+      'https://mcp.example.com',
+      {
+        probeHealth: async (u) => {
+          probed.push(u);
+          return 200;
+        },
+        sleep: async (ms) => {
+          sleeps.push(ms);
+        },
+        log: (m) => logs.push(m),
+      },
+      { timeoutMs: 1000, intervalMs: 100 }
+    );
+    expect(ok).toBe(true);
+    expect(probed).toEqual(['https://mcp.example.com/health']);
+    expect(sleeps).toEqual([]); // no sleep after a first-attempt success
+    expect(logs.join('\n')).toMatch(/responded 200 after 1 attempt/);
+  });
+  test('returns true when 200 arrives after a few not-ready probes', async () => {
+    const { waitForHealth } = await import('./teleport');
+    const sequence: Array<number | null> = [null, 503, null, 200];
+    let idx = 0;
+    const sleeps: number[] = [];
+    const ok = await waitForHealth(
+      'https://mcp.example.com/',
+      {
+        probeHealth: async () => sequence[idx++] ?? null,
+        sleep: async (ms) => {
+          sleeps.push(ms);
+        },
+        log: () => {},
+      },
+      { timeoutMs: 10_000, intervalMs: 100 }
+    );
+    expect(ok).toBe(true);
+    expect(sleeps).toEqual([100, 100, 100]); // 3 sleeps before the 4th probe hit 200
+  });
+  test('returns false when probe never hits 200 within the budget', async () => {
+    const { waitForHealth } = await import('./teleport');
+    let probeCount = 0;
+    const sleeps: number[] = [];
+    const ok = await waitForHealth(
+      'https://mcp.example.com',
+      {
+        probeHealth: async () => {
+          probeCount++;
+          return null;
+        },
+        sleep: async (ms) => {
+          sleeps.push(ms);
+        },
+        log: () => {},
+      },
+      { timeoutMs: 500, intervalMs: 100 }
+    );
+    expect(ok).toBe(false);
+    // timeout/interval = 5 attempts, 4 sleeps between them.
+    expect(probeCount).toBe(5);
+    expect(sleeps.length).toBe(4);
+  });
+  test('strips trailing slash(es) from url before appending /health', async () => {
+    const { waitForHealth } = await import('./teleport');
+    const probed: string[] = [];
+    await waitForHealth(
+      'https://mcp.example.com///',
+      {
+        probeHealth: async (u) => {
+          probed.push(u);
+          return 200;
+        },
+        sleep: async () => {},
+        log: () => {},
+      },
+      { timeoutMs: 1000, intervalMs: 100 }
+    );
+    expect(probed[0]).toBe('https://mcp.example.com/health');
+  });
+});
+/* ------------------------------------------------------------------ */
+/* runTeleport — health-check surfacing                               */
+/* ------------------------------------------------------------------ */
+describe('runTeleport — health check on deploy', () => {
+  test('happy path probes /health at the deployed URL and does not fetch logs', async () => {
+    const deps = makeDeps();
+    await runTeleport({ name: 'mcp' }, deps);
+    expect(deps.healthProbeUrls[0]).toBe('https://mcp.siteio.example.com/health');
+    expect(deps.calls.map((c) => c.method)).not.toContain('logsApp');
+  });
+  test('health never returns 200 → fetches logs, surfaces them via warn, throws CliError', async () => {
+    const deps = makeDeps({
+      healthProbeResponses: [], // always null → timeout path
+      logsStdout: 'Error: EACCES: permission denied, mkdir /data/.config\n',
+    });
+    await expect(runTeleport({ name: 'mcp' }, deps)).rejects.toThrow(
+      /\/health never returned 200/
+    );
+    // logs were fetched with the expected tail size
+    const logsCall = deps.calls.find((c) => c.method === 'logsApp');
+    expect(logsCall).toBeDefined();
+    expect((logsCall!.args as { opts: { tail: number } }).opts.tail).toBeGreaterThan(0);
+    // The log tail was surfaced to the user on stderr (deps.warn)
+    expect(deps.warnLines.join('\n')).toContain('EACCES: permission denied');
+  });
+  test('empty log stdout still produces a clear warning (no "undefined" output)', async () => {
+    const deps = makeDeps({
+      healthProbeResponses: [],
+      logsStdout: '',
+    });
+    await expect(runTeleport({ name: 'mcp' }, deps)).rejects.toThrow();
+    expect(deps.warnLines.join('\n')).toContain('(no logs returned by siteio)');
+  });
+  test('siteio did not return a URL → health check is skipped with a warning, no throw', async () => {
+    const deps = makeDeps({ deployedApp: { name: 'mcp' } }); // no url field
+    const result = await runTeleport({ name: 'mcp' }, deps);
+    expect(result.url).toBeUndefined();
+    expect(deps.healthProbeUrls).toEqual([]);
+    expect(deps.warnLines.join('\n')).toContain('Skipping health check');
+  });
+  test('appInfo lacks url but findApp has it → falls back, still runs health check', async () => {
+    // Mirrors real siteio behavior: `apps info --json` omits the
+    // generated subdomain URL even though `apps list --json` surfaces
+    // it. We fall back to findApp (which wraps `apps list`) so the
+    // health check can still run.
+    const deps = makeDeps({
+      deployedApp: { name: 'mcp' }, // info: no url
+      // existingApp is read by findApp on re-call — setting it supplies
+      // the fallback URL.
+      existingApp: { name: 'mcp', url: 'https://mcp.siteio.example.com' },
+    });
+    // runTeleport's "create" path refuses to proceed when the preflight
+    // findApp call reports an existing app, and the fixture cannot return
+    // different values on successive calls. Override findApp and appInfo
+    // on the runner instead so each call can be scripted individually.
+    let findAppCalls = 0;
+    const deployInfo: SiteioApp = { name: 'mcp' }; // info returns no url
+    const fallbackInfo: SiteioApp = {
+      name: 'mcp',
+      url: 'https://mcp.siteio.example.com',
+    };
+    deps.runner.findApp = async () => {
+      findAppCalls++;
+      // First call (preflight — "does app already exist?") must return null
+      // so runTeleport proceeds with create. Second call (post-deploy URL
+      // fallback) returns the populated URL.
+      return findAppCalls === 1 ? null : fallbackInfo;
+    };
+    deps.runner.appInfo = async () => deployInfo;
+    await runTeleport({ name: 'mcp' }, deps);
+    expect(findAppCalls).toBe(2);
+    expect(deps.healthProbeUrls[0]).toBe('https://mcp.siteio.example.com/health');
+  });
+});
+describe('runTeleport — health check on --sync', () => {
+  test('sync happy path probes /health after restart', async () => {
+    const deps = makeDeps({
+      existingApp: { name: 'mcp', url: 'https://mcp.siteio.example.com' },
+      deployedApp: {
+        name: 'mcp',
+        url: 'https://mcp.siteio.example.com',
+        volumes: [`agentio-data-mcp:${DATA_VOLUME_PATH}`],
+      },
+    });
+    await runTeleport({ name: 'mcp', sync: true }, deps);
+    expect(deps.healthProbeUrls[0]).toBe('https://mcp.siteio.example.com/health');
+    // No log fetch on a happy sync.
+    expect(deps.calls.map((c) => c.method)).not.toContain('logsApp');
+  });
+  test('sync health times out → logs fetched + thrown', async () => {
+    const deps = makeDeps({
+      existingApp: { name: 'mcp', url: 'https://mcp.siteio.example.com' },
+      deployedApp: {
+        name: 'mcp',
+        url: 'https://mcp.siteio.example.com',
+        volumes: [`agentio-data-mcp:${DATA_VOLUME_PATH}`],
+      },
+      healthProbeResponses: [],
+      logsStdout: 'boom\n',
+    });
+    await expect(
+      runTeleport({ name: 'mcp', sync: true }, deps)
+    ).rejects.toThrow(/\/health never returned 200/);
+    expect(deps.calls.map((c) => c.method)).toContain('logsApp');
+    expect(deps.warnLines.join('\n')).toContain('boom');
+  });
+});

package/src/commands/teleport.ts CHANGED Viewed

@@ -133,10 +133,59 @@ export interface TeleportDeps {
    * siteio `--git` argument in git-mode.
    */
   detectGitOriginUrl: () => Promise<string | null>;
+  /**
+   * HTTP probe used by `waitForHealth`. Returns the status code (200 on
+   * a healthy server). Network errors are surfaced as `null` so the
+   * poller can treat them the same as a not-yet-ready container.
+   */
+  probeHealth: (url: string) => Promise<number | null>;
+  /** Resolved after `ms` milliseconds. Injected for testability. */
+  sleep: (ms: number) => Promise<void>;
   log: (msg: string) => void;
   warn: (msg: string) => void;
 }
+/* ------------------------------------------------------------------ */
+/* health polling                                                     */
+/* ------------------------------------------------------------------ */
+/** How long to wait for /health to return 200 before giving up. */
+export const HEALTH_TIMEOUT_MS = 90_000;
+/** Spacing between consecutive /health probes. */
+export const HEALTH_INTERVAL_MS = 2_000;
+/** Number of log lines to surface when the health check times out. */
+export const HEALTH_FAILURE_LOG_TAIL = 50;
+/**
+ * Poll `${url}/health` until it returns 200 or the attempt budget
+ * (ceil(timeoutMs / intervalMs), never fewer than 1) is exhausted.
+ * Trailing slashes on `url` are stripped before `/health` is appended.
+ *
+ * Any result other than exactly 200 — another status code or `null` —
+ * counts as "not ready yet": the loop sleeps `intervalMs` and retries.
+ * No sleep is issued after the final attempt.
+ *
+ * Uses an attempt-count loop (not wall clock) so tests that stub
+ * `deps.sleep` to a no-op can exercise the timeout path without
+ * actually waiting 90 real seconds. As a consequence the time spent
+ * inside each probe is not deducted from the budget, so real elapsed
+ * time can exceed `timeoutMs` when probes themselves are slow.
+ *
+ * @param url Base URL of the deployed app.
+ * @param deps Injected probe, sleep and log functions.
+ * @param opts Optional overrides; `timeoutMs` defaults to
+ *   HEALTH_TIMEOUT_MS and `intervalMs` to HEALTH_INTERVAL_MS.
+ *   NOTE(review): `intervalMs` is assumed to be > 0 — a value of 0
+ *   makes the computed attempt count non-finite.
+ * @returns true as soon as a probe returns 200; false once every
+ *   attempt has been used without seeing 200.
+ */
+export async function waitForHealth(
+  url: string,
+  deps: Pick<TeleportDeps, 'probeHealth' | 'sleep' | 'log'>,
+  opts: { timeoutMs?: number; intervalMs?: number } = {}
+): Promise<boolean> {
+  const timeoutMs = opts.timeoutMs ?? HEALTH_TIMEOUT_MS;
+  const intervalMs = opts.intervalMs ?? HEALTH_INTERVAL_MS;
+  const maxAttempts = Math.max(1, Math.ceil(timeoutMs / intervalMs));
+  const healthUrl = `${url.replace(/\/+$/, '')}/health`;
+  deps.log(`Waiting for ${healthUrl} (up to ${Math.round(timeoutMs / 1000)}s)…`);
+  for (let attempt = 1; attempt <= maxAttempts; attempt++) {
+    const status = await deps.probeHealth(healthUrl);
+    if (status === 200) {
+      deps.log(`  /health responded 200 after ${attempt} attempt(s).`);
+      return true;
+    }
+    if (attempt < maxAttempts) {
+      await deps.sleep(intervalMs);
+    }
+  }
+  return false;
+}
 export interface TeleportOptions {
   name: string;
   dockerfileOnly?: boolean;
@@ -300,8 +349,38 @@ async function runSync(
   await deps.runner.restartApp(opts.name);
   // We already fetched appInfo earlier for volume detection; reuse
-  // its URL field rather than calling again.
-  const url = typeof detail?.url === 'string' ? detail.url : undefined;
+  // its URL field rather than calling again. Same fallback as the
+  // full-teleport path: siteio's `apps info --json` omits the
+  // generated subdomain URL, so fall back to findApp if it's missing.
+  let url = typeof detail?.url === 'string' ? detail.url : undefined;
+  if (!url) {
+    const listed = await deps.runner.findApp(opts.name);
+    if (typeof listed?.url === 'string') url = listed.url;
+  }
+  // Same health-check / log-surface pattern as the full teleport path.
+  // A sync that breaks the container (bad env, corrupted config blob,
+  // volume backfill surprise) should fail loudly instead of silently
+  // leaving a crash-looping remote.
+  if (url) {
+    const healthy = await waitForHealth(url, deps);
+    if (!healthy) {
+      deps.warn(
+        `Container failed to report healthy after ${Math.round(HEALTH_TIMEOUT_MS / 1000)}s. Fetching logs…`
+      );
+      const logs = await deps.runner.logsApp(opts.name, {
+        tail: HEALTH_FAILURE_LOG_TAIL,
+      });
+      deps.warn('--- container logs (tail) ---');
+      deps.warn(logs.trim() || '(no logs returned by siteio)');
+      deps.warn('--- end logs ---');
+      throw new CliError(
+        'API_ERROR',
+        `Sync to "${opts.name}" restarted the container but /health never returned 200`,
+        'Inspect the logs above. The previous config is gone — the next sync (or a manual `siteio apps restart`) will still see the broken state until you fix the root cause.'
+      );
+    }
+  }
   deps.log('');
   deps.log('Sync complete!');
@@ -568,7 +647,47 @@ export async function runTeleport(
     // Try to surface the deployed URL. Non-fatal if siteio doesn't
     // give us one back.
     const info = await deps.runner.appInfo(opts.name);
-    const url = typeof info?.url === 'string' ? info.url : undefined;
+    let url = typeof info?.url === 'string' ? info.url : undefined;
+    // siteio's `apps info --json` output omits the generated subdomain
+    // URL (domains: [] in the payload) even though the app is reachable
+    // at it. `apps list --json` DOES include the url field at the top
+    // level. Fall back to findApp so the post-deploy health check can
+    // still run even when siteio doesn't surface url in info.
+    if (!url) {
+      const listed = await deps.runner.findApp(opts.name);
+      if (typeof listed?.url === 'string') url = listed.url;
+    }
+    // Poll /health to CONFIRM the container actually came up. siteio's
+    // deploy returns success as soon as Docker starts the container, so
+    // a crash-loop (bad volume permissions, bad config, missing binary,
+    // etc.) looks like a successful deploy until the user probes it
+    // themselves. Surfacing logs on timeout is the fix.
+    if (url) {
+      const healthy = await waitForHealth(url, deps);
+      if (!healthy) {
+        deps.warn(
+          `Container failed to report healthy after ${Math.round(HEALTH_TIMEOUT_MS / 1000)}s. Fetching logs…`
+        );
+        const logs = await deps.runner.logsApp(opts.name, {
+          tail: HEALTH_FAILURE_LOG_TAIL,
+        });
+        deps.warn('--- container logs (tail) ---');
+        deps.warn(logs.trim() || '(no logs returned by siteio)');
+        deps.warn('--- end logs ---');
+        throw new CliError(
+          'API_ERROR',
+          `Deploy "${opts.name}" started but /health never returned 200`,
+          'Inspect the logs above. Common causes: permission errors on mounted volumes, missing env vars, binary not found for the container arch.'
+        );
+      }
+    } else {
+      deps.warn(
+        'Skipping health check: siteio did not return a URL for this app. ' +
+          `Run \`siteio apps info ${opts.name}\` and curl <url>/health manually to verify.`
+      );
+    }
     const claudeCmd = url
       ? `claude mcp add --scope local --transport http agentio "${url}/mcp?services=rss"`
       : null;
@@ -634,6 +753,34 @@ async function defaultRemoveTempFile(path: string): Promise<void> {
  * Returns null if the cwd isn't a git repo, has no origin remote, or if
  * the git binary isn't on PATH.
  */
+/**
+ * Default health probe: issues a GET to the given URL and reports only
+ * the status code — the response body is read and discarded. Returns
+ * the HTTP status code of whatever response arrives (not just 200), or
+ * null when the request throws: DNS failure, connection refused, TLS
+ * error, or the 5-second per-attempt timeout aborting the fetch. We
+ * treat those errors the same as "not ready yet" so the poller keeps
+ * retrying.
+ *
+ * @param url Full URL to probe.
+ * @returns The response status code, or null if no response was obtained.
+ */
+async function defaultProbeHealth(url: string): Promise<number | null> {
+  try {
+    // Short timeout per attempt so a hung connection can't eat the
+    // whole polling budget. AbortSignal.timeout is natively supported
+    // by Bun's fetch.
+    const res = await fetch(url, {
+      method: 'GET',
+      signal: AbortSignal.timeout(5_000),
+    });
+    // Drain the body so the socket is released promptly; a body-read
+    // failure is ignored because only the status code matters here.
+    await res.text().catch(() => {});
+    return res.status;
+  } catch {
+    return null;
+  }
+}
+async function defaultSleep(ms: number): Promise<void> {
+  return new Promise((resolve) => setTimeout(resolve, ms));
+}
 async function defaultDetectGitOriginUrl(): Promise<string | null> {
   try {
     const proc = Bun.spawn(['git', 'remote', 'get-url', 'origin'], {
@@ -722,6 +869,8 @@ export function registerTeleportCommand(parent: Command): void {
             writeTempFile: defaultWriteTempFile,
             removeTempFile: defaultRemoveTempFile,
             detectGitOriginUrl: defaultDetectGitOriginUrl,
+            probeHealth: defaultProbeHealth,
+            sleep: defaultSleep,
             log: (msg) => console.log(msg),
             warn: (msg) => console.error(msg),
           }

package/src/server/dockerfile-gen.test.ts CHANGED Viewed

@@ -27,11 +27,12 @@ describe('generateTeleportDockerfile — structural invariants', () => {
     expect(df).toContain('FROM ubuntu:24.04');
   });
-  test('installs ca-certificates, curl, tini', () => {
+  test('installs ca-certificates, curl, tini, gosu', () => {
     const df = generateTeleportDockerfile();
     expect(df).toContain('ca-certificates');
     expect(df).toContain('curl');
     expect(df).toContain('tini');
+    expect(df).toContain('gosu');
   });
   test('cleans up apt lists (image size hygiene)', () => {
@@ -43,7 +44,14 @@ describe('generateTeleportDockerfile — structural invariants', () => {
     const df = generateTeleportDockerfile();
     expect(df).toContain('groupadd -g 1001 agentio');
     expect(df).toContain('useradd -u 1001 -g agentio');
-    expect(df).toContain('USER agentio');
+  });
+  test('does NOT set USER agentio (container runs as root until entrypoint drops privileges via gosu)', () => {
+    // The container boots as root so the CMD can chown /data (the
+    // persistent volume mounts as root:root on first boot). gosu then
+    // drops privileges before any user-input-processing code runs.
+    const df = generateTeleportDockerfile();
+    expect(df).not.toMatch(/^USER\s+agentio/m);
   });
   test('sets HOME, XDG_CONFIG_HOME, PATH for the non-root user', () => {
@@ -53,9 +61,15 @@ describe('generateTeleportDockerfile — structural invariants', () => {
     expect(df).toContain('ENV PATH="/home/agentio/bin:${PATH}"');
   });
-  test('ensures /data and /home/agentio/bin are owned by agentio', () => {
+  test('ensures /home/agentio/bin is owned by agentio at build time', () => {
+    const df = generateTeleportDockerfile();
+    expect(df).toContain('chown -R agentio:agentio /home/agentio/bin');
+  });
+  test('chowns /data at container START (in CMD) — volume mount masks any build-time ownership', () => {
     const df = generateTeleportDockerfile();
-    expect(df).toContain('chown -R agentio:agentio /data /home/agentio/bin');
+    const cmdLine = df.match(/CMD \[.*\]/)?.[0] ?? '';
+    expect(cmdLine).toContain('chown -R agentio:agentio /data');
   });
   test('never uses COPY or ADD (siteio inline Dockerfile constraint)', () => {
@@ -151,10 +165,10 @@ describe('generateTeleportDockerfile — port + healthcheck + entrypoint', () =>
     expect(df).toContain('ENTRYPOINT ["/usr/bin/tini", "--"]');
   });
-  test('CMD runs config import THEN the server, via sh -c', () => {
+  test('CMD chowns /data, drops privileges via gosu, imports config, execs server', () => {
     const df = generateTeleportDockerfile();
     expect(df).toContain(
-      'CMD ["sh", "-c", "agentio config import && exec agentio server start --foreground --host 0.0.0.0 --port 9999"]'
+      'CMD ["sh", "-c", "chown -R agentio:agentio /data && exec gosu agentio sh -c \'agentio config import && exec agentio server start --foreground --host 0.0.0.0 --port 9999\'"]'
     );
   });
@@ -164,6 +178,12 @@ describe('generateTeleportDockerfile — port + healthcheck + entrypoint', () =>
     expect(cmdLine).toContain('exec agentio server start');
   });
+  test('CMD uses `exec gosu agentio` so the server is NOT a child of the root shell (so tini sees the right PID)', () => {
+    const df = generateTeleportDockerfile();
+    const cmdLine = df.match(/CMD \[.*\]/)?.[0] ?? '';
+    expect(cmdLine).toContain('exec gosu agentio');
+  });
   test('CMD binds to 0.0.0.0 (required for Docker networking)', () => {
     const df = generateTeleportDockerfile();
     expect(df).toContain('--host 0.0.0.0');
@@ -171,13 +191,19 @@ describe('generateTeleportDockerfile — port + healthcheck + entrypoint', () =>
 });
 describe('generateTeleportDockerfile — security posture', () => {
-  test('switches to non-root user BEFORE the CMD runs', () => {
+  test('drops to non-root BEFORE any user-input code runs (config import, server)', () => {
+    // The boot sequence in CMD is: root runs chown /data, then `exec gosu
+    // agentio` hands control to the agentio user. Only AFTER that does
+    // `agentio config import` (which decrypts AGENTIO_CONFIG) and the
+    // server ever run. Confirm the ordering inside the CMD string itself.
     const df = generateTeleportDockerfile();
-    const userIdx = df.indexOf('USER agentio');
-    const cmdIdx = df.search(/^CMD /m);
-    expect(userIdx).toBeGreaterThan(-1);
-    expect(cmdIdx).toBeGreaterThan(-1);
-    expect(userIdx).toBeLessThan(cmdIdx);
+    const cmdLine = df.match(/CMD \[.*\]/)?.[0] ?? '';
+    const gosuIdx = cmdLine.indexOf('gosu agentio');
+    const importIdx = cmdLine.indexOf('agentio config import');
+    const serverIdx = cmdLine.indexOf('agentio server start');
+    expect(gosuIdx).toBeGreaterThan(-1);
+    expect(importIdx).toBeGreaterThan(gosuIdx);
+    expect(serverIdx).toBeGreaterThan(gosuIdx);
   });
   test('does not install sudo', () => {

package/src/server/dockerfile-gen.ts CHANGED Viewed

@@ -52,16 +52,21 @@ FROM ubuntu:24.04
 #   ca-certificates : HTTPS calls from the container
 #   curl            : release binary download + healthcheck
 #   tini            : proper PID 1 / signal handling
+#   gosu            : drop privileges from root to the agentio user at
+#                     container START (AFTER we've chowned the persistent
+#                     /data volume, which Docker mounts as root-owned)
 RUN apt-get update && apt-get install -y --no-install-recommends \\
-    ca-certificates curl tini \\
+    ca-certificates curl tini gosu \\
     && rm -rf /var/lib/apt/lists/*
-# Non-root user, home at /data so config.json + tokens.enc live in
-# a siteio-managed persistent volume path.
+# Non-root user. /data will be a siteio-managed persistent volume mount,
+# so its build-time ownership is meaningless — the volume masks whatever
+# we chown here at image-build time. We fix ownership at container-START
+# via the entrypoint script below (requires root, hence no USER directive).
 RUN groupadd -g 1001 agentio \\
     && useradd -u 1001 -g agentio -d /home/agentio -m agentio \\
     && mkdir -p /data /home/agentio/bin \\
-    && chown -R agentio:agentio /data /home/agentio/bin
+    && chown -R agentio:agentio /home/agentio/bin
 # Fetch the agentio linux binary at BUILD time (not boot) so siteio
 # caches the layer and subsequent deploys reuse it unless --no-cache
@@ -80,7 +85,6 @@ RUN set -eux; \\
     chmod +x /home/agentio/bin/agentio; \\
     chown agentio:agentio /home/agentio/bin/agentio
-USER agentio
 ENV HOME=/data
 ENV XDG_CONFIG_HOME=/data
 ENV PATH="/home/agentio/bin:\${PATH}"
@@ -93,9 +97,12 @@ EXPOSE ${port}
 HEALTHCHECK --interval=30s --timeout=5s --start-period=30s --retries=3 \\
     CMD curl -sf http://localhost:${port}/health || exit 1
-# tini becomes PID 1 for signal handling. sh -c runs the import-then-
-# server pipeline so SIGTERM propagates correctly to the running server.
+# tini becomes PID 1 for signal handling. The sh -c script runs as ROOT
+# so it can chown the freshly-mounted /data volume (Docker mounts named
+# volumes with root:root ownership on first boot, overriding any
+# build-time chown), then drops to the agentio user via gosu and exec's
+# the config-import + server pipeline so SIGTERM propagates correctly.
 ENTRYPOINT ["/usr/bin/tini", "--"]
-CMD ["sh", "-c", "agentio config import && exec agentio server start --foreground --host 0.0.0.0 --port ${port}"]
+CMD ["sh", "-c", "chown -R agentio:agentio /data && exec gosu agentio sh -c 'agentio config import && exec agentio server start --foreground --host 0.0.0.0 --port ${port}'"]
 `;
 }

package/src/server/dockerfile-teleport.test.ts CHANGED Viewed

@@ -76,11 +76,12 @@ describe('docker/Dockerfile.teleport — structural invariants', () => {
     expect(df).toMatch(/FROM\s+ubuntu:24\.04\s*(?:\n|$)/);
   });
-  test('stage 2 installs ca-certificates, curl, tini', () => {
+  test('stage 2 installs ca-certificates, curl, tini, gosu', () => {
     const df = loadDockerfile();
     expect(df).toContain('ca-certificates');
     expect(df).toContain('curl');
     expect(df).toContain('tini');
+    expect(df).toContain('gosu');
   });
   test('stage 2 cleans up apt lists', () => {
@@ -92,7 +93,15 @@ describe('docker/Dockerfile.teleport — structural invariants', () => {
     const df = loadDockerfile();
     expect(df).toContain('groupadd -g 1001 agentio');
     expect(df).toContain('useradd -u 1001 -g agentio');
-    expect(df).toContain('USER agentio');
+  });
+  test('stage 2 does NOT set USER agentio (container runs as root until entrypoint drops privileges via gosu)', () => {
+    // Container boots as root so the CMD can chown /data (Docker mounts
+    // named volumes with root:root ownership on first boot, overriding
+    // any build-time chown). gosu drops privileges before any user-input
+    // code runs.
+    const df = loadDockerfile();
+    expect(df).not.toMatch(/^USER\s+agentio/m);
   });
   test('copies binary from stage 1 with --chown=agentio:agentio', () => {
@@ -133,10 +142,10 @@ describe('docker/Dockerfile.teleport — structural invariants', () => {
     expect(df).toContain('ENTRYPOINT ["/usr/bin/tini", "--"]');
   });
-  test('CMD runs config import THEN the server, via sh -c with exec', () => {
+  test('CMD chowns /data, drops privileges via gosu, imports config, execs server', () => {
     const df = loadDockerfile();
     expect(df).toContain(
-      'CMD ["sh", "-c", "agentio config import && exec agentio server start --foreground --host 0.0.0.0 --port 9999"]'
+      'CMD ["sh", "-c", "chown -R agentio:agentio /data && exec gosu agentio sh -c \'agentio config import && exec agentio server start --foreground --host 0.0.0.0 --port 9999\'"]'
     );
   });
@@ -148,14 +157,19 @@ describe('docker/Dockerfile.teleport — structural invariants', () => {
 });
 describe('docker/Dockerfile.teleport — security posture', () => {
-  test('runtime stage switches to USER agentio before the CMD runs', () => {
+  test('drops to non-root BEFORE any user-input code runs (config import, server)', () => {
+    // The boot sequence in CMD is: root runs chown /data, then `exec gosu
+    // agentio` hands control to the agentio user. Only AFTER that does
+    // `agentio config import` (which decrypts AGENTIO_CONFIG) and the
+    // server ever run. Confirm the ordering inside the CMD string itself.
     const df = loadDockerfile();
-    // Find the position of USER agentio and the FINAL CMD/ENTRYPOINT.
-    const userIdx = df.lastIndexOf('USER agentio');
-    const cmdIdx = df.search(/^CMD /m);
-    expect(userIdx).toBeGreaterThan(-1);
-    expect(cmdIdx).toBeGreaterThan(-1);
-    expect(userIdx).toBeLessThan(cmdIdx);
+    const cmdLine = df.match(/CMD \[.*\]/m)?.[0] ?? '';
+    const gosuIdx = cmdLine.indexOf('gosu agentio');
+    const importIdx = cmdLine.indexOf('agentio config import');
+    const serverIdx = cmdLine.indexOf('agentio server start');
+    expect(gosuIdx).toBeGreaterThan(-1);
+    expect(importIdx).toBeGreaterThan(gosuIdx);
+    expect(serverIdx).toBeGreaterThan(gosuIdx);
   });
   test('does not install sudo', () => {
@@ -167,14 +181,4 @@ describe('docker/Dockerfile.teleport — security posture', () => {
     const df = loadDockerfile();
     expect(df).not.toContain('EXPOSE 22');
   });
-  test('HEALTHCHECK runs under the agentio user (no root escalation)', () => {
-    // Since USER agentio is set before HEALTHCHECK and CMD, they both
-    // run as agentio. Verify by ordering: USER must come before the
-    // HEALTHCHECK directive.
-    const df = loadDockerfile();
-    const userIdx = df.lastIndexOf('USER agentio');
-    const hcIdx = df.indexOf('HEALTHCHECK');
-    expect(userIdx).toBeLessThan(hcIdx);
-  });
 });

package/src/server/siteio-runner.test.ts CHANGED Viewed

@@ -718,3 +718,49 @@ describe('appInfo', () => {
     expect(info!.url).toBe('https://mcp.x.siteio.me');
   });
 });
+/* ------------------------------------------------------------------ */
+/* logsApp                                                             */
+/* ------------------------------------------------------------------ */
+describe('logsApp', () => {
+  test('emits exact argv without --tail when tail is omitted', async () => {
+    const { spawn, calls } = makeMockSpawn({
+      responses: [{ exitCode: 0, stdout: 'log line 1\nlog line 2\n', stderr: '' }],
+    });
+    const runner = createSiteioRunner(spawn);
+    const out = await runner.logsApp('mcp');
+    expect(out).toBe('log line 1\nlog line 2\n');
+    expect(calls[0].cmd).toEqual(['siteio', 'apps', 'logs', 'mcp']);
+  });
+  test('passes --tail N when provided', async () => {
+    const { spawn, calls } = makeMockSpawn({
+      responses: [{ exitCode: 0, stdout: 'tail output\n', stderr: '' }],
+    });
+    const runner = createSiteioRunner(spawn);
+    await runner.logsApp('mcp', { tail: 40 });
+    expect(calls[0].cmd).toEqual([
+      'siteio',
+      'apps',
+      'logs',
+      'mcp',
+      '--tail',
+      '40',
+    ]);
+  });
+  test('returns partial stdout without throwing even on non-zero exit (best-effort)', async () => {
+    // Logs are consumed inside an already-failing path (crash-loop
+    // surfacing) so we don't want them to raise and mask the original
+    // error. Verify graceful return.
+    const { spawn } = makeMockSpawn({
+      responses: [
+        { exitCode: 1, stdout: 'some partial output\n', stderr: 'agent offline' },
+      ],
+    });
+    const runner = createSiteioRunner(spawn);
+    const out = await runner.logsApp('mcp', { tail: 10 });
+    expect(out).toBe('some partial output\n');
+  });
+});

package/src/server/siteio-runner.ts CHANGED Viewed

@@ -99,6 +99,16 @@ export interface SiteioRunner {
   restartApp(name: string): Promise<void>;
   /** `siteio apps info <name> --json` — used to surface the deployed URL. */
   appInfo(name: string): Promise<SiteioApp | null>;
+  /**
+   * `siteio apps logs <name> [--tail N]` — tail of the container's
+   * stdout/stderr. Used by teleport to surface crash-loop output when
+   * /health never becomes responsive after a deploy.
+   *
+   * Returns the captured stdout only; stderr is not merged in (siteio's
+   * own progress lines sometimes leak onto it). Best-effort: a non-zero
+   * exit does not throw; whatever stdout was captured is returned.
+   */
+  logsApp(name: string, opts?: { tail?: number }): Promise<string>;
 }
 /**
@@ -295,6 +305,19 @@ export function createSiteioRunner(
       }
     },
+    async logsApp(name, opts) {
+      const cmd = ['siteio', 'apps', 'logs', name];
+      if (opts?.tail != null) {
+        cmd.push('--tail', String(opts.tail));
+      }
+      const r = await spawn({ cmd });
+      // Logs are best-effort: if siteio errors (app doesn't exist yet,
+      // agent offline, etc), return whatever we got instead of throwing,
+      // so the caller — typically in an already-failing teleport path —
+      // can still surface something useful to the user.
+      return r.stdout;
+    },
     async appInfo(name) {
       const cmd = ['siteio', 'apps', 'info', name, '--json'];
       const r = await spawn({ cmd });