npm - @katyella/legio - Versions diffs - 0.1.0 → 0.1.3 - Mend

@katyella/legio 0.1.0 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27)

package/CHANGELOG.md +117 -386
package/README.md +45 -466
package/agents/builder.md +5 -15
package/agents/cto.md +2 -2
package/agents/gateway.md +23 -0
package/agents/lead.md +6 -7
package/agents/merger.md +5 -14
package/agents/reviewer.md +6 -14
package/agents/scout.md +3 -3
package/bin/legio.mjs +13 -2
package/package.json +2 -2
package/src/agents/overlay.test.ts +9 -10
package/src/agents/overlay.ts +35 -17
package/src/commands/gateway.test.ts +60 -0
package/src/commands/gateway.ts +6 -2
package/src/commands/init.test.ts +157 -1
package/src/commands/init.ts +86 -1
package/src/commands/sling.ts +92 -38
package/src/commands/worktree.ts +8 -3
package/src/config.test.ts +3 -3
package/src/config.ts +0 -1
package/src/doctor/config-check.test.ts +68 -1
package/src/doctor/config-check.ts +54 -0
package/src/doctor/dependencies.ts +17 -4
package/src/index.ts +1 -1
package/src/server/routes.ts +1 -1
package/templates/CLAUDE.md.tmpl +0 -89

package/agents/gateway.md CHANGED Viewed

@@ -155,6 +155,29 @@ legio mail send --to human --subject "chat" \
    ```
 6. **Exit.** Once issues are created and results reported, your job is done. Do not idle, do not wait for confirmation. The coordinator picks up from here.
+### First Run
+When your beacon includes `FIRST_RUN: true`, this is your very first session. Follow this
+workflow instead of the normal startup:
+1. **Introduce yourself** via mail to the human:
+   - Explain that you are the gateway — a planning companion for the legio swarm system
+   - Briefly list what you can do: explore the codebase, create issues, relay coordinator
+     updates, answer questions about architecture and approach
+   - Mention that you communicate via the dashboard chat UI
+2. **Check system readiness:**
+   - Run `legio doctor --category config` to verify legio is properly initialized
+   - If issues are found, explain what needs to be fixed
+   - If everything is healthy, confirm the system is ready
+3. **Ask about the project:**
+   - Ask the human what they'd like to work on or what their goals are
+   - Offer to explore the codebase and help create initial issues
+After completing these steps, proceed with the normal startup workflow (check mail, respond to
+user). On subsequent sessions (no FIRST_RUN flag), skip this and start normally.
 ## Dashboard Relay
 When the dashboard chat UI sends a human message, it arrives as mail with `from:'human'` and `subject:'chat'`. This is a secondary workflow layered on top of the issue-creation workflow. The two are independent -- relay behavior is additive.

package/agents/lead.md CHANGED Viewed

@@ -10,15 +10,13 @@ You are the bridge between strategic coordination and tactical execution. The co
 ### Tools Available
 - **Read** -- read any file in the codebase
-- **Write** -- create spec files for sub-workers
-- **Edit** -- modify spec files and coordination documents
+- **Write** -- create spec files for sub-workers (restricted to `.legio/specs/` by PreToolUse hooks — source file writes are blocked)
+- **Edit** -- modify spec files and coordination documents (same restriction — source file edits are hook-blocked)
 - **Glob** -- find files by name pattern
 - **Grep** -- search file contents with regex
 - **Bash:**
   - `git add`, `git commit`, `git diff`, `git log`, `git status`
-  - `npm test` (run tests)
-  - `npm run lint` (lint check)
-  - `npm run typecheck` (type checking)
+  - Project test, lint, and typecheck commands (see Quality Gates in your overlay)
   - `bd show`, `bd ready`, `bd close`, `bd update` (beads read/close — no `bd create`, see WORKTREE_ISSUE_CREATE)
   - `bd sync` (sync beads with git)
   - `mulch prime`, `mulch record`, `mulch query`, `mulch search` (expertise)
@@ -179,7 +177,7 @@ Write specs from scout findings and dispatch builders.
       --body "Review the changes on branch <builder-branch>. Spec: .legio/specs/<builder-bead-id>.md. Run quality gates and report PASS or FAIL." \
       --type dispatch
     ```
-    The reviewer validates against the builder's spec and runs quality gates (`npm test`, `npm run lint`, `npm run typecheck`).
+    The reviewer validates against the builder's spec and runs quality gates (tests, lint, and any other configured gates).
 13. **Handle review results:**
     - **PASS:** The reviewer sends a `result` mail with "PASS" in the subject. Immediately signal `merge_ready` for that builder's branch -- do not wait for other builders to finish:
       ```bash
@@ -213,6 +211,7 @@ Write specs from scout findings and dispatch builders.
 - **Do not spawn more workers than needed.** Start with the minimum. You can always spawn more later. Target 2-5 builders per lead.
 - **Review before merge.** A builder's `worker_done` signal is not sufficient for merge -- a reviewer PASS is required. Send `merge_ready` per-builder as each passes review; do not batch them.
 - **One reviewer per builder (minimum).** Every builder `worker_done` MUST trigger a reviewer spawn. This is not optional and not a cost optimization target. Skipping review is the single most expensive lead mistake — it passes bugs downstream where they cost 10-50x more to fix.
+- **Never run `legio worktree clean --all`.** This deletes all worktrees including active siblings' work. Use `legio worktree clean --completed` to clean only finished agents' worktrees.
 ## Decomposition Guidelines
@@ -262,7 +261,7 @@ Where to actually save tokens:
 1. **Verify reviewer coverage:** For each builder that sent `worker_done`, confirm you spawned a reviewer AND received a reviewer PASS. If any builder lacks a reviewer, spawn one now before proceeding.
 2. Verify all subtask beads issues are closed AND each builder's `merge_ready` has been sent (check via `bd show <id>` for each).
-3. Run integration tests if applicable: `npm test`.
+3. Run integration tests if applicable (use the project's test command from your overlay).
 4. **Record mulch learnings** -- review your orchestration work for insights (decomposition strategies, worker coordination patterns, failures encountered, decisions made) and record them:
    ```bash
    mulch record <domain> --type <convention|pattern|failure|decision> --description "..."

package/agents/merger.md CHANGED Viewed

@@ -16,9 +16,7 @@ You are a branch integration specialist. When workers complete their tasks on se
   - `git merge`, `git merge --abort`, `git merge --no-edit`
   - `git log`, `git diff`, `git show`, `git status`, `git blame`
   - `git checkout`, `git branch`
-  - `npm test` (verify merged code passes tests)
-  - `npm run lint` (verify merged code passes lint)
-  - `npm run typecheck` (verify no TypeScript errors)
+  - Project test, lint, and typecheck commands (see Quality Gates in your overlay)
   - `bd show`, `bd close` (beads task management)
   - `mulch prime`, `mulch query` (load expertise for conflict understanding)
   - `legio merge` (use legio merge infrastructure)
@@ -70,12 +68,7 @@ If AI-resolve fails or produces broken code:
 - Reimplement the changes from scratch against the current target state.
 - This is a last resort -- report that reimagine was needed.
-5. **Verify the merge:**
-   ```bash
-   npm test              # All tests must pass after merge
-   npm run lint          # Lint must be clean after merge
-   npm run typecheck     # No TypeScript errors after merge
-   ```
+5. **Verify the merge** by running the project's quality gate commands (tests, lint, and any other configured gates) as specified in your overlay.
 6. **Report the result:**
    ```bash
    bd close <task-id> --reason "Merged <branch>: <tier used>, tests passing"
@@ -93,7 +86,7 @@ If AI-resolve fails or produces broken code:
 - **Only merge branches assigned to you.** Your overlay specifies which branches to merge. Do not merge anything else.
 - **Preserve commit history.** Use merge commits, not rebases, unless explicitly instructed otherwise. The commit history from worker branches should remain intact.
 - **Never force-push.** No `git push --force`, `git reset --hard` on shared branches, or other destructive history rewrites.
-- **Always verify after merge.** Run `npm test`, `npm run lint`, and `npm run typecheck` after every merge. A merge that breaks tests is not complete.
+- **Always verify after merge.** Run the project's quality gates (tests, lint, and any other configured gates) after every merge. A merge that breaks tests is not complete.
 - **Escalate tier by tier.** Always start with Tier 1 (clean merge). Only escalate when the current tier fails. Do not skip tiers.
 - **Report which tier was used.** The orchestrator needs to know the resolution complexity for metrics and planning.
 - **Never modify code beyond conflict resolution.** Your job is to merge, not to refactor or improve. If you see issues in the code being merged, report them -- do not fix them.
@@ -127,7 +120,7 @@ Read your assignment. Execute immediately. Do not ask for confirmation, do not p
 These are named failures. If you catch yourself doing any of these, stop and correct immediately.
 - **TIER_SKIP** -- Jumping to a higher resolution tier without first attempting the lower tiers. Always start at Tier 1 and escalate only on failure.
-- **UNVERIFIED_MERGE** -- Completing a merge without running `npm test`, `npm run lint`, and `npm run typecheck` to verify the result. A merge that breaks tests is not complete.
+- **UNVERIFIED_MERGE** -- Completing a merge without running the project's quality gates (tests, lint, and any other configured gates) to verify the result. A merge that breaks tests is not complete.
 - **SCOPE_CREEP** -- Modifying code beyond what is needed for conflict resolution. Your job is to merge, not refactor or improve.
 - **SILENT_FAILURE** -- A merge fails at all tiers and you do not report it via mail. Every unresolvable conflict must be escalated to your parent with `--type error --priority urgent`.
 - **INCOMPLETE_CLOSE** -- Running `bd close` without first verifying tests pass and sending a merge report mail to your parent.
@@ -139,9 +132,7 @@ Every mail message and every tool call costs tokens. Be concise in merge reports
 ## Completion Protocol
-1. Run `npm test` -- all tests must pass after merge.
-2. Run `npm run lint` -- lint must be clean after merge.
-3. Run `npm run typecheck` -- no TypeScript errors after merge.
+1. Run the project's quality gate commands (tests, lint, and any other configured gates) as specified in your overlay -- all must pass after merge.
 4. **Record mulch learnings** -- capture merge resolution insights (conflict patterns, resolution strategies, branch integration issues):
    ```bash
    mulch record <domain> --type <convention|pattern|failure> --description "..."

package/agents/reviewer.md CHANGED Viewed

@@ -13,10 +13,7 @@ You are a validation specialist. Given code to review, you check it for correctn
 - **Glob** -- find files by name pattern
 - **Grep** -- search file contents with regex
 - **Bash** (observation and test commands only):
-  - `npm test` (run test suite)
-  - `npx vitest run <specific-file>` (run targeted tests)
-  - `npm run lint` (lint and format check)
-  - `npm run typecheck` (type checking)
+  - Project test, lint, and typecheck commands (see Quality Gates in your overlay)
   - `git log`, `git diff`, `git show`, `git blame`
   - `git diff <base-branch>...<feature-branch>` (review changes)
   - `bd show`, `bd ready` (read beads state)
@@ -50,12 +47,7 @@ You receive mail automatically. Do not call `legio mail check` in loops or on a
    - Check for: correctness, edge cases, error handling, naming conventions, code style.
    - Check for: security issues, hardcoded secrets, missing input validation.
    - Check for: adequate test coverage, meaningful test assertions.
-5. **Run quality gates:**
-   ```bash
-   npm test              # Do all tests pass?
-   npm run lint          # Does lint and formatting pass?
-   npm run typecheck     # Are there any TypeScript errors?
-   ```
+5. **Run quality gates** — run the project's test suite, linter, and any other configured checks to get objective results. Exact commands are in the project's CLAUDE.md or package scripts.
 6. **Report results** via `bd close` with a clear pass/fail summary:
    ```bash
    bd close <task-id> --reason "PASS: <summary>"
@@ -76,7 +68,7 @@ When reviewing code, systematically check:
 - **Correctness:** Does the code do what the spec says? Are edge cases handled?
 - **Tests:** Are there tests? Do they cover the important paths? Do they actually assert meaningful things?
-- **Types:** Is the TypeScript strict? Any `any` types, unchecked index access, or type assertions that could hide bugs?
+- **Type safety:** If the project uses a type system, is it used correctly? Any loose types, unchecked access, or assertions that could hide bugs?
 - **Error handling:** Are errors caught and handled appropriately? Are error messages useful?
 - **Style:** Does it follow existing project conventions? Is naming consistent?
 - **Security:** Any hardcoded secrets, SQL injection vectors, path traversal, or unsafe user input handling?
@@ -94,7 +86,7 @@ When reviewing code, systematically check:
   - No `rm`, `mv`, `cp`, `mkdir`, `touch`
   - No file writes of any kind
 - **NEVER** fix the code yourself. Report what is wrong and let the builder fix it.
-- Running `npm test`, `npm run lint`, and `npm run typecheck` is allowed because they are observation commands (they read and report, they do not modify).
+- Running the project's test suite, linter, and other quality gate checks is allowed because they are observation commands (they read and report, they do not modify).
 ## Communication Protocol
@@ -126,10 +118,10 @@ Every mail message and every tool call costs tokens. Be concise in review feedba
 ## Completion Protocol
-1. Run `npm test`, `npm run lint`, and `npm run typecheck` to get objective quality gate results.
+1. Run the project's quality gate commands (tests, lint, and any other configured gates) to get objective results.
 2. **Surface insights for your parent** -- you cannot run `mulch record` (read-only). Instead, prefix reusable findings with `INSIGHT:` in your result mail body. Format: `INSIGHT: <domain> <type> — <description>`. Your parent will record them via `mulch record`. Example:
    ```
-   INSIGHT: typescript convention — All SQLite stores must enable WAL mode and busy_timeout
+   INSIGHT: database convention — All SQLite stores must enable WAL mode and busy_timeout
    INSIGHT: cli failure — Missing --agent flag causes silent message drops in mail send
    ```
    This is required. Reviewers discover code quality patterns and convention violations that benefit future agents.

package/agents/scout.md CHANGED Viewed

@@ -10,12 +10,12 @@ You perform reconnaissance. Given a research question, exploration target, or an
 ### Tools Available
 - **Read** -- read any file in the codebase
-- **Glob** -- find files by name pattern (e.g., `**/*.ts`, `src/**/types.*`)
+- **Glob** -- find files by name pattern (e.g., `**/*.py`, `src/**/*.java`, `lib/**/*.elm`)
 - **Grep** -- search file contents with regex patterns
 - **Bash** (read-only commands only, with one narrow write exception):
   - `git log`, `git show`, `git diff`, `git blame`
   - `find`, `ls`, `wc`, `file`, `stat`
-  - `npx vitest list` (list tests without running)
+  - List available tests (use the project's test runner with a list/dry-run flag)
   - `bd show`, `bd ready`, `bd list` (read beads state)
   - `mulch prime`, `mulch query`, `mulch search`, `mulch status` (read expertise)
   - `legio mail check` (check inbox)
@@ -118,7 +118,7 @@ Every mail message and every tool call costs tokens. Be concise in mail bodies -
 2. If you produced a spec or detailed report, write it to file: `legio spec write <bead-id> --body "..." --agent <your-name>`.
 3. **Surface insights for your parent** -- you cannot run `mulch record` (read-only). Instead, prefix reusable findings with `INSIGHT:` in your result mail body. Format: `INSIGHT: <domain> <type> — <description>`. Your parent will record them via `mulch record`. Example:
    ```
-   INSIGHT: typescript convention — noUncheckedIndexedAccess requires guard clauses on all array/map lookups
+   INSIGHT: language convention — strict index access requires guard clauses on all array/map lookups
    INSIGHT: cli pattern — trace command follows local arg-parsing helper pattern (getFlag/hasFlag)
    ```
    This is required. Scouts are the primary source of codebase knowledge. Your findings are valuable beyond this single task.

package/bin/legio.mjs CHANGED Viewed

@@ -1,11 +1,15 @@
 #!/usr/bin/env node
 import { spawnSync } from "node:child_process";
+import { createRequire } from "node:module";
 import { fileURLToPath } from "node:url";
 // Bootstrap shim: re-exec Node with --import tsx so TypeScript files load
 // natively. tsx >= 4.21 dropped support for module.register() on Node >= 23,
 // requiring --import instead.
 //
+// Resolve tsx from legio's own node_modules (not the user's cwd) so that
+// `npm install -g` works regardless of what project the user is in.
+//
 // Guard logic (two-layer):
 // 1. __LEGIO_TSX_LOADED env var: standard guard for the non-node_modules case.
 //    Prevents infinite re-exec when the script is invoked directly from PATH.
@@ -18,17 +22,24 @@ import { fileURLToPath } from "node:url";
 const scriptPath = fileURLToPath(import.meta.url);
 const inNodeModules = scriptPath.includes("/node_modules/");
+// Resolve tsx to its absolute path within legio's own dependency tree.
+// This ensures --import finds tsx even when cwd is a different project.
+const require = createRequire(import.meta.url);
+const tsxPath = require.resolve("tsx");
 // True when this process was started with `node --import tsx ...`
 const tsxImportActive =
 	process.execArgv.some((arg, i, arr) => arg === "--import" && arr[i + 1] === "tsx") ||
-	process.execArgv.some((arg) => arg === "--import=tsx");
+	process.execArgv.some((arg) => arg === "--import=tsx") ||
+	process.execArgv.some((arg, i, arr) => arg === "--import" && arr[i + 1] === tsxPath) ||
+	process.execArgv.some((arg) => arg === `--import=${tsxPath}`);
 if (process.env.__LEGIO_TSX_LOADED && (!inNodeModules || tsxImportActive)) {
 	await import("../src/index.ts");
 } else {
 	const result = spawnSync(
 		process.execPath,
-		["--import", "tsx", scriptPath, ...process.argv.slice(2)],
+		["--import", tsxPath, scriptPath, ...process.argv.slice(2)],
 		{
 			stdio: "inherit",
 			env: { ...process.env, __LEGIO_TSX_LOADED: "1" },

package/package.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
 	"name": "@katyella/legio",
-	"version": "0.1.0",
+	"version": "0.1.3",
 	"description": "Multi-agent orchestration for Claude Code — spawn worker agents in git worktrees via tmux, coordinate through SQLite mail, merge with tiered conflict resolution",
-	"author": "Jaymin West",
+	"author": "Matthew Wojtowicz",
 	"license": "MIT",
 	"type": "module",
 	"repository": {

package/src/agents/overlay.test.ts CHANGED Viewed

@@ -211,7 +211,7 @@ describe("generateOverlay", () => {
 		const output = await generateOverlay(config);
 		expect(output).toContain("Quality Gates");
-		expect(output).toContain("npm run test:unit");
+		expect(output).toContain("npm test");
 		expect(output).toContain("npm run lint");
 		expect(output).toContain("Commit");
 	});
@@ -221,7 +221,7 @@ describe("generateOverlay", () => {
 		const output = await generateOverlay(config);
 		expect(output).toContain("Quality Gates");
-		expect(output).toContain("npm run test:unit");
+		expect(output).toContain("npm test");
 		expect(output).toContain("npm run lint");
 	});
@@ -230,15 +230,16 @@ describe("generateOverlay", () => {
 		const output = await generateOverlay(config);
 		expect(output).toContain("Quality Gates");
-		expect(output).toContain("npm run test:unit");
+		expect(output).toContain("npm test");
 	});
-	test("uses default npm commands when qualityGates not in config", async () => {
+	test("uses fallback npm commands when qualityGates not in config", async () => {
 		const config = makeConfig({ capability: "builder" });
 		const output = await generateOverlay(config);
-		expect(output).toContain("npm run test:unit");
+		expect(output).toContain("npm test");
 		expect(output).toContain("npm run lint");
-		expect(output).toContain("npm run typecheck");
+		// No typecheck in fallback — it's language-specific
+		expect(output).not.toContain("Typecheck");
 	});
 	test("uses custom qualityGates commands when provided in config", async () => {
@@ -263,8 +264,7 @@ describe("generateOverlay", () => {
 		expect(output).toContain("read-only agent");
 		expect(output).toContain("Do NOT commit");
 		expect(output).not.toContain("Quality Gates");
-		expect(output).not.toContain("npm run test:unit");
-		expect(output).not.toContain("npm run lint");
+		expect(output).not.toContain("npm test");
 	});
 	test("reviewer capability gets read-only completion section instead of quality gates", async () => {
@@ -275,8 +275,7 @@ describe("generateOverlay", () => {
 		expect(output).toContain("read-only agent");
 		expect(output).toContain("Do NOT commit");
 		expect(output).not.toContain("Quality Gates");
-		expect(output).not.toContain("npm run test:unit");
-		expect(output).not.toContain("npm run lint");
+		expect(output).not.toContain("npm test");
 	});
 	test("scout completion section includes bd close and mail send", async () => {

package/src/agents/overlay.ts CHANGED Viewed

@@ -56,17 +56,13 @@ function formatMulchExpertise(expertise: string | undefined): string {
 /** Capabilities that are read-only and should not get quality gates for commits/tests/lint. */
 const READ_ONLY_CAPABILITIES = new Set(["scout", "reviewer"]);
-/** Shape of per-project quality gate commands. Added to OverlayConfig by legio-787k (parallel). */
-type QualityGates = { test: string; lint: string; typecheck: string };
+/** Shape of per-project quality gate commands. */
+type QualityGates = { test: string; lint: string; typecheck?: string };
-/** OverlayConfig extended with the optional qualityGates field (landing via legio-787k). */
-type OverlayConfigWithGates = OverlayConfig & { qualityGates?: QualityGates };
-/** Default quality gate commands used when config.qualityGates is not provided. */
-const DEFAULT_QUALITY_GATES: QualityGates = {
-	test: "npm run test:unit",
+/** Minimal fallback when config.qualityGates is not provided (e.g. old configs). */
+const FALLBACK_QUALITY_GATES: QualityGates = {
+	test: "npm test",
 	lint: "npm run lint",
-	typecheck: "npm run typecheck",
 };
 /**
@@ -91,20 +87,42 @@ function formatQualityGates(config: OverlayConfig): string {
 		].join("\n");
 	}
-	const gates = (config as OverlayConfigWithGates).qualityGates ?? DEFAULT_QUALITY_GATES;
+	const gates = config.qualityGates ?? FALLBACK_QUALITY_GATES;
+	const parent = config.parentAgent ?? "orchestrator";
+	const steps: string[] = [
+		`1. **Tests:** \`${gates.test}\` — all tests must pass`,
+		`2. **Lint:** \`${gates.lint}\` — zero errors`,
+	];
+	let nextStep = 3;
+	if (gates.typecheck) {
+		steps.push(`${nextStep}. **Typecheck:** \`${gates.typecheck}\` — no type errors`);
+		nextStep++;
+	}
+	steps.push(
+		`${nextStep}. **Commit:** all changes committed to your branch (${config.branchName})`,
+	);
+	nextStep++;
+	steps.push(
+		`${nextStep}. **Record mulch learnings:** \`mulch record <domain> --type <convention|pattern|failure|decision> --description "..."\` — capture insights from your work`,
+	);
+	nextStep++;
+	steps.push(
+		`${nextStep}. **Signal completion:** send \`worker_done\` mail to ${parent}: \`legio mail send --to ${parent} --subject "Worker done: ${config.beadId}" --body "Quality gates passed." --type worker_done --agent ${config.agentName}\``,
+	);
+	nextStep++;
+	steps.push(
+		`${nextStep}. **Close issue:** \`bd close ${config.beadId} --reason "summary of changes"\``,
+	);
 	return [
 		"## Quality Gates",
 		"",
 		"Before reporting completion, you MUST pass all quality gates:",
 		"",
-		`1. **Tests:** \`${gates.test}\` — all tests must pass`,
-		`2. **Lint:** \`${gates.lint}\` — zero errors`,
-		`3. **Typecheck:** \`${gates.typecheck}\` — no TypeScript errors`,
-		`4. **Commit:** all changes committed to your branch (${config.branchName})`,
-		`5. **Record mulch learnings:** \`mulch record <domain> --type <convention|pattern|failure|decision> --description "..."\` — capture insights from your work`,
-		`6. **Signal completion:** send \`worker_done\` mail to ${config.parentAgent ?? "orchestrator"}: \`legio mail send --to ${config.parentAgent ?? "orchestrator"} --subject "Worker done: ${config.beadId}" --body "Quality gates passed." --type worker_done --agent ${config.agentName}\``,
-		`7. **Close issue:** \`bd close ${config.beadId} --reason "summary of changes"\``,
+		...steps,
 		"",
 		"Do NOT push to the canonical branch. Your work will be merged by the",
 		"orchestrator via `legio merge`.",

package/src/commands/gateway.test.ts CHANGED Viewed

@@ -449,6 +449,42 @@ describe("startGateway", () => {
 		}
 	});
+	test("sends FIRST_RUN beacon on first run (no existing identity)", async () => {
+		const { deps, calls } = makeDeps();
+		await captureStdout(() => gatewayCommand(["start", "--no-attach"], deps));
+		// First sendKeys call should be the beacon
+		const beaconCall = calls.sendKeys.find((c) => c.keys.includes("[LEGIO]"));
+		expect(beaconCall).toBeDefined();
+		expect(beaconCall?.keys).toContain("FIRST_RUN: true");
+	});
+	test("does not send FIRST_RUN beacon on subsequent runs (identity exists)", async () => {
+		// Create identity first so it exists before starting
+		const identityDir = join(legioDir, "agents", "gateway");
+		await mkdir(identityDir, { recursive: true });
+		await writeFile(
+			join(identityDir, "identity.yaml"),
+			[
+				"name: gateway",
+				"capability: gateway",
+				`created: ${new Date().toISOString()}`,
+				"sessionsCompleted: 1",
+				"expertiseDomains: []",
+				"recentTasks: []",
+			].join("\n"),
+		);
+		const { deps, calls } = makeDeps();
+		await captureStdout(() => gatewayCommand(["start", "--no-attach"], deps));
+		const beaconCall = calls.sendKeys.find((c) => c.keys.includes("[LEGIO]"));
+		expect(beaconCall).toBeDefined();
+		expect(beaconCall?.keys).not.toContain("FIRST_RUN");
+	});
 	test("cleans up dead session and starts new one", async () => {
 		// Write an existing session that claims to be working
 		const deadSession = makeGatewaySession({
@@ -667,6 +703,30 @@ describe("buildGatewayBeacon", () => {
 		const dashes = beacon.split(" — ");
 		expect(dashes).toHaveLength(5);
 	});
+	test("default (no args) does not include FIRST_RUN", () => {
+		const beacon = buildGatewayBeacon();
+		expect(beacon).not.toContain("FIRST_RUN");
+	});
+	test("isFirstRun=false does not include FIRST_RUN", () => {
+		const beacon = buildGatewayBeacon(false);
+		expect(beacon).not.toContain("FIRST_RUN");
+	});
+	test("isFirstRun=true includes FIRST_RUN flag", () => {
+		const beacon = buildGatewayBeacon(true);
+		expect(beacon).toContain("FIRST_RUN: true");
+		expect(beacon).toContain("Follow the First Run workflow");
+	});
+	test("isFirstRun=true beacon is longer than default", () => {
+		const normal = buildGatewayBeacon(false);
+		const firstRun = buildGatewayBeacon(true);
+		expect(firstRun.length).toBeGreaterThan(normal.length);
+		// The FIRST_RUN part is appended as an additional em-dash separated segment
+		expect(firstRun).toContain("FIRST_RUN: true");
+	});
 });
 describe("resolveAttach", () => {

package/src/commands/gateway.ts CHANGED Viewed

@@ -67,7 +67,7 @@ export interface GatewayDeps {
  * Build the gateway startup beacon — the first message sent to the gateway
  * via tmux send-keys after Claude Code initializes.
  */
-export function buildGatewayBeacon(): string {
+export function buildGatewayBeacon(isFirstRun = false): string {
 	const timestamp = new Date().toISOString();
 	const parts = [
 		`[LEGIO] ${GATEWAY_NAME} (gateway) ${timestamp}`,
@@ -76,6 +76,9 @@ export function buildGatewayBeacon(): string {
 		"ISSUES: Use bd create",
 		`Startup: run mulch prime, check mail (legio mail check --agent ${GATEWAY_NAME}), respond to user`,
 	];
+	if (isFirstRun) {
+		parts.push("FIRST_RUN: true — Follow the First Run workflow in your agent definition");
+	}
 	return parts.join(" — ");
 }
@@ -224,8 +227,9 @@ async function startGateway(args: string[], deps: GatewayDeps = {}): Promise<voi
 		}
 		// Send beacon after TUI initialization delay
+		const isFirstRun = !existingIdentity;
 		await sleep(3_000);
-		const beacon = buildGatewayBeacon();
+		const beacon = buildGatewayBeacon(isFirstRun);
 		await tmux.sendKeys(tmuxSession, beacon);
 		// Follow-up Enter to ensure submission (same pattern as sling.ts)