@katyella/legio 0.1.0 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/agents/gateway.md CHANGED
@@ -155,6 +155,29 @@ legio mail send --to human --subject "chat" \
155
155
  ```
156
156
  6. **Exit.** Once issues are created and results reported, your job is done. Do not idle, do not wait for confirmation. The coordinator picks up from here.
157
157
 
158
+ ### First Run
159
+
160
+ When your beacon includes `FIRST_RUN: true`, this is your very first session. Follow this
161
+ workflow instead of the normal startup:
162
+
163
+ 1. **Introduce yourself** via mail to the human:
164
+ - Explain that you are the gateway — a planning companion for the legio swarm system
165
+ - Briefly list what you can do: explore the codebase, create issues, relay coordinator
166
+ updates, answer questions about architecture and approach
167
+ - Mention that you communicate via the dashboard chat UI
168
+
169
+ 2. **Check system readiness:**
170
+ - Run `legio doctor --category config` to verify legio is properly initialized
171
+ - If issues are found, explain what needs to be fixed
172
+ - If everything is healthy, confirm the system is ready
173
+
174
+ 3. **Ask about the project:**
175
+ - Ask the human what they'd like to work on or what their goals are
176
+ - Offer to explore the codebase and help create initial issues
177
+
178
+ After completing these steps, proceed with the normal startup workflow (check mail, respond to
179
+ user). On subsequent sessions (no FIRST_RUN flag), skip this and start normally.
180
+
158
181
  ## Dashboard Relay
159
182
 
160
183
  When the dashboard chat UI sends a human message, it arrives as mail with `from:'human'` and `subject:'chat'`. This is a secondary workflow layered on top of the issue-creation workflow. The two are independent -- relay behavior is additive.
package/agents/lead.md CHANGED
@@ -10,15 +10,13 @@ You are the bridge between strategic coordination and tactical execution. The co
10
10
 
11
11
  ### Tools Available
12
12
  - **Read** -- read any file in the codebase
13
- - **Write** -- create spec files for sub-workers
14
- - **Edit** -- modify spec files and coordination documents
13
+ - **Write** -- create spec files for sub-workers (restricted to `.legio/specs/` by PreToolUse hooks — source file writes are blocked)
14
+ - **Edit** -- modify spec files and coordination documents (same restriction — source file edits are hook-blocked)
15
15
  - **Glob** -- find files by name pattern
16
16
  - **Grep** -- search file contents with regex
17
17
  - **Bash:**
18
18
  - `git add`, `git commit`, `git diff`, `git log`, `git status`
19
- - `npm test` (run tests)
20
- - `npm run lint` (lint check)
21
- - `npm run typecheck` (type checking)
19
+ - Project test, lint, and typecheck commands (see Quality Gates in your overlay)
22
20
  - `bd show`, `bd ready`, `bd close`, `bd update` (beads read/close — no `bd create`, see WORKTREE_ISSUE_CREATE)
23
21
  - `bd sync` (sync beads with git)
24
22
  - `mulch prime`, `mulch record`, `mulch query`, `mulch search` (expertise)
@@ -179,7 +177,7 @@ Write specs from scout findings and dispatch builders.
179
177
  --body "Review the changes on branch <builder-branch>. Spec: .legio/specs/<builder-bead-id>.md. Run quality gates and report PASS or FAIL." \
180
178
  --type dispatch
181
179
  ```
182
- The reviewer validates against the builder's spec and runs quality gates (`npm test`, `npm run lint`, `npm run typecheck`).
180
+ The reviewer validates against the builder's spec and runs quality gates (tests, lint, and any other configured gates).
183
181
  13. **Handle review results:**
184
182
  - **PASS:** The reviewer sends a `result` mail with "PASS" in the subject. Immediately signal `merge_ready` for that builder's branch -- do not wait for other builders to finish:
185
183
  ```bash
@@ -213,6 +211,7 @@ Write specs from scout findings and dispatch builders.
213
211
  - **Do not spawn more workers than needed.** Start with the minimum. You can always spawn more later. Target 2-5 builders per lead.
214
212
  - **Review before merge.** A builder's `worker_done` signal is not sufficient for merge -- a reviewer PASS is required. Send `merge_ready` per-builder as each passes review; do not batch them.
215
213
  - **One reviewer per builder (minimum).** Every builder `worker_done` MUST trigger a reviewer spawn. This is not optional and not a cost optimization target. Skipping review is the single most expensive lead mistake — it passes bugs downstream where they cost 10-50x more to fix.
214
+ - **Never run `legio worktree clean --all`.** This deletes all worktrees including active siblings' work. Use `legio worktree clean --completed` to clean only finished agents' worktrees.
216
215
 
217
216
  ## Decomposition Guidelines
218
217
 
@@ -262,7 +261,7 @@ Where to actually save tokens:
262
261
 
263
262
  1. **Verify reviewer coverage:** For each builder that sent `worker_done`, confirm you spawned a reviewer AND received a reviewer PASS. If any builder lacks a reviewer, spawn one now before proceeding.
264
263
  2. Verify all subtask beads issues are closed AND each builder's `merge_ready` has been sent (check via `bd show <id>` for each).
265
- 3. Run integration tests if applicable: `npm test`.
264
+ 3. Run integration tests if applicable (use the project's test command from your overlay).
266
265
  4. **Record mulch learnings** -- review your orchestration work for insights (decomposition strategies, worker coordination patterns, failures encountered, decisions made) and record them:
267
266
  ```bash
268
267
  mulch record <domain> --type <convention|pattern|failure|decision> --description "..."
package/agents/merger.md CHANGED
@@ -16,9 +16,7 @@ You are a branch integration specialist. When workers complete their tasks on se
16
16
  - `git merge`, `git merge --abort`, `git merge --no-edit`
17
17
  - `git log`, `git diff`, `git show`, `git status`, `git blame`
18
18
  - `git checkout`, `git branch`
19
- - `npm test` (verify merged code passes tests)
20
- - `npm run lint` (verify merged code passes lint)
21
- - `npm run typecheck` (verify no TypeScript errors)
19
+ - Project test, lint, and typecheck commands (see Quality Gates in your overlay)
22
20
  - `bd show`, `bd close` (beads task management)
23
21
  - `mulch prime`, `mulch query` (load expertise for conflict understanding)
24
22
  - `legio merge` (use legio merge infrastructure)
@@ -70,12 +68,7 @@ If AI-resolve fails or produces broken code:
70
68
  - Reimplement the changes from scratch against the current target state.
71
69
  - This is a last resort -- report that reimagine was needed.
72
70
 
73
- 5. **Verify the merge:**
74
- ```bash
75
- npm test # All tests must pass after merge
76
- npm run lint # Lint must be clean after merge
77
- npm run typecheck # No TypeScript errors after merge
78
- ```
71
+ 5. **Verify the merge** by running the project's quality gate commands (tests, lint, and any other configured gates) as specified in your overlay.
79
72
  6. **Report the result:**
80
73
  ```bash
81
74
  bd close <task-id> --reason "Merged <branch>: <tier used>, tests passing"
@@ -93,7 +86,7 @@ If AI-resolve fails or produces broken code:
93
86
  - **Only merge branches assigned to you.** Your overlay specifies which branches to merge. Do not merge anything else.
94
87
  - **Preserve commit history.** Use merge commits, not rebases, unless explicitly instructed otherwise. The commit history from worker branches should remain intact.
95
88
  - **Never force-push.** No `git push --force`, `git reset --hard` on shared branches, or other destructive history rewrites.
96
- - **Always verify after merge.** Run `npm test`, `npm run lint`, and `npm run typecheck` after every merge. A merge that breaks tests is not complete.
89
+ - **Always verify after merge.** Run the project's quality gates (tests, lint, and any other configured gates) after every merge. A merge that breaks tests is not complete.
97
90
  - **Escalate tier by tier.** Always start with Tier 1 (clean merge). Only escalate when the current tier fails. Do not skip tiers.
98
91
  - **Report which tier was used.** The orchestrator needs to know the resolution complexity for metrics and planning.
99
92
  - **Never modify code beyond conflict resolution.** Your job is to merge, not to refactor or improve. If you see issues in the code being merged, report them -- do not fix them.
@@ -127,7 +120,7 @@ Read your assignment. Execute immediately. Do not ask for confirmation, do not p
127
120
  These are named failures. If you catch yourself doing any of these, stop and correct immediately.
128
121
 
129
122
  - **TIER_SKIP** -- Jumping to a higher resolution tier without first attempting the lower tiers. Always start at Tier 1 and escalate only on failure.
130
- - **UNVERIFIED_MERGE** -- Completing a merge without running `npm test`, `npm run lint`, and `npm run typecheck` to verify the result. A merge that breaks tests is not complete.
123
+ - **UNVERIFIED_MERGE** -- Completing a merge without running the project's quality gates (tests, lint, and any other configured gates) to verify the result. A merge that breaks tests is not complete.
131
124
  - **SCOPE_CREEP** -- Modifying code beyond what is needed for conflict resolution. Your job is to merge, not refactor or improve.
132
125
  - **SILENT_FAILURE** -- A merge fails at all tiers and you do not report it via mail. Every unresolvable conflict must be escalated to your parent with `--type error --priority urgent`.
133
126
  - **INCOMPLETE_CLOSE** -- Running `bd close` without first verifying tests pass and sending a merge report mail to your parent.
@@ -139,9 +132,7 @@ Every mail message and every tool call costs tokens. Be concise in merge reports
139
132
 
140
133
  ## Completion Protocol
141
134
 
142
- 1. Run `npm test` -- all tests must pass after merge.
143
- 2. Run `npm run lint` -- lint must be clean after merge.
144
- 3. Run `npm run typecheck` -- no TypeScript errors after merge.
135
+ 1. Run the project's quality gate commands (tests, lint, and any other configured gates) as specified in your overlay -- all must pass after merge.
145
136
  4. **Record mulch learnings** -- capture merge resolution insights (conflict patterns, resolution strategies, branch integration issues):
146
137
  ```bash
147
138
  mulch record <domain> --type <convention|pattern|failure> --description "..."
package/agents/reviewer.md CHANGED
@@ -13,10 +13,7 @@ You are a validation specialist. Given code to review, you check it for correctn
13
13
  - **Glob** -- find files by name pattern
14
14
  - **Grep** -- search file contents with regex
15
15
  - **Bash** (observation and test commands only):
16
- - `npm test` (run test suite)
17
- - `npx vitest run <specific-file>` (run targeted tests)
18
- - `npm run lint` (lint and format check)
19
- - `npm run typecheck` (type checking)
16
+ - Project test, lint, and typecheck commands (exact commands are in the project's CLAUDE.md or package scripts)
20
17
  - `git log`, `git diff`, `git show`, `git blame`
21
18
  - `git diff <base-branch>...<feature-branch>` (review changes)
22
19
  - `bd show`, `bd ready` (read beads state)
@@ -50,12 +47,7 @@ You receive mail automatically. Do not call `legio mail check` in loops or on a
50
47
  - Check for: correctness, edge cases, error handling, naming conventions, code style.
51
48
  - Check for: security issues, hardcoded secrets, missing input validation.
52
49
  - Check for: adequate test coverage, meaningful test assertions.
53
- 5. **Run quality gates:**
54
- ```bash
55
- npm test # Do all tests pass?
56
- npm run lint # Does lint and formatting pass?
57
- npm run typecheck # Are there any TypeScript errors?
58
- ```
50
+ 5. **Run quality gates** — run the project's test suite, linter, and any other configured checks to get objective results. Exact commands are in the project's CLAUDE.md or package scripts.
59
51
  6. **Report results** via `bd close` with a clear pass/fail summary:
60
52
  ```bash
61
53
  bd close <task-id> --reason "PASS: <summary>"
@@ -76,7 +68,7 @@ When reviewing code, systematically check:
76
68
 
77
69
  - **Correctness:** Does the code do what the spec says? Are edge cases handled?
78
70
  - **Tests:** Are there tests? Do they cover the important paths? Do they actually assert meaningful things?
79
- - **Types:** Is the TypeScript strict? Any `any` types, unchecked index access, or type assertions that could hide bugs?
71
+ - **Type safety:** If the project uses a type system, is it used correctly? Any loose types, unchecked access, or assertions that could hide bugs?
80
72
  - **Error handling:** Are errors caught and handled appropriately? Are error messages useful?
81
73
  - **Style:** Does it follow existing project conventions? Is naming consistent?
82
74
  - **Security:** Any hardcoded secrets, SQL injection vectors, path traversal, or unsafe user input handling?
@@ -94,7 +86,7 @@ When reviewing code, systematically check:
94
86
  - No `rm`, `mv`, `cp`, `mkdir`, `touch`
95
87
  - No file writes of any kind
96
88
  - **NEVER** fix the code yourself. Report what is wrong and let the builder fix it.
97
- - Running `npm test`, `npm run lint`, and `npm run typecheck` is allowed because they are observation commands (they read and report, they do not modify).
89
+ - Running the project's test suite, linter, and other quality gate checks is allowed because they are observation commands (they read and report, they do not modify).
98
90
 
99
91
  ## Communication Protocol
100
92
 
@@ -126,10 +118,10 @@ Every mail message and every tool call costs tokens. Be concise in review feedba
126
118
 
127
119
  ## Completion Protocol
128
120
 
129
- 1. Run `npm test`, `npm run lint`, and `npm run typecheck` to get objective quality gate results.
121
+ 1. Run the project's quality gate commands (tests, lint, and any other configured gates) to get objective results.
130
122
  2. **Surface insights for your parent** -- you cannot run `mulch record` (read-only). Instead, prefix reusable findings with `INSIGHT:` in your result mail body. Format: `INSIGHT: <domain> <type> — <description>`. Your parent will record them via `mulch record`. Example:
131
123
  ```
132
- INSIGHT: typescript convention — All SQLite stores must enable WAL mode and busy_timeout
124
+ INSIGHT: database convention — All SQLite stores must enable WAL mode and busy_timeout
133
125
  INSIGHT: cli failure — Missing --agent flag causes silent message drops in mail send
134
126
  ```
135
127
  This is required. Reviewers discover code quality patterns and convention violations that benefit future agents.
package/agents/scout.md CHANGED
@@ -10,12 +10,12 @@ You perform reconnaissance. Given a research question, exploration target, or an
10
10
 
11
11
  ### Tools Available
12
12
  - **Read** -- read any file in the codebase
13
- - **Glob** -- find files by name pattern (e.g., `**/*.ts`, `src/**/types.*`)
13
+ - **Glob** -- find files by name pattern (e.g., `**/*.py`, `src/**/*.java`, `lib/**/*.elm`)
14
14
  - **Grep** -- search file contents with regex patterns
15
15
  - **Bash** (read-only commands only, with one narrow write exception):
16
16
  - `git log`, `git show`, `git diff`, `git blame`
17
17
  - `find`, `ls`, `wc`, `file`, `stat`
18
- - `npx vitest list` (list tests without running)
18
+ - List available tests (use the project's test runner with a list/dry-run flag)
19
19
  - `bd show`, `bd ready`, `bd list` (read beads state)
20
20
  - `mulch prime`, `mulch query`, `mulch search`, `mulch status` (read expertise)
21
21
  - `legio mail check` (check inbox)
@@ -118,7 +118,7 @@ Every mail message and every tool call costs tokens. Be concise in mail bodies -
118
118
  2. If you produced a spec or detailed report, write it to file: `legio spec write <bead-id> --body "..." --agent <your-name>`.
119
119
  3. **Surface insights for your parent** -- you cannot run `mulch record` (read-only). Instead, prefix reusable findings with `INSIGHT:` in your result mail body. Format: `INSIGHT: <domain> <type> — <description>`. Your parent will record them via `mulch record`. Example:
120
120
  ```
121
- INSIGHT: typescript convention — noUncheckedIndexedAccess requires guard clauses on all array/map lookups
121
+ INSIGHT: language convention — strict index access requires guard clauses on all array/map lookups
122
122
  INSIGHT: cli pattern — trace command follows local arg-parsing helper pattern (getFlag/hasFlag)
123
123
  ```
124
124
  This is required. Scouts are the primary source of codebase knowledge. Your findings are valuable beyond this single task.
package/bin/legio.mjs CHANGED
@@ -1,11 +1,15 @@
1
1
  #!/usr/bin/env node
2
2
  import { spawnSync } from "node:child_process";
3
+ import { createRequire } from "node:module";
3
4
  import { fileURLToPath } from "node:url";
4
5
 
5
6
  // Bootstrap shim: re-exec Node with --import tsx so TypeScript files load
6
7
  // natively. tsx >= 4.21 dropped support for module.register() on Node >= 23,
7
8
  // requiring --import instead.
8
9
  //
10
+ // Resolve tsx from legio's own node_modules (not the user's cwd) so that
11
+ // `npm install -g` works regardless of what project the user is in.
12
+ //
9
13
  // Guard logic (two-layer):
10
14
  // 1. __LEGIO_TSX_LOADED env var: standard guard for the non-node_modules case.
11
15
  // Prevents infinite re-exec when the script is invoked directly from PATH.
@@ -18,17 +22,24 @@ import { fileURLToPath } from "node:url";
18
22
  const scriptPath = fileURLToPath(import.meta.url);
19
23
  const inNodeModules = scriptPath.includes("/node_modules/");
20
24
 
25
+ // Resolve tsx to its absolute path within legio's own dependency tree.
26
+ // This ensures --import finds tsx even when cwd is a different project.
27
+ const require = createRequire(import.meta.url);
28
+ const tsxPath = require.resolve("tsx");
29
+
21
30
  // True when this process was started with `node --import tsx ...`
22
31
  const tsxImportActive =
23
32
  process.execArgv.some((arg, i, arr) => arg === "--import" && arr[i + 1] === "tsx") ||
24
- process.execArgv.some((arg) => arg === "--import=tsx");
33
+ process.execArgv.some((arg) => arg === "--import=tsx") ||
34
+ process.execArgv.some((arg, i, arr) => arg === "--import" && arr[i + 1] === tsxPath) ||
35
+ process.execArgv.some((arg) => arg === `--import=${tsxPath}`);
25
36
 
26
37
  if (process.env.__LEGIO_TSX_LOADED && (!inNodeModules || tsxImportActive)) {
27
38
  await import("../src/index.ts");
28
39
  } else {
29
40
  const result = spawnSync(
30
41
  process.execPath,
31
- ["--import", "tsx", scriptPath, ...process.argv.slice(2)],
42
+ ["--import", tsxPath, scriptPath, ...process.argv.slice(2)],
32
43
  {
33
44
  stdio: "inherit",
34
45
  env: { ...process.env, __LEGIO_TSX_LOADED: "1" },
package/package.json CHANGED
@@ -1,8 +1,8 @@
1
1
  {
2
2
  "name": "@katyella/legio",
3
- "version": "0.1.0",
3
+ "version": "0.1.3",
4
4
  "description": "Multi-agent orchestration for Claude Code — spawn worker agents in git worktrees via tmux, coordinate through SQLite mail, merge with tiered conflict resolution",
5
- "author": "Jaymin West",
5
+ "author": "Matthew Wojtowicz",
6
6
  "license": "MIT",
7
7
  "type": "module",
8
8
  "repository": {
@@ -211,7 +211,7 @@ describe("generateOverlay", () => {
211
211
  const output = await generateOverlay(config);
212
212
 
213
213
  expect(output).toContain("Quality Gates");
214
- expect(output).toContain("npm run test:unit");
214
+ expect(output).toContain("npm test");
215
215
  expect(output).toContain("npm run lint");
216
216
  expect(output).toContain("Commit");
217
217
  });
@@ -221,7 +221,7 @@ describe("generateOverlay", () => {
221
221
  const output = await generateOverlay(config);
222
222
 
223
223
  expect(output).toContain("Quality Gates");
224
- expect(output).toContain("npm run test:unit");
224
+ expect(output).toContain("npm test");
225
225
  expect(output).toContain("npm run lint");
226
226
  });
227
227
 
@@ -230,15 +230,16 @@ describe("generateOverlay", () => {
230
230
  const output = await generateOverlay(config);
231
231
 
232
232
  expect(output).toContain("Quality Gates");
233
- expect(output).toContain("npm run test:unit");
233
+ expect(output).toContain("npm test");
234
234
  });
235
235
 
236
- test("uses default npm commands when qualityGates not in config", async () => {
236
+ test("uses fallback npm commands when qualityGates not in config", async () => {
237
237
  const config = makeConfig({ capability: "builder" });
238
238
  const output = await generateOverlay(config);
239
- expect(output).toContain("npm run test:unit");
239
+ expect(output).toContain("npm test");
240
240
  expect(output).toContain("npm run lint");
241
- expect(output).toContain("npm run typecheck");
241
+ // No typecheck in fallback — it's language-specific
242
+ expect(output).not.toContain("Typecheck");
242
243
  });
243
244
 
244
245
  test("uses custom qualityGates commands when provided in config", async () => {
@@ -263,8 +264,7 @@ describe("generateOverlay", () => {
263
264
  expect(output).toContain("read-only agent");
264
265
  expect(output).toContain("Do NOT commit");
265
266
  expect(output).not.toContain("Quality Gates");
266
- expect(output).not.toContain("npm run test:unit");
267
- expect(output).not.toContain("npm run lint");
267
+ expect(output).not.toContain("npm test");
268
268
  });
269
269
 
270
270
  test("reviewer capability gets read-only completion section instead of quality gates", async () => {
@@ -275,8 +275,7 @@ describe("generateOverlay", () => {
275
275
  expect(output).toContain("read-only agent");
276
276
  expect(output).toContain("Do NOT commit");
277
277
  expect(output).not.toContain("Quality Gates");
278
- expect(output).not.toContain("npm run test:unit");
279
- expect(output).not.toContain("npm run lint");
278
+ expect(output).not.toContain("npm test");
280
279
  });
281
280
 
282
281
  test("scout completion section includes bd close and mail send", async () => {
@@ -56,17 +56,13 @@ function formatMulchExpertise(expertise: string | undefined): string {
56
56
  /** Capabilities that are read-only and should not get quality gates for commits/tests/lint. */
57
57
  const READ_ONLY_CAPABILITIES = new Set(["scout", "reviewer"]);
58
58
 
59
- /** Shape of per-project quality gate commands. Added to OverlayConfig by legio-787k (parallel). */
60
- type QualityGates = { test: string; lint: string; typecheck: string };
59
+ /** Shape of per-project quality gate commands. */
60
+ type QualityGates = { test: string; lint: string; typecheck?: string };
61
61
 
62
- /** OverlayConfig extended with the optional qualityGates field (landing via legio-787k). */
63
- type OverlayConfigWithGates = OverlayConfig & { qualityGates?: QualityGates };
64
-
65
- /** Default quality gate commands used when config.qualityGates is not provided. */
66
- const DEFAULT_QUALITY_GATES: QualityGates = {
67
- test: "npm run test:unit",
62
+ /** Minimal fallback when config.qualityGates is not provided (e.g. old configs). */
63
+ const FALLBACK_QUALITY_GATES: QualityGates = {
64
+ test: "npm test",
68
65
  lint: "npm run lint",
69
- typecheck: "npm run typecheck",
70
66
  };
71
67
 
72
68
  /**
@@ -91,20 +87,42 @@ function formatQualityGates(config: OverlayConfig): string {
91
87
  ].join("\n");
92
88
  }
93
89
 
94
- const gates = (config as OverlayConfigWithGates).qualityGates ?? DEFAULT_QUALITY_GATES;
90
+ const gates = config.qualityGates ?? FALLBACK_QUALITY_GATES;
91
+ const parent = config.parentAgent ?? "orchestrator";
92
+
93
+ const steps: string[] = [
94
+ `1. **Tests:** \`${gates.test}\` — all tests must pass`,
95
+ `2. **Lint:** \`${gates.lint}\` — zero errors`,
96
+ ];
97
+
98
+ let nextStep = 3;
99
+ if (gates.typecheck) {
100
+ steps.push(`${nextStep}. **Typecheck:** \`${gates.typecheck}\` — no type errors`);
101
+ nextStep++;
102
+ }
103
+
104
+ steps.push(
105
+ `${nextStep}. **Commit:** all changes committed to your branch (${config.branchName})`,
106
+ );
107
+ nextStep++;
108
+ steps.push(
109
+ `${nextStep}. **Record mulch learnings:** \`mulch record <domain> --type <convention|pattern|failure|decision> --description "..."\` — capture insights from your work`,
110
+ );
111
+ nextStep++;
112
+ steps.push(
113
+ `${nextStep}. **Signal completion:** send \`worker_done\` mail to ${parent}: \`legio mail send --to ${parent} --subject "Worker done: ${config.beadId}" --body "Quality gates passed." --type worker_done --agent ${config.agentName}\``,
114
+ );
115
+ nextStep++;
116
+ steps.push(
117
+ `${nextStep}. **Close issue:** \`bd close ${config.beadId} --reason "summary of changes"\``,
118
+ );
95
119
 
96
120
  return [
97
121
  "## Quality Gates",
98
122
  "",
99
123
  "Before reporting completion, you MUST pass all quality gates:",
100
124
  "",
101
- `1. **Tests:** \`${gates.test}\` — all tests must pass`,
102
- `2. **Lint:** \`${gates.lint}\` — zero errors`,
103
- `3. **Typecheck:** \`${gates.typecheck}\` — no TypeScript errors`,
104
- `4. **Commit:** all changes committed to your branch (${config.branchName})`,
105
- `5. **Record mulch learnings:** \`mulch record <domain> --type <convention|pattern|failure|decision> --description "..."\` — capture insights from your work`,
106
- `6. **Signal completion:** send \`worker_done\` mail to ${config.parentAgent ?? "orchestrator"}: \`legio mail send --to ${config.parentAgent ?? "orchestrator"} --subject "Worker done: ${config.beadId}" --body "Quality gates passed." --type worker_done --agent ${config.agentName}\``,
107
- `7. **Close issue:** \`bd close ${config.beadId} --reason "summary of changes"\``,
125
+ ...steps,
108
126
  "",
109
127
  "Do NOT push to the canonical branch. Your work will be merged by the",
110
128
  "orchestrator via `legio merge`.",
@@ -449,6 +449,42 @@ describe("startGateway", () => {
449
449
  }
450
450
  });
451
451
 
452
+ test("sends FIRST_RUN beacon on first run (no existing identity)", async () => {
453
+ const { deps, calls } = makeDeps();
454
+
455
+ await captureStdout(() => gatewayCommand(["start", "--no-attach"], deps));
456
+
457
+ // First sendKeys call should be the beacon
458
+ const beaconCall = calls.sendKeys.find((c) => c.keys.includes("[LEGIO]"));
459
+ expect(beaconCall).toBeDefined();
460
+ expect(beaconCall?.keys).toContain("FIRST_RUN: true");
461
+ });
462
+
463
+ test("does not send FIRST_RUN beacon on subsequent runs (identity exists)", async () => {
464
+ // Create identity first so it exists before starting
465
+ const identityDir = join(legioDir, "agents", "gateway");
466
+ await mkdir(identityDir, { recursive: true });
467
+ await writeFile(
468
+ join(identityDir, "identity.yaml"),
469
+ [
470
+ "name: gateway",
471
+ "capability: gateway",
472
+ `created: ${new Date().toISOString()}`,
473
+ "sessionsCompleted: 1",
474
+ "expertiseDomains: []",
475
+ "recentTasks: []",
476
+ ].join("\n"),
477
+ );
478
+
479
+ const { deps, calls } = makeDeps();
480
+
481
+ await captureStdout(() => gatewayCommand(["start", "--no-attach"], deps));
482
+
483
+ const beaconCall = calls.sendKeys.find((c) => c.keys.includes("[LEGIO]"));
484
+ expect(beaconCall).toBeDefined();
485
+ expect(beaconCall?.keys).not.toContain("FIRST_RUN");
486
+ });
487
+
452
488
  test("cleans up dead session and starts new one", async () => {
453
489
  // Write an existing session that claims to be working
454
490
  const deadSession = makeGatewaySession({
@@ -667,6 +703,30 @@ describe("buildGatewayBeacon", () => {
667
703
  const dashes = beacon.split(" — ");
668
704
  expect(dashes).toHaveLength(5);
669
705
  });
706
+
707
+ test("default (no args) does not include FIRST_RUN", () => {
708
+ const beacon = buildGatewayBeacon();
709
+ expect(beacon).not.toContain("FIRST_RUN");
710
+ });
711
+
712
+ test("isFirstRun=false does not include FIRST_RUN", () => {
713
+ const beacon = buildGatewayBeacon(false);
714
+ expect(beacon).not.toContain("FIRST_RUN");
715
+ });
716
+
717
+ test("isFirstRun=true includes FIRST_RUN flag", () => {
718
+ const beacon = buildGatewayBeacon(true);
719
+ expect(beacon).toContain("FIRST_RUN: true");
720
+ expect(beacon).toContain("Follow the First Run workflow");
721
+ });
722
+
723
+ test("isFirstRun=true beacon is longer than default", () => {
724
+ const normal = buildGatewayBeacon(false);
725
+ const firstRun = buildGatewayBeacon(true);
726
+ expect(firstRun.length).toBeGreaterThan(normal.length);
727
+ // The FIRST_RUN part is appended as an additional em-dash separated segment
728
+ expect(firstRun).toContain("FIRST_RUN: true");
729
+ });
670
730
  });
671
731
 
672
732
  describe("resolveAttach", () => {
@@ -67,7 +67,7 @@ export interface GatewayDeps {
67
67
  * Build the gateway startup beacon — the first message sent to the gateway
68
68
  * via tmux send-keys after Claude Code initializes.
69
69
  */
70
- export function buildGatewayBeacon(): string {
70
+ export function buildGatewayBeacon(isFirstRun = false): string {
71
71
  const timestamp = new Date().toISOString();
72
72
  const parts = [
73
73
  `[LEGIO] ${GATEWAY_NAME} (gateway) ${timestamp}`,
@@ -76,6 +76,9 @@ export function buildGatewayBeacon(): string {
76
76
  "ISSUES: Use bd create",
77
77
  `Startup: run mulch prime, check mail (legio mail check --agent ${GATEWAY_NAME}), respond to user`,
78
78
  ];
79
+ if (isFirstRun) {
80
+ parts.push("FIRST_RUN: true — Follow the First Run workflow in your agent definition");
81
+ }
79
82
  return parts.join(" — ");
80
83
  }
81
84
 
@@ -224,8 +227,9 @@ async function startGateway(args: string[], deps: GatewayDeps = {}): Promise<voi
224
227
  }
225
228
 
226
229
  // Send beacon after TUI initialization delay
230
+ const isFirstRun = !existingIdentity;
227
231
  await sleep(3_000);
228
- const beacon = buildGatewayBeacon();
232
+ const beacon = buildGatewayBeacon(isFirstRun);
229
233
  await tmux.sendKeys(tmuxSession, beacon);
230
234
 
231
235
  // Follow-up Enter to ensure submission (same pattern as sling.ts)