all-hands-cli 0.1.8 → 0.1.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -37,42 +37,26 @@ For proprietary domains with documentation pages:
37
37
  - Extract API patterns, configuration examples
38
38
  - Note version-specific behaviors
39
39
 
40
- ### Open Source Inspiration (Clone & Browse)
41
-
42
- For exploring GitHub repositories locally:
43
-
44
- 1. **Search for repositories**:
45
- ```bash
46
- gh search repos "<query>" --limit 5
47
- ```
48
-
49
- 2. **Clone to local research folder**:
50
- ```bash
51
- # Clone into .reposearch folder (gitignored)
52
- mkdir -p .reposearch
53
- git clone --depth 1 <repo-url> .reposearch/<repo-name>
54
- ```
55
-
56
- Note: Ensure `.reposearch/` is in the project's `.gitignore`.
57
-
58
- 3. **Browse locally with standard tooling**:
59
- - Use `Glob` to find files by pattern
60
- - Use `Grep` to search code content
61
- - Use `Read` to examine specific files
62
- - Use `ls` to explore directory structure
63
-
64
- 4. **Clean up when done** (optional):
65
- ```bash
66
- rm -rf .reposearch/<repo-name>
67
- ```
68
-
69
- This approach leverages the agent's superior local file navigation capabilities:
70
- - Full regex search across the codebase
71
- - Fast pattern matching and file discovery
72
- - Direct file reading without API encoding issues
73
- - Study implementation patterns in similar projects
74
- - Extract architectural decisions
75
- - Note how libraries handle similar problems
40
+ ### Open Source Inspiration (`ah spawn reposearch`)
41
+
42
+ Use `ah spawn reposearch` to clone external GitHub repos and delegate research to an AI agent that searches across both the current project and external codebases.
43
+
44
+ **OSS codebase answers** — ask how a specific project handles something:
45
+ ```bash
46
+ ah spawn reposearch "How does this project handle authentication?" --repos https://github.com/org/project
47
+ ```
48
+
49
+ **Cross-repo comparison** — compare our implementation vs an external project:
50
+ ```bash
51
+ ah spawn reposearch "Compare our error handling approach vs theirs" --repos https://github.com/org/project
52
+ ```
53
+
54
+ **Multi-framework comparison** — check out 2+ repos, compare approaches side-by-side:
55
+ ```bash
56
+ ah spawn reposearch "How do these projects handle routing?" --repos https://github.com/a/repo,https://github.com/b/repo
57
+ ```
58
+
59
+ Re-running the same command with the same repos is fast — repos are cached locally between invocations.
76
60
 
77
61
  ### Parallel Exploration
78
62
 
@@ -3,13 +3,15 @@
3
3
  *
4
4
  * Commands:
5
5
  * ah spawn codesearch "<query>" [--budget <n>] [--steps <n>]
6
+ * ah spawn reposearch "<query>" --repos <url1,url2,...> [--steps <n>]
6
7
  */
7
8
 
8
9
  import { Command } from "commander";
9
- import { readFileSync } from "fs";
10
- import { dirname, join } from "path";
10
+ import { existsSync, mkdirSync, readFileSync } from "fs";
11
+ import { dirname, join, basename } from "path";
11
12
  import { fileURLToPath } from "url";
12
- import { AgentRunner, withDebugInfo } from "../lib/opencode/index.js";
13
+ import { execFileSync } from "child_process";
14
+ import { AgentRunner, withDebugInfo, type ReposearchOutput } from "../lib/opencode/index.js";
13
15
  import { BaseCommand, type CommandResult } from "../lib/base-command.js";
14
16
  import { loadProjectSettings } from "../hooks/shared.js";
15
17
 
@@ -19,15 +21,23 @@ const getProjectRoot = (): string => {
19
21
  return process.env.PROJECT_ROOT || process.cwd();
20
22
  };
21
23
 
22
- // Load prompt
24
+ // Load prompts
23
25
  const CODESEARCH_PROMPT_PATH = join(__dirname, "../lib/opencode/prompts/codesearch.md");
24
26
  const getCodesearchPrompt = (): string => readFileSync(CODESEARCH_PROMPT_PATH, "utf-8");
25
27
 
26
- // Defaults
28
+ const REPOSEARCH_PROMPT_PATH = join(__dirname, "../lib/opencode/prompts/reposearch.md");
29
+ const getReposearchPrompt = (): string => readFileSync(REPOSEARCH_PROMPT_PATH, "utf-8");
30
+
31
+ // Codesearch defaults
27
32
  const DEFAULT_TOOL_BUDGET = 12;
28
33
  const DEFAULT_STEPS_LIMIT = 20;
29
34
  const DEFAULT_TIMEOUT_MS = 120000; // 2 min
30
35
 
36
+ // Reposearch defaults
37
+ const DEFAULT_REPOSEARCH_STEPS = 30;
38
+ const DEFAULT_REPOSEARCH_TIMEOUT_MS = 180000; // 3 min
39
+ const DEFAULT_REPOSEARCH_TOOL_BUDGET = 20;
40
+
31
41
  // Output types
32
42
  interface CodeResult {
33
43
  file: string;
@@ -125,6 +135,165 @@ Respond with JSON matching the required schema.`;
125
135
  }
126
136
  }
127
137
 
138
+ /**
139
+ * Derive a directory name from a GitHub URL.
140
+ * e.g. "https://github.com/org/repo" -> "org--repo"
141
+ */
142
+ function repoDirName(url: string): string {
143
+ try {
144
+ const parsed = new URL(url);
145
+ // Remove .git suffix and leading slash, replace / with --
146
+ const path = parsed.pathname.replace(/\.git$/, "").replace(/^\//, "");
147
+ return path.replace(/\//g, "--");
148
+ } catch {
149
+ // Fallback: use basename-like extraction
150
+ return basename(url).replace(/\.git$/, "") || "repo";
151
+ }
152
+ }
153
+
154
+ /**
155
+ * Clone or pull a repo into the .reposearch directory.
156
+ * Returns the local directory path, or null on failure.
157
+ */
158
+ function cloneOrPullRepo(reposearchDir: string, repoUrl: string): string | null {
159
+ const dirName = repoDirName(repoUrl);
160
+ const repoDir = join(reposearchDir, dirName);
161
+
162
+ try {
163
+ if (existsSync(join(repoDir, ".git"))) {
164
+ // Repo already cloned — pull latest
165
+ execFileSync("git", ["pull", "--ff-only"], {
166
+ cwd: repoDir,
167
+ stdio: "pipe",
168
+ timeout: 60000,
169
+ });
170
+ } else {
171
+ // Fresh clone (shallow)
172
+ mkdirSync(reposearchDir, { recursive: true });
173
+ execFileSync("git", ["clone", "--depth", "1", repoUrl, repoDir], {
174
+ stdio: "pipe",
175
+ timeout: 120000,
176
+ });
177
+ }
178
+ return repoDir;
179
+ } catch (error) {
180
+ const message = error instanceof Error ? error.message : String(error);
181
+ console.error(`Warning: Failed to clone/pull ${repoUrl}: ${message}`);
182
+ return null;
183
+ }
184
+ }
185
+
186
+ /**
187
+ * Reposearch command - spawn research agent that searches across current project and external repos.
188
+ */
189
+ class ReposearchCommand extends BaseCommand {
190
+ readonly name = "reposearch";
191
+ readonly description = "Research code across the current project and external GitHub repositories";
192
+
193
+ defineArguments(cmd: Command): void {
194
+ cmd
195
+ .argument("<query>", "Research query (natural language)")
196
+ .requiredOption("--repos <urls>", "Comma-separated GitHub repo URLs to search")
197
+ .option("--steps <n>", "Hard step limit for agent iterations", String(DEFAULT_REPOSEARCH_STEPS))
198
+ .option("--debug", "Include agent debug metadata (model, timing, fallback) in output");
199
+ }
200
+
201
+ async execute(args: Record<string, unknown>): Promise<CommandResult> {
202
+ const query = args.query as string;
203
+ const reposRaw = args.repos as string;
204
+ const stepsLimit = parseInt((args.steps as string) ?? String(DEFAULT_REPOSEARCH_STEPS), 10);
205
+ const debug = !!args.debug;
206
+
207
+ if (!query) {
208
+ return this.error("validation_error", "query is required");
209
+ }
210
+ if (!reposRaw) {
211
+ return this.error("validation_error", "--repos is required (comma-separated GitHub URLs)");
212
+ }
213
+
214
+ const repoUrls = reposRaw.split(",").map((u) => u.trim()).filter(Boolean);
215
+ if (repoUrls.length === 0) {
216
+ return this.error("validation_error", "No valid repo URLs provided");
217
+ }
218
+
219
+ const projectRoot = getProjectRoot();
220
+ const reposearchDir = join(projectRoot, ".reposearch");
221
+
222
+ // Clone or pull each repo
223
+ const repoDirectories: Array<{ url: string; dir: string }> = [];
224
+ const warnings: string[] = [];
225
+
226
+ for (const url of repoUrls) {
227
+ const dir = cloneOrPullRepo(reposearchDir, url);
228
+ if (dir) {
229
+ repoDirectories.push({ url, dir });
230
+ } else {
231
+ warnings.push(`Failed to clone/pull: ${url}`);
232
+ }
233
+ }
234
+
235
+ if (repoDirectories.length === 0) {
236
+ return this.error("clone_error", "All repo clones/pulls failed. Check URLs and network.");
237
+ }
238
+
239
+ const runner = new AgentRunner(projectRoot);
240
+
241
+ // Build directory listing for the agent
242
+ const repoListing = repoDirectories
243
+ .map((r) => `- ${r.url} → ${r.dir}`)
244
+ .join("\n");
245
+
246
+ const userMessage = `## Research Query
247
+ ${query}
248
+
249
+ ## Directories to Search
250
+
251
+ ### Current Project
252
+ - Root: ${projectRoot}
253
+
254
+ ### External Repositories
255
+ ${repoListing}
256
+
257
+ ## Budget
258
+ - Tool budget (soft): ${DEFAULT_REPOSEARCH_TOOL_BUDGET} tool calls
259
+ - Available tools: grep (text search), glob (file patterns), read (file content), lsp (if available)
260
+ - Search all relevant directories to answer the query
261
+
262
+ ${warnings.length > 0 ? `## Warnings\n${warnings.map((w) => `- ${w}`).join("\n")}\n\n` : ""}Respond with JSON matching the required schema.`;
263
+
264
+ try {
265
+ const result = await runner.run<ReposearchOutput>(
266
+ {
267
+ name: "reposearch",
268
+ systemPrompt: getReposearchPrompt(),
269
+ timeoutMs: DEFAULT_REPOSEARCH_TIMEOUT_MS,
270
+ steps: stepsLimit,
271
+ },
272
+ userMessage
273
+ );
274
+
275
+ if (!result.success) {
276
+ return this.error("agent_error", result.error ?? "Unknown agent error");
277
+ }
278
+
279
+ const data = result.data!;
280
+
281
+ return this.success(withDebugInfo({
282
+ query,
283
+ repos_requested: repoUrls,
284
+ repos_analyzed: data.repos_analyzed,
285
+ analysis: data.analysis,
286
+ code_references: data.code_references,
287
+ warnings,
288
+ metadata: result.metadata,
289
+ }, result, debug));
290
+ } catch (error) {
291
+ const message = error instanceof Error ? error.message : String(error);
292
+ return this.error("spawn_error", message);
293
+ }
294
+ }
295
+ }
296
+
128
297
  /**
129
298
  * Register spawn commands on the given commander program.
130
299
  */
@@ -133,19 +302,35 @@ export function register(program: Command): void {
133
302
  .command("spawn")
134
303
  .description("Spawn sub-agents for specialized tasks");
135
304
 
305
+ // Register codesearch
136
306
  const codesearch = new CodesearchCommand();
137
- const cmd = spawnCmd.command(codesearch.name).description(codesearch.description);
138
- codesearch.defineArguments(cmd);
139
- cmd.action(async (...args) => {
307
+ const codesearchCmd = spawnCmd.command(codesearch.name).description(codesearch.description);
308
+ codesearch.defineArguments(codesearchCmd);
309
+ codesearchCmd.action(async (...args) => {
140
310
  const opts = args[args.length - 2] as Record<string, unknown>;
141
311
  const cmdObj = args[args.length - 1] as Command;
142
312
  const positionalArgs = cmdObj.args;
143
313
 
144
- // Map positional args to named args based on command definition
145
314
  const namedArgs: Record<string, unknown> = { ...opts };
146
315
  if (positionalArgs[0]) namedArgs.query = positionalArgs[0];
147
316
 
148
317
  const result = await codesearch.execute(namedArgs);
149
318
  console.log(JSON.stringify(result, null, 2));
150
319
  });
320
+
321
+ // Register reposearch
322
+ const reposearch = new ReposearchCommand();
323
+ const reposearchCmd = spawnCmd.command(reposearch.name).description(reposearch.description);
324
+ reposearch.defineArguments(reposearchCmd);
325
+ reposearchCmd.action(async (...args) => {
326
+ const opts = args[args.length - 2] as Record<string, unknown>;
327
+ const cmdObj = args[args.length - 1] as Command;
328
+ const positionalArgs = cmdObj.args;
329
+
330
+ const namedArgs: Record<string, unknown> = { ...opts };
331
+ if (positionalArgs[0]) namedArgs.query = positionalArgs[0];
332
+
333
+ const result = await reposearch.execute(namedArgs);
334
+ console.log(JSON.stringify(result, null, 2));
335
+ });
151
336
  }
@@ -101,6 +101,22 @@ export interface AggregatorOutput {
101
101
  design_notes?: string[];
102
102
  }
103
103
 
104
+ // Reposearch output types
105
+ export interface RepoCodeReference {
106
+ repo: string; // "current" or the GitHub URL
107
+ file: string; // relative path within the repo
108
+ line_start: number;
109
+ line_end: number;
110
+ code: string;
111
+ context: string;
112
+ }
113
+
114
+ export interface ReposearchOutput {
115
+ analysis: string; // markdown research findings
116
+ code_references: RepoCodeReference[];
117
+ repos_analyzed: string[];
118
+ }
119
+
104
120
  export { AgentRunner } from "./runner.js";
105
121
 
106
122
  // Debug metadata for agent results (included when --debug flag is passed)
@@ -0,0 +1,115 @@
1
+ # Repo Search Agent
2
+
3
+ You research code across the current project and one or more external GitHub repositories. You search all provided directories to answer questions, compare implementations, and analyze patterns across codebases. Return structured JSON with your findings.
4
+
5
+ ## Context
6
+
7
+ You will receive:
8
+ - **Project root directory**: The current project's codebase
9
+ - **External repo directories**: One or more cloned GitHub repos under `.reposearch/`
10
+ - **Research query**: What to investigate across these codebases
11
+
12
+ ## Available Tools
13
+
14
+ **grep** - Text search via ripgrep
15
+ - Search across any of the provided directories
16
+ - Best for: string literals, identifiers, patterns, comments
17
+
18
+ **glob** - File pattern matching
19
+ - Discover files by extension or name pattern in any repo
20
+ - Scope searches to specific directories
21
+
22
+ **read** - File content retrieval
23
+ - Read specific files from any repo after finding them
24
+ - Specify line ranges when possible to minimize output
25
+
26
+ **lsp** (if available) - Language Server Protocol
27
+ - goToDefinition, findReferences, hover
28
+ - Works on the current project; may not be available for external repos
29
+
30
+ ## Search Strategy
31
+
32
+ 1. **Understand the query**: Determine if it's about a single repo, a comparison, or a pattern search
33
+ 2. **Parallel discovery**: Search relevant directories simultaneously using grep/glob
34
+ 3. **Targeted reads**: Read specific files to understand implementations
35
+ 4. **Cross-reference**: Compare findings between repos to answer the query
36
+ 5. **Synthesize**: Combine findings into a coherent analysis
37
+
38
+ ## Budget Awareness
39
+
40
+ You have a soft tool budget. Stay efficient:
41
+ - Use grep/glob to narrow down before reading files
42
+ - Don't read entire files when a section suffices
43
+ - Avoid redundant searches across repos
44
+ - Focus on the most relevant code to the query
45
+
46
+ ## Output Format
47
+
48
+ Return ONLY valid JSON:
49
+
50
+ ```json
51
+ {
52
+ "analysis": "## Findings\n\nMarkdown analysis of research findings...",
53
+ "code_references": [
54
+ {
55
+ "repo": "current",
56
+ "file": "src/auth/handler.ts",
57
+ "line_start": 10,
58
+ "line_end": 25,
59
+ "code": "function handleAuth() { ... }",
60
+ "context": "Current project's auth handler using JWT"
61
+ },
62
+ {
63
+ "repo": "https://github.com/org/project",
64
+ "file": "lib/auth.py",
65
+ "line_start": 45,
66
+ "line_end": 60,
67
+ "code": "class AuthMiddleware: ...",
68
+ "context": "External project uses middleware-based auth"
69
+ }
70
+ ],
71
+ "repos_analyzed": ["current", "https://github.com/org/project"]
72
+ }
73
+ ```
74
+
75
+ ## Field Guidelines
76
+
77
+ **analysis** (markdown string):
78
+ - Structured markdown answering the research query
79
+ - Include headings, comparisons, and key observations
80
+ - Reference specific code when making claims
81
+ - Keep focused on the query — don't summarize everything
82
+
83
+ **code_references** (array, max 15):
84
+ - `repo`: "current" for the project, or the GitHub URL for external repos
85
+ - `file`: Relative path within the repo
86
+ - `line_start` / `line_end`: 1-indexed line range
87
+ - `code`: Actual code snippet (keep concise, 1-20 lines)
88
+ - `context`: Why this reference is relevant (1 sentence)
89
+
90
+ **repos_analyzed** (array):
91
+ - List of repos that were actually searched
92
+ - "current" for the project root
93
+ - GitHub URLs for external repos
94
+
95
+ ## Use Cases
96
+
97
+ **OSS Q&A**: "How does project X handle authentication?"
98
+ - Focus on the external repo, search for auth-related patterns
99
+ - Provide concrete code examples with explanation
100
+
101
+ **Cross-repo comparison**: "Compare our error handling vs project X"
102
+ - Search both repos for error handling patterns
103
+ - Highlight similarities and differences in the analysis
104
+
105
+ **Multi-framework comparison**: "How do projects A and B handle routing?"
106
+ - Search multiple external repos
107
+ - Compare approaches side-by-side in the analysis
108
+
109
+ ## Anti-patterns
110
+
111
+ - Returning entire files instead of relevant sections
112
+ - Analysis not grounded in actual code found
113
+ - Missing cross-references when comparison was requested
114
+ - Exceeding tool budget with redundant searches
115
+ - Not searching all provided repos when the query requires it
@@ -17,6 +17,16 @@ This is how validation compounds. Every domain has both a stochastic dimension (
17
17
 
18
18
  A validation suite must have a meaningful stochastic dimension to justify existing. Deterministic-only tools (type checking, linting, formatting) are test commands referenced directly in acceptance criteria and CI/CD — they are NOT suites.
19
19
 
20
+ ## Repository Agnosticism
21
+
22
+ This reference file is a generic rule file that ships with the harness. It MUST NOT contain references to project-specific validation suites, commands, or infrastructure. All examples must either:
23
+ - Reference existing default validation suites shipped with this repo (currently: xcode-automation, browser-automation)
24
+ - Use generic/hypothetical descriptions that any target repository can map to their own context
25
+
26
+ When examples are needed, use **snippets from the existing default suites** rather than naming suites or commands that belong to a specific target project. Target repositories create their own suites for their domains — this file teaches how to create and structure them, not what they should be called.
27
+
28
+ **Why**: Target repositories consume this file as authoritative guidance. Project-specific references create confusion (agents look for suites that don't exist), couple the harness to a single project, and violate the principle that this file teaches patterns, not inventories. If a pattern needs a concrete example, draw it from xcode-automation or browser-automation.
29
+
20
30
  ## Creating Validation Tooling
21
31
 
22
32
  Follow `.allhands/flows/shared/CREATE_VALIDATION_TOOLING_SPEC.md` for the full process. This creates a spec, not an implementation.
@@ -80,6 +90,113 @@ Prompt files reference validation suites in their `validation_suites` frontmatte
80
90
  2. Agent runs suite's **Deterministic Integration** section for acceptance criteria gating
81
91
  3. Validation review (`PROMPT_VALIDATION_REVIEW.md`) confirms pass/fail
82
92
 
93
+ ## Command Documentation Principle
94
+
95
+ Two categories of commands exist in validation suites, each requiring different documentation approaches:
96
+
97
+ **External tooling commands — Document explicitly**: Commands from external tools (`xctrace`, `xcrun simctl`, `agent-browser`, `playwright`, `curl`, etc.) are stable, unfamiliar to agents by default, and unlikely to change with codebase evolution. Document specific commands, flags, and use cases inline with motivations. Example from xcode-automation: `xcrun xctrace record --template 'Time Profiler' --device '<UDID>' --attach '<PID>'` — the flags, ordering constraints, and PID discovery method are all external tool knowledge that the suite documents explicitly.
98
+
99
+ **Internal codebase commands — Document patterns, not inventories**: Project-specific scripts, test commands, and codebase-specific CLI wrappers evolve rapidly. Instead:
100
+ 1. **Document core infrastructure commands explicitly** — commands that boot services, manage environments, and are foundational to validation in the target project. These are stable and essential per-project, but suites should teach agents how to discover them (e.g., "check `package.json` scripts" or "run `--help`"), not hardcode specific script names.
101
+ 2. **Teach patterns for everything else** — naming conventions, where to discover project commands, what categories mean, and how to build upon them.
102
+ 3. **Document motivations** — why different test categories exist, when to use which, what confidence each provides.
103
+
104
+ Per **Frontier Models are Capable**: An agent given patterns + motivations + discovery instructions outperforms one given stale command inventories. Suites that teach patterns age gracefully; suites that enumerate commands require maintenance on every change.
105
+
106
+ ## Decision Tree Requirement
107
+
108
+ Every validation suite MUST include a decision tree that routes agents to the correct validation approach based on their situation. Decision trees:
109
+ - Distinguish which instructions are relevant to which validation scenario (e.g., UI-only test vs full E2E with native code changes)
110
+ - Show where/when stochastic vs deterministic testing applies
111
+ - Surface deterministic branch points where other validation suites must be utilized (e.g., "Does this branch have native code changes? → Yes → follow xcode-automation decision tree")
112
+ - Cleanly articulate multiple expected use cases within a single suite
113
+
114
+ The decision tree replaces flat prerequisite lists with structured routing. An agent reads the tree and follows the branch matching their situation, skipping irrelevant setup and finding the right cross-references.
115
+
116
+ ## tmux Session Management Standard
117
+
118
+ All suites that require long-running processes (dev servers, Expo servers, Flask API, Metro bundler) MUST use the tmux approach proven in xcode-automation:
119
+
120
+ ```bash
121
+ # CRITICAL: -t $TMUX_PANE pins split to agent's window, not user's focused window
122
+ tmux split-window -h -d -t $TMUX_PANE \
123
+ -c /path/to/repo '<command>'
124
+ ```
125
+
126
+ **Observability**: Agents MUST verify processes are running correctly via tmux pane capture (`tmux capture-pane -p -t <pane_id>`) before proceeding with validation. This prevents silent failures where a dev server fails to start but the agent proceeds to test against nothing.
127
+
128
+ **Teardown**: Reverse order of setup. Kill processes via `tmux send-keys -t <pane_id> C-c` or kill the pane.
129
+
130
+ **Worktree isolation**: Each worktree uses unique ports (via `.env.local`), so tmux sessions in different worktrees don't conflict. Agents must use the correct repo path (`-c`) for the worktree they're operating in.
131
+
132
+ Reference xcode-automation as the canonical tmux pattern.
133
+
134
+ ## Hypothesis-First Validation Workflow
135
+
136
+ New suites should be drafted, then tested hands-on on a feature branch before guidance is marked as proven. This aligns with the Proven vs Untested Guidance principle:
137
+
138
+ 1. **Draft**: Write suite files based on plan and codebase analysis (mark unverified practices as hypotheses)
139
+ 2. **Test on feature branch**: Check out a feature branch and exercise each suite's practices hands-on — boot services, run commands, verify workflows, test worktree isolation
140
+ 3. **Verify & adjust**: Document what works, what doesn't, what needs adjustment. Worktree-specific concerns get explicit verification.
141
+ 4. **Solidify**: Only after verification do practices become authoritative guidance. Unverified practices stay framed as motivations per the Proven vs Untested Guidance principle.
142
+
143
+ The plan/handoff document persists as the hypothesis record. If implementation runs long, it serves as the handoff document for future work.
144
+
145
+ ## Cross-Referencing Between Suites
146
+
147
+ **Reference** when complex multi-step setup is involved (e.g., simulator setup spanning multiple tools) — point to the authoritative suite's decision tree rather than duplicating instructions.
148
+
149
+ **Inline** when the command is simple and stable (e.g., `xcrun simctl boot <UDID>`) — no need to send agents to another document for a single command.
150
+
151
+ Decision trees are the natural place for cross-references — branch points that route to another suite's decision tree. Example from browser-automation: "Does the change affect native iOS rendering? → Yes → follow xcode-automation decision tree for build and simulator verification."
152
+
153
+ ## Testing Scenario Matrix
154
+
155
+ Target repositories should build a scenario matrix mapping their validation scenarios to suite combinations. The matrix documents which suites apply to which types of changes, so agents can quickly determine what validation is needed. Structure as a table:
156
+
157
+ | Scenario | Suite(s) | Notes |
158
+ |----------|----------|-------|
159
+ | _Description of change type_ | _Which suites apply_ | _Any special setup or cross-references_ |
160
+
161
+ Example using this repo's default suites:
162
+
163
+ | Scenario | Suite(s) | Notes |
164
+ |----------|----------|-------|
165
+ | Browser UI changes only | browser-automation | Dev server must be running |
166
+ | Native iOS/macOS changes | xcode-automation | Simulator setup via session defaults |
167
+ | Cross-platform changes (web + native) | browser-automation + xcode-automation | Each suite's decision tree routes to the relevant validation path |
168
+
169
+ When a suite serves as a shared dependency for multiple scenarios (e.g., a database management suite referenced by both API and front-end suites), it should be cross-referenced via decision tree branch points rather than duplicated.
170
+
171
+ ## Environment Management Patterns
172
+
173
+ Validation suites that depend on environment configuration should document these patterns for their domain:
174
+
175
+ **ENV injection**: Document how the target project injects environment variables for different contexts (local development, testing, production). Suites should teach the pattern (e.g., "check for `.env.*` files and wrapper scripts") rather than hardcoding specific variable names.
176
+
177
+ **Service isolation**: When validation requires running services (dev servers, databases, bundlers), document how to avoid port conflicts across concurrent worktrees or parallel agent sessions. Reference the suite's ENV Configuration table for relevant variables.
178
+
179
+ **Worktree isolation**: Each worktree should use unique ports and isolated service instances where possible. Suites should document which resources need isolation and how to configure it (e.g., xcode-automation documents simulator isolation via dedicated simulator clones and derived data paths).
180
+
181
+ ## Suite Creation Guidance
182
+
183
+ When creating a new validation suite for a new domain:
184
+
185
+ **Engineer provides**: Testing scenarios, tooling requirements, CI/CD integration needs, cross-references to existing suites.
186
+
187
+ **Suite author follows**:
188
+ 1. Follow the validation suite schema (`ah schema validation-suite`)
189
+ 2. Validate the stochastic dimension meets the existence threshold
190
+ 3. Apply the Command Documentation Principle — external tools explicit, internal commands via patterns + discovery
191
+ 4. Include a Decision Tree routing agents to the correct validation path
192
+ 5. Use tmux Session Management Standard for long-running processes
193
+ 6. Document proven vs untested guidance per the Hypothesis-First Validation Workflow
194
+ 7. Cross-reference other suites at decision tree branch points
195
+
196
+ **Structural templates** (reference the existing default suites for patterns):
197
+ - xcode-automation — external-tool-heavy suite (MCP tools, xctrace, simctl). Reference for suites that primarily wrap external CLI tools with agent-driven exploration.
198
+ - browser-automation — dual-dimension suite (agent-browser stochastic, Playwright deterministic). Reference for suites that have both agent-driven exploration and scripted CI-gated tests.
199
+
83
200
  ## Related References
84
201
 
85
202
  - [`tools-commands-mcp-hooks.md`](tools-commands-mcp-hooks.md) — When validation uses hooks, CLI commands, or MCP research tools
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "all-hands-cli",
3
- "version": "0.1.8",
3
+ "version": "0.1.10",
4
4
  "description": "Agentic harness for model-first software development",
5
5
  "type": "module",
6
6
  "bin": {