all-hands-cli 0.1.8 → 0.1.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.allhands/flows/shared/EXTERNAL_TECH_GUIDANCE.md +20 -36
- package/.allhands/harness/src/commands/spawn.ts +194 -9
- package/.allhands/harness/src/lib/opencode/index.ts +16 -0
- package/.allhands/harness/src/lib/opencode/prompts/reposearch.md +115 -0
- package/.allhands/skills/harness-maintenance/references/validation-tooling.md +117 -0
- package/package.json +1 -1
|
@@ -37,42 +37,26 @@ For proprietary domains with documentation pages:
|
|
|
37
37
|
- Extract API patterns, configuration examples
|
|
38
38
|
- Note version-specific behaviors
|
|
39
39
|
|
|
40
|
-
### Open Source Inspiration (
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
- Use `Grep` to search code content
|
|
61
|
-
- Use `Read` to examine specific files
|
|
62
|
-
- Use `ls` to explore directory structure
|
|
63
|
-
|
|
64
|
-
4. **Clean up when done** (optional):
|
|
65
|
-
```bash
|
|
66
|
-
rm -rf .reposearch/<repo-name>
|
|
67
|
-
```
|
|
68
|
-
|
|
69
|
-
This approach leverages the agent's superior local file navigation capabilities:
|
|
70
|
-
- Full regex search across the codebase
|
|
71
|
-
- Fast pattern matching and file discovery
|
|
72
|
-
- Direct file reading without API encoding issues
|
|
73
|
-
- Study implementation patterns in similar projects
|
|
74
|
-
- Extract architectural decisions
|
|
75
|
-
- Note how libraries handle similar problems
|
|
40
|
+
### Open Source Inspiration (`ah spawn reposearch`)
|
|
41
|
+
|
|
42
|
+
Use `ah spawn reposearch` to clone external GitHub repos and delegate research to an AI agent that searches across both the current project and external codebases.
|
|
43
|
+
|
|
44
|
+
**OSS codebase answers** — ask how a specific project handles something:
|
|
45
|
+
```bash
|
|
46
|
+
ah spawn reposearch "How does this project handle authentication?" --repos https://github.com/org/project
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
**Cross-repo comparison** — compare our implementation vs an external project:
|
|
50
|
+
```bash
|
|
51
|
+
ah spawn reposearch "Compare our error handling approach vs theirs" --repos https://github.com/org/project
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
**Multi-framework comparison** — check out 2+ repos, compare approaches side-by-side:
|
|
55
|
+
```bash
|
|
56
|
+
ah spawn reposearch "How do these projects handle routing?" --repos https://github.com/a/repo,https://github.com/b/repo
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
Re-running the same command with the same repos is fast — clones are cached locally under `.reposearch/` between invocations and are only fast-forwarded (`git pull --ff-only`) on later runs.
|
|
76
60
|
|
|
77
61
|
### Parallel Exploration
|
|
78
62
|
|
|
@@ -3,13 +3,15 @@
|
|
|
3
3
|
*
|
|
4
4
|
* Commands:
|
|
5
5
|
* ah spawn codesearch "<query>" [--budget <n>] [--steps <n>]
|
|
6
|
+
* ah spawn reposearch "<query>" --repos <url1,url2,...> [--steps <n>]
|
|
6
7
|
*/
|
|
7
8
|
|
|
8
9
|
import { Command } from "commander";
|
|
9
|
-
import { readFileSync } from "fs";
|
|
10
|
-
import { dirname, join } from "path";
|
|
10
|
+
import { existsSync, mkdirSync, readFileSync } from "fs";
|
|
11
|
+
import { dirname, join, basename } from "path";
|
|
11
12
|
import { fileURLToPath } from "url";
|
|
12
|
-
import {
|
|
13
|
+
import { execFileSync } from "child_process";
|
|
14
|
+
import { AgentRunner, withDebugInfo, type ReposearchOutput } from "../lib/opencode/index.js";
|
|
13
15
|
import { BaseCommand, type CommandResult } from "../lib/base-command.js";
|
|
14
16
|
import { loadProjectSettings } from "../hooks/shared.js";
|
|
15
17
|
|
|
@@ -19,15 +21,23 @@ const getProjectRoot = (): string => {
|
|
|
19
21
|
return process.env.PROJECT_ROOT || process.cwd();
|
|
20
22
|
};
|
|
21
23
|
|
|
22
|
-
// Load
|
|
24
|
+
// Load prompts
|
|
23
25
|
const CODESEARCH_PROMPT_PATH = join(__dirname, "../lib/opencode/prompts/codesearch.md");
|
|
24
26
|
const getCodesearchPrompt = (): string => readFileSync(CODESEARCH_PROMPT_PATH, "utf-8");
|
|
25
27
|
|
|
26
|
-
|
|
28
|
+
const REPOSEARCH_PROMPT_PATH = join(__dirname, "../lib/opencode/prompts/reposearch.md");
|
|
29
|
+
const getReposearchPrompt = (): string => readFileSync(REPOSEARCH_PROMPT_PATH, "utf-8");
|
|
30
|
+
|
|
31
|
+
// Codesearch defaults
|
|
27
32
|
const DEFAULT_TOOL_BUDGET = 12;
|
|
28
33
|
const DEFAULT_STEPS_LIMIT = 20;
|
|
29
34
|
const DEFAULT_TIMEOUT_MS = 120000; // 2 min
|
|
30
35
|
|
|
36
|
+
// Reposearch defaults
|
|
37
|
+
const DEFAULT_REPOSEARCH_STEPS = 30;
|
|
38
|
+
const DEFAULT_REPOSEARCH_TIMEOUT_MS = 180000; // 3 min
|
|
39
|
+
const DEFAULT_REPOSEARCH_TOOL_BUDGET = 20;
|
|
40
|
+
|
|
31
41
|
// Output types
|
|
32
42
|
interface CodeResult {
|
|
33
43
|
file: string;
|
|
@@ -125,6 +135,165 @@ Respond with JSON matching the required schema.`;
|
|
|
125
135
|
}
|
|
126
136
|
}
|
|
127
137
|
|
|
138
|
+
/**
 * Derive a cache directory name from a repository URL.
 *
 * e.g. "https://github.com/org/repo"      -> "org--repo"
 *      "https://github.com/org/repo.git/" -> "org--repo"
 *
 * @param url - Repository URL exactly as supplied by the caller.
 * @returns A directory name built from the URL path segments joined with
 *   "--". The result is never "", "." or "..", so joining it onto the cache
 *   directory cannot resolve to the cache directory itself or to its parent.
 */
function repoDirName(url: string): string {
  let name: string;
  try {
    const parsed = new URL(url);
    name = parsed.pathname
      // Trailing slashes must go first, otherwise ".git/" is not recognised
      // as a suffix and the name ends in a dangling "--".
      .replace(/\/+$/, "")
      .replace(/\.git$/, "")
      .split("/")
      // Drops the empty leading segment and any produced by doubled slashes.
      .filter(Boolean)
      .join("--");
  } catch {
    // Not a parseable URL (e.g. scp-style "git@host:org/repo.git"):
    // fall back to the last path component.
    name = basename(url).replace(/\.git$/, "");
  }

  // "" would target the cache root and ".." its parent (the project root);
  // substitute a neutral name instead.
  return name === "" || name === "." || name === ".." ? "repo" : name;
}
|
|
153
|
+
|
|
154
|
+
/**
 * Clone or pull a repo into the .reposearch directory.
 *
 * If `<reposearchDir>/<derived name>/.git` already exists, the checkout is
 * updated with `git pull --ff-only`; otherwise a shallow (`--depth 1`) clone
 * is created. Git failures are reported on stderr as a warning and are not
 * rethrown.
 *
 * @param reposearchDir - Directory that holds all cached clones; created on demand.
 * @param repoUrl - Repository URL to clone. Passed to git after a `--`
 *   terminator so it can never be interpreted as a command-line option.
 * @returns The local directory path, or null on failure.
 */
function cloneOrPullRepo(reposearchDir: string, repoUrl: string): string | null {
  const dirName = repoDirName(repoUrl);
  const repoDir = join(reposearchDir, dirName);

  try {
    if (existsSync(join(repoDir, ".git"))) {
      // Repo already cloned — pull latest
      execFileSync("git", ["pull", "--ff-only"], {
        cwd: repoDir,
        stdio: "pipe",
        timeout: 60000,
      });
    } else {
      // Fresh clone (shallow)
      mkdirSync(reposearchDir, { recursive: true });
      // "--" ends option parsing: without it a URL beginning with "-"
      // (e.g. "--upload-pack=<cmd>") would be treated as a git option.
      execFileSync("git", ["clone", "--depth", "1", "--", repoUrl, repoDir], {
        stdio: "pipe",
        timeout: 120000,
      });
    }
    return repoDir;
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    console.error(`Warning: Failed to clone/pull ${repoUrl}: ${message}`);
    return null;
  }
}
|
|
185
|
+
|
|
186
|
+
/**
 * Reposearch command - spawn research agent that searches across current project and external repos.
 *
 * Flow: validate arguments, clone or update every requested repo under
 * `<project root>/.reposearch`, then run the "reposearch" agent with a
 * message listing the project root and each local clone. Repos that fail to
 * clone are reported as warnings; the command only fails outright when none
 * of them could be prepared.
 */
class ReposearchCommand extends BaseCommand {
  readonly name = "reposearch";
  readonly description = "Research code across the current project and external GitHub repositories";

  /** Declare the positional query plus the --repos, --steps and --debug options. */
  defineArguments(cmd: Command): void {
    cmd
      .argument("<query>", "Research query (natural language)")
      .requiredOption("--repos <urls>", "Comma-separated GitHub repo URLs to search")
      .option("--steps <n>", "Hard step limit for agent iterations", String(DEFAULT_REPOSEARCH_STEPS))
      .option("--debug", "Include agent debug metadata (model, timing, fallback) in output");
  }

  /**
   * Run the research agent.
   *
   * @param args - Named arguments: `query`, `repos` (comma-separated URLs),
   *   optional `steps` and `debug`.
   * @returns A success result carrying the agent's analysis, or an error
   *   result with code `validation_error`, `clone_error`, `agent_error` or
   *   `spawn_error`.
   */
  async execute(args: Record<string, unknown>): Promise<CommandResult> {
    const query = args.query as string;
    const reposRaw = args.repos as string;
    const stepsLimit = parseInt((args.steps as string) ?? String(DEFAULT_REPOSEARCH_STEPS), 10);
    const debug = !!args.debug;

    if (!query) {
      return this.error("validation_error", "query is required");
    }
    if (!reposRaw) {
      return this.error("validation_error", "--repos is required (comma-separated GitHub URLs)");
    }
    // parseInt yields NaN for non-numeric input; reject it (and non-positive
    // limits) here instead of handing a meaningless step limit to the runner.
    if (Number.isNaN(stepsLimit) || stepsLimit < 1) {
      return this.error("validation_error", "--steps must be a positive integer");
    }

    // De-duplicate so a repeated URL is not pulled and listed twice.
    const repoUrls = [...new Set(reposRaw.split(",").map((u) => u.trim()).filter(Boolean))];
    if (repoUrls.length === 0) {
      return this.error("validation_error", "No valid repo URLs provided");
    }

    const projectRoot = getProjectRoot();
    const reposearchDir = join(projectRoot, ".reposearch");

    // Clone or pull each repo
    const repoDirectories: Array<{ url: string; dir: string }> = [];
    const warnings: string[] = [];

    for (const url of repoUrls) {
      const dir = cloneOrPullRepo(reposearchDir, url);
      if (dir) {
        repoDirectories.push({ url, dir });
      } else {
        warnings.push(`Failed to clone/pull: ${url}`);
      }
    }

    if (repoDirectories.length === 0) {
      return this.error("clone_error", "All repo clones/pulls failed. Check URLs and network.");
    }

    const runner = new AgentRunner(projectRoot);

    // Build directory listing for the agent
    const repoListing = repoDirectories
      .map((r) => `- ${r.url} → ${r.dir}`)
      .join("\n");

    const userMessage = `## Research Query
${query}

## Directories to Search

### Current Project
- Root: ${projectRoot}

### External Repositories
${repoListing}

## Budget
- Tool budget (soft): ${DEFAULT_REPOSEARCH_TOOL_BUDGET} tool calls
- Available tools: grep (text search), glob (file patterns), read (file content), lsp (if available)
- Search all relevant directories to answer the query

${warnings.length > 0 ? `## Warnings\n${warnings.map((w) => `- ${w}`).join("\n")}\n\n` : ""}Respond with JSON matching the required schema.`;

    try {
      const result = await runner.run<ReposearchOutput>(
        {
          name: "reposearch",
          systemPrompt: getReposearchPrompt(),
          timeoutMs: DEFAULT_REPOSEARCH_TIMEOUT_MS,
          steps: stepsLimit,
        },
        userMessage
      );

      if (!result.success) {
        return this.error("agent_error", result.error ?? "Unknown agent error");
      }

      // Checked explicitly rather than asserted with `!`, so a successful run
      // without a payload becomes an error result instead of a TypeError below.
      const data = result.data;
      if (!data) {
        return this.error("agent_error", "Agent returned no data");
      }

      return this.success(withDebugInfo({
        query,
        repos_requested: repoUrls,
        repos_analyzed: data.repos_analyzed,
        analysis: data.analysis,
        code_references: data.code_references,
        warnings,
        metadata: result.metadata,
      }, result, debug));
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      return this.error("spawn_error", message);
    }
  }
}
|
|
296
|
+
|
|
128
297
|
/**
|
|
129
298
|
* Register spawn commands on the given commander program.
|
|
130
299
|
*/
|
|
@@ -133,19 +302,35 @@ export function register(program: Command): void {
|
|
|
133
302
|
.command("spawn")
|
|
134
303
|
.description("Spawn sub-agents for specialized tasks");
|
|
135
304
|
|
|
305
|
+
// Register codesearch
|
|
136
306
|
const codesearch = new CodesearchCommand();
|
|
137
|
-
const
|
|
138
|
-
codesearch.defineArguments(
|
|
139
|
-
|
|
307
|
+
const codesearchCmd = spawnCmd.command(codesearch.name).description(codesearch.description);
|
|
308
|
+
codesearch.defineArguments(codesearchCmd);
|
|
309
|
+
codesearchCmd.action(async (...args) => {
|
|
140
310
|
const opts = args[args.length - 2] as Record<string, unknown>;
|
|
141
311
|
const cmdObj = args[args.length - 1] as Command;
|
|
142
312
|
const positionalArgs = cmdObj.args;
|
|
143
313
|
|
|
144
|
-
// Map positional args to named args based on command definition
|
|
145
314
|
const namedArgs: Record<string, unknown> = { ...opts };
|
|
146
315
|
if (positionalArgs[0]) namedArgs.query = positionalArgs[0];
|
|
147
316
|
|
|
148
317
|
const result = await codesearch.execute(namedArgs);
|
|
149
318
|
console.log(JSON.stringify(result, null, 2));
|
|
150
319
|
});
|
|
320
|
+
|
|
321
|
+
// Register reposearch
|
|
322
|
+
const reposearch = new ReposearchCommand();
|
|
323
|
+
const reposearchCmd = spawnCmd.command(reposearch.name).description(reposearch.description);
|
|
324
|
+
reposearch.defineArguments(reposearchCmd);
|
|
325
|
+
reposearchCmd.action(async (...args) => {
|
|
326
|
+
const opts = args[args.length - 2] as Record<string, unknown>;
|
|
327
|
+
const cmdObj = args[args.length - 1] as Command;
|
|
328
|
+
const positionalArgs = cmdObj.args;
|
|
329
|
+
|
|
330
|
+
const namedArgs: Record<string, unknown> = { ...opts };
|
|
331
|
+
if (positionalArgs[0]) namedArgs.query = positionalArgs[0];
|
|
332
|
+
|
|
333
|
+
const result = await reposearch.execute(namedArgs);
|
|
334
|
+
console.log(JSON.stringify(result, null, 2));
|
|
335
|
+
});
|
|
151
336
|
}
|
|
@@ -101,6 +101,22 @@ export interface AggregatorOutput {
|
|
|
101
101
|
design_notes?: string[];
|
|
102
102
|
}
|
|
103
103
|
|
|
104
|
+
// Reposearch output types

/** One code excerpt cited by the reposearch agent in support of its analysis. */
export interface RepoCodeReference {
  /** "current" or the GitHub URL */
  repo: string;
  /** Relative path within the repo */
  file: string;
  /** First line of the excerpt */
  line_start: number;
  /** Last line of the excerpt */
  line_end: number;
  /** The quoted code snippet */
  code: string;
  /** Note accompanying the snippet */
  context: string;
}

/** Structured payload expected back from the reposearch agent. */
export interface ReposearchOutput {
  /** Markdown research findings */
  analysis: string;
  /** Code excerpts backing the analysis */
  code_references: RepoCodeReference[];
  /** Identifiers of the repos the agent reports having analyzed */
  repos_analyzed: string[];
}
|
|
119
|
+
|
|
104
120
|
export { AgentRunner } from "./runner.js";
|
|
105
121
|
|
|
106
122
|
// Debug metadata for agent results (included when --debug flag is passed)
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
# Repo Search Agent
|
|
2
|
+
|
|
3
|
+
You research code across a current project and one or more external GitHub repositories. You search all provided directories to answer questions, compare implementations, and analyze patterns across codebases. Return structured JSON with your findings.
|
|
4
|
+
|
|
5
|
+
## Context
|
|
6
|
+
|
|
7
|
+
You will receive:
|
|
8
|
+
- **Project root directory**: The current project's codebase
|
|
9
|
+
- **External repo directories**: One or more cloned GitHub repos under `.reposearch/`
|
|
10
|
+
- **Research query**: What to investigate across these codebases
|
|
11
|
+
|
|
12
|
+
## Available Tools
|
|
13
|
+
|
|
14
|
+
**grep** - Text search via ripgrep
|
|
15
|
+
- Search across any of the provided directories
|
|
16
|
+
- Best for: string literals, identifiers, patterns, comments
|
|
17
|
+
|
|
18
|
+
**glob** - File pattern matching
|
|
19
|
+
- Discover files by extension or name pattern in any repo
|
|
20
|
+
- Scope searches to specific directories
|
|
21
|
+
|
|
22
|
+
**read** - File content retrieval
|
|
23
|
+
- Read specific files from any repo after finding them
|
|
24
|
+
- Specify line ranges when possible to minimize output
|
|
25
|
+
|
|
26
|
+
**lsp** (if available) - Language Server Protocol
|
|
27
|
+
- goToDefinition, findReferences, hover
|
|
28
|
+
- Works on the current project; may not be available for external repos
|
|
29
|
+
|
|
30
|
+
## Search Strategy
|
|
31
|
+
|
|
32
|
+
1. **Understand the query**: Determine if it's about a single repo, a comparison, or a pattern search
|
|
33
|
+
2. **Parallel discovery**: Search relevant directories simultaneously using grep/glob
|
|
34
|
+
3. **Targeted reads**: Read specific files to understand implementations
|
|
35
|
+
4. **Cross-reference**: Compare findings between repos to answer the query
|
|
36
|
+
5. **Synthesize**: Combine findings into a coherent analysis
|
|
37
|
+
|
|
38
|
+
## Budget Awareness
|
|
39
|
+
|
|
40
|
+
You have a soft tool budget. Stay efficient:
|
|
41
|
+
- Use grep/glob to narrow down before reading files
|
|
42
|
+
- Don't read entire files when a section suffices
|
|
43
|
+
- Avoid redundant searches across repos
|
|
44
|
+
- Focus on the most relevant code to the query
|
|
45
|
+
|
|
46
|
+
## Output Format
|
|
47
|
+
|
|
48
|
+
Return ONLY valid JSON:
|
|
49
|
+
|
|
50
|
+
```json
|
|
51
|
+
{
|
|
52
|
+
"analysis": "## Findings\n\nMarkdown analysis of research findings...",
|
|
53
|
+
"code_references": [
|
|
54
|
+
{
|
|
55
|
+
"repo": "current",
|
|
56
|
+
"file": "src/auth/handler.ts",
|
|
57
|
+
"line_start": 10,
|
|
58
|
+
"line_end": 25,
|
|
59
|
+
"code": "function handleAuth() { ... }",
|
|
60
|
+
"context": "Current project's auth handler using JWT"
|
|
61
|
+
},
|
|
62
|
+
{
|
|
63
|
+
"repo": "https://github.com/org/project",
|
|
64
|
+
"file": "lib/auth.py",
|
|
65
|
+
"line_start": 45,
|
|
66
|
+
"line_end": 60,
|
|
67
|
+
"code": "class AuthMiddleware: ...",
|
|
68
|
+
"context": "External project uses middleware-based auth"
|
|
69
|
+
}
|
|
70
|
+
],
|
|
71
|
+
"repos_analyzed": ["current", "https://github.com/org/project"]
|
|
72
|
+
}
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
## Field Guidelines
|
|
76
|
+
|
|
77
|
+
**analysis** (markdown string):
|
|
78
|
+
- Structured markdown answering the research query
|
|
79
|
+
- Include headings, comparisons, and key observations
|
|
80
|
+
- Reference specific code when making claims
|
|
81
|
+
- Keep focused on the query — don't summarize everything
|
|
82
|
+
|
|
83
|
+
**code_references** (array, max 15):
|
|
84
|
+
- `repo`: "current" for the project, or the GitHub URL for external repos
|
|
85
|
+
- `file`: Relative path within the repo
|
|
86
|
+
- `line_start` / `line_end`: 1-indexed line range
|
|
87
|
+
- `code`: Actual code snippet (keep concise, 1-20 lines)
|
|
88
|
+
- `context`: Why this reference is relevant (1 sentence)
|
|
89
|
+
|
|
90
|
+
**repos_analyzed** (array):
|
|
91
|
+
- List of repos that were actually searched
|
|
92
|
+
- "current" for the project root
|
|
93
|
+
- GitHub URLs for external repos
|
|
94
|
+
|
|
95
|
+
## Use Cases
|
|
96
|
+
|
|
97
|
+
**OSS Q&A**: "How does project X handle authentication?"
|
|
98
|
+
- Focus on the external repo, search for auth-related patterns
|
|
99
|
+
- Provide concrete code examples with explanation
|
|
100
|
+
|
|
101
|
+
**Cross-repo comparison**: "Compare our error handling vs project X"
|
|
102
|
+
- Search both repos for error handling patterns
|
|
103
|
+
- Highlight similarities and differences in the analysis
|
|
104
|
+
|
|
105
|
+
**Multi-framework comparison**: "How do projects A and B handle routing?"
|
|
106
|
+
- Search multiple external repos
|
|
107
|
+
- Compare approaches side-by-side in the analysis
|
|
108
|
+
|
|
109
|
+
## Anti-patterns
|
|
110
|
+
|
|
111
|
+
- Returning entire files instead of relevant sections
|
|
112
|
+
- Analysis not grounded in actual code found
|
|
113
|
+
- Missing cross-references when comparison was requested
|
|
114
|
+
- Exceeding tool budget with redundant searches
|
|
115
|
+
- Not searching all provided repos when the query requires it
|
|
@@ -17,6 +17,16 @@ This is how validation compounds. Every domain has both a stochastic dimension (
|
|
|
17
17
|
|
|
18
18
|
A validation suite must have a meaningful stochastic dimension to justify existing. Deterministic-only tools (type checking, linting, formatting) are test commands referenced directly in acceptance criteria and CI/CD — they are NOT suites.
|
|
19
19
|
|
|
20
|
+
## Repository Agnosticism
|
|
21
|
+
|
|
22
|
+
This reference file is a generic rule file that ships with the harness. It MUST NOT contain references to project-specific validation suites, commands, or infrastructure. All examples must either:
|
|
23
|
+
- Reference existing default validation suites shipped with this repo (currently: xcode-automation, browser-automation)
|
|
24
|
+
- Use generic/hypothetical descriptions that any target repository can map to their own context
|
|
25
|
+
|
|
26
|
+
When examples are needed, use **snippets from the existing default suites** rather than naming suites or commands that belong to a specific target project. Target repositories create their own suites for their domains — this file teaches how to create and structure them, not what they should be called.
|
|
27
|
+
|
|
28
|
+
**Why**: Target repositories consume this file as authoritative guidance. Project-specific references create confusion (agents look for suites that don't exist), couple the harness to a single project, and violate the principle that this file teaches patterns, not inventories. If a pattern needs a concrete example, draw it from xcode-automation or browser-automation.
|
|
29
|
+
|
|
20
30
|
## Creating Validation Tooling
|
|
21
31
|
|
|
22
32
|
Follow `.allhands/flows/shared/CREATE_VALIDATION_TOOLING_SPEC.md` for the full process. This creates a spec, not an implementation.
|
|
@@ -80,6 +90,113 @@ Prompt files reference validation suites in their `validation_suites` frontmatte
|
|
|
80
90
|
2. Agent runs suite's **Deterministic Integration** section for acceptance criteria gating
|
|
81
91
|
3. Validation review (`PROMPT_VALIDATION_REVIEW.md`) confirms pass/fail
|
|
82
92
|
|
|
93
|
+
## Command Documentation Principle
|
|
94
|
+
|
|
95
|
+
Two categories of commands exist in validation suites, each requiring different documentation approaches:
|
|
96
|
+
|
|
97
|
+
**External tooling commands — Document explicitly**: Commands from external tools (`xctrace`, `xcrun simctl`, `agent-browser`, `playwright`, `curl`, etc.) are stable, unfamiliar to agents by default, and unlikely to change with codebase evolution. Document specific commands, flags, and use cases inline with motivations. Example from xcode-automation: `xcrun xctrace record --template 'Time Profiler' --device '<UDID>' --attach '<PID>'` — the flags, ordering constraints, and PID discovery method are all external tool knowledge that the suite documents explicitly.
|
|
98
|
+
|
|
99
|
+
**Internal codebase commands — Document patterns, not inventories**: Project-specific scripts, test commands, and codebase-specific CLI wrappers evolve rapidly. Instead:
|
|
100
|
+
1. **Document core infrastructure commands explicitly** — commands that boot services, manage environments, and are foundational to validation in the target project. These are stable and essential per-project, but suites should teach agents how to discover them (e.g., "check `package.json` scripts" or "run `--help`"), not hardcode specific script names.
|
|
101
|
+
2. **Teach patterns for everything else** — naming conventions, where to discover project commands, what categories mean, and how to build upon them.
|
|
102
|
+
3. **Document motivations** — why different test categories exist, when to use which, what confidence each provides.
|
|
103
|
+
|
|
104
|
+
Per **Frontier Models are Capable**: An agent given patterns + motivations + discovery instructions outperforms one given stale command inventories. Suites that teach patterns age gracefully; suites that enumerate commands require maintenance on every change.
|
|
105
|
+
|
|
106
|
+
## Decision Tree Requirement
|
|
107
|
+
|
|
108
|
+
Every validation suite MUST include a decision tree that routes agents to the correct validation approach based on their situation. Decision trees:
|
|
109
|
+
- Distinguish which instructions are relevant to which validation scenario (e.g., UI-only test vs full E2E with native code changes)
|
|
110
|
+
- Show where/when stochastic vs deterministic testing applies
|
|
111
|
+
- Surface deterministic branch points where other validation suites must be utilized (e.g., "Does this branch have native code changes? → Yes → follow xcode-automation decision tree")
|
|
112
|
+
- Cleanly articulate multiple expected use cases within a single suite
|
|
113
|
+
|
|
114
|
+
The decision tree replaces flat prerequisite lists with structured routing. An agent reads the tree and follows the branch matching their situation, skipping irrelevant setup and finding the right cross-references.
|
|
115
|
+
|
|
116
|
+
## tmux Session Management Standard
|
|
117
|
+
|
|
118
|
+
All suites that require long-running processes (dev servers, Expo servers, Flask API, Metro bundler) MUST use the tmux approach proven in xcode-automation:
|
|
119
|
+
|
|
120
|
+
```bash
|
|
121
|
+
# CRITICAL: -t $TMUX_PANE pins split to agent's window, not user's focused window
|
|
122
|
+
tmux split-window -h -d -t $TMUX_PANE \
|
|
123
|
+
-c /path/to/repo '<command>'
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
**Observability**: Agents MUST verify processes are running correctly via tmux pane capture (`tmux capture-pane -p -t <pane_id>`) before proceeding with validation. This prevents silent failures where a dev server fails to start but the agent proceeds to test against nothing.
|
|
127
|
+
|
|
128
|
+
**Teardown**: Reverse order of setup. Kill processes via `tmux send-keys -t <pane_id> C-c` or kill the pane.
|
|
129
|
+
|
|
130
|
+
**Worktree isolation**: Each worktree uses unique ports (via `.env.local`), so tmux sessions in different worktrees don't conflict. Agents must use the correct repo path (`-c`) for the worktree they're operating in.
|
|
131
|
+
|
|
132
|
+
Reference xcode-automation as the canonical tmux pattern.
|
|
133
|
+
|
|
134
|
+
## Hypothesis-First Validation Workflow
|
|
135
|
+
|
|
136
|
+
New suites should be drafted, then tested hands-on on a feature branch before guidance is marked as proven. This aligns with the Proven vs Untested Guidance principle:
|
|
137
|
+
|
|
138
|
+
1. **Draft**: Write suite files based on plan and codebase analysis (mark unverified practices as hypotheses)
|
|
139
|
+
2. **Test on feature branch**: Check out a feature branch and exercise each suite's practices hands-on — boot services, run commands, verify workflows, test worktree isolation
|
|
140
|
+
3. **Verify & adjust**: Document what works, what doesn't, what needs adjustment. Worktree-specific concerns get explicit verification.
|
|
141
|
+
4. **Solidify**: Only after verification do practices become authoritative guidance. Unverified practices stay framed as motivations per the Proven vs Untested Guidance principle.
|
|
142
|
+
|
|
143
|
+
The plan/handoff document persists as the hypothesis record. If implementation runs long, it serves as the handoff document for future work.
|
|
144
|
+
|
|
145
|
+
## Cross-Referencing Between Suites
|
|
146
|
+
|
|
147
|
+
**Reference** when complex multi-step setup is involved (e.g., simulator setup spanning multiple tools) — point to the authoritative suite's decision tree rather than duplicating instructions.
|
|
148
|
+
|
|
149
|
+
**Inline** when the command is simple and stable (e.g., `xcrun simctl boot <UDID>`) — no need to send agents to another document for a single command.
|
|
150
|
+
|
|
151
|
+
Decision trees are the natural place for cross-references — branch points that route to another suite's decision tree. Example from browser-automation: "Does the change affect native iOS rendering? → Yes → follow xcode-automation decision tree for build and simulator verification."
|
|
152
|
+
|
|
153
|
+
## Testing Scenario Matrix
|
|
154
|
+
|
|
155
|
+
Target repositories should build a scenario matrix mapping their validation scenarios to suite combinations. The matrix documents which suites apply to which types of changes, so agents can quickly determine what validation is needed. Structure as a table:
|
|
156
|
+
|
|
157
|
+
| Scenario | Suite(s) | Notes |
|
|
158
|
+
|----------|----------|-------|
|
|
159
|
+
| _Description of change type_ | _Which suites apply_ | _Any special setup or cross-references_ |
|
|
160
|
+
|
|
161
|
+
Example using this repo's default suites:
|
|
162
|
+
|
|
163
|
+
| Scenario | Suite(s) | Notes |
|
|
164
|
+
|----------|----------|-------|
|
|
165
|
+
| Browser UI changes only | browser-automation | Dev server must be running |
|
|
166
|
+
| Native iOS/macOS changes | xcode-automation | Simulator setup via session defaults |
|
|
167
|
+
| Cross-platform changes (web + native) | browser-automation + xcode-automation | Each suite's decision tree routes to the relevant validation path |
|
|
168
|
+
|
|
169
|
+
When a suite serves as a shared dependency for multiple scenarios (e.g., a database management suite referenced by both API and front-end suites), it should be cross-referenced via decision tree branch points rather than duplicated.
|
|
170
|
+
|
|
171
|
+
## Environment Management Patterns
|
|
172
|
+
|
|
173
|
+
Validation suites that depend on environment configuration should document these patterns for their domain:
|
|
174
|
+
|
|
175
|
+
**ENV injection**: Document how the target project injects environment variables for different contexts (local development, testing, production). Suites should teach the pattern (e.g., "check for `.env.*` files and wrapper scripts") rather than hardcoding specific variable names.
|
|
176
|
+
|
|
177
|
+
**Service isolation**: When validation requires running services (dev servers, databases, bundlers), document how to avoid port conflicts across concurrent worktrees or parallel agent sessions. Reference the suite's ENV Configuration table for relevant variables.
|
|
178
|
+
|
|
179
|
+
**Worktree isolation**: Each worktree should use unique ports and isolated service instances where possible. Suites should document which resources need isolation and how to configure it (e.g., xcode-automation documents simulator isolation via dedicated simulator clones and derived data paths).
|
|
180
|
+
|
|
181
|
+
## Suite Creation Guidance
|
|
182
|
+
|
|
183
|
+
When creating a new validation suite for a new domain:
|
|
184
|
+
|
|
185
|
+
**Engineer provides**: Testing scenarios, tooling requirements, CI/CD integration needs, cross-references to existing suites.
|
|
186
|
+
|
|
187
|
+
**Suite author follows**:
|
|
188
|
+
1. Follow the validation suite schema (`ah schema validation-suite`)
|
|
189
|
+
2. Validate the stochastic dimension meets the existence threshold
|
|
190
|
+
3. Apply the Command Documentation Principle — external tools explicit, internal commands via patterns + discovery
|
|
191
|
+
4. Include a Decision Tree routing agents to the correct validation path
|
|
192
|
+
5. Use tmux Session Management Standard for long-running processes
|
|
193
|
+
6. Document proven vs untested guidance per the Hypothesis-First Validation Workflow
|
|
194
|
+
7. Cross-reference other suites at decision tree branch points
|
|
195
|
+
|
|
196
|
+
**Structural templates** (reference the existing default suites for patterns):
|
|
197
|
+
- xcode-automation — external-tool-heavy suite (MCP tools, xctrace, simctl). Reference for suites that primarily wrap external CLI tools with agent-driven exploration.
|
|
198
|
+
- browser-automation — dual-dimension suite (agent-browser stochastic, Playwright deterministic). Reference for suites that have both agent-driven exploration and scripted CI-gated tests.
|
|
199
|
+
|
|
83
200
|
## Related References
|
|
84
201
|
|
|
85
202
|
- [`tools-commands-mcp-hooks.md`](tools-commands-mcp-hooks.md) — When validation uses hooks, CLI commands, or MCP research tools
|