all-hands-cli 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (305) hide show
  1. package/.allhands/README.md +75 -0
  2. package/.allhands/agents/compounder.yaml +15 -0
  3. package/.allhands/agents/coordinator.yaml +17 -0
  4. package/.allhands/agents/documentor.yaml +15 -0
  5. package/.allhands/agents/e2e-test-planner.yaml +17 -0
  6. package/.allhands/agents/emergent.yaml +22 -0
  7. package/.allhands/agents/executor.yaml +14 -0
  8. package/.allhands/agents/ideation.yaml +11 -0
  9. package/.allhands/agents/initiative-steering.yaml +19 -0
  10. package/.allhands/agents/judge.yaml +13 -0
  11. package/.allhands/agents/planner.yaml +19 -0
  12. package/.allhands/agents/pr-reviewer.yaml +15 -0
  13. package/.allhands/docs.json +5 -0
  14. package/.allhands/docs.local.json +26 -0
  15. package/.allhands/flows/COMPOUNDING.md +203 -0
  16. package/.allhands/flows/COORDINATION.md +89 -0
  17. package/.allhands/flows/CORE.md +87 -0
  18. package/.allhands/flows/DOCUMENTATION.md +218 -0
  19. package/.allhands/flows/E2E_TEST_PLAN_BUILDING.md +140 -0
  20. package/.allhands/flows/EMERGENT_PLANNING.md +57 -0
  21. package/.allhands/flows/IDEATION_SCOPING.md +154 -0
  22. package/.allhands/flows/INITIATIVE_STEERING.md +110 -0
  23. package/.allhands/flows/JUDGE_REVIEWING.md +79 -0
  24. package/.allhands/flows/PROMPT_TASK_EXECUTION.md +68 -0
  25. package/.allhands/flows/PR_REVIEWING.md +43 -0
  26. package/.allhands/flows/SPEC_PLANNING.md +216 -0
  27. package/.allhands/flows/harness/WRITING_HARNESS_FLOWS.md +27 -0
  28. package/.allhands/flows/harness/WRITING_HARNESS_KNOWLEDGE.md +27 -0
  29. package/.allhands/flows/harness/WRITING_HARNESS_ORCHESTRATION.md +27 -0
  30. package/.allhands/flows/harness/WRITING_HARNESS_SKILLS.md +27 -0
  31. package/.allhands/flows/harness/WRITING_HARNESS_TOOLS.md +27 -0
  32. package/.allhands/flows/harness/WRITING_HARNESS_VALIDATION_TOOLING.md +27 -0
  33. package/.allhands/flows/shared/CODEBASE_UNDERSTANDING.md +72 -0
  34. package/.allhands/flows/shared/CREATE_HARNESS_SPEC.md +48 -0
  35. package/.allhands/flows/shared/CREATE_SPEC.md +41 -0
  36. package/.allhands/flows/shared/CREATE_VALIDATION_TOOLING_SPEC.md +70 -0
  37. package/.allhands/flows/shared/DOCUMENTATION_DISCOVERY.md +123 -0
  38. package/.allhands/flows/shared/DOCUMENTATION_WRITER.md +101 -0
  39. package/.allhands/flows/shared/EMERGENT_REFINEMENT_ANALYSIS.md +76 -0
  40. package/.allhands/flows/shared/EXTERNAL_TECH_GUIDANCE.md +97 -0
  41. package/.allhands/flows/shared/IDEATION_CODEBASE_GROUNDING.md +49 -0
  42. package/.allhands/flows/shared/PLAN_DEEPENING.md +152 -0
  43. package/.allhands/flows/shared/PROMPT_TASKS_CURATION.md +113 -0
  44. package/.allhands/flows/shared/PROMPT_VALIDATION_REVIEW.MD +99 -0
  45. package/.allhands/flows/shared/QUICK_PREMORTEM.md +70 -0
  46. package/.allhands/flows/shared/RESEARCH_GUIDANCE.md +38 -0
  47. package/.allhands/flows/shared/REVIEW_OPTIONS_BREAKDOWN.md +68 -0
  48. package/.allhands/flows/shared/SKILL_EXTRACTION.md +84 -0
  49. package/.allhands/flows/shared/SPEC_FLOW_ANALYSIS.md +119 -0
  50. package/.allhands/flows/shared/TDD_WORKFLOW.md +109 -0
  51. package/.allhands/flows/shared/UTILIZE_VALIDATION_TOOLING.md +84 -0
  52. package/.allhands/flows/shared/WRITING_HARNESS_FLOWS.md +11 -0
  53. package/.allhands/flows/shared/WRITING_HARNESS_MCP_TOOLS.md +84 -0
  54. package/.allhands/flows/shared/jury/ARCHITECTURE_REVIEW.md +91 -0
  55. package/.allhands/flows/shared/jury/BEST_PRACTICES_REVIEW.md +80 -0
  56. package/.allhands/flows/shared/jury/CLAIM_VERIFICATION_REVIEW.md +101 -0
  57. package/.allhands/flows/shared/jury/EXPECTATIONS_FIT_REVIEW.md +78 -0
  58. package/.allhands/flows/shared/jury/MAINTAINABILITY_REVIEW.md +110 -0
  59. package/.allhands/flows/shared/jury/PROMPTS_EXPECTATIONS_FIT.md +74 -0
  60. package/.allhands/flows/shared/jury/PROMPTS_FLOW_ANALYSIS.md +92 -0
  61. package/.allhands/flows/shared/jury/PROMPTS_YAGNI.md +78 -0
  62. package/.allhands/flows/shared/jury/PROMPT_PREMORTEM.md +125 -0
  63. package/.allhands/flows/shared/jury/SECURITY_REVIEW.md +86 -0
  64. package/.allhands/flows/shared/jury/YAGNI_REVIEW.md +82 -0
  65. package/.allhands/flows/wip/DEBUG_INVESTIGATION.md +162 -0
  66. package/.allhands/flows/wip/MEMORY_RECALL.md +62 -0
  67. package/.allhands/harness/ah +131 -0
  68. package/.allhands/harness/package-lock.json +5292 -0
  69. package/.allhands/harness/package.json +52 -0
  70. package/.allhands/harness/src/__tests__/e2e/commands.test.ts +307 -0
  71. package/.allhands/harness/src/__tests__/e2e/event-loop.test.ts +539 -0
  72. package/.allhands/harness/src/__tests__/e2e/hooks.test.ts +427 -0
  73. package/.allhands/harness/src/__tests__/e2e/new-initiative-routing.test.ts +137 -0
  74. package/.allhands/harness/src/__tests__/e2e/run-e2e.ts +109 -0
  75. package/.allhands/harness/src/__tests__/e2e/specs-type.test.ts +210 -0
  76. package/.allhands/harness/src/__tests__/e2e/validation-hooks.test.ts +669 -0
  77. package/.allhands/harness/src/__tests__/e2e/validation-path-consistency.test.ts +354 -0
  78. package/.allhands/harness/src/__tests__/e2e/validation.test.ts +528 -0
  79. package/.allhands/harness/src/__tests__/harness/assertions.ts +318 -0
  80. package/.allhands/harness/src/__tests__/harness/cli-runner.ts +359 -0
  81. package/.allhands/harness/src/__tests__/harness/fixture.ts +384 -0
  82. package/.allhands/harness/src/__tests__/harness/hook-runner.ts +411 -0
  83. package/.allhands/harness/src/__tests__/harness/index.ts +122 -0
  84. package/.allhands/harness/src/cli.ts +36 -0
  85. package/.allhands/harness/src/commands/complexity.ts +177 -0
  86. package/.allhands/harness/src/commands/context7.ts +202 -0
  87. package/.allhands/harness/src/commands/docs.ts +557 -0
  88. package/.allhands/harness/src/commands/hooks.ts +24 -0
  89. package/.allhands/harness/src/commands/index.ts +51 -0
  90. package/.allhands/harness/src/commands/knowledge.ts +382 -0
  91. package/.allhands/harness/src/commands/memories.ts +302 -0
  92. package/.allhands/harness/src/commands/notify.ts +61 -0
  93. package/.allhands/harness/src/commands/oracle.ts +158 -0
  94. package/.allhands/harness/src/commands/perplexity.ts +220 -0
  95. package/.allhands/harness/src/commands/planning.ts +245 -0
  96. package/.allhands/harness/src/commands/schema.ts +73 -0
  97. package/.allhands/harness/src/commands/skills.ts +128 -0
  98. package/.allhands/harness/src/commands/solutions.ts +353 -0
  99. package/.allhands/harness/src/commands/spawn.ts +158 -0
  100. package/.allhands/harness/src/commands/specs.ts +532 -0
  101. package/.allhands/harness/src/commands/tavily.ts +226 -0
  102. package/.allhands/harness/src/commands/tools.ts +579 -0
  103. package/.allhands/harness/src/commands/trace.ts +327 -0
  104. package/.allhands/harness/src/commands/tui.ts +960 -0
  105. package/.allhands/harness/src/commands/validate.ts +143 -0
  106. package/.allhands/harness/src/commands/validation-tools.ts +108 -0
  107. package/.allhands/harness/src/hooks/context.ts +1442 -0
  108. package/.allhands/harness/src/hooks/enforcement.ts +170 -0
  109. package/.allhands/harness/src/hooks/index.ts +54 -0
  110. package/.allhands/harness/src/hooks/lifecycle.ts +229 -0
  111. package/.allhands/harness/src/hooks/notification.ts +104 -0
  112. package/.allhands/harness/src/hooks/observability.ts +551 -0
  113. package/.allhands/harness/src/hooks/session.ts +88 -0
  114. package/.allhands/harness/src/hooks/shared.ts +815 -0
  115. package/.allhands/harness/src/hooks/transcript-parser.ts +208 -0
  116. package/.allhands/harness/src/hooks/validation.ts +617 -0
  117. package/.allhands/harness/src/lib/__tests__/ctags.test.ts +244 -0
  118. package/.allhands/harness/src/lib/__tests__/docs-validation.test.ts +344 -0
  119. package/.allhands/harness/src/lib/__tests__/mcp-runtime.test.ts +190 -0
  120. package/.allhands/harness/src/lib/__tests__/schema.test.ts +861 -0
  121. package/.allhands/harness/src/lib/base-command.ts +198 -0
  122. package/.allhands/harness/src/lib/cli-daemon.ts +343 -0
  123. package/.allhands/harness/src/lib/compaction.ts +313 -0
  124. package/.allhands/harness/src/lib/ctags.ts +497 -0
  125. package/.allhands/harness/src/lib/docs-validation.ts +907 -0
  126. package/.allhands/harness/src/lib/event-loop.ts +662 -0
  127. package/.allhands/harness/src/lib/flows.ts +155 -0
  128. package/.allhands/harness/src/lib/git.ts +276 -0
  129. package/.allhands/harness/src/lib/knowledge-worker.ts +72 -0
  130. package/.allhands/harness/src/lib/knowledge.ts +810 -0
  131. package/.allhands/harness/src/lib/llm.ts +255 -0
  132. package/.allhands/harness/src/lib/mcp-client.ts +432 -0
  133. package/.allhands/harness/src/lib/mcp-daemon.ts +486 -0
  134. package/.allhands/harness/src/lib/mcp-runtime.ts +418 -0
  135. package/.allhands/harness/src/lib/notification.ts +115 -0
  136. package/.allhands/harness/src/lib/opencode/index.ts +70 -0
  137. package/.allhands/harness/src/lib/opencode/profiles.ts +300 -0
  138. package/.allhands/harness/src/lib/opencode/prompts/codesearch.md +98 -0
  139. package/.allhands/harness/src/lib/opencode/prompts/knowledge-aggregator.md +67 -0
  140. package/.allhands/harness/src/lib/opencode/runner.ts +281 -0
  141. package/.allhands/harness/src/lib/oracle.ts +926 -0
  142. package/.allhands/harness/src/lib/planning-utils.ts +150 -0
  143. package/.allhands/harness/src/lib/planning.ts +605 -0
  144. package/.allhands/harness/src/lib/pr-review.ts +225 -0
  145. package/.allhands/harness/src/lib/prompts.ts +522 -0
  146. package/.allhands/harness/src/lib/schema.ts +418 -0
  147. package/.allhands/harness/src/lib/schemas/agent-profile.ts +141 -0
  148. package/.allhands/harness/src/lib/schemas/template-vars.ts +138 -0
  149. package/.allhands/harness/src/lib/session.ts +164 -0
  150. package/.allhands/harness/src/lib/specs.ts +348 -0
  151. package/.allhands/harness/src/lib/tldr.ts +829 -0
  152. package/.allhands/harness/src/lib/tmux.ts +1051 -0
  153. package/.allhands/harness/src/lib/trace-store.ts +714 -0
  154. package/.allhands/harness/src/mcp/__tests__/index.test.ts +46 -0
  155. package/.allhands/harness/src/mcp/_template.ts +47 -0
  156. package/.allhands/harness/src/mcp/filesystem.ts +33 -0
  157. package/.allhands/harness/src/mcp/index.ts +69 -0
  158. package/.allhands/harness/src/mcp/playwright.ts +34 -0
  159. package/.allhands/harness/src/mcp/xcodebuild.ts +29 -0
  160. package/.allhands/harness/src/schemas/docs.schema.json +44 -0
  161. package/.allhands/harness/src/schemas/settings.schema.json +214 -0
  162. package/.allhands/harness/src/tui/actions.ts +227 -0
  163. package/.allhands/harness/src/tui/file-viewer-modal.ts +270 -0
  164. package/.allhands/harness/src/tui/index.ts +1574 -0
  165. package/.allhands/harness/src/tui/modal.ts +232 -0
  166. package/.allhands/harness/src/tui/prompts-pane.ts +186 -0
  167. package/.allhands/harness/src/tui/status-pane.ts +434 -0
  168. package/.allhands/harness/tsconfig.json +22 -0
  169. package/.allhands/harness/vitest.config.ts +13 -0
  170. package/.allhands/pillars.md +33 -0
  171. package/.allhands/principles.md +88 -0
  172. package/.allhands/schemas/alignment.yaml +51 -0
  173. package/.allhands/schemas/documentation.yaml +10 -0
  174. package/.allhands/schemas/prompt.yaml +92 -0
  175. package/.allhands/schemas/skill.yaml +34 -0
  176. package/.allhands/schemas/solution.yaml +131 -0
  177. package/.allhands/schemas/spec.yaml +67 -0
  178. package/.allhands/schemas/validation-suite.yaml +49 -0
  179. package/.allhands/schemas/workflow.yaml +51 -0
  180. package/.allhands/settings.json +57 -0
  181. package/.allhands/skills/claude-code-patterns/SKILL.md +60 -0
  182. package/.allhands/skills/claude-code-patterns/docs/context-hygiene.md +19 -0
  183. package/.allhands/skills/harness-maintenance/SKILL.md +449 -0
  184. package/.allhands/skills/harness-maintenance/references/core-architecture.md +187 -0
  185. package/.allhands/skills/harness-maintenance/references/harness-skills.md +87 -0
  186. package/.allhands/skills/harness-maintenance/references/knowledge-compounding.md +78 -0
  187. package/.allhands/skills/harness-maintenance/references/tools-commands-mcp-hooks.md +115 -0
  188. package/.allhands/skills/harness-maintenance/references/validation-tooling.md +77 -0
  189. package/.allhands/skills/harness-maintenance/references/writing-flows.md +84 -0
  190. package/.allhands/validation/browser-automation.md +109 -0
  191. package/.allhands/validation/xcode-automation.md +195 -0
  192. package/.allhands/workflows/documentation.md +86 -0
  193. package/.allhands/workflows/investigation.md +81 -0
  194. package/.allhands/workflows/milestone.md +91 -0
  195. package/.allhands/workflows/optimization.md +85 -0
  196. package/.allhands/workflows/refactor.md +99 -0
  197. package/.allhands/workflows/triage.md +81 -0
  198. package/.claude/README.md +1 -0
  199. package/.claude/agents/explorer.md +10 -0
  200. package/.claude/agents/researcher.md +11 -0
  201. package/.claude/agents/task-runner.md +8 -0
  202. package/.claude/settings.json +231 -0
  203. package/.env.ai.example +7 -0
  204. package/.github/workflows/npm-publish.yml +69 -0
  205. package/.internal.json +45 -0
  206. package/.tldr/config.json +11 -0
  207. package/.tldrignore +90 -0
  208. package/CLAUDE.md +6 -0
  209. package/README.md +98 -0
  210. package/bin/sync-cli.js +7552 -0
  211. package/concerns.md +7 -0
  212. package/docs/README.md +41 -0
  213. package/docs/agents/README.md +24 -0
  214. package/docs/agents/agent-configuration-system.md +86 -0
  215. package/docs/agents/execution-agents.md +50 -0
  216. package/docs/agents/knowledge-agents.md +61 -0
  217. package/docs/agents/orchestration-agent.md +57 -0
  218. package/docs/agents/planning-agents.md +84 -0
  219. package/docs/agents/quality-review-agents.md +67 -0
  220. package/docs/agents/workflow-agent-orchestration.md +69 -0
  221. package/docs/flows/README.md +44 -0
  222. package/docs/flows/compounding.md +126 -0
  223. package/docs/flows/coordination.md +72 -0
  224. package/docs/flows/core-harness-integration.md +63 -0
  225. package/docs/flows/documentation-orchestration.md +98 -0
  226. package/docs/flows/e2e-test-plan-building.md +83 -0
  227. package/docs/flows/emergent-refinement.md +104 -0
  228. package/docs/flows/flow-authoring-and-mcp-tools.md +89 -0
  229. package/docs/flows/judge-reviewing.md +112 -0
  230. package/docs/flows/plan-deepening-and-research.md +107 -0
  231. package/docs/flows/plan-review-jury.md +114 -0
  232. package/docs/flows/pr-reviewing.md +54 -0
  233. package/docs/flows/prompt-task-execution.md +119 -0
  234. package/docs/flows/spec-planning.md +162 -0
  235. package/docs/flows/type-specific-scoping-flows.md +49 -0
  236. package/docs/flows/validation-and-skills-integration.md +145 -0
  237. package/docs/flows/wip/wip-flows.md +102 -0
  238. package/docs/harness/README.md +23 -0
  239. package/docs/harness/agent-profiles.md +84 -0
  240. package/docs/harness/cli/README.md +24 -0
  241. package/docs/harness/cli/cli-entry-and-command-discovery.md +91 -0
  242. package/docs/harness/cli/docs-command.md +87 -0
  243. package/docs/harness/cli/knowledge-command.md +91 -0
  244. package/docs/harness/cli/minor-cli-commands.md +65 -0
  245. package/docs/harness/cli/oracle-command.md +113 -0
  246. package/docs/harness/cli/planning-command.md +95 -0
  247. package/docs/harness/cli/schema-and-validation-commands.md +154 -0
  248. package/docs/harness/cli/search-commands.md +97 -0
  249. package/docs/harness/cli/spawn-command.md +136 -0
  250. package/docs/harness/cli/specs-command.md +102 -0
  251. package/docs/harness/cli/tools-command.md +122 -0
  252. package/docs/harness/cli/trace-command.md +122 -0
  253. package/docs/harness/cli-daemon.md +92 -0
  254. package/docs/harness/event-loop.md +184 -0
  255. package/docs/harness/hooks/README.md +15 -0
  256. package/docs/harness/hooks/context-hooks.md +96 -0
  257. package/docs/harness/hooks/lifecycle-and-observability-hooks.md +135 -0
  258. package/docs/harness/hooks/validation-hooks.md +97 -0
  259. package/docs/harness/test-harness.md +149 -0
  260. package/docs/harness/tui.md +176 -0
  261. package/docs/memories.md +20 -0
  262. package/docs/solutions/agentic-issues/premature-agent-deletion-tui-action-dependency-20260130.md +49 -0
  263. package/docs/solutions/agentic-issues/ref-anchor-scope-mismatch-skill-references-20260131.md +55 -0
  264. package/docs/solutions/agentic-issues/tautological-tests-routing-20260131.md +52 -0
  265. package/docs/solutions/integration_issue/blocktool-output-format-mismatch-hook-runner-20260130.md +52 -0
  266. package/docs/solutions/integration_issue/dual-validation-path-divergence-schema-20260130.md +66 -0
  267. package/docs/solutions/security-issues/unsanitized-domain-path-join-20260131.md +52 -0
  268. package/docs/solutions/test-failures/event-loop-mock-ordering-checkAgentWindows-20260130.md +63 -0
  269. package/docs/sync-cli/README.md +19 -0
  270. package/docs/sync-cli/cli-entrypoint-and-commands.md +39 -0
  271. package/docs/sync-cli/commands/README.md +11 -0
  272. package/docs/sync-cli/commands/pull-manifest-command.md +36 -0
  273. package/docs/sync-cli/commands/push-command.md +84 -0
  274. package/docs/sync-cli/commands/sync-command.md +71 -0
  275. package/docs/sync-cli/systems/README.md +14 -0
  276. package/docs/sync-cli/systems/git-and-github-integration.md +49 -0
  277. package/docs/sync-cli/systems/interactive-ui.md +43 -0
  278. package/docs/sync-cli/systems/manifest-and-distribution.md +51 -0
  279. package/docs/sync-cli/systems/path-resolution.md +42 -0
  280. package/package.json +46 -0
  281. package/scripts/install-shim.sh +40 -0
  282. package/scripts/pre-pack.sh +25 -0
  283. package/specs/harness-maintenance-skill.spec.md +138 -0
  284. package/specs/roadmap/git-spec-lifecycle-management.spec.md +113 -0
  285. package/specs/sync-init-flag.spec.md +117 -0
  286. package/specs/unified-workflow-orchestration.spec.md +250 -0
  287. package/specs/validation-tooling-practice.spec.md +98 -0
  288. package/specs/workflow-domain-configuration.spec.md +265 -0
  289. package/src/commands/pull-manifest.ts +31 -0
  290. package/src/commands/push.ts +344 -0
  291. package/src/commands/sync.ts +289 -0
  292. package/src/lib/constants.ts +10 -0
  293. package/src/lib/dotfiles.ts +36 -0
  294. package/src/lib/fs-utils.ts +18 -0
  295. package/src/lib/gh.ts +40 -0
  296. package/src/lib/git.ts +63 -0
  297. package/src/lib/gitignore.ts +167 -0
  298. package/src/lib/manifest.ts +121 -0
  299. package/src/lib/marker-sync.ts +39 -0
  300. package/src/lib/paths.ts +38 -0
  301. package/src/lib/target-lines.ts +66 -0
  302. package/src/lib/ui.ts +78 -0
  303. package/src/sync-cli.ts +120 -0
  304. package/target-lines.json +23 -0
  305. package/tsconfig.json +20 -0
@@ -0,0 +1,87 @@
1
+ # Harness Skills
2
+
3
+ Per **Knowledge Compounding**, skills are domain expertise packages that compound over time. Per **Context is Precious**, agents discover them automatically and load only what they need.
4
+
5
+ ## Skill Schema
6
+
7
+ Skills are defined by a YAML frontmatter manifest. Run `ah schema skill` for the authoritative schema. Key fields:
8
+
9
+ - **name**: Skill identifier (e.g., `harness-maintenance`)
10
+ - **description**: When to use this skill — agents match on this
11
+ - **version**: Semver for tracking changes
12
+ - **globs**: File patterns that trigger skill discovery (e.g., `".allhands/flows/**/*.md"`)
13
+
14
+ ## Directory Conventions
15
+
16
+ ```
17
+ .allhands/skills/
18
+ ├── <skill-name>/
19
+ │   ├── SKILL.md        # Hub: frontmatter + routing table
20
+ │   └── references/     # Domain-specific reference docs (optional)
21
+ │       ├── topic-a.md
22
+ │       └── topic-b.md
23
+ ```
24
+
25
+ - `SKILL.md` is the entry point — always a compact routing hub
26
+ - `references/` (or `docs/`) contains deep domain knowledge
27
+ - Agents read the hub first, then load only the reference matching their scenario
28
+
29
+ ## Discovery Mechanism
30
+
31
+ Skills are discovered via glob matching against the files an agent is working on:
32
+
33
+ 1. Agent touches a file (e.g., `.allhands/flows/shared/MY_FLOW.md`)
34
+ 2. Harness matches file against all skill globs
35
+ 3. Matching skill(s) are surfaced to the agent
36
+ 4. Agent reads `SKILL.md` hub for routing context
37
+
38
+ List all skills: `ah skills list`
39
+
40
+ ## Hub-and-Spoke Pattern
41
+
42
+ All harness skills follow the hub-and-spoke convention:
43
+
44
+ **Hub** (`SKILL.md`):
45
+ - Compact (<100 lines) routing document
46
+ - Contains `<goal>`, `<constraints>`, cross-cutting patterns
47
+ - **Routing table**: Maps scenarios to specific reference docs
48
+ - Agents always start here
49
+
50
+ **Spokes** (`references/*.md` or `docs/*.md`):
51
+ - Deep, domain-specific knowledge
52
+ - Loaded only when the routing table directs
53
+ - Flexibly structured per domain (no rigid template)
54
+ - Grounded in codebase reality (file paths, commands, schema fields)
55
+
56
+ ### When to Create a New Skill vs Extend Existing
57
+
58
+ **Create new** when:
59
+ - The domain is distinct (different file patterns, different expertise)
60
+ - The knowledge doesn't fit under any existing skill's glob patterns
61
+ - Agents working in this domain need dedicated context
62
+
63
+ **Extend existing** when:
64
+ - The knowledge falls within an existing skill's glob patterns
65
+ - Adding a new reference doc to the existing hub covers it
66
+ - The domain is a sub-specialty of an existing skill
67
+
68
+ ## Reference Doc Guidelines
69
+
70
+ Per **Context is Precious**, each reference doc should:
71
+ - Start with the most relevant first principles for that domain
72
+ - Be grounded in codebase reality (file paths, schema fields, command examples)
73
+ - Structure itself flexibly to fit its domain
74
+ - Be concise — agents load only what they need
75
+
76
+ ## Existing Skills
77
+
78
+ | Skill | Purpose | Pattern | Primary For |
79
+ |-------|---------|---------|-------------|
80
+ | `harness-maintenance` | Harness architecture and extension | Hub + `references/` | `.allhands/` content files (flows, schemas, skills, validation, agents) |
81
+ | `claude-code-patterns` | Claude Code native features | Hub + `docs/` | TypeScript implementation in `harness/src/`, Claude Code configs in `.claude/` |
82
+
83
+ ## Related References
84
+
85
+ - [`writing-flows.md`](writing-flows.md) — When authoring reference docs or skill entry-point flows
86
+ - [`knowledge-compounding.md`](knowledge-compounding.md) — When skills need to compound knowledge via schemas or indexes
87
+ - [`core-architecture.md`](core-architecture.md) — When skill globs or discovery interact with directory structure
@@ -0,0 +1,78 @@
1
+ # Knowledge Compounding
2
+
3
+ Per **Knowledge Compounding**, everything feeds forward — decisions, pivots, limitations, disagreements, realizations, best practices, and preferences. The harness captures and surfaces knowledge so future agent work benefits from all past work.
4
+
5
+ ## Documentation Schemas
6
+
7
+ The harness defines schemas for structured knowledge artifacts. Run `ah schema <type>` to inspect any schema:
8
+
9
+ | Schema | File Pattern | Purpose |
10
+ |--------|-------------|---------|
11
+ | `prompt` | `.planning/*/prompts/*.prompt.md` | Task definition and completion records |
12
+ | `alignment` | `.planning/*/alignment.md` | Milestone context, prompt summaries, decisions |
13
+ | `spec` | `specs/**/*.spec.md` | Feature specifications |
14
+ | `skill` | `.allhands/skills/*/SKILL.md` | Domain expertise manifests |
15
+ | `validation-suite` | `.allhands/validation/*.md` | Validation tooling definitions |
16
+ | `solution` | `docs/solutions/**/*.md` | Reusable solution documentation |
17
+ | `documentation` | `docs/**/*.md` | General documentation |
18
+
19
+ Run `ah schema <type> body` to see the body format (not just frontmatter).
20
+
21
+ ## Knowledge Indexes
22
+
23
+ ### Solutions (`docs/solutions/`)
24
+ Reusable patterns discovered during work. Searchable by future agents:
25
+ - `ah solutions search "<keywords>"` — Find relevant past solutions
26
+ - Solutions are created when an agent discovers a reusable pattern worth preserving
27
+ - Per **Knowledge Compounding**, solutions prevent re-discovery of known patterns
28
+
29
+ ### Memories (`ah memories`)
30
+ Agent learnings and engineer preferences that persist across sessions:
31
+ - `ah memories search "<keywords>"` — Find relevant learnings
32
+ - Captures: debugging insights, preference decisions, architectural rationale
33
+ - Per **Knowledge Compounding**, memories prevent repeated mistakes
34
+
35
+ ### Knowledge Docs
36
+ Codebase knowledge indexed for semantic search:
37
+ - `ah knowledge docs search <descriptive_query>` — Semantic code search
38
+ - Built from codebase during TUI startup (semantic index)
39
+ - Per **Context is Precious**, agents search rather than loading full codebase
40
+
41
+ ## How Knowledge Feeds Forward
42
+
43
+ ### Prompt Completion Cycle
44
+ Per **Prompt Files as Units of Work**, completed prompts document what was decided:
45
+ 1. Agent completes prompt tasks
46
+ 2. Summary appended to prompt file (decisions, deviations, learnings)
47
+ 3. Summary appended to alignment doc's "## Prompt Summaries" section
48
+ 4. Future agents read alignment doc to see all completed work without reading each prompt
49
+
50
+ ### Compaction Summaries
51
+ Per **Knowledge Compounding**, compaction preserves work across context boundaries:
52
+ 1. `agent-compact` hook parses transcript for session summary
53
+ 2. Oracle generates summary with decision (CONTINUE/RESTART/BLOCKED)
54
+ 3. Summary appended to prompt file
55
+ 4. Same prompt can be re-run with accumulated learnings
56
+
57
+ ### Skill Improvement
58
+ Skills and validation tooling improve with use:
59
+ - Skills gain new reference docs as domains expand
60
+ - Validation suites crystallize stochastic patterns into deterministic checks
61
+ - Solutions capture reusable patterns discovered during implementation
62
+
63
+ ## Compounding Principles from `principles.md`
64
+
65
+ The **Knowledge Compounding** principle states:
66
+ > Everything feeds forward — decisions, pivots, limitations, disagreements, realizations, best practices, preferences. The harness implementation itself improves with use. Future tasks benefit from all past work.
67
+
68
+ This manifests in:
69
+ - **Alignment docs**: Cross-prompt visibility without context bloat
70
+ - **Solution docs**: Reusable pattern library growing with each milestone
71
+ - **Memories**: Persistent learnings across agent sessions
72
+ - **Validation suites**: Crystallized quality checks that compound
73
+ - **Skills**: Domain expertise packages that deepen over time
74
+
75
+ ## Related References
76
+
77
+ - [`validation-tooling.md`](validation-tooling.md) — When knowledge artifacts involve validation suites or crystallization
78
+ - [`harness-skills.md`](harness-skills.md) — When knowledge compounds through skill improvement or reference docs
@@ -0,0 +1,115 @@
1
+ # Tools, Commands, MCP & Hooks
2
+
3
+ Per **Context is Precious** and **Agentic Validation Tooling**, the harness extends Claude Code through hooks, CLI commands, and MCP integrations — all sharing an auto-discovery pattern.
4
+
5
+ ## Auto-Discovery Pattern
6
+
7
+ Hooks, commands, and MCP servers share the same extension model:
8
+ 1. Create a module in the appropriate `src/` subdirectory
9
+ 2. Export a `register(parent: Command)` function
10
+ 3. The harness auto-discovers and registers the module at startup
11
+
12
+ This pattern applies to:
13
+ - **Hooks**: `.allhands/harness/src/hooks/` — Claude Code lifecycle integration
14
+ - **Commands**: `.allhands/harness/src/commands/` — CLI subcommands under `ah`
15
+ - **MCP Servers**: `.allhands/harness/src/mcp/` — External tool integrations
16
+
17
+ ## Hooks System
18
+
19
+ Per **Context is Precious** and **Agentic Validation Tooling**, hooks bridge Claude Code and the harness.
20
+
21
+ ### Categories
22
+
23
+ | Category | Purpose | Key Hooks |
24
+ |----------|---------|-----------|
25
+ | **Context** | Token-efficient context injection | `tldr-inject`, `read-enforcer`, `edit-inject`, `signature` |
26
+ | **Enforcement** | Guide toward appropriate tools | `github-url`, `research-fetch`, `research-search` |
27
+ | **Validation** | Quality gates on edits | `diagnostics`, `schema`, `format` |
28
+ | **Lifecycle** | Handle agent events | `agent-stop`, `agent-compact` |
29
+ | **Notification** | Desktop alerts | `elicitation`, `stop`, `compact` |
30
+ | **Session** | Startup tasks | `tldr-warm` |
31
+
32
+ ### Hook Events (`.claude/settings.json`)
33
+
34
+ | Event | Purpose |
35
+ |-------|---------|
36
+ | **PreToolUse** | Context injection, enforcement, blocking |
37
+ | **PostToolUse** | Diagnostics, validation |
38
+ | **SessionStart** | TLDR daemon warm-up |
39
+ | **Stop/SessionEnd** | Notifications, cleanup |
40
+ | **PreCompact** | Compaction handling |
41
+
42
+ ### Design Rules
43
+ - Graceful degradation: hooks allow tool execution even if analysis fails
44
+ - Enforcement blocks include helpful redirect messages
45
+ - All optional dependencies (TLDR, pyright) have fallback behavior
46
+
47
+ ### Compaction Hook (Critical)
48
+ Per **Knowledge Compounding**, `agent-compact` preserves work:
49
+ 1. Parse agent transcript for session summary
50
+ 2. Get git status (file changes)
51
+ 3. Call oracle to generate summary with decision (CONTINUE/RESTART/BLOCKED)
52
+ 4. Append summary to prompt file (allows re-run with learnings)
53
+ 5. Kill agent window
54
+
55
+ ## Commands Architecture
56
+
57
+ Entry point: `.allhands/harness/src/cli.ts` (default action launches the TUI). The CLI auto-discovers commands from `.allhands/harness/src/commands/`.
58
+
59
+ ### Core Commands
60
+
61
+ | Command | Domain | First Principle |
62
+ |---------|--------|-----------------|
63
+ | `ah knowledge` | Semantic search | **Context is Precious** |
64
+ | `ah schema` | File structure | **Frontier Models are Capable** |
65
+ | `ah validate` | Quality gates | **Agentic Validation Tooling** |
66
+ | `ah oracle` | LLM inference | **Context is Precious** (saves caller context) |
67
+ | `ah spawn` | Sub-agents | **Context is Precious** (isolated work) |
68
+ | `ah tools` | MCP integration | **Agentic Validation Tooling** |
69
+
70
+ ### Command Design Rules
71
+ - Use `--json` flag for machine-readable output
72
+ - Graceful degradation when optional deps missing
73
+ - Help text explains first principle motivation
74
+
75
+ ## MCP Server Integration
76
+
77
+ Per **Agentic Validation Tooling**, MCP servers extend the harness with external tool capabilities.
78
+
79
+ ### Adding a New MCP Server
80
+
81
+ Follow `.allhands/flows/shared/WRITING_HARNESS_MCP_TOOLS.md` for the full process. Key phases:
82
+
83
+ 1. **Research**: Investigate package requirements (transport type, auth, env vars)
84
+ 2. **Build Config**: Copy `.allhands/harness/src/mcp/_template.ts`, fill in researched values
85
+ 3. **Environment**: Document required env vars (do NOT add values)
86
+ 4. **Validate**: Build harness, verify with `ah tools --list` and `ah tools <server-name>`
87
+
88
+ ### Config Structure
89
+ - `name`: Short identifier (used in `ah tools <name>:tool`)
90
+ - `type`: Transport (`stdio`, `http`, or `sse`)
91
+ - `command`/`args`: For stdio transport
92
+ - `env`: Environment variables (`${VAR_NAME}` syntax)
93
+ - `stateful`: Whether server maintains session state
94
+ - `toolHints`: Helpful hints for key tools
95
+
96
+ ## Extension Points
97
+
98
+ ### Adding New Hooks
99
+ 1. Create file in `.allhands/harness/src/hooks/`
100
+ 2. Export `register(parent: Command)` function
101
+ 3. Add matcher to `.claude/settings.json`
102
+
103
+ ### Adding New Commands
104
+ 1. Create file in `.allhands/harness/src/commands/`
105
+ 2. Export `register(parent: Command)` function
106
+ 3. Document in `README.md`
107
+
108
+ ### Adding New Template Variables
109
+ 1. Add to `TemplateVars` registry in `.allhands/harness/src/lib/schemas/template-vars.ts`
110
+ 2. Include Zod schema and description
111
+
112
+ ## Related References
113
+
114
+ - [`core-architecture.md`](core-architecture.md) — When your hook or command integrates with TUI lifecycle or platform settings
115
+ - [`validation-tooling.md`](validation-tooling.md) — When adding validation hooks or quality gate tooling
@@ -0,0 +1,77 @@
1
+ # Validation Tooling
2
+
3
+ Per **Agentic Validation Tooling**, programmatic validation replaces human supervision. This reference covers how validation suites are created and structured, and how they compound from stochastic exploration into deterministic gates.
4
+
5
+ ## Crystallization Lifecycle
6
+
7
+ Per **Agentic Validation Tooling**, validation compounds through a lifecycle:
8
+
9
+ 1. **Stochastic exploration** — Agent-driven exploratory testing using model intuition discovers patterns
10
+ 2. **Pattern crystallization** — Discovered patterns become deterministic checks
11
+ 3. **CI/CD entrenchment** — Deterministic checks gate releases
12
+ 4. **Frontier shift** — Stochastic exploration moves to new unknowns
13
+
14
+ This is how validation compounds. Every domain has both a stochastic dimension (exploratory) and a deterministic dimension (binary pass/fail).
15
+
16
+ ## Suite Existence Threshold
17
+
18
+ A validation suite must have a meaningful stochastic dimension to justify existing. Deterministic-only tools (type checking, linting, formatting) are test commands referenced directly in acceptance criteria and CI/CD — they are NOT suites.
19
+
20
+ ## Creating Validation Tooling
21
+
22
+ Follow `.allhands/flows/shared/CREATE_VALIDATION_TOOLING_SPEC.md` for the full process. This creates a spec, not an implementation.
23
+
24
+ ### Research Phase
25
+ - Run `ah tavily search "<validation_type> testing tools"` for available tools
26
+ - Run `ah perplexity research "best practices <validation_type> testing <technology>"` for best practices
27
+ - Determine whether the domain has a meaningful stochastic dimension before proceeding
28
+ - Run `ah tools --list` to check existing MCP integrations
29
+
30
+ ### Tool Validation Phase
31
+ Per **Agentic Validation Tooling**, research produces assumptions; running the tool produces ground truth:
32
+ - Install and verify tool responds to `--help`
33
+ - Create a minimal test target (temp directory, not committed)
34
+ - Execute representative stochastic workflows
35
+ - Systematically try commands against codebase-relevant scenarios
36
+ - Document divergences from researched documentation
37
+
38
+ ### Suite Writing Philosophy
39
+
40
+ Per **Frontier Models are Capable** and **Context is Precious**:
41
+
42
+ - **`--help` as prerequisite**: Suites MUST instruct agents to pull `<tool> --help` before any exploration — command vocabulary shapes exploration quality. The suite MUST NOT replicate full command docs.
43
+ - **Inline command examples**: Weave brief examples into use-case motivations as calibration anchors — not exhaustive catalogs, not separated command reference sections.
44
+ - **Motivation framing**: Frame around harness value: reducing human-in-loop supervision, verifying code quality, confirming implementation matches expectations.
45
+ - **Exploration categories**: Describe with enough command specificity to orient, not prescriptive sequences that constrain.
46
+
47
+ Formula: **motivations backed by inline command examples + `--help` as prerequisite and progressive disclosure**. Commands woven into use cases give direction; `--help` reveals depth.
48
+
49
+ ### Evidence Capture
50
+
51
+ Per **Quality Engineering**, two audiences require different artifacts:
52
+
53
+ - **Agent (self-verification)**: Primitives used during the observe-act-verify loop (state checks, assertions, console output). Real-time, not recorded.
54
+ - **Engineer (review artifacts)**: Trust evidence produced after exploration (recordings, screenshots, traces, reports).
55
+
56
+ Pattern: explore first, capture second.
57
+
58
+ ## Validation Suite Schema
59
+
60
+ Run `ah schema validation-suite` for the authoritative schema. Key sections in a suite:
61
+
62
+ - **Stochastic Validation**: Agent-driven exploratory testing with model intuition
63
+ - **Deterministic Integration**: Binary pass/fail commands that gate completion
64
+
65
+ List available suites: `ah validation-tools list`
66
+
67
+ ## Integration with Prompt Execution
68
+
69
+ Prompt files reference validation suites in their `validation_suites` frontmatter. During execution:
70
+ 1. Agent reads suite's **Stochastic Validation** section during implementation for exploratory quality
71
+ 2. Agent runs suite's **Deterministic Integration** section for acceptance criteria gating
72
+ 3. Validation review (`PROMPT_VALIDATION_REVIEW.md`) confirms pass/fail
73
+
74
+ ## Related References
75
+
76
+ - [`tools-commands-mcp-hooks.md`](tools-commands-mcp-hooks.md) — When validation uses hooks, CLI commands, or MCP research tools
77
+ - [`knowledge-compounding.md`](knowledge-compounding.md) — When crystallized patterns need to compound into persistent knowledge
@@ -0,0 +1,84 @@
1
+ # Writing Flows
2
+
3
+ Per **Context is Precious** and **Frontier Models are Capable**, flows articulate "why" so agents deduce "what" and "how". This reference covers flow authorship patterns, structure conventions, and the progressive disclosure model.
4
+
5
+ ## First Principles Applied
6
+
7
+ | First Principle | Flow Directive |
8
+ |-----------------|----------------|
9
+ | **Context is Precious** | Be brief. Progressive disclosure. Don't over-explain. |
10
+ | **Frontier Models are Capable** | Provide "why", let agents deduce "what/how". Trust capability. |
11
+ | **Knowledge Compounding** | DRY - centralize instructions, reference rather than repeat. |
12
+
13
+ When a flow instructs a behavior, cite the motivating First Principle. This teaches agents to think like members of a model-first company.
14
+
15
+ ## XML Tags
16
+
17
+ Reserved for drawing specific attention to rules; use as needed:
18
+ - `<goal>`: Motivations and contribution to the wider harness
19
+ - `<constraints>`: Hard rules (NEVER/MUST/ALWAYS)
20
+ - `<ownership>`: Files and domains the agent is restricted to
21
+ - `<success_criteria>`: Validation criteria for task completion
22
+ - `<inputs>`: Inputs required for the flow to execute
23
+ - `<outputs>`: Outputs expected from the flow
24
+
25
+ ## Structure
26
+
27
+ Per **Frontier Models are Capable**:
28
+ - Start with `<goal>` - the "why" that enables capable deduction
29
+ - Organize into `##` sections representing phases or capability chunks
30
+ - Use bullet points for individual units of capability:
31
+ - "Read `path/to/FLOW_DOC.md`"
32
+ - "Use `ah [command]` to [action]"
33
+ - "Think deeply about X, Y, and Z"
34
+
35
+ Per **Context is Precious**:
36
+ - Reference other flows for progressive disclosure rather than repeating
37
+ - Keep flows brief - agents only see what they need, when they need it
38
+
39
+ Per **Knowledge Compounding**:
40
+ - Centralize instructions, use decision trees that reference capability chunks
41
+ - Don't repeat messaging, instructions, or command usage across flows
42
+
43
+ ## File Organization
44
+
45
+ - `flows/` root: Agent default flows, disclosed immediately on spawn
46
+ - `flows/shared/`: Progressively disclosed via references in parent flows
47
+ - `flows/shared/jury/`: Specialized review sub-agents
48
+
49
+ ### Progressive Disclosure Pattern
50
+ ```markdown
51
+ - Read `.allhands/flows/shared/SKILL_EXTRACTION.md` and follow its instructions
52
+ ```
53
+
54
+ Sub-flows use `<inputs>` and `<outputs>` tags for execution-agnostic subtasks. This decouples the flow from its caller — any agent can execute it given the right inputs.
55
+
56
+ ## Quickfire Writing Tips
57
+
58
+ - **Action-verb bullets**: Start with verbs ("Read", "Use", "Follow", "Run")
59
+ - **Path backticking**: Consistently wrap paths and commands in backticks
60
+ - **Conditional simplicity**: Use "If X - Y" pattern, keep logic flat
61
+ - **Hierarchical nesting**: Sub-bullets for related sub-tasks only
62
+ - **Phase naming**: Section headers as capability phases (Context Gathering, Implementation, Validation, Completion)
63
+ - **Exit clarity**: End with explicit stop condition ("commit your work", "Stop")
64
+ - **Progressive disclosure**: Reference external flows for complexity ("read `.allhands/flows/FLOW.md` and follow its instructions")
65
+ - **Inline commands**: Embed CLI usage directly ("Run `ah schema prompt body`")
66
+ - **First Principle citation**: Label motivating principles by name to teach agents the "why" behind directives
67
+
68
+ ## Northstar Example
69
+
70
+ See `.allhands/flows/PROMPT_TASK_EXECUTION.md` — this flow demonstrates all conventions: `<goal>`, `<constraints>`, phase sections, action-verb bullets, progressive disclosure via sub-flow references, and explicit completion steps.
71
+
72
+ ## Flow-Config Boundary
73
+
74
+ Flows and workflow domain configs serve distinct roles. Maintaining the boundary prevents redundancy and keeps configs concise.
75
+
76
+ - **Flows** own generic behaviors: interview mechanics, grounding patterns, conviction spectrum, open question triage, roadmap-aware assumptions
77
+ - **Configs** provide domain-specific context: vocabulary, gap signals, output sections, planning strategy, category deep dives
78
+ - **Litmus test**: If removing it from the config would break ALL domains, it belongs in the flow. If it only affects ONE domain, it belongs in the config.
79
+ - Configs layer domain-specific additions on top of flow behaviors — they should not restate the base behaviors the flow already provides
80
+
81
+ ## Related References
82
+
83
+ - [`core-architecture.md`](core-architecture.md) — When your flow change touches directory structure, TUI lifecycle, or schema system
84
+ - [`harness-skills.md`](harness-skills.md) — When creating a flow that should be discoverable as a skill entry point
@@ -0,0 +1,109 @@
1
+ ---
2
+ name: browser-automation
3
+ description: "Browser-based validation for front-end web implementations — exploratory UX testing, visual regression, accessibility scanning, and end-to-end flow verification"
4
+ globs:
5
+ - "**/*.tsx"
6
+ - "**/*.jsx"
7
+ - "**/*.vue"
8
+ - "**/*.svelte"
9
+ - "**/*.html"
10
+ - "**/*.css"
11
+ - "**/*.scss"
12
+ - "**/*.astro"
13
+ - "**/pages/**"
14
+ - "**/app/**"
15
+ - "**/components/**"
16
+ - "**/layouts/**"
17
+ - "**/views/**"
18
+ tools:
19
+ - "agent-browser"
20
+ - "playwright"
21
+ - "@axe-core/playwright"
22
+ ---
23
+
24
+ ## Purpose
25
+
26
+ This suite validates browser-based quality across a unified domain: end-to-end flow verification, visual regression, UX quality, and accessibility. These are sub-concerns within a single validation domain — the browser — not separate suites.
27
+
28
+ The stochastic dimension uses agent-driven browser exploration to probe edge cases, test responsive behavior, verify interaction flows, and discover regressions that scripted tests miss. The deterministic dimension uses Playwright directly for CI-gated visual regression, accessibility scanning, and scripted e2e flows.
29
+
30
+ Per **Agentic Validation Tooling**, this suite meets the existence threshold: the stochastic dimension (exploratory UX testing, interaction probing, visual state exploration) provides meaningful agent-driven validation beyond what deterministic tests alone can cover.
31
+
32
+ ## Tooling
33
+
34
+ ### agent-browser (stochastic dimension)
35
+
36
+ - **Installation**: `npm install -g agent-browser && agent-browser install`
37
+ - Rust CLI + Node.js daemon built on Playwright. Discrete CLI commands, not a persistent API.
38
+ - **Snapshot+Refs model**: Accessibility tree with compact element refs (`@e1`). Refs invalidate on state change — always re-snapshot after navigation or DOM mutation.
39
+ - **Session isolation**: Named sessions for parallel exploration. Auth state persists across sessions.
40
+ - **Command reference first**: Run `agent-browser --help` and `agent-browser <command> --help` before any exploration — command vocabulary shapes what you attempt. Prerequisite, not afterthought.
41
+
42
+ ### Playwright (deterministic dimension)
43
+
44
+ - **Installation**: `npm install -D @playwright/test @axe-core/playwright && npx playwright install chromium --with-deps`
45
+ - **Command reference first**: Run `npx playwright --help` and consult the Playwright docs for the full API surface.
46
+ - Scripted CI tests — visual regression (`toHaveScreenshot()`), accessibility (`@axe-core/playwright`, WCAG 2.1 AA), e2e flows. Not via MCP; no LLM reasoning needed.
47
+
48
+ ## Stochastic Validation
49
+
50
+ Agent-driven exploratory browser validation. This section teaches WHAT to validate and WHY — the CLI teaches HOW.
51
+
52
+ ### Core Loop
53
+
54
+ **Prerequisite**: `agent-browser --help` — internalize the full command vocabulary before exploring. Every subcommand has its own `--help`. Command awareness shapes exploration quality.
55
+
56
+ Navigate → snapshot → identify targets → interact → wait for result → verify outcome → check errors.
57
+
58
+ This is the thinking pattern to internalize, not a command sequence:
59
+
60
+ - Always re-snapshot after state changes — navigation, form submission, modal appearance, any DOM mutation. Stale refs cause cascading failures.
61
+ - Wait for async results before verifying — element appearance, text change, URL update, network settlement
62
+ - Verify outcomes before proceeding — never assume an interaction succeeded
63
+ - Check console errors after interactions — bugs are often invisible in the visual state
64
+
65
+ ### Use Cases
66
+
67
+ These seed categories guide exploration. Per **Frontier Models are Capable**, the agent extrapolates deeper investigation from these starting points.
68
+
69
+ - **Flow verification**: `navigate` to entry point, `snapshot` to orient, interact via refs — `click @e3`, `type @e5 "user@test.com"` — then re-snapshot to verify: URL changed, success state appeared, no console errors. Walk full critical paths (registration, checkout, settings). Exercise redirects and back/forward navigation.
70
+ - **Responsive testing**: `resize` viewport across breakpoints (e.g., 375px mobile, 768px tablet, 1280px desktop), `snapshot` at each to inspect layout changes. Test media preferences with `emulate-media` (dark mode, reduced motion). Layout bugs at specific widths are among the most common front-end regressions.
71
+ - **Edge case probing**: `click` submit without filling fields, `type` overlong strings and special characters into inputs. Verify error handling surfaces appropriate messages in the next `snapshot`. Test keyboard-only navigation — `press Tab`, `press Enter`, `press Escape` — can a user complete flows without a mouse?
72
+ - **Accessibility exploration**: `snapshot` IS the assistive technology view — the accessibility tree reveals semantic structure directly. Verify Tab order (`press Tab` sequences), Enter/Space activation (`press Enter` on focused element), Escape dismissal (`press Escape`), focus management on modals.
73
+ - **Evidence capture**: `screenshot` before/after interactions for visual comparison. Capture console output tied to specific flows as bug evidence. Screenshots are opportunistic — capture what's interesting, not on a schedule.
74
+ - **Video recording**: Explore first (discover flows, find issues), then `record` a clean replay for engineer review. Recording creates a fresh context but preserves session state — focused evidence, not noisy exploration footage.
75
+
76
+ ### Resilience
77
+
78
+ Stochastic exploration is inherently unpredictable. These patterns prevent death spirals:
79
+
80
+ - Max 3 retries on any interaction, then report failure and move on
81
+ - `screenshot` on failure — capture full-page state before recovery attempts
82
+ - Session restart if page becomes unresponsive — fresh named session + alternative path
83
+ - Auth bail-out — OAuth, MFA, or CAPTCHA blockers: save state, report, move on
84
+ - Dialog/frame handling — accept or dismiss dialogs to unblock; switch into iframes for embedded content
85
+ - Self-healing pattern — when an element disappears, re-snapshot the entire page rather than retrying the same selector (Stagehand reference)
86
+
87
+ Use `agent-browser --help` and `agent-browser <command> --help` for all available commands and options. This suite teaches what to validate and why — the CLI teaches how.
88
+
89
+ ## Deterministic Integration
90
+
91
+ CI-gated browser regression testing using Playwright directly. Scripted assertions — no LLM reasoning needed.
92
+
93
+ - **Visual regression**: `await expect(page).toHaveScreenshot('dashboard.png')` — baseline screenshots committed to repo, fail CI on drift. Mask dynamic content (`{ mask: [page.locator('.timestamp')] }`) to avoid false positives. Single OS + browser in CI for font rendering consistency.
94
+ - **Accessibility**: `const results = await new AxeBuilder({ page }).analyze()` — WCAG 2.1 AA scanning via `@axe-core/playwright`. Reusable fixture for consistent config. Scope to components (`.include('.component')`) for feature tests, full-page for integration.
95
+ - **Multi-device**: `projects: [{ use: devices['iPhone 14'] }]` — Chromium-only, desktop/mobile/tablet viewport projects. Responsive regression is a viewport concern, not a browser concern.
96
+ - **CI artifacts**: `use: { trace: 'on-first-retry', screenshot: 'only-on-failure' }` — traces + screenshots on failure for remote debugging. Upload artifacts to survive ephemeral runners.
97
+
98
+ ## ENV Configuration
99
+
100
+ | Variable | Required | Dimension | Purpose |
101
+ |----------|----------|-----------|---------|
102
+ | `BASE_URL` | Yes | Both | Dev server URL (e.g., `http://localhost:3000`) |
103
+ | `AGENT_BROWSER_SESSION` | No | Stochastic | Named session for isolation |
104
+ | `AGENT_BROWSER_PROFILE` | No | Stochastic | Persistent browser profile path for auth state |
105
+ | `BROWSERBASE_API_KEY` | No | Both (CI) | Cloud browser provider API key |
106
+ | `BROWSERBASE_PROJECT_ID` | No | Both (CI) | Cloud browser provider project ID |
107
+ | `CI` | Auto | Deterministic | Set by CI environment; controls retries, reporter |
108
+
109
+ `BASE_URL` must be configured for each target project. This suite is framework-agnostic — the agent should discover the target project's dev server configuration at execution time.