all-hands-cli 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (305) hide show
  1. package/.allhands/README.md +75 -0
  2. package/.allhands/agents/compounder.yaml +15 -0
  3. package/.allhands/agents/coordinator.yaml +17 -0
  4. package/.allhands/agents/documentor.yaml +15 -0
  5. package/.allhands/agents/e2e-test-planner.yaml +17 -0
  6. package/.allhands/agents/emergent.yaml +22 -0
  7. package/.allhands/agents/executor.yaml +14 -0
  8. package/.allhands/agents/ideation.yaml +11 -0
  9. package/.allhands/agents/initiative-steering.yaml +19 -0
  10. package/.allhands/agents/judge.yaml +13 -0
  11. package/.allhands/agents/planner.yaml +19 -0
  12. package/.allhands/agents/pr-reviewer.yaml +15 -0
  13. package/.allhands/docs.json +5 -0
  14. package/.allhands/docs.local.json +26 -0
  15. package/.allhands/flows/COMPOUNDING.md +203 -0
  16. package/.allhands/flows/COORDINATION.md +89 -0
  17. package/.allhands/flows/CORE.md +87 -0
  18. package/.allhands/flows/DOCUMENTATION.md +218 -0
  19. package/.allhands/flows/E2E_TEST_PLAN_BUILDING.md +140 -0
  20. package/.allhands/flows/EMERGENT_PLANNING.md +57 -0
  21. package/.allhands/flows/IDEATION_SCOPING.md +154 -0
  22. package/.allhands/flows/INITIATIVE_STEERING.md +110 -0
  23. package/.allhands/flows/JUDGE_REVIEWING.md +79 -0
  24. package/.allhands/flows/PROMPT_TASK_EXECUTION.md +68 -0
  25. package/.allhands/flows/PR_REVIEWING.md +43 -0
  26. package/.allhands/flows/SPEC_PLANNING.md +216 -0
  27. package/.allhands/flows/harness/WRITING_HARNESS_FLOWS.md +27 -0
  28. package/.allhands/flows/harness/WRITING_HARNESS_KNOWLEDGE.md +27 -0
  29. package/.allhands/flows/harness/WRITING_HARNESS_ORCHESTRATION.md +27 -0
  30. package/.allhands/flows/harness/WRITING_HARNESS_SKILLS.md +27 -0
  31. package/.allhands/flows/harness/WRITING_HARNESS_TOOLS.md +27 -0
  32. package/.allhands/flows/harness/WRITING_HARNESS_VALIDATION_TOOLING.md +27 -0
  33. package/.allhands/flows/shared/CODEBASE_UNDERSTANDING.md +72 -0
  34. package/.allhands/flows/shared/CREATE_HARNESS_SPEC.md +48 -0
  35. package/.allhands/flows/shared/CREATE_SPEC.md +41 -0
  36. package/.allhands/flows/shared/CREATE_VALIDATION_TOOLING_SPEC.md +70 -0
  37. package/.allhands/flows/shared/DOCUMENTATION_DISCOVERY.md +123 -0
  38. package/.allhands/flows/shared/DOCUMENTATION_WRITER.md +101 -0
  39. package/.allhands/flows/shared/EMERGENT_REFINEMENT_ANALYSIS.md +76 -0
  40. package/.allhands/flows/shared/EXTERNAL_TECH_GUIDANCE.md +97 -0
  41. package/.allhands/flows/shared/IDEATION_CODEBASE_GROUNDING.md +49 -0
  42. package/.allhands/flows/shared/PLAN_DEEPENING.md +152 -0
  43. package/.allhands/flows/shared/PROMPT_TASKS_CURATION.md +113 -0
  44. package/.allhands/flows/shared/PROMPT_VALIDATION_REVIEW.MD +99 -0
  45. package/.allhands/flows/shared/QUICK_PREMORTEM.md +70 -0
  46. package/.allhands/flows/shared/RESEARCH_GUIDANCE.md +38 -0
  47. package/.allhands/flows/shared/REVIEW_OPTIONS_BREAKDOWN.md +68 -0
  48. package/.allhands/flows/shared/SKILL_EXTRACTION.md +84 -0
  49. package/.allhands/flows/shared/SPEC_FLOW_ANALYSIS.md +119 -0
  50. package/.allhands/flows/shared/TDD_WORKFLOW.md +109 -0
  51. package/.allhands/flows/shared/UTILIZE_VALIDATION_TOOLING.md +84 -0
  52. package/.allhands/flows/shared/WRITING_HARNESS_FLOWS.md +11 -0
  53. package/.allhands/flows/shared/WRITING_HARNESS_MCP_TOOLS.md +84 -0
  54. package/.allhands/flows/shared/jury/ARCHITECTURE_REVIEW.md +91 -0
  55. package/.allhands/flows/shared/jury/BEST_PRACTICES_REVIEW.md +80 -0
  56. package/.allhands/flows/shared/jury/CLAIM_VERIFICATION_REVIEW.md +101 -0
  57. package/.allhands/flows/shared/jury/EXPECTATIONS_FIT_REVIEW.md +78 -0
  58. package/.allhands/flows/shared/jury/MAINTAINABILITY_REVIEW.md +110 -0
  59. package/.allhands/flows/shared/jury/PROMPTS_EXPECTATIONS_FIT.md +74 -0
  60. package/.allhands/flows/shared/jury/PROMPTS_FLOW_ANALYSIS.md +92 -0
  61. package/.allhands/flows/shared/jury/PROMPTS_YAGNI.md +78 -0
  62. package/.allhands/flows/shared/jury/PROMPT_PREMORTEM.md +125 -0
  63. package/.allhands/flows/shared/jury/SECURITY_REVIEW.md +86 -0
  64. package/.allhands/flows/shared/jury/YAGNI_REVIEW.md +82 -0
  65. package/.allhands/flows/wip/DEBUG_INVESTIGATION.md +162 -0
  66. package/.allhands/flows/wip/MEMORY_RECALL.md +62 -0
  67. package/.allhands/harness/ah +131 -0
  68. package/.allhands/harness/package-lock.json +5292 -0
  69. package/.allhands/harness/package.json +52 -0
  70. package/.allhands/harness/src/__tests__/e2e/commands.test.ts +307 -0
  71. package/.allhands/harness/src/__tests__/e2e/event-loop.test.ts +539 -0
  72. package/.allhands/harness/src/__tests__/e2e/hooks.test.ts +427 -0
  73. package/.allhands/harness/src/__tests__/e2e/new-initiative-routing.test.ts +137 -0
  74. package/.allhands/harness/src/__tests__/e2e/run-e2e.ts +109 -0
  75. package/.allhands/harness/src/__tests__/e2e/specs-type.test.ts +210 -0
  76. package/.allhands/harness/src/__tests__/e2e/validation-hooks.test.ts +669 -0
  77. package/.allhands/harness/src/__tests__/e2e/validation-path-consistency.test.ts +354 -0
  78. package/.allhands/harness/src/__tests__/e2e/validation.test.ts +528 -0
  79. package/.allhands/harness/src/__tests__/harness/assertions.ts +318 -0
  80. package/.allhands/harness/src/__tests__/harness/cli-runner.ts +359 -0
  81. package/.allhands/harness/src/__tests__/harness/fixture.ts +384 -0
  82. package/.allhands/harness/src/__tests__/harness/hook-runner.ts +411 -0
  83. package/.allhands/harness/src/__tests__/harness/index.ts +122 -0
  84. package/.allhands/harness/src/cli.ts +36 -0
  85. package/.allhands/harness/src/commands/complexity.ts +177 -0
  86. package/.allhands/harness/src/commands/context7.ts +202 -0
  87. package/.allhands/harness/src/commands/docs.ts +557 -0
  88. package/.allhands/harness/src/commands/hooks.ts +24 -0
  89. package/.allhands/harness/src/commands/index.ts +51 -0
  90. package/.allhands/harness/src/commands/knowledge.ts +382 -0
  91. package/.allhands/harness/src/commands/memories.ts +302 -0
  92. package/.allhands/harness/src/commands/notify.ts +61 -0
  93. package/.allhands/harness/src/commands/oracle.ts +158 -0
  94. package/.allhands/harness/src/commands/perplexity.ts +220 -0
  95. package/.allhands/harness/src/commands/planning.ts +245 -0
  96. package/.allhands/harness/src/commands/schema.ts +73 -0
  97. package/.allhands/harness/src/commands/skills.ts +128 -0
  98. package/.allhands/harness/src/commands/solutions.ts +353 -0
  99. package/.allhands/harness/src/commands/spawn.ts +158 -0
  100. package/.allhands/harness/src/commands/specs.ts +532 -0
  101. package/.allhands/harness/src/commands/tavily.ts +226 -0
  102. package/.allhands/harness/src/commands/tools.ts +579 -0
  103. package/.allhands/harness/src/commands/trace.ts +327 -0
  104. package/.allhands/harness/src/commands/tui.ts +960 -0
  105. package/.allhands/harness/src/commands/validate.ts +143 -0
  106. package/.allhands/harness/src/commands/validation-tools.ts +108 -0
  107. package/.allhands/harness/src/hooks/context.ts +1442 -0
  108. package/.allhands/harness/src/hooks/enforcement.ts +170 -0
  109. package/.allhands/harness/src/hooks/index.ts +54 -0
  110. package/.allhands/harness/src/hooks/lifecycle.ts +229 -0
  111. package/.allhands/harness/src/hooks/notification.ts +104 -0
  112. package/.allhands/harness/src/hooks/observability.ts +551 -0
  113. package/.allhands/harness/src/hooks/session.ts +88 -0
  114. package/.allhands/harness/src/hooks/shared.ts +815 -0
  115. package/.allhands/harness/src/hooks/transcript-parser.ts +208 -0
  116. package/.allhands/harness/src/hooks/validation.ts +617 -0
  117. package/.allhands/harness/src/lib/__tests__/ctags.test.ts +244 -0
  118. package/.allhands/harness/src/lib/__tests__/docs-validation.test.ts +344 -0
  119. package/.allhands/harness/src/lib/__tests__/mcp-runtime.test.ts +190 -0
  120. package/.allhands/harness/src/lib/__tests__/schema.test.ts +861 -0
  121. package/.allhands/harness/src/lib/base-command.ts +198 -0
  122. package/.allhands/harness/src/lib/cli-daemon.ts +343 -0
  123. package/.allhands/harness/src/lib/compaction.ts +313 -0
  124. package/.allhands/harness/src/lib/ctags.ts +497 -0
  125. package/.allhands/harness/src/lib/docs-validation.ts +907 -0
  126. package/.allhands/harness/src/lib/event-loop.ts +662 -0
  127. package/.allhands/harness/src/lib/flows.ts +155 -0
  128. package/.allhands/harness/src/lib/git.ts +276 -0
  129. package/.allhands/harness/src/lib/knowledge-worker.ts +72 -0
  130. package/.allhands/harness/src/lib/knowledge.ts +810 -0
  131. package/.allhands/harness/src/lib/llm.ts +255 -0
  132. package/.allhands/harness/src/lib/mcp-client.ts +432 -0
  133. package/.allhands/harness/src/lib/mcp-daemon.ts +486 -0
  134. package/.allhands/harness/src/lib/mcp-runtime.ts +418 -0
  135. package/.allhands/harness/src/lib/notification.ts +115 -0
  136. package/.allhands/harness/src/lib/opencode/index.ts +70 -0
  137. package/.allhands/harness/src/lib/opencode/profiles.ts +300 -0
  138. package/.allhands/harness/src/lib/opencode/prompts/codesearch.md +98 -0
  139. package/.allhands/harness/src/lib/opencode/prompts/knowledge-aggregator.md +67 -0
  140. package/.allhands/harness/src/lib/opencode/runner.ts +281 -0
  141. package/.allhands/harness/src/lib/oracle.ts +926 -0
  142. package/.allhands/harness/src/lib/planning-utils.ts +150 -0
  143. package/.allhands/harness/src/lib/planning.ts +605 -0
  144. package/.allhands/harness/src/lib/pr-review.ts +225 -0
  145. package/.allhands/harness/src/lib/prompts.ts +522 -0
  146. package/.allhands/harness/src/lib/schema.ts +418 -0
  147. package/.allhands/harness/src/lib/schemas/agent-profile.ts +141 -0
  148. package/.allhands/harness/src/lib/schemas/template-vars.ts +138 -0
  149. package/.allhands/harness/src/lib/session.ts +164 -0
  150. package/.allhands/harness/src/lib/specs.ts +348 -0
  151. package/.allhands/harness/src/lib/tldr.ts +829 -0
  152. package/.allhands/harness/src/lib/tmux.ts +1051 -0
  153. package/.allhands/harness/src/lib/trace-store.ts +714 -0
  154. package/.allhands/harness/src/mcp/__tests__/index.test.ts +46 -0
  155. package/.allhands/harness/src/mcp/_template.ts +47 -0
  156. package/.allhands/harness/src/mcp/filesystem.ts +33 -0
  157. package/.allhands/harness/src/mcp/index.ts +69 -0
  158. package/.allhands/harness/src/mcp/playwright.ts +34 -0
  159. package/.allhands/harness/src/mcp/xcodebuild.ts +29 -0
  160. package/.allhands/harness/src/schemas/docs.schema.json +44 -0
  161. package/.allhands/harness/src/schemas/settings.schema.json +214 -0
  162. package/.allhands/harness/src/tui/actions.ts +227 -0
  163. package/.allhands/harness/src/tui/file-viewer-modal.ts +270 -0
  164. package/.allhands/harness/src/tui/index.ts +1574 -0
  165. package/.allhands/harness/src/tui/modal.ts +232 -0
  166. package/.allhands/harness/src/tui/prompts-pane.ts +186 -0
  167. package/.allhands/harness/src/tui/status-pane.ts +434 -0
  168. package/.allhands/harness/tsconfig.json +22 -0
  169. package/.allhands/harness/vitest.config.ts +13 -0
  170. package/.allhands/pillars.md +33 -0
  171. package/.allhands/principles.md +88 -0
  172. package/.allhands/schemas/alignment.yaml +51 -0
  173. package/.allhands/schemas/documentation.yaml +10 -0
  174. package/.allhands/schemas/prompt.yaml +92 -0
  175. package/.allhands/schemas/skill.yaml +34 -0
  176. package/.allhands/schemas/solution.yaml +131 -0
  177. package/.allhands/schemas/spec.yaml +67 -0
  178. package/.allhands/schemas/validation-suite.yaml +49 -0
  179. package/.allhands/schemas/workflow.yaml +51 -0
  180. package/.allhands/settings.json +57 -0
  181. package/.allhands/skills/claude-code-patterns/SKILL.md +60 -0
  182. package/.allhands/skills/claude-code-patterns/docs/context-hygiene.md +19 -0
  183. package/.allhands/skills/harness-maintenance/SKILL.md +449 -0
  184. package/.allhands/skills/harness-maintenance/references/core-architecture.md +187 -0
  185. package/.allhands/skills/harness-maintenance/references/harness-skills.md +87 -0
  186. package/.allhands/skills/harness-maintenance/references/knowledge-compounding.md +78 -0
  187. package/.allhands/skills/harness-maintenance/references/tools-commands-mcp-hooks.md +115 -0
  188. package/.allhands/skills/harness-maintenance/references/validation-tooling.md +77 -0
  189. package/.allhands/skills/harness-maintenance/references/writing-flows.md +84 -0
  190. package/.allhands/validation/browser-automation.md +109 -0
  191. package/.allhands/validation/xcode-automation.md +195 -0
  192. package/.allhands/workflows/documentation.md +86 -0
  193. package/.allhands/workflows/investigation.md +81 -0
  194. package/.allhands/workflows/milestone.md +91 -0
  195. package/.allhands/workflows/optimization.md +85 -0
  196. package/.allhands/workflows/refactor.md +99 -0
  197. package/.allhands/workflows/triage.md +81 -0
  198. package/.claude/README.md +1 -0
  199. package/.claude/agents/explorer.md +10 -0
  200. package/.claude/agents/researcher.md +11 -0
  201. package/.claude/agents/task-runner.md +8 -0
  202. package/.claude/settings.json +231 -0
  203. package/.env.ai.example +7 -0
  204. package/.github/workflows/npm-publish.yml +69 -0
  205. package/.internal.json +45 -0
  206. package/.tldr/config.json +11 -0
  207. package/.tldrignore +90 -0
  208. package/CLAUDE.md +6 -0
  209. package/README.md +98 -0
  210. package/bin/sync-cli.js +7552 -0
  211. package/concerns.md +7 -0
  212. package/docs/README.md +41 -0
  213. package/docs/agents/README.md +24 -0
  214. package/docs/agents/agent-configuration-system.md +86 -0
  215. package/docs/agents/execution-agents.md +50 -0
  216. package/docs/agents/knowledge-agents.md +61 -0
  217. package/docs/agents/orchestration-agent.md +57 -0
  218. package/docs/agents/planning-agents.md +84 -0
  219. package/docs/agents/quality-review-agents.md +67 -0
  220. package/docs/agents/workflow-agent-orchestration.md +69 -0
  221. package/docs/flows/README.md +44 -0
  222. package/docs/flows/compounding.md +126 -0
  223. package/docs/flows/coordination.md +72 -0
  224. package/docs/flows/core-harness-integration.md +63 -0
  225. package/docs/flows/documentation-orchestration.md +98 -0
  226. package/docs/flows/e2e-test-plan-building.md +83 -0
  227. package/docs/flows/emergent-refinement.md +104 -0
  228. package/docs/flows/flow-authoring-and-mcp-tools.md +89 -0
  229. package/docs/flows/judge-reviewing.md +112 -0
  230. package/docs/flows/plan-deepening-and-research.md +107 -0
  231. package/docs/flows/plan-review-jury.md +114 -0
  232. package/docs/flows/pr-reviewing.md +54 -0
  233. package/docs/flows/prompt-task-execution.md +119 -0
  234. package/docs/flows/spec-planning.md +162 -0
  235. package/docs/flows/type-specific-scoping-flows.md +49 -0
  236. package/docs/flows/validation-and-skills-integration.md +145 -0
  237. package/docs/flows/wip/wip-flows.md +102 -0
  238. package/docs/harness/README.md +23 -0
  239. package/docs/harness/agent-profiles.md +84 -0
  240. package/docs/harness/cli/README.md +24 -0
  241. package/docs/harness/cli/cli-entry-and-command-discovery.md +91 -0
  242. package/docs/harness/cli/docs-command.md +87 -0
  243. package/docs/harness/cli/knowledge-command.md +91 -0
  244. package/docs/harness/cli/minor-cli-commands.md +65 -0
  245. package/docs/harness/cli/oracle-command.md +113 -0
  246. package/docs/harness/cli/planning-command.md +95 -0
  247. package/docs/harness/cli/schema-and-validation-commands.md +154 -0
  248. package/docs/harness/cli/search-commands.md +97 -0
  249. package/docs/harness/cli/spawn-command.md +136 -0
  250. package/docs/harness/cli/specs-command.md +102 -0
  251. package/docs/harness/cli/tools-command.md +122 -0
  252. package/docs/harness/cli/trace-command.md +122 -0
  253. package/docs/harness/cli-daemon.md +92 -0
  254. package/docs/harness/event-loop.md +184 -0
  255. package/docs/harness/hooks/README.md +15 -0
  256. package/docs/harness/hooks/context-hooks.md +96 -0
  257. package/docs/harness/hooks/lifecycle-and-observability-hooks.md +135 -0
  258. package/docs/harness/hooks/validation-hooks.md +97 -0
  259. package/docs/harness/test-harness.md +149 -0
  260. package/docs/harness/tui.md +176 -0
  261. package/docs/memories.md +20 -0
  262. package/docs/solutions/agentic-issues/premature-agent-deletion-tui-action-dependency-20260130.md +49 -0
  263. package/docs/solutions/agentic-issues/ref-anchor-scope-mismatch-skill-references-20260131.md +55 -0
  264. package/docs/solutions/agentic-issues/tautological-tests-routing-20260131.md +52 -0
  265. package/docs/solutions/integration_issue/blocktool-output-format-mismatch-hook-runner-20260130.md +52 -0
  266. package/docs/solutions/integration_issue/dual-validation-path-divergence-schema-20260130.md +66 -0
  267. package/docs/solutions/security-issues/unsanitized-domain-path-join-20260131.md +52 -0
  268. package/docs/solutions/test-failures/event-loop-mock-ordering-checkAgentWindows-20260130.md +63 -0
  269. package/docs/sync-cli/README.md +19 -0
  270. package/docs/sync-cli/cli-entrypoint-and-commands.md +39 -0
  271. package/docs/sync-cli/commands/README.md +11 -0
  272. package/docs/sync-cli/commands/pull-manifest-command.md +36 -0
  273. package/docs/sync-cli/commands/push-command.md +84 -0
  274. package/docs/sync-cli/commands/sync-command.md +71 -0
  275. package/docs/sync-cli/systems/README.md +14 -0
  276. package/docs/sync-cli/systems/git-and-github-integration.md +49 -0
  277. package/docs/sync-cli/systems/interactive-ui.md +43 -0
  278. package/docs/sync-cli/systems/manifest-and-distribution.md +51 -0
  279. package/docs/sync-cli/systems/path-resolution.md +42 -0
  280. package/package.json +46 -0
  281. package/scripts/install-shim.sh +40 -0
  282. package/scripts/pre-pack.sh +25 -0
  283. package/specs/harness-maintenance-skill.spec.md +138 -0
  284. package/specs/roadmap/git-spec-lifecycle-management.spec.md +113 -0
  285. package/specs/sync-init-flag.spec.md +117 -0
  286. package/specs/unified-workflow-orchestration.spec.md +250 -0
  287. package/specs/validation-tooling-practice.spec.md +98 -0
  288. package/specs/workflow-domain-configuration.spec.md +265 -0
  289. package/src/commands/pull-manifest.ts +31 -0
  290. package/src/commands/push.ts +344 -0
  291. package/src/commands/sync.ts +289 -0
  292. package/src/lib/constants.ts +10 -0
  293. package/src/lib/dotfiles.ts +36 -0
  294. package/src/lib/fs-utils.ts +18 -0
  295. package/src/lib/gh.ts +40 -0
  296. package/src/lib/git.ts +63 -0
  297. package/src/lib/gitignore.ts +167 -0
  298. package/src/lib/manifest.ts +121 -0
  299. package/src/lib/marker-sync.ts +39 -0
  300. package/src/lib/paths.ts +38 -0
  301. package/src/lib/target-lines.ts +66 -0
  302. package/src/lib/ui.ts +78 -0
  303. package/src/sync-cli.ts +120 -0
  304. package/target-lines.json +23 -0
  305. package/tsconfig.json +20 -0
@@ -0,0 +1,126 @@
1
+ ---
2
+ description: "Post-spec knowledge extraction flow that captures learnings, solutions, memories, and harness improvements from completed specifications, with type-aware completion assessment for milestone vs exploratory specs"
3
+ ---
4
+
5
+ # Compounding Flow
6
+
7
+ The compounding flow exists to close the feedback loop after a spec completes. Without it, knowledge gained during implementation -- decisions, failures, workarounds, engineer preferences -- evaporates between sessions. Per **Knowledge Compounding**, everything feeds forward.
8
+
9
+ This flow runs after all prompts in a spec have been executed and reviewed. It is intentionally the last step before a spec is considered fully closed.
10
+
11
+ ## Lifecycle Position
12
+
13
+ ```mermaid
14
+ flowchart LR
15
+ A[Spec Planning] --> B[Prompt Execution Loop]
16
+ B --> C[PR Review]
17
+ C --> D[Compounding]
18
+ D --> E[Spec Closed]
19
+ D -->|harness issues| F[Harness Improvement Spec]
20
+ ```
21
+
22
+ The flow is idempotent -- running it again on a spec with no new changes produces no output.
23
+
24
+ ## Phase Progression
25
+
26
+ The flow progresses through ordered phases, each building on the previous. Completion Assessment runs early to establish the evaluation frame for the spec type. The final phase (Harness Improvement) is intentionally last so that all other compounding artifacts are complete before any diversion into structural changes.
27
+
28
+ ```mermaid
29
+ flowchart TD
30
+ CG[Context Gathering] --> CA[Completion Assessment]
31
+ CA --> SA[Signal Analysis]
32
+ SA --> ME[Memory Extraction]
33
+ SA --> SD[Solution Documentation]
34
+ ME --> SF[Spec Finalization]
35
+ SD --> SF
36
+ SF --> HI[Harness Improvement Handling]
37
+ ```
38
+
39
+ ## Completion Assessment by Spec Type
40
+
41
+ Per **Frontier Models are Capable**, completion means different things depending on the spec type:
42
+
43
+ | Spec Type | Completion Criteria |
44
+ |-----------|-------------------|
45
+ | **Milestone** (or missing) | Spec acceptance criteria met, all prompts complete, thorough knowledge extraction |
46
+ | **Exploratory** (investigation, optimization, refactor, documentation, triage) | Problem resolution assessed against original hypothesis, learnings extracted from experiment outcomes, unresolved questions documented for future work |
47
+
48
+ Milestone completion is binary -- acceptance criteria are either met or not. Exploratory completion is evaluated against hypothesis outcomes: did the experiments answer the questions posed? What was learned? What remains open? This distinction shapes how subsequent Signal Analysis interprets prompt results.
49
+
50
+ ## Signal Analysis
51
+
52
+ The core analytical phase reads all spec artifacts and identifies patterns across four signal categories:
53
+
54
+ | Signal Category | What It Reveals | Key Indicators |
55
+ |----------------|-----------------|----------------|
56
+ | Prompt Signals | Execution and planning quality | Failed prompts, patch counts, blocker learnings |
57
+ | Tooling Signals | Skill and validation suite effectiveness | Per-tool impact map of what each tool caught vs. missed |
58
+ | Decision Signals | Engineer intent and preferences | Rejections, overrides, compromise patterns |
59
+ | Emergent Work Signals | Quality control preferences | Kept vs. reverted emergent work |
60
+
61
+ The tooling signals phase produces a **per-tool impact map** that cross-references every prompt's skills and validation suites against its summary (Limitations, Decisions, Learnings). This map becomes evidence for harness improvement specs.
62
+
63
+ ### Crystallization Evaluation
64
+
65
+ Per **Agentic Validation Tooling**, the tooling signals phase also evaluates each validation suite for **crystallization** opportunities. For each suite used during execution:
66
+
67
+ - What stochastic patterns were discovered during exploratory validation?
68
+ - Which patterns are stable and repeatable enough to crystallize into deterministic checks?
69
+ - Should new deterministic tests be added to the suite's Deterministic Integration section?
70
+ - Are there stochastic exploration patterns that should be documented for future agents?
71
+
72
+ This evaluation feeds directly into the Harness Improvement phase as evidence for suite refinement per [ref:.allhands/flows/shared/CREATE_VALIDATION_TOOLING_SPEC.md::9750183].
73
+
74
+ A key design decision: emergent prompts are never framed as "scope creep." Per **Quality Engineering**, emergent work discovers valuable variants. Reverted emergent work is expected experimentation cost per **Software is Cheap**.
75
+
76
+ ## Knowledge Outputs
77
+
78
+ The flow produces three distinct knowledge artifacts:
79
+
80
+ | Artifact | Location | Purpose |
81
+ |----------|----------|---------|
82
+ | Memories | `docs/memories.md` | Lightweight learnings searchable via `ah memories search` |
83
+ | Solutions | `docs/solutions/<category>/` | Detailed problem-solution documentation for non-trivial issues |
84
+ | Spec Finalization | `.planning/<spec>/spec.md` | Historical record with implementation reality vs. original plan |
85
+
86
+ ### Memory Categories
87
+
88
+ Memories are captured across several categories when signals exist, including technical learnings, engineer preference memories, systemic validation signals, and harness behavior patterns. Each memory is tagged with domain and source (`user-steering` vs `agent-inferred`) for relevance scoring in future recall.
89
+
90
+ ### Solution Documentation
91
+
92
+ Solutions target problems that required multiple investigation attempts, had non-obvious resolutions, or involve agentic anti-patterns. Trivial fixes are explicitly excluded. After writing, solutions are cross-referenced against existing solutions via `ah solutions search` to build a connected knowledge graph.
93
+
94
+ ## Harness Improvement Classification
95
+
96
+ The final phase classifies detected issues and requires an engineer interview before any action is taken:
97
+
98
+ ```mermaid
99
+ flowchart TD
100
+ I[Classified Issues] --> Q{Type?}
101
+ Q -->|Skill gaps| A[Update skill file inline]
102
+ Q -->|Validation suite gaps| B[Update suite file inline]
103
+ Q -->|Missing validation suite| C{Engineer decision}
104
+ Q -->|Structural: flows/commands/hooks| D{Engineer decision}
105
+ C -->|Create| E[CREATE_HARNESS_SPEC]
106
+ C -->|Defer| F[Document in memories]
107
+ D -->|Create| E
108
+ D -->|Defer| F
109
+ ```
110
+
111
+ Inline updates (skills, validation suites) require engineer approval. Structural changes always go through a spec. Deferred items are documented in `docs/memories.md` under "Deferred Harness Improvements."
112
+
113
+ ### Crystallization Promotion
114
+
115
+ Per [ref:.allhands/flows/COMPOUNDING.md::905aed8], validation suite refinements include **crystallization promotion**: stable stochastic patterns discovered during execution are promoted into deterministic checks in the suite's Deterministic Integration section. This shifts stochastic exploration to the frontier -- agents no longer need to rediscover patterns that have been automated. The crystallization lifecycle ensures validation compounds across spec executions.
116
+
117
+ ## Key Design Decisions
118
+
119
+ - **Engineer interview is mandatory** before finalizing the compounding summary -- the flow must not finalize without sign-off on classified issues
120
+ - **Harness modification requires first principle justification** -- changes must trace back to principles in [ref:.allhands/principles.md::0610b13]
121
+ - **Spec finalization preserves original Goals and Non-Goals** unmodified for historical contrast against the new Implementation Reality section
122
+ - **The per-tool impact map is evidence, not a stored artifact** -- it feeds directly into harness improvement specs rather than being persisted separately
123
+
124
+ ## Source Flow
125
+
126
+ [ref:.allhands/flows/COMPOUNDING.md::905aed8]
@@ -0,0 +1,72 @@
1
+ ---
2
+ description: "Engineer-facing orchestration agent for managing active prompt loops, agent health, and mid-execution interventions"
3
+ ---
4
+
5
+ # Coordination Flow
6
+
7
+ The coordination flow provides an engineer-facing orchestration layer during active prompt execution. It exists because prompt loops are autonomous -- once started, agents execute prompts independently. The engineer needs a control plane to intervene, patch, and steer without breaking the execution model.
8
+
9
+ Per **Frontier Models are Capable**, this agent orchestrates without implementing. It modifies only harness-managed files (prompts, alignment docs) and never writes implementation code.
10
+
11
+ ## Coordination Services
12
+
13
+ The flow presents six services to the engineer, each addressing a different intervention need:
14
+
15
+ | Service | Purpose | Delegation |
16
+ |---------|---------|------------|
17
+ | Quick Patch | Deterministic fix for a specific issue | [ref:.allhands/flows/shared/PROMPT_TASKS_CURATION.md::1abf30b] |
18
+ | Interjection | Insert new prompt into active dependency graph | Internal (see below) |
19
+ | Emergent Surgery | Triage emergent refinement prompts | [ref:.allhands/flows/shared/EMERGENT_REFINEMENT_ANALYSIS.md::f3f4914] |
20
+ | Prompt Edit | Modify prompts given engineer concerns | [ref:.allhands/flows/shared/PROMPT_TASKS_CURATION.md::1abf30b] |
21
+ | Agent Status | Check tmux windows and agent health | Tmux patterns |
22
+ | Kill/Restart | Terminate broken agents and fix prompts | Tmux + prompt edit |
23
+
24
+ ## Prompt Interjection
25
+
26
+ Interjection is the most architecturally significant service. It inserts new prompts into the active dependency graph mid-loop without renumbering existing prompts. The event loop detects new prompt files automatically -- sequencing is controlled entirely through dependency mapping.
27
+
28
+ ```mermaid
29
+ flowchart LR
30
+ subgraph Before
31
+ P1[Prompt 1] --> P2[Prompt 2]
32
+ P2 --> P3["Prompt 3 (deps: [1])"]
33
+ end
34
+ subgraph After
35
+ P1b[Prompt 1] --> P2b[Prompt 2]
36
+ P2b --> P7["Prompt 7 (deps: [2])"]
37
+ P7 --> P3b["Prompt 3 (deps: [1, 7])"]
38
+ end
39
+ ```
40
+
41
+ The interjection process:
42
+
43
+ 1. Engineer specifies "run after" and "run before" prompts
44
+ 2. Coordinator assigns next available prompt number (append-only, never renumber)
45
+ 3. New prompt gets `dependencies` set to "run after" prompts
46
+ 4. "Run before" prompts get their `dependencies` patched to include the new prompt
47
+ 5. Per **Ideation First**, resulting execution order is confirmed with the engineer before writing files
48
+
49
+ When an interjection fixes prior execution issues, the prompt is additionally marked as a user-patch.
50
+
51
+ ## User-Patch Prompts
52
+
53
+ A cross-cutting concern shared between services. Whenever a prompt is created to fix issues from prior execution:
54
+
55
+ - Frontmatter includes `type: user-patch` and `patches_prompts: [X, Y]`
56
+ - Body documents what went wrong, engineer feedback, and specific issues
57
+ - Per **Knowledge Compounding**, this metadata enables the compounding flow to trace failures back to root causes and improve skills/validation suites
58
+
59
+ ## Decision Documentation
60
+
61
+ Per **Knowledge Compounding**, the coordination flow captures engineer contributions in two locations:
62
+
63
+ - **Prompt files**: Expectations, compromises, decisions
64
+ - **Alignment doc**: Engineer steering appended to agent summaries (summaries are never deleted)
65
+
66
+ ## Conversational Approach
67
+
68
+ Per **Ideation First**, the coordinator always clarifies before acting. It asks what the engineer wants to accomplish, presents options with tradeoffs, confirms understanding before modifying files, and surfaces relevant context from prompts and the alignment doc. This is an interactive agent, not a fire-and-forget automation.
69
+
70
+ ## Source Flow
71
+
72
+ [ref:.allhands/flows/COORDINATION.md::607d330]
@@ -0,0 +1,63 @@
1
+ ---
2
+ description: "Core tooling directives shared by all agents, establishing tldr and ah knowledge as the foundation for context-efficient codebase discovery"
3
+ ---
4
+
5
+ # Core Harness Integration
6
+
7
+ Every agent in the harness loads [ref:.allhands/flows/CORE.md::ae9924a] as its foundation. This flow exists because of a single principle: **Context is Precious**. Agents degrade with large context windows, so codebase discovery must be targeted and structured rather than brute-force file reading.
8
+
9
+ ## Two Mandatory Discovery Tools
10
+
11
+ The core integration enforces two tools for all codebase interaction:
12
+
13
+ | Tool | Purpose | When |
14
+ |------|---------|------|
15
+ | `ah knowledge docs search` | Semantic search over documented project knowledge | Any discovery task tied to crucial project understanding |
16
+ | `tldr` | Structured code analysis (trees, codemaps, control flow, data flow) | Retrieving file-level and function-level codebase context |
17
+
18
+ These replace ad-hoc file reads. The constraint is deliberate: agents that read files directly consume context on content that may be irrelevant. Structured tools return only what matters.
19
+
20
+ ## Discovery Protocol
21
+
22
+ The mandated sequence reflects a progressive disclosure pattern -- start broad, narrow to specifics:
23
+
24
+ ```mermaid
25
+ flowchart TD
26
+ A[Codebase Question] --> B[tldr semantic search]
27
+ B --> C{Need deeper analysis?}
28
+ C -->|Structure| D[tldr structure / tree]
29
+ C -->|Search| E[tldr search]
30
+ C -->|Function behavior| F[tldr cfg / dfg]
31
+ C -->|Impact of change| G[tldr impact]
32
+ C -->|What affects line N| H[tldr slice]
33
+ C -->|Cross-file calls| I[tldr calls]
34
+ C -->|Quality check| J[tldr diagnostics]
35
+ ```
36
+
37
+ ## tldr Capability Categories
38
+
39
+ ### Core Analysis
40
+
41
+ File trees, code structure maps, pattern search, full file extraction, and LLM-ready context bundles. These are the building blocks agents use before reading raw files.
42
+
43
+ ### Flow Analysis
44
+
45
+ Control flow graphs, data flow graphs, program slices, and cross-file call graphs. These enable agents to understand function behavior without reading every line -- critical for keeping context budgets low.
46
+
47
+ ### Codebase Analysis
48
+
49
+ Reverse call graphs (`impact`), dead code detection (`dead`), and architectural layer detection (`arch`). These support refactoring decisions and cleanup tasks.
50
+
51
+ ### Import Analysis
52
+
53
+ Forward imports (`imports`) and reverse import tracking (`importers`). These answer "what does this file depend on?" and "who depends on this module?" -- essential for understanding blast radius.
54
+
55
+ ### Quality and Testing
56
+
57
+ Type checking and linting via `diagnostics`. Agents run this before tests to catch errors without consuming test execution context.
58
+
59
+ ## Why This Exists as a Shared Flow
60
+
61
+ Per **Knowledge Compounding**, centralizing these directives in a single flow prevents every agent-specific flow from repeating the same tooling instructions. All agents inherit codebase discovery capability from this one file, and improvements to discovery practices propagate to every agent simultaneously.
62
+
63
+ The flow is deliberately terse -- per **Frontier Models are Capable**, agents deduce how to combine these tools for their specific tasks. The flow provides the "what tools exist and when to use them" while trusting agents to figure out the "how" for their domain.
@@ -0,0 +1,98 @@
1
+ ---
2
+ description: "Two-mode documentation pipeline with discovery sub-agents, writer sub-agents, and validation for creating and maintaining engineering knowledge docs"
3
+ ---
4
+
5
+ # Documentation Orchestration
6
+
7
+ The documentation flow creates and maintains engineering knowledge docs that expose code through file references and LSP symbols. Per **Knowledge Compounding**, docs enable semantic discovery of code through compounded understanding of use cases, intent, and key decisions.
8
+
9
+ Per **Context is Precious**, the orchestrator delegates discovery and writing to sub-agents rather than performing all work in a single context window.
10
+
11
+ ## Mode Detection
12
+
13
+ The flow operates in two modes, selected automatically based on context variables:
14
+
15
+ ```mermaid
16
+ flowchart TD
17
+ Start[Flow Invoked] --> Check{ALIGNMENT_PATH + PROMPTS_FOLDER provided?}
18
+ Check -->|Yes| Inc[Incremental Mode]
19
+ Check -->|No| FTG[Fill-the-Gaps Mode]
20
+ Inc --> Core[Core Flow]
21
+ FTG --> Core
22
+ ```
23
+
24
+ | Mode | Trigger | Scope | Knowledge Source |
25
+ |------|---------|-------|-----------------|
26
+ | Fill-the-Gaps | Cold start or refresh, no spec context | All domains, full repo scan | Inferred from code |
27
+ | Incremental | Feature branch with spec context | Affected domains only | Prompts, commits, alignment docs |
28
+
29
+ If no message or context variables are provided, the flow defaults directly to Fill-the-Gaps without asking.
30
+
31
+ ## Pre-flight Requirement
32
+
33
+ A clean git working tree is mandatory. File references include a git commit hash component -- uncommitted files have no hash, and modified files produce stale hashes that break immediately after the next commit.
34
+
35
+ ## Fill-the-Gaps Initialization
36
+
37
+ 1. Run `ah docs validate --json` to identify invalid refs, stale refs, and missing frontmatter
38
+ 2. Detect domains: read `docs.json` or infer from project structure (checking monorepo markers like `pnpm-workspace.yaml`, `turbo.json`, `nx.json`)
39
+ 3. Present detected domains to user for confirmation
40
+ 4. Persist confirmed domains to `docs.json` for future incremental runs
41
+
42
+ ## Incremental Initialization
43
+
44
+ 1. Read alignment doc and prompt files for session knowledge
45
+ 2. Run `git diff` against merge base for changed files
46
+ 3. Run `ah docs validate --json` for current staleness
47
+ 4. Impact analysis via `ah knowledge docs search` to find related docs
48
+ 5. Categorize changes: **Edit** (existing docs reference changed code), **Create** (new functionality), **Stale** (outdated refs)
49
+
50
+ ## Core Pipeline
51
+
52
+ Both modes converge into a shared four-phase pipeline:
53
+
54
+ ```mermaid
55
+ flowchart TD
56
+ D[Discovery Phase] --> A[Aggregate & Plan]
57
+ A --> W[Writing Phase]
58
+ W --> P[Post-Processing]
59
+
60
+ D -.- D1[1 sub-agent per domain]
61
+ W -.- W1[5-10 writer sub-agents]
62
+ W1 -.- W2[5-15 approaches per writer]
63
+ P -.- P1[README generation + validation loop]
64
+ ```
65
+
66
+ ### Discovery Phase
67
+
68
+ One discovery sub-agent per domain, each following [ref:.allhands/flows/shared/DOCUMENTATION_DISCOVERY.md::f3f2716]. Discovery identifies all documentable approaches/features, groups them intelligently, and checks existing coverage. Key constraint: stay under 20 approaches per domain, grouping aggressively.
69
+
70
+ ### Aggregate and Plan
71
+
72
+ The orchestrator merges discovery results, filters out fully-covered approaches, and groups remaining approaches into writer assignments. Target: 5-10 writers total, each handling 5-15 approaches from one domain or related subset.
73
+
74
+ ### Writing Phase
75
+
76
+ Writer sub-agents follow [ref:.allhands/flows/shared/DOCUMENTATION_WRITER.md::8447f47]. Each writer receives its approaches with file lists, symbols, the target directory, any existing docs to edit, and session knowledge (in incremental mode). The `group` field controls subdirectory placement.
77
+
78
+ ### Post-Processing
79
+
80
+ The orchestrator handles cross-domain concerns that individual writers lack context for:
81
+
82
+ - **README generation**: Top-level `docs/README.md`, per-domain `docs/<domain>/README.md`, and per-group READMEs for subdirectories with 3+ docs
83
+ - **Finalize and validate loop**: Run `ah docs finalize`, then `ah docs validate --json`. If issues exist, spawn fixup writers and repeat until clean
84
+ - **Reindex**: Run `ah knowledge docs reindex` to update semantic search
85
+
86
+ ## Ownership Boundaries
87
+
88
+ | Artifact | Owner |
89
+ |----------|-------|
90
+ | Approach docs | Writer sub-agents |
91
+ | README.md files | Orchestrator (cross-domain context) |
92
+ | `docs/solutions/`, `docs/memories.md` | Compounding flow (never written by documentation) |
93
+
94
+ ## Source Flows
95
+
96
+ - [ref:.allhands/flows/DOCUMENTATION.md::dc3e5c6]
97
+ - [ref:.allhands/flows/shared/DOCUMENTATION_DISCOVERY.md::f3f2716]
98
+ - [ref:.allhands/flows/shared/DOCUMENTATION_WRITER.md::8447f47]
@@ -0,0 +1,83 @@
1
+ ---
2
+ description: "Flow for building comprehensive E2E test plans with dimension-mapped sections: deterministic integration tests, infrastructure setup, stochastic AI-coordinated validation, and manual verification flows for milestone validation"
3
+ ---
4
+
5
+ # E2E Test Plan Building
6
+
7
+ The E2E test plan flow produces a document that convinces the engineer a milestone works as expected. Per **Agentic Validation Tooling**, engineers are excluded from prompt-by-prompt validation -- this plan provides the comprehensive proof they need through a single artifact.
8
+
9
+ The plan is not a restatement of automated test output. It layers deterministic summaries, infrastructure setup, AI-coordinated validation, and manual flows into a progressive document the engineer can use to verify the milestone end-to-end.
10
+
11
+ ## Plan Structure
12
+
13
+ The test plan follows a four-section progressive structure, where each section builds on the previous:
14
+
15
+ ```mermaid
16
+ flowchart TD
17
+ S1["Section 1: Deterministic Test Summary"] --> S2["Section 2: Infrastructure Setup"]
18
+ S2 --> S3["Section 3: AI-Coordinated Validation"]
19
+ S3 --> S4["Section 4: Manual E2E Flows"]
20
+
21
+ S1 -.- N1["Concise command list with comments"]
22
+ S2 -.- N2["Derived from implementation artifacts"]
23
+ S3 -.- N3["Conditional — only if agentic tooling exists"]
24
+ S4 -.- N4["Core 'convince the engineer' section"]
25
+ ```
26
+
27
+ ### Dimension Mapping
28
+
29
+ Per **Agentic Validation Tooling**, the test plan sections map to the two-dimensional validation model:
30
+
31
+ | Section | Validation Dimension | Suite Body Section |
32
+ |---------|---------------------|--------------------|
33
+ | Section 1: Deterministic Test Summary | **Deterministic** | Commands drawn from suite **Deterministic Integration** sections |
34
+ | Section 3: AI-Coordinated Validation | **Stochastic** | Agent exploration drawn from suite **Stochastic Validation** playbooks |
35
+ | Sections 2 & 4 | N/A | Infrastructure and manual flows are not dimension-mapped |
36
+
37
+ ### Section Design Decisions
38
+
39
+ | Section | What It Contains | What It Avoids |
40
+ |---------|-----------------|----------------|
41
+ | Deterministic Test Summary | Runnable commands grouped by domain, inline comments | Detailed breakdowns, file listings, coverage percentages |
42
+ | Infrastructure Setup | Dependencies, env, database, services, dev servers | Assumed knowledge; derives everything from artifacts |
43
+ | AI-Coordinated Validation | Agent prompts for Playwright MCP, agent-browser, load testing, profiling | Inclusion when no agentic tooling exists for the project |
44
+ | Manual E2E Flows | User flows, edge cases, regression scenarios | Duplicating what automated tests already cover |
45
+
46
+ ## Update Mode
47
+
48
+ The plan supports incremental updates. When a test plan already exists at the output path:
49
+
50
+ 1. Extract `last_commit` from frontmatter
51
+ 2. Diff commits and files since that commit
52
+ 3. Compare alignment doc prompt summaries against covered prompts
53
+ 4. Append new scenarios rather than rewriting existing coverage
54
+
55
+ This makes the plan a living document that grows with the milestone.
56
+
57
+ ## Variant Awareness
58
+
59
+ Per **Quality Engineering**, implementation may produce disposable variants (A/B implementations, backend alternatives, experimental features). The infrastructure setup section documents how to switch between variants using feature flags, environment variables, or infrastructure flags, with setup commands for each testable variant.
60
+
61
+ ## Infrastructure as Documentation Quality Signal
62
+
63
+ A key insight: if infrastructure setup cannot be derived from implementation artifacts (commits, summaries, code, existing docs), it signals inadequate documentation. The subsequent documentation phase will face the same challenge. This makes the E2E test plan an early warning system for documentation gaps.
64
+
65
+ ## AI-Coordinated Validation
66
+
67
+ Section 3 is conditional -- it only appears when the project has tooling that supports agentic testing:
68
+
69
+ - UI automation (Playwright MCP, agent-browser, simulator automation, browser MCPs)
70
+ - Load testing tools (k6, artillery, locust)
71
+ - Performance profiling (flamegraphs, memory profilers, bundle analyzers)
72
+ - Database inspection/scripting
73
+ - API testing tools (curl automation, Postman/Insomnia MCPs)
74
+
75
+ When such tooling is present, the section provides example prompts engineers can give to agent sessions to exercise specific flows. When it is absent, the plan instead notes which tooling categories would be valuable.
76
+
77
+ ### Context Gathering for Tooling
78
+
79
+ Per [ref:.allhands/flows/E2E_TEST_PLAN_BUILDING.md::aa87ec8], during context gathering the flow runs `ah validation-tools list` to identify available suites. The `tools` field in suite output identifies which tooling is available for both stochastic and deterministic dimensions -- this informs which Section 3 tooling categories the project can support and surfaces tooling that may not be obvious from code inspection alone.
80
+
81
+ ## Source Flow
82
+
83
+ [ref:.allhands/flows/E2E_TEST_PLAN_BUILDING.md::aa87ec8]
@@ -0,0 +1,104 @@
1
+ ---
2
+ description: "Hypothesis-driven emergent planning for continuous improvement beyond planned prompts -- a plan-only agent that creates typed prompt files for executors, with work mode diversification and post-refinement analysis"
3
+ ---
4
+
5
+ # Emergent Refinement
6
+
7
+ Planned prompts address known requirements. Emergent refinement addresses the unknown -- the improvements, extensions, and experiments that only become visible after initial implementation exists. Per **Quality Engineering**, emergent work discovers which variants are valuable, not just what was explicitly requested.
8
+
9
+ ## Core Concept: Hypothesis-Driven Planning
10
+
11
+ Every emergent prompt starts with a hypothesis: "If I implement X, then Y outcome will result." This is fundamentally different from planned prompts, which start with "The spec requires X." The hypothesis framing forces agents to articulate expected outcomes, making success measurable and failure informative.
12
+
13
+ The emergent agent is a **plan-only** agent -- it creates `type: emergent` prompt files but never executes them. Executors pick up these prompts through the standard execution loop. This separation of planning and execution keeps each concern bounded in its own context window.
14
+
15
+ [ref:.allhands/flows/EMERGENT_PLANNING.md::4f1d9bf]
16
+
17
+ ## Planning Lifecycle
18
+
19
+ ```mermaid
20
+ stateDiagram-v2
21
+ [*] --> ContextGathering
22
+ ContextGathering --> GapAssessment
23
+ GapAssessment --> HypothesisFormation
24
+ HypothesisFormation --> PromptCreation
25
+ PromptCreation --> [*]
26
+
27
+ state ContextGathering {
28
+ [*] --> ReadAlignment
29
+ ReadAlignment --> SearchMemories
30
+ SearchMemories --> IdentifyGaps
31
+ }
32
+
33
+ state PromptCreation {
34
+ [*] --> DiscoverValidation
35
+ DiscoverValidation --> CreatePromptFiles
36
+ CreatePromptFiles --> Stop
37
+ }
38
+ ```
39
+
40
+ The planner stops after creating prompt files. Executors handle implementation, and validation suites discovered by the planner are attached to prompts for executors to run.
41
+
42
+ ## Work Mode Diversification
43
+
44
+ Emergent agents select from three work modes, cycling between them based on what prior prompts have already explored:
45
+
46
+ ```mermaid
47
+ flowchart LR
48
+ CC[Core Goal Work] -->|reveals gaps| AI[Adjacent Improvements]
49
+ AI -->|exposes new core needs| CC
50
+ AI -->|stress-tests assumptions| NE[Novel Experiments]
51
+ NE -->|compounds back into| CC
52
+ ```
53
+
54
+ | Mode | Purpose | Examples |
55
+ |------|---------|---------|
56
+ | Core Goal Work | Directly addresses spec goals, acceptance criteria, known gaps | Missing edge cases, error recovery paths |
57
+ | Adjacent Improvements | Tangentially related enhancements that compound core work | Performance optimization, UX polish |
58
+ | Novel Experiments | Creative extensions behind feature flags that stress-test assumptions | Alternative approaches, exploratory features |
59
+
60
+ These are not sequential phases. An agent doing novel experiments may discover a core stability gap and return to core goal work. Per **Knowledge Compounding**, each mode feeds the others -- adjacent work exposes core needs, novel work stress-tests assumptions.
61
+
62
+ ## Validation Discovery
63
+
64
+ The planner discovers applicable validation suites during prompt creation and attaches them to prompt frontmatter via `validation_suites`. The planner does not run validation -- executors do when they pick up the prompts. This ensures emergent work meets the same quality bar as planned work without conflating planning with execution.
65
+
66
+ ## Completion Protocol
67
+
68
+ The planner completes after creating prompt files. Each prompt includes the work mode type in its metadata so subsequent emergent planning rounds can diversify their mode selection. Alignment files and prompt files are not git tracked -- only implementation changes committed by executors are.
69
+
70
+ The planner must always produce at least one prompt. If core goals are met, it creates adjacent improvements or novel experiments. Per **Knowledge Compounding**, each round builds on the work of the previous rounds.
71
+
72
+ ## Post-Refinement Analysis
73
+
74
+ [ref:.allhands/flows/shared/EMERGENT_REFINEMENT_ANALYSIS.md::f3f4914]
75
+
76
+ After a batch of emergent prompts completes, an analysis phase evaluates each one:
77
+
78
+ ### Classification Decision Tree
79
+
80
+ ```mermaid
81
+ flowchart TD
82
+ EP[Emergent Prompt] --> E{Evaluate}
83
+ E -->|Strong hypothesis, effective, aligned| K[Keep]
84
+ E -->|Good hypothesis, execution gaps| I[Improve]
85
+ E -->|Hypothesis doesn't support goal| EL[Eliminate]
86
+
87
+ I --> IP[Create improvement patch prompt]
88
+ EL --> RP[Create reversion patch prompt]
89
+ K --> Done[No action needed]
90
+ ```
91
+
92
+ | Classification | Criteria | Action |
93
+ |---------------|----------|--------|
94
+ | Keep | Strong hypothesis, effective execution, aligned with goals | None |
95
+ | Improve | Good hypothesis, but execution gaps remain | Create `type: user-patch` improvement prompt |
96
+ | Eliminate | Hypothesis doesn't support spec goal | Create `type: user-patch` reversion prompt using git hashes |
97
+
98
+ ### Engineer Decision Point
99
+
100
+ The analysis presents findings holistically -- comparing emergent prompts against each other, highlighting patterns of effective versus ineffective hypotheses. The engineer accepts, adjusts, or overrides recommendations. Per **Knowledge Compounding**, all decisions and rationale are documented in the alignment doc to prevent future agents from re-proposing eliminated approaches.
101
+
102
+ ## Why Emergent Refinement Exists
103
+
104
+ Per **Prompt Files as Units of Work**, novelty emerges from prompt tasking. Planned prompts capture what engineers know they want. Emergent refinement captures what they didn't know they wanted until the system existed. The separation of planning from execution ensures the planner can focus entirely on hypothesis quality while executors focus on implementation quality. The framing as "indefinite compounding" rather than "percentage complete" reflects the principle that there is always a next valuable iteration to discover.
@@ -0,0 +1,89 @@
1
+ ---
2
+ description: "Guidelines for authoring harness flow files driven by first principles and adding MCP server integrations to extend harness capabilities"
3
+ ---
4
+
5
+ # Flow Authoring and MCP Tool Integration
6
+
7
+ Two complementary authoring guidelines: one for writing harness flows (the instruction layer), one for adding MCP server integrations (the tool layer). Both shape how agents receive direction and capability.
8
+
9
+ ---
10
+
11
+ ## Flow Authoring
12
+
13
+ [ref:.allhands/flows/shared/WRITING_HARNESS_FLOWS.md::f39048c]
14
+
15
+ ### Principle-to-Directive Mapping
16
+
17
+ Every flow directive traces back to a first principle from [ref:.allhands/principles.md::0610b13]. This mapping is the foundation of flow authoring:
18
+
19
+ | First Principle | What It Means for Flows |
20
+ |-----------------|------------------------|
21
+ | **Context is Precious** | Be brief. Progressive disclosure. Reference rather than repeat. |
22
+ | **Frontier Models are Capable** | Provide "why," trust agents to deduce "what" and "how." |
23
+ | **Knowledge Compounding** | DRY -- centralize instructions, use decision trees that reference capability chunks. |
24
+
25
+ When a flow instructs a behavior, it must cite the motivating principle by name. This teaches agents to think in terms of the harness philosophy, not just follow instructions.
26
+
27
+ ### Flow Anatomy
28
+
29
+ Flows use XML tags for structural attention:
30
+
31
+ | Tag | Purpose |
32
+ |-----|---------|
33
+ | `<goal>` | Motivations and contribution to the wider harness |
34
+ | `<constraints>` | Hard rules (NEVER / MUST / ALWAYS) |
35
+ | `<ownership>` | Files and domains the agent is restricted to |
36
+ | `<success_criteria>` | Validation criteria for task completion |
37
+ | `<inputs>` | Inputs required for the flow to execute |
38
+ | `<outputs>` | Outputs expected from the flow |
39
+
40
+ Body sections use `##` headers as capability phases (Context Gathering, Implementation, Validation, Completion). Bullet points start with action verbs. Paths and commands are backtick-wrapped. Conditionals use flat "If X - Y" patterns.
41
+
42
+ ### File Organization
43
+
44
+ - `flows/` root: Agent default flows, disclosed immediately on agent startup
45
+ - `flows/subdirectories/`: Progressively disclosed flows with `<inputs>` and `<outputs>` tags, invoked by other flows
46
+
47
+ The northstar example flow is [ref:.allhands/flows/PROMPT_TASK_EXECUTION.md::9baf478].
48
+
49
+ ---
50
+
51
+ ## MCP Server Integration
52
+
53
+ [ref:.allhands/flows/shared/WRITING_HARNESS_MCP_TOOLS.md::fad1587]
54
+
55
+ ### Integration Phases
56
+
57
+ ```mermaid
58
+ flowchart LR
59
+ R[Research] --> B[Build Config]
60
+ B --> E[Environment Setup]
61
+ E --> V[Validation]
62
+ ```
63
+
64
+ **Research**: Investigate the MCP package via `ah tavily search` and `ah context7 search`. Identify transport type (stdio, http, sse), command/args, environment variables, and authentication method.
65
+
66
+ **Build Config**: Copy the template at `.allhands/harness/src/mcp/_template.ts` and populate with researched values -- name, description, transport config, environment variable references using `${VAR_NAME}` syntax, statefulness, and tool hints.
67
+
68
+ **Environment Setup**: Document required variables (name, where to obtain, expected format) without adding actual values. Check `.env.ai` for existing variables.
69
+
70
+ **Validation**: Build harness, verify server appears in `ah tools --list`, verify tools are discovered via `ah tools <server-name>`, and test a read-only tool call.
71
+
72
+ ### Config Structure
73
+
74
+ Each MCP server config lives at `.allhands/harness/src/mcp/<server-name>.ts` and specifies:
75
+
76
+ | Field | Purpose |
77
+ |-------|---------|
78
+ | `name` | Short identifier (used in `ah tools <name>:tool`) |
79
+ | `description` | What the server provides |
80
+ | `type` | Transport: `stdio`, `http`, or `sse` |
81
+ | `command` / `args` | For stdio transport |
82
+ | `url` | For http/sse transport |
83
+ | `env` | Environment variable references |
84
+ | `stateful` | Whether server maintains session state |
85
+ | `toolHints` | Helpful hints for key tools |
86
+
87
+ ### Design Decision: Sub-agent Execution
88
+
89
+ MCP integration runs as a sub-agent to avoid blocking the main thread. The main thread can proceed knowing the MCP server is (or will be) available, receiving a completion report with config path, available tools, environment requirements, and validation status.