all-hands-cli 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (305) hide show
  1. package/.allhands/README.md +75 -0
  2. package/.allhands/agents/compounder.yaml +15 -0
  3. package/.allhands/agents/coordinator.yaml +17 -0
  4. package/.allhands/agents/documentor.yaml +15 -0
  5. package/.allhands/agents/e2e-test-planner.yaml +17 -0
  6. package/.allhands/agents/emergent.yaml +22 -0
  7. package/.allhands/agents/executor.yaml +14 -0
  8. package/.allhands/agents/ideation.yaml +11 -0
  9. package/.allhands/agents/initiative-steering.yaml +19 -0
  10. package/.allhands/agents/judge.yaml +13 -0
  11. package/.allhands/agents/planner.yaml +19 -0
  12. package/.allhands/agents/pr-reviewer.yaml +15 -0
  13. package/.allhands/docs.json +5 -0
  14. package/.allhands/docs.local.json +26 -0
  15. package/.allhands/flows/COMPOUNDING.md +203 -0
  16. package/.allhands/flows/COORDINATION.md +89 -0
  17. package/.allhands/flows/CORE.md +87 -0
  18. package/.allhands/flows/DOCUMENTATION.md +218 -0
  19. package/.allhands/flows/E2E_TEST_PLAN_BUILDING.md +140 -0
  20. package/.allhands/flows/EMERGENT_PLANNING.md +57 -0
  21. package/.allhands/flows/IDEATION_SCOPING.md +154 -0
  22. package/.allhands/flows/INITIATIVE_STEERING.md +110 -0
  23. package/.allhands/flows/JUDGE_REVIEWING.md +79 -0
  24. package/.allhands/flows/PROMPT_TASK_EXECUTION.md +68 -0
  25. package/.allhands/flows/PR_REVIEWING.md +43 -0
  26. package/.allhands/flows/SPEC_PLANNING.md +216 -0
  27. package/.allhands/flows/harness/WRITING_HARNESS_FLOWS.md +27 -0
  28. package/.allhands/flows/harness/WRITING_HARNESS_KNOWLEDGE.md +27 -0
  29. package/.allhands/flows/harness/WRITING_HARNESS_ORCHESTRATION.md +27 -0
  30. package/.allhands/flows/harness/WRITING_HARNESS_SKILLS.md +27 -0
  31. package/.allhands/flows/harness/WRITING_HARNESS_TOOLS.md +27 -0
  32. package/.allhands/flows/harness/WRITING_HARNESS_VALIDATION_TOOLING.md +27 -0
  33. package/.allhands/flows/shared/CODEBASE_UNDERSTANDING.md +72 -0
  34. package/.allhands/flows/shared/CREATE_HARNESS_SPEC.md +48 -0
  35. package/.allhands/flows/shared/CREATE_SPEC.md +41 -0
  36. package/.allhands/flows/shared/CREATE_VALIDATION_TOOLING_SPEC.md +70 -0
  37. package/.allhands/flows/shared/DOCUMENTATION_DISCOVERY.md +123 -0
  38. package/.allhands/flows/shared/DOCUMENTATION_WRITER.md +101 -0
  39. package/.allhands/flows/shared/EMERGENT_REFINEMENT_ANALYSIS.md +76 -0
  40. package/.allhands/flows/shared/EXTERNAL_TECH_GUIDANCE.md +97 -0
  41. package/.allhands/flows/shared/IDEATION_CODEBASE_GROUNDING.md +49 -0
  42. package/.allhands/flows/shared/PLAN_DEEPENING.md +152 -0
  43. package/.allhands/flows/shared/PROMPT_TASKS_CURATION.md +113 -0
  44. package/.allhands/flows/shared/PROMPT_VALIDATION_REVIEW.MD +99 -0
  45. package/.allhands/flows/shared/QUICK_PREMORTEM.md +70 -0
  46. package/.allhands/flows/shared/RESEARCH_GUIDANCE.md +38 -0
  47. package/.allhands/flows/shared/REVIEW_OPTIONS_BREAKDOWN.md +68 -0
  48. package/.allhands/flows/shared/SKILL_EXTRACTION.md +84 -0
  49. package/.allhands/flows/shared/SPEC_FLOW_ANALYSIS.md +119 -0
  50. package/.allhands/flows/shared/TDD_WORKFLOW.md +109 -0
  51. package/.allhands/flows/shared/UTILIZE_VALIDATION_TOOLING.md +84 -0
  52. package/.allhands/flows/shared/WRITING_HARNESS_FLOWS.md +11 -0
  53. package/.allhands/flows/shared/WRITING_HARNESS_MCP_TOOLS.md +84 -0
  54. package/.allhands/flows/shared/jury/ARCHITECTURE_REVIEW.md +91 -0
  55. package/.allhands/flows/shared/jury/BEST_PRACTICES_REVIEW.md +80 -0
  56. package/.allhands/flows/shared/jury/CLAIM_VERIFICATION_REVIEW.md +101 -0
  57. package/.allhands/flows/shared/jury/EXPECTATIONS_FIT_REVIEW.md +78 -0
  58. package/.allhands/flows/shared/jury/MAINTAINABILITY_REVIEW.md +110 -0
  59. package/.allhands/flows/shared/jury/PROMPTS_EXPECTATIONS_FIT.md +74 -0
  60. package/.allhands/flows/shared/jury/PROMPTS_FLOW_ANALYSIS.md +92 -0
  61. package/.allhands/flows/shared/jury/PROMPTS_YAGNI.md +78 -0
  62. package/.allhands/flows/shared/jury/PROMPT_PREMORTEM.md +125 -0
  63. package/.allhands/flows/shared/jury/SECURITY_REVIEW.md +86 -0
  64. package/.allhands/flows/shared/jury/YAGNI_REVIEW.md +82 -0
  65. package/.allhands/flows/wip/DEBUG_INVESTIGATION.md +162 -0
  66. package/.allhands/flows/wip/MEMORY_RECALL.md +62 -0
  67. package/.allhands/harness/ah +131 -0
  68. package/.allhands/harness/package-lock.json +5292 -0
  69. package/.allhands/harness/package.json +52 -0
  70. package/.allhands/harness/src/__tests__/e2e/commands.test.ts +307 -0
  71. package/.allhands/harness/src/__tests__/e2e/event-loop.test.ts +539 -0
  72. package/.allhands/harness/src/__tests__/e2e/hooks.test.ts +427 -0
  73. package/.allhands/harness/src/__tests__/e2e/new-initiative-routing.test.ts +137 -0
  74. package/.allhands/harness/src/__tests__/e2e/run-e2e.ts +109 -0
  75. package/.allhands/harness/src/__tests__/e2e/specs-type.test.ts +210 -0
  76. package/.allhands/harness/src/__tests__/e2e/validation-hooks.test.ts +669 -0
  77. package/.allhands/harness/src/__tests__/e2e/validation-path-consistency.test.ts +354 -0
  78. package/.allhands/harness/src/__tests__/e2e/validation.test.ts +528 -0
  79. package/.allhands/harness/src/__tests__/harness/assertions.ts +318 -0
  80. package/.allhands/harness/src/__tests__/harness/cli-runner.ts +359 -0
  81. package/.allhands/harness/src/__tests__/harness/fixture.ts +384 -0
  82. package/.allhands/harness/src/__tests__/harness/hook-runner.ts +411 -0
  83. package/.allhands/harness/src/__tests__/harness/index.ts +122 -0
  84. package/.allhands/harness/src/cli.ts +36 -0
  85. package/.allhands/harness/src/commands/complexity.ts +177 -0
  86. package/.allhands/harness/src/commands/context7.ts +202 -0
  87. package/.allhands/harness/src/commands/docs.ts +557 -0
  88. package/.allhands/harness/src/commands/hooks.ts +24 -0
  89. package/.allhands/harness/src/commands/index.ts +51 -0
  90. package/.allhands/harness/src/commands/knowledge.ts +382 -0
  91. package/.allhands/harness/src/commands/memories.ts +302 -0
  92. package/.allhands/harness/src/commands/notify.ts +61 -0
  93. package/.allhands/harness/src/commands/oracle.ts +158 -0
  94. package/.allhands/harness/src/commands/perplexity.ts +220 -0
  95. package/.allhands/harness/src/commands/planning.ts +245 -0
  96. package/.allhands/harness/src/commands/schema.ts +73 -0
  97. package/.allhands/harness/src/commands/skills.ts +128 -0
  98. package/.allhands/harness/src/commands/solutions.ts +353 -0
  99. package/.allhands/harness/src/commands/spawn.ts +158 -0
  100. package/.allhands/harness/src/commands/specs.ts +532 -0
  101. package/.allhands/harness/src/commands/tavily.ts +226 -0
  102. package/.allhands/harness/src/commands/tools.ts +579 -0
  103. package/.allhands/harness/src/commands/trace.ts +327 -0
  104. package/.allhands/harness/src/commands/tui.ts +960 -0
  105. package/.allhands/harness/src/commands/validate.ts +143 -0
  106. package/.allhands/harness/src/commands/validation-tools.ts +108 -0
  107. package/.allhands/harness/src/hooks/context.ts +1442 -0
  108. package/.allhands/harness/src/hooks/enforcement.ts +170 -0
  109. package/.allhands/harness/src/hooks/index.ts +54 -0
  110. package/.allhands/harness/src/hooks/lifecycle.ts +229 -0
  111. package/.allhands/harness/src/hooks/notification.ts +104 -0
  112. package/.allhands/harness/src/hooks/observability.ts +551 -0
  113. package/.allhands/harness/src/hooks/session.ts +88 -0
  114. package/.allhands/harness/src/hooks/shared.ts +815 -0
  115. package/.allhands/harness/src/hooks/transcript-parser.ts +208 -0
  116. package/.allhands/harness/src/hooks/validation.ts +617 -0
  117. package/.allhands/harness/src/lib/__tests__/ctags.test.ts +244 -0
  118. package/.allhands/harness/src/lib/__tests__/docs-validation.test.ts +344 -0
  119. package/.allhands/harness/src/lib/__tests__/mcp-runtime.test.ts +190 -0
  120. package/.allhands/harness/src/lib/__tests__/schema.test.ts +861 -0
  121. package/.allhands/harness/src/lib/base-command.ts +198 -0
  122. package/.allhands/harness/src/lib/cli-daemon.ts +343 -0
  123. package/.allhands/harness/src/lib/compaction.ts +313 -0
  124. package/.allhands/harness/src/lib/ctags.ts +497 -0
  125. package/.allhands/harness/src/lib/docs-validation.ts +907 -0
  126. package/.allhands/harness/src/lib/event-loop.ts +662 -0
  127. package/.allhands/harness/src/lib/flows.ts +155 -0
  128. package/.allhands/harness/src/lib/git.ts +276 -0
  129. package/.allhands/harness/src/lib/knowledge-worker.ts +72 -0
  130. package/.allhands/harness/src/lib/knowledge.ts +810 -0
  131. package/.allhands/harness/src/lib/llm.ts +255 -0
  132. package/.allhands/harness/src/lib/mcp-client.ts +432 -0
  133. package/.allhands/harness/src/lib/mcp-daemon.ts +486 -0
  134. package/.allhands/harness/src/lib/mcp-runtime.ts +418 -0
  135. package/.allhands/harness/src/lib/notification.ts +115 -0
  136. package/.allhands/harness/src/lib/opencode/index.ts +70 -0
  137. package/.allhands/harness/src/lib/opencode/profiles.ts +300 -0
  138. package/.allhands/harness/src/lib/opencode/prompts/codesearch.md +98 -0
  139. package/.allhands/harness/src/lib/opencode/prompts/knowledge-aggregator.md +67 -0
  140. package/.allhands/harness/src/lib/opencode/runner.ts +281 -0
  141. package/.allhands/harness/src/lib/oracle.ts +926 -0
  142. package/.allhands/harness/src/lib/planning-utils.ts +150 -0
  143. package/.allhands/harness/src/lib/planning.ts +605 -0
  144. package/.allhands/harness/src/lib/pr-review.ts +225 -0
  145. package/.allhands/harness/src/lib/prompts.ts +522 -0
  146. package/.allhands/harness/src/lib/schema.ts +418 -0
  147. package/.allhands/harness/src/lib/schemas/agent-profile.ts +141 -0
  148. package/.allhands/harness/src/lib/schemas/template-vars.ts +138 -0
  149. package/.allhands/harness/src/lib/session.ts +164 -0
  150. package/.allhands/harness/src/lib/specs.ts +348 -0
  151. package/.allhands/harness/src/lib/tldr.ts +829 -0
  152. package/.allhands/harness/src/lib/tmux.ts +1051 -0
  153. package/.allhands/harness/src/lib/trace-store.ts +714 -0
  154. package/.allhands/harness/src/mcp/__tests__/index.test.ts +46 -0
  155. package/.allhands/harness/src/mcp/_template.ts +47 -0
  156. package/.allhands/harness/src/mcp/filesystem.ts +33 -0
  157. package/.allhands/harness/src/mcp/index.ts +69 -0
  158. package/.allhands/harness/src/mcp/playwright.ts +34 -0
  159. package/.allhands/harness/src/mcp/xcodebuild.ts +29 -0
  160. package/.allhands/harness/src/schemas/docs.schema.json +44 -0
  161. package/.allhands/harness/src/schemas/settings.schema.json +214 -0
  162. package/.allhands/harness/src/tui/actions.ts +227 -0
  163. package/.allhands/harness/src/tui/file-viewer-modal.ts +270 -0
  164. package/.allhands/harness/src/tui/index.ts +1574 -0
  165. package/.allhands/harness/src/tui/modal.ts +232 -0
  166. package/.allhands/harness/src/tui/prompts-pane.ts +186 -0
  167. package/.allhands/harness/src/tui/status-pane.ts +434 -0
  168. package/.allhands/harness/tsconfig.json +22 -0
  169. package/.allhands/harness/vitest.config.ts +13 -0
  170. package/.allhands/pillars.md +33 -0
  171. package/.allhands/principles.md +88 -0
  172. package/.allhands/schemas/alignment.yaml +51 -0
  173. package/.allhands/schemas/documentation.yaml +10 -0
  174. package/.allhands/schemas/prompt.yaml +92 -0
  175. package/.allhands/schemas/skill.yaml +34 -0
  176. package/.allhands/schemas/solution.yaml +131 -0
  177. package/.allhands/schemas/spec.yaml +67 -0
  178. package/.allhands/schemas/validation-suite.yaml +49 -0
  179. package/.allhands/schemas/workflow.yaml +51 -0
  180. package/.allhands/settings.json +57 -0
  181. package/.allhands/skills/claude-code-patterns/SKILL.md +60 -0
  182. package/.allhands/skills/claude-code-patterns/docs/context-hygiene.md +19 -0
  183. package/.allhands/skills/harness-maintenance/SKILL.md +449 -0
  184. package/.allhands/skills/harness-maintenance/references/core-architecture.md +187 -0
  185. package/.allhands/skills/harness-maintenance/references/harness-skills.md +87 -0
  186. package/.allhands/skills/harness-maintenance/references/knowledge-compounding.md +78 -0
  187. package/.allhands/skills/harness-maintenance/references/tools-commands-mcp-hooks.md +115 -0
  188. package/.allhands/skills/harness-maintenance/references/validation-tooling.md +77 -0
  189. package/.allhands/skills/harness-maintenance/references/writing-flows.md +84 -0
  190. package/.allhands/validation/browser-automation.md +109 -0
  191. package/.allhands/validation/xcode-automation.md +195 -0
  192. package/.allhands/workflows/documentation.md +86 -0
  193. package/.allhands/workflows/investigation.md +81 -0
  194. package/.allhands/workflows/milestone.md +91 -0
  195. package/.allhands/workflows/optimization.md +85 -0
  196. package/.allhands/workflows/refactor.md +99 -0
  197. package/.allhands/workflows/triage.md +81 -0
  198. package/.claude/README.md +1 -0
  199. package/.claude/agents/explorer.md +10 -0
  200. package/.claude/agents/researcher.md +11 -0
  201. package/.claude/agents/task-runner.md +8 -0
  202. package/.claude/settings.json +231 -0
  203. package/.env.ai.example +7 -0
  204. package/.github/workflows/npm-publish.yml +69 -0
  205. package/.internal.json +45 -0
  206. package/.tldr/config.json +11 -0
  207. package/.tldrignore +90 -0
  208. package/CLAUDE.md +6 -0
  209. package/README.md +98 -0
  210. package/bin/sync-cli.js +7552 -0
  211. package/concerns.md +7 -0
  212. package/docs/README.md +41 -0
  213. package/docs/agents/README.md +24 -0
  214. package/docs/agents/agent-configuration-system.md +86 -0
  215. package/docs/agents/execution-agents.md +50 -0
  216. package/docs/agents/knowledge-agents.md +61 -0
  217. package/docs/agents/orchestration-agent.md +57 -0
  218. package/docs/agents/planning-agents.md +84 -0
  219. package/docs/agents/quality-review-agents.md +67 -0
  220. package/docs/agents/workflow-agent-orchestration.md +69 -0
  221. package/docs/flows/README.md +44 -0
  222. package/docs/flows/compounding.md +126 -0
  223. package/docs/flows/coordination.md +72 -0
  224. package/docs/flows/core-harness-integration.md +63 -0
  225. package/docs/flows/documentation-orchestration.md +98 -0
  226. package/docs/flows/e2e-test-plan-building.md +83 -0
  227. package/docs/flows/emergent-refinement.md +104 -0
  228. package/docs/flows/flow-authoring-and-mcp-tools.md +89 -0
  229. package/docs/flows/judge-reviewing.md +112 -0
  230. package/docs/flows/plan-deepening-and-research.md +107 -0
  231. package/docs/flows/plan-review-jury.md +114 -0
  232. package/docs/flows/pr-reviewing.md +54 -0
  233. package/docs/flows/prompt-task-execution.md +119 -0
  234. package/docs/flows/spec-planning.md +162 -0
  235. package/docs/flows/type-specific-scoping-flows.md +49 -0
  236. package/docs/flows/validation-and-skills-integration.md +145 -0
  237. package/docs/flows/wip/wip-flows.md +102 -0
  238. package/docs/harness/README.md +23 -0
  239. package/docs/harness/agent-profiles.md +84 -0
  240. package/docs/harness/cli/README.md +24 -0
  241. package/docs/harness/cli/cli-entry-and-command-discovery.md +91 -0
  242. package/docs/harness/cli/docs-command.md +87 -0
  243. package/docs/harness/cli/knowledge-command.md +91 -0
  244. package/docs/harness/cli/minor-cli-commands.md +65 -0
  245. package/docs/harness/cli/oracle-command.md +113 -0
  246. package/docs/harness/cli/planning-command.md +95 -0
  247. package/docs/harness/cli/schema-and-validation-commands.md +154 -0
  248. package/docs/harness/cli/search-commands.md +97 -0
  249. package/docs/harness/cli/spawn-command.md +136 -0
  250. package/docs/harness/cli/specs-command.md +102 -0
  251. package/docs/harness/cli/tools-command.md +122 -0
  252. package/docs/harness/cli/trace-command.md +122 -0
  253. package/docs/harness/cli-daemon.md +92 -0
  254. package/docs/harness/event-loop.md +184 -0
  255. package/docs/harness/hooks/README.md +15 -0
  256. package/docs/harness/hooks/context-hooks.md +96 -0
  257. package/docs/harness/hooks/lifecycle-and-observability-hooks.md +135 -0
  258. package/docs/harness/hooks/validation-hooks.md +97 -0
  259. package/docs/harness/test-harness.md +149 -0
  260. package/docs/harness/tui.md +176 -0
  261. package/docs/memories.md +20 -0
  262. package/docs/solutions/agentic-issues/premature-agent-deletion-tui-action-dependency-20260130.md +49 -0
  263. package/docs/solutions/agentic-issues/ref-anchor-scope-mismatch-skill-references-20260131.md +55 -0
  264. package/docs/solutions/agentic-issues/tautological-tests-routing-20260131.md +52 -0
  265. package/docs/solutions/integration_issue/blocktool-output-format-mismatch-hook-runner-20260130.md +52 -0
  266. package/docs/solutions/integration_issue/dual-validation-path-divergence-schema-20260130.md +66 -0
  267. package/docs/solutions/security-issues/unsanitized-domain-path-join-20260131.md +52 -0
  268. package/docs/solutions/test-failures/event-loop-mock-ordering-checkAgentWindows-20260130.md +63 -0
  269. package/docs/sync-cli/README.md +19 -0
  270. package/docs/sync-cli/cli-entrypoint-and-commands.md +39 -0
  271. package/docs/sync-cli/commands/README.md +11 -0
  272. package/docs/sync-cli/commands/pull-manifest-command.md +36 -0
  273. package/docs/sync-cli/commands/push-command.md +84 -0
  274. package/docs/sync-cli/commands/sync-command.md +71 -0
  275. package/docs/sync-cli/systems/README.md +14 -0
  276. package/docs/sync-cli/systems/git-and-github-integration.md +49 -0
  277. package/docs/sync-cli/systems/interactive-ui.md +43 -0
  278. package/docs/sync-cli/systems/manifest-and-distribution.md +51 -0
  279. package/docs/sync-cli/systems/path-resolution.md +42 -0
  280. package/package.json +46 -0
  281. package/scripts/install-shim.sh +40 -0
  282. package/scripts/pre-pack.sh +25 -0
  283. package/specs/harness-maintenance-skill.spec.md +138 -0
  284. package/specs/roadmap/git-spec-lifecycle-management.spec.md +113 -0
  285. package/specs/sync-init-flag.spec.md +117 -0
  286. package/specs/unified-workflow-orchestration.spec.md +250 -0
  287. package/specs/validation-tooling-practice.spec.md +98 -0
  288. package/specs/workflow-domain-configuration.spec.md +265 -0
  289. package/src/commands/pull-manifest.ts +31 -0
  290. package/src/commands/push.ts +344 -0
  291. package/src/commands/sync.ts +289 -0
  292. package/src/lib/constants.ts +10 -0
  293. package/src/lib/dotfiles.ts +36 -0
  294. package/src/lib/fs-utils.ts +18 -0
  295. package/src/lib/gh.ts +40 -0
  296. package/src/lib/git.ts +63 -0
  297. package/src/lib/gitignore.ts +167 -0
  298. package/src/lib/manifest.ts +121 -0
  299. package/src/lib/marker-sync.ts +39 -0
  300. package/src/lib/paths.ts +38 -0
  301. package/src/lib/target-lines.ts +66 -0
  302. package/src/lib/ui.ts +78 -0
  303. package/src/sync-cli.ts +120 -0
  304. package/target-lines.json +23 -0
  305. package/tsconfig.json +20 -0
@@ -0,0 +1,195 @@
1
+ ---
2
+ name: xcode-automation
3
+ description: "Xcode-based validation for iOS/macOS native implementations — exploratory build verification, performance profiling, UI automation, and runtime analysis"
4
+ globs:
5
+ - "**/*.swift"
6
+ - "**/*.m"
7
+ - "**/*.h"
8
+ - "**/*.xib"
9
+ - "**/*.storyboard"
10
+ - "**/*.xcodeproj/**"
11
+ - "**/*.xcworkspace/**"
12
+ - "**/ios/**"
13
+ - "**/macos/**"
14
+ - "**/Podfile"
15
+ - "**/Package.swift"
16
+ - "**/*.entitlements"
17
+ - "**/*.plist"
18
+ tools:
19
+ - "xcodebuildmcp"
20
+ - "xctrace"
21
+ ---
22
+
23
+ ## Purpose
24
+
25
+ This suite validates native Apple platform quality across a unified domain: build integrity, runtime performance, UI interaction correctness, and resource profiling. These are sub-concerns within a single validation domain — the Xcode build and runtime environment — not separate suites.
26
+
27
+ The stochastic dimension uses agent-driven Xcode automation to build projects, deploy to simulators, explore UI via accessibility-based interaction, capture logs, and probe performance characteristics using profiling instruments. The deterministic dimension (unit tests, snapshot tests via `xcodebuild test`) is planned but not yet implemented.
28
+
29
+ Per **Agentic Validation Tooling**, this suite meets the existence threshold: the stochastic dimension (exploratory build verification, UI automation, performance profiling, memory analysis) provides meaningful agent-driven validation beyond what deterministic tests alone can cover.
30
+
31
+ ## Tooling
32
+
33
+ ### XcodeBuildMCP (stochastic dimension)
34
+
35
+ - **Harness integration**: Registered as MCP server `xcodebuild` — access via `ah tools xcodebuild`. Run `ah tools xcodebuild --help-tool` for full parameter schemas before exploration.
36
+ - MCP server wrapping `xcodebuild`, `simctl`, and AXe (accessibility-based UI automation). 63+ tools across workflow groups — exposed as MCP tool calls via `ah tools xcodebuild:<tool>`.
37
+ - **Workflow groups**: Only `simulator` is enabled by default. Enable additional groups (`ui-automation`, `logging`, `project-discovery`, `session-management`, `simulator-management`) via `.xcodebuildmcp/config.yaml` in the target project.
38
+ - **Session defaults model**: `session-set-defaults` (hyphenated) persists workspace path, scheme, simulator, and configuration across subsequent calls — reduces token overhead significantly. Use `workspacePath` for CocoaPods projects (`.xcworkspace`), `projectPath` for standalone `.xcodeproj`. Always set session defaults before exploration.
39
+ - **Tool discovery first**: Run `ah tools xcodebuild` to see all available tools, then `ah tools xcodebuild --help-tool` for parameter schemas. Tool awareness shapes what you attempt — treat discovery as a prerequisite, not an afterthought.
40
+
41
+ ### xctrace (stochastic dimension — profiling)
42
+
43
+ - **Installation**: Ships with Xcode. Verify with `xcrun xctrace version`.
44
+ - CLI for Instruments profiling — not an MCP tool; invoked directly via shell. Run `xcrun xctrace help` and `xcrun xctrace record --help` before any profiling — the subcommand vocabulary (record, export, list, symbolicate) determines what analysis is possible.
45
+ - **Template-based recording**: `xcrun xctrace list templates` reveals available profiling templates. Templates define which instruments are active during a recording session.
46
+ - **Attachment by PID**: `--attach` requires a numeric PID, not a process name. For simulator apps, `xcrun simctl spawn <UDID> launchctl list | grep <bundle_id>` returns the simulator-internal PID, which xctrace cannot use. Instead, find the **host PID** via `pgrep -f "appname.app/appname"`. For `--launch` mode, all flags (`--time-limit`, `--output`, `--no-prompt`) must come **before** the `--launch -- <bundle_id>` terminator — flags after `--` are passed to the launched app, not xctrace.
47
+
48
+ ## Stochastic Validation
49
+
50
+ Agent-driven exploratory Xcode validation. This section teaches WHAT to validate and WHY — MCP tool discovery and `xcrun xctrace help` teach HOW.
51
+
52
+ ### Core Loop
53
+
54
+ **Prerequisite**: Run `ah tools xcodebuild --help-tool` for parameter schemas, then `session-set-defaults` with the target project. Run `xcrun xctrace help` to internalize profiling vocabulary.
55
+
56
+ Discover project → build → deploy to simulator → explore UI → capture logs → profile performance → analyze results.
57
+
58
+ This is the thinking pattern to internalize, not a command sequence:
59
+
60
+ - Always discover before building — `discover_projs` reveals workspace/project structure, `list_schemes` shows available build targets. Never assume scheme names.
61
+ - Set session defaults early — `workspacePath` (for CocoaPods projects) or `projectPath`, scheme, simulator name, and configuration persist across calls. This eliminates repetitive parameter passing and reduces token cost.
62
+ - **Visible simulator preferred** — call `open_sim` before `boot_sim` to make the simulator visible. Headless mode (`boot_sim` without `open_sim`) provides no visual feedback on what the agent is doing. Visible simulators let engineers observe agent-driven UI interactions in real time. Default to visible; use headless only in CI environments.
63
+ - Verify build success before deployment — build failures surface dependency issues, missing signing, or configuration problems that must be resolved before any runtime validation. For React Native / Expo projects, `npx expo prebuild --platform ios --clean` must run before the first build to ensure all XCFramework slices are downloaded.
64
+ - Use `preferXcodebuild=true` for clean builds — the incremental build system (`xcodemake`) can produce incomplete `.app` bundles (missing Info.plist, executables). Always use `--preferXcodebuild=true` on the first build after `clean` or `expo prebuild`.
65
+ - Check logs after interactions — native logs via `start_sim_log_cap` (requires `bundleId`), JS logs via direct `log stream` with `subsystem == "com.facebook.react.log"` for RN/Expo apps (see Log capture and analysis use case). Runtime crashes, constraint violations, and warnings appear in logs, not the UI.
66
+ - Profile after functional verification — profiling a broken app wastes time. Confirm the app runs correctly first, then measure performance.
67
+
68
+ ### Use Cases
69
+
70
+ These seed categories guide exploration. Per **Frontier Models are Capable**, the agent extrapolates deeper investigation from these starting points.
71
+
72
+ - **Build verification**: `discover_projs` to find workspace, `list_schemes` to enumerate targets, `build_sim --preferXcodebuild=true` to compile for simulator. Verify clean builds succeed. Exercise `clean` then rebuild to catch incremental build artifacts masking errors. Check `show_build_settings` for unexpected configuration (wrong SDK, missing preprocessor flags, incorrect deployment target).
73
+ - **Deploy and run**: For reliable deployment, use the multi-step sequence: `build_sim` → `get_sim_app_path` → `get_app_bundle_id` → `install_app_sim` → `launch_app_sim`. The composite `build_run_sim` is convenient but can time out on long builds. Verify the app launches without crashes — `launch_app_logs_sim` captures stdout/stderr from launch. Boot specific simulator devices via `list_sims` and `boot_sim` to test across device classes (iPhone SE, iPhone 16 Pro Max, iPad).
74
+ - **UI automation and verification**: Enable the `ui-automation` workflow group. Verification uses two complementary methods — both are required, not interchangeable:
75
+ - **Programmatic verification** (`describe_ui`): Captures the full accessibility hierarchy with precise frame coordinates. Use for asserting specific state changes: element labels, button presence/absence, text content. Call `describe_ui` after each interaction to confirm the expected state change occurred (e.g., "Like count: 0" → "Like count: 3"). This is the primary method for **semantic state verification** — did the right data appear?
76
+ - **Visual verification** (`screenshot` + read the image): Captures a screenshot and the agent MUST visually inspect it to verify layout correctness, rendering quality, and visual state. This catches issues invisible to the accessibility hierarchy: clipped text, overlapping elements, incorrect colors, broken layouts, missing images, compressed frames. This is the primary method for **visual/layout verification** — does it look right?
77
+ - Per **Agentic Validation Tooling**, the agent is the observer — it must use both its programmatic and visual senses. Taking a screenshot without reading it provides no validation value. `describe_ui` alone misses rendering bugs. Use `describe_ui` for "is the state correct?" and `screenshot` (visually inspected) for "does it render correctly?"
78
+ - Interact via `tap`, `swipe`, `type_text`, `key_press` using accessibility labels from `describe_ui` (preferred) or coordinates. Walk critical user flows: onboarding, navigation between screens, form submission, back navigation.
79
+ - **Log capture and analysis**: Two separate log channels exist — one for native logs and one for JS logs:
80
+ - **Native logs** (`start_sim_log_cap`): Requires `bundleId`. Captures structured `os_log` messages filtered by `subsystem == "<bundleId>"`. Surfaces: constraint ambiguity warnings (Auto Layout issues), main thread violations, memory warnings, unhandled exceptions, API deprecation notices, missing `UIBackgroundModes` entries. Use `captureConsole=true` to additionally capture the app process's stdout/stderr (note: this relaunches the app on start and **terminates it on stop** — plan the lifecycle accordingly).
81
+ - **JS logs for React Native / Expo**: JavaScript `console.log` output routes through Hermes JSI → `RCTLog` → Apple's `os_log` under subsystem `com.facebook.react.log` (category `javascript`) — NOT the app's bundle ID subsystem and NOT stdout/stderr. This means `start_sim_log_cap` will **not** capture JS console.log messages, because it filters by the app's bundle ID subsystem. Two approaches to capture JS logs:
82
+ 1. **Direct `log stream`** (preferred for automation): `xcrun simctl spawn <UDID> log stream --level=debug --predicate 'subsystem == "com.facebook.react.log"'` — captures JS `console.log` output in real time via the simulator's unified log system. Run in background, exercise the app, then inspect the output.
83
+ 2. **Metro terminal output**: When Metro is running (`expo start`), JS logs also appear in Metro's stdout as `LOG [message]`. If Metro is running as a background task, its output file contains all JS logs.
84
+ - For comprehensive validation, use both channels: `start_sim_log_cap` for native-level diagnostics and direct `log stream` (or Metro output) for JS-level state change verification. The JS log channel is essential for verifying that UI automation interactions produce the expected application-level state changes.
85
+ - **Performance profiling**: Use `xctrace` after confirming the app runs correctly. Find the **host PID** via `pgrep -f "appname.app/appname"` (not `launchctl list`, which returns the simulator-internal PID). Then: `xcrun xctrace record --template 'Time Profiler' --device '<UDID>' --attach '<PID>' --time-limit 30s --output /tmp/profile.trace --no-prompt`. Export with `xcrun xctrace export --input /tmp/profile.trace --toc` to understand trace structure (schemas, tables), then XPath queries for specific tables. For Expo/RN apps, look for `com.facebook.react.runtime.JavaScript` (Hermes JS thread) and `hades` (Hermes GC) in thread samples.
86
+ - **Memory analysis**: Same host PID discovery, then `xcrun xctrace record --template 'Leaks' --device '<UDID>' --attach '<PID>' --time-limit 60s --output /tmp/leaks.trace --no-prompt`. The `Leaks` template includes the `Allocations` instrument — a single recording provides both leak detection and heap allocation statistics. Export leak results via `xcrun xctrace export --input /tmp/leaks.trace --xpath '/trace-toc/run[@number="1"]/tracks/track[@name="Leaks"]/details/detail[@name="Leaks"]'` and allocation statistics via the `Allocations` track. The standalone `Allocations` template uses deferred recording mode, making CLI export less straightforward — prefer `Leaks` for combined analysis. Exercise flows repeatedly and check for monotonic heap growth indicating retain cycles.
87
+ - **Combined profiling + UI automation**: Run `xctrace record` in the background (long `--time-limit` or no limit), then exercise the app via xcodebuild MCP UI automation (`describe_ui`, `tap`, `gesture`) while profiling captures the runtime behavior. This surfaces memory leaks, performance regressions, and hangs in actual user flows, not just idle state. **Ordering matters**: `xctrace --attach` sessions end when the target process exits. If `stop_sim_log_cap` (console mode) terminates the app while xctrace is recording, the trace ends early. Always let xctrace reach its `--time-limit` or stop it explicitly before terminating the app or stopping console log capture.
88
+ - **Animation quality**: Run `xcrun xctrace record --template 'Animation Hitches' --device '<UDID>' --attach '<PID>' --time-limit 30s --output /tmp/hitches.trace --no-prompt` while scrolling, navigating, and animating. Hitch duration > 33ms (2 frames at 60 Hz) indicates dropped frames visible to users. **Note**: Animation Hitches is **not supported on simulator** — it requires a physical device.
89
+ - **Additional profiling templates**: Beyond the core three, `App Launch` measures startup time (critical for RN/Expo apps with large JS bundles), `Network` captures HTTP request timing and payload sizes, `SwiftUI` profiles SwiftUI-specific rendering (not relevant for RN), `Swift Concurrency` profiles async/await patterns, `CPU Counters` provides low-level CPU performance data, and `Power Profiler` measures battery impact.
90
+ - **Evidence capture**: Per **Agentic Validation Tooling**, two audiences require different artifacts. Agent self-verification (real-time `describe_ui` checks, screenshot visual inspection, log stream monitoring) happens during the observe-act-verify loop. Engineer review artifacts (`screenshot` images, `record_sim_video` recordings, xctrace `.trace` files, captured log output) are produced after exploration. Pattern: explore first, then capture review evidence — but the agent MUST visually read screenshots it takes during exploration, not just save them.
91
+
92
+ ### Resilience
93
+
94
+ Stochastic exploration in the Xcode environment has unique failure modes. These patterns prevent death spirals:
95
+
96
+ - Max 3 retries on any build or interaction, then report failure and move on
97
+ - `screenshot` on failure — capture simulator state before recovery attempts
98
+ - Simulator reset if app becomes unresponsive — `stop_app_sim`, then re-launch. If simulator itself hangs, `erase_sims` for a clean slate
99
+ - Code signing bail-out — provisioning profile or certificate errors: report the exact error and move on. These require human intervention.
100
+ - CocoaPods/SPM resolution — dependency resolution failures: check `Podfile.lock` freshness, try `clean` and rebuild. Report if unresolvable.
101
+ - Stale session defaults — if switching between projects or schemes, always call `session-set-defaults` again. Stale defaults cause confusing errors.
102
+ - xctrace attachment failures — `--attach` requires the **host PID** (via `pgrep -f "appname.app/appname"`), not the simulator-internal PID from `launchctl list`. If the app exits before profiling starts, use `--launch` mode — but note that `--launch` does not pass URL scheme arguments, so Expo/RN apps may fail to connect to the Metro bundler. For Expo apps, prefer `--attach` after launching the app via xcodebuild MCP or `expo start`.
103
+ - Incomplete `.app` bundles — if `install_app_sim` fails with "Missing bundle ID", the build produced a partial bundle. Run `clean` then `build_sim --preferXcodebuild=true` (the incremental builder can produce incomplete output).
104
+ - React Native / Expo setup — `npx expo prebuild --platform ios --clean` must run before first build to download XCFramework simulator slices. Without this, the `[CP] Copy XCFrameworks` build phase fails with rsync errors.
105
+
106
+ Use `ah tools xcodebuild --help-tool` and `xcrun xctrace help` for all available operations. This suite teaches what to validate and why — the tools teach how.
107
+
108
+ ### Simulator Visibility
109
+
110
+ Per **Agentic Validation Tooling**, programmatic validation replaces human supervision — but engineer trust requires observability. **Visible simulators are preferred** over headless for local development and validation:
111
+
112
+ - Call `open_sim` before `boot_sim` to make the Simulator.app window visible
113
+ - Visible mode lets engineers observe agent-driven UI interactions, verify screenshot quality, and spot issues the agent might miss
114
+ - `boot_sim` alone boots headless (no window) — appropriate for CI but not for interactive validation sessions
115
+ - The `.xcodebuildmcp/config.yaml` `sessionDefaults.simulatorName` targets the device; visibility is controlled by whether Simulator.app is open
116
+
117
+ ### Simulator Isolation (Multi-Worktree)
118
+
119
+ When running validation across multiple worktrees simultaneously, each needs an isolated simulator, derived data path, and Metro port to prevent contention:
120
+
121
+ - **Dedicated simulator**: `xcrun simctl create "<worktree>-iPhone16Pro" "iPhone 16 Pro"` creates a named clone. Target by UDID via `session-set-defaults --simulatorId=<UDID>`.
122
+ - **Derived data isolation**: `build_sim --derivedDataPath=<path>` or `-derivedDataPath` on any build tool keeps build products separate per worktree.
123
+ - **AGENT_ID isolation**: The harness MCP daemon already isolates sessions by `AGENT_ID` — parallel agents get independent XcodeBuildMCP sessions automatically.
124
+ - **Expo Metro port isolation**: Each worktree's Metro bundler must run on a unique port. Use `expo start --port <port>` (e.g., 8081, 8082, 8083). The built app connects to the port specified at launch time — if switching ports, the app must be rebuilt with `expo run:ios` targeting the new port, or the `RCT_METRO_PORT` environment variable must be set before build. The `--port` flag on `expo start` only controls where Metro listens; the app's compiled bundler URL must match.
125
+
126
+ ### Test Target Setup (Expo / React Native)
127
+
128
+ For Expo projects used as validation targets:
129
+
130
+ 1. `npm install` in the project root
131
+ 2. `npx expo prebuild --platform ios --clean` — generates `ios/` directory with proper XCFramework slices. CocoaPods install is handled automatically.
132
+ 3. **Build using preconfigured package.json scripts when they exist** — check for `"ios"` script (typically `expo run:ios`). Use `npm run ios -- --device "<SimulatorName>"` to target a specific simulator. Fall back to `expo run:ios` directly if no script exists. For already-built apps testing JS-only changes, use `expo start` instead.
133
+ 4. `expo run:ios` handles the full pipeline: build → install → launch. However, in non-interactive environments it may log `"Skipping dev server"` — the Metro bundler won't start automatically. Start `expo start --port 8081` separately to serve the JS bundle, then reload the app via UI automation (`tap --label "Reload"` on the red box).
134
+ 5. First build after `expo prebuild --clean` should use `--preferXcodebuild=true` if building via xcodebuild MCP tools directly.
135
+
136
+ The workspace path for session defaults is `./ios/<project>.xcworkspace` (not `.xcodeproj`) when CocoaPods are in use. Use `simulatorId` (UDID) rather than `simulatorName` — the two parameters are **mutually exclusive** in `session-set-defaults`, and many tools require the UDID. Prefer `simulatorId` for reliable targeting.
137
+
138
+ ### Expo Dev Server and HMR
139
+
140
+ For JS-only change validation (no native code changes):
141
+
142
+ - `expo start --port <port>` starts the Metro bundler for Hot Module Replacement. JS file changes propagate to the running app automatically without rebuild.
143
+ - Verify HMR changes via `describe_ui` — check that `AXLabel` values reflect the updated text after file save.
144
+ - The Metro bundler must be running on the port the app expects (default 8081). The app connects to the bundler URL passed at launch time.
145
+ - For native code changes, a full rebuild via `expo run:ios` or `build_sim` is required.
146
+
147
+ ### Deterministic Teardown
148
+
149
+ Tear down in reverse order of setup to prevent orphaned processes. **Ordering is critical** — stopping log capture (console mode) terminates the app, which terminates any attached xctrace session:
150
+
151
+ 1. **Wait for / stop xctrace**: If profiling is active, either let it reach `--time-limit` or terminate it first. An attached xctrace session will end when the app process exits, so stopping it before step 2 ensures a clean trace file.
152
+ 2. **Stop the app**: `stop_app_sim --bundleId "<bundle_id>"` — terminates the app in the simulator
153
+ 3. **Stop active log captures**: `stop_sim_log_cap --logSessionId "<id>"` for any active sessions. Console capture mode terminates the app on stop — skip step 2 if using console capture.
154
+ 4. **Kill JS log stream**: If a background `log stream` process was started for JS log capture (subsystem `com.facebook.react.log`), terminate it.
155
+ 5. **Clear session defaults**: `session-clear-defaults --all true` — prevents stale defaults from affecting the next session
156
+ 6. **Kill the Metro dev server**: Terminate the `expo start` process (background task or PID)
157
+ 7. **Clean trace artifacts**: Remove `.trace` bundles from `/tmp` or working directory
158
+ 8. **Simulator**: Leave running for potential reuse by other worktrees. Only shut down via `xcrun simctl shutdown <UDID>` if explicitly cleaning up.
159
+
160
+ ### Bad State Detection
161
+
162
+ Detect app failures by comparing `describe_ui` output against expected state:
163
+
164
+ - **Red box / error screen**: UI hierarchy contains `redbox-dismiss`, `redbox-reload`, `redbox-copy` buttons — the app hit a JS error or couldn't connect to the dev server
165
+ - **Home screen instead of app**: `AXLabel` shows system app names (Safari, Messages, etc.) with `pid` belonging to SpringBoard — the app crashed or was terminated
166
+ - **Empty view hierarchy**: Single `Application` node with no children — the app is loading or hung during initialization
167
+ - **Stale PID**: `describe_ui` returns elements with a different `pid` than expected — the app was relaunched (possibly by `captureConsole` or xctrace `--launch`)
168
+
169
+ On bad state detection: `screenshot` for evidence, then attempt recovery via `stop_app_sim` → `launch_app_sim`. If the simulator itself is unresponsive, `erase_sims` for a clean slate.
170
+
171
+ ## Deterministic Integration
172
+
173
+ **Planned — not yet implemented.**
174
+
175
+ The deterministic dimension for this suite will use `xcodebuild test` for CI-gated binary pass/fail validation:
176
+
177
+ - **Unit tests**: XCTest suites validating business logic, model layer, and service interfaces. Run via `xcodebuild test -scheme <scheme> -destination 'platform=iOS Simulator,name=<device>'`.
178
+ - **Snapshot tests**: Point-Free's `swift-snapshot-testing` or similar for visual regression of individual views/components. Baseline images are committed to the repo; CI fails on drift.
179
+ - **Performance tests**: XCTest `measure {}` blocks with baselines for critical code paths. Fail on regression beyond configured deviation.
180
+
181
+ These will be implemented as the suite matures through the crystallization lifecycle — current stochastic exploration patterns will inform which deterministic gates are most valuable.
182
+
183
+ ## ENV Configuration
184
+
185
+ | Variable | Required | Dimension | Purpose |
186
+ |----------|----------|-----------|---------|
187
+ | `XCODE_WORKSPACE` | No | Both | Path to `.xcworkspace` (discovered automatically if not set) |
188
+ | `XCODE_SCHEME` | No | Both | Build scheme name (discovered via `list_schemes` if not set) |
189
+ | `SIMULATOR_NAME` | No | Stochastic | Target simulator device (e.g., `iPhone 16 Pro`) |
190
+ | `XCODEBUILDMCP_WORKFLOWS` | No | Stochastic | Comma-separated workflow groups to enable beyond `simulator` |
191
+ | `DERIVED_DATA_PATH` | No | Both | Custom DerivedData location for build isolation |
192
+ | `RCT_METRO_PORT` | No | Stochastic | Metro bundler port for Expo/RN apps (default 8081). Must match `expo start --port` |
193
+ | `CI` | Auto | Deterministic | Set by CI environment; controls test retry and reporter behavior |
194
+
195
+ Project-specific configuration should be committed as `.xcodebuildmcp/config.yaml` in the target project root. This file controls which workflow groups are enabled and sets session defaults (workspace path, scheme, simulator, configuration). The agent should still discover the target project's workspace structure at execution time via `discover_projs` — the config file provides defaults, not overrides.
@@ -0,0 +1,86 @@
1
+ ---
2
+ name: documentation
3
+ type: documentation
4
+ planning_depth: focused
5
+ jury_required: false
6
+ max_tangential_hypotheses: 1
7
+ required_ideation_questions:
8
+ - "What areas need documentation?"
9
+ - "Who is the audience?"
10
+ - "Any existing docs to extend or replace?"
11
+ - "What format and location?"
12
+ ---
13
+
14
+ ## Domain Knowledge
15
+
16
+ ### Audience-First Thinking
17
+
18
+ Documentation specs are organized around audiences, not features. The same system may need different documentation for different readers:
19
+
20
+ | Audience | Focus | Depth |
21
+ |----------|-------|-------|
22
+ | **Developers** | APIs, architecture, contribution guides | Technical, code-level |
23
+ | **End users** | Features, workflows, troubleshooting | Task-oriented, no internals |
24
+ | **Ops/SRE** | Runbooks, monitoring, deployment | Operational, procedure-focused |
25
+ | **New team members** | Onboarding, architecture overview, conventions | Progressive, context-building |
26
+
27
+ ### Documentation State Vocabulary
28
+
29
+ Existing documentation falls into identifiable states that inform the approach:
30
+
31
+ | State | Meaning | Action |
32
+ |-------|---------|--------|
33
+ | **Outdated** | Exists but no longer accurate | Update with current reality |
34
+ | **Missing** | No documentation exists | Create from scratch |
35
+ | **Scattered** | Information exists across multiple locations | Consolidate and organize |
36
+ | **Wrong** | Actively misleading | Correct with high priority |
37
+
38
+ ### Format Taxonomy
39
+
40
+ Documentation format should match audience and content type:
41
+
42
+ | Format | Best For |
43
+ |--------|----------|
44
+ | **README** | Project overview, quickstart, contribution guide |
45
+ | **Docs site** | Comprehensive reference, tutorials, guides |
46
+ | **Inline code docs** | API reference, function-level documentation |
47
+ | **Runbooks** | Operational procedures, incident response |
48
+
49
+ ## Ideation Guidance
50
+
51
+ Per **Knowledge Compounding**, documentation compounds value when it targets the right audience with the right depth.
52
+
53
+ ### Probe Guidance
54
+
55
+ - Probe vague coverage requests — demand specific areas and audiences
56
+ - Distinguish between "no docs" and "wrong docs" — the approach differs significantly
57
+
58
+ ### Output Sections
59
+
60
+ Spec body sections for documentation domain:
61
+ - **Motivation**: Why current documentation is insufficient
62
+ - **Goals**: Coverage targets by audience and area
63
+ - **Technical Considerations**: Existing docs state, format preferences, location
64
+ - **Open Questions**: Unknowns the planner should investigate
65
+
66
+ ## Planning Considerations
67
+
68
+ ### Coverage-by-Audience-and-Area Framing
69
+
70
+ Planning should organize documentation work as a coverage matrix:
71
+ - Rows: areas/features needing documentation
72
+ - Columns: audiences requiring documentation
73
+ - Cells: specific documentation deliverables
74
+
75
+ This framing prevents gaps and avoids redundant documentation across audiences.
76
+
77
+ ### Existing Documentation Assessment
78
+
79
+ Before writing new documentation, planning should assess what exists:
80
+ - Audit existing docs for accuracy and completeness
81
+ - Identify reusable content vs content needing replacement
82
+ - Map existing documentation to the coverage matrix
83
+
84
+ ### Prompt Output Range
85
+
86
+ Documentation specs produce 2-5 focused prompts. Each prompt typically covers one audience or one major area.
@@ -0,0 +1,81 @@
1
+ ---
2
+ name: investigation
3
+ type: investigation
4
+ planning_depth: focused
5
+ jury_required: false
6
+ max_tangential_hypotheses: 2
7
+ required_ideation_questions:
8
+ - "What's broken / what's the issue?"
9
+ - "What evidence do you have?"
10
+ - "What does 'fixed' look like?"
11
+ - "Any constraints?"
12
+ - "Any suspected root causes?"
13
+ ---
14
+
15
+ ## Domain Knowledge
16
+
17
+ ### Problem-Evidence-Fix Framing
18
+
19
+ Investigation specs are structured around a symptom-first approach: capture what's wrong, gather evidence, define what "fixed" means. The engineer describes symptoms, not suspected causes — root cause identification is the investigation's output, not its input.
20
+
21
+ ### Evidence Vocabulary
22
+
23
+ Evidence types to surface and categorize:
24
+
25
+ | Evidence Type | Examples |
26
+ |---------------|----------|
27
+ | **Error logs** | Stack traces, error messages, log patterns |
28
+ | **Reproduction steps** | Exact sequence to trigger the issue |
29
+ | **Affected scope** | Users affected, environments, frequency |
30
+ | **Temporal patterns** | When it started, intermittent vs constant, correlation with deploys |
31
+ | **Metrics** | Error rates, latency spikes, resource exhaustion signals |
32
+
33
+ ### Suspected Root Causes as Hypothesis Seeds
34
+
35
+ Engineer-provided suspected causes are hypothesis seeds, not conclusions. They inform investigation direction but should not constrain the search space. Weight them alongside evidence-based hypotheses generated during planning.
36
+
37
+ ### Knowledge Gap Detection
38
+
39
+ | Signal | Action |
40
+ |--------|--------|
41
+ | "It just broke" (no timeline) | Probe for recent changes, deploys, config updates |
42
+ | "It happens sometimes" (no pattern) | Probe for environmental differences, load conditions |
43
+ | "I think it's X" (premature diagnosis) | Acknowledge hypothesis, still gather full evidence |
44
+ | Symptom described as cause | Redirect to observable behavior — "what do you see?" |
45
+
46
+ ## Ideation Guidance
47
+
48
+ Per **Ideation First**, the investigation interview captures the problem space so the planner can ground hypotheses in evidence.
49
+
50
+ ### Probe Guidance
51
+
52
+ - Probe vague symptom descriptions — demand concrete evidence
53
+ - Separate symptoms from suspected causes — capture both but label them distinctly
54
+
55
+ ### Output Sections
56
+
57
+ Spec body sections for investigation domain:
58
+ - **Motivation**: The problem and its impact
59
+ - **Goals**: Success criteria from "what does fixed look like"
60
+ - **Technical Considerations**: Evidence, constraints, suspected causes
61
+ - **Open Questions**: Unknowns the planner should investigate
62
+
63
+ ## Planning Considerations
64
+
65
+ ### Focused Research
66
+
67
+ Research should stay focused on the problem domain rather than exploring the codebase broadly. Investigation planning should:
68
+ - Ground hypotheses in gathered evidence
69
+ - Prioritize hypotheses by evidence weight and impact
70
+ - Design diagnostic steps that narrow the search space efficiently
71
+
72
+ ### Hypothesis-Driven Investigation Approach
73
+
74
+ Prompts should be structured as hypothesis validation steps:
75
+ - Each prompt tests one or more hypotheses
76
+ - Early prompts gather diagnostic data; later prompts apply fixes
77
+ - Evidence correlation patterns guide hypothesis ordering
78
+
79
+ ### Prompt Output Range
80
+
81
+ Investigation specs produce 2-5 focused prompts. Investigation is inherently iterative — fewer, targeted prompts are preferred over broad sweeps.
@@ -0,0 +1,91 @@
1
+ ---
2
+ name: milestone
3
+ type: milestone
4
+ planning_depth: deep
5
+ jury_required: true
6
+ max_tangential_hypotheses: 5
7
+ required_ideation_questions:
8
+ - "What are you trying to accomplish?"
9
+ - "Why does this matter and what worries you about this?"
10
+ - "What can you handle vs need automated?"
11
+ - "What would success look like?"
12
+ ---
13
+
14
+ ## Domain Knowledge
15
+
16
+ ### Core Interview Dimensions
17
+
18
+ The `required_ideation_questions` elicit most of these dimensions directly; Desires is not covered by a required question, so ask about it separately or infer it. Also infer dimensions passively from engineer behavior:
19
+
20
+ | Dimension | Elicit via | Infer from |
21
+ |-----------|------------|------------|
22
+ | Goals | "What are you trying to accomplish?" | Problem description |
23
+ | Motivations | "Why does this matter?" | Frustrations expressed |
24
+ | Concerns | "What worries you about this?" | Caveats/hedging |
25
+ | Desires | "What would ideal look like?" | Enthusiasm |
26
+ | Capabilities | "What can you handle vs need automated?" | Technical language |
27
+ | Expectations | "What would success look like?" | Examples given |
28
+
29
+ ### Category Deep Dives
30
+
31
+ Work through relevant categories based on milestone scope. Each category surfaces domain-specific concerns that engineers often underspecify:
32
+
33
+ | Category | Key Questions | Knowledge Gap Signals |
34
+ |----------|---------------|----------------------|
35
+ | **User Experience** | "Walk through: user opens this first time - what happens?" | Describes features instead of journeys |
36
+ | **Data & State** | "What needs to be stored? Where does data come from/go?" | Says "just a database" without schema thinking |
37
+ | **Technical** | "What systems must this work with? Constraints?" | Picks tech without understanding tradeoffs |
38
+ | **Scale** | "How many users/requests? Now vs future?" | Says "millions" without infrastructure thinking |
39
+ | **Integrations** | "External services? APIs consumed/created?" | Assumes integrations are simple |
40
+ | **Security** | "Who should do what? Sensitive data?" | Says "just basic login" |
41
+
42
+ ### Additional Knowledge Gap Signals
43
+
44
+ | Signal | Action |
45
+ |--------|--------|
46
+ | Conflicting requirements | Surface the conflict explicitly and ask whether to resolve it with the **Disposable Variants** approach |
47
+
48
+ ### Completeness Check
49
+
50
+ Before transitioning from ideation to spec writing, verify coverage:
51
+
52
+ | Area | Verified |
53
+ |------|----------|
54
+ | Problem statement clear | [ ] |
55
+ | Technical constraints understood | [ ] |
56
+ | User expectations deeply understood | [ ] |
57
+ | Every discernible milestone element has either a user expectation or an open question for downstream agents | [ ] |
58
+ | No "To Be Discussed" items remaining | [ ] |
59
+
60
+ If gaps exist, return to surveying for specific categories.
61
+
62
+ ## Ideation Guidance
63
+
64
+ Per **Ideation First**, engineers control depth — domain config ensures coverage without forcing depth.
65
+
66
+ ### Probe Guidance
67
+
68
+ - Probe vague responses with category deep dives
69
+ - Detect knowledge gaps using the signal tables
70
+
71
+ ### Guiding Principles Synthesis
72
+
73
+ Synthesize guiding principles from the engineer's philosophy expressed during ideation. Validate synthesized principles with the engineer before proceeding to spec writing.
74
+
75
+ ### Output Sections
76
+
77
+ Spec body sections for milestone domain:
78
+ - **Motivation**: Implicit in goals — why this matters
79
+ - **Goals**: What the engineer is trying to accomplish
80
+ - **Technical Considerations**: Grounded in codebase reality from exploration subtasks
81
+ - **Open Questions**: For architect to research/decide during planning
82
+
83
+ ### Optional: Spec Flow Analysis
84
+
85
+ Before or after creating the spec, offer flow analysis for complex features. Recommended for user-facing features with multiple paths, complex integrations, or features with unclear scope boundaries.
86
+
87
+ ## Planning Considerations
88
+
89
+ ### Prompt Output Range
90
+
91
+ Milestone specs produce 5-15 coordinated prompts. Prompts must be fully autonomous — no human intervention during execution.
@@ -0,0 +1,85 @@
1
+ ---
2
+ name: optimization
3
+ type: optimization
4
+ planning_depth: focused
5
+ jury_required: false
6
+ max_tangential_hypotheses: 2
7
+ required_ideation_questions:
8
+ - "What's slow / expensive?"
9
+ - "What are the performance targets?"
10
+ - "How should improvements be measured?"
11
+ - "Current baseline metrics?"
12
+ - "Any constraints?"
13
+ ---
14
+
15
+ ## Domain Knowledge
16
+
17
+ ### Performance Vocabulary
18
+
19
+ Optimization specs are grounded in quantitative measurement. Key performance dimensions:
20
+
21
+ | Dimension | Metrics | Examples |
22
+ |-----------|---------|----------|
23
+ | **Latency** | Response time, P50/P95/P99 | "API responds in 200ms P95" |
24
+ | **Throughput** | Requests/sec, items/sec | "Process 1000 events/sec" |
25
+ | **Resource usage** | Memory, CPU, disk, connections | "Stay under 512MB RSS" |
26
+ | **Cost** | $/request, $/month, compute hours | "Reduce Lambda cost by 40%" |
27
+
28
+ ### Baseline-Target-Measurement Triple
29
+
30
+ Every optimization must establish three things:
31
+ 1. **Baseline**: Current measured performance ("now it takes 2s P95")
32
+ 2. **Target**: Concrete improvement goal ("reduce to 500ms P95")
33
+ 3. **Measurement**: How improvement is verified ("benchmark suite X, dashboard Y")
34
+
35
+ Without all three, the optimization is underspecified. Probe for missing elements.
36
+
37
+ ### Knowledge Gap Detection
38
+
39
+ | Signal | Action |
40
+ |--------|--------|
41
+ | "It feels slow" (no numbers) | Demand concrete metrics — profile first |
42
+ | "Make it faster" (no target) | Probe for acceptable thresholds |
43
+ | "Optimize everything" (no focus) | Identify the bottleneck — what's the user-facing pain? |
44
+ | Assumes cause without profiling | Redirect to measurement — "have you profiled this?" |
45
+
46
+ ## Ideation Guidance
47
+
48
+ Per **Ideation First**, the optimization interview captures measurable targets so the planner can create profiling-first hypotheses.
49
+
50
+ ### Probe Guidance
51
+
52
+ - Probe vague targets — demand concrete numbers
53
+ - Verify baseline metrics exist or flag measurement as a prerequisite task
54
+
55
+ ### Output Sections
56
+
57
+ Spec body sections for optimization domain:
58
+ - **Motivation**: What's slow/expensive and why it matters
59
+ - **Goals**: Performance targets with measurable thresholds
60
+ - **Technical Considerations**: Baseline metrics, measurement approach, constraints
61
+ - **Open Questions**: Unknowns the planner should profile or research
62
+
63
+ ## Planning Considerations
64
+
65
+ ### Profiling-First Approach
66
+
67
+ Planning should front-load measurement and profiling:
68
+ - First prompt(s) establish baseline measurements if not already available
69
+ - Optimization prompts follow, each targeting a specific bottleneck
70
+ - Final prompt verifies targets are met against the same measurement approach
71
+
72
+ ### Measurement Method Validation
73
+
74
+ The measurement approach itself must be validated — unreliable benchmarks produce unreliable results. Planning should ensure the measurement tooling is trustworthy before optimizing against it.
75
+
76
+ ### Backwards Compatibility Constraints
77
+
78
+ Optimization must not change observable behavior. Planning should surface:
79
+ - API contract preservation requirements
80
+ - Data format compatibility
81
+ - Feature flag needs for gradual rollout of performance changes
82
+
83
+ ### Prompt Output Range
84
+
85
+ Optimization specs produce 2-6 focused prompts. Measurement setup may require its own prompt if baselines don't exist.
@@ -0,0 +1,99 @@
1
+ ---
2
+ name: refactor
3
+ type: refactor
4
+ planning_depth: focused
5
+ jury_required: false
6
+ max_tangential_hypotheses: 2
7
+ required_ideation_questions:
8
+ - "What's the scope?"
9
+ - "What invariants must be preserved?"
10
+ - "What's the target architecture?"
11
+ - "Incremental or big-bang?"
12
+ - "Any constraints?"
13
+ ---
14
+
15
+ ## Domain Knowledge
16
+
17
+ ### Invariant Preservation
18
+
19
+ Refactor specs are defined as much by what must NOT change as by what should. Key invariant categories:
20
+
21
+ | Invariant Type | Examples |
22
+ |----------------|----------|
23
+ | **API contracts** | Public function signatures, REST endpoints, event schemas |
24
+ | **Observable behavior** | Output for given inputs, side effects, error handling |
25
+ | **Test coverage** | Existing tests continue to pass without modification |
26
+ | **External interfaces** | Database schemas, file formats, wire protocols |
27
+
28
+ Invariants are the safety rails of a refactor — they define the transformation's constraints and enable confident validation.
29
+
30
+ ### Current-State to Target-Architecture Framing
31
+
32
+ Every refactor must articulate:
33
+ 1. **Current state**: What exists now and why it's problematic
34
+ 2. **Target architecture**: The desired end state — pattern, structure, naming, organization
35
+ 3. **Transformation path**: How to get from current to target — incremental stages or atomic landing
36
+
37
+ ### Migration Strategy Dimension
38
+
39
+ The incremental vs big-bang decision shapes the entire plan:
40
+
41
+ | Strategy | When to Use | Planning Impact |
42
+ |----------|-------------|-----------------|
43
+ | **Incremental** | Large scope, dependent consumers, high risk | Multiple prompts with intermediate stable states |
44
+ | **Big-bang** | Small scope, isolated module, low risk | Fewer prompts, atomic transformation |
45
+ | **Feature-flagged** | Parallel old/new paths needed during transition | Additional prompt for flag setup and cleanup |
46
+
47
+ ### Knowledge Gap Detection
48
+
49
+ | Signal | Action |
50
+ |--------|--------|
51
+ | "Clean up the code" (no target) | Probe for specific target architecture |
52
+ | "Refactor everything" (no scope) | Demand scope boundaries — which modules/files? |
53
+ | "It should just work the same" (vague invariants) | Enumerate specific contracts to preserve |
54
+ | No mention of tests | Surface test coverage as explicit invariant |
55
+
56
+ ## Ideation Guidance
57
+
58
+ Per **Ideation First**, the refactor interview captures scope boundaries and invariants so the planner can create safe transformation hypotheses.
59
+
60
+ ### Probe Guidance
61
+
62
+ - Probe vague scope boundaries — demand specific modules, files, or patterns
63
+ - Enumerate invariants explicitly — don't assume the engineer has considered all contract surfaces
64
+
65
+ ### Output Sections
66
+
67
+ Spec body sections for refactor domain:
68
+ - **Motivation**: Why the current structure is problematic
69
+ - **Goals**: Target architecture and preserved invariants
70
+ - **Non-Goals**: What's explicitly out of scope (unique to refactor)
71
+ - **Technical Considerations**: Migration strategy, constraints, coordination needs
72
+ - **Open Questions**: Unknowns the planner should investigate
73
+
74
+ ## Planning Considerations
75
+
76
+ ### Feature Flag Consideration
77
+
78
+ For staged delivery, planning should evaluate whether feature flags are needed:
79
+ - Parallel old/new code paths during transition
80
+ - Gradual migration of dependent consumers
81
+ - Rollback capability for high-risk transformations
82
+
83
+ ### Dependent Consumer Coordination
84
+
85
+ When refactoring shared code, planning must account for:
86
+ - Which consumers depend on the current interface
87
+ - Migration order for dependent consumers
88
+ - Whether consumers can be updated atomically or need compatibility shims
89
+
90
+ ### Test Coverage Preservation
91
+
92
+ Planning should verify and maintain test coverage:
93
+ - Existing tests pass against both current and target architectures during transition
94
+ - New tests cover the target architecture's specific patterns
95
+ - Test migration may require its own prompt for large refactors
96
+
97
+ ### Prompt Output Range
98
+
99
+ Refactor specs produce 2-7 focused prompts. Incremental refactors with many dependent consumers trend toward the higher end.