all-hands-cli 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.allhands/README.md +75 -0
- package/.allhands/agents/compounder.yaml +15 -0
- package/.allhands/agents/coordinator.yaml +17 -0
- package/.allhands/agents/documentor.yaml +15 -0
- package/.allhands/agents/e2e-test-planner.yaml +17 -0
- package/.allhands/agents/emergent.yaml +22 -0
- package/.allhands/agents/executor.yaml +14 -0
- package/.allhands/agents/ideation.yaml +11 -0
- package/.allhands/agents/initiative-steering.yaml +19 -0
- package/.allhands/agents/judge.yaml +13 -0
- package/.allhands/agents/planner.yaml +19 -0
- package/.allhands/agents/pr-reviewer.yaml +15 -0
- package/.allhands/docs.json +5 -0
- package/.allhands/docs.local.json +26 -0
- package/.allhands/flows/COMPOUNDING.md +203 -0
- package/.allhands/flows/COORDINATION.md +89 -0
- package/.allhands/flows/CORE.md +87 -0
- package/.allhands/flows/DOCUMENTATION.md +218 -0
- package/.allhands/flows/E2E_TEST_PLAN_BUILDING.md +140 -0
- package/.allhands/flows/EMERGENT_PLANNING.md +57 -0
- package/.allhands/flows/IDEATION_SCOPING.md +154 -0
- package/.allhands/flows/INITIATIVE_STEERING.md +110 -0
- package/.allhands/flows/JUDGE_REVIEWING.md +79 -0
- package/.allhands/flows/PROMPT_TASK_EXECUTION.md +68 -0
- package/.allhands/flows/PR_REVIEWING.md +43 -0
- package/.allhands/flows/SPEC_PLANNING.md +216 -0
- package/.allhands/flows/harness/WRITING_HARNESS_FLOWS.md +27 -0
- package/.allhands/flows/harness/WRITING_HARNESS_KNOWLEDGE.md +27 -0
- package/.allhands/flows/harness/WRITING_HARNESS_ORCHESTRATION.md +27 -0
- package/.allhands/flows/harness/WRITING_HARNESS_SKILLS.md +27 -0
- package/.allhands/flows/harness/WRITING_HARNESS_TOOLS.md +27 -0
- package/.allhands/flows/harness/WRITING_HARNESS_VALIDATION_TOOLING.md +27 -0
- package/.allhands/flows/shared/CODEBASE_UNDERSTANDING.md +72 -0
- package/.allhands/flows/shared/CREATE_HARNESS_SPEC.md +48 -0
- package/.allhands/flows/shared/CREATE_SPEC.md +41 -0
- package/.allhands/flows/shared/CREATE_VALIDATION_TOOLING_SPEC.md +70 -0
- package/.allhands/flows/shared/DOCUMENTATION_DISCOVERY.md +123 -0
- package/.allhands/flows/shared/DOCUMENTATION_WRITER.md +101 -0
- package/.allhands/flows/shared/EMERGENT_REFINEMENT_ANALYSIS.md +76 -0
- package/.allhands/flows/shared/EXTERNAL_TECH_GUIDANCE.md +97 -0
- package/.allhands/flows/shared/IDEATION_CODEBASE_GROUNDING.md +49 -0
- package/.allhands/flows/shared/PLAN_DEEPENING.md +152 -0
- package/.allhands/flows/shared/PROMPT_TASKS_CURATION.md +113 -0
- package/.allhands/flows/shared/PROMPT_VALIDATION_REVIEW.MD +99 -0
- package/.allhands/flows/shared/QUICK_PREMORTEM.md +70 -0
- package/.allhands/flows/shared/RESEARCH_GUIDANCE.md +38 -0
- package/.allhands/flows/shared/REVIEW_OPTIONS_BREAKDOWN.md +68 -0
- package/.allhands/flows/shared/SKILL_EXTRACTION.md +84 -0
- package/.allhands/flows/shared/SPEC_FLOW_ANALYSIS.md +119 -0
- package/.allhands/flows/shared/TDD_WORKFLOW.md +109 -0
- package/.allhands/flows/shared/UTILIZE_VALIDATION_TOOLING.md +84 -0
- package/.allhands/flows/shared/WRITING_HARNESS_FLOWS.md +11 -0
- package/.allhands/flows/shared/WRITING_HARNESS_MCP_TOOLS.md +84 -0
- package/.allhands/flows/shared/jury/ARCHITECTURE_REVIEW.md +91 -0
- package/.allhands/flows/shared/jury/BEST_PRACTICES_REVIEW.md +80 -0
- package/.allhands/flows/shared/jury/CLAIM_VERIFICATION_REVIEW.md +101 -0
- package/.allhands/flows/shared/jury/EXPECTATIONS_FIT_REVIEW.md +78 -0
- package/.allhands/flows/shared/jury/MAINTAINABILITY_REVIEW.md +110 -0
- package/.allhands/flows/shared/jury/PROMPTS_EXPECTATIONS_FIT.md +74 -0
- package/.allhands/flows/shared/jury/PROMPTS_FLOW_ANALYSIS.md +92 -0
- package/.allhands/flows/shared/jury/PROMPTS_YAGNI.md +78 -0
- package/.allhands/flows/shared/jury/PROMPT_PREMORTEM.md +125 -0
- package/.allhands/flows/shared/jury/SECURITY_REVIEW.md +86 -0
- package/.allhands/flows/shared/jury/YAGNI_REVIEW.md +82 -0
- package/.allhands/flows/wip/DEBUG_INVESTIGATION.md +162 -0
- package/.allhands/flows/wip/MEMORY_RECALL.md +62 -0
- package/.allhands/harness/ah +131 -0
- package/.allhands/harness/package-lock.json +5292 -0
- package/.allhands/harness/package.json +52 -0
- package/.allhands/harness/src/__tests__/e2e/commands.test.ts +307 -0
- package/.allhands/harness/src/__tests__/e2e/event-loop.test.ts +539 -0
- package/.allhands/harness/src/__tests__/e2e/hooks.test.ts +427 -0
- package/.allhands/harness/src/__tests__/e2e/new-initiative-routing.test.ts +137 -0
- package/.allhands/harness/src/__tests__/e2e/run-e2e.ts +109 -0
- package/.allhands/harness/src/__tests__/e2e/specs-type.test.ts +210 -0
- package/.allhands/harness/src/__tests__/e2e/validation-hooks.test.ts +669 -0
- package/.allhands/harness/src/__tests__/e2e/validation-path-consistency.test.ts +354 -0
- package/.allhands/harness/src/__tests__/e2e/validation.test.ts +528 -0
- package/.allhands/harness/src/__tests__/harness/assertions.ts +318 -0
- package/.allhands/harness/src/__tests__/harness/cli-runner.ts +359 -0
- package/.allhands/harness/src/__tests__/harness/fixture.ts +384 -0
- package/.allhands/harness/src/__tests__/harness/hook-runner.ts +411 -0
- package/.allhands/harness/src/__tests__/harness/index.ts +122 -0
- package/.allhands/harness/src/cli.ts +36 -0
- package/.allhands/harness/src/commands/complexity.ts +177 -0
- package/.allhands/harness/src/commands/context7.ts +202 -0
- package/.allhands/harness/src/commands/docs.ts +557 -0
- package/.allhands/harness/src/commands/hooks.ts +24 -0
- package/.allhands/harness/src/commands/index.ts +51 -0
- package/.allhands/harness/src/commands/knowledge.ts +382 -0
- package/.allhands/harness/src/commands/memories.ts +302 -0
- package/.allhands/harness/src/commands/notify.ts +61 -0
- package/.allhands/harness/src/commands/oracle.ts +158 -0
- package/.allhands/harness/src/commands/perplexity.ts +220 -0
- package/.allhands/harness/src/commands/planning.ts +245 -0
- package/.allhands/harness/src/commands/schema.ts +73 -0
- package/.allhands/harness/src/commands/skills.ts +128 -0
- package/.allhands/harness/src/commands/solutions.ts +353 -0
- package/.allhands/harness/src/commands/spawn.ts +158 -0
- package/.allhands/harness/src/commands/specs.ts +532 -0
- package/.allhands/harness/src/commands/tavily.ts +226 -0
- package/.allhands/harness/src/commands/tools.ts +579 -0
- package/.allhands/harness/src/commands/trace.ts +327 -0
- package/.allhands/harness/src/commands/tui.ts +960 -0
- package/.allhands/harness/src/commands/validate.ts +143 -0
- package/.allhands/harness/src/commands/validation-tools.ts +108 -0
- package/.allhands/harness/src/hooks/context.ts +1442 -0
- package/.allhands/harness/src/hooks/enforcement.ts +170 -0
- package/.allhands/harness/src/hooks/index.ts +54 -0
- package/.allhands/harness/src/hooks/lifecycle.ts +229 -0
- package/.allhands/harness/src/hooks/notification.ts +104 -0
- package/.allhands/harness/src/hooks/observability.ts +551 -0
- package/.allhands/harness/src/hooks/session.ts +88 -0
- package/.allhands/harness/src/hooks/shared.ts +815 -0
- package/.allhands/harness/src/hooks/transcript-parser.ts +208 -0
- package/.allhands/harness/src/hooks/validation.ts +617 -0
- package/.allhands/harness/src/lib/__tests__/ctags.test.ts +244 -0
- package/.allhands/harness/src/lib/__tests__/docs-validation.test.ts +344 -0
- package/.allhands/harness/src/lib/__tests__/mcp-runtime.test.ts +190 -0
- package/.allhands/harness/src/lib/__tests__/schema.test.ts +861 -0
- package/.allhands/harness/src/lib/base-command.ts +198 -0
- package/.allhands/harness/src/lib/cli-daemon.ts +343 -0
- package/.allhands/harness/src/lib/compaction.ts +313 -0
- package/.allhands/harness/src/lib/ctags.ts +497 -0
- package/.allhands/harness/src/lib/docs-validation.ts +907 -0
- package/.allhands/harness/src/lib/event-loop.ts +662 -0
- package/.allhands/harness/src/lib/flows.ts +155 -0
- package/.allhands/harness/src/lib/git.ts +276 -0
- package/.allhands/harness/src/lib/knowledge-worker.ts +72 -0
- package/.allhands/harness/src/lib/knowledge.ts +810 -0
- package/.allhands/harness/src/lib/llm.ts +255 -0
- package/.allhands/harness/src/lib/mcp-client.ts +432 -0
- package/.allhands/harness/src/lib/mcp-daemon.ts +486 -0
- package/.allhands/harness/src/lib/mcp-runtime.ts +418 -0
- package/.allhands/harness/src/lib/notification.ts +115 -0
- package/.allhands/harness/src/lib/opencode/index.ts +70 -0
- package/.allhands/harness/src/lib/opencode/profiles.ts +300 -0
- package/.allhands/harness/src/lib/opencode/prompts/codesearch.md +98 -0
- package/.allhands/harness/src/lib/opencode/prompts/knowledge-aggregator.md +67 -0
- package/.allhands/harness/src/lib/opencode/runner.ts +281 -0
- package/.allhands/harness/src/lib/oracle.ts +926 -0
- package/.allhands/harness/src/lib/planning-utils.ts +150 -0
- package/.allhands/harness/src/lib/planning.ts +605 -0
- package/.allhands/harness/src/lib/pr-review.ts +225 -0
- package/.allhands/harness/src/lib/prompts.ts +522 -0
- package/.allhands/harness/src/lib/schema.ts +418 -0
- package/.allhands/harness/src/lib/schemas/agent-profile.ts +141 -0
- package/.allhands/harness/src/lib/schemas/template-vars.ts +138 -0
- package/.allhands/harness/src/lib/session.ts +164 -0
- package/.allhands/harness/src/lib/specs.ts +348 -0
- package/.allhands/harness/src/lib/tldr.ts +829 -0
- package/.allhands/harness/src/lib/tmux.ts +1051 -0
- package/.allhands/harness/src/lib/trace-store.ts +714 -0
- package/.allhands/harness/src/mcp/__tests__/index.test.ts +46 -0
- package/.allhands/harness/src/mcp/_template.ts +47 -0
- package/.allhands/harness/src/mcp/filesystem.ts +33 -0
- package/.allhands/harness/src/mcp/index.ts +69 -0
- package/.allhands/harness/src/mcp/playwright.ts +34 -0
- package/.allhands/harness/src/mcp/xcodebuild.ts +29 -0
- package/.allhands/harness/src/schemas/docs.schema.json +44 -0
- package/.allhands/harness/src/schemas/settings.schema.json +214 -0
- package/.allhands/harness/src/tui/actions.ts +227 -0
- package/.allhands/harness/src/tui/file-viewer-modal.ts +270 -0
- package/.allhands/harness/src/tui/index.ts +1574 -0
- package/.allhands/harness/src/tui/modal.ts +232 -0
- package/.allhands/harness/src/tui/prompts-pane.ts +186 -0
- package/.allhands/harness/src/tui/status-pane.ts +434 -0
- package/.allhands/harness/tsconfig.json +22 -0
- package/.allhands/harness/vitest.config.ts +13 -0
- package/.allhands/pillars.md +33 -0
- package/.allhands/principles.md +88 -0
- package/.allhands/schemas/alignment.yaml +51 -0
- package/.allhands/schemas/documentation.yaml +10 -0
- package/.allhands/schemas/prompt.yaml +92 -0
- package/.allhands/schemas/skill.yaml +34 -0
- package/.allhands/schemas/solution.yaml +131 -0
- package/.allhands/schemas/spec.yaml +67 -0
- package/.allhands/schemas/validation-suite.yaml +49 -0
- package/.allhands/schemas/workflow.yaml +51 -0
- package/.allhands/settings.json +57 -0
- package/.allhands/skills/claude-code-patterns/SKILL.md +60 -0
- package/.allhands/skills/claude-code-patterns/docs/context-hygiene.md +19 -0
- package/.allhands/skills/harness-maintenance/SKILL.md +449 -0
- package/.allhands/skills/harness-maintenance/references/core-architecture.md +187 -0
- package/.allhands/skills/harness-maintenance/references/harness-skills.md +87 -0
- package/.allhands/skills/harness-maintenance/references/knowledge-compounding.md +78 -0
- package/.allhands/skills/harness-maintenance/references/tools-commands-mcp-hooks.md +115 -0
- package/.allhands/skills/harness-maintenance/references/validation-tooling.md +77 -0
- package/.allhands/skills/harness-maintenance/references/writing-flows.md +84 -0
- package/.allhands/validation/browser-automation.md +109 -0
- package/.allhands/validation/xcode-automation.md +195 -0
- package/.allhands/workflows/documentation.md +86 -0
- package/.allhands/workflows/investigation.md +81 -0
- package/.allhands/workflows/milestone.md +91 -0
- package/.allhands/workflows/optimization.md +85 -0
- package/.allhands/workflows/refactor.md +99 -0
- package/.allhands/workflows/triage.md +81 -0
- package/.claude/README.md +1 -0
- package/.claude/agents/explorer.md +10 -0
- package/.claude/agents/researcher.md +11 -0
- package/.claude/agents/task-runner.md +8 -0
- package/.claude/settings.json +231 -0
- package/.env.ai.example +7 -0
- package/.github/workflows/npm-publish.yml +69 -0
- package/.internal.json +45 -0
- package/.tldr/config.json +11 -0
- package/.tldrignore +90 -0
- package/CLAUDE.md +6 -0
- package/README.md +98 -0
- package/bin/sync-cli.js +7552 -0
- package/concerns.md +7 -0
- package/docs/README.md +41 -0
- package/docs/agents/README.md +24 -0
- package/docs/agents/agent-configuration-system.md +86 -0
- package/docs/agents/execution-agents.md +50 -0
- package/docs/agents/knowledge-agents.md +61 -0
- package/docs/agents/orchestration-agent.md +57 -0
- package/docs/agents/planning-agents.md +84 -0
- package/docs/agents/quality-review-agents.md +67 -0
- package/docs/agents/workflow-agent-orchestration.md +69 -0
- package/docs/flows/README.md +44 -0
- package/docs/flows/compounding.md +126 -0
- package/docs/flows/coordination.md +72 -0
- package/docs/flows/core-harness-integration.md +63 -0
- package/docs/flows/documentation-orchestration.md +98 -0
- package/docs/flows/e2e-test-plan-building.md +83 -0
- package/docs/flows/emergent-refinement.md +104 -0
- package/docs/flows/flow-authoring-and-mcp-tools.md +89 -0
- package/docs/flows/judge-reviewing.md +112 -0
- package/docs/flows/plan-deepening-and-research.md +107 -0
- package/docs/flows/plan-review-jury.md +114 -0
- package/docs/flows/pr-reviewing.md +54 -0
- package/docs/flows/prompt-task-execution.md +119 -0
- package/docs/flows/spec-planning.md +162 -0
- package/docs/flows/type-specific-scoping-flows.md +49 -0
- package/docs/flows/validation-and-skills-integration.md +145 -0
- package/docs/flows/wip/wip-flows.md +102 -0
- package/docs/harness/README.md +23 -0
- package/docs/harness/agent-profiles.md +84 -0
- package/docs/harness/cli/README.md +24 -0
- package/docs/harness/cli/cli-entry-and-command-discovery.md +91 -0
- package/docs/harness/cli/docs-command.md +87 -0
- package/docs/harness/cli/knowledge-command.md +91 -0
- package/docs/harness/cli/minor-cli-commands.md +65 -0
- package/docs/harness/cli/oracle-command.md +113 -0
- package/docs/harness/cli/planning-command.md +95 -0
- package/docs/harness/cli/schema-and-validation-commands.md +154 -0
- package/docs/harness/cli/search-commands.md +97 -0
- package/docs/harness/cli/spawn-command.md +136 -0
- package/docs/harness/cli/specs-command.md +102 -0
- package/docs/harness/cli/tools-command.md +122 -0
- package/docs/harness/cli/trace-command.md +122 -0
- package/docs/harness/cli-daemon.md +92 -0
- package/docs/harness/event-loop.md +184 -0
- package/docs/harness/hooks/README.md +15 -0
- package/docs/harness/hooks/context-hooks.md +96 -0
- package/docs/harness/hooks/lifecycle-and-observability-hooks.md +135 -0
- package/docs/harness/hooks/validation-hooks.md +97 -0
- package/docs/harness/test-harness.md +149 -0
- package/docs/harness/tui.md +176 -0
- package/docs/memories.md +20 -0
- package/docs/solutions/agentic-issues/premature-agent-deletion-tui-action-dependency-20260130.md +49 -0
- package/docs/solutions/agentic-issues/ref-anchor-scope-mismatch-skill-references-20260131.md +55 -0
- package/docs/solutions/agentic-issues/tautological-tests-routing-20260131.md +52 -0
- package/docs/solutions/integration_issue/blocktool-output-format-mismatch-hook-runner-20260130.md +52 -0
- package/docs/solutions/integration_issue/dual-validation-path-divergence-schema-20260130.md +66 -0
- package/docs/solutions/security-issues/unsanitized-domain-path-join-20260131.md +52 -0
- package/docs/solutions/test-failures/event-loop-mock-ordering-checkAgentWindows-20260130.md +63 -0
- package/docs/sync-cli/README.md +19 -0
- package/docs/sync-cli/cli-entrypoint-and-commands.md +39 -0
- package/docs/sync-cli/commands/README.md +11 -0
- package/docs/sync-cli/commands/pull-manifest-command.md +36 -0
- package/docs/sync-cli/commands/push-command.md +84 -0
- package/docs/sync-cli/commands/sync-command.md +71 -0
- package/docs/sync-cli/systems/README.md +14 -0
- package/docs/sync-cli/systems/git-and-github-integration.md +49 -0
- package/docs/sync-cli/systems/interactive-ui.md +43 -0
- package/docs/sync-cli/systems/manifest-and-distribution.md +51 -0
- package/docs/sync-cli/systems/path-resolution.md +42 -0
- package/package.json +46 -0
- package/scripts/install-shim.sh +40 -0
- package/scripts/pre-pack.sh +25 -0
- package/specs/harness-maintenance-skill.spec.md +138 -0
- package/specs/roadmap/git-spec-lifecycle-management.spec.md +113 -0
- package/specs/sync-init-flag.spec.md +117 -0
- package/specs/unified-workflow-orchestration.spec.md +250 -0
- package/specs/validation-tooling-practice.spec.md +98 -0
- package/specs/workflow-domain-configuration.spec.md +265 -0
- package/src/commands/pull-manifest.ts +31 -0
- package/src/commands/push.ts +344 -0
- package/src/commands/sync.ts +289 -0
- package/src/lib/constants.ts +10 -0
- package/src/lib/dotfiles.ts +36 -0
- package/src/lib/fs-utils.ts +18 -0
- package/src/lib/gh.ts +40 -0
- package/src/lib/git.ts +63 -0
- package/src/lib/gitignore.ts +167 -0
- package/src/lib/manifest.ts +121 -0
- package/src/lib/marker-sync.ts +39 -0
- package/src/lib/paths.ts +38 -0
- package/src/lib/target-lines.ts +66 -0
- package/src/lib/ui.ts +78 -0
- package/src/sync-cli.ts +120 -0
- package/target-lines.json +23 -0
- package/tsconfig.json +20 -0
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: "Post-spec knowledge extraction flow that captures learnings, solutions, memories, and harness improvements from completed specifications, with type-aware completion assessment for milestone vs exploratory specs"
|
|
3
|
+
---
|
|
4
|
+
|
|
5
|
+
# Compounding Flow
|
|
6
|
+
|
|
7
|
+
The compounding flow exists to close the feedback loop after a spec completes. Without it, knowledge gained during implementation -- decisions, failures, workarounds, engineer preferences -- evaporates between sessions. Per **Knowledge Compounding**, everything feeds forward.
|
|
8
|
+
|
|
9
|
+
This flow runs after all prompts in a spec have been executed and reviewed. It is intentionally the last step before a spec is considered fully closed.
|
|
10
|
+
|
|
11
|
+
## Lifecycle Position
|
|
12
|
+
|
|
13
|
+
```mermaid
|
|
14
|
+
flowchart LR
|
|
15
|
+
A[Spec Planning] --> B[Prompt Execution Loop]
|
|
16
|
+
B --> C[PR Review]
|
|
17
|
+
C --> D[Compounding]
|
|
18
|
+
D --> E[Spec Closed]
|
|
19
|
+
D -->|harness issues| F[Harness Improvement Spec]
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
The flow is idempotent -- running it again on a spec with no new changes produces no output.
|
|
23
|
+
|
|
24
|
+
## Phase Progression
|
|
25
|
+
|
|
26
|
+
The flow progresses through ordered phases, each building on the previous. Completion Assessment runs early to establish the evaluation frame for the spec type. The final phase (Harness Improvement) is intentionally last so that all other compounding artifacts are complete before any diversion into structural changes.
|
|
27
|
+
|
|
28
|
+
```mermaid
|
|
29
|
+
flowchart TD
|
|
30
|
+
CG[Context Gathering] --> CA[Completion Assessment]
|
|
31
|
+
CA --> SA[Signal Analysis]
|
|
32
|
+
SA --> ME[Memory Extraction]
|
|
33
|
+
SA --> SD[Solution Documentation]
|
|
34
|
+
ME --> SF[Spec Finalization]
|
|
35
|
+
SD --> SF
|
|
36
|
+
SF --> HI[Harness Improvement Handling]
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
## Completion Assessment by Spec Type
|
|
40
|
+
|
|
41
|
+
Per **Frontier Models are Capable**, completion means different things depending on the spec type:
|
|
42
|
+
|
|
43
|
+
| Spec Type | Completion Criteria |
|
|
44
|
+
|-----------|-------------------|
|
|
45
|
+
| **Milestone** (also the default when the spec type is missing) | Spec acceptance criteria met, all prompts complete, thorough knowledge extraction |
|
|
46
|
+
| **Exploratory** (investigation, optimization, refactor, documentation, triage) | Problem resolution assessed against original hypothesis, learnings extracted from experiment outcomes, unresolved questions documented for future work |
|
|
47
|
+
|
|
48
|
+
Milestone completion is binary -- acceptance criteria are either met or not. Exploratory completion is evaluated against hypothesis outcomes: did the experiments answer the questions posed? What was learned? What remains open? This distinction shapes how subsequent Signal Analysis interprets prompt results.
|
|
49
|
+
|
|
50
|
+
## Signal Analysis
|
|
51
|
+
|
|
52
|
+
The core analytical phase reads all spec artifacts and identifies patterns across four signal categories:
|
|
53
|
+
|
|
54
|
+
| Signal Category | What It Reveals | Key Indicators |
|
|
55
|
+
|----------------|-----------------|----------------|
|
|
56
|
+
| Prompt Signals | Execution and planning quality | Failed prompts, patch counts, blocker learnings |
|
|
57
|
+
| Tooling Signals | Skill and validation suite effectiveness | Per-tool impact map of what each tool caught vs. missed |
|
|
58
|
+
| Decision Signals | Engineer intent and preferences | Rejections, overrides, compromise patterns |
|
|
59
|
+
| Emergent Work Signals | Quality control preferences | Kept vs. reverted emergent work |
|
|
60
|
+
|
|
61
|
+
The tooling signals phase produces a **per-tool impact map** that cross-references every prompt's skills and validation suites against its summary (Limitations, Decisions, Learnings). This map becomes evidence for harness improvement specs.
|
|
62
|
+
|
|
63
|
+
### Crystallization Evaluation
|
|
64
|
+
|
|
65
|
+
Per **Agentic Validation Tooling**, the tooling signals phase also evaluates each validation suite for **crystallization** opportunities. For each suite used during execution:
|
|
66
|
+
|
|
67
|
+
- What stochastic patterns were discovered during exploratory validation?
|
|
68
|
+
- Which patterns are stable and repeatable enough to crystallize into deterministic checks?
|
|
69
|
+
- Should new deterministic tests be added to the suite's Deterministic Integration section?
|
|
70
|
+
- Are there stochastic exploration patterns that should be documented for future agents?
|
|
71
|
+
|
|
72
|
+
This evaluation feeds directly into the Harness Improvement phase as evidence for suite refinement per [ref:.allhands/flows/shared/CREATE_VALIDATION_TOOLING_SPEC.md::9750183].
|
|
73
|
+
|
|
74
|
+
A key design decision: emergent prompts are never framed as "scope creep." Per **Quality Engineering**, emergent work discovers valuable variants. Reverted emergent work is expected experimentation cost per **Software is Cheap**.
|
|
75
|
+
|
|
76
|
+
## Knowledge Outputs
|
|
77
|
+
|
|
78
|
+
The flow produces three distinct knowledge artifacts:
|
|
79
|
+
|
|
80
|
+
| Artifact | Location | Purpose |
|
|
81
|
+
|----------|----------|---------|
|
|
82
|
+
| Memories | `docs/memories.md` | Lightweight learnings searchable via `ah memories search` |
|
|
83
|
+
| Solutions | `docs/solutions/<category>/` | Detailed problem-solution documentation for non-trivial issues |
|
|
84
|
+
| Spec Finalization | `.planning/<spec>/spec.md` | Historical record with implementation reality vs. original plan |
|
|
85
|
+
|
|
86
|
+
### Memory Categories
|
|
87
|
+
|
|
88
|
+
Memories are captured across several categories when signals exist, including technical learnings, engineer preference memories, systemic validation signals, and harness behavior patterns. Each memory is tagged with domain and source (`user-steering` vs `agent-inferred`) for relevance scoring in future recall.
|
|
89
|
+
|
|
90
|
+
### Solution Documentation
|
|
91
|
+
|
|
92
|
+
Solutions target problems that required multiple investigation attempts, had non-obvious resolutions, or involve agentic anti-patterns. Trivial fixes are explicitly excluded. After writing, solutions are cross-referenced against existing solutions via `ah solutions search` to build a connected knowledge graph.
|
|
93
|
+
|
|
94
|
+
## Harness Improvement Classification
|
|
95
|
+
|
|
96
|
+
The final phase classifies detected issues and requires engineer interview before action:
|
|
97
|
+
|
|
98
|
+
```mermaid
|
|
99
|
+
flowchart TD
|
|
100
|
+
I[Classified Issues] --> Q{Type?}
|
|
101
|
+
Q -->|Skill gaps| A[Update skill file inline]
|
|
102
|
+
Q -->|Validation suite gaps| B[Update suite file inline]
|
|
103
|
+
Q -->|Missing validation suite| C{Engineer decision}
|
|
104
|
+
Q -->|Structural: flows/commands/hooks| D{Engineer decision}
|
|
105
|
+
C -->|Create| E[CREATE_HARNESS_SPEC]
|
|
106
|
+
C -->|Defer| F[Document in memories]
|
|
107
|
+
D -->|Create| E
|
|
108
|
+
D -->|Defer| F
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
Inline updates (skills, validation suites) require engineer approval. Structural changes are never applied inline: the engineer either approves creating a harness improvement spec or defers the change. Deferred items are documented in `docs/memories.md` under "Deferred Harness Improvements."
|
|
112
|
+
|
|
113
|
+
### Crystallization Promotion
|
|
114
|
+
|
|
115
|
+
Per [ref:.allhands/flows/COMPOUNDING.md::905aed8], validation suite refinements include **crystallization promotion**: stable stochastic patterns discovered during execution are promoted into deterministic checks in the suite's Deterministic Integration section. This shifts stochastic exploration to the frontier -- agents no longer need to rediscover patterns that have been automated. The crystallization lifecycle ensures validation compounds across spec executions.
|
|
116
|
+
|
|
117
|
+
## Key Design Decisions
|
|
118
|
+
|
|
119
|
+
- **Engineer interview is mandatory** before finalizing the compounding summary -- the flow must not finalize without sign-off on classified issues
|
|
120
|
+
- **Harness modification requires first-principles justification** -- changes must trace back to principles in [ref:.allhands/principles.md::0610b13]
|
|
121
|
+
- **Spec finalization preserves original Goals and Non-Goals** unmodified for historical contrast against the new Implementation Reality section
|
|
122
|
+
- **The per-tool impact map is evidence, not a stored artifact** -- it feeds directly into harness improvement specs rather than being persisted separately
|
|
123
|
+
|
|
124
|
+
## Source Flow
|
|
125
|
+
|
|
126
|
+
[ref:.allhands/flows/COMPOUNDING.md::905aed8]
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: "Engineer-facing orchestration agent for managing active prompt loops, agent health, and mid-execution interventions"
|
|
3
|
+
---
|
|
4
|
+
|
|
5
|
+
# Coordination Flow
|
|
6
|
+
|
|
7
|
+
The coordination flow provides an engineer-facing orchestration layer during active prompt execution. It exists because prompt loops are autonomous -- once started, agents execute prompts independently. The engineer needs a control plane to intervene, patch, and steer without breaking the execution model.
|
|
8
|
+
|
|
9
|
+
Per **Frontier Models are Capable**, this agent orchestrates without implementing. It modifies only harness-managed files (prompts, alignment docs) and never writes implementation code.
|
|
10
|
+
|
|
11
|
+
## Coordination Services
|
|
12
|
+
|
|
13
|
+
The flow presents six services to the engineer, each addressing a different intervention need:
|
|
14
|
+
|
|
15
|
+
| Service | Purpose | Delegation |
|
|
16
|
+
|---------|---------|------------|
|
|
17
|
+
| Quick Patch | Deterministic fix for a specific issue | [ref:.allhands/flows/shared/PROMPT_TASKS_CURATION.md::1abf30b] |
|
|
18
|
+
| Interjection | Insert new prompt into active dependency graph | Internal (see below) |
|
|
19
|
+
| Emergent Surgery | Triage emergent refinement prompts | [ref:.allhands/flows/shared/EMERGENT_REFINEMENT_ANALYSIS.md::f3f4914] |
|
|
20
|
+
| Prompt Edit | Modify prompts given engineer concerns | [ref:.allhands/flows/shared/PROMPT_TASKS_CURATION.md::1abf30b] |
|
|
21
|
+
| Agent Status | Check tmux windows and agent health | Tmux patterns |
|
|
22
|
+
| Kill/Restart | Terminate broken agents and fix prompts | Tmux + prompt edit |
|
|
23
|
+
|
|
24
|
+
## Prompt Interjection
|
|
25
|
+
|
|
26
|
+
Interjection is the most architecturally significant service. It inserts new prompts into the active dependency graph mid-loop without renumbering existing prompts. The event loop detects new prompt files automatically -- sequencing is controlled entirely through dependency mapping.
|
|
27
|
+
|
|
28
|
+
```mermaid
|
|
29
|
+
flowchart LR
|
|
30
|
+
subgraph Before
|
|
31
|
+
P1[Prompt 1] --> P2[Prompt 2]
|
|
32
|
+
P2 --> P3["Prompt 3 (deps: [1])"]
|
|
33
|
+
end
|
|
34
|
+
subgraph After
|
|
35
|
+
P1b[Prompt 1] --> P2b[Prompt 2]
|
|
36
|
+
P2b --> P7["Prompt 7 (deps: [2])"]
|
|
37
|
+
P7 --> P3b["Prompt 3 (deps: [1, 7])"]
|
|
38
|
+
end
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
The interjection process:
|
|
42
|
+
|
|
43
|
+
1. Engineer specifies "run after" and "run before" prompts
|
|
44
|
+
2. Coordinator assigns next available prompt number (append-only, never renumber)
|
|
45
|
+
3. New prompt gets `dependencies` set to "run after" prompts
|
|
46
|
+
4. "Run before" prompts get their `dependencies` patched to include the new prompt
|
|
47
|
+
5. Per **Ideation First**, resulting execution order is confirmed with the engineer before writing files
|
|
48
|
+
|
|
49
|
+
When an interjection fixes prior execution issues, the prompt is additionally marked as a user-patch.
|
|
50
|
+
|
|
51
|
+
## User-Patch Prompts
|
|
52
|
+
|
|
53
|
+
User-patch prompts are a cross-cutting concern shared between services. Whenever a prompt is created to fix issues from prior execution:
|
|
54
|
+
|
|
55
|
+
- Frontmatter includes `type: user-patch` and `patches_prompts: [X, Y]`
|
|
56
|
+
- Body documents what went wrong, engineer feedback, and specific issues
|
|
57
|
+
- Per **Knowledge Compounding**, this metadata enables the compounding flow to trace failures back to root causes and improve skills/validation suites
|
|
58
|
+
|
|
59
|
+
## Decision Documentation
|
|
60
|
+
|
|
61
|
+
Per **Knowledge Compounding**, the coordination flow captures engineer contributions in two locations:
|
|
62
|
+
|
|
63
|
+
- **Prompt files**: Expectations, compromises, decisions
|
|
64
|
+
- **Alignment doc**: Engineer steering appended to agent summaries (summaries are never deleted)
|
|
65
|
+
|
|
66
|
+
## Conversational Approach
|
|
67
|
+
|
|
68
|
+
Per **Ideation First**, the coordinator always clarifies before acting. It asks what the engineer wants to accomplish, presents options with tradeoffs, confirms understanding before modifying files, and surfaces relevant context from prompts and the alignment doc. This is an interactive agent, not a fire-and-forget automation.
|
|
69
|
+
|
|
70
|
+
## Source Flow
|
|
71
|
+
|
|
72
|
+
[ref:.allhands/flows/COORDINATION.md::607d330]
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: "Core tooling directives shared by all agents, establishing tldr and ah knowledge as the foundation for context-efficient codebase discovery"
|
|
3
|
+
---
|
|
4
|
+
|
|
5
|
+
# Core Harness Integration
|
|
6
|
+
|
|
7
|
+
Every agent in the harness loads [ref:.allhands/flows/CORE.md::ae9924a] as its foundation. This flow exists because of a single principle: **Context is Precious**. Agents degrade with large context windows, so codebase discovery must be targeted and structured rather than brute-force file reading.
|
|
8
|
+
|
|
9
|
+
## Two Mandatory Discovery Tools
|
|
10
|
+
|
|
11
|
+
The core integration enforces two tools for all codebase interaction:
|
|
12
|
+
|
|
13
|
+
| Tool | Purpose | When |
|
|
14
|
+
|------|---------|------|
|
|
15
|
+
| `ah knowledge docs search` | Semantic search over documented project knowledge | Any discovery task tied to crucial project understanding |
|
|
16
|
+
| `tldr` | Structured code analysis (trees, codemaps, control flow, data flow) | Retrieving file-level and function-level codebase context |
|
|
17
|
+
|
|
18
|
+
These replace ad-hoc file reads. The constraint is deliberate: agents that read files directly consume context on content that may be irrelevant. Structured tools return only what matters.
|
|
19
|
+
|
|
20
|
+
## Discovery Protocol
|
|
21
|
+
|
|
22
|
+
The mandated sequence reflects a progressive disclosure pattern -- start broad, narrow to specifics:
|
|
23
|
+
|
|
24
|
+
```mermaid
|
|
25
|
+
flowchart TD
|
|
26
|
+
A[Codebase Question] --> B[tldr semantic search]
|
|
27
|
+
B --> C{Need deeper analysis?}
|
|
28
|
+
C -->|Structure| D[tldr structure / tree]
|
|
29
|
+
C -->|Search| E[tldr search]
|
|
30
|
+
C -->|Function behavior| F[tldr cfg / dfg]
|
|
31
|
+
C -->|Impact of change| G[tldr impact]
|
|
32
|
+
C -->|What affects line N| H[tldr slice]
|
|
33
|
+
C -->|Cross-file calls| I[tldr calls]
|
|
34
|
+
C -->|Quality check| J[tldr diagnostics]
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
## tldr Capability Categories
|
|
38
|
+
|
|
39
|
+
### Core Analysis
|
|
40
|
+
|
|
41
|
+
File trees, code structure maps, pattern search, full file extraction, and LLM-ready context bundles. These are the building blocks agents use before reading raw files.
|
|
42
|
+
|
|
43
|
+
### Flow Analysis
|
|
44
|
+
|
|
45
|
+
Control flow graphs, data flow graphs, program slices, and cross-file call graphs. These enable agents to understand function behavior without reading every line -- critical for keeping context budgets low.
|
|
46
|
+
|
|
47
|
+
### Codebase Analysis
|
|
48
|
+
|
|
49
|
+
Reverse call graphs (`impact`), dead code detection (`dead`), and architectural layer detection (`arch`). These support refactoring decisions and cleanup tasks.
|
|
50
|
+
|
|
51
|
+
### Import Analysis
|
|
52
|
+
|
|
53
|
+
Forward imports (`imports`) and reverse import tracking (`importers`). These answer "what does this file depend on?" and "who depends on this module?" -- essential for understanding blast radius.
|
|
54
|
+
|
|
55
|
+
### Quality and Testing
|
|
56
|
+
|
|
57
|
+
Type checking and linting via `diagnostics`. Agents run this before tests to catch errors without consuming test execution context.
|
|
58
|
+
|
|
59
|
+
## Why This Exists as a Shared Flow
|
|
60
|
+
|
|
61
|
+
Per **Knowledge Compounding**, centralizing these directives in a single flow prevents every agent-specific flow from repeating the same tooling instructions. All agents inherit codebase discovery capability from this one file, and improvements to discovery practices propagate to every agent simultaneously.
|
|
62
|
+
|
|
63
|
+
The flow is deliberately terse -- per **Frontier Models are Capable**, agents deduce how to combine these tools for their specific tasks. The flow provides the "what tools exist and when to use them" while trusting agents to figure out the "how" for their domain.
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: "Two-mode documentation pipeline with discovery sub-agents, writer sub-agents, and validation for creating and maintaining engineering knowledge docs"
|
|
3
|
+
---
|
|
4
|
+
|
|
5
|
+
# Documentation Orchestration
|
|
6
|
+
|
|
7
|
+
The documentation flow creates and maintains engineering knowledge docs that expose code through file references and LSP symbols. Per **Knowledge Compounding**, docs enable semantic discovery of code through compounded understanding of use cases, intent, and key decisions.
|
|
8
|
+
|
|
9
|
+
Per **Context is Precious**, the orchestrator delegates discovery and writing to sub-agents rather than performing all work in a single context window.
|
|
10
|
+
|
|
11
|
+
## Mode Detection
|
|
12
|
+
|
|
13
|
+
The flow operates in two modes, selected automatically based on context variables:
|
|
14
|
+
|
|
15
|
+
```mermaid
|
|
16
|
+
flowchart TD
|
|
17
|
+
Start[Flow Invoked] --> Check{ALIGNMENT_PATH + PROMPTS_FOLDER provided?}
|
|
18
|
+
Check -->|Yes| Inc[Incremental Mode]
|
|
19
|
+
Check -->|No| FTG[Fill-the-Gaps Mode]
|
|
20
|
+
Inc --> Core[Core Flow]
|
|
21
|
+
FTG --> Core
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
| Mode | Trigger | Scope | Knowledge Source |
|
|
25
|
+
|------|---------|-------|-----------------|
|
|
26
|
+
| Fill-the-Gaps | Cold start or refresh, no spec context | All domains, full repo scan | Inferred from code |
|
|
27
|
+
| Incremental | Feature branch with spec context | Affected domains only | Prompts, commits, alignment docs |
|
|
28
|
+
|
|
29
|
+
If no message or context variables are provided, the flow defaults directly to Fill-the-Gaps without asking.
|
|
30
|
+
|
|
31
|
+
## Pre-flight Requirement
|
|
32
|
+
|
|
33
|
+
A clean git working tree is mandatory. File references include a git commit hash component -- uncommitted files have no hash, and modified files produce stale hashes that break immediately after the next commit.
|
|
34
|
+
|
|
35
|
+
## Fill-the-Gaps Initialization
|
|
36
|
+
|
|
37
|
+
1. Run `ah docs validate --json` to identify invalid refs, stale refs, and missing frontmatter
|
|
38
|
+
2. Detect domains: read `docs.json` or infer from project structure (checking monorepo markers like `pnpm-workspace.yaml`, `turbo.json`, `nx.json`)
|
|
39
|
+
3. Present detected domains to user for confirmation
|
|
40
|
+
4. Persist confirmed domains to `docs.json` for future incremental runs
|
|
41
|
+
|
|
42
|
+
## Incremental Initialization
|
|
43
|
+
|
|
44
|
+
1. Read alignment doc and prompt files for session knowledge
|
|
45
|
+
2. Run `git diff` against merge base for changed files
|
|
46
|
+
3. Run `ah docs validate --json` for current staleness
|
|
47
|
+
4. Impact analysis via `ah knowledge docs search` to find related docs
|
|
48
|
+
5. Categorize changes: **Edit** (existing docs reference changed code), **Create** (new functionality), **Stale** (outdated refs)
|
|
49
|
+
|
|
50
|
+
## Core Pipeline
|
|
51
|
+
|
|
52
|
+
Both modes converge into a shared four-phase pipeline:
|
|
53
|
+
|
|
54
|
+
```mermaid
|
|
55
|
+
flowchart TD
|
|
56
|
+
D[Discovery Phase] --> A[Aggregate & Plan]
|
|
57
|
+
A --> W[Writing Phase]
|
|
58
|
+
W --> P[Post-Processing]
|
|
59
|
+
|
|
60
|
+
D -.- D1[1 sub-agent per domain]
|
|
61
|
+
W -.- W1[5-10 writer sub-agents]
|
|
62
|
+
W1 -.- W2[5-15 approaches per writer]
|
|
63
|
+
P -.- P1[README generation + validation loop]
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
### Discovery Phase
|
|
67
|
+
|
|
68
|
+
One discovery sub-agent per domain, each following [ref:.allhands/flows/shared/DOCUMENTATION_DISCOVERY.md::f3f2716]. Discovery identifies all documentable approaches/features, groups them intelligently, and checks existing coverage. Key constraint: stay under 20 approaches per domain, grouping aggressively.
|
|
69
|
+
|
|
70
|
+
### Aggregate and Plan
|
|
71
|
+
|
|
72
|
+
The orchestrator merges discovery results, filters out fully-covered approaches, and groups remaining approaches into writer assignments. Target: 5-10 writers total, each handling 5-15 approaches from one domain or related subset.
|
|
73
|
+
|
|
74
|
+
### Writing Phase
|
|
75
|
+
|
|
76
|
+
Writer sub-agents follow [ref:.allhands/flows/shared/DOCUMENTATION_WRITER.md::8447f47]. Each writer receives its approaches with file lists, symbols, the target directory, any existing docs to edit, and session knowledge (in incremental mode). The `group` field controls subdirectory placement.
|
|
77
|
+
|
|
78
|
+
### Post-Processing
|
|
79
|
+
|
|
80
|
+
The orchestrator handles cross-domain concerns that individual writers lack context for:
|
|
81
|
+
|
|
82
|
+
- **README generation**: Top-level `docs/README.md`, per-domain `docs/<domain>/README.md`, and per-group READMEs for subdirectories with 3+ docs
|
|
83
|
+
- **Finalize and validate loop**: Run `ah docs finalize`, then `ah docs validate --json`. If issues exist, spawn fixup writers and repeat until clean
|
|
84
|
+
- **Reindex**: Run `ah knowledge docs reindex` to update semantic search
|
|
85
|
+
|
|
86
|
+
## Ownership Boundaries
|
|
87
|
+
|
|
88
|
+
| Artifact | Owner |
|
|
89
|
+
|----------|-------|
|
|
90
|
+
| Approach docs | Writer sub-agents |
|
|
91
|
+
| README.md files | Orchestrator (cross-domain context) |
|
|
92
|
+
| `docs/solutions/`, `docs/memories.md` | Compounding flow (never written by documentation) |
|
|
93
|
+
|
|
94
|
+
## Source Flows
|
|
95
|
+
|
|
96
|
+
- [ref:.allhands/flows/DOCUMENTATION.md::dc3e5c6]
|
|
97
|
+
- [ref:.allhands/flows/shared/DOCUMENTATION_DISCOVERY.md::f3f2716]
|
|
98
|
+
- [ref:.allhands/flows/shared/DOCUMENTATION_WRITER.md::8447f47]
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: "Flow for building comprehensive E2E test plans with dimension-mapped sections: deterministic integration tests, infrastructure setup, stochastic AI-coordinated validation, and manual verification flows for milestone validation"
|
|
3
|
+
---
|
|
4
|
+
|
|
5
|
+
# E2E Test Plan Building
|
|
6
|
+
|
|
7
|
+
The E2E test plan flow produces a document that convinces the engineer a milestone works as expected. Per **Agentic Validation Tooling**, engineers are excluded from prompt-by-prompt validation -- this plan provides the comprehensive proof they need through a single artifact.
|
|
8
|
+
|
|
9
|
+
The plan is not a restatement of automated test output. It layers deterministic summaries, infrastructure setup, AI-coordinated validation, and manual flows into a progressive document the engineer can use to verify the milestone end-to-end.
|
|
10
|
+
|
|
11
|
+
## Plan Structure
|
|
12
|
+
|
|
13
|
+
The test plan follows a four-section progressive structure, where each section builds on the previous:
|
|
14
|
+
|
|
15
|
+
```mermaid
|
|
16
|
+
flowchart TD
|
|
17
|
+
S1["Section 1: Deterministic Test Summary"] --> S2["Section 2: Infrastructure Setup"]
|
|
18
|
+
S2 --> S3["Section 3: AI-Coordinated Validation"]
|
|
19
|
+
S3 --> S4["Section 4: Manual E2E Flows"]
|
|
20
|
+
|
|
21
|
+
S1 -.- N1["Concise command list with comments"]
|
|
22
|
+
S2 -.- N2["Derived from implementation artifacts"]
|
|
23
|
+
S3 -.- N3["Conditional — only if agentic tooling exists"]
|
|
24
|
+
S4 -.- N4["Core 'convince the engineer' section"]
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
### Dimension Mapping
|
|
28
|
+
|
|
29
|
+
Per **Agentic Validation Tooling**, the test plan sections map to the two-dimensional validation model:
|
|
30
|
+
|
|
31
|
+
| Section | Validation Dimension | Suite Body Section |
|
|
32
|
+
|---------|---------------------|--------------------|
|
|
33
|
+
| Section 1: Deterministic Test Summary | **Deterministic** | Commands drawn from suite **Deterministic Integration** sections |
|
|
34
|
+
| Section 3: AI-Coordinated Validation | **Stochastic** | Agent exploration drawn from suite **Stochastic Validation** playbooks |
|
|
35
|
+
| Sections 2 & 4 | N/A | Infrastructure and manual flows are not dimension-mapped |
|
|
36
|
+
|
|
37
|
+
### Section Design Decisions
|
|
38
|
+
|
|
39
|
+
| Section | What It Contains | What It Avoids |
|
|
40
|
+
|---------|-----------------|----------------|
|
|
41
|
+
| Deterministic Test Summary | Runnable commands grouped by domain, inline comments | Detailed breakdowns, file listings, coverage percentages |
|
|
42
|
+
| Infrastructure Setup | Dependencies, env, database, services, dev servers | Relying on assumed knowledge; everything is derived from artifacts |
|
|
43
|
+
| AI-Coordinated Validation | Agent prompts for Playwright MCP, agent-browser, load testing, profiling | Inclusion when no agentic tooling exists for the project |
|
|
44
|
+
| Manual E2E Flows | User flows, edge cases, regression scenarios | Duplicating what automated tests already cover |
|
|
45
|
+
|
|
46
|
+
## Update Mode
|
|
47
|
+
|
|
48
|
+
The plan supports incremental updates. When a test plan already exists at the output path:
|
|
49
|
+
|
|
50
|
+
1. Extract `last_commit` from frontmatter
|
|
51
|
+
2. Diff commits and files since that commit
|
|
52
|
+
3. Compare alignment doc prompt summaries against covered prompts
|
|
53
|
+
4. Append new scenarios rather than rewriting existing coverage
|
|
54
|
+
|
|
55
|
+
This makes the plan a living document that grows with the milestone.
|
|
56
|
+
|
|
57
|
+
## Variant Awareness
|
|
58
|
+
|
|
59
|
+
Per **Quality Engineering**, implementation may produce disposable variants (A/B implementations, backend alternatives, experimental features). The infrastructure setup section documents how to switch between variants using feature flags, environment variables, or infrastructure flags, with setup commands for each testable variant.
|
|
60
|
+
|
|
61
|
+
## Infrastructure as Documentation Quality Signal
|
|
62
|
+
|
|
63
|
+
A key insight: if infrastructure setup cannot be derived from implementation artifacts (commits, summaries, code, existing docs), it signals inadequate documentation. The subsequent documentation phase will face the same challenge. This makes the E2E test plan an early warning system for documentation gaps.
|
|
64
|
+
|
|
65
|
+
## AI-Coordinated Validation
|
|
66
|
+
|
|
67
|
+
Section 3 is conditional -- it only appears when the project has tooling that supports agentic testing:
|
|
68
|
+
|
|
69
|
+
- UI automation (Playwright MCP, agent-browser, simulator automation, browser MCPs)
|
|
70
|
+
- Load testing tools (k6, artillery, locust)
|
|
71
|
+
- Performance profiling (flamegraphs, memory profilers, bundle analyzers)
|
|
72
|
+
- Database inspection/scripting
|
|
73
|
+
- API testing tools (curl automation, Postman/Insomnia MCPs)
|
|
74
|
+
|
|
75
|
+
When such tooling is present, the section provides example prompts engineers can give to agent sessions to exercise specific flows. When it is absent, the full section is omitted and the plan only notes which tooling categories would be valuable.
|
|
76
|
+
|
|
77
|
+
### Context Gathering for Tooling
|
|
78
|
+
|
|
79
|
+
Per [ref:.allhands/flows/E2E_TEST_PLAN_BUILDING.md::aa87ec8], during context gathering the flow runs `ah validation-tools list` to identify available suites. The `tools` field in suite output identifies which tooling is available for both stochastic and deterministic dimensions -- this informs which Section 3 tooling categories the project can support and surfaces tooling that may not be obvious from code inspection alone.
|
|
80
|
+
|
|
81
|
+
## Source Flow
|
|
82
|
+
|
|
83
|
+
[ref:.allhands/flows/E2E_TEST_PLAN_BUILDING.md::aa87ec8]
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: "Hypothesis-driven emergent planning for continuous improvement beyond planned prompts -- a plan-only agent that creates typed prompt files for executors, with work mode diversification and post-refinement analysis"
|
|
3
|
+
---
|
|
4
|
+
|
|
5
|
+
# Emergent Refinement
|
|
6
|
+
|
|
7
|
+
Planned prompts address known requirements. Emergent refinement addresses the unknown -- the improvements, extensions, and experiments that only become visible after initial implementation exists. Per **Quality Engineering**, emergent work discovers which variants are valuable, not just what was explicitly requested.
|
|
8
|
+
|
|
9
|
+
## Core Concept: Hypothesis-Driven Planning
|
|
10
|
+
|
|
11
|
+
Every emergent prompt starts with a hypothesis: "If I implement X, then Y outcome will result." This is fundamentally different from planned prompts, which start with "The spec requires X." The hypothesis framing forces agents to articulate expected outcomes, making success measurable and failure informative.
|
|
12
|
+
|
|
13
|
+
The emergent agent is a **plan-only** agent -- it creates `type: emergent` prompt files but never executes them. Executors pick up these prompts through the standard execution loop. This separation of planning and execution keeps each concern bounded in its own context window.
|
|
14
|
+
|
|
15
|
+
[ref:.allhands/flows/EMERGENT_PLANNING.md::4f1d9bf]
|
|
16
|
+
|
|
17
|
+
## Planning Lifecycle
|
|
18
|
+
|
|
19
|
+
```mermaid
|
|
20
|
+
stateDiagram-v2
|
|
21
|
+
[*] --> ContextGathering
|
|
22
|
+
ContextGathering --> GapAssessment
|
|
23
|
+
GapAssessment --> HypothesisFormation
|
|
24
|
+
HypothesisFormation --> PromptCreation
|
|
25
|
+
PromptCreation --> [*]
|
|
26
|
+
|
|
27
|
+
state ContextGathering {
|
|
28
|
+
[*] --> ReadAlignment
|
|
29
|
+
ReadAlignment --> SearchMemories
|
|
30
|
+
SearchMemories --> IdentifyGaps
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
state PromptCreation {
|
|
34
|
+
[*] --> DiscoverValidation
|
|
35
|
+
DiscoverValidation --> CreatePromptFiles
|
|
36
|
+
CreatePromptFiles --> Stop
|
|
37
|
+
}
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
The planner stops after creating prompt files. Executors handle implementation, and validation suites discovered by the planner are attached to prompts for executors to run.
|
|
41
|
+
|
|
42
|
+
## Work Mode Diversification
|
|
43
|
+
|
|
44
|
+
Emergent agents select from three work modes, cycling between them based on what prior prompts have already explored:
|
|
45
|
+
|
|
46
|
+
```mermaid
|
|
47
|
+
flowchart LR
|
|
48
|
+
CC[Core Goal Work] -->|reveals gaps| AI[Adjacent Improvements]
|
|
49
|
+
AI -->|exposes new core needs| CC
|
|
50
|
+
AI -->|stress-tests assumptions| NE[Novel Experiments]
|
|
51
|
+
NE -->|compounds back into| CC
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
| Mode | Purpose | Examples |
|
|
55
|
+
|------|---------|---------|
|
|
56
|
+
| Core Goal Work | Directly addresses spec goals, acceptance criteria, known gaps | Missing edge cases, error recovery paths |
|
|
57
|
+
| Adjacent Improvements | Tangentially related enhancements that compound core work | Performance optimization, UX polish |
|
|
58
|
+
| Novel Experiments | Creative extensions behind feature flags that stress-test assumptions | Alternative approaches, exploratory features |
|
|
59
|
+
|
|
60
|
+
These are not sequential phases. An agent doing novel experiments may discover a core stability gap and return to core goal work. Per **Knowledge Compounding**, each mode feeds the others -- adjacent work exposes core needs, novel work stress-tests assumptions.
|
|
61
|
+
|
|
62
|
+
## Validation Discovery
|
|
63
|
+
|
|
64
|
+
The planner discovers applicable validation suites during prompt creation and attaches them to prompt frontmatter via `validation_suites`. The planner does not run validation -- executors do when they pick up the prompts. This ensures emergent work meets the same quality bar as planned work without conflating planning with execution.
|
|
65
|
+
|
|
66
|
+
## Completion Protocol
|
|
67
|
+
|
|
68
|
+
The planner completes after creating prompt files. Each prompt includes the work mode type in its metadata so subsequent emergent planning rounds can diversify their mode selection. Alignment files and prompt files are not git tracked -- only implementation changes committed by executors are.
|
|
69
|
+
|
|
70
|
+
The planner must always produce at least one prompt. If core goals are met, it creates adjacent improvements or novel experiments. Per **Knowledge Compounding**, each round compounds work.
|
|
71
|
+
|
|
72
|
+
## Post-Refinement Analysis
|
|
73
|
+
|
|
74
|
+
[ref:.allhands/flows/shared/EMERGENT_REFINEMENT_ANALYSIS.md::f3f4914]
|
|
75
|
+
|
|
76
|
+
After a batch of emergent prompts completes, an analysis phase evaluates each one:
|
|
77
|
+
|
|
78
|
+
### Classification Decision Tree
|
|
79
|
+
|
|
80
|
+
```mermaid
|
|
81
|
+
flowchart TD
|
|
82
|
+
EP[Emergent Prompt] --> E{Evaluate}
|
|
83
|
+
E -->|Strong hypothesis, effective, aligned| K[Keep]
|
|
84
|
+
E -->|Good hypothesis, execution gaps| I[Improve]
|
|
85
|
+
E -->|Hypothesis doesn't support goal| EL[Eliminate]
|
|
86
|
+
|
|
87
|
+
I --> IP[Create improvement patch prompt]
|
|
88
|
+
EL --> RP[Create reversion patch prompt]
|
|
89
|
+
K --> Done[No action needed]
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
| Classification | Criteria | Action |
|
|
93
|
+
|---------------|----------|--------|
|
|
94
|
+
| Keep | Strong hypothesis, effective execution, aligned with goals | None |
|
|
95
|
+
| Improve | Good hypothesis, but execution gaps remain | Create `type: user-patch` improvement prompt |
|
|
96
|
+
| Eliminate | Hypothesis doesn't support spec goal | Create `type: user-patch` reversion prompt using git hashes |
|
|
97
|
+
|
|
98
|
+
### Engineer Decision Point
|
|
99
|
+
|
|
100
|
+
The analysis presents findings holistically -- comparing emergent prompts against each other, highlighting patterns of effective versus ineffective hypotheses. The engineer accepts, adjusts, or overrides recommendations. Per **Knowledge Compounding**, all decisions and rationale are documented in the alignment doc to prevent future agents from re-proposing eliminated approaches.
|
|
101
|
+
|
|
102
|
+
## Why Emergent Refinement Exists
|
|
103
|
+
|
|
104
|
+
Per **Prompt Files as Units of Work**, novelty emerges from prompt tasking. Planned prompts capture what engineers know they want. Emergent refinement captures what they didn't know they wanted until the system existed. The separation of planning from execution ensures the planner can focus entirely on hypothesis quality while executors focus on implementation quality. The framing as "indefinite compounding" rather than "percentage complete" reflects the principle that there is always a next valuable iteration to discover.
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: "Guidelines for authoring harness flow files driven by first principles and adding MCP server integrations to extend harness capabilities"
|
|
3
|
+
---
|
|
4
|
+
|
|
5
|
+
# Flow Authoring and MCP Tool Integration
|
|
6
|
+
|
|
7
|
+
Two complementary authoring guidelines: one for writing harness flows (the instruction layer), one for adding MCP server integrations (the tool layer). Both shape how agents receive direction and capability.
|
|
8
|
+
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
## Flow Authoring
|
|
12
|
+
|
|
13
|
+
[ref:.allhands/flows/shared/WRITING_HARNESS_FLOWS.md::f39048c]
|
|
14
|
+
|
|
15
|
+
### Principle-to-Directive Mapping
|
|
16
|
+
|
|
17
|
+
Every flow directive traces back to a first principle from [ref:.allhands/principles.md::0610b13]. This mapping is the foundation of flow authoring:
|
|
18
|
+
|
|
19
|
+
| First Principle | What It Means for Flows |
|
|
20
|
+
|-----------------|------------------------|
|
|
21
|
+
| **Context is Precious** | Be brief. Progressive disclosure. Reference rather than repeat. |
|
|
22
|
+
| **Frontier Models are Capable** | Provide "why," trust agents to deduce "what" and "how." |
|
|
23
|
+
| **Knowledge Compounding** | DRY -- centralize instructions, use decision trees that reference capability chunks. |
|
|
24
|
+
|
|
25
|
+
When a flow instructs a behavior, it must cite the motivating principle by name. This teaches agents to think in terms of the harness philosophy, not just follow instructions.
|
|
26
|
+
|
|
27
|
+
### Flow Anatomy
|
|
28
|
+
|
|
29
|
+
Flows use XML tags for structural attention:
|
|
30
|
+
|
|
31
|
+
| Tag | Purpose |
|
|
32
|
+
|-----|---------|
|
|
33
|
+
| `<goal>` | Motivations and contribution to the wider harness |
|
|
34
|
+
| `<constraints>` | Hard rules (NEVER / MUST / ALWAYS) |
|
|
35
|
+
| `<ownership>` | Files and domains the agent is restricted to |
|
|
36
|
+
| `<success_criteria>` | Validation criteria for task completion |
|
|
37
|
+
| `<inputs>` | Inputs required for the flow to execute |
|
|
38
|
+
| `<outputs>` | Outputs expected from the flow |
|
|
39
|
+
|
|
40
|
+
Body sections use `##` headers as capability phases (Context Gathering, Implementation, Validation, Completion). Bullet points start with action verbs. Paths and commands are backtick-wrapped. Conditionals use flat "If X - Y" patterns.
|
|
41
|
+
|
|
42
|
+
### File Organization
|
|
43
|
+
|
|
44
|
+
- `flows/` root: Agent default flows, disclosed immediately on agent startup
|
|
45
|
+
- `flows/subdirectories/`: Progressively disclosed flows with `<inputs>` and `<outputs>` tags, invoked by other flows
|
|
46
|
+
|
|
47
|
+
The northstar example flow is [ref:.allhands/flows/PROMPT_TASK_EXECUTION.md::9baf478].
|
|
48
|
+
|
|
49
|
+
---
|
|
50
|
+
|
|
51
|
+
## MCP Server Integration
|
|
52
|
+
|
|
53
|
+
[ref:.allhands/flows/shared/WRITING_HARNESS_MCP_TOOLS.md::fad1587]
|
|
54
|
+
|
|
55
|
+
### Integration Phases
|
|
56
|
+
|
|
57
|
+
```mermaid
|
|
58
|
+
flowchart LR
|
|
59
|
+
R[Research] --> B[Build Config]
|
|
60
|
+
B --> E[Environment Setup]
|
|
61
|
+
E --> V[Validation]
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
**Research**: Investigate the MCP package via `ah tavily search` and `ah context7 search`. Identify transport type (stdio, http, sse), command/args, environment variables, and authentication method.
|
|
65
|
+
|
|
66
|
+
**Build Config**: Copy the template at `.allhands/harness/src/mcp/_template.ts` and populate with researched values -- name, description, transport config, environment variable references using `${VAR_NAME}` syntax, statefulness, and tool hints.
|
|
67
|
+
|
|
68
|
+
**Environment Setup**: Document required variables (name, where to obtain, expected format) without adding actual values. Check `.env.ai` for existing variables.
|
|
69
|
+
|
|
70
|
+
**Validation**: Build harness, verify server appears in `ah tools --list`, verify tools are discovered via `ah tools <server-name>`, and test a read-only tool call.
|
|
71
|
+
|
|
72
|
+
### Config Structure
|
|
73
|
+
|
|
74
|
+
Each MCP server config lives at `.allhands/harness/src/mcp/<server-name>.ts` and specifies:
|
|
75
|
+
|
|
76
|
+
| Field | Purpose |
|
|
77
|
+
|-------|---------|
|
|
78
|
+
| `name` | Short identifier (used in `ah tools <name>:tool`) |
|
|
79
|
+
| `description` | What the server provides |
|
|
80
|
+
| `type` | Transport: `stdio`, `http`, or `sse` |
|
|
81
|
+
| `command` / `args` | For stdio transport |
|
|
82
|
+
| `url` | For http/sse transport |
|
|
83
|
+
| `env` | Environment variable references |
|
|
84
|
+
| `stateful` | Whether server maintains session state |
|
|
85
|
+
| `toolHints` | Helpful hints for key tools |
|
|
86
|
+
|
|
87
|
+
### Design Decision: Sub-agent Execution
|
|
88
|
+
|
|
89
|
+
MCP integration runs as a sub-agent to avoid blocking the main thread. The main thread can proceed knowing the MCP server is (or will be) available, receiving a completion report with config path, available tools, environment requirements, and validation status.
|