hatch3r 1.4.0 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +10 -6
- package/agents/hatch3r-a11y-auditor.md +13 -2
- package/agents/hatch3r-architect.md +20 -1
- package/agents/hatch3r-ci-watcher.md +25 -1
- package/agents/hatch3r-context-rules.md +15 -3
- package/agents/hatch3r-dependency-auditor.md +23 -2
- package/agents/hatch3r-devops.md +11 -0
- package/agents/hatch3r-docs-writer.md +27 -2
- package/agents/hatch3r-fixer.md +46 -3
- package/agents/hatch3r-implementer.md +19 -1
- package/agents/hatch3r-learnings-loader.md +19 -0
- package/agents/hatch3r-lint-fixer.md +11 -0
- package/agents/hatch3r-perf-profiler.md +21 -1
- package/agents/hatch3r-researcher.md +51 -911
- package/agents/hatch3r-reviewer.md +24 -2
- package/agents/hatch3r-security-auditor.md +20 -0
- package/agents/hatch3r-test-writer.md +24 -0
- package/agents/modes/architecture.md +1 -0
- package/agents/modes/boundary-analysis.md +2 -1
- package/agents/modes/codebase-impact.md +1 -0
- package/agents/modes/complexity-risk.md +1 -0
- package/agents/modes/coverage-analysis.md +1 -0
- package/agents/modes/current-state.md +1 -0
- package/agents/modes/feature-design.md +1 -0
- package/agents/modes/impact-analysis.md +1 -0
- package/agents/modes/library-docs.md +2 -1
- package/agents/modes/migration-path.md +1 -0
- package/agents/modes/prior-art.md +1 -0
- package/agents/modes/refactoring-strategy.md +1 -0
- package/agents/modes/regression.md +1 -0
- package/agents/modes/requirements-elicitation.md +1 -0
- package/agents/modes/risk-assessment.md +1 -0
- package/agents/modes/risk-prioritization.md +1 -0
- package/agents/modes/root-cause.md +1 -0
- package/agents/modes/similar-implementation.md +2 -1
- package/agents/modes/symptom-trace.md +1 -0
- package/agents/modes/test-pattern.md +2 -1
- package/agents/shared/external-knowledge.md +10 -0
- package/agents/shared/quality-charter.md +18 -0
- package/checks/README.md +1 -0
- package/checks/accessibility.md +55 -0
- package/commands/board/pickup-azure-devops.md +1 -0
- package/commands/board/pickup-delegation-multi.md +6 -1
- package/commands/board/pickup-delegation.md +1 -0
- package/commands/board/pickup-github.md +1 -0
- package/commands/board/pickup-gitlab.md +1 -0
- package/commands/board/pickup-modes.md +1 -0
- package/commands/board/pickup-post-impl.md +2 -1
- package/commands/board/shared-azure-devops.md +1 -0
- package/commands/board/shared-board-overview.md +1 -0
- package/commands/board/shared-github.md +1 -0
- package/commands/board/shared-gitlab.md +1 -0
- package/commands/hatch3r-agent-customize.md +1 -0
- package/commands/hatch3r-api-spec.md +1 -0
- package/commands/hatch3r-benchmark.md +4 -3
- package/commands/hatch3r-board-fill.md +52 -9
- package/commands/hatch3r-board-groom.md +69 -5
- package/commands/hatch3r-board-init.md +2 -1
- package/commands/hatch3r-board-pickup.md +1 -0
- package/commands/hatch3r-board-refresh.md +1 -0
- package/commands/hatch3r-board-shared.md +34 -3
- package/commands/hatch3r-bug-plan.md +2 -1
- package/commands/hatch3r-codebase-map.md +4 -3
- package/commands/hatch3r-command-customize.md +2 -1
- package/commands/hatch3r-context-health.md +1 -0
- package/commands/hatch3r-cost-tracking.md +1 -0
- package/commands/hatch3r-debug.md +4 -3
- package/commands/hatch3r-dep-audit.md +3 -0
- package/commands/hatch3r-feature-plan.md +3 -2
- package/commands/hatch3r-healthcheck.md +1 -0
- package/commands/hatch3r-hooks.md +5 -0
- package/commands/hatch3r-learn.md +1 -0
- package/commands/hatch3r-migration-plan.md +3 -2
- package/commands/hatch3r-onboard.md +2 -1
- package/commands/hatch3r-project-spec.md +4 -3
- package/commands/hatch3r-quick-change.md +2 -0
- package/commands/hatch3r-recipe.md +1 -0
- package/commands/hatch3r-refactor-plan.md +2 -1
- package/commands/hatch3r-release.md +4 -1
- package/commands/hatch3r-revision.md +2 -1
- package/commands/hatch3r-roadmap.md +5 -4
- package/commands/hatch3r-rule-customize.md +1 -0
- package/commands/hatch3r-security-audit.md +1 -0
- package/commands/hatch3r-skill-customize.md +1 -0
- package/commands/hatch3r-test-plan.md +3 -2
- package/commands/hatch3r-workflow.md +5 -0
- package/dist/cli/index.js +7467 -4582
- package/dist/cli/index.js.map +1 -1
- package/hooks/hatch3r-ci-failure.md +1 -0
- package/hooks/hatch3r-file-save.md +1 -0
- package/hooks/hatch3r-post-merge.md +1 -0
- package/hooks/hatch3r-pre-commit.md +1 -0
- package/hooks/hatch3r-pre-push.md +1 -0
- package/hooks/hatch3r-session-start.md +1 -0
- package/package.json +19 -4
- package/rules/hatch3r-accessibility-standards.md +2 -1
- package/rules/hatch3r-accessibility-standards.mdc +1 -1
- package/rules/hatch3r-agent-orchestration-detail.md +49 -1
- package/rules/hatch3r-agent-orchestration-detail.mdc +47 -1
- package/rules/hatch3r-agent-orchestration.md +87 -5
- package/rules/hatch3r-agent-orchestration.mdc +85 -5
- package/rules/hatch3r-api-design.md +2 -1
- package/rules/hatch3r-api-design.mdc +1 -1
- package/rules/hatch3r-browser-verification.md +4 -2
- package/rules/hatch3r-browser-verification.mdc +1 -0
- package/rules/hatch3r-ci-cd.md +2 -1
- package/rules/hatch3r-ci-cd.mdc +1 -1
- package/rules/hatch3r-code-standards.md +15 -2
- package/rules/hatch3r-code-standards.mdc +12 -0
- package/rules/hatch3r-component-conventions.md +2 -1
- package/rules/hatch3r-component-conventions.mdc +1 -0
- package/rules/hatch3r-data-classification.md +2 -1
- package/rules/hatch3r-data-classification.mdc +1 -1
- package/rules/hatch3r-deep-context.md +26 -1
- package/rules/hatch3r-deep-context.mdc +25 -1
- package/rules/hatch3r-dependency-management.md +2 -1
- package/rules/hatch3r-dependency-management.mdc +1 -1
- package/rules/hatch3r-feature-flags.md +2 -0
- package/rules/hatch3r-feature-flags.mdc +1 -0
- package/rules/hatch3r-git-conventions.md +2 -1
- package/rules/hatch3r-git-conventions.mdc +2 -1
- package/rules/hatch3r-i18n.md +2 -1
- package/rules/hatch3r-i18n.mdc +1 -0
- package/rules/hatch3r-learning-consult.md +11 -1
- package/rules/hatch3r-learning-consult.mdc +11 -1
- package/rules/hatch3r-migrations.md +2 -1
- package/rules/hatch3r-migrations.mdc +1 -1
- package/rules/hatch3r-observability-logging.md +34 -0
- package/rules/hatch3r-observability-logging.mdc +30 -0
- package/rules/hatch3r-observability-metrics.md +74 -0
- package/rules/hatch3r-observability-metrics.mdc +70 -0
- package/rules/hatch3r-observability-tracing-detail.md +160 -0
- package/rules/hatch3r-observability-tracing-detail.mdc +63 -0
- package/rules/hatch3r-observability-tracing.md +86 -0
- package/rules/hatch3r-observability-tracing.mdc +77 -0
- package/rules/hatch3r-observability.md +9 -448
- package/rules/hatch3r-observability.mdc +7 -448
- package/rules/hatch3r-performance-budgets.md +2 -0
- package/rules/hatch3r-performance-budgets.mdc +1 -0
- package/rules/hatch3r-secrets-management.md +2 -1
- package/rules/hatch3r-secrets-management.mdc +1 -1
- package/rules/hatch3r-security-patterns.md +3 -2
- package/rules/hatch3r-security-patterns.mdc +1 -1
- package/rules/hatch3r-testing.md +12 -2
- package/rules/hatch3r-testing.mdc +10 -1
- package/rules/hatch3r-theming.md +3 -2
- package/rules/hatch3r-theming.mdc +1 -0
- package/rules/hatch3r-tooling-hierarchy.md +3 -2
- package/rules/hatch3r-tooling-hierarchy.mdc +1 -1
- package/skills/hatch3r-a11y-audit/SKILL.md +11 -4
- package/skills/hatch3r-agent-customize/SKILL.md +1 -0
- package/skills/hatch3r-api-spec/SKILL.md +9 -2
- package/skills/hatch3r-architecture-review/SKILL.md +7 -0
- package/skills/hatch3r-bug-fix/SKILL.md +16 -7
- package/skills/hatch3r-ci-pipeline/SKILL.md +8 -1
- package/skills/hatch3r-command-customize/SKILL.md +1 -0
- package/skills/hatch3r-context-health/SKILL.md +23 -2
- package/skills/hatch3r-cost-tracking/SKILL.md +16 -6
- package/skills/hatch3r-customize/SKILL.md +8 -1
- package/skills/hatch3r-dep-audit/SKILL.md +9 -2
- package/skills/hatch3r-feature/SKILL.md +12 -4
- package/skills/hatch3r-gh-agentic-workflows/SKILL.md +7 -0
- package/skills/hatch3r-incident-response/SKILL.md +7 -0
- package/skills/hatch3r-issue-workflow/SKILL.md +8 -1
- package/skills/hatch3r-logical-refactor/SKILL.md +8 -1
- package/skills/hatch3r-migration/SKILL.md +7 -0
- package/skills/hatch3r-perf-audit/SKILL.md +9 -2
- package/skills/hatch3r-pr-creation/SKILL.md +8 -1
- package/skills/hatch3r-qa-validation/SKILL.md +8 -1
- package/skills/hatch3r-recipe/SKILL.md +8 -1
- package/skills/hatch3r-refactor/SKILL.md +10 -2
- package/skills/hatch3r-release/SKILL.md +8 -1
- package/skills/hatch3r-rule-customize/SKILL.md +1 -0
- package/skills/hatch3r-skill-customize/SKILL.md +1 -0
- package/skills/hatch3r-visual-refactor/SKILL.md +12 -5
package/package.json
CHANGED
|
@@ -1,8 +1,14 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "hatch3r",
|
|
3
|
-
"version": "1.4.0",
|
|
3
|
+
"version": "1.5.0",
|
|
4
4
|
"description": "Battle-tested agentic coding setup framework. One command to hatch your agent stack -- agents, skills, rules, commands, and MCP for every major AI coding tool.",
|
|
5
5
|
"type": "module",
|
|
6
|
+
"exports": {
|
|
7
|
+
".": {
|
|
8
|
+
"import": "./dist/cli/index.js",
|
|
9
|
+
"types": "./dist/cli/index.d.ts"
|
|
10
|
+
}
|
|
11
|
+
},
|
|
6
12
|
"bin": {
|
|
7
13
|
"hatch3r": "./dist/cli/index.js"
|
|
8
14
|
},
|
|
@@ -13,7 +19,8 @@
|
|
|
13
19
|
"typecheck": "tsc --noEmit",
|
|
14
20
|
"prepublishOnly": "npm run build",
|
|
15
21
|
"test": "vitest run",
|
|
16
|
-
"test:watch": "vitest"
|
|
22
|
+
"test:watch": "vitest",
|
|
23
|
+
"lockfile:check": "lockfile-lint --path package-lock.json --type npm --allowed-hosts npm --validate-https"
|
|
17
24
|
},
|
|
18
25
|
"keywords": [
|
|
19
26
|
"agents",
|
|
@@ -44,11 +51,15 @@
|
|
|
44
51
|
"bugs": {
|
|
45
52
|
"url": "https://github.com/hatch3r/hatch3r/issues"
|
|
46
53
|
},
|
|
54
|
+
"publishConfig": {
|
|
55
|
+
"access": "public",
|
|
56
|
+
"registry": "https://registry.npmjs.org"
|
|
57
|
+
},
|
|
47
58
|
"engines": {
|
|
48
59
|
"node": ">=22.0.0"
|
|
49
60
|
},
|
|
50
61
|
"files": [
|
|
51
|
-
"dist/",
|
|
62
|
+
"dist/cli/",
|
|
52
63
|
"agents/",
|
|
53
64
|
"checks/",
|
|
54
65
|
"commands/",
|
|
@@ -73,12 +84,16 @@
|
|
|
73
84
|
"@types/node": "^25.5.0",
|
|
74
85
|
"@vitest/coverage-v8": "^4.1.2",
|
|
75
86
|
"eslint": "^10.1.0",
|
|
87
|
+
"lockfile-lint": "^5.0.0",
|
|
76
88
|
"tsup": "^8.0.0",
|
|
77
|
-
"typescript": "^
|
|
89
|
+
"typescript": "^6.0.2",
|
|
78
90
|
"typescript-eslint": "^8.57.2",
|
|
79
91
|
"vitest": "^4.1.2"
|
|
80
92
|
},
|
|
81
93
|
"overrides": {
|
|
82
94
|
"flatted": "^3.4.2"
|
|
95
|
+
},
|
|
96
|
+
"comments": {
|
|
97
|
+
"overrides/flatted": "Pinned to >=3.4.2 to resolve security advisory in transitive eslint > flat-cache > flatted dependency"
|
|
83
98
|
}
|
|
84
99
|
}
|
|
@@ -2,8 +2,9 @@
|
|
|
2
2
|
id: hatch3r-accessibility-standards
|
|
3
3
|
type: rule
|
|
4
4
|
description: Accessibility standards covering WCAG 2.2 AA compliance, keyboard navigation, screen readers, and ARIA patterns
|
|
5
|
-
scope:
|
|
5
|
+
scope: "**/*.vue,**/*.jsx,**/*.tsx,**/*.svelte,**/components/**,**/*.html,**/*a11y*,**/*accessibility*"
|
|
6
6
|
tags: [a11y]
|
|
7
|
+
quality_charter: agents/shared/quality-charter.md
|
|
7
8
|
---
|
|
8
9
|
# Accessibility Standards
|
|
9
10
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
description: Accessibility standards covering WCAG 2.2 AA compliance, keyboard navigation, screen readers, and ARIA patterns
|
|
3
|
-
|
|
3
|
+
globs: ["**/*.vue", "**/*.jsx", "**/*.tsx", "**/*.svelte", "**/components/**", "**/*.html", "**/*a11y*", "**/*accessibility*"]
|
|
4
4
|
---
|
|
5
5
|
# Accessibility Standards
|
|
6
6
|
|
|
@@ -3,7 +3,9 @@ id: hatch3r-agent-orchestration-detail
|
|
|
3
3
|
type: rule
|
|
4
4
|
description: Extended orchestration reference — PipelineContext schemas, resilience protocols, observability integration, and auto-mode guardrails
|
|
5
5
|
scope: conditional
|
|
6
|
+
globs: "**/.agents/**,**/pipeline/**,**/*orchestrat*,**/*agent*"
|
|
6
7
|
tags: [core]
|
|
8
|
+
quality_charter: agents/shared/quality-charter.md
|
|
7
9
|
---
|
|
8
10
|
# Agent Orchestration — Extended Reference
|
|
9
11
|
|
|
@@ -20,6 +22,14 @@ PipelineContext {
|
|
|
20
22
|
issueRef: string | null // Issue number or null for plain chat
|
|
21
23
|
deepContextTier: 1 | 2 | 3 // From hatch3r-deep-context scoring
|
|
22
24
|
|
|
25
|
+
// Detected project type for specialist selection (Finding #56)
|
|
26
|
+
projectType?: {
|
|
27
|
+
languages: string[] // From repo analysis (e.g., "typescript", "python", "go")
|
|
28
|
+
frameworks: string[] // Detected frameworks (e.g., "next", "express")
|
|
29
|
+
isMonorepo: boolean
|
|
30
|
+
packageManager: string // "npm" | "yarn" | "pnpm" | "bun" | "unknown"
|
|
31
|
+
}
|
|
32
|
+
|
|
23
33
|
// Phase 1 outputs (Research)
|
|
24
34
|
researchFindings: {
|
|
25
35
|
modes: string[] // Researcher modes used
|
|
@@ -31,11 +41,14 @@ PipelineContext {
|
|
|
31
41
|
resolvedRequirements: object | null // From requirements-elicitation
|
|
32
42
|
}
|
|
33
43
|
|
|
44
|
+
// Research gap flags from mid-implementation checkpoint (Finding #52)
|
|
45
|
+
researchGaps?: string[] // Gaps identified during Phase 2
|
|
46
|
+
|
|
34
47
|
// Phase 2 outputs (Implementation)
|
|
35
48
|
implementationResult: {
|
|
36
49
|
filesChanged: string[]
|
|
37
50
|
testsWritten: string[]
|
|
38
|
-
status: "SUCCESS" | "PARTIAL" | "FAILED"
|
|
51
|
+
status: "SUCCESS" | "PARTIAL" | "FAILED" | "SKIPPED" | "TIMEOUT"
|
|
39
52
|
reason: string | null
|
|
40
53
|
}
|
|
41
54
|
|
|
@@ -65,6 +78,8 @@ PipelineContext {
|
|
|
65
78
|
}
|
|
66
79
|
```
|
|
67
80
|
|
|
81
|
+
The TypeScript implementation of this schema with runtime validation is in `src/pipeline/pipelineContext.ts`. Use `validatePhaseTransition()` to verify context completeness before advancing between phases.
|
|
82
|
+
|
|
68
83
|
## Resilience and Failure Handling
|
|
69
84
|
|
|
70
85
|
### Phase Failure Protocols
|
|
@@ -76,6 +91,7 @@ PipelineContext {
|
|
|
76
91
|
| Phase 2 (Implementation) | Build/test failure | Attempt self-fix (max 2 iterations). Escalate to user if unresolved. |
|
|
77
92
|
| Phase 2 (Implementation) | Scope creep detected | Halt. Surface deviation to user. Resume only with approval. |
|
|
78
93
|
| Phase 3 (Review) | Max iterations (3) | Surface unresolved findings to user. Do not merge. |
|
|
94
|
+
| Phase 3 (Review) | DESIGN_OBJECTION verdict | Terminate review loop immediately. Surface the objection and alternative approaches to the user for an architectural decision. Do not spawn fixer. |
|
|
79
95
|
| Phase 3 (Review) | Fixer introduces regressions | Revert fixer changes. Surface original findings + regression to user. |
|
|
80
96
|
| Phase 4 (Quality) | Specialist timeout | Log timeout. Continue with available results. Flag in output. |
|
|
81
97
|
| Phase 4 (Quality) | Validation pass fails | Spawn fixer (max 2 attempts). Surface if unresolved. |
|
|
@@ -157,3 +173,35 @@ Auto-mode MUST halt and surface to user when:
|
|
|
157
173
|
|
|
158
174
|
- **Token budget:** If cumulative subagent token usage exceeds 80% of estimated budget, surface to user before spawning additional agents.
|
|
159
175
|
- **Time budget:** If pipeline duration exceeds 2x the estimated time (based on deep context tier), surface status and request continuation approval.
|
|
176
|
+
|
|
177
|
+
## Adaptive Pipeline Behavior
|
|
178
|
+
|
|
179
|
+
### Complexity-Driven Adaptation
|
|
180
|
+
|
|
181
|
+
The pipeline should adapt its behavior based on observed task complexity, not just the initial tier assignment:
|
|
182
|
+
|
|
183
|
+
| Signal During Execution | Adaptation |
|
|
184
|
+
|------------------------|------------|
|
|
185
|
+
| Phase 1 research finds >10 affected files (initial estimate was <5) | Upgrade tier to 3 if currently 2. Re-run `codebase-impact` at `deep` depth before Phase 2. |
|
|
186
|
+
| Phase 2 implementer reports >3 research gaps | Pause Phase 2. Run targeted researcher with gap-specific modes before continuing. |
|
|
187
|
+
| Phase 3 review loop reaches iteration 2 with increasing Critical count | Classify as complexity underestimate. Surface to user with recommendation to break the task into smaller sub-tasks. |
|
|
188
|
+
| Phase 4 validation pass fails on first attempt | Check whether failure is in test-writer's new tests (expected -- fix test) or in pre-existing tests (regression -- fix implementation). Route to appropriate fixer. |
|
|
189
|
+
|
|
190
|
+
### Post-Pipeline Learning
|
|
191
|
+
|
|
192
|
+
After pipeline completion, the orchestrator should capture lessons for future runs:
|
|
193
|
+
|
|
194
|
+
1. **Tier accuracy:** Was the initial tier correct? If the pipeline needed adaptation (above), log the mismatch for the learnings system.
|
|
195
|
+
2. **Phase duration ratios:** Record time spent per phase. Anomalous ratios (e.g., Phase 3 taking 5x Phase 2) indicate systemic issues worth investigating.
|
|
196
|
+
3. **Specialist value:** Record which Phase 4 specialists produced actionable findings vs. clean reports. Over time, this data informs smarter specialist dispatch.
|
|
197
|
+
|
|
198
|
+
## Context Token Optimization
|
|
199
|
+
|
|
200
|
+
When pipeline context exceeds 50% of the available context window, apply these compression strategies in order:
|
|
201
|
+
|
|
202
|
+
1. **Summarize Phase 1 output.** Replace full research findings with a structured summary: affected files (list), blast radius (count + top 3), key conventions (bullet points). Keep raw data only for the fields the current phase needs.
|
|
203
|
+
2. **Prune resolved findings.** After Phase 3 review loop, remove findings that were fixed and confirmed. Only carry forward unresolved findings.
|
|
204
|
+
3. **Collapse specialist results.** In the final output, summarize specialist results as a single status table rather than including full specialist reports. Full reports are available on request.
|
|
205
|
+
4. **Never truncate security findings.** Security auditor output is always included in full regardless of context pressure.
|
|
206
|
+
|
|
207
|
+
These strategies preserve decision-critical information while reducing token overhead for long pipelines.
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
description: Extended orchestration reference — PipelineContext schemas, resilience protocols, observability integration, and auto-mode guardrails
|
|
3
|
+
globs: ["**/.agents/**", "**/pipeline/**", "**/*orchestrat*", "**/*agent*"]
|
|
3
4
|
alwaysApply: false
|
|
4
5
|
---
|
|
5
6
|
# Agent Orchestration — Extended Reference
|
|
@@ -17,6 +18,14 @@ PipelineContext {
|
|
|
17
18
|
issueRef: string | null // Issue number or null for plain chat
|
|
18
19
|
deepContextTier: 1 | 2 | 3 // From hatch3r-deep-context scoring
|
|
19
20
|
|
|
21
|
+
// Detected project type for specialist selection (Finding #56)
|
|
22
|
+
projectType?: {
|
|
23
|
+
languages: string[] // From repo analysis (e.g., "typescript", "python", "go")
|
|
24
|
+
frameworks: string[] // Detected frameworks (e.g., "next", "express")
|
|
25
|
+
isMonorepo: boolean
|
|
26
|
+
packageManager: string // "npm" | "yarn" | "pnpm" | "bun" | "unknown"
|
|
27
|
+
}
|
|
28
|
+
|
|
20
29
|
// Phase 1 outputs (Research)
|
|
21
30
|
researchFindings: {
|
|
22
31
|
modes: string[] // Researcher modes used
|
|
@@ -28,11 +37,14 @@ PipelineContext {
|
|
|
28
37
|
resolvedRequirements: object | null // From requirements-elicitation
|
|
29
38
|
}
|
|
30
39
|
|
|
40
|
+
// Research gap flags from mid-implementation checkpoint (Finding #52)
|
|
41
|
+
researchGaps?: string[] // Gaps identified during Phase 2
|
|
42
|
+
|
|
31
43
|
// Phase 2 outputs (Implementation)
|
|
32
44
|
implementationResult: {
|
|
33
45
|
filesChanged: string[]
|
|
34
46
|
testsWritten: string[]
|
|
35
|
-
status: "SUCCESS" | "PARTIAL" | "FAILED"
|
|
47
|
+
status: "SUCCESS" | "PARTIAL" | "FAILED" | "SKIPPED" | "TIMEOUT"
|
|
36
48
|
reason: string | null
|
|
37
49
|
}
|
|
38
50
|
|
|
@@ -62,6 +74,8 @@ PipelineContext {
|
|
|
62
74
|
}
|
|
63
75
|
```
|
|
64
76
|
|
|
77
|
+
The TypeScript implementation of this schema with runtime validation is in `src/pipeline/pipelineContext.ts`. Use `validatePhaseTransition()` to verify context completeness before advancing between phases.
|
|
78
|
+
|
|
65
79
|
## Resilience and Failure Handling
|
|
66
80
|
|
|
67
81
|
### Phase Failure Protocols
|
|
@@ -154,3 +168,35 @@ Auto-mode MUST halt and surface to user when:
|
|
|
154
168
|
|
|
155
169
|
- **Token budget:** If cumulative subagent token usage exceeds 80% of estimated budget, surface to user before spawning additional agents.
|
|
156
170
|
- **Time budget:** If pipeline duration exceeds 2x the estimated time (based on deep context tier), surface status and request continuation approval.
|
|
171
|
+
|
|
172
|
+
## Adaptive Pipeline Behavior
|
|
173
|
+
|
|
174
|
+
### Complexity-Driven Adaptation
|
|
175
|
+
|
|
176
|
+
The pipeline should adapt its behavior based on observed task complexity, not just the initial tier assignment:
|
|
177
|
+
|
|
178
|
+
| Signal During Execution | Adaptation |
|
|
179
|
+
|------------------------|------------|
|
|
180
|
+
| Phase 1 research finds >10 affected files (initial estimate was <5) | Upgrade tier to 3 if currently 2. Re-run `codebase-impact` at `deep` depth before Phase 2. |
|
|
181
|
+
| Phase 2 implementer reports >3 research gaps | Pause Phase 2. Run targeted researcher with gap-specific modes before continuing. |
|
|
182
|
+
| Phase 3 review loop reaches iteration 2 with increasing Critical count | Classify as complexity underestimate. Surface to user with recommendation to break the task into smaller sub-tasks. |
|
|
183
|
+
| Phase 4 validation pass fails on first attempt | Check whether failure is in test-writer's new tests (expected -- fix test) or in pre-existing tests (regression -- fix implementation). Route to appropriate fixer. |
|
|
184
|
+
|
|
185
|
+
### Post-Pipeline Learning
|
|
186
|
+
|
|
187
|
+
After pipeline completion, the orchestrator should capture lessons for future runs:
|
|
188
|
+
|
|
189
|
+
1. **Tier accuracy:** Was the initial tier correct? If the pipeline needed adaptation (above), log the mismatch for the learnings system.
|
|
190
|
+
2. **Phase duration ratios:** Record time spent per phase. Anomalous ratios (e.g., Phase 3 taking 5x Phase 2) indicate systemic issues worth investigating.
|
|
191
|
+
3. **Specialist value:** Record which Phase 4 specialists produced actionable findings vs. clean reports. Over time, this data informs smarter specialist dispatch.
|
|
192
|
+
|
|
193
|
+
## Context Token Optimization
|
|
194
|
+
|
|
195
|
+
When pipeline context exceeds 50% of the available context window, apply these compression strategies in order:
|
|
196
|
+
|
|
197
|
+
1. **Summarize Phase 1 output.** Replace full research findings with a structured summary: affected files (list), blast radius (count + top 3), key conventions (bullet points). Keep raw data only for the fields the current phase needs.
|
|
198
|
+
2. **Prune resolved findings.** After Phase 3 review loop, remove findings that were fixed and confirmed. Only carry forward unresolved findings.
|
|
199
|
+
3. **Collapse specialist results.** In the final output, summarize specialist results as a single status table rather than including full specialist reports. Full reports are available on request.
|
|
200
|
+
4. **Never truncate security findings.** Security auditor output is always included in full regardless of context pressure.
|
|
201
|
+
|
|
202
|
+
These strategies preserve decision-critical information while reducing token overhead for long pipelines.
|
|
@@ -4,6 +4,7 @@ type: rule
|
|
|
4
4
|
description: Mandatory agent delegation, skill loading, and subagent usage directives for ALL tasks in ALL contexts
|
|
5
5
|
scope: always
|
|
6
6
|
tags: [core]
|
|
7
|
+
quality_charter: agents/shared/quality-charter.md
|
|
7
8
|
---
|
|
8
9
|
# Agent Orchestration
|
|
9
10
|
|
|
@@ -82,6 +83,21 @@ Spawn `hatch3r-implementer` via Task tool for ALL code changes. Never implement
|
|
|
82
83
|
|
|
83
84
|
**Implementer prompt enrichment (Tier 2+):** Include `similar-implementation` findings as "Reference Conventions", resolved `requirements-elicitation` answers as "Resolved Requirements", and blast radius data (Tier 3 only).
|
|
84
85
|
|
|
86
|
+
### Mid-Implementation Research Gap Checkpoint
|
|
87
|
+
|
|
88
|
+
At the midpoint of Phase 2 (after initial files are modified but before completion), the implementer MUST evaluate whether research gaps exist. This prevents discovering missing context too late in the pipeline.
|
|
89
|
+
|
|
90
|
+
**Checkpoint triggers:**
|
|
91
|
+
1. Implementation requires modifying a file not listed in `researchFindings.affectedFiles`.
|
|
92
|
+
2. An undocumented dependency or integration point is discovered.
|
|
93
|
+
3. The implementer's confidence drops below "medium" for any sub-task.
|
|
94
|
+
4. A test file expected from research does not exist or covers different behavior.
|
|
95
|
+
|
|
96
|
+
**Actions when gaps are detected:**
|
|
97
|
+
- Log the gap in `PipelineContext.researchGaps`.
|
|
98
|
+
- If the gap is blocking (cannot proceed without the missing context): pause implementation, surface the gap to the orchestrator, and request a targeted re-run of `hatch3r-researcher` with the specific modes needed.
|
|
99
|
+
- If the gap is non-blocking (can proceed with assumptions): document the assumption, continue implementation, and flag for reviewer attention in Phase 3.
|
|
100
|
+
|
|
85
101
|
### Per-Task Mini-Review
|
|
86
102
|
|
|
87
103
|
For multi-sub-task implementations, the implementer performs a lightweight mini-review after each sub-task: verify correctness, check interface contracts, validate no regressions, gate progression. Mini-reviews are internal (no separate reviewer agent).
|
|
@@ -93,17 +109,47 @@ For multi-sub-task implementations, the implementer performs a lightweight mini-
|
|
|
93
109
|
1. Spawn `hatch3r-reviewer` with diff and acceptance criteria. Reviewer includes blast radius summary.
|
|
94
110
|
2. Critical/Warning findings: spawn `hatch3r-fixer` with full reviewer output.
|
|
95
111
|
3. Re-review after fixes. Repeat until 0 Critical + 0 Warning, or max 3 iterations.
|
|
96
|
-
4. **Confirmation pass** after clean review: lightweight re-review for fix-driven regressions and acceptance criteria completeness.
|
|
97
|
-
5. Max iterations reached: surface to user
|
|
112
|
+
4. **Confirmation pass** after clean review: lightweight re-review for fix-driven regressions and acceptance criteria completeness. The confirmation pass checks only: (a) no new test failures compared to Phase 2 baseline, (b) no type errors introduced, (c) acceptance criteria from the issue are still met. It does not re-run the full review checklist.
|
|
113
|
+
5. Max iterations reached: surface to user with a structured summary: iteration count, remaining Critical findings (with file:line), remaining Warning findings, and a recommendation (fix manually vs. accept risk). Never present raw reviewer output without summarization.
|
|
114
|
+
6. **Review gate confidence signal:** When the review loop exits with a clean verdict, record the iteration count in `PipelineContext.reviewResult.iterations`. Clean-on-first-pass (iteration 1) signals higher confidence than clean-after-multiple-iterations (iteration 2-3). Phase 4 specialists and the orchestrator should factor this into their risk assessment.
|
|
98
115
|
|
|
99
116
|
**Phase 4 — Final Quality** (after review loop is clean):
|
|
100
117
|
|
|
101
|
-
Launch parallel subagents
|
|
118
|
+
Launch parallel subagents -- no artificial concurrency limit.
|
|
102
119
|
|
|
103
120
|
- **Always:** `hatch3r-test-writer`, `hatch3r-security-auditor`
|
|
104
121
|
- **Evaluate:** `hatch3r-docs-writer` (when APIs/architecture/UX affected)
|
|
105
122
|
- **Conditional:** `hatch3r-lint-fixer`, `hatch3r-a11y-auditor`, `hatch3r-perf-profiler`, `hatch3r-dependency-auditor`, `hatch3r-architect`, `hatch3r-devops`
|
|
106
123
|
|
|
124
|
+
**Specialist Prompt Enrichment:** When spawning Phase 4 specialists, include:
|
|
125
|
+
- The `filesChanged` list from Phase 2 so specialists focus on affected code.
|
|
126
|
+
- The review verdict summary from Phase 3 so specialists do not re-flag already-reviewed issues.
|
|
127
|
+
- The `researchFindings.blastRadius` so specialists can assess downstream impact of their changes.
|
|
128
|
+
|
|
129
|
+
**Phase 4 Specialist Trigger Table:**
|
|
130
|
+
|
|
131
|
+
| Specialist | Mode | Trigger Conditions |
|
|
132
|
+
|-----------|------|--------------------|
|
|
133
|
+
| `hatch3r-test-writer` | Always | Any code change |
|
|
134
|
+
| `hatch3r-security-auditor` | Always | Any code change |
|
|
135
|
+
| `hatch3r-docs-writer` | Evaluate | Public API, architecture, or UX changes |
|
|
136
|
+
| `hatch3r-lint-fixer` | Conditional | Lint/type errors present |
|
|
137
|
+
| `hatch3r-a11y-auditor` | Conditional | UI/accessibility changes |
|
|
138
|
+
| `hatch3r-perf-profiler` | Conditional | Performance-sensitive changes |
|
|
139
|
+
| `hatch3r-dependency-auditor` | Conditional | Dependency files modified (package.json, go.mod, Cargo.toml, requirements.txt, Gemfile, pom.xml, pubspec.yaml, mix.exs, composer.json, and their lockfiles) |
|
|
140
|
+
| `hatch3r-architect` | Conditional | Architectural decisions, new modules/services |
|
|
141
|
+
| `hatch3r-devops` | Conditional | CI/CD or infrastructure changes |
|
|
142
|
+
|
|
143
|
+
**Project-Type-Aware Specialist Selection:**
|
|
144
|
+
|
|
145
|
+
When `PipelineContext.projectType` is available (populated from repo analysis), use the detected languages and frameworks to enrich specialist prompts with language-specific hints. For example:
|
|
146
|
+
- **TypeScript/JavaScript:** Include strict mode checks for lint-fixer, framework-specific test patterns for test-writer.
|
|
147
|
+
- **Python:** Include ruff/mypy hints for lint-fixer, pytest patterns for test-writer, SSTI/SQLi checks for security-auditor.
|
|
148
|
+
- **Go:** Include golangci-lint for lint-fixer, govulncheck for security-auditor, table-driven test patterns for test-writer.
|
|
149
|
+
- **Rust:** Include clippy lints for lint-fixer, cargo-audit for security-auditor.
|
|
150
|
+
|
|
151
|
+
See `src/pipeline/pipelineContext.ts` for the full `LANGUAGE_SPECIALIST_CONFIGS` mapping.
|
|
152
|
+
|
|
107
153
|
### Phase 4 Validation Pass
|
|
108
154
|
|
|
109
155
|
After all Phase 4 specialists complete, run a validation pass to catch regressions:
|
|
@@ -112,6 +158,7 @@ After all Phase 4 specialists complete, run a validation pass to catch regressio
|
|
|
112
158
|
2. No new failures: proceed to completion.
|
|
113
159
|
3. New failures: identify causing specialist, spawn `hatch3r-fixer`, re-validate (max 2 iterations).
|
|
114
160
|
4. Persistent regressions: surface to user. Do not silently accept.
|
|
161
|
+
5. If any specialist produced code fixes (not just findings), spawn a lightweight `hatch3r-reviewer` re-review scoped to files modified by Phase 4 specialists. This prevents specialist fixes from bypassing the Phase 3 review gate. Max 1 re-review iteration; Critical findings trigger a single fixer pass.
|
|
115
162
|
|
|
116
163
|
### Specialist Success Criteria
|
|
117
164
|
|
|
@@ -149,6 +196,15 @@ Skill-referenced agent delegations are mandatory.
|
|
|
149
196
|
3. Launch independent subagents in parallel — maximum parallelism.
|
|
150
197
|
4. Await and review results. Surface BLOCKED or PARTIAL to user.
|
|
151
198
|
|
|
199
|
+
## Cross-Phase Error Propagation
|
|
200
|
+
|
|
201
|
+
When a phase produces a non-SUCCESS status, the orchestrator must propagate error context to downstream phases rather than silently dropping it:
|
|
202
|
+
|
|
203
|
+
1. **Phase 1 PARTIAL** (incomplete research): Include the `researchGaps` list in the implementer prompt so the implementer knows which areas lack verified context. Set implementer confidence expectations accordingly.
|
|
204
|
+
2. **Phase 2 PARTIAL** (incomplete implementation): Include the `reason` field and list of unimplemented acceptance criteria in the reviewer prompt. The reviewer must distinguish between "not done yet" and "done incorrectly."
|
|
205
|
+
3. **Phase 3 UNRESOLVED** (review loop exhausted): Include the unresolved findings list in the Phase 4 specialist prompts. Specialists must not introduce changes that conflict with known unresolved issues.
|
|
206
|
+
4. **Phase 4 specialist FAILED**: Include the failure reason when surfacing to the user. Never report "Phase 4 failed" without specifying which specialist failed and why.
|
|
207
|
+
|
|
152
208
|
## Correlation ID
|
|
153
209
|
|
|
154
210
|
Generate a UUID v4 per top-level task before Phase 1. Include in every subagent prompt as `correlation_id`. All subagents include it in logs, outputs, and status reports. Epic sub-issues get individual IDs; batch tasks share one ID with a sub-task index.
|
|
@@ -175,11 +231,37 @@ All subagents MUST map findings to this scale.
|
|
|
175
231
|
| **SKIPPED** | Intentionally not executed. |
|
|
176
232
|
| **TIMEOUT** | Time budget exceeded; forward partial output. |
|
|
177
233
|
|
|
234
|
+
## Phase Skip Criteria
|
|
235
|
+
|
|
236
|
+
This section defines consistent criteria for when each pipeline phase can be safely skipped. All commands that use the pipeline MUST reference these criteria — do not invent command-specific skip rules.
|
|
237
|
+
|
|
238
|
+
| Phase | Can Skip When | Mandatory Minimum (even when skipped) |
|
|
239
|
+
|-------|--------------|--------------------------------------|
|
|
240
|
+
| **Phase 1 (Research)** | Trivial single-line edit (typo, comment, single-value config); Tier 1 single-file change with no cross-module impact; Research already cached in PipelineContext | Affected files identified (even via quick scan); existing tests noted |
|
|
241
|
+
| **Phase 2 (Implement)** | Never — implementation is always required for code changes | All changes via hatch3r-implementer (never inline except trivial items in quick-change) |
|
|
242
|
+
| **Phase 3 (Review)** | All items trivial (quick-change only); documentation-only change with no code | Quality checks (lint/typecheck/test) must pass; acceptance criteria verified |
|
|
243
|
+
| **Phase 4 (Quality)** | Review loop unresolved AND user chose manual resolution; documentation-only; all trivial + quality checks pass (quick-change only) | test-writer + security-auditor always required for code changes; quality checks must pass |
|
|
244
|
+
|
|
245
|
+
See `src/pipeline/pipelineContext.ts` for the programmatic `PHASE_SKIP_CRITERIA` constant.
|
|
246
|
+
|
|
247
|
+
## Root-Cause Depth Requirements
|
|
248
|
+
|
|
249
|
+
When a pipeline phase reports a failure or unexpected result, the orchestrator must perform root-cause classification before deciding the next action:
|
|
250
|
+
|
|
251
|
+
| Symptom | Shallow Fix (avoid) | Root-Cause Fix (required) |
|
|
252
|
+
|---------|---------------------|---------------------------|
|
|
253
|
+
| Test failure after Phase 2 | Disable or skip the failing test | Identify why the implementation breaks the test -- fix the code or update the test with justification |
|
|
254
|
+
| Lint errors after Phase 4 | Add `eslint-disable` comments | Fix the underlying code pattern that triggers the lint rule |
|
|
255
|
+
| Type errors after fixer changes | Cast with `as any` | Trace the type mismatch to its source and fix the type definition or usage |
|
|
256
|
+
| Review loop not converging | Surface to user after 3 iterations without analyzing why the loop failed to converge | Classify whether findings are oscillating (fixer A breaks what fixer B fixed) and surface the conflict pattern |
|
|
257
|
+
|
|
258
|
+
The orchestrator must reject superficial fixes from any subagent. If a subagent's output contains suppression patterns (disable comments, `any` casts, test skips without linked issues), classify its result as PARTIAL and re-run that subagent with an adjusted prompt that requests a root-cause fix.
|
|
259
|
+
|
|
178
260
|
## Task Context Protocols
|
|
179
261
|
|
|
180
262
|
**Single-task plain chat:** Classify task type, create synthetic issue context, run full pipeline. For issue references, fetch details via platform CLI.
|
|
181
263
|
|
|
182
|
-
**Multi-task plain chat:** Parse into discrete tasks, classify each, build dependency graph, parallelize researchers and implementers per dependency level, run review loop after all implementations, then Phase 4 specialists.
|
|
264
|
+
**Multi-task plain chat:** Parse into discrete tasks, classify each, build dependency graph, parallelize researchers and implementers per dependency level, run review loop after all implementations, then Phase 4 specialists. When parallel implementers modify the same file: accept disjoint region edits, merge overlapping regions using the larger-scope change as base, and halt on semantic conflicts (contradictory interface/contract changes) for user resolution.
|
|
183
265
|
|
|
184
266
|
**Auto-mode guardrails:** In unattended execution, verify scope containment, no unapproved destructive operations, and output schema compliance after each phase. Halt on violation. See `hatch3r-agent-orchestration-detail` for full guardrail specifications.
|
|
185
267
|
|
|
@@ -201,6 +283,6 @@ All `scope: always` rules apply to every task including subagent work. Include r
|
|
|
201
283
|
- `hatch3r-dependency-management` -- dependency-auditor
|
|
202
284
|
|
|
203
285
|
**Tier 3 -- On-demand:**
|
|
204
|
-
- `hatch3r-api-design`, `hatch3r-secrets-management`, `hatch3r-data-classification`, `hatch3r-performance-budgets`, `hatch3r-browser-verification`, `hatch3r-component-conventions`, `hatch3r-i18n`, `hatch3r-theming`, `hatch3r-migrations`, `hatch3r-feature-flags`, `hatch3r-observability`
|
|
286
|
+
- `hatch3r-api-design`, `hatch3r-secrets-management`, `hatch3r-data-classification`, `hatch3r-performance-budgets`, `hatch3r-browser-verification`, `hatch3r-component-conventions`, `hatch3r-i18n`, `hatch3r-theming`, `hatch3r-migrations`, `hatch3r-feature-flags`, `hatch3r-observability-logging`, `hatch3r-observability-metrics`, `hatch3r-observability-tracing`, `hatch3r-observability-tracing-detail`
|
|
205
287
|
|
|
206
288
|
For limited context windows, Tier 1 is mandatory. Tier 2/3 included selectively by agent role and task scope.
|
|
@@ -79,6 +79,21 @@ Spawn `hatch3r-implementer` via Task tool for ALL code changes. Never implement
|
|
|
79
79
|
|
|
80
80
|
**Implementer prompt enrichment (Tier 2+):** Include `similar-implementation` findings as "Reference Conventions", resolved `requirements-elicitation` answers as "Resolved Requirements", and blast radius data (Tier 3 only).
|
|
81
81
|
|
|
82
|
+
### Mid-Implementation Research Gap Checkpoint
|
|
83
|
+
|
|
84
|
+
At the midpoint of Phase 2 (after initial files are modified but before completion), the implementer MUST evaluate whether research gaps exist. This prevents discovering missing context too late in the pipeline.
|
|
85
|
+
|
|
86
|
+
**Checkpoint triggers:**
|
|
87
|
+
1. Implementation requires modifying a file not listed in `researchFindings.affectedFiles`.
|
|
88
|
+
2. An undocumented dependency or integration point is discovered.
|
|
89
|
+
3. The implementer's confidence drops below "medium" for any sub-task.
|
|
90
|
+
4. A test file expected from research does not exist or covers different behavior.
|
|
91
|
+
|
|
92
|
+
**Actions when gaps are detected:**
|
|
93
|
+
- Log the gap in `PipelineContext.researchGaps`.
|
|
94
|
+
- If the gap is blocking (cannot proceed without the missing context): pause implementation, surface the gap to the orchestrator, and request a targeted re-run of `hatch3r-researcher` with the specific modes needed.
|
|
95
|
+
- If the gap is non-blocking (can proceed with assumptions): document the assumption, continue implementation, and flag for reviewer attention in Phase 3.
|
|
96
|
+
|
|
82
97
|
### Per-Task Mini-Review
|
|
83
98
|
|
|
84
99
|
For multi-sub-task implementations, the implementer performs a lightweight mini-review after each sub-task: verify correctness, check interface contracts, validate no regressions, gate progression. Mini-reviews are internal (no separate reviewer agent).
|
|
@@ -90,17 +105,46 @@ For multi-sub-task implementations, the implementer performs a lightweight mini-
|
|
|
90
105
|
1. Spawn `hatch3r-reviewer` with diff and acceptance criteria. Reviewer includes blast radius summary.
|
|
91
106
|
2. Critical/Warning findings: spawn `hatch3r-fixer` with full reviewer output.
|
|
92
107
|
3. Re-review after fixes. Repeat until 0 Critical + 0 Warning, or max 3 iterations.
|
|
93
|
-
4. **Confirmation pass** after clean review: lightweight re-review for fix-driven regressions and acceptance criteria completeness.
|
|
94
|
-
5. Max iterations reached: surface to user
|
|
108
|
+
4. **Confirmation pass** after clean review: lightweight re-review for fix-driven regressions and acceptance criteria completeness. The confirmation pass checks only: (a) no new test failures compared to Phase 2 baseline, (b) no type errors introduced, (c) acceptance criteria from the issue are still met. It does not re-run the full review checklist.
|
|
109
|
+
5. Max iterations reached: surface to user with a structured summary: iteration count, remaining Critical findings (with file:line), remaining Warning findings, and a recommendation (fix manually vs. accept risk). Never present raw reviewer output without summarization.
|
|
95
110
|
|
|
96
111
|
**Phase 4 — Final Quality** (after review loop is clean):
|
|
97
112
|
|
|
98
|
-
Launch parallel subagents
|
|
113
|
+
Launch parallel subagents with no artificial concurrency limit:
|
|
99
114
|
|
|
100
115
|
- **Always:** `hatch3r-test-writer`, `hatch3r-security-auditor`
|
|
101
116
|
- **Evaluate:** `hatch3r-docs-writer` (when APIs/architecture/UX affected)
|
|
102
117
|
- **Conditional:** `hatch3r-lint-fixer`, `hatch3r-a11y-auditor`, `hatch3r-perf-profiler`, `hatch3r-dependency-auditor`, `hatch3r-architect`, `hatch3r-devops`
|
|
103
118
|
|
|
119
|
+
**Specialist Prompt Enrichment:** When spawning Phase 4 specialists, include:
|
|
120
|
+
- The `filesChanged` list from Phase 2 so specialists focus on affected code.
|
|
121
|
+
- The review verdict summary from Phase 3 so specialists do not re-flag already-reviewed issues.
|
|
122
|
+
- The `researchFindings.blastRadius` so specialists can assess downstream impact of their changes.
|
|
123
|
+
|
|
124
|
+
**Phase 4 Specialist Trigger Table:**
|
|
125
|
+
|
|
126
|
+
| Specialist | Mode | Trigger Conditions |
|
|
127
|
+
|-----------|------|--------------------|
|
|
128
|
+
| `hatch3r-test-writer` | Always | Any code change |
|
|
129
|
+
| `hatch3r-security-auditor` | Always | Any code change |
|
|
130
|
+
| `hatch3r-docs-writer` | Evaluate | Public API, architecture, or UX changes |
|
|
131
|
+
| `hatch3r-lint-fixer` | Conditional | Lint/type errors present |
|
|
132
|
+
| `hatch3r-a11y-auditor` | Conditional | UI/accessibility changes |
|
|
133
|
+
| `hatch3r-perf-profiler` | Conditional | Performance-sensitive changes |
|
|
134
|
+
| `hatch3r-dependency-auditor` | Conditional | Dependency files modified (package.json, go.mod, Cargo.toml, requirements.txt, Gemfile, pom.xml, pubspec.yaml, mix.exs, composer.json, and their lockfiles) |
|
|
135
|
+
| `hatch3r-architect` | Conditional | Architectural decisions, new modules/services |
|
|
136
|
+
| `hatch3r-devops` | Conditional | CI/CD or infrastructure changes |
|
|
137
|
+
|
|
138
|
+
**Project-Type-Aware Specialist Selection:**
|
|
139
|
+
|
|
140
|
+
When `PipelineContext.projectType` is available (populated from repo analysis), use the detected languages and frameworks to enrich specialist prompts with language-specific hints. For example:
|
|
141
|
+
- **TypeScript/JavaScript:** Include strict mode checks for lint-fixer, framework-specific test patterns for test-writer.
|
|
142
|
+
- **Python:** Include ruff/mypy hints for lint-fixer, pytest patterns for test-writer, SSTI/SQLi checks for security-auditor.
|
|
143
|
+
- **Go:** Include golangci-lint for lint-fixer, govulncheck for security-auditor, table-driven test patterns for test-writer.
|
|
144
|
+
- **Rust:** Include clippy lints for lint-fixer, cargo-audit for security-auditor.
|
|
145
|
+
|
|
146
|
+
See `src/pipeline/pipelineContext.ts` for the full `LANGUAGE_SPECIALIST_CONFIGS` mapping.
|
|
147
|
+
|
|
104
148
|
### Phase 4 Validation Pass
|
|
105
149
|
|
|
106
150
|
After all Phase 4 specialists complete, run a validation pass to catch regressions:
|
|
@@ -109,6 +153,7 @@ After all Phase 4 specialists complete, run a validation pass to catch regressio
|
|
|
109
153
|
2. No new failures: proceed to completion.
|
|
110
154
|
3. New failures: identify causing specialist, spawn `hatch3r-fixer`, re-validate (max 2 iterations).
|
|
111
155
|
4. Persistent regressions: surface to user. Do not silently accept.
|
|
156
|
+
5. If any specialist produced code fixes (not just findings), spawn a lightweight `hatch3r-reviewer` re-review scoped to files modified by Phase 4 specialists. This prevents specialist fixes from bypassing the Phase 3 review gate. Max 1 re-review iteration; Critical findings trigger a single fixer pass.
|
|
112
157
|
|
|
113
158
|
### Specialist Success Criteria
|
|
114
159
|
|
|
@@ -146,6 +191,15 @@ Skill-referenced agent delegations are mandatory.
|
|
|
146
191
|
3. Launch independent subagents in parallel — maximum parallelism.
|
|
147
192
|
4. Await and review results. Surface BLOCKED or PARTIAL to user.
|
|
148
193
|
|
|
194
|
+
## Cross-Phase Error Propagation
|
|
195
|
+
|
|
196
|
+
When a phase produces a non-SUCCESS status, the orchestrator must propagate error context to downstream phases rather than silently dropping it:
|
|
197
|
+
|
|
198
|
+
1. **Phase 1 PARTIAL** (incomplete research): Include the `researchGaps` list in the implementer prompt so the implementer knows which areas lack verified context. Set implementer confidence expectations accordingly.
|
|
199
|
+
2. **Phase 2 PARTIAL** (incomplete implementation): Include the `reason` field and list of unimplemented acceptance criteria in the reviewer prompt. The reviewer must distinguish between "not done yet" and "done incorrectly."
|
|
200
|
+
3. **Phase 3 UNRESOLVED** (review loop exhausted): Include the unresolved findings list in the Phase 4 specialist prompts. Specialists must not introduce changes that conflict with known unresolved issues.
|
|
201
|
+
4. **Phase 4 specialist FAILED**: Include the failure reason when surfacing to the user. Never report "Phase 4 failed" without specifying which specialist failed and why.
|
|
202
|
+
|
|
149
203
|
## Correlation ID
|
|
150
204
|
|
|
151
205
|
Generate a UUID v4 per top-level task before Phase 1. Include in every subagent prompt as `correlation_id`. All subagents include it in logs, outputs, and status reports. Epic sub-issues get individual IDs; batch tasks share one ID with a sub-task index.
|
|
@@ -172,11 +226,37 @@ All subagents MUST map findings to this scale.
|
|
|
172
226
|
| **SKIPPED** | Intentionally not executed. |
|
|
173
227
|
| **TIMEOUT** | Time budget exceeded; forward partial output. |
|
|
174
228
|
|
|
229
|
+
## Phase Skip Criteria
|
|
230
|
+
|
|
231
|
+
This section defines consistent criteria for when each pipeline phase can be safely skipped. All commands that use the pipeline MUST reference these criteria — do not invent command-specific skip rules.
|
|
232
|
+
|
|
233
|
+
| Phase | Can Skip When | Mandatory Minimum (even when skipped) |
|
|
234
|
+
|-------|--------------|--------------------------------------|
|
|
235
|
+
| **Phase 1 (Research)** | Trivial single-line edit (typo, comment, single-value config); Tier 1 single-file change with no cross-module impact; Research already cached in PipelineContext | Affected files identified (even via quick scan); existing tests noted |
|
|
236
|
+
| **Phase 2 (Implement)** | Never — implementation is always required for code changes | All changes via hatch3r-implementer (never inline except trivial items in quick-change) |
|
|
237
|
+
| **Phase 3 (Review)** | All items trivial (quick-change only); documentation-only change with no code | Quality checks (lint/typecheck/test) must pass; acceptance criteria verified |
|
|
238
|
+
| **Phase 4 (Quality)** | Review loop unresolved AND user chose manual resolution; documentation-only; all trivial + quality checks pass (quick-change only) | test-writer + security-auditor always required for code changes; quality checks must pass |
|
|
239
|
+
|
|
240
|
+
See `src/pipeline/pipelineContext.ts` for the programmatic `PHASE_SKIP_CRITERIA` constant.
|
|
241
|
+
|
|
242
|
+
## Root-Cause Depth Requirements
|
|
243
|
+
|
|
244
|
+
When a pipeline phase reports a failure or unexpected result, the orchestrator must perform root-cause classification before deciding the next action:
|
|
245
|
+
|
|
246
|
+
| Symptom | Shallow Fix (avoid) | Root-Cause Fix (required) |
|
|
247
|
+
|---------|---------------------|---------------------------|
|
|
248
|
+
| Test failure after Phase 2 | Disable or skip the failing test | Identify why the implementation breaks the test -- fix the code or update the test with justification |
|
|
249
|
+
| Lint errors after Phase 4 | Add `eslint-disable` comments | Fix the underlying code pattern that triggers the lint rule |
|
|
250
|
+
| Type errors after fixer changes | Cast with `as any` | Trace the type mismatch to its source and fix the type definition or usage |
|
|
251
|
+
| Review loop not converging | Surface to user after 3 iterations without analyzing why the loop failed to converge | Classify whether findings are oscillating (fixer A breaks what fixer B fixed) and surface the conflict pattern |
|
|
252
|
+
|
|
253
|
+
The orchestrator must reject superficial fixes from any subagent. If a subagent's output contains suppression patterns (disable comments, `any` casts, test skips without linked issues), classify its result as PARTIAL and re-run that subagent with an adjusted prompt that requests a root-cause fix.
|
|
254
|
+
|
|
175
255
|
## Task Context Protocols
|
|
176
256
|
|
|
177
257
|
**Single-task plain chat:** Classify task type, create synthetic issue context, run full pipeline. For issue references, fetch details via platform CLI.
|
|
178
258
|
|
|
179
|
-
**Multi-task plain chat:** Parse into discrete tasks, classify each, build dependency graph, parallelize researchers and implementers per dependency level, run review loop after all implementations, then Phase 4 specialists.
|
|
259
|
+
**Multi-task plain chat:** Parse into discrete tasks, classify each, build dependency graph, parallelize researchers and implementers per dependency level, run review loop after all implementations, then Phase 4 specialists. When parallel implementers modify the same file: accept disjoint region edits, merge overlapping regions using the larger-scope change as base, and halt on semantic conflicts (contradictory interface/contract changes) for user resolution.
|
|
180
260
|
|
|
181
261
|
**Auto-mode guardrails:** In unattended execution, verify scope containment, no unapproved destructive operations, and output schema compliance after each phase. Halt on violation. See `hatch3r-agent-orchestration-detail` for full guardrail specifications.
|
|
182
262
|
|
|
@@ -198,6 +278,6 @@ All `scope: always` rules apply to every task including subagent work. Include r
|
|
|
198
278
|
- `hatch3r-dependency-management` -- dependency-auditor
|
|
199
279
|
|
|
200
280
|
**Tier 3 -- On-demand:**
|
|
201
|
-
- `hatch3r-api-design`, `hatch3r-secrets-management`, `hatch3r-data-classification`, `hatch3r-performance-budgets`, `hatch3r-browser-verification`, `hatch3r-component-conventions`, `hatch3r-i18n`, `hatch3r-theming`, `hatch3r-migrations`, `hatch3r-feature-flags`, `hatch3r-observability`
|
|
281
|
+
- `hatch3r-api-design`, `hatch3r-secrets-management`, `hatch3r-data-classification`, `hatch3r-performance-budgets`, `hatch3r-browser-verification`, `hatch3r-component-conventions`, `hatch3r-i18n`, `hatch3r-theming`, `hatch3r-migrations`, `hatch3r-feature-flags`, `hatch3r-observability-logging`, `hatch3r-observability-metrics`, `hatch3r-observability-tracing`, `hatch3r-observability-tracing-detail`
|
|
202
282
|
|
|
203
283
|
For limited context windows, Tier 1 is mandatory. Tier 2/3 included selectively by agent role and task scope.
|
|
@@ -2,8 +2,9 @@
|
|
|
2
2
|
id: hatch3r-api-design
|
|
3
3
|
type: rule
|
|
4
4
|
description: API endpoint and contract design patterns for the project
|
|
5
|
-
scope:
|
|
5
|
+
scope: "**/api/**,**/routes/**,**/controllers/**,**/endpoints/**,**/*route*,**/*controller*,**/*endpoint*,**/*handler*,**/graphql/**,**/trpc/**"
|
|
6
6
|
tags: [planning]
|
|
7
|
+
quality_charter: agents/shared/quality-charter.md
|
|
7
8
|
---
|
|
8
9
|
# API Design
|
|
9
10
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
description: API endpoint and contract design patterns for the project
|
|
3
|
-
|
|
3
|
+
globs: ["**/api/**", "**/routes/**", "**/controllers/**", "**/endpoints/**", "**/*route*", "**/*controller*", "**/*endpoint*", "**/*handler*", "**/graphql/**", "**/trpc/**"]
|
|
4
4
|
---
|
|
5
5
|
# API Design
|
|
6
6
|
|