mustard-claude 3.1.28 → 3.1.30
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/templates/commands/mustard/feature/SKILL.md +1 -1
- package/templates/commands/mustard/resume/SKILL.md +45 -9
- package/templates/commands/mustard/templates/agent-prompt/SKILL.md +59 -0
- package/templates/hooks/__tests__/hooks.test.js +13 -16
- package/templates/hooks/_lib/knowledge-extract.js +6 -4
- package/templates/scripts/metrics-collect.js +7 -6
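package/package.json
CHANGED
(+1 -1 — presumably the version field bump 3.1.28 → 3.1.30; the hunk is not included in this extract)
package/templates/commands/mustard/feature/SKILL.md
CHANGED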
|
@@ -266,7 +266,7 @@ After each agent returns, check the return value for an escalation status before
|
|
|
266
266
|
|
|
267
267
|
If two or more agents in the same wave return `CONCERN`, surface all concerns together before starting the next wave. See `.claude/pipeline-config.md` Escalation Statuses and Diagnostic Failure Routing for the full status table.
|
|
268
268
|
|
|
269
|
-
9. **REVIEW** — dispatch review agent for each affected subproject (reads guards + relevant skills, runs 7-category checklist: SOLID, Design System, Patterns, i18n, Integration, Build, Elegance). REJECTED →
|
|
269
|
+
9. **REVIEW** — dispatch review agent for each affected subproject (reads guards + relevant skills, runs 7-category checklist: SOLID, Design System, Patterns, i18n, Integration, Build, Elegance). REJECTED → see `resume/SKILL.md § Fix Loop Dispatch Protocol` (max 2 loops).
|
|
270
270
|
|
|
271
271
|
Re-reviews always dispatch with `model: "sonnet"` (see `review/SKILL.md § Model Selection`).
|
|
272
272
|
10. All passed + APPROVED → CLOSE flow inline (sync registry, move spec, cleanup state)
|
|
package/templates/commands/mustard/resume/SKILL.md
CHANGED
|
@@ -110,7 +110,7 @@ Run `node .claude/scripts/diff-context.js --subproject {subproject_path}` per su
|
|
|
110
110
|
- `{entity_info}` → `_patterns` type, refs, subs from registry
|
|
111
111
|
- `{role}`, `{boundary}`, `{return_sections}` → from Role Rules table in config
|
|
112
112
|
- `{validate_command}`, `{build_command}` → from Agents table in config
|
|
113
|
-
- `{retry_context}` → empty on first dispatch
|
|
113
|
+
- `{retry_context}` → empty on first dispatch. On retry, fill per `agent-prompt/SKILL.md § Retry Modes`. Granular retries use Step 4 § Granular Retry Protocol. Fix-loops (after REJECTED review) use Step 19b § Fix Loop Dispatch Protocol.
|
|
114
114
|
- `{task_steps}` → checkboxed steps from spec
|
|
115
115
|
- `{recommended_skills}` → from Skill Recommendations in `.claude/pipeline-config.md`:
|
|
116
116
|
1. Glob `{subproject}/.claude/skills/` for generated pattern skills
|
|
@@ -152,8 +152,36 @@ If two or more agents in the same wave return `CONCERN`, surface all concerns to
|
|
|
152
152
|
- Checklist categories: **SOLID, Design System, Patterns, i18n, Integration, Build, Elegance**
|
|
153
153
|
- Each issue classified: CRITICAL (blocks), WARNING (recommended), NOTE (suggestion)
|
|
154
154
|
- APPROVED (zero CRITICAL) → CLOSE
|
|
155
|
-
- REJECTED (any CRITICAL) →
|
|
155
|
+
- REJECTED (any CRITICAL) → see Step 19b § Fix Loop Dispatch Protocol (max 2 loops)
|
|
156
156
|
- **NEVER skip review** — not even for Light scope. Light scope gets same checklist, just fewer files to review
|
|
157
|
+
|
|
158
|
+
### Step 19b: Fix Loop Dispatch Protocol
|
|
159
|
+
|
|
160
|
+
When REVIEW returns REJECTED (any CRITICAL):
|
|
161
|
+
|
|
162
|
+
1. Read `.claude/.agent-memory/_index.json` and find the last entry where `agent_type == {review_target_agent_type}` and `pipeline == {spec-name}`. If no such entry exists (unexpected, but handle it defensively), fall back to the first-dispatch template.
|
|
163
|
+
2. Extract:
|
|
164
|
+
- `prior_summary` ← `entry.summary`
|
|
165
|
+
- `files_modified` ← `entry.details.files_modified` (list)
|
|
166
|
+
3. Extract review findings VERBATIM:
|
|
167
|
+
- All CRITICAL findings (required)
|
|
168
|
+
- All WARNING findings (optional — include if fix is cheap)
|
|
169
|
+
- Copy the exact text returned by the review agent; do NOT paraphrase
|
|
170
|
+
4. Compose `{retry_context}` using Mode=fix-loop format (see `agent-prompt/SKILL.md § Retry Modes`). Set K = current loop number (1 or 2; max 2 fix-loops):
|
|
171
|
+
```
|
|
172
|
+
## RETRY CONTEXT
|
|
173
|
+
**Mode:** fix-loop ({K}/2)
|
|
174
|
+
**Prior dispatch:** {prior_summary}
|
|
175
|
+
**Files modified previously:**
|
|
176
|
+
{files_modified}
|
|
177
|
+
**Review findings (verbatim):**
|
|
178
|
+
{findings_verbatim}
|
|
179
|
+
```
|
|
180
|
+
5. Render the **Minimal Retry Template** from `agent-prompt/SKILL.md § Retry Modes` (skips CONTEXT/REFERENCE/ENTITY/SKILLS/WEB VALIDATION/ROLE/RECIPE).
|
|
181
|
+
6. Dispatch the same `subagent_type` + `model` as the original impl agent (do NOT change the role or model).
|
|
182
|
+
7. On return, re-dispatch REVIEW agent (normal dispatch, not retry — review is read-only).
|
|
183
|
+
8. If review still REJECTED after 2 fix-loops: STOP + report exhausted retries.
|
|
184
|
+
|
|
157
185
|
20. **CLOSE:**
|
|
158
186
|
- `node .claude/scripts/sync-registry.js`
|
|
159
187
|
- Spec: `Status: completed`, `Phase: CLOSE`, all `[ ]` → `[x]`
|
|
@@ -170,13 +198,21 @@ When an agent fails:
|
|
|
170
198
|
- Build error → retry from build step (don't redo edits)
|
|
171
199
|
- Edit error → retry from that edit step
|
|
172
200
|
- Unknown → retry all remaining unchecked steps
|
|
173
|
-
3. **Re-dispatch with retry context** — fill `{retry_context}`
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
201
|
+
3. **Re-dispatch with retry context** — fill `{retry_context}` using Mode=granular format:
|
|
202
|
+
- Read `.claude/.agent-memory/_index.json`, find last entry where `agent_type == {failed_agent_type}` and `pipeline == {spec-name}`
|
|
203
|
+
- Extract `entry.summary` → `prior_summary`; `entry.details.files_modified` → `files_modified` (list)
|
|
204
|
+
- Fill:
|
|
205
|
+
```
|
|
206
|
+
## RETRY CONTEXT
|
|
207
|
+
**Mode:** granular
|
|
208
|
+
**Prior dispatch:** {prior_summary}
|
|
209
|
+
**Files modified previously:**
|
|
210
|
+
{files_modified}
|
|
211
|
+
**Previous error:** {error_message}
|
|
212
|
+
**Resume from step:** {N+1}
|
|
213
|
+
```
|
|
214
|
+
- Set `{task_steps}` to only the remaining steps ({N+1} onwards)
|
|
215
|
+
- Use the **Minimal Retry Template** from `agent-prompt/SKILL.md § Retry Modes` (skips CONTEXT/REFERENCE/ENTITY/SKILLS/WEB VALIDATION/ROLE/RECIPE blocks)
|
|
180
216
|
4. **Spec checkboxes:** steps 1-{N} already `[x]`, remaining continue `[ ]`
|
|
181
217
|
5. **Max 2 retries per agent** — exhausted → STOP + report
|
|
182
218
|
|
|
package/templates/commands/mustard/templates/agent-prompt/SKILL.md
CHANGED
|
@@ -12,6 +12,8 @@ Single unified template for all dispatches:
|
|
|
12
12
|
|
|
13
13
|
## Dispatch Template
|
|
14
14
|
|
|
15
|
+
> **First-dispatch only.** When `{retry_context}` is non-empty (granular or fix-loop retry), use the **Minimal Retry Template** from `§ Retry Modes` instead — omit CONTEXT, REFERENCE, ENTITY, SKILLS, WEB VALIDATION, ROLE, and RECIPE blocks.
|
|
16
|
+
|
|
15
17
|
```
|
|
16
18
|
## CONTEXT
|
|
17
19
|
1. Read `{subproject}/CLAUDE.md` — guards, stack, paths
|
|
@@ -53,6 +55,63 @@ Guards carregados via CLAUDE.md acima — respeite sem exceção.
|
|
|
53
55
|
|
|
54
56
|
---
|
|
55
57
|
|
|
58
|
+
## Retry Modes
|
|
59
|
+
|
|
60
|
+
`{retry_context}` has 3 states:
|
|
61
|
+
|
|
62
|
+
| Mode | When | `{retry_context}` content |
|
|
63
|
+
|------|------|---------------------------|
|
|
64
|
+
| `empty` | First dispatch | Empty string — full Dispatch Template above is used |
|
|
65
|
+
| `granular` | A step failed (PARTIAL escalation) | Enriched retry header (see below) |
|
|
66
|
+
| `fix-loop` | Review returned REJECTED | Enriched retry header with verbatim findings (see below) |
|
|
67
|
+
|
|
68
|
+
`prior_summary` and `files_modified` come from the latest `.agent-memory/_index.json` entry matching `{agent_type, pipeline}`.
|
|
69
|
+
|
|
70
|
+
### `granular` format
|
|
71
|
+
|
|
72
|
+
```
|
|
73
|
+
## RETRY CONTEXT
|
|
74
|
+
**Mode:** granular
|
|
75
|
+
**Prior dispatch:** {prior_summary}
|
|
76
|
+
**Files modified previously:**
|
|
77
|
+
{files_modified}
|
|
78
|
+
**Previous error:** {error_message}
|
|
79
|
+
**Resume from step:** {N+1}
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
### `fix-loop` format
|
|
83
|
+
|
|
84
|
+
```
|
|
85
|
+
## RETRY CONTEXT
|
|
86
|
+
**Mode:** fix-loop ({K}/2)
|
|
87
|
+
**Prior dispatch:** {prior_summary}
|
|
88
|
+
**Files modified previously:**
|
|
89
|
+
{files_modified}
|
|
90
|
+
**Review findings (verbatim):**
|
|
91
|
+
{findings_verbatim}
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
### Minimal Retry Template
|
|
95
|
+
|
|
96
|
+
When `{retry_context}` is non-empty, the orchestrator renders this template instead of the full Dispatch Template. It omits the CONTEXT, REFERENCE, ENTITY, SKILLS, WEB VALIDATION, ROLE, and RECIPE blocks because the prior context is still cached. Do NOT re-read CLAUDE.md, guards, or the registry unless a modified file changed on disk since the last dispatch.
|
|
97
|
+
|
|
98
|
+
```
|
|
99
|
+
{retry_context}
|
|
100
|
+
|
|
101
|
+
## EFFICIENCY
|
|
102
|
+
- Absolute paths, no cd
|
|
103
|
+
- Read each file once (prior context cached — skip CLAUDE.md/guards/registry re-reads unless file changed on disk)
|
|
104
|
+
- Max 3 build attempts, then STOP + report
|
|
105
|
+
- Return cap: follow pipeline-config.md Max Return limits. Focus on: files changed + non-obvious decisions + blockers only.
|
|
106
|
+
|
|
107
|
+
## TASK
|
|
108
|
+
{task_steps}
|
|
109
|
+
|
|
110
|
+
Guards carregados via CLAUDE.md acima — respeite sem exceção.
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
---
|
|
114
|
+
|
|
56
115
|
## Skill-Based Context Loading
|
|
57
116
|
|
|
58
117
|
Skills provide progressive disclosure — agents load only what they need:
|
|
package/templates/hooks/__tests__/hooks.test.js
CHANGED
|
@@ -1814,21 +1814,19 @@ describe("knowledge-extract prescriptions", () => {
|
|
|
1814
1814
|
}];
|
|
1815
1815
|
|
|
1816
1816
|
const patterns = extractPatternsFromStates(states);
|
|
1817
|
-
// retries > 2 triggers the high-retry entry
|
|
1818
|
-
const retryEntry = patterns.find(p => p.name === "high-retry-login-feature");
|
|
1819
|
-
assert.ok(retryEntry, "Expected high-retry entry");
|
|
1817
|
+
// retries > 2 triggers the high-hook-retry entry
|
|
1818
|
+
const retryEntry = patterns.find(p => p.name === "high-hook-retry-login-feature");
|
|
1819
|
+
assert.ok(retryEntry, "Expected high-hook-retry entry");
|
|
1820
1820
|
assert.ok(retryEntry.prescription, "Expected prescription field");
|
|
1821
1821
|
assert.ok(
|
|
1822
1822
|
/delegate investigation via Task\(general-purpose\)/.test(retryEntry.prescription),
|
|
1823
1823
|
"Prescription should instruct delegation via Task(general-purpose)"
|
|
1824
1824
|
);
|
|
1825
1825
|
assert.ok(retryEntry.tags.includes("prescriptive"), "Tags should include 'prescriptive'");
|
|
1826
|
-
|
|
1827
|
-
assert.ok(retryEntry.tags.includes("retry"));
|
|
1826
|
+
assert.ok(retryEntry.tags.includes("hook-retry"));
|
|
1828
1827
|
assert.ok(retryEntry.tags.includes("pipeline"));
|
|
1829
1828
|
assert.ok(retryEntry.tags.includes("lesson"));
|
|
1830
|
-
|
|
1831
|
-
assert.ok(retryEntry.description.includes("4 retries"));
|
|
1829
|
+
assert.ok(retryEntry.description.includes("4 hook-level retries"));
|
|
1832
1830
|
});
|
|
1833
1831
|
|
|
1834
1832
|
it("should emit fragmentation prescription when apiCalls > 50 AND retries > 3", () => {
|
|
@@ -1843,7 +1841,7 @@ describe("knowledge-extract prescriptions", () => {
|
|
|
1843
1841
|
}];
|
|
1844
1842
|
|
|
1845
1843
|
const patterns = extractPatternsFromStates(states);
|
|
1846
|
-
// apiCalls > 50 triggers heavy-pipeline; retries > 2 also triggers high-retry.
|
|
1844
|
+
// apiCalls > 50 triggers heavy-pipeline; retries > 2 also triggers high-hook-retry.
|
|
1847
1845
|
const heavyEntry = patterns.find(p => p.name === "heavy-pipeline-big-refactor");
|
|
1848
1846
|
assert.ok(heavyEntry, "Expected heavy-pipeline entry");
|
|
1849
1847
|
assert.ok(heavyEntry.prescription, "Expected prescription field");
|
|
@@ -1858,7 +1856,7 @@ describe("knowledge-extract prescriptions", () => {
|
|
|
1858
1856
|
});
|
|
1859
1857
|
|
|
1860
1858
|
it("should emit reactive-iteration prescription when Edit > 15 and Write < 3", () => {
|
|
1861
|
-
// Edit=20 > 15, Write=1 < 3, retries=3 to trigger the high-retry entry
|
|
1859
|
+
// Edit=20 > 15, Write=1 < 3, retries=3 to trigger the high-hook-retry entry
|
|
1862
1860
|
// (needs retries > 2 OR apiCalls > 50 to produce any entry at all).
|
|
1863
1861
|
// Pick retries=3 and small Bash/Agent to avoid L0-violation heuristic dominance
|
|
1864
1862
|
// but note: the heuristic checks order — L0 fires first if bash+edit>3*agent AND retries>2.
|
|
@@ -1873,8 +1871,8 @@ describe("knowledge-extract prescriptions", () => {
|
|
|
1873
1871
|
}];
|
|
1874
1872
|
|
|
1875
1873
|
const patterns = extractPatternsFromStates(states);
|
|
1876
|
-
const retryEntry = patterns.find(p => p.name === "high-retry-tweak-hell");
|
|
1877
|
-
assert.ok(retryEntry, "Expected high-retry entry");
|
|
1874
|
+
const retryEntry = patterns.find(p => p.name === "high-hook-retry-tweak-hell");
|
|
1875
|
+
assert.ok(retryEntry, "Expected high-hook-retry entry");
|
|
1878
1876
|
assert.ok(retryEntry.prescription, "Expected prescription field");
|
|
1879
1877
|
assert.ok(
|
|
1880
1878
|
/investigate with Read\+Grep BEFORE editing/.test(retryEntry.prescription),
|
|
@@ -1884,7 +1882,7 @@ describe("knowledge-extract prescriptions", () => {
|
|
|
1884
1882
|
});
|
|
1885
1883
|
|
|
1886
1884
|
it("should NOT add prescription or prescriptive tag when no heuristic matches", () => {
|
|
1887
|
-
// retries=3 to trigger high-retry entry, but balanced tools so none of the
|
|
1885
|
+
// retries=3 to trigger high-hook-retry entry, but balanced tools so none of the
|
|
1888
1886
|
// heuristics fire (edit<=15, apiCalls<=50, bash+edit not >3*agent).
|
|
1889
1887
|
const states = [{
|
|
1890
1888
|
specName: "mild-case",
|
|
@@ -1896,13 +1894,12 @@ describe("knowledge-extract prescriptions", () => {
|
|
|
1896
1894
|
}];
|
|
1897
1895
|
|
|
1898
1896
|
const patterns = extractPatternsFromStates(states);
|
|
1899
|
-
const retryEntry = patterns.find(p => p.name === "high-retry-mild-case");
|
|
1900
|
-
assert.ok(retryEntry, "Expected high-retry entry");
|
|
1897
|
+
const retryEntry = patterns.find(p => p.name === "high-hook-retry-mild-case");
|
|
1898
|
+
assert.ok(retryEntry, "Expected high-hook-retry entry");
|
|
1901
1899
|
assert.equal(retryEntry.prescription, undefined, "No prescription when no heuristic matches");
|
|
1902
1900
|
assert.ok(!retryEntry.tags.includes("prescriptive"),
|
|
1903
1901
|
"'prescriptive' tag must NOT be added when no prescription");
|
|
1904
|
-
|
|
1905
|
-
assert.ok(retryEntry.tags.includes("retry"));
|
|
1902
|
+
assert.ok(retryEntry.tags.includes("hook-retry"));
|
|
1906
1903
|
assert.ok(retryEntry.description);
|
|
1907
1904
|
assert.equal(retryEntry.source, "session-knowledge");
|
|
1908
1905
|
});
|
|
package/templates/hooks/_lib/knowledge-extract.js
CHANGED
|
@@ -82,15 +82,17 @@ function extractPatternsFromStates(stateObjects) {
|
|
|
82
82
|
var label = state.specName || state._file || 'unknown';
|
|
83
83
|
var prescription = derivePrescription(metrics);
|
|
84
84
|
|
|
85
|
-
// High retry count → lesson
|
|
85
|
+
// High hook-retry count → lesson. Counts hook/sandbox events, not agent
|
|
86
|
+
// redispatches — a clean Pass@1 pipeline can still accumulate dozens.
|
|
86
87
|
if (metrics.retries && metrics.retries > 2) {
|
|
87
88
|
var retryEntry = {
|
|
88
89
|
type: 'convention',
|
|
89
|
-
name: 'high-retry-' + label,
|
|
90
|
-
description: 'Pipeline
|
|
90
|
+
name: 'high-hook-retry-' + label,
|
|
91
|
+
description: 'Pipeline triggered ' + metrics.retries + ' hook-level retries ' +
|
|
92
|
+
'(sandbox/stash-pop/re-prompts — not agent redispatches). Tool breakdown: ' +
|
|
91
93
|
JSON.stringify(metrics.toolBreakdown || {}),
|
|
92
94
|
source: 'session-knowledge',
|
|
93
|
-
tags: ['retry', 'pipeline', 'lesson'],
|
|
95
|
+
tags: ['hook-retry', 'pipeline', 'lesson'],
|
|
94
96
|
};
|
|
95
97
|
if (prescription) {
|
|
96
98
|
retryEntry.prescription = prescription;
|
|
package/templates/scripts/metrics-collect.js
CHANGED
|
@@ -33,7 +33,7 @@ function main() {
|
|
|
33
33
|
const statesDir = path.join(claudeDir, '.pipeline-states');
|
|
34
34
|
const activeSpecDir = path.join(claudeDir, 'spec', 'active');
|
|
35
35
|
if (fs.existsSync(statesDir)) {
|
|
36
|
-
const files = fs.readdirSync(statesDir).filter(f => f.endsWith('.json'));
|
|
36
|
+
const files = fs.readdirSync(statesDir).filter(f => f.endsWith('.json') && !f.endsWith('.metrics.json'));
|
|
37
37
|
const activeBuckets = [];
|
|
38
38
|
const orphanedBuckets = [];
|
|
39
39
|
for (const f of files) {
|
|
@@ -49,7 +49,7 @@ function main() {
|
|
|
49
49
|
lines.push(`## ${isOrphaned ? 'Orphaned' : 'Active'}: ${name}`);
|
|
50
50
|
lines.push(`- Duration: ${duration}`);
|
|
51
51
|
lines.push(`- API calls: ${m.apiCalls || 0}`);
|
|
52
|
-
lines.push(`-
|
|
52
|
+
lines.push(`- Hook retries: ${m.retries || 0}`);
|
|
53
53
|
if (m.toolBreakdown && Object.keys(m.toolBreakdown).length > 0) {
|
|
54
54
|
lines.push('- Tool breakdown:');
|
|
55
55
|
for (const [tool, count] of Object.entries(m.toolBreakdown).sort((a, b) => b[1] - a[1])) {
|
|
@@ -102,7 +102,7 @@ function main() {
|
|
|
102
102
|
parts.push(`### ${name}`);
|
|
103
103
|
parts.push(`- Duration: ${duration}`);
|
|
104
104
|
parts.push(`- API calls: ${m.apiCalls || 0}`);
|
|
105
|
-
parts.push(`-
|
|
105
|
+
parts.push(`- Hook retries: ${m.retries || 0}`);
|
|
106
106
|
if (m.rtkSavings) {
|
|
107
107
|
parts.push(`- RTK savings: ${m.rtkSavings.pct}% (${Math.round((m.rtkSavings.saved || 0) / 1000)}k tokens)`);
|
|
108
108
|
}
|
|
@@ -119,7 +119,7 @@ function main() {
|
|
|
119
119
|
parts.push('## Averages (last ' + count + ' pipelines)');
|
|
120
120
|
parts.push(`- Avg duration: ${formatMs(Math.round(totalDurationMs / count))}`);
|
|
121
121
|
parts.push(`- Avg API calls: ${Math.round(totalCalls / count)}`);
|
|
122
|
-
parts.push(`- Avg retries: ${Math.round(totalRetries / count)}`);
|
|
122
|
+
parts.push(`- Avg hook retries: ${Math.round(totalRetries / count)}`);
|
|
123
123
|
parts.push('');
|
|
124
124
|
}
|
|
125
125
|
|
|
@@ -184,8 +184,9 @@ function main() {
|
|
|
184
184
|
var pass1Pct = Math.round((pass1Count / totalPipelines) * 100);
|
|
185
185
|
var avgRetries = (totalRetrySum / totalPipelines).toFixed(1);
|
|
186
186
|
parts.push('## Pass@1 Metrics');
|
|
187
|
-
parts.push('- Pass@1: ' + pass1Pct + '% (' + pass1Count + '/' + totalPipelines + ' completed
|
|
188
|
-
parts.push('- Avg retries per pipeline: ' + avgRetries);
|
|
187
|
+
parts.push('- Pass@1 (hook-level): ' + pass1Pct + '% (' + pass1Count + '/' + totalPipelines + ' completed with zero hook retries)');
|
|
188
|
+
parts.push('- Avg hook retries per pipeline: ' + avgRetries);
|
|
189
|
+
parts.push('- Note: counts hook/sandbox events, not agent redispatches. True agent-level Pass@1 not yet tracked.');
|
|
189
190
|
parts.push('');
|
|
190
191
|
}
|
|
191
192
|
}
|