@haoyiyin/workflow 0.2.2 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +9 -8
- package/src/agents/contracts.ts +559 -0
- package/src/agents/dispatcher-enhanced.ts +350 -0
- package/src/agents/dispatcher.ts +680 -0
- package/src/agents/index.ts +48 -0
- package/src/agents/resilience.ts +255 -0
- package/src/agents/token-budget.ts +83 -0
- package/src/agents/types.ts +73 -0
- package/src/guard/main-agent.ts +245 -0
- package/src/hooks/builtin/index.ts +8 -0
- package/src/hooks/builtin/on-error.ts +23 -0
- package/src/hooks/builtin/post-execute.ts +40 -0
- package/src/hooks/builtin/post-plan.ts +23 -0
- package/src/hooks/builtin/pre-execute.ts +30 -0
- package/src/hooks/builtin/pre-plan.ts +26 -0
- package/src/hooks/index.ts +7 -0
- package/src/hooks/loader.ts +98 -0
- package/src/hooks/manager.ts +99 -0
- package/src/hooks/types-enhanced.ts +38 -0
- package/src/hooks/types.ts +35 -0
- package/src/index.ts +127 -0
- package/src/persistence/index.ts +17 -0
- package/src/persistence/plan-md.ts +141 -0
- package/src/persistence/state-md.ts +167 -0
- package/src/persistence/types.ts +89 -0
- package/src/router/classifier.ts +610 -0
- package/src/router/guard.ts +483 -0
- package/src/router/index.ts +22 -0
- package/src/router/router.ts +108 -0
- package/src/router/types.ts +127 -0
- package/src/skills/agents-md/SKILL.md +45 -0
- package/src/skills/agents-md/index.ts +33 -0
- package/src/skills/execute-plan/SKILL.md +60 -0
- package/src/skills/execute-plan/index.ts +970 -0
- package/src/skills/index.ts +13 -0
- package/src/skills/quick-task/SKILL.md +54 -0
- package/src/skills/quick-task/index.ts +346 -0
- package/src/skills/registry.ts +59 -0
- package/src/skills/review-diff/SKILL.md +53 -0
- package/src/skills/review-diff/index.ts +394 -0
- package/src/skills/skill.ts +59 -0
- package/src/skills/systematic-debugging/SKILL.md +56 -0
- package/src/skills/systematic-debugging/index.ts +404 -0
- package/src/skills/tdd/SKILL.md +52 -0
- package/src/skills/tdd/index.ts +409 -0
- package/src/skills/to-plan/SKILL.md +56 -0
- package/src/skills/to-plan/index-enhanced.ts +551 -0
- package/src/skills/to-plan/index.ts +586 -0
- package/src/skills/types.ts +47 -0
- package/src/state/cleanup.ts +118 -0
- package/src/state/index.ts +8 -0
- package/src/state/manager.ts +96 -0
- package/src/state/persistence.ts +77 -0
- package/src/state/types.ts +30 -0
- package/src/state/validator.ts +78 -0
- package/src/types.ts +102 -0
- package/src/utils/compress.ts +347 -0
- package/src/utils/git.ts +82 -0
- package/src/utils/index.ts +6 -0
- package/src/utils/logger.ts +23 -0
- package/src/utils/paths.ts +55 -0
|
@@ -0,0 +1,404 @@
|
|
|
1
|
+
/**
 * Systematic Debugging Skill - Find root cause before fixing
 *
 * Use case: Bug investigation, test failure, unexpected behavior
 * Pattern: Reproduce → Gather Evidence → Diagnose (hypothesize + test) →
 *          Apply Fix → Verify
 *
 * CRITICAL RULE: No fix before root cause is confirmed.
 * Spawns debugger subagent(s) for each phase.
 * Prevents the "fix-first-ask-questions-later" anti-pattern.
 */
|
|
12
|
+
|
|
13
|
+
import { z } from 'zod'
|
|
14
|
+
import { Skill } from '../skill.js'
|
|
15
|
+
import type { SkillContext } from '../types.js'
|
|
16
|
+
import { createDispatcher } from '../../agents/dispatcher.js'
|
|
17
|
+
import { researcherContract } from '../../agents/contracts.js'
|
|
18
|
+
import { createMainAgentGuard } from '../../guard/main-agent.js'
|
|
19
|
+
|
|
20
|
+
// ---------------------------------------------------------------------------
|
|
21
|
+
// Schemas
|
|
22
|
+
// ---------------------------------------------------------------------------
|
|
23
|
+
|
|
24
|
+
/**
 * Input accepted by the systematic-debugging skill.
 *
 * Only `symptom` is required (and must be non-empty); every other field is
 * optional context that is forwarded into the phase prompts when present.
 */
const DebugInputSchema = z.object({
  /** Free-text description of the observed problem. */
  symptom: z.string().min(1, 'Symptom description is required'),
  /** Shell command that reproduces the problem, if one is known. */
  reproduceCommand: z.string().optional(),
  /** Raw log output related to the failure. */
  logs: z.string().optional(),
  /** Description of recent changes that may be related. */
  recentChanges: z.string().optional(),
  /** Model override; `execute` falls back to `config.defaultModel` when unset. */
  model: z.string().optional(),
})

/**
 * Result returned by the systematic-debugging skill after all phases ran.
 */
const DebugOutputSchema = z.object({
  /** Root cause as extracted from the diagnose phase output. */
  rootCause: z.string(),
  /** Confidence level reported by the diagnose phase. */
  confidence: z.enum(['high', 'medium', 'low']),
  /** Where the root cause lives; each part is present only if it was parsed. */
  location: z.object({
    file: z.string().optional(),
    line: z.number().optional(),
    function: z.string().optional(),
  }),
  /** Whether the apply-fix dispatch reported success. */
  fixApplied: z.boolean(),
  fixDescription: z.string().optional(),
  /** Excerpt of the verifier subagent output. */
  verification: z.string(),
  risks: z.array(z.string()),
  /** Sum of `tokensUsed` over every dispatched phase. */
  tokensUsed: z.number(),
})

type DebugInput = z.infer<typeof DebugInputSchema>
type DebugOutput = z.infer<typeof DebugOutputSchema>
|
|
49
|
+
|
|
50
|
+
// ---------------------------------------------------------------------------
|
|
51
|
+
// Prompt builders (pure functions)
|
|
52
|
+
// ---------------------------------------------------------------------------
|
|
53
|
+
|
|
54
|
+
/**
 * Builds the prompt for the "reproduce" phase.
 *
 * @param symptom - Description of the observed problem; always included.
 * @param reproduceCommand - Optional command, rendered as inline code.
 * @param logs - Optional log text; only the first 2000 characters are included
 *   so a large log cannot dominate the prompt.
 * @returns The prompt as newline-joined Markdown.
 */
function buildReproducePrompt(
  symptom: string,
  reproduceCommand?: string,
  logs?: string,
): string {
  const sections: string[] = [
    `## Reproduce the Issue`,
    '',
    `**Symptom**: ${symptom}`,
  ]

  // Empty strings are treated the same as "not provided".
  if (reproduceCommand) {
    sections.push('', `**Command**: \`${reproduceCommand}\``)
  }
  if (logs) {
    sections.push('', '## Logs', '', logs.slice(0, 2000))
  }

  sections.push(
    '',
    'Confirm reproduction. Report exact steps, observed vs expected.',
  )
  return sections.join('\n')
}
|
|
76
|
+
|
|
77
|
+
/**
 * Builds the prompt for the "gather evidence" phase.
 *
 * @param symptom - Description of the observed problem.
 * @param reproOutput - Output of the reproduce phase; truncated to its first
 *   2000 characters.
 * @param recentChanges - Optional description of recent changes. When absent
 *   (or empty) the prompt instructs the agent to check the git log instead.
 * @returns The prompt as newline-joined Markdown.
 */
function buildEvidencePrompt(
  symptom: string,
  reproOutput: string,
  recentChanges?: string,
): string {
  return [
    `## Gather Evidence`,
    '',
    `**Symptom**: ${symptom}`,
    '',
    '## Reproduction',
    reproOutput.slice(0, 2000),
    '',
    recentChanges
      ? `## Recent Changes\n${recentChanges}`
      : '## Recent Changes\nCheck git log for related commits.',
    '',
    'Identify suspect files, key functions, failure points.',
    'Trace execution path. Look for error handling gaps, state issues, race conditions.',
  ].join('\n')
}
|
|
98
|
+
|
|
99
|
+
/**
 * Builds the prompt for the "diagnose" phase, which must identify the root
 * cause without changing any code.
 *
 * The trailing "Output Format" section lists the labelled fields
 * (`Root Cause:`, `Confidence:`, `File:`, `Line:`, `Function:`, `Fix:` ...)
 * that `parseDiagnosis` later looks for in the agent's reply.
 *
 * @param symptom - Description of the observed problem.
 * @param evidenceOutput - Output of the evidence phase; truncated to its first
 *   3000 characters.
 * @param knownIssues - Optional research findings; truncated to 1000
 *   characters. The whole section is omitted when absent or empty.
 * @returns The prompt as newline-joined Markdown.
 */
function buildDiagnosePrompt(
  symptom: string,
  evidenceOutput: string,
  knownIssues?: string,
): string {
  return [
    `## Diagnose Root Cause`,
    '',
    `**Symptom**: ${symptom}`,
    '',
    '## Evidence',
    evidenceOutput.slice(0, 3000),
    ...(knownIssues
      ? ['', '## Known Issues (from research)', knownIssues.slice(0, 1000)]
      : []),
    '',
    '## Process',
    '1. Form 2-3 hypotheses, rank by likelihood',
    '2. Test top hypothesis with targeted checks',
    '3. Eliminate incorrect hypotheses',
    '4. Confirm root cause with evidence',
    '',
    '## CRITICAL: Diagnosis Only — NO Fix Yet',
    'Do NOT write any code changes in this phase.',
    'You are confirming the root cause, not fixing it.',
    '',
    '## Output Format',
    '',
    '```',
    'Root Cause: <description>',
    'Confidence: high|medium|low',
    'File: <path>',
    'Line: <number>',
    'Function: <name>',
    'Evidence: <proof>',
    'Eliminated: <hypotheses ruled out>',
    'Fix: <proposed fix description>',
    '```',
  ].join('\n')
}
|
|
137
|
+
|
|
138
|
+
/**
 * Builds the prompt for the "apply fix" phase from a parsed diagnosis.
 *
 * Optional lines (file, line, low-confidence warning) are added with
 * conditional spreads rather than a blanket `.filter(Boolean)`, so the blank
 * separator lines between the heading, the diagnosis summary and the
 * instructions are preserved in the rendered Markdown.
 *
 * @param diagnosis - Root cause and confidence, plus the file/line when known.
 *   A missing (or `0`) line and a missing/empty file are simply left out.
 * @returns The prompt as newline-joined Markdown.
 */
function buildApplyFixPrompt(diagnosis: {
  rootCause: string
  confidence: string
  file?: string
  line?: number
}): string {
  return [
    `## Apply Fix`,
    '',
    `**Root Cause**: ${diagnosis.rootCause}`,
    `**Confidence**: ${diagnosis.confidence}`,
    ...(diagnosis.file ? [`**File**: ${diagnosis.file}`] : []),
    ...(diagnosis.line ? [`**Line**: ${diagnosis.line}`] : []),
    '',
    'Apply ONE minimal fix targeting the confirmed root cause.',
    'Run reproduction test to confirm fix works.',
    'Do NOT refactor unrelated code.',
    ...(diagnosis.confidence === 'low'
      ? ['Low confidence: prefer a safer, more conservative fix.']
      : []),
  ].join('\n')
}
|
|
162
|
+
|
|
163
|
+
/**
 * Builds the prompt for the "verify" phase.
 *
 * @param symptom - The original symptom the fix is supposed to resolve.
 * @param fixOutput - Output of the apply-fix phase; truncated to its first
 *   2000 characters.
 * @returns The prompt as newline-joined Markdown asking for a PASS/FAIL verdict.
 */
function buildVerifyPrompt(
  symptom: string,
  fixOutput: string,
): string {
  return [
    `## Verify Fix`,
    '',
    `**Original**: ${symptom}`,
    '',
    '## Fix Applied',
    fixOutput.slice(0, 2000),
    '',
    'Re-run reproduction steps. Run full test suite.',
    'Return PASS or FAIL with details.',
  ].join('\n')
}
|
|
179
|
+
|
|
180
|
+
// ---------------------------------------------------------------------------
|
|
181
|
+
// Parsing helpers (pure functions)
|
|
182
|
+
// ---------------------------------------------------------------------------
|
|
183
|
+
|
|
184
|
+
/** Structured fields extracted from the diagnose phase's free-text reply. */
interface ParsedDiagnosis {
  rootCause: string
  confidence: 'high' | 'medium' | 'low'
  file?: string
  line?: number
  func?: string
  fixDescription?: string
}

/**
 * Extracts the labelled fields (`Root Cause:`, `Confidence:`, `File:`,
 * `Line:`, `Function:`, `Fix:`) from a diagnosis reply.
 *
 * Labels are only recognised at the start of a line and must be followed by a
 * colon, so words such as "fix", "line" or "profile" appearing inside ordinary
 * prose are not mistaken for fields. Leading list bullets and Markdown
 * emphasis around the label (`**Root Cause**:`, `- **Confidence:** low`) are
 * tolerated, and surrounding quotes/backticks are stripped from the file and
 * function values.
 *
 * @param output - Raw text returned by the diagnose subagent.
 * @returns The parsed diagnosis. `rootCause` falls back to the first 200
 *   characters of `output` and `confidence` to `'medium'` when the respective
 *   field is missing; all other fields are `undefined` when missing.
 */
function parseDiagnosis(output: string): ParsedDiagnosis {
  const emphasis = '[*_`]*'

  // Finds `<label>: <value>` at the start of any line (case-insensitive).
  const readField = (label: string, value: string): string | undefined => {
    const pattern = new RegExp(
      '^[ \\t]*(?:[-*+][ \\t]+)?' +
        emphasis +
        label +
        emphasis +
        '[ \\t]*:' +
        emphasis +
        '[ \\t]*(' +
        value +
        ')',
      'im',
    )
    const captured = pattern.exec(output)?.[1]?.trim()
    return captured ? captured : undefined
  }

  // Drops quotes/backticks wrapped around a path or identifier.
  const unquote = (text: string | undefined): string | undefined =>
    text?.replace(/^[`'"]+|[`'"]+$/g, '') || undefined

  const rawConfidence = readField(
    'confidence',
    '(?:high|medium|low)\\b',
  )?.toLowerCase()
  const confidence: ParsedDiagnosis['confidence'] =
    rawConfidence === 'high' || rawConfidence === 'low'
      ? rawConfidence
      : 'medium'

  const lineText = readField('line', '\\d+')

  return {
    rootCause: readField('root[ \\t]+cause', '.+') ?? output.slice(0, 200),
    confidence,
    file: unquote(readField('file', '\\S+\\.\\w+')),
    line: lineText === undefined ? undefined : Number.parseInt(lineText, 10),
    func: unquote(readField('function', '\\S+')),
    fixDescription: readField('(?:proposed[ \\t]+)?fix', '.+'),
  }
}
|
|
216
|
+
|
|
217
|
+
// ---------------------------------------------------------------------------
|
|
218
|
+
// Skill class
|
|
219
|
+
// ---------------------------------------------------------------------------
|
|
220
|
+
|
|
221
|
+
/**
 * Skill that drives a five-phase debugging workflow by dispatching one
 * subagent per phase: reproduce → gather evidence → (optional research) →
 * diagnose → apply fix → verify.
 *
 * Only the apply-fix dispatch is granted `writeFiles`; every earlier phase and
 * the verifier run with `writeFiles: false`.
 */
export class SystematicDebuggingSkill extends Skill<DebugInput, DebugOutput> {
  constructor() {
    super({
      name: 'systematic-debugging',
      description:
        'Find root cause before fixing. Spawns debugger subagent for systematic diagnosis.',
      requires: ['tdd'],
      inputSchema: DebugInputSchema as z.ZodType<DebugInput>,
      outputSchema: DebugOutputSchema,
    })
  }

  /**
   * Runs the full debugging workflow for one symptom.
   *
   * @param input - Validated skill input (symptom plus optional context).
   * @param context - Provides `config` (for `defaultModel`) and `logger`.
   * @returns Parsed diagnosis, fix status, a verification excerpt and the
   *   total tokens used across all dispatched phases.
   *
   * Any error thrown by a dispatch propagates to the caller; the embargo
   * activated at the start is always deactivated in `finally`.
   */
  async execute(
    input: DebugInput,
    context: SkillContext,
  ): Promise<DebugOutput> {
    const { config, logger } = context
    const dispatcher = createDispatcher(logger)
    const guard = createMainAgentGuard({}, logger)
    // An explicit (non-empty) model on the input wins over the configured default.
    const model = input.model || config.defaultModel
    let totalTokens = 0

    // NOTE(review): presumably blocks direct actions by the main agent while
    // subagents run — confirm against guard/main-agent.
    guard.activateEmbargo()

    try {
      // Step 1: Reproduce
      logger.info(`[debug] Reproducing: ${input.symptom.slice(0, 80)}`)
      const reproResult = await dispatcher.dispatch(
        { role: 'debugger', model },
        {
          permissions: {
            readFiles: true,
            searchCode: false,
            runCommands: true,
            writeFiles: false,
            gitOperations: false,
          },
          prompt: buildReproducePrompt(
            input.symptom,
            input.reproduceCommand,
            input.logs,
          ),
          owns: [],
          reads: [],
        },
      )
      totalTokens += reproResult.tokensUsed

      // Step 2: Gather evidence (read-only)
      logger.info('[debug] Gathering evidence')
      const evidenceResult = await dispatcher.dispatch(
        { role: 'explorer', model },
        {
          permissions: {
            readFiles: true,
            searchCode: true,
            runCommands: false,
            writeFiles: false,
            gitOperations: false,
          },
          prompt: buildEvidencePrompt(
            input.symptom,
            reproResult.output,
            input.recentChanges,
          ),
          owns: [],
          reads: [],
        },
      )
      totalTokens += evidenceResult.tokensUsed

      // Step 2b: Research known issues (optional) — only when the symptom
      // mentions packaging or failure keywords.
      let knownIssuesContext = ''
      const shouldResearch =
        /npm|yarn|package|version|error|fail|crash|exception/i.test(
          input.symptom,
        )
      if (shouldResearch) {
        logger.info('[debug] Step 2b: Researching known issues...')
        const researchResult = await dispatcher.dispatch(
          { role: 'researcher', model },
          researcherContract({
            topic: input.symptom,
            scope: 'technical',
            questions: [
              'Are there known issues or bug reports?',
              'Any recent breaking changes or deprecations?',
              'What are common solutions or workarounds?',
            ],
            timeRange: '1y',
          }),
        )
        totalTokens += researchResult.tokensUsed
        knownIssuesContext = researchResult.output.slice(0, 1200)
        logger.info(
          `[debug] Research complete: ${knownIssuesContext ? 'found known issues' : 'no relevant results'}`,
        )
      }

      // Step 3: Diagnose (NO FIX YET) — still without write permission.
      logger.info('[debug] Diagnosing root cause (no fix yet)')
      const diagnoseResult = await dispatcher.dispatch(
        { role: 'debugger', model },
        {
          permissions: {
            readFiles: true,
            searchCode: false,
            runCommands: true,
            writeFiles: false,
            gitOperations: false,
          },
          prompt: buildDiagnosePrompt(
            input.symptom,
            evidenceResult.output,
            knownIssuesContext,
          ),
          owns: [],
          reads: [],
        },
      )
      totalTokens += diagnoseResult.tokensUsed
      const diagnosis = parseDiagnosis(diagnoseResult.output)

      // Step 4: Apply fix (only after root cause confirmed)
      // NOTE(review): this step runs regardless of the parsed confidence or
      // the diagnose dispatch's status; nothing here checks that a root cause
      // was actually confirmed.
      logger.info(
        `[debug] Applying fix (confidence: ${diagnosis.confidence})`,
      )
      const fixResult = await dispatcher.dispatch(
        {
          role: 'debugger',
          model,
          isolation: 'worktree',
        },
        {
          permissions: {
            readFiles: true,
            searchCode: false,
            runCommands: true,
            writeFiles: true,
            gitOperations: false,
          },
          prompt: buildApplyFixPrompt(diagnosis),
          // The diagnosed file, when known, is the only path declared as owned.
          owns: diagnosis.file ? [diagnosis.file] : [],
          reads: [],
        },
      )
      totalTokens += fixResult.tokensUsed

      // Step 5: Verify
      logger.info('[debug] Verifying fix')
      const verifyResult = await dispatcher.dispatch(
        { role: 'verifier', model },
        {
          permissions: {
            readFiles: false,
            searchCode: false,
            runCommands: true,
            writeFiles: false,
            gitOperations: false,
          },
          prompt: buildVerifyPrompt(input.symptom, fixResult.output),
          owns: [],
          reads: [],
        },
      )
      totalTokens += verifyResult.tokensUsed

      return {
        rootCause: diagnosis.rootCause,
        confidence: diagnosis.confidence,
        location: {
          file: diagnosis.file,
          line: diagnosis.line,
          function: diagnosis.func,
        },
        fixApplied: fixResult.status === 'success',
        // Prefer the fix proposed in the diagnosis; otherwise fall back to an
        // excerpt of what the fix subagent reported.
        fixDescription:
          diagnosis.fixDescription ?? fixResult.output.slice(0, 500),
        verification: verifyResult.output.slice(0, 500),
        risks: [],
        tokensUsed: totalTokens,
      }
    } finally {
      guard.deactivateEmbargo()
    }
  }
}
|
|
402
|
+
|
|
403
|
+
/** Module-level instance of the skill, exported both by name and as the default export. */
export const systematicDebuggingSkill = new SystematicDebuggingSkill()
export default systematicDebuggingSkill
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
---
name: tdd
description: Enforce RED/GREEN/REFACTOR TDD cycle with isolated implementer subagents. No implementation before a failing test exists.
requires: []
---
|
|
6
|
+
|
|
7
|
+
## ⛔ MAIN AGENT CONSTRAINT

You are a THIN DISPATCHER. Your ONLY job is to dispatch subagents.
You MUST NOT: read source files, search code, write/edit files, run tests, run git commands.
You MAY only: read plan/state files, dispatch subagents via Agent tool, relay results.
For EVERY user request (including this skill), dispatch a subagent. Never execute yourself.
|
|
13
|
+
|
|
14
|
+
---
|
|
15
|
+
|
|
16
|
+
## Use/Exclude Matrix

| Use When | Exclude When |
|---|---|
| New feature or behavior implementation | Fixing a typo or formatting (use quick-task) |
| Bugfix with testable reproduction | Behavior already fully tested |
| Need confidence via test-first discipline | Exploratory/prototype code |
| Refactoring that must preserve behavior | Configuration or documentation changes |
|
|
24
|
+
|
|
25
|
+
## Workflow

1. **RED** — Dispatch implementer subagent (worktree) to write a single failing test. Must confirm the test fails for the right reason. If the test passes unexpectedly, return `status=invalid-red`.
2. **GREEN** — Dispatch implementer subagent (worktree) to write minimal code to pass the test. Code can be hacky; refactor comes next.
3. **REFACTOR** — Dispatch implementer subagent (worktree) to improve code quality while keeping tests green. No behavior changes allowed.
4. **Verify** — Dispatch verifier subagent to run full test suite and check for regressions.
|
|
31
|
+
|
|
32
|
+
## Output Spec

| Field | Type | Description |
|---|---|---|
| `status` | enum | `success`, `failure`, `invalid-red` |
| `phases` | array | RED/GREEN/REFACTOR each with phase, status, command, evidence |
| `broaderVerification.command` | string | Verification command run |
| `broaderVerification.result` | enum | `PASS`, `FAIL`, `NOT_RUN` |
| `filesModified` | string[] | All files changed during TDD cycle |
| `summary` | string | Human-readable cycle summary |
| `tokensUsed` | number | Total tokens consumed |
|
|
43
|
+
|
|
44
|
+
## Error Handling

| Error | Action |
|---|---|
| RED: test passes unexpectedly | Return status=invalid-red; behavior may already exist |
| RED: subagent fails | Return status=failure with phase evidence |
| GREEN: subagent fails | Return status=failure; preserve RED phase results |
| REFACTOR: subagent fails | Record phase as skipped; return success if GREEN passed |
| Verifier reports regressions | Return broaderVerification.result=FAIL |
|