@haoyiyin/workflow 0.2.0 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. package/package.json +15 -10
  2. package/scripts/postinstall.js +2 -2
  3. package/src/agents/contracts.ts +559 -0
  4. package/src/agents/dispatcher-enhanced.ts +350 -0
  5. package/src/agents/dispatcher.ts +680 -0
  6. package/src/agents/index.ts +48 -0
  7. package/src/agents/resilience.ts +255 -0
  8. package/src/agents/token-budget.ts +83 -0
  9. package/src/agents/types.ts +73 -0
  10. package/src/guard/main-agent.ts +245 -0
  11. package/src/hooks/builtin/index.ts +8 -0
  12. package/src/hooks/builtin/on-error.ts +23 -0
  13. package/src/hooks/builtin/post-execute.ts +40 -0
  14. package/src/hooks/builtin/post-plan.ts +23 -0
  15. package/src/hooks/builtin/pre-execute.ts +30 -0
  16. package/src/hooks/builtin/pre-plan.ts +26 -0
  17. package/src/hooks/index.ts +7 -0
  18. package/src/hooks/loader.ts +98 -0
  19. package/src/hooks/manager.ts +99 -0
  20. package/src/hooks/types-enhanced.ts +38 -0
  21. package/src/hooks/types.ts +35 -0
  22. package/src/index.ts +127 -0
  23. package/src/persistence/index.ts +17 -0
  24. package/src/persistence/plan-md.ts +141 -0
  25. package/src/persistence/state-md.ts +167 -0
  26. package/src/persistence/types.ts +89 -0
  27. package/src/router/classifier.ts +610 -0
  28. package/src/router/guard.ts +483 -0
  29. package/src/router/index.ts +22 -0
  30. package/src/router/router.ts +108 -0
  31. package/src/router/types.ts +127 -0
  32. package/src/skills/agents-md/SKILL.md +45 -0
  33. package/src/skills/agents-md/index.ts +33 -0
  34. package/src/skills/execute-plan/SKILL.md +60 -0
  35. package/src/skills/execute-plan/index.ts +970 -0
  36. package/src/skills/index.ts +13 -0
  37. package/src/skills/quick-task/SKILL.md +54 -0
  38. package/src/skills/quick-task/index.ts +346 -0
  39. package/src/skills/registry.ts +59 -0
  40. package/src/skills/review-diff/SKILL.md +53 -0
  41. package/src/skills/review-diff/index.ts +394 -0
  42. package/src/skills/skill.ts +59 -0
  43. package/src/skills/systematic-debugging/SKILL.md +56 -0
  44. package/src/skills/systematic-debugging/index.ts +404 -0
  45. package/src/skills/tdd/SKILL.md +52 -0
  46. package/src/skills/tdd/index.ts +409 -0
  47. package/src/skills/to-plan/SKILL.md +56 -0
  48. package/src/skills/to-plan/index-enhanced.ts +551 -0
  49. package/src/skills/to-plan/index.ts +586 -0
  50. package/src/skills/types.ts +47 -0
  51. package/src/state/cleanup.ts +118 -0
  52. package/src/state/index.ts +8 -0
  53. package/src/state/manager.ts +96 -0
  54. package/src/state/persistence.ts +77 -0
  55. package/src/state/types.ts +30 -0
  56. package/src/state/validator.ts +78 -0
  57. package/src/types.ts +102 -0
  58. package/src/utils/compress.ts +347 -0
  59. package/src/utils/git.ts +82 -0
  60. package/src/utils/index.ts +6 -0
  61. package/src/utils/logger.ts +23 -0
  62. package/src/utils/paths.ts +55 -0
@@ -0,0 +1,404 @@
1
+ /**
2
+ * Systematic Debugging Skill - Find root cause before fixing
3
+ *
4
+ * Use case: Bug investigation, test failure, unexpected behavior
5
+ * Pattern: Reproduce → Gather Evidence → Diagnose (hypothesize + test) →
6
+ * Apply Fix → Verify
7
+ *
8
+ * CRITICAL RULE: No fix before root cause is confirmed.
9
+ * Spawns a dedicated subagent (debugger, explorer, researcher, or verifier) for each phase.
10
+ * Prevents the "fix-first-ask-questions-later" anti-pattern.
11
+ */
12
+
13
+ import { z } from 'zod'
14
+ import { Skill } from '../skill.js'
15
+ import type { SkillContext } from '../types.js'
16
+ import { createDispatcher } from '../../agents/dispatcher.js'
17
+ import { researcherContract } from '../../agents/contracts.js'
18
+ import { createMainAgentGuard } from '../../guard/main-agent.js'
19
+
20
+ // ---------------------------------------------------------------------------
21
+ // Schemas
22
+ // ---------------------------------------------------------------------------
23
+
24
/** Confidence levels a diagnosis may report. */
const ConfidenceLevelSchema = z.enum(['high', 'medium', 'low'])

/** Where the root cause was found; every coordinate is optional. */
const DebugLocationSchema = z.object({
  file: z.string().optional(),
  line: z.number().optional(),
  function: z.string().optional(),
})

/**
 * Input accepted by the debugging skill. Only `symptom` is mandatory and it
 * must be a non-empty string; the remaining fields are optional strings.
 */
const DebugInputSchema = z.object({
  symptom: z.string().min(1, 'Symptom description is required'),
  reproduceCommand: z.string().optional(),
  logs: z.string().optional(),
  recentChanges: z.string().optional(),
  model: z.string().optional(),
})

/** Result reported by the debugging skill once all phases have run. */
const DebugOutputSchema = z.object({
  rootCause: z.string(),
  confidence: ConfidenceLevelSchema,
  location: DebugLocationSchema,
  fixApplied: z.boolean(),
  fixDescription: z.string().optional(),
  verification: z.string(),
  risks: z.array(z.string()),
  tokensUsed: z.number(),
})

type DebugInput = z.infer<typeof DebugInputSchema>
type DebugOutput = z.infer<typeof DebugOutputSchema>
49
+
50
+ // ---------------------------------------------------------------------------
51
+ // Prompt builders (pure functions)
52
+ // ---------------------------------------------------------------------------
53
+
54
/**
 * Builds the prompt for the reproduction phase.
 *
 * @param symptom - Description of the observed problem.
 * @param reproduceCommand - Optional command, rendered as inline code when truthy.
 * @param logs - Optional log text; only the first 2000 characters are included.
 * @returns The prompt as newline-joined markdown.
 */
function buildReproducePrompt(
  symptom: string,
  reproduceCommand?: string,
  logs?: string,
): string {
  // Optional sections collapse to nothing when their input is absent or empty.
  const commandSection = reproduceCommand
    ? ['', `**Command**: \`${reproduceCommand}\``]
    : []
  const logSection = logs ? ['', '## Logs', '', logs.slice(0, 2000)] : []

  return [
    `## Reproduce the Issue`,
    '',
    `**Symptom**: ${symptom}`,
    ...commandSection,
    ...logSection,
    '',
    'Confirm reproduction. Report exact steps, observed vs expected.',
  ].join('\n')
}
76
+
77
/**
 * Builds the prompt for the evidence-gathering phase.
 *
 * @param symptom - Description of the observed problem.
 * @param reproOutput - Output of the reproduction phase; truncated to 2000 characters.
 * @param recentChanges - Optional description of recent changes; when absent or
 *   empty the prompt instructs the agent to check the git log instead.
 * @returns The prompt as newline-joined markdown.
 */
function buildEvidencePrompt(
  symptom: string,
  reproOutput: string,
  recentChanges?: string,
): string {
  const reproduction = reproOutput.slice(0, 2000)
  const changes = recentChanges
    ? recentChanges
    : 'Check git log for related commits.'

  // Each entry is one paragraph; paragraphs are separated by a blank line.
  const paragraphs = [
    `## Gather Evidence\n\n**Symptom**: ${symptom}`,
    `## Reproduction\n${reproduction}`,
    `## Recent Changes\n${changes}`,
    'Identify suspect files, key functions, failure points.\n' +
      'Trace execution path. Look for error handling gaps, state issues, race conditions.',
  ]
  return paragraphs.join('\n\n')
}
98
+
99
/**
 * Builds the prompt for the diagnosis phase, which asks for a root cause in a
 * fixed labelled format and explicitly forbids code changes.
 *
 * @param symptom - Description of the observed problem.
 * @param evidenceOutput - Output of the evidence phase; truncated to 3000 characters.
 * @param knownIssues - Optional research findings; truncated to 1000 characters
 *   and omitted entirely when absent or empty.
 * @returns The prompt as newline-joined markdown.
 */
function buildDiagnosePrompt(
  symptom: string,
  evidenceOutput: string,
  knownIssues?: string,
): string {
  const lines: string[] = []

  lines.push(
    `## Diagnose Root Cause`,
    '',
    `**Symptom**: ${symptom}`,
    '',
    '## Evidence',
    evidenceOutput.slice(0, 3000),
  )

  if (knownIssues) {
    lines.push('', '## Known Issues (from research)', knownIssues.slice(0, 1000))
  }

  lines.push(
    '',
    '## Process',
    '1. Form 2-3 hypotheses, rank by likelihood',
    '2. Test top hypothesis with targeted checks',
    '3. Eliminate incorrect hypotheses',
    '4. Confirm root cause with evidence',
  )

  lines.push(
    '',
    '## CRITICAL: Diagnosis Only — NO Fix Yet',
    'Do NOT write any code changes in this phase.',
    'You are confirming the root cause, not fixing it.',
  )

  // The labels in this template are the ones the diagnosis output is later parsed for.
  lines.push(
    '',
    '## Output Format',
    '',
    '```',
    'Root Cause: <description>',
    'Confidence: high|medium|low',
    'File: <path>',
    'Line: <number>',
    'Function: <name>',
    'Evidence: <proof>',
    'Eliminated: <hypotheses ruled out>',
    'Fix: <proposed fix description>',
    '```',
  )

  return lines.join('\n')
}
137
+
138
/**
 * Builds the prompt for the fix phase from a confirmed diagnosis.
 *
 * The prompt has three parts separated by blank lines: the heading, the
 * diagnosis metadata, and the fix instructions. The `File` and `Line` lines
 * are included only when the corresponding value is truthy, and a caution is
 * appended when the confidence is `low`.
 *
 * Optional lines are added conditionally rather than filtered out afterwards:
 * filtering the array for truthiness also removed the intentional
 * empty-string separators and collapsed the whole prompt into one run of lines.
 *
 * @param diagnosis - Root cause, confidence label, and optional file/line.
 * @returns The prompt as newline-joined markdown.
 */
function buildApplyFixPrompt(diagnosis: {
  rootCause: string
  confidence: string
  file?: string
  line?: number
}): string {
  const metadata = [
    `**Root Cause**: ${diagnosis.rootCause}`,
    `**Confidence**: ${diagnosis.confidence}`,
    ...(diagnosis.file ? [`**File**: ${diagnosis.file}`] : []),
    ...(diagnosis.line ? [`**Line**: ${diagnosis.line}`] : []),
  ]

  const instructions = [
    'Apply ONE minimal fix targeting the confirmed root cause.',
    'Run reproduction test to confirm fix works.',
    'Do NOT refactor unrelated code.',
    ...(diagnosis.confidence === 'low'
      ? ['Low confidence: prefer a safer, more conservative fix.']
      : []),
  ]

  return [`## Apply Fix`, '', ...metadata, '', ...instructions].join('\n')
}
162
+
163
/**
 * Builds the prompt for the verification phase.
 *
 * @param symptom - The original symptom description.
 * @param fixOutput - Output of the fix phase; truncated to 2000 characters.
 * @returns The prompt as newline-joined markdown.
 */
function buildVerifyPrompt(
  symptom: string,
  fixOutput: string,
): string {
  const appliedFix = fixOutput.slice(0, 2000)

  // Each part ends with a newline so that joining with '\n' leaves a blank line between parts.
  const heading = `## Verify Fix\n\n**Original**: ${symptom}\n`
  const fixSection = `## Fix Applied\n${appliedFix}\n`
  const instructions =
    'Re-run reproduction steps. Run full test suite.\n' +
    'Return PASS or FAIL with details.'

  return [heading, fixSection, instructions].join('\n')
}
179
+
180
+ // ---------------------------------------------------------------------------
181
+ // Parsing helpers (pure functions)
182
+ // ---------------------------------------------------------------------------
183
+
184
/** Structured fields extracted from the free-text output of the diagnosis phase. */
interface ParsedDiagnosis {
  rootCause: string
  confidence: 'high' | 'medium' | 'low'
  file?: string
  line?: number
  func?: string
  fixDescription?: string
}

/**
 * Extracts the labelled fields (`Root Cause:`, `Confidence:`, `File:`,
 * `Line:`, `Function:`, `Fix:`) from diagnosis output.
 *
 * A label is recognised only at the start of a line and must be followed by a
 * colon; an optional list marker (`-`, `*`, `1.`) and markdown bold around the
 * label are tolerated. The value is the remainder of that same line.
 * Anchoring matters: a label matched anywhere, with an optional colon, lets
 * "prefix handling" be read as a `Fix` field and "pipeline 7" as `Line 7`, and
 * an empty `Function:` line would capture the label of the following line.
 *
 * @param output - Raw text returned by the diagnosis agent.
 * @returns The parsed fields. `rootCause` falls back to the first 200
 *   characters of `output` when no `Root Cause:` line is present, and
 *   `confidence` defaults to `'medium'` when no recognised level is found.
 */
function parseDiagnosis(output: string): ParsedDiagnosis {
  // Returns the trimmed same-line value of a labelled field, or undefined when
  // the label is missing or its value is empty.
  const field = (label: string): string | undefined => {
    const pattern = new RegExp(
      `^[ \\t]*(?:(?:[-*]|\\d+\\.)[ \\t]+)?(?:\\*\\*)?${label}(?:\\*\\*)?[ \\t]*:(?:\\*\\*)?[ \\t]*(.*)$`,
      'im',
    )
    const value = pattern.exec(output)?.[1]?.trim()
    return value ? value : undefined
  }

  const level = field('Confidence')
    ?.match(/\b(high|medium|low)\b/i)?.[1]
    ?.toLowerCase()
  const confidence: ParsedDiagnosis['confidence'] =
    level === 'high' || level === 'low' ? level : 'medium'

  // A file value must look like a path with an extension; surrounding quotes
  // or backticks are not part of the path.
  const file = field('File')?.match(/[^\s`'"]+\.\w+/)?.[0]
  const lineDigits = field('Line')?.match(/\d+/)?.[0]
  const func = field('Function')?.match(/[^\s`]+/)?.[0]

  return {
    rootCause: field('Root Cause') ?? output.slice(0, 200),
    confidence,
    file,
    line: lineDigits !== undefined ? parseInt(lineDigits, 10) : undefined,
    func,
    fixDescription: field('Fix'),
  }
}
216
+
217
+ // ---------------------------------------------------------------------------
218
+ // Skill class
219
+ // ---------------------------------------------------------------------------
220
+
221
/**
 * Skill that takes a reported symptom through reproduce, evidence gathering,
 * optional research, diagnosis, fix, and verification, dispatching one
 * subagent per phase and summing the tokens each phase reports.
 */
export class SystematicDebuggingSkill extends Skill<DebugInput, DebugOutput> {
  constructor() {
    super({
      name: 'systematic-debugging',
      description:
        'Find root cause before fixing. Spawns debugger subagent for systematic diagnosis.',
      requires: ['tdd'],
      inputSchema: DebugInputSchema as z.ZodType<DebugInput>,
      outputSchema: DebugOutputSchema,
    })
  }

  /**
   * Runs all phases in order and returns the combined result.
   *
   * @param input - Symptom plus optional reproduce command, logs, recent changes and model.
   * @param context - Provides `config` (for the default model) and `logger`.
   * @returns Parsed diagnosis, whether the fix phase reported `success`,
   *   the first 500 characters of the verifier output, and the total tokens used.
   */
  async execute(
    input: DebugInput,
    context: SkillContext,
  ): Promise<DebugOutput> {
    const { config, logger } = context
    const dispatcher = createDispatcher(logger)
    const guard = createMainAgentGuard({}, logger)

    // Every phase runs on the caller's model when given, else the configured default.
    const model = input.model || config.defaultModel

    // Each phase starts from "nothing allowed" and is granted only what it needs.
    const denyAll = {
      readFiles: false,
      searchCode: false,
      runCommands: false,
      writeFiles: false,
      gitOperations: false,
    }
    type Capability = keyof typeof denyAll
    const grant = (...allowed: Capability[]) => {
      const permissions = { ...denyAll }
      for (const capability of allowed) permissions[capability] = true
      return permissions
    }

    let tokensSpent = 0

    // NOTE(review): embargo semantics live in the guard module; here it is only
    // switched on for the duration of the run and always switched off afterwards.
    guard.activateEmbargo()

    try {
      // Phase 1: reproduce
      logger.info(`[debug] Reproducing: ${input.symptom.slice(0, 80)}`)
      const reproduction = await dispatcher.dispatch(
        { role: 'debugger', model },
        {
          permissions: grant('readFiles', 'runCommands'),
          prompt: buildReproducePrompt(
            input.symptom,
            input.reproduceCommand,
            input.logs,
          ),
          owns: [],
          reads: [],
        },
      )
      tokensSpent += reproduction.tokensUsed

      // Phase 2: gather evidence (no commands, no writes)
      logger.info('[debug] Gathering evidence')
      const evidence = await dispatcher.dispatch(
        { role: 'explorer', model },
        {
          permissions: grant('readFiles', 'searchCode'),
          prompt: buildEvidencePrompt(
            input.symptom,
            reproduction.output,
            input.recentChanges,
          ),
          owns: [],
          reads: [],
        },
      )
      tokensSpent += evidence.tokensUsed

      // Phase 2b: research known issues, only when the symptom mentions
      // packaging or failure keywords
      let knownIssues = ''
      const researchWorthwhile =
        /npm|yarn|package|version|error|fail|crash|exception/i.test(input.symptom)
      if (researchWorthwhile) {
        logger.info('[debug] Step 2b: Researching known issues...')
        const research = await dispatcher.dispatch(
          { role: 'researcher', model },
          researcherContract({
            topic: input.symptom,
            scope: 'technical',
            questions: [
              'Are there known issues or bug reports?',
              'Any recent breaking changes or deprecations?',
              'What are common solutions or workarounds?'
            ],
            timeRange: '1y'
          })
        )
        tokensSpent += research.tokensUsed
        knownIssues = research.output.slice(0, 1200)
        const outcome = knownIssues ? 'found known issues' : 'no relevant results'
        logger.info(`[debug] Research complete: ${outcome}`)
      }

      // Phase 3: diagnose, still without write permission
      logger.info('[debug] Diagnosing root cause (no fix yet)')
      const diagnosisRun = await dispatcher.dispatch(
        { role: 'debugger', model },
        {
          permissions: grant('readFiles', 'runCommands'),
          prompt: buildDiagnosePrompt(
            input.symptom,
            evidence.output,
            knownIssues,
          ),
          owns: [],
          reads: [],
        },
      )
      tokensSpent += diagnosisRun.tokensUsed
      const diagnosis = parseDiagnosis(diagnosisRun.output)

      // Phase 4: apply the fix; the only phase allowed to write, and it owns
      // just the diagnosed file when one was identified
      logger.info(
        `[debug] Applying fix (confidence: ${diagnosis.confidence})`,
      )
      const fix = await dispatcher.dispatch(
        {
          role: 'debugger',
          model,
          isolation: 'worktree',
        },
        {
          permissions: grant('readFiles', 'runCommands', 'writeFiles'),
          prompt: buildApplyFixPrompt(diagnosis),
          owns: diagnosis.file ? [diagnosis.file] : [],
          reads: [],
        },
      )
      tokensSpent += fix.tokensUsed

      // Phase 5: verify
      logger.info('[debug] Verifying fix')
      const verification = await dispatcher.dispatch(
        { role: 'verifier', model },
        {
          permissions: grant('runCommands'),
          prompt: buildVerifyPrompt(input.symptom, fix.output),
          owns: [],
          reads: [],
        },
      )
      tokensSpent += verification.tokensUsed

      const location = {
        file: diagnosis.file,
        line: diagnosis.line,
        function: diagnosis.func,
      }

      return {
        rootCause: diagnosis.rootCause,
        confidence: diagnosis.confidence,
        location,
        fixApplied: fix.status === 'success',
        // Prefer the fix proposed in the diagnosis; otherwise excerpt the fix agent's output.
        fixDescription:
          diagnosis.fixDescription ?? fix.output.slice(0, 500),
        verification: verification.output.slice(0, 500),
        risks: [],
        tokensUsed: tokensSpent,
      }
    } finally {
      guard.deactivateEmbargo()
    }
  }
}

/** Shared instance of the skill, also exposed as the module's default export. */
export const systematicDebuggingSkill = new SystematicDebuggingSkill()
export default systematicDebuggingSkill
@@ -0,0 +1,52 @@
1
+ ---
2
+ name: tdd
3
+ description: Enforce RED/GREEN/REFACTOR TDD cycle with isolated implementer subagents. No implementation before a failing test exists.
4
+ requires: []
5
+ ---
6
+
7
+ ## ⛔ MAIN AGENT CONSTRAINT
8
+
9
+ You are a THIN DISPATCHER. Your ONLY job is to dispatch subagents.
10
+ You MUST NOT: read source files, search code, write/edit files, run tests, run git commands.
11
+ You MAY only: read plan/state files, dispatch subagents via Agent tool, relay results.
12
+ For EVERY user request (including this skill), dispatch a subagent. Never execute yourself.
13
+
14
+ ---
15
+
16
+ ## Use/Exclude Matrix
17
+
18
+ | Use When | Exclude When |
19
+ |---|---|
20
+ | New feature or behavior implementation | Fixing a typo or formatting (use quick-task) |
21
+ | Bugfix with testable reproduction | Behavior already fully tested |
22
+ | Need confidence via test-first discipline | Exploratory/prototype code |
23
+ | Refactoring that must preserve behavior | Configuration or documentation changes |
24
+
25
+ ## Workflow
26
+
27
+ 1. **RED** — Dispatch implementer subagent (worktree) to write a single failing test. Must confirm test fails for the right reason. If test passes unexpectedly, return status `invalid-red`.
28
+ 2. **GREEN** — Dispatch implementer subagent (worktree) to write minimal code to pass the test. Code can be hacky; refactor comes next.
29
+ 3. **REFACTOR** — Dispatch implementer subagent (worktree) to improve code quality while keeping tests green. No behavior changes allowed.
30
+ 4. **Verify** — Dispatch verifier subagent to run full test suite and check for regressions.
31
+
32
+ ## Output Spec
33
+
34
+ | Field | Type | Description |
35
+ |---|---|---|
36
+ | `status` | enum | `success`, `failure`, `invalid-red` |
37
+ | `phases` | array | RED/GREEN/REFACTOR each with phase, status, command, evidence |
38
+ | `broaderVerification.command` | string | Verification command run |
39
+ | `broaderVerification.result` | enum | `PASS`, `FAIL`, `NOT_RUN` |
40
+ | `filesModified` | string[] | All files changed during TDD cycle |
41
+ | `summary` | string | Human-readable cycle summary |
42
+ | `tokensUsed` | number | Total tokens consumed |
43
+
44
+ ## Error Handling
45
+
46
+ | Error | Action |
47
+ |---|---|
48
+ | RED: test passes unexpectedly | Return status=invalid-red; behavior may already exist |
49
+ | RED: subagent fails | Return status=failure with phase evidence |
50
+ | GREEN: subagent fails | Return status=failure; preserve RED phase results |
51
+ | REFACTOR: subagent fails | Record phase as skipped; return success if GREEN passed |
52
+ | Verifier reports regressions | Return broaderVerification.result=FAIL |