guild-agents 1.3.0 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. package/README.md +19 -6
  2. package/bin/guild.js +46 -0
  3. package/package.json +2 -2
  4. package/src/commands/eval.js +225 -0
  5. package/src/commands/stats.js +147 -0
  6. package/src/templates/agents/advisor.md +0 -1
  7. package/src/templates/agents/developer.md +2 -2
  8. package/src/templates/agents/qa.md +1 -1
  9. package/src/templates/agents/tech-lead.md +2 -2
  10. package/src/templates/skills/build-feature/SKILL.md +53 -80
  11. package/src/templates/skills/build-feature/evals/evals.json +1 -2
  12. package/src/templates/skills/build-feature/evals/triggers.json +16 -0
  13. package/src/templates/skills/council/SKILL.md +2 -2
  14. package/src/templates/skills/council/evals/triggers.json +16 -0
  15. package/src/templates/skills/create-pr/evals/evals.json +44 -0
  16. package/src/templates/skills/create-pr/evals/triggers.json +16 -0
  17. package/src/templates/skills/debug/SKILL.md +1 -1
  18. package/src/templates/skills/debug/evals/triggers.json +16 -0
  19. package/src/templates/skills/dev-flow/SKILL.md +10 -12
  20. package/src/templates/skills/dev-flow/evals/evals.json +36 -0
  21. package/src/templates/skills/dev-flow/evals/triggers.json +16 -0
  22. package/src/templates/skills/guild-specialize/SKILL.md +0 -4
  23. package/src/templates/skills/guild-specialize/evals/evals.json +54 -0
  24. package/src/templates/skills/guild-specialize/evals/triggers.json +16 -0
  25. package/src/templates/skills/new-feature/evals/evals.json +41 -0
  26. package/src/templates/skills/new-feature/evals/triggers.json +16 -0
  27. package/src/templates/skills/qa-cycle/evals/evals.json +46 -0
  28. package/src/templates/skills/qa-cycle/evals/triggers.json +16 -0
  29. package/src/templates/skills/re-specialize/evals/evals.json +48 -0
  30. package/src/templates/skills/re-specialize/evals/triggers.json +16 -0
  31. package/src/templates/skills/review/evals/evals.json +43 -0
  32. package/src/templates/skills/review/evals/triggers.json +16 -0
  33. package/src/templates/skills/session-end/evals/evals.json +40 -0
  34. package/src/templates/skills/session-end/evals/triggers.json +16 -0
  35. package/src/templates/skills/session-start/evals/evals.json +50 -0
  36. package/src/templates/skills/session-start/evals/triggers.json +16 -0
  37. package/src/templates/skills/status/SKILL.md +1 -1
  38. package/src/templates/skills/status/evals/evals.json +40 -0
  39. package/src/templates/skills/status/evals/triggers.json +16 -0
  40. package/src/templates/skills/tdd/evals/triggers.json +16 -0
  41. package/src/templates/skills/verify/evals/triggers.json +16 -0
  42. package/src/utils/accounting.js +139 -0
  43. package/src/utils/benchmark.js +128 -0
  44. package/src/utils/description-analyzer.js +92 -0
  45. package/src/utils/dispatch-protocol.js +0 -3
  46. package/src/utils/executor.js +133 -23
  47. package/src/utils/pricing.js +28 -0
  48. package/src/utils/semantic-matcher.js +91 -0
  49. package/src/utils/trigger-matcher.js +64 -0
  50. package/src/utils/trigger-runner.js +132 -0
  51. package/src/templates/agents/db-migration.md +0 -51
  52. package/src/templates/agents/platform-expert.md +0 -92
  53. package/src/templates/agents/product-owner.md +0 -52
@@ -12,19 +12,13 @@ workflow:
12
12
  produces: [evaluation-report, verdict]
13
13
  model-tier: reasoning
14
14
  on-failure: abort
15
- - id: specify
16
- role: product-owner
17
- intent: "Break the feature into concrete tasks with verifiable acceptance criteria. Estimate effort and suggest implementation order."
18
- requires: [feature-description, evaluation-report]
19
- produces: [task-list, acceptance-criteria]
20
- model-tier: reasoning
21
- condition: step.evaluate.verdict != rejected
22
15
  - id: design
23
16
  role: tech-lead
24
- intent: "Define implementation approach: files to modify, patterns to follow, interfaces, and technical risks."
25
- requires: [task-list, acceptance-criteria]
26
- produces: [technical-plan]
17
+ intent: "Break the feature into concrete tasks with acceptance criteria. Define implementation approach: files to modify, patterns to follow, interfaces, and technical risks."
18
+ requires: [feature-description, evaluation-report]
19
+ produces: [task-list, acceptance-criteria, technical-plan]
27
20
  model-tier: reasoning
21
+ condition: step.evaluate.verdict != rejected
28
22
  - id: implement
29
23
  role: developer
30
24
  intent: "Implement the feature following the technical plan. Write unit tests. Make atomic commits."
@@ -131,19 +125,18 @@ git worktree remove .claude/worktrees/[branch-name]
131
125
 
132
126
  When running a single build-feature, a simple `git checkout -b` is sufficient.
133
127
 
134
- ## 6-Phase Pipeline
128
+ ## 5-Phase Pipeline
135
129
 
136
130
  ### Progress Display
137
131
 
138
132
  At the start of each phase, display a progress indicator to the user before any agent output:
139
133
 
140
134
  ```text
141
- [1/6] Advisor (opus) — Evaluating feature...
142
- [2/6] Product Owner (opus) — Defining spec...
143
- [3/6] Tech Lead (opus) — Defining technical approach...
144
- [4/6] Developer (sonnet) — Implementing...
145
- [5/6] Code Reviewer (opus) — Reviewing changes...
146
- [6/6] QA (sonnet) — Validating acceptance criteria...
135
+ [1/5] Advisor (opus) — Evaluating feature...
136
+ [2/5] Tech Lead (opus) — Defining spec and technical approach...
137
+ [3/5] Developer (sonnet) — Implementing...
138
+ [4/5] Code Reviewer (opus) — Reviewing changes...
139
+ [5/5] QA (sonnet) — Validating acceptance criteria...
147
140
  ```
148
141
 
149
142
  Model names are resolved from the step's `model-tier` using the `max` profile: reasoning=opus, execution=sonnet, routine=haiku. System/gate steps do not show a model name.
@@ -151,15 +144,15 @@ Model names are resolved from the step's `model-tier` using the `max` profile: r
151
144
  When a phase loops (review-fix or QA-review cycles), show the iteration:
152
145
 
153
146
  ```text
154
- [5/6 · round 2] Code Reviewer (opus) — Re-reviewing after fixes...
155
- [4/6 · round 2] Developer (sonnet) — Fixing review blockers...
147
+ [4/5 · round 2] Code Reviewer (opus) — Re-reviewing after fixes...
148
+ [3/5 · round 2] Developer (sonnet) — Fixing review blockers...
156
149
  ```
157
150
 
158
151
  This indicator MUST be displayed before spawning the agent for that phase.
159
152
 
160
153
  ### Phase 1 — Evaluation (Advisor)
161
154
 
162
- **Progress:** `[1/6] Advisor (opus) — Evaluating feature...`
155
+ **Progress:** `[1/5] Advisor (opus) — Evaluating feature...`
163
156
  **Agent:** Reads `.claude/agents/advisor.md` via Task tool with `model: "opus"`
164
157
  **Input:** The feature description provided by the user
165
158
  **Process:**
@@ -172,39 +165,26 @@ This indicator MUST be displayed before spawning the agent for that phase.
172
165
  **Trace data:** Verdict (Approved/Rejected/Approved with conditions), risks identified, conditions if any
173
166
  **Exit condition:** If the Advisor rejects the feature, the pipeline stops here. Inform the user of the reason and suggest adjustments if any.
174
167
 
175
- ### Phase 2 — Specification (Product Owner)
176
-
177
- **Progress:** `[2/6] Product Owner (opus) — Defining spec...`
178
- **Agent:** Reads `.claude/agents/product-owner.md` via Task tool with `model: "opus"`
179
- **Input:** The feature approved by the Advisor + their observations
180
- **Process:**
181
-
182
- 1. The Product Owner breaks the feature into concrete tasks
183
- 2. Defines verifiable acceptance criteria for each task
184
- 3. Estimates effort and suggests implementation order
168
+ ### Phase 2 — Specification & Technical Approach (Tech Lead)
185
169
 
186
- **Output:** Task list with acceptance criteria, estimation, and order
187
- **Trace data:** Tasks defined count, acceptance criteria count, estimated effort
188
-
189
- ### Phase 3 — Technical Approach (Tech Lead)
190
-
191
- **Progress:** `[3/6] Tech Lead (opus) — Defining technical approach...`
170
+ **Progress:** `[2/5] Tech Lead (opus) — Defining spec and technical approach...`
192
171
  **Agent:** Reads `.claude/agents/tech-lead.md` via Task tool with `model: "opus"`
193
- **Input:** Product Owner tasks + acceptance criteria
172
+ **Input:** The feature approved by the Advisor + their observations
194
173
  **Process:**
195
174
 
196
- 1. The Tech Lead defines the implementation approach
197
- 2. Identifies files to modify, patterns to follow, interfaces
175
+ 1. The Tech Lead breaks the feature into concrete tasks with verifiable acceptance criteria
176
+ 2. Defines the implementation approach: files to modify, patterns to follow, interfaces
198
177
  3. Anticipates technical risks and proposes mitigations
178
+ 4. Estimates effort and suggests implementation order
199
179
 
200
- **Output:** Technical plan with files, patterns, interfaces, and risks
201
- **Trace data:** Key patterns identified, files to modify, technical risks
180
+ **Output:** Task list with acceptance criteria + technical plan with files, patterns, interfaces, and risks
181
+ **Trace data:** Tasks defined count, acceptance criteria count, key patterns identified, files to modify, technical risks
202
182
 
203
- ### Phase 4 — Implementation (Developer)
183
+ ### Phase 3 — Implementation (Developer)
204
184
 
205
- **Progress:** `[4/6] Developer (sonnet) — Implementing...`
185
+ **Progress:** `[3/5] Developer (sonnet) — Implementing...`
206
186
  **Agent:** Reads `.claude/agents/developer.md` via Task tool with `model: "sonnet"`
207
- **Input:** Tech Lead technical plan + PO acceptance criteria
187
+ **Input:** Tech Lead technical plan + acceptance criteria
208
188
  **Process:**
209
189
 
210
190
  1. The Developer implements following the technical plan
@@ -217,7 +197,7 @@ This indicator MUST be displayed before spawning the agent for that phase.
217
197
 
218
198
  ### Pre-Review Gate (mandatory)
219
199
 
220
- Before advancing to Phase 5, run automated verification:
200
+ Before advancing to Phase 4, run automated verification:
221
201
 
222
202
  1. Run the project test commands (e.g., `npm test`) — if it fails, the Developer must fix before advancing
223
203
  2. Run the project lint commands (e.g., `npm run lint`) — if it fails, the Developer must fix before advancing
@@ -227,9 +207,9 @@ This gate CANNOT be skipped, even if the user requested phase skipping. The spec
227
207
 
228
208
  **Trace data:** Tests pass/fail, lint pass/fail
229
209
 
230
- ### Phase 5 — Review (Code Reviewer)
210
+ ### Phase 4 — Review (Code Reviewer)
231
211
 
232
- **Progress:** `[5/6] Code Reviewer (opus) — Reviewing changes...`
212
+ **Progress:** `[4/5] Code Reviewer (opus) — Reviewing changes...`
233
213
  **Agent:** Reads `.claude/agents/code-reviewer.md` via Task tool with `model: "opus"`
234
214
  **Input:** The implemented changes (git diff)
235
215
  **Process:**
@@ -239,13 +219,13 @@ This gate CANNOT be skipped, even if the user requested phase skipping. The spec
239
219
 
240
220
  **Output:** Review report with classified findings
241
221
  **Trace data:** Blockers count, warnings count, suggestions count, review-fix loops
242
- **Loop condition:** If there are Blocker findings, return to **Phase 4** for the Developer to fix them. Maximum 2 review-fix iterations.
222
+ **Loop condition:** If there are Blocker findings, return to **Phase 3** for the Developer to fix them. Maximum 2 review-fix iterations.
243
223
 
244
- ### Phase 6 — QA (delegates to /qa-cycle)
224
+ ### Phase 5 — QA (delegates to /qa-cycle)
245
225
 
246
- **Progress:** `[6/6] QA (sonnet) — Validating acceptance criteria...`
226
+ **Progress:** `[5/5] QA (sonnet) — Validating acceptance criteria...`
247
227
 
248
- Runs the `/qa-cycle` skill passing the PO acceptance criteria as context. The qa-cycle handles:
228
+ Runs the `/qa-cycle` skill passing the acceptance criteria as context. The qa-cycle handles:
249
229
 
250
230
  1. Running project tests and lint
251
231
  2. Validating acceptance criteria
@@ -253,7 +233,7 @@ Runs the `/qa-cycle` skill passing the PO acceptance criteria as context. The qa
253
233
  4. Bugfix cycle if issues arise (maximum 3 cycles)
254
234
 
255
235
  **Trace data:** Acceptance criteria verified count, bugs found, QA cycles
256
- **Additional loop condition:** If the qa-cycle bugfix introduces significant changes, return to **Phase 5** (Review) for verification. Maximum 2 review-QA cycles.
236
+ **Additional loop condition:** If the qa-cycle bugfix introduces significant changes, return to **Phase 4** (Review) for verification. Maximum 2 review-QA cycles.
257
237
 
258
238
  ## Checkpoint Commits
259
239
 
@@ -267,11 +247,10 @@ git commit -m "wip: [feature-name] phase N complete — [phase-name]"
267
247
  Pattern for each phase:
268
248
 
269
249
  - After Phase 1: `wip: [feature] phase 1 — advisor approved`
270
- - After Phase 2: `wip: [feature] phase 2 — PO spec ready`
271
- - After Phase 3: `wip: [feature] phase 3 — tech approach defined`
272
- - After Phase 4: `wip: [feature] phase 4 — implementation done` -- also write partial trace (phases 1-4) to spec and update status to `implementing`
273
- - After Phase 5: `wip: [feature] phase 5 — review passed`
274
- - After Phase 6: `wip: [feature] phase 6 — QA passed`
250
+ - After Phase 2: `wip: [feature] phase 2 — spec and tech approach defined`
251
+ - After Phase 3: `wip: [feature] phase 3 — implementation done` -- also write partial trace (phases 1-3) to spec and update status to `implementing`
252
+ - After Phase 4: `wip: [feature] phase 4 — review passed`
253
+ - After Phase 5: `wip: [feature] phase 5 — QA passed`
275
254
 
276
255
  Also update SESSION.md at each phase transition:
277
256
 
@@ -325,7 +304,7 @@ Append this section to the spec file:
325
304
 
326
305
  pipeline-start: [YYYY-MM-DD]
327
306
  pipeline-end: [YYYY-MM-DD]
328
- phases-completed: [N]/6
307
+ phases-completed: [N]/5
329
308
  review-fix-loops: [N]
330
309
  qa-cycles: [N]
331
310
  final-gate: pass | fail
@@ -335,19 +314,16 @@ final-gate: pass | fail
335
314
  - **Verdict**: [Approved/Rejected/Approved with conditions]
336
315
  - **Risks identified**: [list or "None"]
337
316
 
338
- ### Phase 2 — Specification
317
+ ### Phase 2 — Specification & Technical Approach
339
318
 
340
319
  - **Tasks defined**: [N]
341
320
  - **Acceptance criteria**: [N]
342
- - **Estimated effort**: [summary]
343
-
344
- ### Phase 3 — Technical Approach
345
-
346
321
  - **Key patterns**: [list]
347
322
  - **Files to modify**: [list]
348
323
  - **Technical risks**: [list or "None"]
324
+ - **Estimated effort**: [summary]
349
325
 
350
- ### Phase 4 — Implementation
326
+ ### Phase 3 — Implementation
351
327
 
352
328
  - **Files created/modified**: [list]
353
329
  - **Tests added**: [N]
@@ -358,14 +334,14 @@ final-gate: pass | fail
358
334
  - **Tests**: pass | fail
359
335
  - **Lint**: pass | fail
360
336
 
361
- ### Phase 5 — Review
337
+ ### Phase 4 — Review
362
338
 
363
339
  - **Blockers**: [N]
364
340
  - **Warnings**: [N]
365
341
  - **Suggestions**: [N]
366
342
  - **Review-fix loops**: [N]
367
343
 
368
- ### Phase 6 — QA
344
+ ### Phase 5 — QA
369
345
 
370
346
  - **Acceptance criteria verified**: [N]/[total]
371
347
  - **Bugs found**: [N]
@@ -380,15 +356,15 @@ final-gate: pass | fail
380
356
 
381
357
  ### When to write the trace
382
358
 
383
- - **Phase 4 checkpoint:** Write a partial trace covering phases 1-4 to the spec file. Set status to `implementing`. Include the spec file in the checkpoint commit.
359
+ - **Phase 3 checkpoint:** Write a partial trace covering phases 1-3 to the spec file. Set status to `implementing`. Include the spec file in the checkpoint commit.
384
360
  - **Pipeline completion:** Write the complete trace (all phases) to the spec file. Set status to `implemented`. Include the spec file in the final checkpoint commit.
385
361
 
386
362
  ## Final Gate (mandatory before Completion)
387
363
 
388
364
  Before declaring the pipeline as complete, run final verification:
389
365
 
390
- 1. Run project tests — if it fails, return to Phase 6 (QA/Bugfix)
391
- 2. Run project lint — if it fails, return to Phase 4 (Developer)
366
+ 1. Run project tests — if it fails, return to Phase 5 (QA/Bugfix)
367
+ 2. Run project lint — if it fails, return to Phase 3 (Developer)
392
368
  3. Both must pass with exit code 0
393
369
 
394
370
  This gate is the last safety net. It CANNOT be skipped under any circumstances.
@@ -423,7 +399,7 @@ When spawning agents via the Task tool, use these `subagent_type` values:
423
399
 
424
400
  | Guild Agent Role | subagent_type to use |
425
401
  | --- | --- |
426
- | advisor, product-owner, tech-lead | `"general-purpose"` |
402
+ | advisor, tech-lead | `"general-purpose"` |
427
403
  | developer, bugfix | `"general-purpose"` |
428
404
  | code-reviewer, qa | `"general-purpose"` |
429
405
 
@@ -445,22 +421,19 @@ The `model` parameter is resolved from the step's `model-tier`: reasoning→`"op
445
421
  ```text
446
422
  User: /build-feature add dark mode toggle to settings page
447
423
 
448
- [1/6] Advisor (opus) — Evaluating feature...
424
+ [1/5] Advisor (opus) — Evaluating feature...
449
425
  Approved. Low risk, aligns with UX roadmap.
450
426
 
451
- [2/6] Product Owner (opus) — Defining spec...
452
- 3 tasks defined with acceptance criteria.
453
-
454
- [3/6] Tech Lead (opus) — Defining technical approach...
455
- Use CSS variables + context provider pattern.
427
+ [2/5] Tech Lead (opus) — Defining spec and technical approach...
428
+ 3 tasks defined. Use CSS variables + context provider pattern.
456
429
 
457
- [4/6] Developer (sonnet) — Implementing...
430
+ [3/5] Developer (sonnet) — Implementing...
458
431
  Implemented ThemeContext, toggle component, CSS vars.
459
432
 
460
- [5/6] Code Reviewer (opus) — Reviewing changes...
433
+ [4/5] Code Reviewer (opus) — Reviewing changes...
461
434
  Passed. 1 suggestion (memoize context value).
462
435
 
463
- [6/6] QA (sonnet) — Validating acceptance criteria...
436
+ [5/5] QA (sonnet) — Validating acceptance criteria...
464
437
  All 3 acceptance criteria verified. 0 bugs.
465
438
 
466
439
  Feature complete. PR ready for merge.
@@ -468,7 +441,7 @@ Feature complete. PR ready for merge.
468
441
 
469
442
  ## Notes
470
443
 
471
- - If the user wants to skip phases (e.g., "already evaluated, implement directly"), allow skipping to Phase 4 but warn that validation is lost. Verification gates (pre-Review and final) are NEVER skipped
444
+ - If the user wants to skip phases (e.g., "already evaluated, implement directly"), allow skipping to Phase 3 but warn that validation is lost. Verification gates (pre-Review and final) are NEVER skipped
472
445
  - The pipeline is sequential: each phase depends on the output of the previous one
473
446
  - Review/QA loops have limits to prevent infinite cycles
474
447
  - In v1.x, parallel pipeline execution (multiple build-features via worktrees) is best-effort and depends on the host environment supporting concurrent agents
@@ -3,10 +3,9 @@
3
3
  "evals": [
4
4
  {
5
5
  "id": "bf-has-core-phases",
6
- "description": "Plan contains evaluate, specify, design, implement phases",
6
+ "description": "Plan contains evaluate, design, implement phases",
7
7
  "expectations": [
8
8
  { "text": "Has evaluate step", "assertion": "step-exists:evaluate" },
9
- { "text": "Has specify step", "assertion": "step-exists:specify" },
10
9
  { "text": "Has design step", "assertion": "step-exists:design" },
11
10
  { "text": "Has implement step", "assertion": "step-exists:implement" }
12
11
  ]
@@ -0,0 +1,16 @@
1
+ {
2
+ "skill": "build-feature",
3
+ "matcherType": "keyword",
4
+ "description": "Full pipeline: evaluation -> spec -> implementation -> review -> QA",
5
+ "threshold": 0.3,
6
+ "tests": [
7
+ { "prompt": "build a new feature with full pipeline", "shouldTrigger": true },
8
+ { "prompt": "implement this feature end to end", "shouldTrigger": true, "keywordExpected": false },
9
+ { "prompt": "run the full implementation pipeline", "shouldTrigger": true },
10
+ { "prompt": "I want to ship this end to end", "shouldTrigger": true, "keywordExpected": false },
11
+ { "prompt": "review my code", "shouldTrigger": false },
12
+ { "prompt": "create a pull request", "shouldTrigger": false },
13
+ { "prompt": "save my session", "shouldTrigger": false },
14
+ { "prompt": "debug this bug", "shouldTrigger": false }
15
+ ]
16
+ }
@@ -87,13 +87,13 @@ Invokes all 3 agents IN PARALLEL using Task tool:
87
87
 
88
88
  ### 2. Council Feature-Scope
89
89
 
90
- **Participants:** Advisor + Product Owner + Tech Lead
90
+ **Participants:** Advisor + Developer + Tech Lead
91
91
  **When it applies:** Defining feature scope, prioritizing functionality, evaluating product proposals
92
92
 
93
93
  Invokes all 3 agents IN PARALLEL using Task tool:
94
94
 
95
95
  - Task 1: Reads `.claude/agents/advisor.md` — domain and strategic vision perspective
96
- - Task 2: Reads `.claude/agents/product-owner.md` — user value and scope perspective
96
+ - Task 2: Reads `.claude/agents/developer.md` — implementability and pragmatism perspective
97
97
  - Task 3: Reads `.claude/agents/tech-lead.md` — technical feasibility and effort perspective
98
98
 
99
99
  ### 3. Council Tech-Debt
@@ -0,0 +1,16 @@
1
+ {
2
+ "skill": "council",
3
+ "matcherType": "keyword",
4
+ "description": "Convenes multiple agents to debate an important decision",
5
+ "threshold": 0.3,
6
+ "tests": [
7
+ { "prompt": "convene a council to debate this decision", "shouldTrigger": true },
8
+ { "prompt": "I need multiple agents to debate this", "shouldTrigger": true },
9
+ { "prompt": "let the council decide", "shouldTrigger": true, "keywordExpected": false },
10
+ { "prompt": "I need help making a decision", "shouldTrigger": true, "keywordExpected": false },
11
+ { "prompt": "build a new feature", "shouldTrigger": false },
12
+ { "prompt": "review my code", "shouldTrigger": false },
13
+ { "prompt": "save my session", "shouldTrigger": false },
14
+ { "prompt": "debug this bug", "shouldTrigger": false }
15
+ ]
16
+ }
@@ -0,0 +1,44 @@
1
+ {
2
+ "skill": "create-pr",
3
+ "evals": [
4
+ {
5
+ "id": "cpr-has-core-steps",
6
+ "description": "PR creation has verify, gather, generate, create steps",
7
+ "expectations": [
8
+ { "text": "Has verify-branch step", "assertion": "step-exists:verify-branch" },
9
+ { "text": "Has gather-context step", "assertion": "step-exists:gather-context" },
10
+ { "text": "Has generate-description step", "assertion": "step-exists:generate-description" },
11
+ { "text": "Has create-pr step", "assertion": "step-exists:create-pr" }
12
+ ]
13
+ },
14
+ {
15
+ "id": "cpr-all-system-role",
16
+ "description": "All steps use system role (no agent delegation)",
17
+ "expectations": [
18
+ { "text": "verify-branch is system", "assertion": "step-role:verify-branch:system" },
19
+ { "text": "gather-context is system", "assertion": "step-role:gather-context:system" },
20
+ { "text": "generate-description is system", "assertion": "step-role:generate-description:system" },
21
+ { "text": "create-pr is system", "assertion": "step-role:create-pr:system" },
22
+ { "text": "post-creation is system", "assertion": "step-role:post-creation:system" }
23
+ ]
24
+ },
25
+ {
26
+ "id": "cpr-gates",
27
+ "description": "Gates at description generation and post-creation",
28
+ "expectations": [
29
+ { "text": "Generate-description has gate", "assertion": "gate-exists:generate-description" },
30
+ { "text": "Post-creation has gate", "assertion": "gate-exists:post-creation" }
31
+ ]
32
+ },
33
+ {
34
+ "id": "cpr-dependencies",
35
+ "description": "Steps have correct dependency chain",
36
+ "expectations": [
37
+ { "text": "gather-context requires branch-state", "assertion": "step-requires:gather-context:branch-state" },
38
+ { "text": "generate-description requires commit-list", "assertion": "step-requires:generate-description:commit-list" },
39
+ { "text": "create-pr requires pr-description", "assertion": "step-requires:create-pr:pr-description" },
40
+ { "text": "post-creation requires pr-url", "assertion": "step-requires:post-creation:pr-url" }
41
+ ]
42
+ }
43
+ ]
44
+ }
@@ -0,0 +1,16 @@
1
+ {
2
+ "skill": "create-pr",
3
+ "matcherType": "keyword",
4
+ "description": "Create a pull request from the current branch with structured summary",
5
+ "threshold": 0.3,
6
+ "tests": [
7
+ { "prompt": "create a pull request", "shouldTrigger": true },
8
+ { "prompt": "open a PR for this branch", "shouldTrigger": true },
9
+ { "prompt": "push and create PR", "shouldTrigger": true },
10
+ { "prompt": "I'm ready to submit this for review", "shouldTrigger": true, "keywordExpected": false },
11
+ { "prompt": "review my code changes", "shouldTrigger": false },
12
+ { "prompt": "start a new feature", "shouldTrigger": false },
13
+ { "prompt": "deploy to production", "shouldTrigger": false },
14
+ { "prompt": "save my session", "shouldTrigger": false }
15
+ ]
16
+ }
@@ -1,6 +1,6 @@
1
1
  ---
2
2
  name: debug
3
- description: "Discipline skill — systematic debugging process. Use when encountering any bug, test failure, or unexpected behavior, before proposing fixes."
3
+ description: "Discipline skill — systematic debugging process. Use when encountering any bug, debug issue, test failure, broken function, or unexpected behavior, before proposing fixes."
4
4
  user-invocable: true
5
5
  ---
6
6
 
@@ -0,0 +1,16 @@
1
+ {
2
+ "skill": "debug",
3
+ "matcherType": "keyword",
4
+ "description": "Discipline skill — systematic debugging process. Use when encountering any bug, debug issue, test failure, broken function, or unexpected behavior, before proposing fixes.",
5
+ "threshold": 0.3,
6
+ "tests": [
7
+ { "prompt": "I have a bug in the login flow", "shouldTrigger": true, "keywordExpected": false },
8
+ { "prompt": "tests are failing unexpectedly", "shouldTrigger": true },
9
+ { "prompt": "unexpected behavior in the API", "shouldTrigger": true },
10
+ { "prompt": "help me debug this function", "shouldTrigger": true },
11
+ { "prompt": "create a new feature", "shouldTrigger": false },
12
+ { "prompt": "review my code", "shouldTrigger": false },
13
+ { "prompt": "save my session", "shouldTrigger": false },
14
+ { "prompt": "what phase am I in", "shouldTrigger": false }
15
+ ]
16
+ }
@@ -47,11 +47,10 @@ Read `SESSION.md` to determine:
47
47
  The pipeline phases are:
48
48
 
49
49
  1. **Evaluation** (Advisor) — go/no-go
50
- 2. **Specification** (Product Owner) — acceptance criteria
51
- 3. **Technical Approach** (Tech Lead) — implementation plan
52
- 4. **Implementation** (Developer) — code and tests
53
- 5. **Review** (Code Reviewer) — quality review
54
- 6. **QA** — functional validation
50
+ 2. **Specification & Technical Approach** (Tech Lead) — tasks, acceptance criteria, implementation plan
51
+ 3. **Implementation** (Developer) — code and tests
52
+ 4. **Review** (Code Reviewer) — quality review
53
+ 5. **QA** — functional validation
55
54
 
56
55
  ### Step 3 — Present flow state
57
56
 
@@ -59,11 +58,10 @@ The pipeline phases are:
59
58
  Dev Flow — [feature name]
60
59
 
61
60
  [x] Phase 1 — Evaluation (completed)
62
- [x] Phase 2 — Specification (completed)
63
- [ ] Phase 3 — Technical Approach (pending) <-- you are here
64
- [ ] Phase 4 — Implementation
65
- [ ] Phase 5 — Review
66
- [ ] Phase 6 — QA
61
+ [x] Phase 2 — Specification & Technical Approach (completed)
62
+ [ ] Phase 3 — Implementation (pending) <-- you are here
63
+ [ ] Phase 4 — Review
64
+ [ ] Phase 5 — QA
67
65
 
68
66
  Next step: Run /build-feature to continue from Phase 3.
69
67
  ```
@@ -76,8 +74,8 @@ If there is no feature in progress, report that there is no active pipeline and
76
74
  User: /dev-flow
77
75
 
78
76
  Current pipeline: build-feature "add user preferences"
79
- Phase: 4 of 6 — Implementation
77
+ Phase: 3 of 5 — Implementation
80
78
  Developer agent active.
81
79
 
82
- Next: Phase 5 — Code Review
80
+ Next: Phase 4 — Code Review
83
81
  ```
@@ -0,0 +1,36 @@
1
+ {
2
+ "skill": "dev-flow",
3
+ "evals": [
4
+ {
5
+ "id": "df-has-steps",
6
+ "description": "Dev flow has read-state and present-flow steps",
7
+ "expectations": [
8
+ { "text": "Has read-state step", "assertion": "step-exists:read-state" },
9
+ { "text": "Has present-flow step", "assertion": "step-exists:present-flow" }
10
+ ]
11
+ },
12
+ {
13
+ "id": "df-all-system",
14
+ "description": "All steps are system role",
15
+ "expectations": [
16
+ { "text": "read-state is system", "assertion": "step-role:read-state:system" },
17
+ { "text": "present-flow is system", "assertion": "step-role:present-flow:system" }
18
+ ]
19
+ },
20
+ {
21
+ "id": "df-presentation-gate",
22
+ "description": "Present-flow step has a gate for user confirmation",
23
+ "expectations": [
24
+ { "text": "present-flow has gate", "assertion": "gate-exists:present-flow" }
25
+ ]
26
+ },
27
+ {
28
+ "id": "df-dependencies",
29
+ "description": "Present-flow requires session state",
30
+ "expectations": [
31
+ { "text": "present-flow requires session-state", "assertion": "step-requires:present-flow:session-state" },
32
+ { "text": "present-flow requires current-phase", "assertion": "step-requires:present-flow:current-phase" }
33
+ ]
34
+ }
35
+ ]
36
+ }
@@ -0,0 +1,16 @@
1
+ {
2
+ "skill": "dev-flow",
3
+ "matcherType": "keyword",
4
+ "description": "Shows current pipeline phase and what comes next",
5
+ "threshold": 0.3,
6
+ "tests": [
7
+ { "prompt": "what phase am I in", "shouldTrigger": true },
8
+ { "prompt": "show the current pipeline phase", "shouldTrigger": true },
9
+ { "prompt": "what comes next in the flow", "shouldTrigger": true },
10
+ { "prompt": "where did I leave off", "shouldTrigger": true, "keywordExpected": false },
11
+ { "prompt": "create a pull request", "shouldTrigger": false },
12
+ { "prompt": "review my code", "shouldTrigger": false },
13
+ { "prompt": "fix this bug", "shouldTrigger": false },
14
+ { "prompt": "run the tests", "shouldTrigger": false }
15
+ ]
16
+ }
@@ -126,13 +126,10 @@ Invoke the Tech Lead agent using Task tool with `model: "sonnet"` (execution tie
126
126
 
127
127
  - **advisor.md**: real project domain, target users
128
128
  - **tech-lead.md**: specific stack, detected patterns, architecture decisions
129
- - **product-owner.md**: existing functionality, visible backlog
130
129
  - **developer.md**: code conventions, main framework, file structure
131
130
  - **code-reviewer.md**: lint rules, project patterns, anti-patterns to watch
132
131
  - **qa.md**: testing framework, commands to run tests, current coverage
133
132
  - **bugfix.md**: debugging stack, logs, available tools
134
- - **db-migration.md**: ORM, migration tool, current schema (if applicable)
135
- - **platform-expert.md**: Claude Code version, known permission bugs, hook configuration
136
133
 
137
134
  When specializing agents, append a zone at the bottom of each agent file:
138
135
 
@@ -204,7 +201,6 @@ Tech Lead (sonnet) — Specializing agents...
204
201
  Agents updated:
205
202
  - developer.md: Specialized for Next.js + TypeScript
206
203
  - qa.md: Configured for Vitest + Playwright
207
- - db-migration.md: Configured for Prisma
208
204
 
209
205
  Run /status to see the full state.
210
206
  ```
@@ -0,0 +1,54 @@
1
+ {
2
+ "skill": "guild-specialize",
3
+ "evals": [
4
+ {
5
+ "id": "gs-has-core-steps",
6
+ "description": "Guild specialize has read, explore, enrich, specialize, confirm, commit steps",
7
+ "expectations": [
8
+ { "text": "Has read-base step", "assertion": "step-exists:read-base" },
9
+ { "text": "Has explore-project step", "assertion": "step-exists:explore-project" },
10
+ { "text": "Has enrich-claude-md step", "assertion": "step-exists:enrich-claude-md" },
11
+ { "text": "Has specialize-agents step", "assertion": "step-exists:specialize-agents" },
12
+ { "text": "Has confirm step", "assertion": "step-exists:confirm" },
13
+ { "text": "Has commit-enrichment step", "assertion": "step-exists:commit-enrichment" }
14
+ ]
15
+ },
16
+ {
17
+ "id": "gs-enrichment-uses-reasoning",
18
+ "description": "CLAUDE.md enrichment uses reasoning tier (opus)",
19
+ "expectations": [
20
+ { "text": "enrich-claude-md uses reasoning", "assertion": "step-model-tier:enrich-claude-md:reasoning" }
21
+ ]
22
+ },
23
+ {
24
+ "id": "gs-agents-use-execution",
25
+ "description": "Agent specialization uses execution tier (sonnet)",
26
+ "expectations": [
27
+ { "text": "specialize-agents uses execution", "assertion": "step-model-tier:specialize-agents:execution" }
28
+ ]
29
+ },
30
+ {
31
+ "id": "gs-gates",
32
+ "description": "Gates at exploration and confirmation",
33
+ "expectations": [
34
+ { "text": "explore-project has gate", "assertion": "gate-exists:explore-project" },
35
+ { "text": "confirm has gate", "assertion": "gate-exists:confirm" }
36
+ ]
37
+ },
38
+ {
39
+ "id": "gs-tech-lead-role",
40
+ "description": "Enrichment and specialization use tech-lead role",
41
+ "expectations": [
42
+ { "text": "enrich-claude-md is tech-lead", "assertion": "step-role:enrich-claude-md:tech-lead" },
43
+ { "text": "specialize-agents is tech-lead", "assertion": "step-role:specialize-agents:tech-lead" }
44
+ ]
45
+ },
46
+ {
47
+ "id": "gs-minimum-steps",
48
+ "description": "Has at least 6 steps",
49
+ "expectations": [
50
+ { "text": "At least 6 steps", "assertion": "step-count:6" }
51
+ ]
52
+ }
53
+ ]
54
+ }
@@ -0,0 +1,16 @@
1
+ {
2
+ "skill": "guild-specialize",
3
+ "matcherType": "keyword",
4
+ "description": "Enriches CLAUDE.md by exploring the project and specializes agents to the real stack",
5
+ "threshold": 0.3,
6
+ "tests": [
7
+ { "prompt": "specialize the agents for this project", "shouldTrigger": true },
8
+ { "prompt": "enrich CLAUDE.md with the project stack", "shouldTrigger": true },
9
+ { "prompt": "explore the project and specialize agents", "shouldTrigger": true },
10
+ { "prompt": "set up Guild for this codebase", "shouldTrigger": true, "keywordExpected": false },
11
+ { "prompt": "create a pull request", "shouldTrigger": false },
12
+ { "prompt": "review my code", "shouldTrigger": false },
13
+ { "prompt": "debug this bug", "shouldTrigger": false },
14
+ { "prompt": "save my session", "shouldTrigger": false }
15
+ ]
16
+ }