guild-agents 1.3.0 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +19 -6
- package/bin/guild.js +46 -0
- package/package.json +2 -2
- package/src/commands/eval.js +225 -0
- package/src/commands/stats.js +147 -0
- package/src/templates/agents/advisor.md +0 -1
- package/src/templates/agents/developer.md +2 -2
- package/src/templates/agents/qa.md +1 -1
- package/src/templates/agents/tech-lead.md +2 -2
- package/src/templates/skills/build-feature/SKILL.md +53 -80
- package/src/templates/skills/build-feature/evals/evals.json +1 -2
- package/src/templates/skills/build-feature/evals/triggers.json +16 -0
- package/src/templates/skills/council/SKILL.md +2 -2
- package/src/templates/skills/council/evals/triggers.json +16 -0
- package/src/templates/skills/create-pr/evals/evals.json +44 -0
- package/src/templates/skills/create-pr/evals/triggers.json +16 -0
- package/src/templates/skills/debug/SKILL.md +1 -1
- package/src/templates/skills/debug/evals/triggers.json +16 -0
- package/src/templates/skills/dev-flow/SKILL.md +10 -12
- package/src/templates/skills/dev-flow/evals/evals.json +36 -0
- package/src/templates/skills/dev-flow/evals/triggers.json +16 -0
- package/src/templates/skills/guild-specialize/SKILL.md +0 -4
- package/src/templates/skills/guild-specialize/evals/evals.json +54 -0
- package/src/templates/skills/guild-specialize/evals/triggers.json +16 -0
- package/src/templates/skills/new-feature/evals/evals.json +41 -0
- package/src/templates/skills/new-feature/evals/triggers.json +16 -0
- package/src/templates/skills/qa-cycle/evals/evals.json +46 -0
- package/src/templates/skills/qa-cycle/evals/triggers.json +16 -0
- package/src/templates/skills/re-specialize/evals/evals.json +48 -0
- package/src/templates/skills/re-specialize/evals/triggers.json +16 -0
- package/src/templates/skills/review/evals/evals.json +43 -0
- package/src/templates/skills/review/evals/triggers.json +16 -0
- package/src/templates/skills/session-end/evals/evals.json +40 -0
- package/src/templates/skills/session-end/evals/triggers.json +16 -0
- package/src/templates/skills/session-start/evals/evals.json +50 -0
- package/src/templates/skills/session-start/evals/triggers.json +16 -0
- package/src/templates/skills/status/SKILL.md +1 -1
- package/src/templates/skills/status/evals/evals.json +40 -0
- package/src/templates/skills/status/evals/triggers.json +16 -0
- package/src/templates/skills/tdd/evals/triggers.json +16 -0
- package/src/templates/skills/verify/evals/triggers.json +16 -0
- package/src/utils/accounting.js +139 -0
- package/src/utils/benchmark.js +128 -0
- package/src/utils/description-analyzer.js +92 -0
- package/src/utils/dispatch-protocol.js +0 -3
- package/src/utils/executor.js +133 -23
- package/src/utils/pricing.js +28 -0
- package/src/utils/semantic-matcher.js +91 -0
- package/src/utils/trigger-matcher.js +64 -0
- package/src/utils/trigger-runner.js +132 -0
- package/src/templates/agents/db-migration.md +0 -51
- package/src/templates/agents/platform-expert.md +0 -92
- package/src/templates/agents/product-owner.md +0 -52
|
@@ -12,19 +12,13 @@ workflow:
|
|
|
12
12
|
produces: [evaluation-report, verdict]
|
|
13
13
|
model-tier: reasoning
|
|
14
14
|
on-failure: abort
|
|
15
|
-
- id: specify
|
|
16
|
-
role: product-owner
|
|
17
|
-
intent: "Break the feature into concrete tasks with verifiable acceptance criteria. Estimate effort and suggest implementation order."
|
|
18
|
-
requires: [feature-description, evaluation-report]
|
|
19
|
-
produces: [task-list, acceptance-criteria]
|
|
20
|
-
model-tier: reasoning
|
|
21
|
-
condition: step.evaluate.verdict != rejected
|
|
22
15
|
- id: design
|
|
23
16
|
role: tech-lead
|
|
24
|
-
intent: "Define implementation approach: files to modify, patterns to follow, interfaces, and technical risks."
|
|
25
|
-
requires: [
|
|
26
|
-
produces: [technical-plan]
|
|
17
|
+
intent: "Break the feature into concrete tasks with acceptance criteria. Define implementation approach: files to modify, patterns to follow, interfaces, and technical risks."
|
|
18
|
+
requires: [feature-description, evaluation-report]
|
|
19
|
+
produces: [task-list, acceptance-criteria, technical-plan]
|
|
27
20
|
model-tier: reasoning
|
|
21
|
+
condition: step.evaluate.verdict != rejected
|
|
28
22
|
- id: implement
|
|
29
23
|
role: developer
|
|
30
24
|
intent: "Implement the feature following the technical plan. Write unit tests. Make atomic commits."
|
|
@@ -131,19 +125,18 @@ git worktree remove .claude/worktrees/[branch-name]
|
|
|
131
125
|
|
|
132
126
|
When running a single build-feature, a simple `git checkout -b` is sufficient.
|
|
133
127
|
|
|
134
|
-
##
|
|
128
|
+
## 5-Phase Pipeline
|
|
135
129
|
|
|
136
130
|
### Progress Display
|
|
137
131
|
|
|
138
132
|
At the start of each phase, display a progress indicator to the user before any agent output:
|
|
139
133
|
|
|
140
134
|
```text
|
|
141
|
-
[1/
|
|
142
|
-
[2/
|
|
143
|
-
[3/
|
|
144
|
-
[4/
|
|
145
|
-
[5/
|
|
146
|
-
[6/6] QA (sonnet) — Validating acceptance criteria...
|
|
135
|
+
[1/5] Advisor (opus) — Evaluating feature...
|
|
136
|
+
[2/5] Tech Lead (opus) — Defining spec and technical approach...
|
|
137
|
+
[3/5] Developer (sonnet) — Implementing...
|
|
138
|
+
[4/5] Code Reviewer (opus) — Reviewing changes...
|
|
139
|
+
[5/5] QA (sonnet) — Validating acceptance criteria...
|
|
147
140
|
```
|
|
148
141
|
|
|
149
142
|
Model names are resolved from the step's `model-tier` using the `max` profile: reasoning=opus, execution=sonnet, routine=haiku. System/gate steps do not show a model name.
|
|
@@ -151,15 +144,15 @@ Model names are resolved from the step's `model-tier` using the `max` profile: r
|
|
|
151
144
|
When a phase loops (review-fix or QA-review cycles), show the iteration:
|
|
152
145
|
|
|
153
146
|
```text
|
|
154
|
-
[5
|
|
155
|
-
[
|
|
147
|
+
[4/5 · round 2] Code Reviewer (opus) — Re-reviewing after fixes...
|
|
148
|
+
[3/5 · round 2] Developer (sonnet) — Fixing review blockers...
|
|
156
149
|
```
|
|
157
150
|
|
|
158
151
|
This indicator MUST be displayed before spawning the agent for that phase.
|
|
159
152
|
|
|
160
153
|
### Phase 1 — Evaluation (Advisor)
|
|
161
154
|
|
|
162
|
-
**Progress:** `[1/
|
|
155
|
+
**Progress:** `[1/5] Advisor (opus) — Evaluating feature...`
|
|
163
156
|
**Agent:** Reads `.claude/agents/advisor.md` via Task tool with `model: "opus"`
|
|
164
157
|
**Input:** The feature description provided by the user
|
|
165
158
|
**Process:**
|
|
@@ -172,39 +165,26 @@ This indicator MUST be displayed before spawning the agent for that phase.
|
|
|
172
165
|
**Trace data:** Verdict (Approved/Rejected/Approved with conditions), risks identified, conditions if any
|
|
173
166
|
**Exit condition:** If the Advisor rejects the feature, the pipeline stops here. Inform the user of the reason and suggest adjustments if any.
|
|
174
167
|
|
|
175
|
-
### Phase 2 — Specification (
|
|
176
|
-
|
|
177
|
-
**Progress:** `[2/6] Product Owner (opus) — Defining spec...`
|
|
178
|
-
**Agent:** Reads `.claude/agents/product-owner.md` via Task tool with `model: "opus"`
|
|
179
|
-
**Input:** The feature approved by the Advisor + their observations
|
|
180
|
-
**Process:**
|
|
181
|
-
|
|
182
|
-
1. The Product Owner breaks the feature into concrete tasks
|
|
183
|
-
2. Defines verifiable acceptance criteria for each task
|
|
184
|
-
3. Estimates effort and suggests implementation order
|
|
168
|
+
### Phase 2 — Specification & Technical Approach (Tech Lead)
|
|
185
169
|
|
|
186
|
-
**
|
|
187
|
-
**Trace data:** Tasks defined count, acceptance criteria count, estimated effort
|
|
188
|
-
|
|
189
|
-
### Phase 3 — Technical Approach (Tech Lead)
|
|
190
|
-
|
|
191
|
-
**Progress:** `[3/6] Tech Lead (opus) — Defining technical approach...`
|
|
170
|
+
**Progress:** `[2/5] Tech Lead (opus) — Defining spec and technical approach...`
|
|
192
171
|
**Agent:** Reads `.claude/agents/tech-lead.md` via Task tool with `model: "opus"`
|
|
193
|
-
**Input:**
|
|
172
|
+
**Input:** The feature approved by the Advisor + their observations
|
|
194
173
|
**Process:**
|
|
195
174
|
|
|
196
|
-
1. The Tech Lead
|
|
197
|
-
2.
|
|
175
|
+
1. The Tech Lead breaks the feature into concrete tasks with verifiable acceptance criteria
|
|
176
|
+
2. Defines the implementation approach: files to modify, patterns to follow, interfaces
|
|
198
177
|
3. Anticipates technical risks and proposes mitigations
|
|
178
|
+
4. Estimates effort and suggests implementation order
|
|
199
179
|
|
|
200
|
-
**Output:**
|
|
201
|
-
**Trace data:**
|
|
180
|
+
**Output:** Task list with acceptance criteria + technical plan with files, patterns, interfaces, and risks
|
|
181
|
+
**Trace data:** Tasks defined count, acceptance criteria count, key patterns identified, files to modify, technical risks
|
|
202
182
|
|
|
203
|
-
### Phase
|
|
183
|
+
### Phase 3 — Implementation (Developer)
|
|
204
184
|
|
|
205
|
-
**Progress:** `[
|
|
185
|
+
**Progress:** `[3/5] Developer (sonnet) — Implementing...`
|
|
206
186
|
**Agent:** Reads `.claude/agents/developer.md` via Task tool with `model: "sonnet"`
|
|
207
|
-
**Input:** Tech Lead technical plan +
|
|
187
|
+
**Input:** Tech Lead technical plan + acceptance criteria
|
|
208
188
|
**Process:**
|
|
209
189
|
|
|
210
190
|
1. The Developer implements following the technical plan
|
|
@@ -217,7 +197,7 @@ This indicator MUST be displayed before spawning the agent for that phase.
|
|
|
217
197
|
|
|
218
198
|
### Pre-Review Gate (mandatory)
|
|
219
199
|
|
|
220
|
-
Before advancing to Phase
|
|
200
|
+
Before advancing to Phase 4, run automated verification:
|
|
221
201
|
|
|
222
202
|
1. Run the project test commands (e.g., `npm test`) — if they fail, the Developer must fix them before advancing
|
|
223
203
|
2. Run the project lint commands (e.g., `npm run lint`) — if they fail, the Developer must fix them before advancing
|
|
@@ -227,9 +207,9 @@ This gate CANNOT be skipped, even if the user requested phase skipping. The spec
|
|
|
227
207
|
|
|
228
208
|
**Trace data:** Tests pass/fail, lint pass/fail
|
|
229
209
|
|
|
230
|
-
### Phase
|
|
210
|
+
### Phase 4 — Review (Code Reviewer)
|
|
231
211
|
|
|
232
|
-
**Progress:** `[5
|
|
212
|
+
**Progress:** `[4/5] Code Reviewer (opus) — Reviewing changes...`
|
|
233
213
|
**Agent:** Reads `.claude/agents/code-reviewer.md` via Task tool with `model: "opus"`
|
|
234
214
|
**Input:** The implemented changes (git diff)
|
|
235
215
|
**Process:**
|
|
@@ -239,13 +219,13 @@ This gate CANNOT be skipped, even if the user requested phase skipping. The spec
|
|
|
239
219
|
|
|
240
220
|
**Output:** Review report with classified findings
|
|
241
221
|
**Trace data:** Blockers count, warnings count, suggestions count, review-fix loops
|
|
242
|
-
**Loop condition:** If there are Blocker findings, return to **Phase
|
|
222
|
+
**Loop condition:** If there are Blocker findings, return to **Phase 3** for the Developer to fix them. Maximum 2 review-fix iterations.
|
|
243
223
|
|
|
244
|
-
### Phase
|
|
224
|
+
### Phase 5 — QA (delegates to /qa-cycle)
|
|
245
225
|
|
|
246
|
-
**Progress:** `[
|
|
226
|
+
**Progress:** `[5/5] QA (sonnet) — Validating acceptance criteria...`
|
|
247
227
|
|
|
248
|
-
Runs the `/qa-cycle` skill passing the
|
|
228
|
+
Runs the `/qa-cycle` skill passing the acceptance criteria as context. The qa-cycle handles:
|
|
249
229
|
|
|
250
230
|
1. Running project tests and lint
|
|
251
231
|
2. Validating acceptance criteria
|
|
@@ -253,7 +233,7 @@ Runs the `/qa-cycle` skill passing the PO acceptance criteria as context. The qa
|
|
|
253
233
|
4. Bugfix cycle if issues arise (maximum 3 cycles)
|
|
254
234
|
|
|
255
235
|
**Trace data:** Acceptance criteria verified count, bugs found, QA cycles
|
|
256
|
-
**Additional loop condition:** If the qa-cycle bugfix introduces significant changes, return to **Phase
|
|
236
|
+
**Additional loop condition:** If the qa-cycle bugfix introduces significant changes, return to **Phase 4** (Review) for verification. Maximum 2 review-QA cycles.
|
|
257
237
|
|
|
258
238
|
## Checkpoint Commits
|
|
259
239
|
|
|
@@ -267,11 +247,10 @@ git commit -m "wip: [feature-name] phase N complete — [phase-name]"
|
|
|
267
247
|
Pattern for each phase:
|
|
268
248
|
|
|
269
249
|
- After Phase 1: `wip: [feature] phase 1 — advisor approved`
|
|
270
|
-
- After Phase 2: `wip: [feature] phase 2 —
|
|
271
|
-
- After Phase 3: `wip: [feature] phase 3 —
|
|
272
|
-
- After Phase 4: `wip: [feature] phase 4 —
|
|
273
|
-
- After Phase 5: `wip: [feature] phase 5 —
|
|
274
|
-
- After Phase 6: `wip: [feature] phase 6 — QA passed`
|
|
250
|
+
- After Phase 2: `wip: [feature] phase 2 — spec and tech approach defined`
|
|
251
|
+
- After Phase 3: `wip: [feature] phase 3 — implementation done` — also write partial trace (phases 1-3) to spec and update status to `implementing`
|
|
252
|
+
- After Phase 4: `wip: [feature] phase 4 — review passed`
|
|
253
|
+
- After Phase 5: `wip: [feature] phase 5 — QA passed`
|
|
275
254
|
|
|
276
255
|
Also update SESSION.md at each phase transition:
|
|
277
256
|
|
|
@@ -325,7 +304,7 @@ Append this section to the spec file:
|
|
|
325
304
|
|
|
326
305
|
pipeline-start: [YYYY-MM-DD]
|
|
327
306
|
pipeline-end: [YYYY-MM-DD]
|
|
328
|
-
phases-completed: [N]/
|
|
307
|
+
phases-completed: [N]/5
|
|
329
308
|
review-fix-loops: [N]
|
|
330
309
|
qa-cycles: [N]
|
|
331
310
|
final-gate: pass | fail
|
|
@@ -335,19 +314,16 @@ final-gate: pass | fail
|
|
|
335
314
|
- **Verdict**: [Approved/Rejected/Approved with conditions]
|
|
336
315
|
- **Risks identified**: [list or "None"]
|
|
337
316
|
|
|
338
|
-
### Phase 2 — Specification
|
|
317
|
+
### Phase 2 — Specification & Technical Approach
|
|
339
318
|
|
|
340
319
|
- **Tasks defined**: [N]
|
|
341
320
|
- **Acceptance criteria**: [N]
|
|
342
|
-
- **Estimated effort**: [summary]
|
|
343
|
-
|
|
344
|
-
### Phase 3 — Technical Approach
|
|
345
|
-
|
|
346
321
|
- **Key patterns**: [list]
|
|
347
322
|
- **Files to modify**: [list]
|
|
348
323
|
- **Technical risks**: [list or "None"]
|
|
324
|
+
- **Estimated effort**: [summary]
|
|
349
325
|
|
|
350
|
-
### Phase
|
|
326
|
+
### Phase 3 — Implementation
|
|
351
327
|
|
|
352
328
|
- **Files created/modified**: [list]
|
|
353
329
|
- **Tests added**: [N]
|
|
@@ -358,14 +334,14 @@ final-gate: pass | fail
|
|
|
358
334
|
- **Tests**: pass | fail
|
|
359
335
|
- **Lint**: pass | fail
|
|
360
336
|
|
|
361
|
-
### Phase
|
|
337
|
+
### Phase 4 — Review
|
|
362
338
|
|
|
363
339
|
- **Blockers**: [N]
|
|
364
340
|
- **Warnings**: [N]
|
|
365
341
|
- **Suggestions**: [N]
|
|
366
342
|
- **Review-fix loops**: [N]
|
|
367
343
|
|
|
368
|
-
### Phase
|
|
344
|
+
### Phase 5 — QA
|
|
369
345
|
|
|
370
346
|
- **Acceptance criteria verified**: [N]/[total]
|
|
371
347
|
- **Bugs found**: [N]
|
|
@@ -380,15 +356,15 @@ final-gate: pass | fail
|
|
|
380
356
|
|
|
381
357
|
### When to write the trace
|
|
382
358
|
|
|
383
|
-
- **Phase
|
|
359
|
+
- **Phase 3 checkpoint:** Write a partial trace covering phases 1-3 to the spec file. Set status to `implementing`. Include the spec file in the checkpoint commit.
|
|
384
360
|
- **Pipeline completion:** Write the complete trace (all phases) to the spec file. Set status to `implemented`. Include the spec file in the final checkpoint commit.
|
|
385
361
|
|
|
386
362
|
## Final Gate (mandatory before Completion)
|
|
387
363
|
|
|
388
364
|
Before declaring the pipeline as complete, run final verification:
|
|
389
365
|
|
|
390
|
-
1. Run project tests — if it fails, return to Phase
|
|
391
|
-
2. Run project lint — if it fails, return to Phase
|
|
366
|
+
1. Run project tests — if they fail, return to Phase 5 (QA/Bugfix)
|
|
367
|
+
2. Run project lint — if it fails, return to Phase 3 (Developer)
|
|
392
368
|
3. Both must pass with exit code 0
|
|
393
369
|
|
|
394
370
|
This gate is the last safety net. It CANNOT be skipped under any circumstances.
|
|
@@ -423,7 +399,7 @@ When spawning agents via the Task tool, use these `subagent_type` values:
|
|
|
423
399
|
|
|
424
400
|
| Guild Agent Role | subagent_type to use |
|
|
425
401
|
| --- | --- |
|
|
426
|
-
| advisor,
|
|
402
|
+
| advisor, tech-lead | `"general-purpose"` |
|
|
427
403
|
| developer, bugfix | `"general-purpose"` |
|
|
428
404
|
| code-reviewer, qa | `"general-purpose"` |
|
|
429
405
|
|
|
@@ -445,22 +421,19 @@ The `model` parameter is resolved from the step's `model-tier`: reasoning→`"op
|
|
|
445
421
|
```text
|
|
446
422
|
User: /build-feature add dark mode toggle to settings page
|
|
447
423
|
|
|
448
|
-
[1/
|
|
424
|
+
[1/5] Advisor (opus) — Evaluating feature...
|
|
449
425
|
Approved. Low risk, aligns with UX roadmap.
|
|
450
426
|
|
|
451
|
-
[2/
|
|
452
|
-
3 tasks defined
|
|
453
|
-
|
|
454
|
-
[3/6] Tech Lead (opus) — Defining technical approach...
|
|
455
|
-
Use CSS variables + context provider pattern.
|
|
427
|
+
[2/5] Tech Lead (opus) — Defining spec and technical approach...
|
|
428
|
+
3 tasks defined. Use CSS variables + context provider pattern.
|
|
456
429
|
|
|
457
|
-
[
|
|
430
|
+
[3/5] Developer (sonnet) — Implementing...
|
|
458
431
|
Implemented ThemeContext, toggle component, CSS vars.
|
|
459
432
|
|
|
460
|
-
[5
|
|
433
|
+
[4/5] Code Reviewer (opus) — Reviewing changes...
|
|
461
434
|
Passed. 1 suggestion (memoize context value).
|
|
462
435
|
|
|
463
|
-
[
|
|
436
|
+
[5/5] QA (sonnet) — Validating acceptance criteria...
|
|
464
437
|
All 3 acceptance criteria verified. 0 bugs.
|
|
465
438
|
|
|
466
439
|
Feature complete. PR ready for merge.
|
|
@@ -468,7 +441,7 @@ Feature complete. PR ready for merge.
|
|
|
468
441
|
|
|
469
442
|
## Notes
|
|
470
443
|
|
|
471
|
-
- If the user wants to skip phases (e.g., "already evaluated, implement directly"), allow skipping to Phase
|
|
444
|
+
- If the user wants to skip phases (e.g., "already evaluated, implement directly"), allow skipping to Phase 3 but warn that validation is lost. Verification gates (pre-Review and final) are NEVER skipped
|
|
472
445
|
- The pipeline is sequential: each phase depends on the output of the previous one
|
|
473
446
|
- Review/QA loops have limits to prevent infinite cycles
|
|
474
447
|
- In v1.x, parallel pipeline execution (multiple build-features via worktrees) is best-effort and depends on the host environment supporting concurrent agents
|
|
@@ -3,10 +3,9 @@
|
|
|
3
3
|
"evals": [
|
|
4
4
|
{
|
|
5
5
|
"id": "bf-has-core-phases",
|
|
6
|
-
"description": "Plan contains evaluate,
|
|
6
|
+
"description": "Plan contains evaluate, design, implement phases",
|
|
7
7
|
"expectations": [
|
|
8
8
|
{ "text": "Has evaluate step", "assertion": "step-exists:evaluate" },
|
|
9
|
-
{ "text": "Has specify step", "assertion": "step-exists:specify" },
|
|
10
9
|
{ "text": "Has design step", "assertion": "step-exists:design" },
|
|
11
10
|
{ "text": "Has implement step", "assertion": "step-exists:implement" }
|
|
12
11
|
]
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
{
|
|
2
|
+
"skill": "build-feature",
|
|
3
|
+
"matcherType": "keyword",
|
|
4
|
+
"description": "Full pipeline: evaluation -> spec -> implementation -> review -> QA",
|
|
5
|
+
"threshold": 0.3,
|
|
6
|
+
"tests": [
|
|
7
|
+
{ "prompt": "build a new feature with full pipeline", "shouldTrigger": true },
|
|
8
|
+
{ "prompt": "implement this feature end to end", "shouldTrigger": true, "keywordExpected": false },
|
|
9
|
+
{ "prompt": "run the full implementation pipeline", "shouldTrigger": true },
|
|
10
|
+
{ "prompt": "I want to ship this end to end", "shouldTrigger": true, "keywordExpected": false },
|
|
11
|
+
{ "prompt": "review my code", "shouldTrigger": false },
|
|
12
|
+
{ "prompt": "create a pull request", "shouldTrigger": false },
|
|
13
|
+
{ "prompt": "save my session", "shouldTrigger": false },
|
|
14
|
+
{ "prompt": "debug this bug", "shouldTrigger": false }
|
|
15
|
+
]
|
|
16
|
+
}
|
|
@@ -87,13 +87,13 @@ Invokes all 3 agents IN PARALLEL using Task tool:
|
|
|
87
87
|
|
|
88
88
|
### 2. Council Feature-Scope
|
|
89
89
|
|
|
90
|
-
**Participants:** Advisor +
|
|
90
|
+
**Participants:** Advisor + Developer + Tech Lead
|
|
91
91
|
**When it applies:** Defining feature scope, prioritizing functionality, evaluating product proposals
|
|
92
92
|
|
|
93
93
|
Invokes all 3 agents IN PARALLEL using Task tool:
|
|
94
94
|
|
|
95
95
|
- Task 1: Reads `.claude/agents/advisor.md` — domain and strategic vision perspective
|
|
96
|
-
- Task 2: Reads `.claude/agents/
|
|
96
|
+
- Task 2: Reads `.claude/agents/developer.md` — implementability and pragmatism perspective
|
|
97
97
|
- Task 3: Reads `.claude/agents/tech-lead.md` — technical feasibility and effort perspective
|
|
98
98
|
|
|
99
99
|
### 3. Council Tech-Debt
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
{
|
|
2
|
+
"skill": "council",
|
|
3
|
+
"matcherType": "keyword",
|
|
4
|
+
"description": "Convenes multiple agents to debate an important decision",
|
|
5
|
+
"threshold": 0.3,
|
|
6
|
+
"tests": [
|
|
7
|
+
{ "prompt": "convene a council to debate this decision", "shouldTrigger": true },
|
|
8
|
+
{ "prompt": "I need multiple agents to debate this", "shouldTrigger": true },
|
|
9
|
+
{ "prompt": "let the council decide", "shouldTrigger": true, "keywordExpected": false },
|
|
10
|
+
{ "prompt": "I need help making a decision", "shouldTrigger": true, "keywordExpected": false },
|
|
11
|
+
{ "prompt": "build a new feature", "shouldTrigger": false },
|
|
12
|
+
{ "prompt": "review my code", "shouldTrigger": false },
|
|
13
|
+
{ "prompt": "save my session", "shouldTrigger": false },
|
|
14
|
+
{ "prompt": "debug this bug", "shouldTrigger": false }
|
|
15
|
+
]
|
|
16
|
+
}
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
{
|
|
2
|
+
"skill": "create-pr",
|
|
3
|
+
"evals": [
|
|
4
|
+
{
|
|
5
|
+
"id": "cpr-has-core-steps",
|
|
6
|
+
"description": "PR creation has verify, gather, generate, create steps",
|
|
7
|
+
"expectations": [
|
|
8
|
+
{ "text": "Has verify-branch step", "assertion": "step-exists:verify-branch" },
|
|
9
|
+
{ "text": "Has gather-context step", "assertion": "step-exists:gather-context" },
|
|
10
|
+
{ "text": "Has generate-description step", "assertion": "step-exists:generate-description" },
|
|
11
|
+
{ "text": "Has create-pr step", "assertion": "step-exists:create-pr" }
|
|
12
|
+
]
|
|
13
|
+
},
|
|
14
|
+
{
|
|
15
|
+
"id": "cpr-all-system-role",
|
|
16
|
+
"description": "All steps use system role (no agent delegation)",
|
|
17
|
+
"expectations": [
|
|
18
|
+
{ "text": "verify-branch is system", "assertion": "step-role:verify-branch:system" },
|
|
19
|
+
{ "text": "gather-context is system", "assertion": "step-role:gather-context:system" },
|
|
20
|
+
{ "text": "generate-description is system", "assertion": "step-role:generate-description:system" },
|
|
21
|
+
{ "text": "create-pr is system", "assertion": "step-role:create-pr:system" },
|
|
22
|
+
{ "text": "post-creation is system", "assertion": "step-role:post-creation:system" }
|
|
23
|
+
]
|
|
24
|
+
},
|
|
25
|
+
{
|
|
26
|
+
"id": "cpr-gates",
|
|
27
|
+
"description": "Gates at description generation and post-creation",
|
|
28
|
+
"expectations": [
|
|
29
|
+
{ "text": "Generate-description has gate", "assertion": "gate-exists:generate-description" },
|
|
30
|
+
{ "text": "Post-creation has gate", "assertion": "gate-exists:post-creation" }
|
|
31
|
+
]
|
|
32
|
+
},
|
|
33
|
+
{
|
|
34
|
+
"id": "cpr-dependencies",
|
|
35
|
+
"description": "Steps have correct dependency chain",
|
|
36
|
+
"expectations": [
|
|
37
|
+
{ "text": "gather-context requires branch-state", "assertion": "step-requires:gather-context:branch-state" },
|
|
38
|
+
{ "text": "generate-description requires commit-list", "assertion": "step-requires:generate-description:commit-list" },
|
|
39
|
+
{ "text": "create-pr requires pr-description", "assertion": "step-requires:create-pr:pr-description" },
|
|
40
|
+
{ "text": "post-creation requires pr-url", "assertion": "step-requires:post-creation:pr-url" }
|
|
41
|
+
]
|
|
42
|
+
}
|
|
43
|
+
]
|
|
44
|
+
}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
{
|
|
2
|
+
"skill": "create-pr",
|
|
3
|
+
"matcherType": "keyword",
|
|
4
|
+
"description": "Create a pull request from the current branch with structured summary",
|
|
5
|
+
"threshold": 0.3,
|
|
6
|
+
"tests": [
|
|
7
|
+
{ "prompt": "create a pull request", "shouldTrigger": true },
|
|
8
|
+
{ "prompt": "open a PR for this branch", "shouldTrigger": true },
|
|
9
|
+
{ "prompt": "push and create PR", "shouldTrigger": true },
|
|
10
|
+
{ "prompt": "I'm ready to submit this for review", "shouldTrigger": true, "keywordExpected": false },
|
|
11
|
+
{ "prompt": "review my code changes", "shouldTrigger": false },
|
|
12
|
+
{ "prompt": "start a new feature", "shouldTrigger": false },
|
|
13
|
+
{ "prompt": "deploy to production", "shouldTrigger": false },
|
|
14
|
+
{ "prompt": "save my session", "shouldTrigger": false }
|
|
15
|
+
]
|
|
16
|
+
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: debug
|
|
3
|
-
description: "Discipline skill — systematic debugging process. Use when encountering any bug, test failure, or unexpected behavior, before proposing fixes."
|
|
3
|
+
description: "Discipline skill — systematic debugging process. Use when encountering any bug, debug issue, test failure, broken function, or unexpected behavior, before proposing fixes."
|
|
4
4
|
user-invocable: true
|
|
5
5
|
---
|
|
6
6
|
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
{
|
|
2
|
+
"skill": "debug",
|
|
3
|
+
"matcherType": "keyword",
|
|
4
|
+
"description": "Discipline skill — systematic debugging process. Use when encountering any bug, debug issue, test failure, broken function, or unexpected behavior, before proposing fixes.",
|
|
5
|
+
"threshold": 0.3,
|
|
6
|
+
"tests": [
|
|
7
|
+
{ "prompt": "I have a bug in the login flow", "shouldTrigger": true, "keywordExpected": false },
|
|
8
|
+
{ "prompt": "tests are failing unexpectedly", "shouldTrigger": true },
|
|
9
|
+
{ "prompt": "unexpected behavior in the API", "shouldTrigger": true },
|
|
10
|
+
{ "prompt": "help me debug this function", "shouldTrigger": true },
|
|
11
|
+
{ "prompt": "create a new feature", "shouldTrigger": false },
|
|
12
|
+
{ "prompt": "review my code", "shouldTrigger": false },
|
|
13
|
+
{ "prompt": "save my session", "shouldTrigger": false },
|
|
14
|
+
{ "prompt": "what phase am I in", "shouldTrigger": false }
|
|
15
|
+
]
|
|
16
|
+
}
|
|
@@ -47,11 +47,10 @@ Read `SESSION.md` to determine:
|
|
|
47
47
|
The pipeline phases are:
|
|
48
48
|
|
|
49
49
|
1. **Evaluation** (Advisor) — go/no-go
|
|
50
|
-
2. **Specification** (
|
|
51
|
-
3. **
|
|
52
|
-
4. **
|
|
53
|
-
5. **
|
|
54
|
-
6. **QA** — functional validation
|
|
50
|
+
2. **Specification & Technical Approach** (Tech Lead) — tasks, acceptance criteria, implementation plan
|
|
51
|
+
3. **Implementation** (Developer) — code and tests
|
|
52
|
+
4. **Review** (Code Reviewer) — quality review
|
|
53
|
+
5. **QA** — functional validation
|
|
55
54
|
|
|
56
55
|
### Step 3 — Present flow state
|
|
57
56
|
|
|
@@ -59,11 +58,10 @@ The pipeline phases are:
|
|
|
59
58
|
Dev Flow — [feature name]
|
|
60
59
|
|
|
61
60
|
[x] Phase 1 — Evaluation (completed)
|
|
62
|
-
[x] Phase 2 — Specification (completed)
|
|
63
|
-
[ ] Phase 3 —
|
|
64
|
-
[ ] Phase 4 —
|
|
65
|
-
[ ] Phase 5 —
|
|
66
|
-
[ ] Phase 6 — QA
|
|
61
|
+
[x] Phase 2 — Specification & Technical Approach (completed)
|
|
62
|
+
[ ] Phase 3 — Implementation (pending) <-- you are here
|
|
63
|
+
[ ] Phase 4 — Review
|
|
64
|
+
[ ] Phase 5 — QA
|
|
67
65
|
|
|
68
66
|
Next step: Run /build-feature to continue from Phase 3.
|
|
69
67
|
```
|
|
@@ -76,8 +74,8 @@ If there is no feature in progress, report that there is no active pipeline and
|
|
|
76
74
|
User: /dev-flow
|
|
77
75
|
|
|
78
76
|
Current pipeline: build-feature "add user preferences"
|
|
79
|
-
Phase:
|
|
77
|
+
Phase: 3 of 5 — Implementation
|
|
80
78
|
Developer agent active.
|
|
81
79
|
|
|
82
|
-
Next: Phase
|
|
80
|
+
Next: Phase 4 — Review
|
|
83
81
|
```
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
{
|
|
2
|
+
"skill": "dev-flow",
|
|
3
|
+
"evals": [
|
|
4
|
+
{
|
|
5
|
+
"id": "df-has-steps",
|
|
6
|
+
"description": "Dev flow has read-state and present-flow steps",
|
|
7
|
+
"expectations": [
|
|
8
|
+
{ "text": "Has read-state step", "assertion": "step-exists:read-state" },
|
|
9
|
+
{ "text": "Has present-flow step", "assertion": "step-exists:present-flow" }
|
|
10
|
+
]
|
|
11
|
+
},
|
|
12
|
+
{
|
|
13
|
+
"id": "df-all-system",
|
|
14
|
+
"description": "All steps are system role",
|
|
15
|
+
"expectations": [
|
|
16
|
+
{ "text": "read-state is system", "assertion": "step-role:read-state:system" },
|
|
17
|
+
{ "text": "present-flow is system", "assertion": "step-role:present-flow:system" }
|
|
18
|
+
]
|
|
19
|
+
},
|
|
20
|
+
{
|
|
21
|
+
"id": "df-presentation-gate",
|
|
22
|
+
"description": "Present-flow step has a gate for user confirmation",
|
|
23
|
+
"expectations": [
|
|
24
|
+
{ "text": "present-flow has gate", "assertion": "gate-exists:present-flow" }
|
|
25
|
+
]
|
|
26
|
+
},
|
|
27
|
+
{
|
|
28
|
+
"id": "df-dependencies",
|
|
29
|
+
"description": "Present-flow requires session state",
|
|
30
|
+
"expectations": [
|
|
31
|
+
{ "text": "present-flow requires session-state", "assertion": "step-requires:present-flow:session-state" },
|
|
32
|
+
{ "text": "present-flow requires current-phase", "assertion": "step-requires:present-flow:current-phase" }
|
|
33
|
+
]
|
|
34
|
+
}
|
|
35
|
+
]
|
|
36
|
+
}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
{
|
|
2
|
+
"skill": "dev-flow",
|
|
3
|
+
"matcherType": "keyword",
|
|
4
|
+
"description": "Shows current pipeline phase and what comes next",
|
|
5
|
+
"threshold": 0.3,
|
|
6
|
+
"tests": [
|
|
7
|
+
{ "prompt": "what phase am I in", "shouldTrigger": true },
|
|
8
|
+
{ "prompt": "show the current pipeline phase", "shouldTrigger": true },
|
|
9
|
+
{ "prompt": "what comes next in the flow", "shouldTrigger": true },
|
|
10
|
+
{ "prompt": "where did I leave off", "shouldTrigger": true, "keywordExpected": false },
|
|
11
|
+
{ "prompt": "create a pull request", "shouldTrigger": false },
|
|
12
|
+
{ "prompt": "review my code", "shouldTrigger": false },
|
|
13
|
+
{ "prompt": "fix this bug", "shouldTrigger": false },
|
|
14
|
+
{ "prompt": "run the tests", "shouldTrigger": false }
|
|
15
|
+
]
|
|
16
|
+
}
|
|
@@ -126,13 +126,10 @@ Invoke the Tech Lead agent using Task tool with `model: "sonnet"` (execution tie
|
|
|
126
126
|
|
|
127
127
|
- **advisor.md**: real project domain, target users
|
|
128
128
|
- **tech-lead.md**: specific stack, detected patterns, architecture decisions
|
|
129
|
-
- **product-owner.md**: existing functionality, visible backlog
|
|
130
129
|
- **developer.md**: code conventions, main framework, file structure
|
|
131
130
|
- **code-reviewer.md**: lint rules, project patterns, anti-patterns to watch
|
|
132
131
|
- **qa.md**: testing framework, commands to run tests, current coverage
|
|
133
132
|
- **bugfix.md**: debugging stack, logs, available tools
|
|
134
|
-
- **db-migration.md**: ORM, migration tool, current schema (if applicable)
|
|
135
|
-
- **platform-expert.md**: Claude Code version, known permission bugs, hook configuration
|
|
136
133
|
|
|
137
134
|
When specializing agents, append a zone at the bottom of each agent file:
|
|
138
135
|
|
|
@@ -204,7 +201,6 @@ Tech Lead (sonnet) — Specializing agents...
|
|
|
204
201
|
Agents updated:
|
|
205
202
|
- developer.md: Specialized for Next.js + TypeScript
|
|
206
203
|
- qa.md: Configured for Vitest + Playwright
|
|
207
|
-
- db-migration.md: Configured for Prisma
|
|
208
204
|
|
|
209
205
|
Run /status to see the full state.
|
|
210
206
|
```
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
{
|
|
2
|
+
"skill": "guild-specialize",
|
|
3
|
+
"evals": [
|
|
4
|
+
{
|
|
5
|
+
"id": "gs-has-core-steps",
|
|
6
|
+
"description": "Guild specialize has read, explore, enrich, specialize, confirm, commit steps",
|
|
7
|
+
"expectations": [
|
|
8
|
+
{ "text": "Has read-base step", "assertion": "step-exists:read-base" },
|
|
9
|
+
{ "text": "Has explore-project step", "assertion": "step-exists:explore-project" },
|
|
10
|
+
{ "text": "Has enrich-claude-md step", "assertion": "step-exists:enrich-claude-md" },
|
|
11
|
+
{ "text": "Has specialize-agents step", "assertion": "step-exists:specialize-agents" },
|
|
12
|
+
{ "text": "Has confirm step", "assertion": "step-exists:confirm" },
|
|
13
|
+
{ "text": "Has commit-enrichment step", "assertion": "step-exists:commit-enrichment" }
|
|
14
|
+
]
|
|
15
|
+
},
|
|
16
|
+
{
|
|
17
|
+
"id": "gs-enrichment-uses-reasoning",
|
|
18
|
+
"description": "CLAUDE.md enrichment uses reasoning tier (opus)",
|
|
19
|
+
"expectations": [
|
|
20
|
+
{ "text": "enrich-claude-md uses reasoning", "assertion": "step-model-tier:enrich-claude-md:reasoning" }
|
|
21
|
+
]
|
|
22
|
+
},
|
|
23
|
+
{
|
|
24
|
+
"id": "gs-agents-use-execution",
|
|
25
|
+
"description": "Agent specialization uses execution tier (sonnet)",
|
|
26
|
+
"expectations": [
|
|
27
|
+
{ "text": "specialize-agents uses execution", "assertion": "step-model-tier:specialize-agents:execution" }
|
|
28
|
+
]
|
|
29
|
+
},
|
|
30
|
+
{
|
|
31
|
+
"id": "gs-gates",
|
|
32
|
+
"description": "Gates at exploration and confirmation",
|
|
33
|
+
"expectations": [
|
|
34
|
+
{ "text": "explore-project has gate", "assertion": "gate-exists:explore-project" },
|
|
35
|
+
{ "text": "confirm has gate", "assertion": "gate-exists:confirm" }
|
|
36
|
+
]
|
|
37
|
+
},
|
|
38
|
+
{
|
|
39
|
+
"id": "gs-tech-lead-role",
|
|
40
|
+
"description": "Enrichment and specialization use tech-lead role",
|
|
41
|
+
"expectations": [
|
|
42
|
+
{ "text": "enrich-claude-md is tech-lead", "assertion": "step-role:enrich-claude-md:tech-lead" },
|
|
43
|
+
{ "text": "specialize-agents is tech-lead", "assertion": "step-role:specialize-agents:tech-lead" }
|
|
44
|
+
]
|
|
45
|
+
},
|
|
46
|
+
{
|
|
47
|
+
"id": "gs-minimum-steps",
|
|
48
|
+
"description": "Has at least 6 steps",
|
|
49
|
+
"expectations": [
|
|
50
|
+
{ "text": "At least 6 steps", "assertion": "step-count:6" }
|
|
51
|
+
]
|
|
52
|
+
}
|
|
53
|
+
]
|
|
54
|
+
}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
{
|
|
2
|
+
"skill": "guild-specialize",
|
|
3
|
+
"matcherType": "keyword",
|
|
4
|
+
"description": "Enriches CLAUDE.md by exploring the project and specializes agents to the real stack",
|
|
5
|
+
"threshold": 0.3,
|
|
6
|
+
"tests": [
|
|
7
|
+
{ "prompt": "specialize the agents for this project", "shouldTrigger": true },
|
|
8
|
+
{ "prompt": "enrich CLAUDE.md with the project stack", "shouldTrigger": true },
|
|
9
|
+
{ "prompt": "explore the project and specialize agents", "shouldTrigger": true },
|
|
10
|
+
{ "prompt": "set up Guild for this codebase", "shouldTrigger": true, "keywordExpected": false },
|
|
11
|
+
{ "prompt": "create a pull request", "shouldTrigger": false },
|
|
12
|
+
{ "prompt": "review my code", "shouldTrigger": false },
|
|
13
|
+
{ "prompt": "debug this bug", "shouldTrigger": false },
|
|
14
|
+
{ "prompt": "save my session", "shouldTrigger": false }
|
|
15
|
+
]
|
|
16
|
+
}
|