npm - guild-agents - Versions diffs - 1.3.0 → 1.5.0 - Mend

guild-agents 1.3.0 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (53)

package/README.md +19 -6
package/bin/guild.js +46 -0
package/package.json +2 -2
package/src/commands/eval.js +225 -0
package/src/commands/stats.js +147 -0
package/src/templates/agents/advisor.md +0 -1
package/src/templates/agents/developer.md +2 -2
package/src/templates/agents/qa.md +1 -1
package/src/templates/agents/tech-lead.md +2 -2
package/src/templates/skills/build-feature/SKILL.md +53 -80
package/src/templates/skills/build-feature/evals/evals.json +1 -2
package/src/templates/skills/build-feature/evals/triggers.json +16 -0
package/src/templates/skills/council/SKILL.md +2 -2
package/src/templates/skills/council/evals/triggers.json +16 -0
package/src/templates/skills/create-pr/evals/evals.json +44 -0
package/src/templates/skills/create-pr/evals/triggers.json +16 -0
package/src/templates/skills/debug/SKILL.md +1 -1
package/src/templates/skills/debug/evals/triggers.json +16 -0
package/src/templates/skills/dev-flow/SKILL.md +10 -12
package/src/templates/skills/dev-flow/evals/evals.json +36 -0
package/src/templates/skills/dev-flow/evals/triggers.json +16 -0
package/src/templates/skills/guild-specialize/SKILL.md +0 -4
package/src/templates/skills/guild-specialize/evals/evals.json +54 -0
package/src/templates/skills/guild-specialize/evals/triggers.json +16 -0
package/src/templates/skills/new-feature/evals/evals.json +41 -0
package/src/templates/skills/new-feature/evals/triggers.json +16 -0
package/src/templates/skills/qa-cycle/evals/evals.json +46 -0
package/src/templates/skills/qa-cycle/evals/triggers.json +16 -0
package/src/templates/skills/re-specialize/evals/evals.json +48 -0
package/src/templates/skills/re-specialize/evals/triggers.json +16 -0
package/src/templates/skills/review/evals/evals.json +43 -0
package/src/templates/skills/review/evals/triggers.json +16 -0
package/src/templates/skills/session-end/evals/evals.json +40 -0
package/src/templates/skills/session-end/evals/triggers.json +16 -0
package/src/templates/skills/session-start/evals/evals.json +50 -0
package/src/templates/skills/session-start/evals/triggers.json +16 -0
package/src/templates/skills/status/SKILL.md +1 -1
package/src/templates/skills/status/evals/evals.json +40 -0
package/src/templates/skills/status/evals/triggers.json +16 -0
package/src/templates/skills/tdd/evals/triggers.json +16 -0
package/src/templates/skills/verify/evals/triggers.json +16 -0
package/src/utils/accounting.js +139 -0
package/src/utils/benchmark.js +128 -0
package/src/utils/description-analyzer.js +92 -0
package/src/utils/dispatch-protocol.js +0 -3
package/src/utils/executor.js +133 -23
package/src/utils/pricing.js +28 -0
package/src/utils/semantic-matcher.js +91 -0
package/src/utils/trigger-matcher.js +64 -0
package/src/utils/trigger-runner.js +132 -0
package/src/templates/agents/db-migration.md +0 -51
package/src/templates/agents/platform-expert.md +0 -92
package/src/templates/agents/product-owner.md +0 -52

package/src/templates/skills/build-feature/SKILL.md CHANGED Viewed

@@ -12,19 +12,13 @@ workflow:
       produces: [evaluation-report, verdict]
       model-tier: reasoning
       on-failure: abort
-    - id: specify
-      role: product-owner
-      intent: "Break the feature into concrete tasks with verifiable acceptance criteria. Estimate effort and suggest implementation order."
-      requires: [feature-description, evaluation-report]
-      produces: [task-list, acceptance-criteria]
-      model-tier: reasoning
-      condition: step.evaluate.verdict != rejected
     - id: design
       role: tech-lead
-      intent: "Define implementation approach: files to modify, patterns to follow, interfaces, and technical risks."
-      requires: [task-list, acceptance-criteria]
-      produces: [technical-plan]
+      intent: "Break the feature into concrete tasks with acceptance criteria. Define implementation approach: files to modify, patterns to follow, interfaces, and technical risks."
+      requires: [feature-description, evaluation-report]
+      produces: [task-list, acceptance-criteria, technical-plan]
       model-tier: reasoning
+      condition: step.evaluate.verdict != rejected
     - id: implement
       role: developer
       intent: "Implement the feature following the technical plan. Write unit tests. Make atomic commits."
@@ -131,19 +125,18 @@ git worktree remove .claude/worktrees/[branch-name]
 When running a single build-feature, a simple `git checkout -b` is sufficient.
-## 6-Phase Pipeline
+## 5-Phase Pipeline
 ### Progress Display
 At the start of each phase, display a progress indicator to the user before any agent output:
 ```text
-[1/6] Advisor (opus) — Evaluating feature...
-[2/6] Product Owner (opus) — Defining spec...
-[3/6] Tech Lead (opus) — Defining technical approach...
-[4/6] Developer (sonnet) — Implementing...
-[5/6] Code Reviewer (opus) — Reviewing changes...
-[6/6] QA (sonnet) — Validating acceptance criteria...
+[1/5] Advisor (opus) — Evaluating feature...
+[2/5] Tech Lead (opus) — Defining spec and technical approach...
+[3/5] Developer (sonnet) — Implementing...
+[4/5] Code Reviewer (opus) — Reviewing changes...
+[5/5] QA (sonnet) — Validating acceptance criteria...
 ```
 Model names are resolved from the step's `model-tier` using the `max` profile: reasoning=opus, execution=sonnet, routine=haiku. System/gate steps do not show a model name.
@@ -151,15 +144,15 @@ Model names are resolved from the step's `model-tier` using the `max` profile: r
 When a phase loops (review-fix or QA-review cycles), show the iteration:
 ```text
-[5/6 · round 2] Code Reviewer (opus) — Re-reviewing after fixes...
-[4/6 · round 2] Developer (sonnet) — Fixing review blockers...
+[4/5 · round 2] Code Reviewer (opus) — Re-reviewing after fixes...
+[3/5 · round 2] Developer (sonnet) — Fixing review blockers...
 ```
 This indicator MUST be displayed before spawning the agent for that phase.
 ### Phase 1 — Evaluation (Advisor)
-**Progress:** `[1/6] Advisor (opus) — Evaluating feature...`
+**Progress:** `[1/5] Advisor (opus) — Evaluating feature...`
 **Agent:** Reads `.claude/agents/advisor.md` via Task tool with `model: "opus"`
 **Input:** The feature description provided by the user
 **Process:**
@@ -172,39 +165,26 @@ This indicator MUST be displayed before spawning the agent for that phase.
 **Trace data:** Verdict (Approved/Rejected/Approved with conditions), risks identified, conditions if any
 **Exit condition:** If the Advisor rejects the feature, the pipeline stops here. Inform the user of the reason and suggest adjustments if any.
-### Phase 2 — Specification (Product Owner)
-**Progress:** `[2/6] Product Owner (opus) — Defining spec...`
-**Agent:** Reads `.claude/agents/product-owner.md` via Task tool with `model: "opus"`
-**Input:** The feature approved by the Advisor + their observations
-**Process:**
-1. The Product Owner breaks the feature into concrete tasks
-2. Defines verifiable acceptance criteria for each task
-3. Estimates effort and suggests implementation order
+### Phase 2 — Specification & Technical Approach (Tech Lead)
-**Output:** Task list with acceptance criteria, estimation, and order
-**Trace data:** Tasks defined count, acceptance criteria count, estimated effort
-### Phase 3 — Technical Approach (Tech Lead)
-**Progress:** `[3/6] Tech Lead (opus) — Defining technical approach...`
+**Progress:** `[2/5] Tech Lead (opus) — Defining spec and technical approach...`
 **Agent:** Reads `.claude/agents/tech-lead.md` via Task tool with `model: "opus"`
-**Input:** Product Owner tasks + acceptance criteria
+**Input:** The feature approved by the Advisor + their observations
 **Process:**
-1. The Tech Lead defines the implementation approach
-2. Identifies files to modify, patterns to follow, interfaces
+1. The Tech Lead breaks the feature into concrete tasks with verifiable acceptance criteria
+2. Defines the implementation approach: files to modify, patterns to follow, interfaces
 3. Anticipates technical risks and proposes mitigations
+4. Estimates effort and suggests implementation order
-**Output:** Technical plan with files, patterns, interfaces, and risks
-**Trace data:** Key patterns identified, files to modify, technical risks
+**Output:** Task list with acceptance criteria + technical plan with files, patterns, interfaces, and risks
+**Trace data:** Tasks defined count, acceptance criteria count, key patterns identified, files to modify, technical risks
-### Phase 4 — Implementation (Developer)
+### Phase 3 — Implementation (Developer)
-**Progress:** `[4/6] Developer (sonnet) — Implementing...`
+**Progress:** `[3/5] Developer (sonnet) — Implementing...`
 **Agent:** Reads `.claude/agents/developer.md` via Task tool with `model: "sonnet"`
-**Input:** Tech Lead technical plan + PO acceptance criteria
+**Input:** Tech Lead technical plan + acceptance criteria
 **Process:**
 1. The Developer implements following the technical plan
@@ -217,7 +197,7 @@ This indicator MUST be displayed before spawning the agent for that phase.
 ### Pre-Review Gate (mandatory)
-Before advancing to Phase 5, run automated verification:
+Before advancing to Phase 4, run automated verification:
 1. Run the project test commands (e.g., `npm test`) — if they fail, the Developer must fix them before advancing
 2. Run the project lint commands (e.g., `npm run lint`) — if they fail, the Developer must fix them before advancing
@@ -227,9 +207,9 @@ This gate CANNOT be skipped, even if the user requested phase skipping. The spec
 **Trace data:** Tests pass/fail, lint pass/fail
-### Phase 5 — Review (Code Reviewer)
+### Phase 4 — Review (Code Reviewer)
-**Progress:** `[5/6] Code Reviewer (opus) — Reviewing changes...`
+**Progress:** `[4/5] Code Reviewer (opus) — Reviewing changes...`
 **Agent:** Reads `.claude/agents/code-reviewer.md` via Task tool with `model: "opus"`
 **Input:** The implemented changes (git diff)
 **Process:**
@@ -239,13 +219,13 @@ This gate CANNOT be skipped, even if the user requested phase skipping. The spec
 **Output:** Review report with classified findings
 **Trace data:** Blockers count, warnings count, suggestions count, review-fix loops
-**Loop condition:** If there are Blocker findings, return to **Phase 4** for the Developer to fix them. Maximum 2 review-fix iterations.
+**Loop condition:** If there are Blocker findings, return to **Phase 3** for the Developer to fix them. Maximum 2 review-fix iterations.
-### Phase 6 — QA (delegates to /qa-cycle)
+### Phase 5 — QA (delegates to /qa-cycle)
-**Progress:** `[6/6] QA (sonnet) — Validating acceptance criteria...`
+**Progress:** `[5/5] QA (sonnet) — Validating acceptance criteria...`
-Runs the `/qa-cycle` skill passing the PO acceptance criteria as context. The qa-cycle handles:
+Runs the `/qa-cycle` skill passing the acceptance criteria as context. The qa-cycle handles:
 1. Running project tests and lint
 2. Validating acceptance criteria
@@ -253,7 +233,7 @@ Runs the `/qa-cycle` skill passing the PO acceptance criteria as context. The qa
 4. Bugfix cycle if issues arise (maximum 3 cycles)
 **Trace data:** Acceptance criteria verified count, bugs found, QA cycles
-**Additional loop condition:** If the qa-cycle bugfix introduces significant changes, return to **Phase 5** (Review) for verification. Maximum 2 review-QA cycles.
+**Additional loop condition:** If the qa-cycle bugfix introduces significant changes, return to **Phase 4** (Review) for verification. Maximum 2 review-QA cycles.
 ## Checkpoint Commits
@@ -267,11 +247,10 @@ git commit -m "wip: [feature-name] phase N complete — [phase-name]"
 Pattern for each phase:
 - After Phase 1: `wip: [feature] phase 1 — advisor approved`
-- After Phase 2: `wip: [feature] phase 2 — PO spec ready`
-- After Phase 3: `wip: [feature] phase 3 — tech approach defined`
-- After Phase 4: `wip: [feature] phase 4 — implementation done` -- also write partial trace (phases 1-4) to spec and update status to `implementing`
-- After Phase 5: `wip: [feature] phase 5 — review passed`
-- After Phase 6: `wip: [feature] phase 6 — QA passed`
+- After Phase 2: `wip: [feature] phase 2 — spec and tech approach defined`
+- After Phase 3: `wip: [feature] phase 3 — implementation done` — also write partial trace (phases 1-3) to spec and update status to `implementing`
+- After Phase 4: `wip: [feature] phase 4 — review passed`
+- After Phase 5: `wip: [feature] phase 5 — QA passed`
 Also update SESSION.md at each phase transition:
@@ -325,7 +304,7 @@ Append this section to the spec file:
 pipeline-start: [YYYY-MM-DD]
 pipeline-end: [YYYY-MM-DD]
-phases-completed: [N]/6
+phases-completed: [N]/5
 review-fix-loops: [N]
 qa-cycles: [N]
 final-gate: pass | fail
@@ -335,19 +314,16 @@ final-gate: pass | fail
 - **Verdict**: [Approved/Rejected/Approved with conditions]
 - **Risks identified**: [list or "None"]
-### Phase 2 — Specification
+### Phase 2 — Specification & Technical Approach
 - **Tasks defined**: [N]
 - **Acceptance criteria**: [N]
-- **Estimated effort**: [summary]
-### Phase 3 — Technical Approach
 - **Key patterns**: [list]
 - **Files to modify**: [list]
 - **Technical risks**: [list or "None"]
+- **Estimated effort**: [summary]
-### Phase 4 — Implementation
+### Phase 3 — Implementation
 - **Files created/modified**: [list]
 - **Tests added**: [N]
@@ -358,14 +334,14 @@ final-gate: pass | fail
 - **Tests**: pass | fail
 - **Lint**: pass | fail
-### Phase 5 — Review
+### Phase 4 — Review
 - **Blockers**: [N]
 - **Warnings**: [N]
 - **Suggestions**: [N]
 - **Review-fix loops**: [N]
-### Phase 6 — QA
+### Phase 5 — QA
 - **Acceptance criteria verified**: [N]/[total]
 - **Bugs found**: [N]
@@ -380,15 +356,15 @@ final-gate: pass | fail
 ### When to write the trace
-- **Phase 4 checkpoint:** Write a partial trace covering phases 1-4 to the spec file. Set status to `implementing`. Include the spec file in the checkpoint commit.
+- **Phase 3 checkpoint:** Write a partial trace covering phases 1-3 to the spec file. Set status to `implementing`. Include the spec file in the checkpoint commit.
 - **Pipeline completion:** Write the complete trace (all phases) to the spec file. Set status to `implemented`. Include the spec file in the final checkpoint commit.
 ## Final Gate (mandatory before Completion)
 Before declaring the pipeline as complete, run final verification:
-1. Run project tests — if it fails, return to Phase 6 (QA/Bugfix)
-2. Run project lint — if it fails, return to Phase 4 (Developer)
+1. Run project tests — if they fail, return to Phase 5 (QA/Bugfix)
+2. Run project lint — if it fails, return to Phase 3 (Developer)
 3. Both must pass with exit code 0
 This gate is the last safety net. It CANNOT be skipped under any circumstances.
@@ -423,7 +399,7 @@ When spawning agents via the Task tool, use these `subagent_type` values:
 | Guild Agent Role | subagent_type to use |
 | --- | --- |
-| advisor, product-owner, tech-lead | `"general-purpose"` |
+| advisor, tech-lead | `"general-purpose"` |
 | developer, bugfix | `"general-purpose"` |
 | code-reviewer, qa | `"general-purpose"` |
@@ -445,22 +421,19 @@ The `model` parameter is resolved from the step's `model-tier`: reasoning→`"op
 ```text
 User: /build-feature add dark mode toggle to settings page
-[1/6] Advisor (opus) — Evaluating feature...
+[1/5] Advisor (opus) — Evaluating feature...
   Approved. Low risk, aligns with UX roadmap.
-[2/6] Product Owner (opus) — Defining spec...
-  3 tasks defined with acceptance criteria.
-[3/6] Tech Lead (opus) — Defining technical approach...
-  Use CSS variables + context provider pattern.
+[2/5] Tech Lead (opus) — Defining spec and technical approach...
+  3 tasks defined. Use CSS variables + context provider pattern.
-[4/6] Developer (sonnet) — Implementing...
+[3/5] Developer (sonnet) — Implementing...
   Implemented ThemeContext, toggle component, CSS vars.
-[5/6] Code Reviewer (opus) — Reviewing changes...
+[4/5] Code Reviewer (opus) — Reviewing changes...
   Passed. 1 suggestion (memoize context value).
-[6/6] QA (sonnet) — Validating acceptance criteria...
+[5/5] QA (sonnet) — Validating acceptance criteria...
   All 3 acceptance criteria verified. 0 bugs.
 Feature complete. PR ready for merge.
@@ -468,7 +441,7 @@ Feature complete. PR ready for merge.
 ## Notes
-- If the user wants to skip phases (e.g., "already evaluated, implement directly"), allow skipping to Phase 4 but warn that validation is lost. Verification gates (pre-Review and final) are NEVER skipped
+- If the user wants to skip phases (e.g., "already evaluated, implement directly"), allow skipping to Phase 3 but warn that validation is lost. Verification gates (pre-Review and final) are NEVER skipped
 - The pipeline is sequential: each phase depends on the output of the previous one
 - Review/QA loops have limits to prevent infinite cycles
 - In v1.x, parallel pipeline execution (multiple build-features via worktrees) is best-effort and depends on the host environment supporting concurrent agents

package/src/templates/skills/build-feature/evals/evals.json CHANGED Viewed

@@ -3,10 +3,9 @@
   "evals": [
     {
       "id": "bf-has-core-phases",
-      "description": "Plan contains evaluate, specify, design, implement phases",
+      "description": "Plan contains evaluate, design, implement phases",
       "expectations": [
         { "text": "Has evaluate step", "assertion": "step-exists:evaluate" },
-        { "text": "Has specify step", "assertion": "step-exists:specify" },
         { "text": "Has design step", "assertion": "step-exists:design" },
         { "text": "Has implement step", "assertion": "step-exists:implement" }
       ]

package/src/templates/skills/build-feature/evals/triggers.json ADDED Viewed

@@ -0,0 +1,16 @@
+{
+  "skill": "build-feature",
+  "matcherType": "keyword",
+  "description": "Full pipeline: evaluation -> spec -> implementation -> review -> QA",
+  "threshold": 0.3,
+  "tests": [
+    { "prompt": "build a new feature with full pipeline", "shouldTrigger": true },
+    { "prompt": "implement this feature end to end", "shouldTrigger": true, "keywordExpected": false },
+    { "prompt": "run the full implementation pipeline", "shouldTrigger": true },
+    { "prompt": "I want to ship this end to end", "shouldTrigger": true, "keywordExpected": false },
+    { "prompt": "review my code", "shouldTrigger": false },
+    { "prompt": "create a pull request", "shouldTrigger": false },
+    { "prompt": "save my session", "shouldTrigger": false },
+    { "prompt": "debug this bug", "shouldTrigger": false }
+  ]
+}

package/src/templates/skills/council/SKILL.md CHANGED Viewed

@@ -87,13 +87,13 @@ Invokes all 3 agents IN PARALLEL using Task tool:
 ### 2. Council Feature-Scope
-**Participants:** Advisor + Product Owner + Tech Lead
+**Participants:** Advisor + Developer + Tech Lead
 **When it applies:** Defining feature scope, prioritizing functionality, evaluating product proposals
 Invokes all 3 agents IN PARALLEL using Task tool:
 - Task 1: Reads `.claude/agents/advisor.md` — domain and strategic vision perspective
-- Task 2: Reads `.claude/agents/product-owner.md` — user value and scope perspective
+- Task 2: Reads `.claude/agents/developer.md` — implementability and pragmatism perspective
 - Task 3: Reads `.claude/agents/tech-lead.md` — technical feasibility and effort perspective
 ### 3. Council Tech-Debt

package/src/templates/skills/council/evals/triggers.json ADDED Viewed

@@ -0,0 +1,16 @@
+{
+  "skill": "council",
+  "matcherType": "keyword",
+  "description": "Convenes multiple agents to debate an important decision",
+  "threshold": 0.3,
+  "tests": [
+    { "prompt": "convene a council to debate this decision", "shouldTrigger": true },
+    { "prompt": "I need multiple agents to debate this", "shouldTrigger": true },
+    { "prompt": "let the council decide", "shouldTrigger": true, "keywordExpected": false },
+    { "prompt": "I need help making a decision", "shouldTrigger": true, "keywordExpected": false },
+    { "prompt": "build a new feature", "shouldTrigger": false },
+    { "prompt": "review my code", "shouldTrigger": false },
+    { "prompt": "save my session", "shouldTrigger": false },
+    { "prompt": "debug this bug", "shouldTrigger": false }
+  ]
+}

package/src/templates/skills/create-pr/evals/evals.json ADDED Viewed

@@ -0,0 +1,44 @@
+{
+  "skill": "create-pr",
+  "evals": [
+    {
+      "id": "cpr-has-core-steps",
+      "description": "PR creation has verify, gather, generate, create steps",
+      "expectations": [
+        { "text": "Has verify-branch step", "assertion": "step-exists:verify-branch" },
+        { "text": "Has gather-context step", "assertion": "step-exists:gather-context" },
+        { "text": "Has generate-description step", "assertion": "step-exists:generate-description" },
+        { "text": "Has create-pr step", "assertion": "step-exists:create-pr" }
+      ]
+    },
+    {
+      "id": "cpr-all-system-role",
+      "description": "All steps use system role (no agent delegation)",
+      "expectations": [
+        { "text": "verify-branch is system", "assertion": "step-role:verify-branch:system" },
+        { "text": "gather-context is system", "assertion": "step-role:gather-context:system" },
+        { "text": "generate-description is system", "assertion": "step-role:generate-description:system" },
+        { "text": "create-pr is system", "assertion": "step-role:create-pr:system" },
+        { "text": "post-creation is system", "assertion": "step-role:post-creation:system" }
+      ]
+    },
+    {
+      "id": "cpr-gates",
+      "description": "Gates at description generation and post-creation",
+      "expectations": [
+        { "text": "Generate-description has gate", "assertion": "gate-exists:generate-description" },
+        { "text": "Post-creation has gate", "assertion": "gate-exists:post-creation" }
+      ]
+    },
+    {
+      "id": "cpr-dependencies",
+      "description": "Steps have correct dependency chain",
+      "expectations": [
+        { "text": "gather-context requires branch-state", "assertion": "step-requires:gather-context:branch-state" },
+        { "text": "generate-description requires commit-list", "assertion": "step-requires:generate-description:commit-list" },
+        { "text": "create-pr requires pr-description", "assertion": "step-requires:create-pr:pr-description" },
+        { "text": "post-creation requires pr-url", "assertion": "step-requires:post-creation:pr-url" }
+      ]
+    }
+  ]
+}

package/src/templates/skills/create-pr/evals/triggers.json ADDED Viewed

@@ -0,0 +1,16 @@
+{
+  "skill": "create-pr",
+  "matcherType": "keyword",
+  "description": "Create a pull request from the current branch with structured summary",
+  "threshold": 0.3,
+  "tests": [
+    { "prompt": "create a pull request", "shouldTrigger": true },
+    { "prompt": "open a PR for this branch", "shouldTrigger": true },
+    { "prompt": "push and create PR", "shouldTrigger": true },
+    { "prompt": "I'm ready to submit this for review", "shouldTrigger": true, "keywordExpected": false },
+    { "prompt": "review my code changes", "shouldTrigger": false },
+    { "prompt": "start a new feature", "shouldTrigger": false },
+    { "prompt": "deploy to production", "shouldTrigger": false },
+    { "prompt": "save my session", "shouldTrigger": false }
+  ]
+}

package/src/templates/skills/debug/SKILL.md CHANGED Viewed

@@ -1,6 +1,6 @@
 ---
 name: debug
-description: "Discipline skill — systematic debugging process. Use when encountering any bug, test failure, or unexpected behavior, before proposing fixes."
+description: "Discipline skill — systematic debugging process. Use when encountering any bug, debug issue, test failure, broken function, or unexpected behavior, before proposing fixes."
 user-invocable: true
 ---

package/src/templates/skills/debug/evals/triggers.json ADDED Viewed

@@ -0,0 +1,16 @@
+{
+  "skill": "debug",
+  "matcherType": "keyword",
+  "description": "Discipline skill — systematic debugging process. Use when encountering any bug, debug issue, test failure, broken function, or unexpected behavior, before proposing fixes.",
+  "threshold": 0.3,
+  "tests": [
+    { "prompt": "I have a bug in the login flow", "shouldTrigger": true, "keywordExpected": false },
+    { "prompt": "tests are failing unexpectedly", "shouldTrigger": true },
+    { "prompt": "unexpected behavior in the API", "shouldTrigger": true },
+    { "prompt": "help me debug this function", "shouldTrigger": true },
+    { "prompt": "create a new feature", "shouldTrigger": false },
+    { "prompt": "review my code", "shouldTrigger": false },
+    { "prompt": "save my session", "shouldTrigger": false },
+    { "prompt": "what phase am I in", "shouldTrigger": false }
+  ]
+}

package/src/templates/skills/dev-flow/SKILL.md CHANGED Viewed

@@ -47,11 +47,10 @@ Read `SESSION.md` to determine:
 The pipeline phases are:
 1. **Evaluation** (Advisor) — go/no-go
-2. **Specification** (Product Owner) — acceptance criteria
-3. **Technical Approach** (Tech Lead) — implementation plan
-4. **Implementation** (Developer) — code and tests
-5. **Review** (Code Reviewer) — quality review
-6. **QA** — functional validation
+2. **Specification & Technical Approach** (Tech Lead) — tasks, acceptance criteria, implementation plan
+3. **Implementation** (Developer) — code and tests
+4. **Review** (Code Reviewer) — quality review
+5. **QA** — functional validation
 ### Step 3 — Present flow state
@@ -59,11 +58,10 @@ The pipeline phases are:
 Dev Flow — [feature name]
 [x] Phase 1 — Evaluation (completed)
-[x] Phase 2 — Specification (completed)
-[ ] Phase 3 — Technical Approach (pending) <-- you are here
-[ ] Phase 4 — Implementation
-[ ] Phase 5 — Review
-[ ] Phase 6 — QA
+[x] Phase 2 — Specification & Technical Approach (completed)
+[ ] Phase 3 — Implementation (pending) <-- you are here
+[ ] Phase 4 — Review
+[ ] Phase 5 — QA
 Next step: Run /build-feature to continue from Phase 3.
 ```
@@ -76,8 +74,8 @@ If there is no feature in progress, report that there is no active pipeline and
 User: /dev-flow
 Current pipeline: build-feature "add user preferences"
-Phase: 4 of 6 — Implementation
+Phase: 3 of 5 — Implementation
 Developer agent active.
-Next: Phase 5 — Code Review
+Next: Phase 4 — Code Review
 ```

package/src/templates/skills/dev-flow/evals/evals.json ADDED Viewed

@@ -0,0 +1,36 @@
+{
+  "skill": "dev-flow",
+  "evals": [
+    {
+      "id": "df-has-steps",
+      "description": "Dev flow has read-state and present-flow steps",
+      "expectations": [
+        { "text": "Has read-state step", "assertion": "step-exists:read-state" },
+        { "text": "Has present-flow step", "assertion": "step-exists:present-flow" }
+      ]
+    },
+    {
+      "id": "df-all-system",
+      "description": "All steps are system role",
+      "expectations": [
+        { "text": "read-state is system", "assertion": "step-role:read-state:system" },
+        { "text": "present-flow is system", "assertion": "step-role:present-flow:system" }
+      ]
+    },
+    {
+      "id": "df-presentation-gate",
+      "description": "Present-flow step has a gate for user confirmation",
+      "expectations": [
+        { "text": "present-flow has gate", "assertion": "gate-exists:present-flow" }
+      ]
+    },
+    {
+      "id": "df-dependencies",
+      "description": "Present-flow requires session state",
+      "expectations": [
+        { "text": "present-flow requires session-state", "assertion": "step-requires:present-flow:session-state" },
+        { "text": "present-flow requires current-phase", "assertion": "step-requires:present-flow:current-phase" }
+      ]
+    }
+  ]
+}

package/src/templates/skills/dev-flow/evals/triggers.json ADDED Viewed

@@ -0,0 +1,16 @@
+{
+  "skill": "dev-flow",
+  "matcherType": "keyword",
+  "description": "Shows current pipeline phase and what comes next",
+  "threshold": 0.3,
+  "tests": [
+    { "prompt": "what phase am I in", "shouldTrigger": true },
+    { "prompt": "show the current pipeline phase", "shouldTrigger": true },
+    { "prompt": "what comes next in the flow", "shouldTrigger": true },
+    { "prompt": "where did I leave off", "shouldTrigger": true, "keywordExpected": false },
+    { "prompt": "create a pull request", "shouldTrigger": false },
+    { "prompt": "review my code", "shouldTrigger": false },
+    { "prompt": "fix this bug", "shouldTrigger": false },
+    { "prompt": "run the tests", "shouldTrigger": false }
+  ]
+}

package/src/templates/skills/guild-specialize/SKILL.md CHANGED Viewed

@@ -126,13 +126,10 @@ Invoke the Tech Lead agent using Task tool with `model: "sonnet"` (execution tie
 - **advisor.md**: real project domain, target users
 - **tech-lead.md**: specific stack, detected patterns, architecture decisions
-- **product-owner.md**: existing functionality, visible backlog
 - **developer.md**: code conventions, main framework, file structure
 - **code-reviewer.md**: lint rules, project patterns, anti-patterns to watch
 - **qa.md**: testing framework, commands to run tests, current coverage
 - **bugfix.md**: debugging stack, logs, available tools
-- **db-migration.md**: ORM, migration tool, current schema (if applicable)
-- **platform-expert.md**: Claude Code version, known permission bugs, hook configuration
 When specializing agents, append a zone at the bottom of each agent file:
@@ -204,7 +201,6 @@ Tech Lead (sonnet) — Specializing agents...
 Agents updated:
 - developer.md: Specialized for Next.js + TypeScript
 - qa.md: Configured for Vitest + Playwright
-- db-migration.md: Configured for Prisma
 Run /status to see the full state.
 ```

package/src/templates/skills/guild-specialize/evals/evals.json ADDED Viewed

@@ -0,0 +1,54 @@
+{
+  "skill": "guild-specialize",
+  "evals": [
+    {
+      "id": "gs-has-core-steps",
+      "description": "Guild specialize has read, explore, enrich, specialize, confirm, commit steps",
+      "expectations": [
+        { "text": "Has read-base step", "assertion": "step-exists:read-base" },
+        { "text": "Has explore-project step", "assertion": "step-exists:explore-project" },
+        { "text": "Has enrich-claude-md step", "assertion": "step-exists:enrich-claude-md" },
+        { "text": "Has specialize-agents step", "assertion": "step-exists:specialize-agents" },
+        { "text": "Has confirm step", "assertion": "step-exists:confirm" },
+        { "text": "Has commit-enrichment step", "assertion": "step-exists:commit-enrichment" }
+      ]
+    },
+    {
+      "id": "gs-enrichment-uses-reasoning",
+      "description": "CLAUDE.md enrichment uses reasoning tier (opus)",
+      "expectations": [
+        { "text": "enrich-claude-md uses reasoning", "assertion": "step-model-tier:enrich-claude-md:reasoning" }
+      ]
+    },
+    {
+      "id": "gs-agents-use-execution",
+      "description": "Agent specialization uses execution tier (sonnet)",
+      "expectations": [
+        { "text": "specialize-agents uses execution", "assertion": "step-model-tier:specialize-agents:execution" }
+      ]
+    },
+    {
+      "id": "gs-gates",
+      "description": "Gates at exploration and confirmation",
+      "expectations": [
+        { "text": "explore-project has gate", "assertion": "gate-exists:explore-project" },
+        { "text": "confirm has gate", "assertion": "gate-exists:confirm" }
+      ]
+    },
+    {
+      "id": "gs-tech-lead-role",
+      "description": "Enrichment and specialization use tech-lead role",
+      "expectations": [
+        { "text": "enrich-claude-md is tech-lead", "assertion": "step-role:enrich-claude-md:tech-lead" },
+        { "text": "specialize-agents is tech-lead", "assertion": "step-role:specialize-agents:tech-lead" }
+      ]
+    },
+    {
+      "id": "gs-minimum-steps",
+      "description": "Has at least 6 steps",
+      "expectations": [
+        { "text": "At least 6 steps", "assertion": "step-count:6" }
+      ]
+    }
+  ]
+}

package/src/templates/skills/guild-specialize/evals/triggers.json ADDED Viewed

@@ -0,0 +1,16 @@
+{
+  "skill": "guild-specialize",
+  "matcherType": "keyword",
+  "description": "Enriches CLAUDE.md by exploring the project and specializes agents to the real stack",
+  "threshold": 0.3,
+  "tests": [
+    { "prompt": "specialize the agents for this project", "shouldTrigger": true },
+    { "prompt": "enrich CLAUDE.md with the project stack", "shouldTrigger": true },
+    { "prompt": "explore the project and specialize agents", "shouldTrigger": true },
+    { "prompt": "set up Guild for this codebase", "shouldTrigger": true, "keywordExpected": false },
+    { "prompt": "create a pull request", "shouldTrigger": false },
+    { "prompt": "review my code", "shouldTrigger": false },
+    { "prompt": "debug this bug", "shouldTrigger": false },
+    { "prompt": "save my session", "shouldTrigger": false }
+  ]
+}