guild-agents 1.2.0 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +16 -0
- package/bin/guild.js +73 -0
- package/package.json +5 -2
- package/src/commands/eval.js +225 -0
- package/src/commands/stats.js +147 -0
- package/src/commands/workspace.js +38 -1
- package/src/templates/skills/build-feature/evals/evals.json +53 -0
- package/src/templates/skills/build-feature/evals/triggers.json +16 -0
- package/src/templates/skills/council/SKILL.md +27 -6
- package/src/templates/skills/council/evals/evals.json +41 -0
- package/src/templates/skills/council/evals/triggers.json +16 -0
- package/src/templates/skills/create-pr/evals/evals.json +44 -0
- package/src/templates/skills/create-pr/evals/triggers.json +16 -0
- package/src/templates/skills/debug/SKILL.md +1 -1
- package/src/templates/skills/debug/evals/triggers.json +16 -0
- package/src/templates/skills/dev-flow/evals/evals.json +36 -0
- package/src/templates/skills/dev-flow/evals/triggers.json +16 -0
- package/src/templates/skills/guild-specialize/evals/evals.json +54 -0
- package/src/templates/skills/guild-specialize/evals/triggers.json +16 -0
- package/src/templates/skills/new-feature/evals/evals.json +41 -0
- package/src/templates/skills/new-feature/evals/triggers.json +16 -0
- package/src/templates/skills/qa-cycle/evals/evals.json +46 -0
- package/src/templates/skills/qa-cycle/evals/triggers.json +16 -0
- package/src/templates/skills/re-specialize/evals/evals.json +48 -0
- package/src/templates/skills/re-specialize/evals/triggers.json +16 -0
- package/src/templates/skills/review/evals/evals.json +43 -0
- package/src/templates/skills/review/evals/triggers.json +16 -0
- package/src/templates/skills/session-end/evals/evals.json +40 -0
- package/src/templates/skills/session-end/evals/triggers.json +16 -0
- package/src/templates/skills/session-start/evals/evals.json +50 -0
- package/src/templates/skills/session-start/evals/triggers.json +16 -0
- package/src/templates/skills/status/evals/evals.json +40 -0
- package/src/templates/skills/status/evals/triggers.json +16 -0
- package/src/templates/skills/tdd/evals/triggers.json +16 -0
- package/src/templates/skills/verify/evals/triggers.json +16 -0
- package/src/utils/accounting.js +139 -0
- package/src/utils/benchmark.js +128 -0
- package/src/utils/description-analyzer.js +92 -0
- package/src/utils/eval-runner.js +139 -0
- package/src/utils/pricing.js +28 -0
- package/src/utils/semantic-matcher.js +91 -0
- package/src/utils/trigger-matcher.js +64 -0
- package/src/utils/trigger-runner.js +132 -0
- package/src/utils/workspace.js +89 -0
|
@@ -11,24 +11,30 @@ workflow:
|
|
|
11
11
|
requires: [user-question]
|
|
12
12
|
produces: [council-type, participant-roles]
|
|
13
13
|
gate: true
|
|
14
|
+
- id: workspace-context
|
|
15
|
+
role: system
|
|
16
|
+
intent: "Detect workspace membership. If in a workspace, collect context from sibling repos (CLAUDE.md, PROJECT.md, SESSION.md) and build workspace context block."
|
|
17
|
+
requires: [council-type]
|
|
18
|
+
produces: [workspace-context]
|
|
19
|
+
condition: in-workspace
|
|
14
20
|
- id: agent-1
|
|
15
21
|
role: dynamic
|
|
16
22
|
intent: "Analyze the question from specialized perspective. State position with concrete arguments."
|
|
17
|
-
requires: [user-question, council-type]
|
|
23
|
+
requires: [user-question, council-type, workspace-context]
|
|
18
24
|
produces: [perspective-1]
|
|
19
25
|
model-tier: reasoning
|
|
20
26
|
parallel: [agent-2, agent-3]
|
|
21
27
|
- id: agent-2
|
|
22
28
|
role: dynamic
|
|
23
29
|
intent: "Analyze the question from specialized perspective. State position with concrete arguments."
|
|
24
|
-
requires: [user-question, council-type]
|
|
30
|
+
requires: [user-question, council-type, workspace-context]
|
|
25
31
|
produces: [perspective-2]
|
|
26
32
|
model-tier: reasoning
|
|
27
33
|
parallel: [agent-1, agent-3]
|
|
28
34
|
- id: agent-3
|
|
29
35
|
role: dynamic
|
|
30
36
|
intent: "Analyze the question from specialized perspective. State position with concrete arguments."
|
|
31
|
-
requires: [user-question, council-type]
|
|
37
|
+
requires: [user-question, council-type, workspace-context]
|
|
32
38
|
produces: [perspective-3]
|
|
33
39
|
model-tier: reasoning
|
|
34
40
|
parallel: [agent-1, agent-2]
|
|
@@ -114,12 +120,23 @@ Analyze the user's question and determine which council type applies:
|
|
|
114
120
|
|
|
115
121
|
### Step 2 — Convene agents
|
|
116
122
|
|
|
123
|
+
**Workspace detection:** Before invoking agents, check if the project is inside a workspace:
|
|
124
|
+
|
|
125
|
+
1. Look for a `guild-workspace.json` file by searching upward from the project root
|
|
126
|
+
2. If found, load the workspace config and identify which member this project is
|
|
127
|
+
3. Read CLAUDE.md, PROJECT.md, and SESSION.md from each sibling member repo
|
|
128
|
+
4. Build a workspace context block with:
|
|
129
|
+
- Workspace name
|
|
130
|
+
- Each sibling's stack, structure summary, and current task
|
|
131
|
+
- Absolute paths so the agent can read any sibling file for deeper analysis
|
|
132
|
+
|
|
117
133
|
Invoke the 3 corresponding agents IN PARALLEL using Task tool with `model: "opus"` (all council agents use reasoning tier). Each agent:
|
|
118
134
|
|
|
119
135
|
1. Reads their `.claude/agents/[name].md` file to assume their role
|
|
120
136
|
2. Reads `CLAUDE.md` and `SESSION.md` for project context
|
|
121
|
-
3. Analyzes the question from their specialized perspective
|
|
122
|
-
4. States their position with concrete arguments
|
|
137
|
+
3. **If in a workspace:** receives the workspace context block and considers cross-repo impact as part of their analysis. They may read files from sibling repos using the provided paths.
|
|
138
|
+
4. Analyzes the question from their specialized perspective
|
|
139
|
+
5. States their position with concrete arguments
|
|
123
140
|
|
|
124
141
|
### Step 3 — Present debate
|
|
125
142
|
|
|
@@ -191,7 +208,11 @@ Example:
|
|
|
191
208
|
Task tool with:
|
|
192
209
|
subagent_type: "general-purpose"
|
|
193
210
|
model: "opus"
|
|
194
|
-
prompt: "Read .claude/agents/tech-lead.md and assume that role. Then: [debate question]
|
|
211
|
+
prompt: "Read .claude/agents/tech-lead.md and assume that role. Then: [debate question]
|
|
212
|
+
|
|
213
|
+
[If in workspace, append:]
|
|
214
|
+
## Workspace context
|
|
215
|
+
[workspace context block from Step 2]"
|
|
195
216
|
```
|
|
196
217
|
|
|
197
218
|
The `model` parameter is resolved from the step's `model-tier`: all council agents use reasoning→`"opus"`.
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
{
|
|
2
|
+
"skill": "council",
|
|
3
|
+
"evals": [
|
|
4
|
+
{
|
|
5
|
+
"id": "council-three-parallel-agents",
|
|
6
|
+
"description": "Council has 3 agent steps in parallel",
|
|
7
|
+
"expectations": [
|
|
8
|
+
{ "text": "Agent-1 exists", "assertion": "step-exists:agent-1" },
|
|
9
|
+
{ "text": "Agent-2 exists", "assertion": "step-exists:agent-2" },
|
|
10
|
+
{ "text": "Agent-3 exists", "assertion": "step-exists:agent-3" },
|
|
11
|
+
{ "text": "Agent-1 is parallel", "assertion": "step-parallel:agent-1" },
|
|
12
|
+
{ "text": "Agent-2 is parallel", "assertion": "step-parallel:agent-2" },
|
|
13
|
+
{ "text": "Agent-3 is parallel", "assertion": "step-parallel:agent-3" }
|
|
14
|
+
]
|
|
15
|
+
},
|
|
16
|
+
{
|
|
17
|
+
"id": "council-agents-use-reasoning",
|
|
18
|
+
"description": "All council agents use reasoning tier",
|
|
19
|
+
"expectations": [
|
|
20
|
+
{ "text": "Agent-1 uses reasoning", "assertion": "step-model-tier:agent-1:reasoning" },
|
|
21
|
+
{ "text": "Agent-2 uses reasoning", "assertion": "step-model-tier:agent-2:reasoning" },
|
|
22
|
+
{ "text": "Agent-3 uses reasoning", "assertion": "step-model-tier:agent-3:reasoning" }
|
|
23
|
+
]
|
|
24
|
+
},
|
|
25
|
+
{
|
|
26
|
+
"id": "council-synthesize-gate",
|
|
27
|
+
"description": "Synthesize step exists with gate",
|
|
28
|
+
"expectations": [
|
|
29
|
+
{ "text": "Synthesize step exists", "assertion": "step-exists:synthesize" },
|
|
30
|
+
{ "text": "Synthesize has gate", "assertion": "gate-exists:synthesize" }
|
|
31
|
+
]
|
|
32
|
+
},
|
|
33
|
+
{
|
|
34
|
+
"id": "council-workspace-context",
|
|
35
|
+
"description": "Workspace context step exists with condition",
|
|
36
|
+
"expectations": [
|
|
37
|
+
{ "text": "Workspace-context step exists", "assertion": "step-exists:workspace-context" }
|
|
38
|
+
]
|
|
39
|
+
}
|
|
40
|
+
]
|
|
41
|
+
}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
{
|
|
2
|
+
"skill": "council",
|
|
3
|
+
"matcherType": "keyword",
|
|
4
|
+
"description": "Convenes multiple agents to debate an important decision",
|
|
5
|
+
"threshold": 0.3,
|
|
6
|
+
"tests": [
|
|
7
|
+
{ "prompt": "convene a council to debate this decision", "shouldTrigger": true },
|
|
8
|
+
{ "prompt": "I need multiple agents to debate this", "shouldTrigger": true },
|
|
9
|
+
{ "prompt": "let the council decide", "shouldTrigger": true, "keywordExpected": false },
|
|
10
|
+
{ "prompt": "I need help making a decision", "shouldTrigger": true, "keywordExpected": false },
|
|
11
|
+
{ "prompt": "build a new feature", "shouldTrigger": false },
|
|
12
|
+
{ "prompt": "review my code", "shouldTrigger": false },
|
|
13
|
+
{ "prompt": "save my session", "shouldTrigger": false },
|
|
14
|
+
{ "prompt": "debug this bug", "shouldTrigger": false }
|
|
15
|
+
]
|
|
16
|
+
}
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
{
|
|
2
|
+
"skill": "create-pr",
|
|
3
|
+
"evals": [
|
|
4
|
+
{
|
|
5
|
+
"id": "cpr-has-core-steps",
|
|
6
|
+
"description": "PR creation has verify, gather, generate, create steps",
|
|
7
|
+
"expectations": [
|
|
8
|
+
{ "text": "Has verify-branch step", "assertion": "step-exists:verify-branch" },
|
|
9
|
+
{ "text": "Has gather-context step", "assertion": "step-exists:gather-context" },
|
|
10
|
+
{ "text": "Has generate-description step", "assertion": "step-exists:generate-description" },
|
|
11
|
+
{ "text": "Has create-pr step", "assertion": "step-exists:create-pr" }
|
|
12
|
+
]
|
|
13
|
+
},
|
|
14
|
+
{
|
|
15
|
+
"id": "cpr-all-system-role",
|
|
16
|
+
"description": "All steps use system role (no agent delegation)",
|
|
17
|
+
"expectations": [
|
|
18
|
+
{ "text": "verify-branch is system", "assertion": "step-role:verify-branch:system" },
|
|
19
|
+
{ "text": "gather-context is system", "assertion": "step-role:gather-context:system" },
|
|
20
|
+
{ "text": "generate-description is system", "assertion": "step-role:generate-description:system" },
|
|
21
|
+
{ "text": "create-pr is system", "assertion": "step-role:create-pr:system" },
|
|
22
|
+
{ "text": "post-creation is system", "assertion": "step-role:post-creation:system" }
|
|
23
|
+
]
|
|
24
|
+
},
|
|
25
|
+
{
|
|
26
|
+
"id": "cpr-gates",
|
|
27
|
+
"description": "Gates at description generation and post-creation",
|
|
28
|
+
"expectations": [
|
|
29
|
+
{ "text": "Generate-description has gate", "assertion": "gate-exists:generate-description" },
|
|
30
|
+
{ "text": "Post-creation has gate", "assertion": "gate-exists:post-creation" }
|
|
31
|
+
]
|
|
32
|
+
},
|
|
33
|
+
{
|
|
34
|
+
"id": "cpr-dependencies",
|
|
35
|
+
"description": "Steps have correct dependency chain",
|
|
36
|
+
"expectations": [
|
|
37
|
+
{ "text": "gather-context requires branch-state", "assertion": "step-requires:gather-context:branch-state" },
|
|
38
|
+
{ "text": "generate-description requires commit-list", "assertion": "step-requires:generate-description:commit-list" },
|
|
39
|
+
{ "text": "create-pr requires pr-description", "assertion": "step-requires:create-pr:pr-description" },
|
|
40
|
+
{ "text": "post-creation requires pr-url", "assertion": "step-requires:post-creation:pr-url" }
|
|
41
|
+
]
|
|
42
|
+
}
|
|
43
|
+
]
|
|
44
|
+
}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
{
|
|
2
|
+
"skill": "create-pr",
|
|
3
|
+
"matcherType": "keyword",
|
|
4
|
+
"description": "Create a pull request from the current branch with structured summary",
|
|
5
|
+
"threshold": 0.3,
|
|
6
|
+
"tests": [
|
|
7
|
+
{ "prompt": "create a pull request", "shouldTrigger": true },
|
|
8
|
+
{ "prompt": "open a PR for this branch", "shouldTrigger": true },
|
|
9
|
+
{ "prompt": "push and create PR", "shouldTrigger": true },
|
|
10
|
+
{ "prompt": "I'm ready to submit this for review", "shouldTrigger": true, "keywordExpected": false },
|
|
11
|
+
{ "prompt": "review my code changes", "shouldTrigger": false },
|
|
12
|
+
{ "prompt": "start a new feature", "shouldTrigger": false },
|
|
13
|
+
{ "prompt": "deploy to production", "shouldTrigger": false },
|
|
14
|
+
{ "prompt": "save my session", "shouldTrigger": false }
|
|
15
|
+
]
|
|
16
|
+
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: debug
|
|
3
|
-
description: "Discipline skill — systematic debugging process. Use when encountering any bug, test failure, or unexpected behavior, before proposing fixes."
|
|
3
|
+
description: "Discipline skill — systematic debugging process. Use when encountering any bug, debug issue, test failure, broken function, or unexpected behavior, before proposing fixes."
|
|
4
4
|
user-invocable: true
|
|
5
5
|
---
|
|
6
6
|
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
{
|
|
2
|
+
"skill": "debug",
|
|
3
|
+
"matcherType": "keyword",
|
|
4
|
+
"description": "Discipline skill — systematic debugging process. Use when encountering any bug, debug issue, test failure, broken function, or unexpected behavior, before proposing fixes.",
|
|
5
|
+
"threshold": 0.3,
|
|
6
|
+
"tests": [
|
|
7
|
+
{ "prompt": "I have a bug in the login flow", "shouldTrigger": true, "keywordExpected": false },
|
|
8
|
+
{ "prompt": "tests are failing unexpectedly", "shouldTrigger": true },
|
|
9
|
+
{ "prompt": "unexpected behavior in the API", "shouldTrigger": true },
|
|
10
|
+
{ "prompt": "help me debug this function", "shouldTrigger": true },
|
|
11
|
+
{ "prompt": "create a new feature", "shouldTrigger": false },
|
|
12
|
+
{ "prompt": "review my code", "shouldTrigger": false },
|
|
13
|
+
{ "prompt": "save my session", "shouldTrigger": false },
|
|
14
|
+
{ "prompt": "what phase am I in", "shouldTrigger": false }
|
|
15
|
+
]
|
|
16
|
+
}
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
{
|
|
2
|
+
"skill": "dev-flow",
|
|
3
|
+
"evals": [
|
|
4
|
+
{
|
|
5
|
+
"id": "df-has-steps",
|
|
6
|
+
"description": "Dev flow has read-state and present-flow steps",
|
|
7
|
+
"expectations": [
|
|
8
|
+
{ "text": "Has read-state step", "assertion": "step-exists:read-state" },
|
|
9
|
+
{ "text": "Has present-flow step", "assertion": "step-exists:present-flow" }
|
|
10
|
+
]
|
|
11
|
+
},
|
|
12
|
+
{
|
|
13
|
+
"id": "df-all-system",
|
|
14
|
+
"description": "All steps are system role",
|
|
15
|
+
"expectations": [
|
|
16
|
+
{ "text": "read-state is system", "assertion": "step-role:read-state:system" },
|
|
17
|
+
{ "text": "present-flow is system", "assertion": "step-role:present-flow:system" }
|
|
18
|
+
]
|
|
19
|
+
},
|
|
20
|
+
{
|
|
21
|
+
"id": "df-presentation-gate",
|
|
22
|
+
"description": "Present-flow step has a gate for user confirmation",
|
|
23
|
+
"expectations": [
|
|
24
|
+
{ "text": "present-flow has gate", "assertion": "gate-exists:present-flow" }
|
|
25
|
+
]
|
|
26
|
+
},
|
|
27
|
+
{
|
|
28
|
+
"id": "df-dependencies",
|
|
29
|
+
"description": "Present-flow requires session state",
|
|
30
|
+
"expectations": [
|
|
31
|
+
{ "text": "present-flow requires session-state", "assertion": "step-requires:present-flow:session-state" },
|
|
32
|
+
{ "text": "present-flow requires current-phase", "assertion": "step-requires:present-flow:current-phase" }
|
|
33
|
+
]
|
|
34
|
+
}
|
|
35
|
+
]
|
|
36
|
+
}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
{
|
|
2
|
+
"skill": "dev-flow",
|
|
3
|
+
"matcherType": "keyword",
|
|
4
|
+
"description": "Shows current pipeline phase and what comes next",
|
|
5
|
+
"threshold": 0.3,
|
|
6
|
+
"tests": [
|
|
7
|
+
{ "prompt": "what phase am I in", "shouldTrigger": true },
|
|
8
|
+
{ "prompt": "show the current pipeline phase", "shouldTrigger": true },
|
|
9
|
+
{ "prompt": "what comes next in the flow", "shouldTrigger": true },
|
|
10
|
+
{ "prompt": "where did I leave off", "shouldTrigger": true, "keywordExpected": false },
|
|
11
|
+
{ "prompt": "create a pull request", "shouldTrigger": false },
|
|
12
|
+
{ "prompt": "review my code", "shouldTrigger": false },
|
|
13
|
+
{ "prompt": "fix this bug", "shouldTrigger": false },
|
|
14
|
+
{ "prompt": "run the tests", "shouldTrigger": false }
|
|
15
|
+
]
|
|
16
|
+
}
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
{
|
|
2
|
+
"skill": "guild-specialize",
|
|
3
|
+
"evals": [
|
|
4
|
+
{
|
|
5
|
+
"id": "gs-has-core-steps",
|
|
6
|
+
"description": "Guild specialize has read, explore, enrich, specialize, confirm, commit steps",
|
|
7
|
+
"expectations": [
|
|
8
|
+
{ "text": "Has read-base step", "assertion": "step-exists:read-base" },
|
|
9
|
+
{ "text": "Has explore-project step", "assertion": "step-exists:explore-project" },
|
|
10
|
+
{ "text": "Has enrich-claude-md step", "assertion": "step-exists:enrich-claude-md" },
|
|
11
|
+
{ "text": "Has specialize-agents step", "assertion": "step-exists:specialize-agents" },
|
|
12
|
+
{ "text": "Has confirm step", "assertion": "step-exists:confirm" },
|
|
13
|
+
{ "text": "Has commit-enrichment step", "assertion": "step-exists:commit-enrichment" }
|
|
14
|
+
]
|
|
15
|
+
},
|
|
16
|
+
{
|
|
17
|
+
"id": "gs-enrichment-uses-reasoning",
|
|
18
|
+
"description": "CLAUDE.md enrichment uses reasoning tier (opus)",
|
|
19
|
+
"expectations": [
|
|
20
|
+
{ "text": "enrich-claude-md uses reasoning", "assertion": "step-model-tier:enrich-claude-md:reasoning" }
|
|
21
|
+
]
|
|
22
|
+
},
|
|
23
|
+
{
|
|
24
|
+
"id": "gs-agents-use-execution",
|
|
25
|
+
"description": "Agent specialization uses execution tier (sonnet)",
|
|
26
|
+
"expectations": [
|
|
27
|
+
{ "text": "specialize-agents uses execution", "assertion": "step-model-tier:specialize-agents:execution" }
|
|
28
|
+
]
|
|
29
|
+
},
|
|
30
|
+
{
|
|
31
|
+
"id": "gs-gates",
|
|
32
|
+
"description": "Gates at exploration and confirmation",
|
|
33
|
+
"expectations": [
|
|
34
|
+
{ "text": "explore-project has gate", "assertion": "gate-exists:explore-project" },
|
|
35
|
+
{ "text": "confirm has gate", "assertion": "gate-exists:confirm" }
|
|
36
|
+
]
|
|
37
|
+
},
|
|
38
|
+
{
|
|
39
|
+
"id": "gs-tech-lead-role",
|
|
40
|
+
"description": "Enrichment and specialization use tech-lead role",
|
|
41
|
+
"expectations": [
|
|
42
|
+
{ "text": "enrich-claude-md is tech-lead", "assertion": "step-role:enrich-claude-md:tech-lead" },
|
|
43
|
+
{ "text": "specialize-agents is tech-lead", "assertion": "step-role:specialize-agents:tech-lead" }
|
|
44
|
+
]
|
|
45
|
+
},
|
|
46
|
+
{
|
|
47
|
+
"id": "gs-minimum-steps",
|
|
48
|
+
"description": "Has at least 6 steps",
|
|
49
|
+
"expectations": [
|
|
50
|
+
{ "text": "At least 6 steps", "assertion": "step-count:6" }
|
|
51
|
+
]
|
|
52
|
+
}
|
|
53
|
+
]
|
|
54
|
+
}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
{
|
|
2
|
+
"skill": "guild-specialize",
|
|
3
|
+
"matcherType": "keyword",
|
|
4
|
+
"description": "Enriches CLAUDE.md by exploring the project and specializes agents to the real stack",
|
|
5
|
+
"threshold": 0.3,
|
|
6
|
+
"tests": [
|
|
7
|
+
{ "prompt": "specialize the agents for this project", "shouldTrigger": true },
|
|
8
|
+
{ "prompt": "enrich CLAUDE.md with the project stack", "shouldTrigger": true },
|
|
9
|
+
{ "prompt": "explore the project and specialize agents", "shouldTrigger": true },
|
|
10
|
+
{ "prompt": "set up Guild for this codebase", "shouldTrigger": true, "keywordExpected": false },
|
|
11
|
+
{ "prompt": "create a pull request", "shouldTrigger": false },
|
|
12
|
+
{ "prompt": "review my code", "shouldTrigger": false },
|
|
13
|
+
{ "prompt": "debug this bug", "shouldTrigger": false },
|
|
14
|
+
{ "prompt": "save my session", "shouldTrigger": false }
|
|
15
|
+
]
|
|
16
|
+
}
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
{
|
|
2
|
+
"skill": "new-feature",
|
|
3
|
+
"evals": [
|
|
4
|
+
{
|
|
5
|
+
"id": "nf-has-core-steps",
|
|
6
|
+
"description": "New feature has name, branch, session, confirm steps",
|
|
7
|
+
"expectations": [
|
|
8
|
+
{ "text": "Has get-name step", "assertion": "step-exists:get-name" },
|
|
9
|
+
{ "text": "Has create-branch step", "assertion": "step-exists:create-branch" },
|
|
10
|
+
{ "text": "Has update-session step", "assertion": "step-exists:update-session" },
|
|
11
|
+
{ "text": "Has confirm step", "assertion": "step-exists:confirm" }
|
|
12
|
+
]
|
|
13
|
+
},
|
|
14
|
+
{
|
|
15
|
+
"id": "nf-optional-issue",
|
|
16
|
+
"description": "GitHub issue creation step exists",
|
|
17
|
+
"expectations": [
|
|
18
|
+
{ "text": "Has create-issue step", "assertion": "step-exists:create-issue" }
|
|
19
|
+
]
|
|
20
|
+
},
|
|
21
|
+
{
|
|
22
|
+
"id": "nf-gates",
|
|
23
|
+
"description": "Gates at name input, session update, and confirmation",
|
|
24
|
+
"expectations": [
|
|
25
|
+
{ "text": "get-name has gate", "assertion": "gate-exists:get-name" },
|
|
26
|
+
{ "text": "update-session has gate", "assertion": "gate-exists:update-session" },
|
|
27
|
+
{ "text": "confirm has gate", "assertion": "gate-exists:confirm" }
|
|
28
|
+
]
|
|
29
|
+
},
|
|
30
|
+
{
|
|
31
|
+
"id": "nf-all-system",
|
|
32
|
+
"description": "All steps are system role",
|
|
33
|
+
"expectations": [
|
|
34
|
+
{ "text": "get-name is system", "assertion": "step-role:get-name:system" },
|
|
35
|
+
{ "text": "create-branch is system", "assertion": "step-role:create-branch:system" },
|
|
36
|
+
{ "text": "update-session is system", "assertion": "step-role:update-session:system" },
|
|
37
|
+
{ "text": "confirm is system", "assertion": "step-role:confirm:system" }
|
|
38
|
+
]
|
|
39
|
+
}
|
|
40
|
+
]
|
|
41
|
+
}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
{
|
|
2
|
+
"skill": "new-feature",
|
|
3
|
+
"matcherType": "keyword",
|
|
4
|
+
"description": "Creates branch and scaffold for a new feature",
|
|
5
|
+
"threshold": 0.3,
|
|
6
|
+
"tests": [
|
|
7
|
+
{ "prompt": "create a new feature branch", "shouldTrigger": true },
|
|
8
|
+
{ "prompt": "scaffold a new feature", "shouldTrigger": true },
|
|
9
|
+
{ "prompt": "start a new feature called user-auth", "shouldTrigger": true },
|
|
10
|
+
{ "prompt": "prepare the branch for a new feature", "shouldTrigger": true },
|
|
11
|
+
{ "prompt": "review my code", "shouldTrigger": false },
|
|
12
|
+
{ "prompt": "create a pull request", "shouldTrigger": false },
|
|
13
|
+
{ "prompt": "debug this bug", "shouldTrigger": false },
|
|
14
|
+
{ "prompt": "save my session", "shouldTrigger": false }
|
|
15
|
+
]
|
|
16
|
+
}
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
{
|
|
2
|
+
"skill": "qa-cycle",
|
|
3
|
+
"evals": [
|
|
4
|
+
{
|
|
5
|
+
"id": "qa-has-core-steps",
|
|
6
|
+
"description": "QA cycle has pre-gate, validate, bugfix, post-gate steps",
|
|
7
|
+
"expectations": [
|
|
8
|
+
{ "text": "Has gate-pre-qa step", "assertion": "step-exists:gate-pre-qa" },
|
|
9
|
+
{ "text": "Has qa-validate step", "assertion": "step-exists:qa-validate" },
|
|
10
|
+
{ "text": "Has bugfix step", "assertion": "step-exists:bugfix" },
|
|
11
|
+
{ "text": "Has gate-post-qa step", "assertion": "step-exists:gate-post-qa" }
|
|
12
|
+
]
|
|
13
|
+
},
|
|
14
|
+
{
|
|
15
|
+
"id": "qa-agent-roles",
|
|
16
|
+
"description": "QA and bugfix steps use correct agent roles",
|
|
17
|
+
"expectations": [
|
|
18
|
+
{ "text": "qa-validate uses qa role", "assertion": "step-role:qa-validate:qa" },
|
|
19
|
+
{ "text": "bugfix uses bugfix role", "assertion": "step-role:bugfix:bugfix" }
|
|
20
|
+
]
|
|
21
|
+
},
|
|
22
|
+
{
|
|
23
|
+
"id": "qa-execution-tier",
|
|
24
|
+
"description": "QA and bugfix use execution tier (sonnet)",
|
|
25
|
+
"expectations": [
|
|
26
|
+
{ "text": "qa-validate uses execution", "assertion": "step-model-tier:qa-validate:execution" },
|
|
27
|
+
{ "text": "bugfix uses execution", "assertion": "step-model-tier:bugfix:execution" }
|
|
28
|
+
]
|
|
29
|
+
},
|
|
30
|
+
{
|
|
31
|
+
"id": "qa-gates",
|
|
32
|
+
"description": "Pre and post QA gates exist",
|
|
33
|
+
"expectations": [
|
|
34
|
+
{ "text": "gate-pre-qa has gate", "assertion": "gate-exists:gate-pre-qa" },
|
|
35
|
+
{ "text": "gate-post-qa has gate", "assertion": "gate-exists:gate-post-qa" }
|
|
36
|
+
]
|
|
37
|
+
},
|
|
38
|
+
{
|
|
39
|
+
"id": "qa-bugfix-requires-report",
|
|
40
|
+
"description": "Bugfix step requires QA report",
|
|
41
|
+
"expectations": [
|
|
42
|
+
{ "text": "bugfix requires qa-report", "assertion": "step-requires:bugfix:qa-report" }
|
|
43
|
+
]
|
|
44
|
+
}
|
|
45
|
+
]
|
|
46
|
+
}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
{
|
|
2
|
+
"skill": "qa-cycle",
|
|
3
|
+
"matcherType": "keyword",
|
|
4
|
+
"description": "QA + bugfix cycle until it passes",
|
|
5
|
+
"threshold": 0.3,
|
|
6
|
+
"tests": [
|
|
7
|
+
{ "prompt": "run a QA cycle on this", "shouldTrigger": true },
|
|
8
|
+
{ "prompt": "QA and bugfix until it passes", "shouldTrigger": true },
|
|
9
|
+
{ "prompt": "validate with QA cycle", "shouldTrigger": true },
|
|
10
|
+
{ "prompt": "check if my implementation is correct", "shouldTrigger": true, "keywordExpected": false },
|
|
11
|
+
{ "prompt": "create a pull request", "shouldTrigger": false },
|
|
12
|
+
{ "prompt": "review my code", "shouldTrigger": false },
|
|
13
|
+
{ "prompt": "save my session", "shouldTrigger": false },
|
|
14
|
+
{ "prompt": "start a new feature", "shouldTrigger": false }
|
|
15
|
+
]
|
|
16
|
+
}
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
{
|
|
2
|
+
"skill": "re-specialize",
|
|
3
|
+
"evals": [
|
|
4
|
+
{
|
|
5
|
+
"id": "rs-has-core-steps",
|
|
6
|
+
"description": "Re-specialize has read, explore, check-zones, regenerate, update, confirm, commit steps",
|
|
7
|
+
"expectations": [
|
|
8
|
+
{ "text": "Has read-current step", "assertion": "step-exists:read-current" },
|
|
9
|
+
{ "text": "Has explore-project step", "assertion": "step-exists:explore-project" },
|
|
10
|
+
{ "text": "Has check-zones step", "assertion": "step-exists:check-zones" },
|
|
11
|
+
{ "text": "Has regenerate-zones step", "assertion": "step-exists:regenerate-zones" },
|
|
12
|
+
{ "text": "Has update-agents step", "assertion": "step-exists:update-agents" },
|
|
13
|
+
{ "text": "Has confirm step", "assertion": "step-exists:confirm" },
|
|
14
|
+
{ "text": "Has commit step", "assertion": "step-exists:commit" }
|
|
15
|
+
]
|
|
16
|
+
},
|
|
17
|
+
{
|
|
18
|
+
"id": "rs-reasoning-for-regeneration",
|
|
19
|
+
"description": "Zone regeneration uses reasoning tier (opus)",
|
|
20
|
+
"expectations": [
|
|
21
|
+
{ "text": "regenerate-zones uses reasoning", "assertion": "step-model-tier:regenerate-zones:reasoning" }
|
|
22
|
+
]
|
|
23
|
+
},
|
|
24
|
+
{
|
|
25
|
+
"id": "rs-execution-for-agents",
|
|
26
|
+
"description": "Agent updates use execution tier (sonnet)",
|
|
27
|
+
"expectations": [
|
|
28
|
+
{ "text": "update-agents uses execution", "assertion": "step-model-tier:update-agents:execution" }
|
|
29
|
+
]
|
|
30
|
+
},
|
|
31
|
+
{
|
|
32
|
+
"id": "rs-gates",
|
|
33
|
+
"description": "Gates at exploration, zone check, and confirmation",
|
|
34
|
+
"expectations": [
|
|
35
|
+
{ "text": "explore-project has gate", "assertion": "gate-exists:explore-project" },
|
|
36
|
+
{ "text": "check-zones has gate", "assertion": "gate-exists:check-zones" },
|
|
37
|
+
{ "text": "confirm has gate", "assertion": "gate-exists:confirm" }
|
|
38
|
+
]
|
|
39
|
+
},
|
|
40
|
+
{
|
|
41
|
+
"id": "rs-minimum-steps",
|
|
42
|
+
"description": "Has at least 7 steps",
|
|
43
|
+
"expectations": [
|
|
44
|
+
{ "text": "At least 7 steps", "assertion": "step-count:7" }
|
|
45
|
+
]
|
|
46
|
+
}
|
|
47
|
+
]
|
|
48
|
+
}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
{
|
|
2
|
+
"skill": "re-specialize",
|
|
3
|
+
"matcherType": "keyword",
|
|
4
|
+
"description": "Incremental re-specialization — re-scans the project and updates only auto-generated zones in CLAUDE.md and agents",
|
|
5
|
+
"threshold": 0.3,
|
|
6
|
+
"tests": [
|
|
7
|
+
{ "prompt": "run incremental re-specialization", "shouldTrigger": true },
|
|
8
|
+
{ "prompt": "re-scan the project and update CLAUDE.md zones", "shouldTrigger": true },
|
|
9
|
+
{ "prompt": "update the auto-generated zones in agents", "shouldTrigger": true },
|
|
10
|
+
{ "prompt": "my stack changed, update the agents", "shouldTrigger": true, "keywordExpected": false },
|
|
11
|
+
{ "prompt": "create a pull request", "shouldTrigger": false },
|
|
12
|
+
{ "prompt": "review my code", "shouldTrigger": false },
|
|
13
|
+
{ "prompt": "debug this bug", "shouldTrigger": false },
|
|
14
|
+
{ "prompt": "save my session", "shouldTrigger": false }
|
|
15
|
+
]
|
|
16
|
+
}
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
{
|
|
2
|
+
"skill": "review",
|
|
3
|
+
"evals": [
|
|
4
|
+
{
|
|
5
|
+
"id": "rv-has-core-steps",
|
|
6
|
+
"description": "Review has gather-diff, review, and present steps",
|
|
7
|
+
"expectations": [
|
|
8
|
+
{ "text": "Has gather-diff step", "assertion": "step-exists:gather-diff" },
|
|
9
|
+
{ "text": "Has review step", "assertion": "step-exists:review" },
|
|
10
|
+
{ "text": "Has present step", "assertion": "step-exists:present" }
|
|
11
|
+
]
|
|
12
|
+
},
|
|
13
|
+
{
|
|
14
|
+
"id": "rv-reviewer-role",
|
|
15
|
+
"description": "Review step uses code-reviewer role",
|
|
16
|
+
"expectations": [
|
|
17
|
+
{ "text": "review uses code-reviewer role", "assertion": "step-role:review:code-reviewer" }
|
|
18
|
+
]
|
|
19
|
+
},
|
|
20
|
+
{
|
|
21
|
+
"id": "rv-reasoning-tier",
|
|
22
|
+
"description": "Code review uses reasoning tier (opus)",
|
|
23
|
+
"expectations": [
|
|
24
|
+
{ "text": "review uses reasoning tier", "assertion": "step-model-tier:review:reasoning" }
|
|
25
|
+
]
|
|
26
|
+
},
|
|
27
|
+
{
|
|
28
|
+
"id": "rv-presentation-gate",
|
|
29
|
+
"description": "Present step has gate for user review",
|
|
30
|
+
"expectations": [
|
|
31
|
+
{ "text": "present has gate", "assertion": "gate-exists:present" }
|
|
32
|
+
]
|
|
33
|
+
},
|
|
34
|
+
{
|
|
35
|
+
"id": "rv-dependencies",
|
|
36
|
+
"description": "Review requires diff content, present requires report",
|
|
37
|
+
"expectations": [
|
|
38
|
+
{ "text": "review requires diff-content", "assertion": "step-requires:review:diff-content" },
|
|
39
|
+
{ "text": "present requires review-report", "assertion": "step-requires:present:review-report" }
|
|
40
|
+
]
|
|
41
|
+
}
|
|
42
|
+
]
|
|
43
|
+
}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
{
|
|
2
|
+
"skill": "review",
|
|
3
|
+
"matcherType": "keyword",
|
|
4
|
+
"description": "Standalone code review on the current diff",
|
|
5
|
+
"threshold": 0.3,
|
|
6
|
+
"tests": [
|
|
7
|
+
{ "prompt": "review my code", "shouldTrigger": true },
|
|
8
|
+
{ "prompt": "do a code review on the current changes", "shouldTrigger": true },
|
|
9
|
+
{ "prompt": "check my diff for issues", "shouldTrigger": true },
|
|
10
|
+
{ "prompt": "review the current diff", "shouldTrigger": true },
|
|
11
|
+
{ "prompt": "create a pull request", "shouldTrigger": false },
|
|
12
|
+
{ "prompt": "save my session", "shouldTrigger": false },
|
|
13
|
+
{ "prompt": "what phase am I in", "shouldTrigger": false },
|
|
14
|
+
{ "prompt": "start a new feature", "shouldTrigger": false }
|
|
15
|
+
]
|
|
16
|
+
}
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
{
|
|
2
|
+
"skill": "session-end",
|
|
3
|
+
"evals": [
|
|
4
|
+
{
|
|
5
|
+
"id": "se-has-core-steps",
|
|
6
|
+
"description": "Session end has gather, update, wip-commit, confirm steps",
|
|
7
|
+
"expectations": [
|
|
8
|
+
{ "text": "Has gather-state step", "assertion": "step-exists:gather-state" },
|
|
9
|
+
{ "text": "Has update-session step", "assertion": "step-exists:update-session" },
|
|
10
|
+
{ "text": "Has commit-wip step", "assertion": "step-exists:commit-wip" },
|
|
11
|
+
{ "text": "Has confirm step", "assertion": "step-exists:confirm" }
|
|
12
|
+
]
|
|
13
|
+
},
|
|
14
|
+
{
|
|
15
|
+
"id": "se-all-system",
|
|
16
|
+
"description": "All steps are system role",
|
|
17
|
+
"expectations": [
|
|
18
|
+
{ "text": "gather-state is system", "assertion": "step-role:gather-state:system" },
|
|
19
|
+
{ "text": "update-session is system", "assertion": "step-role:update-session:system" },
|
|
20
|
+
{ "text": "commit-wip is system", "assertion": "step-role:commit-wip:system" },
|
|
21
|
+
{ "text": "confirm is system", "assertion": "step-role:confirm:system" }
|
|
22
|
+
]
|
|
23
|
+
},
|
|
24
|
+
{
|
|
25
|
+
"id": "se-gates",
|
|
26
|
+
"description": "Gates at session update and confirmation",
|
|
27
|
+
"expectations": [
|
|
28
|
+
{ "text": "update-session has gate", "assertion": "gate-exists:update-session" },
|
|
29
|
+
{ "text": "confirm has gate", "assertion": "gate-exists:confirm" }
|
|
30
|
+
]
|
|
31
|
+
},
|
|
32
|
+
{
|
|
33
|
+
"id": "se-wip-requires-files",
|
|
34
|
+
"description": "WIP commit requires knowledge of modified files",
|
|
35
|
+
"expectations": [
|
|
36
|
+
{ "text": "commit-wip requires modified-files", "assertion": "step-requires:commit-wip:modified-files" }
|
|
37
|
+
]
|
|
38
|
+
}
|
|
39
|
+
]
|
|
40
|
+
}
|