guild-agents 1.2.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/bin/guild.js CHANGED
@@ -225,4 +225,31 @@ workspaceCmd
225
225
  }
226
226
  });
227
227
 
228
// guild workspace run
// Registers `guild workspace run [member] [preset] [--cmd <command>] [--all]`.
// The handler prints one status line per member result and exits with code 1
// when any result failed or when the command itself throws.
workspaceCmd
  .command('run')
  .description('Run a command in a workspace member repo')
  .argument('[member]', 'Member name (or omit with --all)')
  .argument('[preset]', 'Preset command: test, lint, build')
  .option('--cmd <command>', 'Custom command to run')
  .option('--all', 'Run in all workspace members')
  .action(async (member, preset, options) => {
    try {
      // Lazy import so the workspace module is only loaded for this subcommand.
      const workspaceModule = await import('../src/commands/workspace.js');
      const outcomes = workspaceModule.runWorkspaceCommand(member, preset, options);

      let failureCount = 0;
      outcomes.forEach((outcome) => {
        const hasFailed = outcome.status === 'failed';
        const icon = outcome.status === 'passed' ? '\u2705' : '\u274C';
        console.log(`${icon} ${outcome.member}: ${outcome.status} (${outcome.duration}ms)`);
        // Only failed runs echo their captured output.
        if (hasFailed && outcome.output) {
          console.log(outcome.output);
        }
        if (hasFailed) {
          failureCount += 1;
        }
      });

      if (failureCount > 0) process.exit(1);
    } catch (err) {
      console.error(err.message);
      process.exit(1);
    }
  });
254
+
228
255
  program.parse();
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "guild-agents",
3
- "version": "1.2.0",
3
+ "version": "1.3.0",
4
4
  "description": "Specification-driven development CLI for Claude Code — think before you build",
5
5
  "type": "module",
6
6
  "files": [
@@ -28,7 +28,10 @@
28
28
  "publish:snapshot": "npm run version:snapshot && npm publish --tag snapshot",
29
29
  "publish:beta": "npm run version:beta && npm publish --tag beta",
30
30
  "publish:stable": "npm run version:stable && npm publish --tag latest",
31
- "publish:promote-beta": "npm dist-tag add guild-agents@$(node --input-type=commonjs -p \"require('./package.json').version\") beta"
31
+ "publish:promote-beta": "npm dist-tag add guild-agents@$(node --input-type=commonjs -p \"require('./package.json').version\") beta",
32
+ "eval": "node scripts/run-evals.js",
33
+ "eval:build-feature": "node scripts/run-evals.js build-feature",
34
+ "eval:council": "node scripts/run-evals.js council"
32
35
  },
33
36
  "keywords": [
34
37
  "claude",
@@ -1,6 +1,6 @@
1
1
  import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'fs';
2
2
  import { basename, join } from 'path';
3
- import { findWorkspaceRoot, WORKSPACE_FILE } from '../utils/workspace.js';
3
+ import { findWorkspaceRoot, loadWorkspace, runInMember, PRESET_COMMANDS, WORKSPACE_FILE } from '../utils/workspace.js';
4
4
 
5
5
  export async function createWorkspaceFile(name, memberPaths) {
6
6
  const members = memberPaths.map(p => ({
@@ -38,6 +38,43 @@ export async function addWorkspaceMember(memberPath) {
38
38
  writeFileSync(filePath, JSON.stringify(config, null, 2) + '\n', 'utf8');
39
39
  }
40
40
 
41
/**
 * Runs a preset or custom command in one workspace member, or in every member.
 *
 * Members are processed sequentially and every result is collected; a failing
 * member does not stop the remaining ones.
 *
 * @param {string|undefined} memberName - Name of the member to run in. When
 *   `options.all` is set and `preset` is omitted, this slot is interpreted as
 *   the preset name, because a CLI call such as `run --all test` delivers
 *   "test" as the first positional argument.
 * @param {string|undefined} preset - Key of PRESET_COMMANDS (test, lint, build).
 * @param {{ cmd?: string, all?: boolean }} [options] - `cmd` is a custom command
 *   line (split on whitespace; quoting is not interpreted); `all` targets every member.
 * @returns {Array<object>} One result per target, as returned by `runInMember`.
 * @throws {Error} When no workspace is found, the command cannot be resolved,
 *   or the requested member does not exist.
 */
export function runWorkspaceCommand(memberName, preset, options = {}) {
  const workspace = loadWorkspace();
  if (!workspace) throw new Error('No workspace found. Run `guild workspace init` first.');

  // With --all there is no member to name, so a lone positional is the preset.
  const presetName = options.all && preset === undefined ? memberName : preset;

  // Resolve command
  let cmd;
  let args;
  // Trim first so leading whitespace cannot produce an empty executable name.
  const customCommand = typeof options.cmd === 'string' ? options.cmd.trim() : '';
  if (customCommand) {
    [cmd, ...args] = customCommand.split(/\s+/);
  } else if (
    typeof presetName === 'string' &&
    // Own-property check: inherited keys such as "constructor" are not presets.
    Object.prototype.hasOwnProperty.call(PRESET_COMMANDS, presetName)
  ) {
    ({ cmd, args } = PRESET_COMMANDS[presetName]);
  } else {
    throw new Error(`Unknown command: "${presetName}". Use test, lint, build, or --cmd "...".`);
  }

  // Resolve members
  let targets;
  if (options.all) {
    targets = workspace.members;
  } else {
    const available = workspace.members.map(m => m.name).join(', ');
    if (memberName === undefined) {
      throw new Error(`No member specified. Pass a member name or use --all. Available: ${available}`);
    }
    const member = workspace.members.find(m => m.name === memberName);
    if (!member) {
      throw new Error(`Member "${memberName}" not found. Available: ${available}`);
    }
    targets = [member];
  }

  // Execute sequentially, collect all
  return targets.map(target => runInMember(target, cmd, args));
}
77
+
41
78
  export async function getWorkspaceStatus() {
42
79
  const root = findWorkspaceRoot();
43
80
  if (!root) throw new Error('No workspace found. Run `guild workspace init` first.');
@@ -0,0 +1,53 @@
1
+ {
2
+ "skill": "build-feature",
3
+ "evals": [
4
+ {
5
+ "id": "bf-has-core-phases",
6
+ "description": "Plan contains evaluate, specify, design, implement phases",
7
+ "expectations": [
8
+ { "text": "Has evaluate step", "assertion": "step-exists:evaluate" },
9
+ { "text": "Has specify step", "assertion": "step-exists:specify" },
10
+ { "text": "Has design step", "assertion": "step-exists:design" },
11
+ { "text": "Has implement step", "assertion": "step-exists:implement" }
12
+ ]
13
+ },
14
+ {
15
+ "id": "bf-has-quality-phases",
16
+ "description": "Plan contains review, QA, and completion phases",
17
+ "expectations": [
18
+ { "text": "Has review step", "assertion": "step-exists:review" },
19
+ { "text": "Has QA phase", "assertion": "step-exists:qa-phase" },
20
+ { "text": "Has completion step", "assertion": "step-exists:completion" }
21
+ ]
22
+ },
23
+ {
24
+ "id": "bf-advisor-uses-reasoning",
25
+ "description": "Advisor (evaluate) uses reasoning tier",
26
+ "expectations": [
27
+ { "text": "Evaluate uses reasoning tier", "assertion": "step-model-tier:evaluate:reasoning" }
28
+ ]
29
+ },
30
+ {
31
+ "id": "bf-developer-uses-execution",
32
+ "description": "Developer (implement) uses execution tier",
33
+ "expectations": [
34
+ { "text": "Implement uses execution tier", "assertion": "step-model-tier:implement:execution" }
35
+ ]
36
+ },
37
+ {
38
+ "id": "bf-gates-exist",
39
+ "description": "Quality gates exist at pre-review and final",
40
+ "expectations": [
41
+ { "text": "Pre-review gate exists", "assertion": "gate-exists:gate-pre-review" },
42
+ { "text": "Final gate exists", "assertion": "gate-exists:gate-final" }
43
+ ]
44
+ },
45
+ {
46
+ "id": "bf-minimum-steps",
47
+ "description": "Plan has at least 10 steps",
48
+ "expectations": [
49
+ { "text": "At least 10 steps", "assertion": "step-count:10" }
50
+ ]
51
+ }
52
+ ]
53
+ }
@@ -11,24 +11,30 @@ workflow:
11
11
  requires: [user-question]
12
12
  produces: [council-type, participant-roles]
13
13
  gate: true
14
+ - id: workspace-context
15
+ role: system
16
+ intent: "Detect workspace membership. If in a workspace, collect context from sibling repos (CLAUDE.md, PROJECT.md, SESSION.md) and build workspace context block."
17
+ requires: [council-type]
18
+ produces: [workspace-context]
19
+ condition: in-workspace
14
20
  - id: agent-1
15
21
  role: dynamic
16
22
  intent: "Analyze the question from specialized perspective. State position with concrete arguments."
17
- requires: [user-question, council-type]
23
+ requires: [user-question, council-type, workspace-context]
18
24
  produces: [perspective-1]
19
25
  model-tier: reasoning
20
26
  parallel: [agent-2, agent-3]
21
27
  - id: agent-2
22
28
  role: dynamic
23
29
  intent: "Analyze the question from specialized perspective. State position with concrete arguments."
24
- requires: [user-question, council-type]
30
+ requires: [user-question, council-type, workspace-context]
25
31
  produces: [perspective-2]
26
32
  model-tier: reasoning
27
33
  parallel: [agent-1, agent-3]
28
34
  - id: agent-3
29
35
  role: dynamic
30
36
  intent: "Analyze the question from specialized perspective. State position with concrete arguments."
31
- requires: [user-question, council-type]
37
+ requires: [user-question, council-type, workspace-context]
32
38
  produces: [perspective-3]
33
39
  model-tier: reasoning
34
40
  parallel: [agent-1, agent-2]
@@ -114,12 +120,23 @@ Analyze the user's question and determine which council type applies:
114
120
 
115
121
  ### Step 2 — Convene agents
116
122
 
123
+ **Workspace detection:** Before invoking agents, check if the project is inside a workspace:
124
+
125
+ 1. Look for a `guild-workspace.json` file by searching upward from the project root
126
+ 2. If found, load the workspace config and identify which member this project is
127
+ 3. Read CLAUDE.md, PROJECT.md, and SESSION.md from each sibling member repo
128
+ 4. Build a workspace context block with:
129
+ - Workspace name
130
+ - Each sibling's stack, structure summary, and current task
131
+ - Absolute paths so the agent can read any sibling file for deeper analysis
132
+
117
133
  Invoke the 3 corresponding agents IN PARALLEL using Task tool with `model: "opus"` (all council agents use reasoning tier). Each agent:
118
134
 
119
135
  1. Reads their `.claude/agents/[name].md` file to assume their role
120
136
  2. Reads `CLAUDE.md` and `SESSION.md` for project context
121
- 3. Analyzes the question from their specialized perspective
122
- 4. States their position with concrete arguments
137
+ 3. **If in a workspace:** receives the workspace context block and considers cross-repo impact as part of their analysis. They may read files from sibling repos using the provided paths.
138
+ 4. Analyzes the question from their specialized perspective
139
+ 5. States their position with concrete arguments
123
140
 
124
141
  ### Step 3 — Present debate
125
142
 
@@ -191,7 +208,11 @@ Example:
191
208
  Task tool with:
192
209
  subagent_type: "general-purpose"
193
210
  model: "opus"
194
- prompt: "Read .claude/agents/tech-lead.md and assume that role. Then: [debate question]"
211
+ prompt: "Read .claude/agents/tech-lead.md and assume that role. Then: [debate question]
212
+
213
+ [If in workspace, append:]
214
+ ## Workspace context
215
+ [workspace context block from Step 2]"
195
216
  ```
196
217
 
197
218
  The `model` parameter is resolved from the step's `model-tier`: all council agents use reasoning→`"opus"`.
@@ -0,0 +1,41 @@
1
+ {
2
+ "skill": "council",
3
+ "evals": [
4
+ {
5
+ "id": "council-three-parallel-agents",
6
+ "description": "Council has 3 agent steps in parallel",
7
+ "expectations": [
8
+ { "text": "Agent-1 exists", "assertion": "step-exists:agent-1" },
9
+ { "text": "Agent-2 exists", "assertion": "step-exists:agent-2" },
10
+ { "text": "Agent-3 exists", "assertion": "step-exists:agent-3" },
11
+ { "text": "Agent-1 is parallel", "assertion": "step-parallel:agent-1" },
12
+ { "text": "Agent-2 is parallel", "assertion": "step-parallel:agent-2" },
13
+ { "text": "Agent-3 is parallel", "assertion": "step-parallel:agent-3" }
14
+ ]
15
+ },
16
+ {
17
+ "id": "council-agents-use-reasoning",
18
+ "description": "All council agents use reasoning tier",
19
+ "expectations": [
20
+ { "text": "Agent-1 uses reasoning", "assertion": "step-model-tier:agent-1:reasoning" },
21
+ { "text": "Agent-2 uses reasoning", "assertion": "step-model-tier:agent-2:reasoning" },
22
+ { "text": "Agent-3 uses reasoning", "assertion": "step-model-tier:agent-3:reasoning" }
23
+ ]
24
+ },
25
+ {
26
+ "id": "council-synthesize-gate",
27
+ "description": "Synthesize step exists with gate",
28
+ "expectations": [
29
+ { "text": "Synthesize step exists", "assertion": "step-exists:synthesize" },
30
+ { "text": "Synthesize has gate", "assertion": "gate-exists:synthesize" }
31
+ ]
32
+ },
33
+ {
34
+ "id": "council-workspace-context",
35
+ "description": "Workspace context step exists with condition",
36
+ "expectations": [
37
+ { "text": "Workspace-context step exists", "assertion": "step-exists:workspace-context" }
38
+ ]
39
+ }
40
+ ]
41
+ }
@@ -0,0 +1,139 @@
1
+ /**
2
+ * eval-runner.js — Skill evaluation framework for Guild.
3
+ *
4
+ * Runs assertions against parsed skill workflows to verify
5
+ * structural correctness. Compatible with anthropics/skills eval format.
6
+ */
7
+
8
+ import { readFileSync, existsSync } from 'fs';
9
+ import { join, dirname } from 'path';
10
+ import { fileURLToPath } from 'url';
11
+ import { parseSkill } from './workflow-parser.js';
12
+
13
+ const __dirname = dirname(fileURLToPath(import.meta.url));
14
+ const TEMPLATES_DIR = join(__dirname, '..', 'templates', 'skills');
15
+
16
/**
 * Evaluates a single assertion against a parsed workflow.
 *
 * An assertion is a string of the form "<type>:<args>". Supported types:
 *   - step-exists:<id>
 *   - step-role:<id>:<role>
 *   - step-model-tier:<id>:<tier>
 *   - step-requires:<id>:<dependency>
 *   - step-parallel:<id>
 *   - gate-exists:<id>
 *   - step-count:<minimum>
 *
 * Malformed input (non-string, no colon, missing second argument, non-numeric
 * count) and unknown types never throw; they yield `passed: false` with an
 * explanatory `evidence` message.
 *
 * @param {object} workflow - Parsed workflow with { version, steps[] }
 * @param {string} assertion - Assertion string (e.g. "step-exists:evaluate")
 * @returns {{ passed: boolean, evidence: string }}
 */
export function evaluateAssertion(workflow, assertion) {
  const malformed = () => ({ passed: false, evidence: `Malformed assertion: "${assertion}"` });

  if (typeof assertion !== 'string') return malformed();
  const colonIdx = assertion.indexOf(':');
  if (colonIdx === -1) return malformed();

  const type = assertion.slice(0, colonIdx);
  const args = assertion.slice(colonIdx + 1);

  const findStep = (id) => workflow.steps.find(s => s.id === id);
  const notFound = (id) => ({ passed: false, evidence: `Step "${id}" not found` });

  switch (type) {
    case 'step-exists': {
      return findStep(args)
        ? { passed: true, evidence: `Step "${args}" found` }
        : { passed: false, evidence: `Step "${args}" not found in ${workflow.steps.map(s => s.id).join(', ')}` };
    }

    case 'step-role': {
      const [stepId, expectedRole] = args.split(':');
      // Without an expected value, `undefined === undefined` would pass for a
      // step that simply has no role.
      if (expectedRole === undefined) return malformed();
      const step = findStep(stepId);
      if (!step) return notFound(stepId);
      return step.role === expectedRole
        ? { passed: true, evidence: `Step "${stepId}" has role "${expectedRole}"` }
        : { passed: false, evidence: `Step "${stepId}" has role "${step.role}", expected "${expectedRole}"` };
    }

    case 'step-model-tier': {
      const [stepId, expectedTier] = args.split(':');
      // Same false-positive guard as step-role.
      if (expectedTier === undefined) return malformed();
      const step = findStep(stepId);
      if (!step) return notFound(stepId);
      return step.modelTier === expectedTier
        ? { passed: true, evidence: `Step "${stepId}" uses tier "${expectedTier}"` }
        : { passed: false, evidence: `Step "${stepId}" uses tier "${step.modelTier}", expected "${expectedTier}"` };
    }

    case 'step-requires': {
      const [stepId, dep] = args.split(':');
      if (dep === undefined) return malformed();
      const step = findStep(stepId);
      if (!step) return notFound(stepId);
      // A step without a `requires` list has no dependencies; do not crash on it.
      const requires = Array.isArray(step.requires) ? step.requires : [];
      return requires.includes(dep)
        ? { passed: true, evidence: `Step "${stepId}" requires "${dep}"` }
        : { passed: false, evidence: `Step "${stepId}" requires [${requires.join(', ')}], missing "${dep}"` };
    }

    case 'step-parallel': {
      const step = findStep(args);
      if (!step) return notFound(args);
      return Array.isArray(step.parallel) && step.parallel.length > 0
        ? { passed: true, evidence: `Step "${args}" is parallel with [${step.parallel.join(', ')}]` }
        : { passed: false, evidence: `Step "${args}" has no parallel group` };
    }

    case 'gate-exists': {
      const step = findStep(args);
      if (!step) return notFound(args);
      return step.gate === true
        ? { passed: true, evidence: `Step "${args}" has gate: true` }
        : { passed: false, evidence: `Step "${args}" has gate: ${step.gate}` };
    }

    case 'step-count': {
      const min = Number.parseInt(args, 10);
      // A non-numeric minimum would otherwise compare against NaN.
      if (Number.isNaN(min)) return malformed();
      const actual = workflow.steps.length;
      return actual >= min
        ? { passed: true, evidence: `Workflow has ${actual} steps (minimum ${min})` }
        : { passed: false, evidence: `Workflow has ${actual} steps, expected at least ${min}` };
    }

    default:
      return { passed: false, evidence: `Unknown assertion type: "${type}"` };
  }
}
94
+
95
/**
 * Reads and parses the evals.json that belongs to a skill template.
 *
 * The file is looked up at <TEMPLATES_DIR>/<skillName>/evals/evals.json.
 *
 * @param {string} skillName - Skill directory name (e.g. 'build-feature')
 * @returns {object|null} Parsed JSON content, or null when the file does not exist
 */
export function loadEvals(skillName) {
  const evalsFile = join(TEMPLATES_DIR, skillName, 'evals', 'evals.json');
  if (existsSync(evalsFile)) {
    const rawJson = readFileSync(evalsFile, 'utf8');
    return JSON.parse(rawJson);
  }
  return null;
}
105
+
106
/**
 * Executes every eval case defined for a skill template.
 *
 * Loads the skill's evals.json, parses its SKILL.md into a workflow, and
 * checks each expectation's assertion against that workflow. An eval case
 * passes only when all of its expectations pass.
 *
 * @param {string} skillName - Skill directory name
 * @returns {{ skill: string, results: Array<{ id: string, description: string, passed: boolean, expectations: Array }> }}
 * @throws {Error} When the skill has no evals or its SKILL.md has no workflow
 */
export function runEvals(skillName) {
  const evals = loadEvals(skillName);
  if (!evals) throw new Error(`No evals found for skill "${skillName}"`);

  const skillFile = join(TEMPLATES_DIR, skillName, 'SKILL.md');
  const { workflow } = parseSkill(readFileSync(skillFile, 'utf8'));
  if (!workflow) {
    throw new Error(`Skill "${skillName}" has no workflow definition`);
  }

  // Grades one expectation; the assertion outcome is spread last so its
  // fields take precedence, as before.
  const gradeExpectation = ({ text, assertion }) => ({
    text,
    assertion,
    ...evaluateAssertion(workflow, assertion),
  });

  // Grades one eval case from its individual expectations.
  const gradeCase = (evalCase) => {
    const expectations = evalCase.expectations.map(gradeExpectation);
    return {
      id: evalCase.id,
      description: evalCase.description,
      passed: expectations.every(({ passed }) => passed),
      expectations,
    };
  };

  return { skill: skillName, results: evals.evals.map(gradeCase) };
}
@@ -1,8 +1,15 @@
1
1
  import { existsSync, readFileSync, readdirSync } from 'fs';
2
2
  import { join, dirname, resolve } from 'path';
3
+ import { execFileSync } from 'node:child_process';
3
4
 
4
5
  export const WORKSPACE_FILE = 'guild-workspace.json';
5
6
 
7
/**
 * Preset name → executable and argument list used by `guild workspace run`.
 *
 * The table, each entry and each `args` array are frozen: entries are handed
 * to callers by reference, so an accidental mutation would otherwise change
 * the preset for every later run.
 */
export const PRESET_COMMANDS = Object.freeze({
  test: Object.freeze({ cmd: 'npm', args: Object.freeze(['test']) }),
  lint: Object.freeze({ cmd: 'npm', args: Object.freeze(['run', 'lint']) }),
  build: Object.freeze({ cmd: 'npm', args: Object.freeze(['run', 'build']) }),
});
12
+
6
13
  export function findWorkspaceRoot(startDir = process.cwd()) {
7
14
  let dir = resolve(startDir);
8
15
  while (true) {
@@ -80,3 +87,85 @@ export function generateWorkspaceContext(workspace, currentMemberName) {
80
87
 
81
88
  return lines.join('\n');
82
89
  }
90
+
91
/**
 * Returns the trimmed first capture group of `pattern` in the file at
 * `filePath`, or null when the file is missing, nothing matches, or the
 * capture is blank.
 */
function extractFromMemberFile(filePath, pattern) {
  if (!existsSync(filePath)) return null;
  const match = readFileSync(filePath, 'utf8').match(pattern);
  if (!match) return null;
  return match[1].trim() || null;
}

/**
 * Builds a Markdown context block describing every sibling member of a workspace.
 *
 * For each member other than `currentMemberName`, emits a heading with its
 * absolute path, then (when present) the stack from PROJECT.md, the first
 * line of the "Project structure" section of CLAUDE.md, and the current task
 * from SESSION.md, followed by a hint that files under that path can be read.
 *
 * @param {{ name: string, members: Array<{ name: string, absolutePath: string }> }|null|undefined} workspace
 *   Workspace config; a falsy value yields an empty string.
 * @param {string} currentMemberName - Member to exclude from the siblings list.
 * @returns {string} The context block, or '' when there is no workspace or no sibling.
 */
export function collectMemberContext(workspace, currentMemberName) {
  if (!workspace) return '';

  const siblings = workspace.members.filter(m => m.name !== currentMemberName);
  if (siblings.length === 0) return '';

  // Label, file and extraction pattern, in output order.
  const sources = [
    { label: 'Stack', file: 'PROJECT.md', pattern: /\*\*Stack:\*\*\s*(.+)/ },
    {
      label: 'Structure',
      file: 'CLAUDE.md',
      // Tolerates CRLF line endings and blank lines between the heading and
      // its first content line, then captures that first non-blank line.
      pattern: /## Project structure[ \t]*\r?\n(?:[ \t]*\r?\n)*[ \t]*(\S.*)/,
    },
    { label: 'Current task', file: 'SESSION.md', pattern: /\*\*Current task:\*\*\s*(.+)/ },
  ];

  const lines = [`## Workspace: ${workspace.name}`, ''];

  for (const member of siblings) {
    lines.push(`### ${member.name} (sibling — ${member.absolutePath})`);

    for (const { label, file, pattern } of sources) {
      const value = extractFromMemberFile(join(member.absolutePath, file), pattern);
      if (value !== null) {
        lines.push(`- **${label}:** ${value}`);
      }
    }

    lines.push(`You can read any file under ${member.absolutePath}/ for deeper analysis.`);
    lines.push('');
  }

  return lines.join('\n').trim();
}
135
+
136
/**
 * Runs a command synchronously inside a workspace member's directory and
 * reports the outcome without throwing.
 *
 * @param {{ name: string, absolutePath: string }} member - Target member.
 * @param {string} cmd - Executable to run (no shell is involved).
 * @param {string[]} args - Arguments passed to the executable.
 * @returns {{ member: string, status: 'passed'|'failed', output: string, duration: number }}
 *   `status` is 'passed' when the process call returns normally and 'failed'
 *   when the directory is missing or the call throws. `output` is the trimmed
 *   stdout on success; on failure it is stdout followed by stderr, or the
 *   error message when nothing was captured. `duration` is in milliseconds.
 */
export function runInMember(member, cmd, args) {
  if (!existsSync(member.absolutePath)) {
    return {
      member: member.name,
      status: 'failed',
      output: `Directory not found: ${member.absolutePath}`,
      duration: 0,
    };
  }

  const start = Date.now();
  try {
    const stdout = execFileSync(cmd, args, {
      cwd: member.absolutePath,
      encoding: 'utf8',
      // Capture everything so member output does not interleave with ours.
      stdio: ['pipe', 'pipe', 'pipe'],
    });
    return {
      member: member.name,
      status: 'passed',
      output: stdout.trim(),
      duration: Date.now() - start,
    };
  } catch (error) {
    const duration = Date.now() - start;
    const captured = `${error.stdout || ''}${error.stderr || ''}`.trim();
    return {
      member: member.name,
      status: 'failed',
      // When the command could not be started there is no stdout/stderr;
      // surface the error itself so the failure is never reported blank.
      output: captured || error.message || String(error),
      duration,
    };
  }
}
171
+ }