npm - guild-agents - Versions diffs - 1.2.0 → 1.3.0 - Mend

guild-agents 1.2.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

package/bin/guild.js +27 -0
package/package.json +5 -2
package/src/commands/workspace.js +38 -1
package/src/templates/skills/build-feature/evals/evals.json +53 -0
package/src/templates/skills/council/SKILL.md +27 -6
package/src/templates/skills/council/evals/evals.json +41 -0
package/src/utils/eval-runner.js +139 -0
package/src/utils/workspace.js +89 -0

package/bin/guild.js CHANGED Viewed

@@ -225,4 +225,31 @@ workspaceCmd
     }
   });
+// guild workspace run — runs a preset or custom command in one member (or in
+// every member with --all), prints one status line per member, and exits with
+// code 1 when any member failed or the command could not be run at all.
+workspaceCmd
+  .command('run')
+  .description('Run a command in a workspace member repo')
+  .argument('[member]', 'Member name (or omit with --all)')
+  .argument('[preset]', 'Preset command: test, lint, build')
+  .option('--cmd <command>', 'Custom command to run')
+  .option('--all', 'Run in all workspace members')
+  .action(async (member, preset, options) => {
+    try {
+      const { runWorkspaceCommand: runCommand } = await import('../src/commands/workspace.js');
+      const results = runCommand(member, preset, options);
+      let sawFailure = false;
+      results.forEach((result) => {
+        const hasFailed = result.status === 'failed';
+        const icon = result.status === 'passed' ? '\u2705' : '\u274C';
+        console.log(`${icon} ${result.member}: ${result.status} (${result.duration}ms)`);
+        // Captured output is echoed only for failed members.
+        if (hasFailed && result.output) {
+          console.log(result.output);
+        }
+        sawFailure = sawFailure || hasFailed;
+      });
+      if (sawFailure) process.exit(1);
+    } catch (err) {
+      console.error(err.message);
+      process.exit(1);
+    }
+  });
 program.parse();

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "guild-agents",
-  "version": "1.2.0",
+  "version": "1.3.0",
   "description": "Specification-driven development CLI for Claude Code — think before you build",
   "type": "module",
   "files": [
@@ -28,7 +28,10 @@
     "publish:snapshot": "npm run version:snapshot && npm publish --tag snapshot",
     "publish:beta": "npm run version:beta && npm publish --tag beta",
     "publish:stable": "npm run version:stable && npm publish --tag latest",
-    "publish:promote-beta": "npm dist-tag add guild-agents@$(node --input-type=commonjs -p \"require('./package.json').version\") beta"
+    "publish:promote-beta": "npm dist-tag add guild-agents@$(node --input-type=commonjs -p \"require('./package.json').version\") beta",
+    "eval": "node scripts/run-evals.js",
+    "eval:build-feature": "node scripts/run-evals.js build-feature",
+    "eval:council": "node scripts/run-evals.js council"
   },
   "keywords": [
     "claude",

package/src/commands/workspace.js CHANGED Viewed

@@ -1,6 +1,6 @@
 import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'fs';
 import { basename, join } from 'path';
-import { findWorkspaceRoot, WORKSPACE_FILE } from '../utils/workspace.js';
+import { findWorkspaceRoot, loadWorkspace, runInMember, PRESET_COMMANDS, WORKSPACE_FILE } from '../utils/workspace.js';
 export async function createWorkspaceFile(name, memberPaths) {
   const members = memberPaths.map(p => ({
@@ -38,6 +38,43 @@ export async function addWorkspaceMember(memberPath) {
   writeFileSync(filePath, JSON.stringify(config, null, 2) + '\n', 'utf8');
 }
+/**
+ * Runs a preset or custom command in one workspace member, or in all of them.
+ *
+ * @param {string|undefined} memberName - Name of the member to target. When
+ *   `options.all` is set and `preset` is undefined, this value is treated as
+ *   the preset instead (so `guild workspace run --all test` works).
+ * @param {string|undefined} preset - Key of PRESET_COMMANDS. Ignored when
+ *   `options.cmd` provides a command.
+ * @param {{ cmd?: string, all?: boolean }} options - `cmd` is a custom command
+ *   line that is split on whitespace (quoted arguments are NOT supported);
+ *   `all` targets every workspace member.
+ * @returns {Array} One runInMember result per target, in member order.
+ * @throws {Error} If no workspace is found, the command cannot be resolved,
+ *   or the named member does not exist.
+ */
+export function runWorkspaceCommand(memberName, preset, options) {
+  const workspace = loadWorkspace();
+  if (!workspace) throw new Error('No workspace found. Run `guild workspace init` first.');
+  // The CLI declares [member] before [preset]. With --all the member is
+  // omitted, so a lone positional (the preset) arrives in the member slot.
+  const presetName = options.all && preset === undefined ? memberName : preset;
+  // Resolve command
+  let cmd, args;
+  // Trim and drop empty fragments so stray whitespace never yields an empty
+  // executable name or empty arguments.
+  const customParts = typeof options.cmd === 'string'
+    ? options.cmd.trim().split(/\s+/).filter(Boolean)
+    : [];
+  if (customParts.length > 0) {
+    [cmd, ...args] = customParts;
+  } else if (
+    typeof presetName === 'string' &&
+    // Own-property check: inherited keys such as "constructor" are not presets.
+    Object.prototype.hasOwnProperty.call(PRESET_COMMANDS, presetName)
+  ) {
+    ({ cmd, args } = PRESET_COMMANDS[presetName]);
+  } else {
+    throw new Error(`Unknown command: "${presetName}". Use test, lint, build, or --cmd "...".`);
+  }
+  // Resolve members
+  let targets;
+  if (options.all) {
+    targets = workspace.members;
+  } else {
+    const member = workspace.members.find(m => m.name === memberName);
+    if (!member) {
+      const available = workspace.members.map(m => m.name).join(', ');
+      throw new Error(`Member "${memberName}" not found. Available: ${available}`);
+    }
+    targets = [member];
+  }
+  // Execute sequentially; a failing member does not stop the remaining ones
+  // because runInMember reports failure in its result instead of throwing.
+  return targets.map(target => runInMember(target, cmd, args));
+}
 export async function getWorkspaceStatus() {
   const root = findWorkspaceRoot();
   if (!root) throw new Error('No workspace found. Run `guild workspace init` first.');

package/src/templates/skills/build-feature/evals/evals.json ADDED Viewed

@@ -0,0 +1,53 @@
+{
+  "skill": "build-feature",
+  "evals": [
+    {
+      "id": "bf-has-core-phases",
+      "description": "Plan contains evaluate, specify, design, implement phases",
+      "expectations": [
+        { "text": "Has evaluate step", "assertion": "step-exists:evaluate" },
+        { "text": "Has specify step", "assertion": "step-exists:specify" },
+        { "text": "Has design step", "assertion": "step-exists:design" },
+        { "text": "Has implement step", "assertion": "step-exists:implement" }
+      ]
+    },
+    {
+      "id": "bf-has-quality-phases",
+      "description": "Plan contains review, QA, and completion phases",
+      "expectations": [
+        { "text": "Has review step", "assertion": "step-exists:review" },
+        { "text": "Has QA phase", "assertion": "step-exists:qa-phase" },
+        { "text": "Has completion step", "assertion": "step-exists:completion" }
+      ]
+    },
+    {
+      "id": "bf-advisor-uses-reasoning",
+      "description": "Advisor (evaluate) uses reasoning tier",
+      "expectations": [
+        { "text": "Evaluate uses reasoning tier", "assertion": "step-model-tier:evaluate:reasoning" }
+      ]
+    },
+    {
+      "id": "bf-developer-uses-execution",
+      "description": "Developer (implement) uses execution tier",
+      "expectations": [
+        { "text": "Implement uses execution tier", "assertion": "step-model-tier:implement:execution" }
+      ]
+    },
+    {
+      "id": "bf-gates-exist",
+      "description": "Quality gates exist at pre-review and final",
+      "expectations": [
+        { "text": "Pre-review gate exists", "assertion": "gate-exists:gate-pre-review" },
+        { "text": "Final gate exists", "assertion": "gate-exists:gate-final" }
+      ]
+    },
+    {
+      "id": "bf-minimum-steps",
+      "description": "Plan has at least 10 steps",
+      "expectations": [
+        { "text": "At least 10 steps", "assertion": "step-count:10" }
+      ]
+    }
+  ]
+}

package/src/templates/skills/council/SKILL.md CHANGED Viewed

@@ -11,24 +11,30 @@ workflow:
       requires: [user-question]
       produces: [council-type, participant-roles]
       gate: true
+    - id: workspace-context
+      role: system
+      intent: "Detect workspace membership. If in a workspace, collect context from sibling repos (CLAUDE.md, PROJECT.md, SESSION.md) and build workspace context block."
+      requires: [council-type]
+      produces: [workspace-context]
+      condition: in-workspace
     - id: agent-1
       role: dynamic
       intent: "Analyze the question from specialized perspective. State position with concrete arguments."
-      requires: [user-question, council-type]
+      requires: [user-question, council-type, workspace-context]
       produces: [perspective-1]
       model-tier: reasoning
       parallel: [agent-2, agent-3]
     - id: agent-2
       role: dynamic
       intent: "Analyze the question from specialized perspective. State position with concrete arguments."
-      requires: [user-question, council-type]
+      requires: [user-question, council-type, workspace-context]
       produces: [perspective-2]
       model-tier: reasoning
       parallel: [agent-1, agent-3]
     - id: agent-3
       role: dynamic
       intent: "Analyze the question from specialized perspective. State position with concrete arguments."
-      requires: [user-question, council-type]
+      requires: [user-question, council-type, workspace-context]
       produces: [perspective-3]
       model-tier: reasoning
       parallel: [agent-1, agent-2]
@@ -114,12 +120,23 @@ Analyze the user's question and determine which council type applies:
 ### Step 2 — Convene agents
+**Workspace detection:** Before invoking agents, check if the project is inside a workspace:
+1. Look for a `guild-workspace.json` file by searching upward from the project root
+2. If found, load the workspace config and identify which member this project is
+3. Read CLAUDE.md, PROJECT.md, and SESSION.md from each sibling member repo
+4. Build a workspace context block with:
+   - Workspace name
+   - Each sibling's stack, structure summary, and current task
+   - Absolute paths so the agent can read any sibling file for deeper analysis
 Invoke the 3 corresponding agents IN PARALLEL using Task tool with `model: "opus"` (all council agents use reasoning tier). Each agent:
 1. Reads their `.claude/agents/[name].md` file to assume their role
 2. Reads `CLAUDE.md` and `SESSION.md` for project context
-3. Analyzes the question from their specialized perspective
-4. States their position with concrete arguments
+3. **If in a workspace:** receives the workspace context block and considers cross-repo impact as part of their analysis. They may read files from sibling repos using the provided paths.
+4. Analyzes the question from their specialized perspective
+5. States their position with concrete arguments
 ### Step 3 — Present debate
@@ -191,7 +208,11 @@ Example:
 Task tool with:
   subagent_type: "general-purpose"
   model: "opus"
-  prompt: "Read .claude/agents/tech-lead.md and assume that role. Then: [debate question]"
+  prompt: "Read .claude/agents/tech-lead.md and assume that role. Then: [debate question]
+[If in workspace, append:]
+## Workspace context
+[workspace context block from Step 2]"
 ```
 The `model` parameter is resolved from the step's `model-tier`: all council agents use reasoning→`"opus"`.

package/src/templates/skills/council/evals/evals.json ADDED Viewed

@@ -0,0 +1,41 @@
+{
+  "skill": "council",
+  "evals": [
+    {
+      "id": "council-three-parallel-agents",
+      "description": "Council has 3 agent steps in parallel",
+      "expectations": [
+        { "text": "Agent-1 exists", "assertion": "step-exists:agent-1" },
+        { "text": "Agent-2 exists", "assertion": "step-exists:agent-2" },
+        { "text": "Agent-3 exists", "assertion": "step-exists:agent-3" },
+        { "text": "Agent-1 is parallel", "assertion": "step-parallel:agent-1" },
+        { "text": "Agent-2 is parallel", "assertion": "step-parallel:agent-2" },
+        { "text": "Agent-3 is parallel", "assertion": "step-parallel:agent-3" }
+      ]
+    },
+    {
+      "id": "council-agents-use-reasoning",
+      "description": "All council agents use reasoning tier",
+      "expectations": [
+        { "text": "Agent-1 uses reasoning", "assertion": "step-model-tier:agent-1:reasoning" },
+        { "text": "Agent-2 uses reasoning", "assertion": "step-model-tier:agent-2:reasoning" },
+        { "text": "Agent-3 uses reasoning", "assertion": "step-model-tier:agent-3:reasoning" }
+      ]
+    },
+    {
+      "id": "council-synthesize-gate",
+      "description": "Synthesize step exists with gate",
+      "expectations": [
+        { "text": "Synthesize step exists", "assertion": "step-exists:synthesize" },
+        { "text": "Synthesize has gate", "assertion": "gate-exists:synthesize" }
+      ]
+    },
+    {
+      "id": "council-workspace-context",
+      "description": "Workspace context step exists",
+      "expectations": [
+        { "text": "Workspace-context step exists", "assertion": "step-exists:workspace-context" }
+      ]
+    }
+  ]
+}

package/src/utils/eval-runner.js ADDED Viewed

@@ -0,0 +1,139 @@
+/**
+ * eval-runner.js — Skill evaluation framework for Guild.
+ *
+ * Runs assertions against parsed skill workflows to verify
+ * structural correctness. Compatible with anthropics/skills eval format.
+ */
+import { readFileSync, existsSync } from 'fs';
+import { join, dirname } from 'path';
+import { fileURLToPath } from 'url';
+import { parseSkill } from './workflow-parser.js';
+const __dirname = dirname(fileURLToPath(import.meta.url));
+const TEMPLATES_DIR = join(__dirname, '..', 'templates', 'skills');
+/**
+ * Evaluates a single assertion against a parsed workflow.
+ *
+ * Assertion grammar (`<type>:<args>`):
+ *   step-exists:<id>             a step with that id is present
+ *   step-role:<id>:<role>        step.role equals <role>
+ *   step-model-tier:<id>:<tier>  step.modelTier equals <tier>
+ *   step-requires:<id>:<dep>     step.requires contains <dep>
+ *   step-parallel:<id>           step.parallel is a non-empty list
+ *   gate-exists:<id>             step.gate is exactly true
+ *   step-count:<min>             the workflow has at least <min> steps
+ *
+ * Bad input never throws: a malformed assertion, an unknown type, or a
+ * workflow without a `steps` array is reported as a failed result.
+ *
+ * @param {object} workflow - Parsed workflow with { version, steps[] }
+ * @param {string} assertion - Assertion string (e.g. "step-exists:evaluate")
+ * @returns {{ passed: boolean, evidence: string }}
+ */
+export function evaluateAssertion(workflow, assertion) {
+  const malformed = () => ({ passed: false, evidence: `Malformed assertion: "${assertion}"` });
+  if (typeof assertion !== 'string') return malformed();
+  const colonIdx = assertion.indexOf(':');
+  if (colonIdx === -1) return malformed();
+  const type = assertion.slice(0, colonIdx);
+  const args = assertion.slice(colonIdx + 1);
+  const steps = workflow && Array.isArray(workflow.steps) ? workflow.steps : [];
+  const findStep = (id) => steps.find(s => s.id === id);
+  const notFound = (id) => ({ passed: false, evidence: `Step "${id}" not found` });
+  switch (type) {
+    case 'step-exists': {
+      return findStep(args)
+        ? { passed: true, evidence: `Step "${args}" found` }
+        : { passed: false, evidence: `Step "${args}" not found in ${steps.map(s => s.id).join(', ')}` };
+    }
+    case 'step-role': {
+      const [stepId, expectedRole] = args.split(':');
+      // Without an expected value, `undefined === undefined` would pass for
+      // any step that simply has no role.
+      if (!expectedRole) return malformed();
+      const step = findStep(stepId);
+      if (!step) return notFound(stepId);
+      return step.role === expectedRole
+        ? { passed: true, evidence: `Step "${stepId}" has role "${expectedRole}"` }
+        : { passed: false, evidence: `Step "${stepId}" has role "${step.role}", expected "${expectedRole}"` };
+    }
+    case 'step-model-tier': {
+      const [stepId, expectedTier] = args.split(':');
+      if (!expectedTier) return malformed();
+      const step = findStep(stepId);
+      if (!step) return notFound(stepId);
+      return step.modelTier === expectedTier
+        ? { passed: true, evidence: `Step "${stepId}" uses tier "${expectedTier}"` }
+        : { passed: false, evidence: `Step "${stepId}" uses tier "${step.modelTier}", expected "${expectedTier}"` };
+    }
+    case 'step-requires': {
+      const [stepId, dep] = args.split(':');
+      if (!dep) return malformed();
+      const step = findStep(stepId);
+      if (!step) return notFound(stepId);
+      // A step may omit `requires`; treat that as "requires nothing".
+      const requires = Array.isArray(step.requires) ? step.requires : [];
+      return requires.includes(dep)
+        ? { passed: true, evidence: `Step "${stepId}" requires "${dep}"` }
+        : { passed: false, evidence: `Step "${stepId}" requires [${requires.join(', ')}], missing "${dep}"` };
+    }
+    case 'step-parallel': {
+      const step = findStep(args);
+      if (!step) return notFound(args);
+      return Array.isArray(step.parallel) && step.parallel.length > 0
+        ? { passed: true, evidence: `Step "${args}" is parallel with [${step.parallel.join(', ')}]` }
+        : { passed: false, evidence: `Step "${args}" has no parallel group` };
+    }
+    case 'gate-exists': {
+      const step = findStep(args);
+      if (!step) return notFound(args);
+      return step.gate === true
+        ? { passed: true, evidence: `Step "${args}" has gate: true` }
+        : { passed: false, evidence: `Step "${args}" has gate: ${step.gate}` };
+    }
+    case 'step-count': {
+      const min = Number.parseInt(args, 10);
+      // A non-numeric minimum would otherwise fail with "at least NaN".
+      if (Number.isNaN(min)) return malformed();
+      const actual = steps.length;
+      return actual >= min
+        ? { passed: true, evidence: `Workflow has ${actual} steps (minimum ${min})` }
+        : { passed: false, evidence: `Workflow has ${actual} steps, expected at least ${min}` };
+    }
+    default:
+      return { passed: false, evidence: `Unknown assertion type: "${type}"` };
+  }
+}
+/**
+ * Reads the eval definitions bundled with a skill template.
+ * @param {string} skillName - Skill directory name (e.g. 'build-feature')
+ * @returns {object|null} Parsed contents of `<skill>/evals/evals.json`, or null when that file is absent
+ */
+export function loadEvals(skillName) {
+  const evalsFile = join(TEMPLATES_DIR, skillName, 'evals', 'evals.json');
+  if (existsSync(evalsFile)) {
+    const rawJson = readFileSync(evalsFile, 'utf8');
+    return JSON.parse(rawJson);
+  }
+  return null;
+}
+/**
+ * Executes every eval case defined for a skill template.
+ * Loads the skill's evals.json, parses its SKILL.md, and checks each
+ * expectation's assertion against the parsed workflow. An eval case passes
+ * only when all of its expectations pass.
+ * @param {string} skillName - Skill directory name
+ * @returns {{ skill: string, results: Array<{ id: string, description: string, passed: boolean, expectations: Array }> }}
+ * @throws {Error} When the skill has no evals or its SKILL.md defines no workflow
+ */
+export function runEvals(skillName) {
+  const evalSuite = loadEvals(skillName);
+  if (!evalSuite) throw new Error(`No evals found for skill "${skillName}"`);
+  const skillSource = readFileSync(join(TEMPLATES_DIR, skillName, 'SKILL.md'), 'utf8');
+  const parsedSkill = parseSkill(skillSource);
+  if (!parsedSkill.workflow) {
+    throw new Error(`Skill "${skillName}" has no workflow definition`);
+  }
+  const results = [];
+  for (const evalCase of evalSuite.evals) {
+    const expectations = [];
+    for (const expectation of evalCase.expectations) {
+      const outcome = evaluateAssertion(parsedSkill.workflow, expectation.assertion);
+      expectations.push({ text: expectation.text, assertion: expectation.assertion, ...outcome });
+    }
+    results.push({
+      id: evalCase.id,
+      description: evalCase.description,
+      passed: expectations.every(e => e.passed),
+      expectations,
+    });
+  }
+  return { skill: skillName, results };
+}

package/src/utils/workspace.js CHANGED Viewed

@@ -1,8 +1,15 @@
 import { existsSync, readFileSync, readdirSync } from 'fs';
 import { join, dirname, resolve } from 'path';
+import { execFileSync } from 'node:child_process';
 export const WORKSPACE_FILE = 'guild-workspace.json';
+// Preset names accepted by `guild workspace run`, each mapped to the
+// executable (`cmd`) and argument list (`args`) that is run for it.
+export const PRESET_COMMANDS = {
+  test:  { cmd: 'npm', args: ['test'] },
+  lint:  { cmd: 'npm', args: ['run', 'lint'] },
+  build: { cmd: 'npm', args: ['run', 'build'] },
+};
 export function findWorkspaceRoot(startDir = process.cwd()) {
   let dir = resolve(startDir);
   while (true) {
@@ -80,3 +87,85 @@ export function generateWorkspaceContext(workspace, currentMemberName) {
   return lines.join('\n');
 }
+/**
+ * Returns the trimmed first capture group of `pattern` in the file at
+ * `filePath`, or null when the file does not exist or nothing matches.
+ */
+function extractFirstMatchFromFile(filePath, pattern) {
+  if (!existsSync(filePath)) return null;
+  const match = readFileSync(filePath, 'utf8').match(pattern);
+  return match ? match[1].trim() : null;
+}
+/**
+ * Builds a markdown context block describing every sibling of the current
+ * workspace member: its stack (PROJECT.md), the first line under
+ * "## Project structure" (CLAUDE.md) and its current task (SESSION.md).
+ * Files or fields that are missing are skipped.
+ *
+ * @param {{ name: string, members: Array<{ name: string, absolutePath: string }> }|null} workspace
+ * @param {string} currentMemberName - Member to leave out of the block
+ * @returns {string} The context block, or '' when there is no workspace or no sibling
+ */
+export function collectMemberContext(workspace, currentMemberName) {
+  if (!workspace) return '';
+  const siblings = workspace.members.filter(m => m.name !== currentMemberName);
+  if (siblings.length === 0) return '';
+  const fields = [
+    { file: 'PROJECT.md', label: 'Stack', pattern: /\*\*Stack:\*\*\s*(.+)/ },
+    // `\r?` lets the heading match in files saved with CRLF line endings.
+    { file: 'CLAUDE.md', label: 'Structure', pattern: /## Project structure\r?\n(.+)/ },
+    { file: 'SESSION.md', label: 'Current task', pattern: /\*\*Current task:\*\*\s*(.+)/ },
+  ];
+  const lines = [`## Workspace: ${workspace.name}`, ''];
+  for (const member of siblings) {
+    lines.push(`### ${member.name} (sibling — ${member.absolutePath})`);
+    for (const { file, label, pattern } of fields) {
+      const value = extractFirstMatchFromFile(join(member.absolutePath, file), pattern);
+      if (value !== null) {
+        lines.push(`- **${label}:** ${value}`);
+      }
+    }
+    lines.push(`You can read any file under ${member.absolutePath}/ for deeper analysis.`);
+    lines.push('');
+  }
+  return lines.join('\n').trim();
+}
+/**
+ * Runs `cmd` with `args` inside a workspace member's directory and reports
+ * the outcome. A failing command is reported in the result, not thrown.
+ *
+ * @param {{ name: string, absolutePath: string }} member - Target member
+ * @param {string} cmd - Executable to run (passed to execFileSync)
+ * @param {string[]} args - Arguments for the executable
+ * @returns {{ member: string, status: 'passed'|'failed', output: string, duration: number }}
+ *   `output` is the trimmed stdout on success. On failure it is the trimmed
+ *   stdout + stderr, or the error message when nothing was captured.
+ *   `duration` is in milliseconds (0 when the directory is missing).
+ */
+export function runInMember(member, cmd, args) {
+  if (!existsSync(member.absolutePath)) {
+    return {
+      member: member.name,
+      status: 'failed',
+      output: `Directory not found: ${member.absolutePath}`,
+      duration: 0,
+    };
+  }
+  const start = Date.now();
+  try {
+    const stdout = execFileSync(cmd, args, {
+      cwd: member.absolutePath,
+      encoding: 'utf8',
+      stdio: ['pipe', 'pipe', 'pipe'],
+    });
+    return {
+      member: member.name,
+      status: 'passed',
+      output: stdout.trim(),
+      duration: Date.now() - start,
+    };
+  } catch (error) {
+    const duration = Date.now() - start;
+    const captured = `${error.stdout || ''}${error.stderr || ''}`.trim();
+    return {
+      member: member.name,
+      status: 'failed',
+      // When the executable cannot be spawned (e.g. not found) or the command
+      // fails silently, nothing is captured; fall back to the error message so
+      // the caller always has a reason to show.
+      output: captured || error.message || '',
+      duration,
+    };
+  }
+}