guild-agents 1.2.0 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/guild.js +27 -0
- package/package.json +5 -2
- package/src/commands/workspace.js +38 -1
- package/src/templates/skills/build-feature/evals/evals.json +53 -0
- package/src/templates/skills/council/SKILL.md +27 -6
- package/src/templates/skills/council/evals/evals.json +41 -0
- package/src/utils/eval-runner.js +139 -0
- package/src/utils/workspace.js +89 -0
package/bin/guild.js
CHANGED
|
@@ -225,4 +225,31 @@ workspaceCmd
|
|
|
225
225
|
}
|
|
226
226
|
});
|
|
227
227
|
|
|
228
|
+
// guild workspace run
// Registers `guild workspace run [member] [preset]`: runs a preset or --cmd
// command in one member (or every member with --all), prints one status line
// per member, and exits with code 1 when any member failed or an error is thrown.
workspaceCmd
  .command('run')
  .description('Run a command in a workspace member repo')
  .argument('[member]', 'Member name (or omit with --all)')
  .argument('[preset]', 'Preset command: test, lint, build')
  .option('--cmd <command>', 'Custom command to run')
  .option('--all', 'Run in all workspace members')
  .action(async (member, preset, options) => {
    try {
      // Loaded on demand, only when this subcommand actually runs.
      const { runWorkspaceCommand: run } = await import('../src/commands/workspace.js');
      const results = run(member, preset, options);

      let anyFailed = false;
      for (const result of results) {
        const didFail = result.status === 'failed';
        anyFailed = anyFailed || didFail;

        const icon = result.status === 'passed' ? '\u2705' : '\u274C';
        console.log(`${icon} ${result.member}: ${result.status} (${result.duration}ms)`);

        // Captured output is only shown for failures.
        if (didFail && result.output) {
          console.log(result.output);
        }
      }

      if (anyFailed) {
        process.exit(1);
      }
    } catch (err) {
      console.error(err.message);
      process.exit(1);
    }
  });
|
|
254
|
+
|
|
228
255
|
program.parse();
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "guild-agents",
|
|
3
|
-
"version": "1.2.0",
|
|
3
|
+
"version": "1.3.0",
|
|
4
4
|
"description": "Specification-driven development CLI for Claude Code — think before you build",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"files": [
|
|
@@ -28,7 +28,10 @@
|
|
|
28
28
|
"publish:snapshot": "npm run version:snapshot && npm publish --tag snapshot",
|
|
29
29
|
"publish:beta": "npm run version:beta && npm publish --tag beta",
|
|
30
30
|
"publish:stable": "npm run version:stable && npm publish --tag latest",
|
|
31
|
-
"publish:promote-beta": "npm dist-tag add guild-agents@$(node --input-type=commonjs -p \"require('./package.json').version\") beta"
|
|
31
|
+
"publish:promote-beta": "npm dist-tag add guild-agents@$(node --input-type=commonjs -p \"require('./package.json').version\") beta",
|
|
32
|
+
"eval": "node scripts/run-evals.js",
|
|
33
|
+
"eval:build-feature": "node scripts/run-evals.js build-feature",
|
|
34
|
+
"eval:council": "node scripts/run-evals.js council"
|
|
32
35
|
},
|
|
33
36
|
"keywords": [
|
|
34
37
|
"claude",
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'fs';
|
|
2
2
|
import { basename, join } from 'path';
|
|
3
|
-
import { findWorkspaceRoot, WORKSPACE_FILE } from '../utils/workspace.js';
|
|
3
|
+
import { findWorkspaceRoot, loadWorkspace, runInMember, PRESET_COMMANDS, WORKSPACE_FILE } from '../utils/workspace.js';
|
|
4
4
|
|
|
5
5
|
export async function createWorkspaceFile(name, memberPaths) {
|
|
6
6
|
const members = memberPaths.map(p => ({
|
|
@@ -38,6 +38,43 @@ export async function addWorkspaceMember(memberPath) {
|
|
|
38
38
|
writeFileSync(filePath, JSON.stringify(config, null, 2) + '\n', 'utf8');
|
|
39
39
|
}
|
|
40
40
|
|
|
41
|
+
/**
 * Runs a preset or custom command in one workspace member, or in all of them.
 *
 * @param {string} [memberName] - Name of the member to target. When
 *   `options.all` is set and `preset` is omitted, this value is treated as the
 *   preset, so that `run --all test` works (the CLI parser fills the first
 *   positional slot first).
 * @param {string} [preset] - Key of PRESET_COMMANDS. Ignored when `options.cmd` is given.
 * @param {{ cmd?: string, all?: boolean }} [options] - CLI options.
 * @returns {Array} One `runInMember` result per targeted member, in workspace order.
 * @throws {Error} When no workspace is found, no command can be resolved, or
 *   the named member does not exist.
 */
export function runWorkspaceCommand(memberName, preset, options = {}) {
  const workspace = loadWorkspace();
  if (!workspace) throw new Error('No workspace found. Run `guild workspace init` first.');

  // With --all there is no member to name, so a single positional argument
  // is the preset even though it arrives in the member slot.
  let presetName = preset;
  if (options.all && presetName === undefined && memberName !== undefined) {
    presetName = memberName;
  }

  // Resolve command
  let cmd;
  let args;
  // Trim first: leading whitespace would otherwise produce an empty executable name.
  const customCommand = typeof options.cmd === 'string' ? options.cmd.trim() : '';
  if (customCommand) {
    [cmd, ...args] = customCommand.split(/\s+/);
  } else if (
    presetName !== undefined &&
    // Own-property check so inherited keys such as "constructor" are not accepted as presets.
    Object.prototype.hasOwnProperty.call(PRESET_COMMANDS, presetName)
  ) {
    ({ cmd, args } = PRESET_COMMANDS[presetName]);
  } else if (presetName === undefined) {
    throw new Error('No command given. Use test, lint, build, or --cmd "...".');
  } else {
    throw new Error(`Unknown command: "${presetName}". Use test, lint, build, or --cmd "...".`);
  }

  // Resolve members
  let targets;
  if (options.all) {
    targets = workspace.members;
  } else {
    const member = workspace.members.find(m => m.name === memberName);
    if (!member) {
      const available = workspace.members.map(m => m.name).join(', ');
      throw new Error(`Member "${memberName}" not found. Available: ${available}`);
    }
    targets = [member];
  }

  // Execute sequentially, collect all results (a failure does not stop later members).
  return targets.map(target => runInMember(target, cmd, args));
}
|
|
77
|
+
|
|
41
78
|
export async function getWorkspaceStatus() {
|
|
42
79
|
const root = findWorkspaceRoot();
|
|
43
80
|
if (!root) throw new Error('No workspace found. Run `guild workspace init` first.');
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
{
|
|
2
|
+
"skill": "build-feature",
|
|
3
|
+
"evals": [
|
|
4
|
+
{
|
|
5
|
+
"id": "bf-has-core-phases",
|
|
6
|
+
"description": "Plan contains evaluate, specify, design, implement phases",
|
|
7
|
+
"expectations": [
|
|
8
|
+
{ "text": "Has evaluate step", "assertion": "step-exists:evaluate" },
|
|
9
|
+
{ "text": "Has specify step", "assertion": "step-exists:specify" },
|
|
10
|
+
{ "text": "Has design step", "assertion": "step-exists:design" },
|
|
11
|
+
{ "text": "Has implement step", "assertion": "step-exists:implement" }
|
|
12
|
+
]
|
|
13
|
+
},
|
|
14
|
+
{
|
|
15
|
+
"id": "bf-has-quality-phases",
|
|
16
|
+
"description": "Plan contains review, QA, and completion phases",
|
|
17
|
+
"expectations": [
|
|
18
|
+
{ "text": "Has review step", "assertion": "step-exists:review" },
|
|
19
|
+
{ "text": "Has QA phase", "assertion": "step-exists:qa-phase" },
|
|
20
|
+
{ "text": "Has completion step", "assertion": "step-exists:completion" }
|
|
21
|
+
]
|
|
22
|
+
},
|
|
23
|
+
{
|
|
24
|
+
"id": "bf-advisor-uses-reasoning",
|
|
25
|
+
"description": "Advisor (evaluate) uses reasoning tier",
|
|
26
|
+
"expectations": [
|
|
27
|
+
{ "text": "Evaluate uses reasoning tier", "assertion": "step-model-tier:evaluate:reasoning" }
|
|
28
|
+
]
|
|
29
|
+
},
|
|
30
|
+
{
|
|
31
|
+
"id": "bf-developer-uses-execution",
|
|
32
|
+
"description": "Developer (implement) uses execution tier",
|
|
33
|
+
"expectations": [
|
|
34
|
+
{ "text": "Implement uses execution tier", "assertion": "step-model-tier:implement:execution" }
|
|
35
|
+
]
|
|
36
|
+
},
|
|
37
|
+
{
|
|
38
|
+
"id": "bf-gates-exist",
|
|
39
|
+
"description": "Quality gates exist at pre-review and final",
|
|
40
|
+
"expectations": [
|
|
41
|
+
{ "text": "Pre-review gate exists", "assertion": "gate-exists:gate-pre-review" },
|
|
42
|
+
{ "text": "Final gate exists", "assertion": "gate-exists:gate-final" }
|
|
43
|
+
]
|
|
44
|
+
},
|
|
45
|
+
{
|
|
46
|
+
"id": "bf-minimum-steps",
|
|
47
|
+
"description": "Plan has at least 10 steps",
|
|
48
|
+
"expectations": [
|
|
49
|
+
{ "text": "At least 10 steps", "assertion": "step-count:10" }
|
|
50
|
+
]
|
|
51
|
+
}
|
|
52
|
+
]
|
|
53
|
+
}
|
|
@@ -11,24 +11,30 @@ workflow:
|
|
|
11
11
|
requires: [user-question]
|
|
12
12
|
produces: [council-type, participant-roles]
|
|
13
13
|
gate: true
|
|
14
|
+
- id: workspace-context
|
|
15
|
+
role: system
|
|
16
|
+
intent: "Detect workspace membership. If in a workspace, collect context from sibling repos (CLAUDE.md, PROJECT.md, SESSION.md) and build workspace context block."
|
|
17
|
+
requires: [council-type]
|
|
18
|
+
produces: [workspace-context]
|
|
19
|
+
condition: in-workspace
|
|
14
20
|
- id: agent-1
|
|
15
21
|
role: dynamic
|
|
16
22
|
intent: "Analyze the question from specialized perspective. State position with concrete arguments."
|
|
17
|
-
requires: [user-question, council-type]
|
|
23
|
+
requires: [user-question, council-type, workspace-context]
|
|
18
24
|
produces: [perspective-1]
|
|
19
25
|
model-tier: reasoning
|
|
20
26
|
parallel: [agent-2, agent-3]
|
|
21
27
|
- id: agent-2
|
|
22
28
|
role: dynamic
|
|
23
29
|
intent: "Analyze the question from specialized perspective. State position with concrete arguments."
|
|
24
|
-
requires: [user-question, council-type]
|
|
30
|
+
requires: [user-question, council-type, workspace-context]
|
|
25
31
|
produces: [perspective-2]
|
|
26
32
|
model-tier: reasoning
|
|
27
33
|
parallel: [agent-1, agent-3]
|
|
28
34
|
- id: agent-3
|
|
29
35
|
role: dynamic
|
|
30
36
|
intent: "Analyze the question from specialized perspective. State position with concrete arguments."
|
|
31
|
-
requires: [user-question, council-type]
|
|
37
|
+
requires: [user-question, council-type, workspace-context]
|
|
32
38
|
produces: [perspective-3]
|
|
33
39
|
model-tier: reasoning
|
|
34
40
|
parallel: [agent-1, agent-2]
|
|
@@ -114,12 +120,23 @@ Analyze the user's question and determine which council type applies:
|
|
|
114
120
|
|
|
115
121
|
### Step 2 — Convene agents
|
|
116
122
|
|
|
123
|
+
**Workspace detection:** Before invoking agents, check if the project is inside a workspace:
|
|
124
|
+
|
|
125
|
+
1. Look for a `guild-workspace.json` file by searching upward from the project root
|
|
126
|
+
2. If found, load the workspace config and identify which member this project is
|
|
127
|
+
3. Read CLAUDE.md, PROJECT.md, and SESSION.md from each sibling member repo
|
|
128
|
+
4. Build a workspace context block with:
|
|
129
|
+
- Workspace name
|
|
130
|
+
- Each sibling's stack, structure summary, and current task
|
|
131
|
+
- Absolute paths so the agent can read any sibling file for deeper analysis
|
|
132
|
+
|
|
117
133
|
Invoke the 3 corresponding agents IN PARALLEL using Task tool with `model: "opus"` (all council agents use reasoning tier). Each agent:
|
|
118
134
|
|
|
119
135
|
1. Reads their `.claude/agents/[name].md` file to assume their role
|
|
120
136
|
2. Reads `CLAUDE.md` and `SESSION.md` for project context
|
|
121
|
-
3. Analyzes the question from their specialized perspective
|
|
122
|
-
4. States their position with concrete arguments
|
|
137
|
+
3. **If in a workspace:** receives the workspace context block and considers cross-repo impact as part of their analysis. They may read files from sibling repos using the provided paths.
|
|
138
|
+
4. Analyzes the question from their specialized perspective
|
|
139
|
+
5. States their position with concrete arguments
|
|
123
140
|
|
|
124
141
|
### Step 3 — Present debate
|
|
125
142
|
|
|
@@ -191,7 +208,11 @@ Example:
|
|
|
191
208
|
Task tool with:
|
|
192
209
|
subagent_type: "general-purpose"
|
|
193
210
|
model: "opus"
|
|
194
|
-
prompt: "Read .claude/agents/tech-lead.md and assume that role. Then: [debate question]
|
|
211
|
+
prompt: "Read .claude/agents/tech-lead.md and assume that role. Then: [debate question]
|
|
212
|
+
|
|
213
|
+
[If in workspace, append:]
|
|
214
|
+
## Workspace context
|
|
215
|
+
[workspace context block from Step 2]"
|
|
195
216
|
```
|
|
196
217
|
|
|
197
218
|
The `model` parameter is resolved from the step's `model-tier`: all council agents use reasoning→`"opus"`.
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
{
|
|
2
|
+
"skill": "council",
|
|
3
|
+
"evals": [
|
|
4
|
+
{
|
|
5
|
+
"id": "council-three-parallel-agents",
|
|
6
|
+
"description": "Council has 3 agent steps in parallel",
|
|
7
|
+
"expectations": [
|
|
8
|
+
{ "text": "Agent-1 exists", "assertion": "step-exists:agent-1" },
|
|
9
|
+
{ "text": "Agent-2 exists", "assertion": "step-exists:agent-2" },
|
|
10
|
+
{ "text": "Agent-3 exists", "assertion": "step-exists:agent-3" },
|
|
11
|
+
{ "text": "Agent-1 is parallel", "assertion": "step-parallel:agent-1" },
|
|
12
|
+
{ "text": "Agent-2 is parallel", "assertion": "step-parallel:agent-2" },
|
|
13
|
+
{ "text": "Agent-3 is parallel", "assertion": "step-parallel:agent-3" }
|
|
14
|
+
]
|
|
15
|
+
},
|
|
16
|
+
{
|
|
17
|
+
"id": "council-agents-use-reasoning",
|
|
18
|
+
"description": "All council agents use reasoning tier",
|
|
19
|
+
"expectations": [
|
|
20
|
+
{ "text": "Agent-1 uses reasoning", "assertion": "step-model-tier:agent-1:reasoning" },
|
|
21
|
+
{ "text": "Agent-2 uses reasoning", "assertion": "step-model-tier:agent-2:reasoning" },
|
|
22
|
+
{ "text": "Agent-3 uses reasoning", "assertion": "step-model-tier:agent-3:reasoning" }
|
|
23
|
+
]
|
|
24
|
+
},
|
|
25
|
+
{
|
|
26
|
+
"id": "council-synthesize-gate",
|
|
27
|
+
"description": "Synthesize step exists with gate",
|
|
28
|
+
"expectations": [
|
|
29
|
+
{ "text": "Synthesize step exists", "assertion": "step-exists:synthesize" },
|
|
30
|
+
{ "text": "Synthesize has gate", "assertion": "gate-exists:synthesize" }
|
|
31
|
+
]
|
|
32
|
+
},
|
|
33
|
+
{
|
|
34
|
+
"id": "council-workspace-context",
|
|
35
|
+
"description": "Workspace context step exists with condition",
|
|
36
|
+
"expectations": [
|
|
37
|
+
{ "text": "Workspace-context step exists", "assertion": "step-exists:workspace-context" }
|
|
38
|
+
]
|
|
39
|
+
}
|
|
40
|
+
]
|
|
41
|
+
}
|
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* eval-runner.js — Skill evaluation framework for Guild.
|
|
3
|
+
*
|
|
4
|
+
* Runs assertions against parsed skill workflows to verify
|
|
5
|
+
* structural correctness. Compatible with anthropics/skills eval format.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import { readFileSync, existsSync } from 'fs';
|
|
9
|
+
import { join, dirname } from 'path';
|
|
10
|
+
import { fileURLToPath } from 'url';
|
|
11
|
+
import { parseSkill } from './workflow-parser.js';
|
|
12
|
+
|
|
13
|
+
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
14
|
+
const TEMPLATES_DIR = join(__dirname, '..', 'templates', 'skills');
|
|
15
|
+
|
|
16
|
+
/**
 * Evaluates a single assertion against a parsed workflow.
 *
 * Supported assertion forms:
 *   step-exists:<id>              step-parallel:<id>
 *   step-role:<id>:<role>         gate-exists:<id>
 *   step-model-tier:<id>:<tier>   step-count:<minimum>
 *   step-requires:<id>:<dependency>
 *
 * Malformed or unknown assertions are reported as failures with an
 * explanatory `evidence` string rather than being thrown.
 *
 * @param {object} workflow - Parsed workflow with { version, steps[] }
 * @param {string} assertion - Assertion string (e.g. "step-exists:evaluate")
 * @returns {{ passed: boolean, evidence: string }}
 */
export function evaluateAssertion(workflow, assertion) {
  const pass = evidence => ({ passed: true, evidence });
  const fail = evidence => ({ passed: false, evidence });
  const malformed = () => fail(`Malformed assertion: "${assertion}"`);

  if (typeof assertion !== 'string') return malformed();
  const colonIdx = assertion.indexOf(':');
  if (colonIdx === -1) return malformed();

  const type = assertion.slice(0, colonIdx);
  const args = assertion.slice(colonIdx + 1);

  const steps = workflow && Array.isArray(workflow.steps) ? workflow.steps : [];
  const findStep = id => steps.find(s => s.id === id);

  // Splits "<stepId>:<expected>". Returns null when the expected part is
  // missing; otherwise `step.prop === undefined` would count as a pass for
  // any step that simply lacks the property.
  const splitPair = () => {
    const [stepId, expected] = args.split(':');
    return expected ? { stepId, expected } : null;
  };

  switch (type) {
    case 'step-exists': {
      return findStep(args)
        ? pass(`Step "${args}" found`)
        : fail(`Step "${args}" not found in ${steps.map(s => s.id).join(', ')}`);
    }

    case 'step-role': {
      const pair = splitPair();
      if (!pair) return malformed();
      const step = findStep(pair.stepId);
      if (!step) return fail(`Step "${pair.stepId}" not found`);
      return step.role === pair.expected
        ? pass(`Step "${pair.stepId}" has role "${pair.expected}"`)
        : fail(`Step "${pair.stepId}" has role "${step.role}", expected "${pair.expected}"`);
    }

    case 'step-model-tier': {
      const pair = splitPair();
      if (!pair) return malformed();
      const step = findStep(pair.stepId);
      if (!step) return fail(`Step "${pair.stepId}" not found`);
      return step.modelTier === pair.expected
        ? pass(`Step "${pair.stepId}" uses tier "${pair.expected}"`)
        : fail(`Step "${pair.stepId}" uses tier "${step.modelTier}", expected "${pair.expected}"`);
    }

    case 'step-requires': {
      const pair = splitPair();
      if (!pair) return malformed();
      const step = findStep(pair.stepId);
      if (!step) return fail(`Step "${pair.stepId}" not found`);
      // A step without a `requires` list has no dependencies; treat it as empty.
      const requires = Array.isArray(step.requires) ? step.requires : [];
      return requires.includes(pair.expected)
        ? pass(`Step "${pair.stepId}" requires "${pair.expected}"`)
        : fail(`Step "${pair.stepId}" requires [${requires.join(', ')}], missing "${pair.expected}"`);
    }

    case 'step-parallel': {
      const step = findStep(args);
      if (!step) return fail(`Step "${args}" not found`);
      return step.parallel && step.parallel.length > 0
        ? pass(`Step "${args}" is parallel with [${step.parallel.join(', ')}]`)
        : fail(`Step "${args}" has no parallel group`);
    }

    case 'gate-exists': {
      const step = findStep(args);
      if (!step) return fail(`Step "${args}" not found`);
      return step.gate === true
        ? pass(`Step "${args}" has gate: true`)
        : fail(`Step "${args}" has gate: ${step.gate}`);
    }

    case 'step-count': {
      const min = Number.parseInt(args, 10);
      // A non-numeric minimum is an authoring error, not a count mismatch.
      if (Number.isNaN(min)) return malformed();
      const actual = steps.length;
      return actual >= min
        ? pass(`Workflow has ${actual} steps (minimum ${min})`)
        : fail(`Workflow has ${actual} steps, expected at least ${min}`);
    }

    default:
      return fail(`Unknown assertion type: "${type}"`);
  }
}
|
|
94
|
+
|
|
95
|
+
/**
 * Reads and parses the `evals/evals.json` file of a skill template.
 * @param {string} skillName - Skill directory name (e.g. 'build-feature')
 * @returns {object|null} The parsed JSON, or null when the file does not exist
 */
export function loadEvals(skillName) {
  const evalsFile = join(TEMPLATES_DIR, skillName, 'evals', 'evals.json');
  if (existsSync(evalsFile)) {
    const rawJson = readFileSync(evalsFile, 'utf8');
    return JSON.parse(rawJson);
  }
  return null;
}
|
|
105
|
+
|
|
106
|
+
/**
 * Runs every eval case defined for a skill template.
 *
 * Loads the skill's evals.json, parses its SKILL.md, and checks each
 * expectation's assertion against the parsed workflow. An eval case passes
 * only when all of its expectations pass.
 *
 * @param {string} skillName - Skill directory name
 * @returns {{ skill: string, results: Array<{ id: string, description: string, passed: boolean, expectations: Array }> }}
 * @throws {Error} When the skill has no evals or no workflow definition
 */
export function runEvals(skillName) {
  const evalsDoc = loadEvals(skillName);
  if (!evalsDoc) throw new Error(`No evals found for skill "${skillName}"`);

  const skillSource = readFileSync(join(TEMPLATES_DIR, skillName, 'SKILL.md'), 'utf8');
  const skill = parseSkill(skillSource);
  if (!skill.workflow) {
    throw new Error(`Skill "${skillName}" has no workflow definition`);
  }

  const results = [];
  for (const evalCase of evalsDoc.evals) {
    const expectations = [];
    for (const expectation of evalCase.expectations) {
      const outcome = evaluateAssertion(skill.workflow, expectation.assertion);
      expectations.push({ text: expectation.text, assertion: expectation.assertion, ...outcome });
    }

    results.push({
      id: evalCase.id,
      description: evalCase.description,
      passed: !expectations.some(e => !e.passed),
      expectations,
    });
  }

  return { skill: skillName, results };
}
|
package/src/utils/workspace.js
CHANGED
|
@@ -1,8 +1,15 @@
|
|
|
1
1
|
import { existsSync, readFileSync, readdirSync } from 'fs';
|
|
2
2
|
import { join, dirname, resolve } from 'path';
|
|
3
|
+
import { execFileSync } from 'node:child_process';
|
|
3
4
|
|
|
4
5
|
export const WORKSPACE_FILE = 'guild-workspace.json';
|
|
5
6
|
|
|
7
|
+
/**
 * Built-in commands selectable by preset name; each entry is the executable
 * plus its argument list.
 *
 * Deep-frozen because this single object (including each `args` array) is
 * handed directly to callers: an accidental mutation would otherwise leak
 * into every later run.
 */
export const PRESET_COMMANDS = Object.freeze({
  test: Object.freeze({ cmd: 'npm', args: Object.freeze(['test']) }),
  lint: Object.freeze({ cmd: 'npm', args: Object.freeze(['run', 'lint']) }),
  build: Object.freeze({ cmd: 'npm', args: Object.freeze(['run', 'build']) }),
});
|
|
12
|
+
|
|
6
13
|
export function findWorkspaceRoot(startDir = process.cwd()) {
|
|
7
14
|
let dir = resolve(startDir);
|
|
8
15
|
while (true) {
|
|
@@ -80,3 +87,85 @@ export function generateWorkspaceContext(workspace, currentMemberName) {
|
|
|
80
87
|
|
|
81
88
|
return lines.join('\n');
|
|
82
89
|
}
|
|
90
|
+
|
|
91
|
+
// Facts extracted per sibling: [label, file name, pattern whose first capture is the value].
// The structure pattern accepts both LF and CRLF after the heading.
const MEMBER_CONTEXT_FIELDS = [
  ['Stack', 'PROJECT.md', /\*\*Stack:\*\*\s*(.+)/],
  ['Structure', 'CLAUDE.md', /## Project structure\r?\n(.+)/],
  ['Current task', 'SESSION.md', /\*\*Current task:\*\*\s*(.+)/],
];

// Returns the trimmed first capture of `pattern` in the file, or null when the
// file is absent or the pattern does not match.
function firstCaptureInFile(filePath, pattern) {
  if (!existsSync(filePath)) return null;
  const match = readFileSync(filePath, 'utf8').match(pattern);
  return match ? match[1].trim() : null;
}

/**
 * Builds a markdown context block describing every workspace member except
 * the current one.
 *
 * For each sibling it emits a heading with the member's absolute path, then
 * whichever of these can be found: the `**Stack:**` value from PROJECT.md,
 * the first line under `## Project structure` in CLAUDE.md, and the
 * `**Current task:**` value from SESSION.md.
 *
 * @param {{ name: string, members: Array<{ name: string, absolutePath: string }> }} workspace
 * @param {string} currentMemberName - Member to leave out of the block.
 * @returns {string} The context block, or '' when there is no workspace or no sibling.
 */
export function collectMemberContext(workspace, currentMemberName) {
  if (!workspace || !Array.isArray(workspace.members)) return '';

  const siblings = workspace.members.filter(m => m.name !== currentMemberName);
  if (siblings.length === 0) return '';

  const lines = [`## Workspace: ${workspace.name}`, ''];

  for (const member of siblings) {
    lines.push(`### ${member.name} (sibling — ${member.absolutePath})`);

    for (const [label, fileName, pattern] of MEMBER_CONTEXT_FIELDS) {
      const value = firstCaptureInFile(join(member.absolutePath, fileName), pattern);
      if (value !== null) {
        lines.push(`- **${label}:** ${value}`);
      }
    }

    lines.push(`You can read any file under ${member.absolutePath}/ for deeper analysis.`);
    lines.push('');
  }

  return lines.join('\n').trim();
}
|
|
135
|
+
|
|
136
|
+
// Upper bound on captured stdout/stderr per command. The library default is
// far smaller, and a command that exceeds the bound is terminated.
const RUN_IN_MEMBER_MAX_BUFFER = 64 * 1024 * 1024;

/**
 * Runs a command synchronously inside a workspace member's directory and
 * reports the outcome instead of throwing.
 *
 * @param {{ name: string, absolutePath: string }} member - Target member.
 * @param {string} cmd - Executable to run (no shell is involved).
 * @param {string[]} args - Arguments passed to the executable.
 * @returns {{ member: string, status: 'passed'|'failed', output: string, duration: number }}
 *   `output` is the trimmed stdout on success; on failure it is the trimmed
 *   stdout+stderr, or the error message when nothing was captured (e.g. the
 *   executable could not be started). `duration` is in milliseconds.
 */
export function runInMember(member, cmd, args) {
  if (!existsSync(member.absolutePath)) {
    return {
      member: member.name,
      status: 'failed',
      output: `Directory not found: ${member.absolutePath}`,
      duration: 0,
    };
  }

  const start = Date.now();
  try {
    const stdout = execFileSync(cmd, args, {
      cwd: member.absolutePath,
      encoding: 'utf8',
      stdio: ['pipe', 'pipe', 'pipe'],
      maxBuffer: RUN_IN_MEMBER_MAX_BUFFER,
    });
    return {
      member: member.name,
      status: 'passed',
      output: stdout.trim(),
      duration: Date.now() - start,
    };
  } catch (error) {
    const duration = Date.now() - start;
    const captured = `${error.stdout || ''}${error.stderr || ''}`.trim();
    return {
      member: member.name,
      status: 'failed',
      // Spawn failures produce no stdout/stderr; fall back to the error
      // message so the caller still has something to show.
      output: captured || error.message || String(error),
      duration,
    };
  }
}
|