@helmiq/crew 0.1.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/crew.cjs +29 -0
- package/defaults/personas/engineer.persona.yaml +12 -4
- package/dist/config/config.test.js +57 -7
- package/dist/engine/composite.js +1 -1
- package/dist/engine/composite.test.js +153 -33
- package/dist/tools/registry.d.ts.map +1 -1
- package/dist/tools/registry.js +34 -1
- package/dist/tools/registry.test.js +1 -0
- package/dist/tools/tool-groups.js +1 -1
- package/dist/tools/tool-groups.test.js +6 -2
- package/package.json +8 -4
package/bin/crew.cjs
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
const { execFileSync } = require('node:child_process');
|
|
4
|
+
const path = require('node:path');
|
|
5
|
+
|
|
6
|
+
let cliEntry;
|
|
7
|
+
try {
|
|
8
|
+
cliEntry = path.join(
|
|
9
|
+
path.dirname(require.resolve('@helmiq/crew-cli/package.json')),
|
|
10
|
+
'dist',
|
|
11
|
+
'index.js',
|
|
12
|
+
);
|
|
13
|
+
} catch {
|
|
14
|
+
console.error('crew: @helmiq/crew-cli is not installed.');
|
|
15
|
+
console.error('');
|
|
16
|
+
console.error(' npm install @helmiq/crew @helmiq/crew-cli');
|
|
17
|
+
console.error(' # or');
|
|
18
|
+
console.error(' pnpm add @helmiq/crew @helmiq/crew-cli');
|
|
19
|
+
process.exit(1);
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
try {
|
|
23
|
+
execFileSync(process.execPath, [cliEntry, ...process.argv.slice(2)], {
|
|
24
|
+
stdio: 'inherit',
|
|
25
|
+
});
|
|
26
|
+
} catch (e) {
|
|
27
|
+
if (e && e.status) process.exit(e.status);
|
|
28
|
+
throw e;
|
|
29
|
+
}
|
|
package/defaults/personas/engineer.persona.yaml
CHANGED
|
@@ -15,6 +15,7 @@ persona:
|
|
|
15
15
|
- test-write
|
|
16
16
|
- self-review
|
|
17
17
|
- pr-author
|
|
18
|
+
- ci-check
|
|
18
19
|
|
|
19
20
|
perception:
|
|
20
21
|
always_read:
|
|
@@ -59,13 +60,13 @@ persona:
|
|
|
59
60
|
skill: test-write
|
|
60
61
|
reads: [code-changes, requirements]
|
|
61
62
|
produces: test-files
|
|
62
|
-
tools: [code, git, shell]
|
|
63
|
+
tools: [code, git-read, git-commit, shell]
|
|
63
64
|
|
|
64
65
|
- name: quality-checker
|
|
65
66
|
skill: quality-check
|
|
66
67
|
reads: [code-changes, test-files]
|
|
67
68
|
produces: quality-report
|
|
68
|
-
tools: [shell, code, git]
|
|
69
|
+
tools: [shell, code, git-read, git-commit]
|
|
69
70
|
max_iterations: 3
|
|
70
71
|
|
|
71
72
|
- name: self-reviewer
|
|
@@ -82,7 +83,14 @@ persona:
|
|
|
82
83
|
produces: pull-request
|
|
83
84
|
tools: [git]
|
|
84
85
|
|
|
85
|
-
|
|
86
|
+
- name: ci-checker
|
|
87
|
+
skill: ci-check
|
|
88
|
+
reads: [pull-request, code-changes]
|
|
89
|
+
produces: ci-report
|
|
90
|
+
tools: [git, shell, code]
|
|
91
|
+
max_iterations: 3
|
|
92
|
+
|
|
93
|
+
published_artifact: ci-report
|
|
86
94
|
quality_gate: human-review
|
|
87
95
|
|
|
88
96
|
address-feedback:
|
|
@@ -104,7 +112,7 @@ persona:
|
|
|
104
112
|
skill: quality-check
|
|
105
113
|
reads: [code-changes]
|
|
106
114
|
produces: quality-report
|
|
107
|
-
tools: [shell, code, git]
|
|
115
|
+
tools: [shell, code, git-read, git-commit]
|
|
108
116
|
max_iterations: 3
|
|
109
117
|
|
|
110
118
|
published_artifact: code-changes
|
|
package/dist/config/config.test.js
CHANGED
|
@@ -324,13 +324,14 @@ describe('T-FR-01-1: default Engineer persona spec schema validation', () => {
|
|
|
324
324
|
expect(spec.persona.skills).toContain('test-write');
|
|
325
325
|
expect(spec.persona.skills).toContain('self-review');
|
|
326
326
|
expect(spec.persona.skills).toContain('pr-author');
|
|
327
|
+
expect(spec.persona.skills).toContain('ci-check');
|
|
327
328
|
});
|
|
328
|
-
it('defines implement-story as a composite task with six sub-agents', () => {
|
|
329
|
+
it('defines implement-story as a composite task with seven sub-agents', () => {
|
|
329
330
|
const task = spec.persona.tasks['implement-story'];
|
|
330
331
|
expect(task).toBeDefined();
|
|
331
332
|
expect(task.mode).toBe('composite');
|
|
332
333
|
const composite = task;
|
|
333
|
-
expect(composite.sub_agents).toHaveLength(6);
|
|
334
|
+
expect(composite.sub_agents).toHaveLength(7);
|
|
334
335
|
const names = composite.sub_agents.map((a) => a.name);
|
|
335
336
|
expect(names).toEqual([
|
|
336
337
|
'planner',
|
|
@@ -339,6 +340,7 @@ describe('T-FR-01-1: default Engineer persona spec schema validation', () => {
|
|
|
339
340
|
'quality-checker',
|
|
340
341
|
'self-reviewer',
|
|
341
342
|
'pr-author',
|
|
343
|
+
'ci-checker',
|
|
342
344
|
]);
|
|
343
345
|
});
|
|
344
346
|
it('maps sub-agents to skills', () => {
|
|
@@ -350,6 +352,7 @@ describe('T-FR-01-1: default Engineer persona spec schema validation', () => {
|
|
|
350
352
|
expect(byName['quality-checker']).toBe('quality-check');
|
|
351
353
|
expect(byName['self-reviewer']).toBe('self-review');
|
|
352
354
|
expect(byName['pr-author']).toBe('pr-author');
|
|
355
|
+
expect(byName['ci-checker']).toBe('ci-check');
|
|
353
356
|
});
|
|
354
357
|
});
|
|
355
358
|
describe('T-FR-01-2: skill resolution', () => {
|
|
@@ -490,19 +493,62 @@ llm:
|
|
|
490
493
|
}
|
|
491
494
|
});
|
|
492
495
|
});
|
|
496
|
+
describe('T-CREW-10-003: ci-checker sub-agent configuration', () => {
|
|
497
|
+
let spec;
|
|
498
|
+
beforeAll(async () => {
|
|
499
|
+
const specPath = join(crewRepoPath, 'defaults', 'personas', 'engineer.persona.yaml');
|
|
500
|
+
spec = await loadYaml(specPath, PersonaSpecSchema, 'Engineer persona spec');
|
|
501
|
+
});
|
|
502
|
+
it('ci-checker references the ci-check skill', () => {
|
|
503
|
+
const task = spec.persona.tasks['implement-story'];
|
|
504
|
+
const ci = task.sub_agents.find((a) => a.name === 'ci-checker');
|
|
505
|
+
expect(ci).toBeDefined();
|
|
506
|
+
expect(ci.skill).toBe('ci-check');
|
|
507
|
+
});
|
|
508
|
+
it('ci-checker has git, shell, and code tools', () => {
|
|
509
|
+
const task = spec.persona.tasks['implement-story'];
|
|
510
|
+
const ci = task.sub_agents.find((a) => a.name === 'ci-checker');
|
|
511
|
+
expect(ci.tools).toContain('git');
|
|
512
|
+
expect(ci.tools).toContain('shell');
|
|
513
|
+
expect(ci.tools).toContain('code');
|
|
514
|
+
});
|
|
515
|
+
it('ci-checker has max_iterations of 3', () => {
|
|
516
|
+
const task = spec.persona.tasks['implement-story'];
|
|
517
|
+
const ci = task.sub_agents.find((a) => a.name === 'ci-checker');
|
|
518
|
+
expect(ci.max_iterations).toBe(3);
|
|
519
|
+
});
|
|
520
|
+
it('ci-checker reads pull-request and code-changes', () => {
|
|
521
|
+
const task = spec.persona.tasks['implement-story'];
|
|
522
|
+
const ci = task.sub_agents.find((a) => a.name === 'ci-checker');
|
|
523
|
+
expect(ci.reads).toContain('pull-request');
|
|
524
|
+
expect(ci.reads).toContain('code-changes');
|
|
525
|
+
});
|
|
526
|
+
it('ci-checker produces ci-report', () => {
|
|
527
|
+
const task = spec.persona.tasks['implement-story'];
|
|
528
|
+
const ci = task.sub_agents.find((a) => a.name === 'ci-checker');
|
|
529
|
+
expect(ci.produces).toBe('ci-report');
|
|
530
|
+
});
|
|
531
|
+
it('get-pr-checks is in the git-read tool group', async () => {
|
|
532
|
+
const { expandToolNames } = await import('../tools/tool-groups.js');
|
|
533
|
+
const gitReadTools = expandToolNames(['git-read']);
|
|
534
|
+
expect(gitReadTools.has('get-pr-checks')).toBe(true);
|
|
535
|
+
});
|
|
536
|
+
});
|
|
493
537
|
describe('T-CREW-10-001: test-writer sub-agent has git tools', () => {
|
|
494
538
|
let spec;
|
|
495
539
|
beforeAll(async () => {
|
|
496
540
|
const specPath = join(crewRepoPath, 'defaults', 'personas', 'engineer.persona.yaml');
|
|
497
541
|
spec = await loadYaml(specPath, PersonaSpecSchema, 'Engineer persona spec');
|
|
498
542
|
});
|
|
499
|
-
it('test-writer has code, git, and shell tools', () => {
|
|
543
|
+
it('test-writer has code, git-read, git-commit, and shell tools (least privilege)', () => {
|
|
500
544
|
const task = spec.persona.tasks['implement-story'];
|
|
501
545
|
const tw = task.sub_agents.find((a) => a.name === 'test-writer');
|
|
502
546
|
expect(tw).toBeDefined();
|
|
503
547
|
expect(tw.tools).toContain('code');
|
|
504
|
-
expect(tw.tools).toContain('git');
|
|
548
|
+
expect(tw.tools).toContain('git-read');
|
|
549
|
+
expect(tw.tools).toContain('git-commit');
|
|
505
550
|
expect(tw.tools).toContain('shell');
|
|
551
|
+
expect(tw.tools).not.toContain('git');
|
|
506
552
|
});
|
|
507
553
|
});
|
|
508
554
|
describe('T-FR-05: quality-checker sub-agent configuration', () => {
|
|
@@ -517,12 +563,14 @@ describe('T-FR-05: quality-checker sub-agent configuration', () => {
|
|
|
517
563
|
expect(qc).toBeDefined();
|
|
518
564
|
expect(qc.skill).toBe('quality-check');
|
|
519
565
|
});
|
|
520
|
-
it('quality-checker has shell, code, and git tools', () => {
|
|
566
|
+
it('quality-checker has shell, code, git-read, and git-commit tools (least privilege)', () => {
|
|
521
567
|
const task = spec.persona.tasks['implement-story'];
|
|
522
568
|
const qc = task.sub_agents.find((a) => a.name === 'quality-checker');
|
|
523
569
|
expect(qc.tools).toContain('shell');
|
|
524
570
|
expect(qc.tools).toContain('code');
|
|
525
|
-
expect(qc.tools).toContain('git');
|
|
571
|
+
expect(qc.tools).toContain('git-read');
|
|
572
|
+
expect(qc.tools).toContain('git-commit');
|
|
573
|
+
expect(qc.tools).not.toContain('git');
|
|
526
574
|
});
|
|
527
575
|
it('quality-checker has max_iterations of 3', () => {
|
|
528
576
|
const task = spec.persona.tasks['implement-story'];
|
|
@@ -541,7 +589,9 @@ describe('T-FR-05: quality-checker sub-agent configuration', () => {
|
|
|
541
589
|
expect(qc.skill).toBe('quality-check');
|
|
542
590
|
expect(qc.tools).toContain('shell');
|
|
543
591
|
expect(qc.tools).toContain('code');
|
|
544
|
-
expect(qc.tools).toContain('git');
|
|
592
|
+
expect(qc.tools).toContain('git-read');
|
|
593
|
+
expect(qc.tools).toContain('git-commit');
|
|
594
|
+
expect(qc.tools).not.toContain('git');
|
|
545
595
|
expect(qc.max_iterations).toBe(3);
|
|
546
596
|
});
|
|
547
597
|
});
|
package/dist/engine/composite.js
CHANGED
|
@@ -58,7 +58,7 @@ async function evaluateGate(gateExpression, subAgentOutput, provider, model) {
|
|
|
58
58
|
};
|
|
59
59
|
}
|
|
60
60
|
catch {
|
|
61
|
-
return { pass:
|
|
61
|
+
return { pass: false, reason: 'Gate evaluation produced non-JSON output; treating as fail' };
|
|
62
62
|
}
|
|
63
63
|
}
|
|
64
64
|
/**
|
|
package/dist/engine/composite.test.js
CHANGED
|
@@ -282,6 +282,96 @@ describe('T-01-004c: jump-back via on_fail/gate/max_loops', () => {
|
|
|
282
282
|
const result = await executeCompositeTask(makePersona(), task, emptyContext, mockModel, provider, emptyTools, tmpDir);
|
|
283
283
|
expect(result.content).toBe('');
|
|
284
284
|
});
|
|
285
|
+
it('treats malformed gate response as fail (CREW-10-004)', async () => {
|
|
286
|
+
const provider = {
|
|
287
|
+
generateText: vi
|
|
288
|
+
.fn()
|
|
289
|
+
.mockImplementation((opts) => {
|
|
290
|
+
if (opts.system.includes('gate evaluator')) {
|
|
291
|
+
return Promise.resolve({
|
|
292
|
+
text: 'This is not valid JSON at all',
|
|
293
|
+
toolCalls: [],
|
|
294
|
+
tokensIn: 5,
|
|
295
|
+
tokensOut: 5,
|
|
296
|
+
});
|
|
297
|
+
}
|
|
298
|
+
return Promise.resolve({
|
|
299
|
+
text: 'some output',
|
|
300
|
+
toolCalls: [],
|
|
301
|
+
tokensIn: 10,
|
|
302
|
+
tokensOut: 20,
|
|
303
|
+
});
|
|
304
|
+
}),
|
|
305
|
+
};
|
|
306
|
+
const task = makeTask({
|
|
307
|
+
sub_agents: [
|
|
308
|
+
{ name: 'implementer', skill: 'feature-implementation', reads: [], produces: 'code' },
|
|
309
|
+
{
|
|
310
|
+
name: 'reviewer',
|
|
311
|
+
skill: 'code-review',
|
|
312
|
+
reads: ['code'],
|
|
313
|
+
produces: 'review',
|
|
314
|
+
gate: 'no blocking issues',
|
|
315
|
+
on_fail: 'implementer',
|
|
316
|
+
max_loops: 1,
|
|
317
|
+
},
|
|
318
|
+
],
|
|
319
|
+
published_artifact: 'review',
|
|
320
|
+
});
|
|
321
|
+
const result = await executeCompositeTask(makePersona(), task, emptyContext, mockModel, provider, emptyTools, tmpDir);
|
|
322
|
+
expect(result.content).toBe('');
|
|
323
|
+
});
|
|
324
|
+
it('loops back on malformed gate with sufficient max_loops (CREW-10-004)', async () => {
|
|
325
|
+
let gateCallCount = 0;
|
|
326
|
+
const provider = {
|
|
327
|
+
generateText: vi
|
|
328
|
+
.fn()
|
|
329
|
+
.mockImplementation((opts) => {
|
|
330
|
+
if (opts.system.includes('gate evaluator')) {
|
|
331
|
+
gateCallCount++;
|
|
332
|
+
if (gateCallCount < 2) {
|
|
333
|
+
return Promise.resolve({
|
|
334
|
+
text: 'NOT JSON',
|
|
335
|
+
toolCalls: [],
|
|
336
|
+
tokensIn: 5,
|
|
337
|
+
tokensOut: 5,
|
|
338
|
+
});
|
|
339
|
+
}
|
|
340
|
+
return Promise.resolve({
|
|
341
|
+
text: JSON.stringify({ pass: true, reason: 'ok now' }),
|
|
342
|
+
toolCalls: [],
|
|
343
|
+
tokensIn: 5,
|
|
344
|
+
tokensOut: 5,
|
|
345
|
+
});
|
|
346
|
+
}
|
|
347
|
+
return Promise.resolve({
|
|
348
|
+
text: 'some output',
|
|
349
|
+
toolCalls: [],
|
|
350
|
+
tokensIn: 10,
|
|
351
|
+
tokensOut: 20,
|
|
352
|
+
});
|
|
353
|
+
}),
|
|
354
|
+
};
|
|
355
|
+
const task = makeTask({
|
|
356
|
+
sub_agents: [
|
|
357
|
+
{ name: 'implementer', skill: 'feature-implementation', reads: [], produces: 'code' },
|
|
358
|
+
{
|
|
359
|
+
name: 'reviewer',
|
|
360
|
+
skill: 'code-review',
|
|
361
|
+
reads: ['code'],
|
|
362
|
+
produces: 'review',
|
|
363
|
+
gate: 'no blocking issues',
|
|
364
|
+
on_fail: 'implementer',
|
|
365
|
+
max_loops: 2,
|
|
366
|
+
},
|
|
367
|
+
],
|
|
368
|
+
published_artifact: 'review',
|
|
369
|
+
});
|
|
370
|
+
const result = await executeCompositeTask(makePersona(), task, emptyContext, mockModel, provider, emptyTools, tmpDir);
|
|
371
|
+
expect(result.content).not.toBe('');
|
|
372
|
+
const implementerRuns = result.subAgentResults.filter((r) => r.name === 'implementer');
|
|
373
|
+
expect(implementerRuns.length).toBe(2);
|
|
374
|
+
});
|
|
285
375
|
});
|
|
286
376
|
describe('T-01-004d: checkpoint work products', () => {
|
|
287
377
|
it('writes each sub-agent output to runs/{run_id}/work/', async () => {
|
|
@@ -349,7 +439,7 @@ describe('T-FR-04: test-writer sub-agent', () => {
|
|
|
349
439
|
skill: 'test-writing',
|
|
350
440
|
reads: ['code-changes', 'requirements'],
|
|
351
441
|
produces: 'test-files',
|
|
352
|
-
tools: ['code', 'git', 'shell'],
|
|
442
|
+
tools: ['code', 'git-read', 'git-commit', 'shell'],
|
|
353
443
|
},
|
|
354
444
|
],
|
|
355
445
|
published_artifact: 'test-files',
|
|
@@ -401,7 +491,7 @@ describe('T-FR-04: test-writer sub-agent', () => {
|
|
|
401
491
|
.content;
|
|
402
492
|
expect(testWriterPrompt).toContain('code-changes');
|
|
403
493
|
});
|
|
404
|
-
it('restricts tools to code, git, and shell', async () => {
|
|
494
|
+
it('restricts tools to code, git-read, git-commit, and shell', async () => {
|
|
405
495
|
const noop = async () => ({});
|
|
406
496
|
const tools = {
|
|
407
497
|
'read-artifact': {
|
|
@@ -431,14 +521,7 @@ describe('T-FR-04: test-writer sub-agent', () => {
|
|
|
431
521
|
const calls = provider.generateText.mock.calls;
|
|
432
522
|
const testWriterCall = calls[1][0];
|
|
433
523
|
const toolNames = Object.keys(testWriterCall.tools ?? {}).sort();
|
|
434
|
-
expect(toolNames).toEqual([
|
|
435
|
-
'git-branch',
|
|
436
|
-
'git-commit',
|
|
437
|
-
'git-diff',
|
|
438
|
-
'read-file',
|
|
439
|
-
'run-command',
|
|
440
|
-
'write-file',
|
|
441
|
-
]);
|
|
524
|
+
expect(toolNames).toEqual(['git-commit', 'git-diff', 'read-file', 'run-command', 'write-file']);
|
|
442
525
|
});
|
|
443
526
|
it('produces test-files output and checkpoints it', async () => {
|
|
444
527
|
const provider = sequentialProvider(['code changes', 'test file content']);
|
|
@@ -1588,8 +1671,8 @@ describe('T-FR-03: implementer sub-agent', () => {
|
|
|
1588
1671
|
});
|
|
1589
1672
|
describe('T-FR-07: end-to-end implement-story composite task', () => {
|
|
1590
1673
|
/**
|
|
1591
|
-
* Full
|
|
1592
|
-
* planner -> implementer -> test-writer -> quality-checker -> self-reviewer -> pr-author
|
|
1674
|
+
* Full 7-step sub-agent chain matching the engineer persona spec:
|
|
1675
|
+
* planner -> implementer -> test-writer -> quality-checker -> self-reviewer -> pr-author -> ci-checker
|
|
1593
1676
|
*/
|
|
1594
1677
|
function makeFullImplementStoryTask() {
|
|
1595
1678
|
return {
|
|
@@ -1615,14 +1698,14 @@ describe('T-FR-07: end-to-end implement-story composite task', () => {
|
|
|
1615
1698
|
skill: 'test-writing',
|
|
1616
1699
|
reads: ['code-changes', 'requirements'],
|
|
1617
1700
|
produces: 'test-files',
|
|
1618
|
-
tools: ['code', 'git', 'shell'],
|
|
1701
|
+
tools: ['code', 'git-read', 'git-commit', 'shell'],
|
|
1619
1702
|
},
|
|
1620
1703
|
{
|
|
1621
1704
|
name: 'quality-checker',
|
|
1622
1705
|
skill: 'quality-check',
|
|
1623
1706
|
reads: ['code-changes', 'test-files'],
|
|
1624
1707
|
produces: 'quality-report',
|
|
1625
|
-
tools: ['shell', 'code', 'git'],
|
|
1708
|
+
tools: ['shell', 'code', 'git-read', 'git-commit'],
|
|
1626
1709
|
max_iterations: 3,
|
|
1627
1710
|
},
|
|
1628
1711
|
{
|
|
@@ -1641,8 +1724,16 @@ describe('T-FR-07: end-to-end implement-story composite task', () => {
|
|
|
1641
1724
|
produces: 'pull-request',
|
|
1642
1725
|
tools: ['git'],
|
|
1643
1726
|
},
|
|
1727
|
+
{
|
|
1728
|
+
name: 'ci-checker',
|
|
1729
|
+
skill: 'ci-check',
|
|
1730
|
+
reads: ['pull-request', 'code-changes'],
|
|
1731
|
+
produces: 'ci-report',
|
|
1732
|
+
tools: ['git', 'shell', 'code'],
|
|
1733
|
+
max_iterations: 3,
|
|
1734
|
+
},
|
|
1644
1735
|
],
|
|
1645
|
-
published_artifact: '
|
|
1736
|
+
published_artifact: 'ci-report',
|
|
1646
1737
|
quality_gate: 'human-review',
|
|
1647
1738
|
};
|
|
1648
1739
|
}
|
|
@@ -1660,6 +1751,7 @@ describe('T-FR-07: end-to-end implement-story composite task', () => {
|
|
|
1660
1751
|
'test-writing',
|
|
1661
1752
|
'self-review',
|
|
1662
1753
|
'pr-authoring',
|
|
1754
|
+
'ci-check',
|
|
1663
1755
|
],
|
|
1664
1756
|
perception: { per_task: {} },
|
|
1665
1757
|
tasks: {},
|
|
@@ -1679,6 +1771,7 @@ describe('T-FR-07: end-to-end implement-story composite task', () => {
|
|
|
1679
1771
|
'quality-check': 'Run lint, typecheck, tests. Fix failures iteratively.',
|
|
1680
1772
|
'self-review': 'Review changes against design. Report blocking issues.',
|
|
1681
1773
|
'pr-authoring': 'Create a GitHub PR with structured description.',
|
|
1774
|
+
'ci-check': 'Poll CI checks. Fix failures if needed. Produce CI report.',
|
|
1682
1775
|
'feature-implementation': 'Feature implementation skill.',
|
|
1683
1776
|
'code-review': 'Code review skill.',
|
|
1684
1777
|
};
|
|
@@ -1691,6 +1784,7 @@ describe('T-FR-07: end-to-end implement-story composite task', () => {
|
|
|
1691
1784
|
'quality-checker',
|
|
1692
1785
|
'self-reviewer',
|
|
1693
1786
|
'pr-author',
|
|
1787
|
+
'ci-checker',
|
|
1694
1788
|
];
|
|
1695
1789
|
return {
|
|
1696
1790
|
generateText: vi
|
|
@@ -1715,7 +1809,7 @@ describe('T-FR-07: end-to-end implement-story composite task', () => {
|
|
|
1715
1809
|
}),
|
|
1716
1810
|
};
|
|
1717
1811
|
}
|
|
1718
|
-
it('executes all 6 sub-agents in sequence', async () => {
|
|
1812
|
+
it('executes all 7 sub-agents in sequence', async () => {
|
|
1719
1813
|
const provider = gatePassingProvider({
|
|
1720
1814
|
planner: '## Plan\nModify engine/composite.ts',
|
|
1721
1815
|
implementer: 'Created branch, wrote code, committed',
|
|
@@ -1723,15 +1817,17 @@ describe('T-FR-07: end-to-end implement-story composite task', () => {
|
|
|
1723
1817
|
'quality-checker': 'Lint: pass, Typecheck: pass, Tests: 14/14 pass',
|
|
1724
1818
|
'self-reviewer': '## Verdict: PASS\nNo blocking issues.',
|
|
1725
1819
|
'pr-author': 'Created PR #47: feat(CREW-03-011): end-to-end composite task',
|
|
1820
|
+
'ci-checker': '# CI Check Report\n\n**Overall: PASS**',
|
|
1726
1821
|
});
|
|
1727
1822
|
const result = await executeCompositeTask(makeEngineerPersona(), makeFullImplementStoryTask(), emptyContext, mockModel, provider, emptyTools, tmpDir, allSkills);
|
|
1728
|
-
expect(result.subAgentResults).toHaveLength(6);
|
|
1823
|
+
expect(result.subAgentResults).toHaveLength(7);
|
|
1729
1824
|
expect(result.subAgentResults[0].name).toBe('planner');
|
|
1730
1825
|
expect(result.subAgentResults[1].name).toBe('implementer');
|
|
1731
1826
|
expect(result.subAgentResults[2].name).toBe('test-writer');
|
|
1732
1827
|
expect(result.subAgentResults[3].name).toBe('quality-checker');
|
|
1733
1828
|
expect(result.subAgentResults[4].name).toBe('self-reviewer');
|
|
1734
1829
|
expect(result.subAgentResults[5].name).toBe('pr-author');
|
|
1830
|
+
expect(result.subAgentResults[6].name).toBe('ci-checker');
|
|
1735
1831
|
});
|
|
1736
1832
|
it('shares working state between all sub-agents', async () => {
|
|
1737
1833
|
const planOutput = '## Plan\n1. Modify composite.ts\n2. Add tests';
|
|
@@ -1758,34 +1854,35 @@ describe('T-FR-07: end-to-end implement-story composite task', () => {
|
|
|
1758
1854
|
expect(qualityPrompt).toContain(codeOutput);
|
|
1759
1855
|
expect(qualityPrompt).toContain(testOutput);
|
|
1760
1856
|
});
|
|
1761
|
-
it('checkpoints all 6 intermediate work products', async () => {
|
|
1857
|
+
it('checkpoints all 7 intermediate work products', async () => {
|
|
1762
1858
|
const provider = gatePassingProvider({});
|
|
1763
1859
|
const result = await executeCompositeTask(makeEngineerPersona(), makeFullImplementStoryTask(), emptyContext, mockModel, provider, emptyTools, tmpDir, allSkills);
|
|
1764
|
-
expect(result.checkpoints).toHaveLength(6);
|
|
1860
|
+
expect(result.checkpoints).toHaveLength(7);
|
|
1765
1861
|
expect(result.checkpoints[0].path).toContain('01-planner.md');
|
|
1766
1862
|
expect(result.checkpoints[1].path).toContain('02-implementer.md');
|
|
1767
1863
|
expect(result.checkpoints[2].path).toContain('03-test-writer.md');
|
|
1768
1864
|
expect(result.checkpoints[3].path).toContain('04-quality-checker.md');
|
|
1769
1865
|
expect(result.checkpoints[4].path).toContain('05-self-reviewer.md');
|
|
1770
1866
|
expect(result.checkpoints[5].path).toContain('06-pr-author.md');
|
|
1867
|
+
expect(result.checkpoints[6].path).toContain('07-ci-checker.md');
|
|
1771
1868
|
for (const cp of result.checkpoints) {
|
|
1772
1869
|
const content = await readFile(cp.path, 'utf-8');
|
|
1773
1870
|
expect(content).toBeTruthy();
|
|
1774
1871
|
}
|
|
1775
1872
|
});
|
|
1776
|
-
it('final output is from the
|
|
1777
|
-
const
|
|
1873
|
+
it('final output is from the ci-checker sub-agent', async () => {
|
|
1874
|
+
const ciOutput = '# CI Check Report\n\n**Overall: PASS**\n\nAll checks passed.';
|
|
1778
1875
|
const provider = gatePassingProvider({
|
|
1779
|
-
'
|
|
1876
|
+
'ci-checker': ciOutput,
|
|
1780
1877
|
});
|
|
1781
1878
|
const result = await executeCompositeTask(makeEngineerPersona(), makeFullImplementStoryTask(), emptyContext, mockModel, provider, emptyTools, tmpDir, allSkills);
|
|
1782
|
-
expect(result.content).toBe(
|
|
1879
|
+
expect(result.content).toBe(ciOutput);
|
|
1783
1880
|
});
|
|
1784
|
-
it('accumulates tokens across all 6 sub-agents', async () => {
|
|
1881
|
+
it('accumulates tokens across all 7 sub-agents', async () => {
|
|
1785
1882
|
const provider = gatePassingProvider({});
|
|
1786
1883
|
const result = await executeCompositeTask(makeEngineerPersona(), makeFullImplementStoryTask(), emptyContext, mockModel, provider, emptyTools, tmpDir, allSkills);
|
|
1787
|
-
expect(result.tokensIn).toBe(
|
|
1788
|
-
expect(result.tokensOut).toBe(
|
|
1884
|
+
expect(result.tokensIn).toBe(700);
|
|
1885
|
+
expect(result.tokensOut).toBe(350);
|
|
1789
1886
|
});
|
|
1790
1887
|
it('self-reviewer gate failure loops back to implementer', async () => {
|
|
1791
1888
|
let subAgentIdx = 0;
|
|
@@ -1821,7 +1918,7 @@ describe('T-FR-07: end-to-end implement-story composite task', () => {
|
|
|
1821
1918
|
expect(names[3]).toBe('quality-checker');
|
|
1822
1919
|
expect(names[4]).toBe('self-reviewer');
|
|
1823
1920
|
expect(names[5]).toBe('implementer');
|
|
1824
|
-
expect(names[names.length - 1]).toBe('pr-author');
|
|
1921
|
+
expect(names[names.length - 1]).toBe('ci-checker');
|
|
1825
1922
|
expect(result.content).not.toBe('');
|
|
1826
1923
|
});
|
|
1827
1924
|
it('provides assembled context to planner sub-agent', async () => {
|
|
@@ -1880,6 +1977,12 @@ describe('T-FR-07: end-to-end implement-story composite task', () => {
|
|
|
1880
1977
|
'git-push': { name: 'git-push', description: 'push', parameters: {}, execute: noop },
|
|
1881
1978
|
'git-diff': { name: 'git-diff', description: 'diff', parameters: {}, execute: noop },
|
|
1882
1979
|
'git-log': { name: 'git-log', description: 'log', parameters: {}, execute: noop },
|
|
1980
|
+
'get-pr-checks': {
|
|
1981
|
+
name: 'get-pr-checks',
|
|
1982
|
+
description: 'checks',
|
|
1983
|
+
parameters: {},
|
|
1984
|
+
execute: noop,
|
|
1985
|
+
},
|
|
1883
1986
|
'create-pr': { name: 'create-pr', description: 'pr', parameters: {}, execute: noop },
|
|
1884
1987
|
'run-command': { name: 'run-command', description: 'shell', parameters: {}, execute: noop },
|
|
1885
1988
|
};
|
|
@@ -1891,6 +1994,7 @@ describe('T-FR-07: end-to-end implement-story composite task', () => {
|
|
|
1891
1994
|
const implementerTools = Object.keys(calls[1][0].tools ?? {}).sort();
|
|
1892
1995
|
expect(implementerTools).toEqual([
|
|
1893
1996
|
'create-pr',
|
|
1997
|
+
'get-pr-checks',
|
|
1894
1998
|
'git-branch',
|
|
1895
1999
|
'git-commit',
|
|
1896
2000
|
'git-diff',
|
|
@@ -1904,12 +2008,10 @@ describe('T-FR-07: end-to-end implement-story composite task', () => {
|
|
|
1904
2008
|
]);
|
|
1905
2009
|
const testWriterTools = Object.keys(calls[2][0].tools ?? {}).sort();
|
|
1906
2010
|
expect(testWriterTools).toEqual([
|
|
1907
|
-
'
|
|
1908
|
-
'git-branch',
|
|
2011
|
+
'get-pr-checks',
|
|
1909
2012
|
'git-commit',
|
|
1910
2013
|
'git-diff',
|
|
1911
2014
|
'git-log',
|
|
1912
|
-
'git-push',
|
|
1913
2015
|
'list-directory',
|
|
1914
2016
|
'read-file',
|
|
1915
2017
|
'run-command',
|
|
@@ -1918,12 +2020,10 @@ describe('T-FR-07: end-to-end implement-story composite task', () => {
|
|
|
1918
2020
|
]);
|
|
1919
2021
|
const qualityCheckerTools = Object.keys(calls[3][0].tools ?? {}).sort();
|
|
1920
2022
|
expect(qualityCheckerTools).toEqual([
|
|
1921
|
-
'
|
|
1922
|
-
'git-branch',
|
|
2023
|
+
'get-pr-checks',
|
|
1923
2024
|
'git-commit',
|
|
1924
2025
|
'git-diff',
|
|
1925
2026
|
'git-log',
|
|
1926
|
-
'git-push',
|
|
1927
2027
|
'list-directory',
|
|
1928
2028
|
'read-file',
|
|
1929
2029
|
'run-command',
|
|
@@ -1937,11 +2037,31 @@ describe('T-FR-07: end-to-end implement-story composite task', () => {
|
|
|
1937
2037
|
const prAuthorTools = Object.keys(calls[prAuthorIdx][0].tools ?? {}).sort();
|
|
1938
2038
|
expect(prAuthorTools).toEqual([
|
|
1939
2039
|
'create-pr',
|
|
2040
|
+
'get-pr-checks',
|
|
1940
2041
|
'git-branch',
|
|
1941
2042
|
'git-commit',
|
|
1942
2043
|
'git-diff',
|
|
1943
2044
|
'git-log',
|
|
1944
2045
|
'git-push',
|
|
1945
2046
|
]);
|
|
2047
|
+
const ciCheckerIdx = calls.findIndex((call) => {
|
|
2048
|
+
const msgs = call[0].messages;
|
|
2049
|
+
return msgs[0]?.content.includes('Sub-task: ci-report');
|
|
2050
|
+
});
|
|
2051
|
+
const ciCheckerTools = Object.keys(calls[ciCheckerIdx][0].tools ?? {}).sort();
|
|
2052
|
+
expect(ciCheckerTools).toEqual([
|
|
2053
|
+
'create-pr',
|
|
2054
|
+
'get-pr-checks',
|
|
2055
|
+
'git-branch',
|
|
2056
|
+
'git-commit',
|
|
2057
|
+
'git-diff',
|
|
2058
|
+
'git-log',
|
|
2059
|
+
'git-push',
|
|
2060
|
+
'list-directory',
|
|
2061
|
+
'read-file',
|
|
2062
|
+
'run-command',
|
|
2063
|
+
'search-codebase',
|
|
2064
|
+
'write-file',
|
|
2065
|
+
]);
|
|
1946
2066
|
});
|
|
1947
2067
|
});
|
|
package/dist/tools/registry.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"registry.d.ts","sourceRoot":"","sources":["../../src/tools/registry.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;
|
|
1
|
+
{"version":3,"file":"registry.d.ts","sourceRoot":"","sources":["../../src/tools/registry.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAkBH,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,0BAA0B,CAAC;AAC5D,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,4BAA4B,CAAC;AAEhE,MAAM,WAAW,mBAAmB;IAClC,aAAa,CAAC,EAAE,aAAa,CAAC;CAC/B;AAED,wBAAgB,kBAAkB,CAAC,OAAO,CAAC,EAAE,mBAAmB,GAAG,WAAW,CAiW7E"}
|
package/dist/tools/registry.js
CHANGED
|
@@ -7,7 +7,7 @@
|
|
|
7
7
|
* so the LLM only needs to provide the operation-specific parameters.
|
|
8
8
|
*/
|
|
9
9
|
import { readFileFromRepo, writeFileToRepo, listDirectory, searchCodebase, } from '@helmiq/crew-code';
|
|
10
|
-
import { gitBranch, gitCommit, gitPush, gitDiff, gitLog, createPr } from '@helmiq/crew-git';
|
|
10
|
+
import { gitBranch, gitCommit, gitPush, gitDiff, gitLog, createPr, getPrChecks, } from '@helmiq/crew-git';
|
|
11
11
|
import { runCommand, createAllowlist } from '@helmiq/crew-shell';
|
|
12
12
|
export function createToolRegistry(options) {
|
|
13
13
|
const registry = {
|
|
@@ -162,6 +162,39 @@ export function createToolRegistry(options) {
|
|
|
162
162
|
return gitLog({ maxCount, path }, ctx.targetRepoPath);
|
|
163
163
|
},
|
|
164
164
|
},
|
|
165
|
+
'get-pr-checks': {
|
|
166
|
+
name: 'get-pr-checks',
|
|
167
|
+
description: 'Poll GitHub check runs for a commit or branch. Waits for all checks to complete (up to timeout) and returns pass/fail status.',
|
|
168
|
+
parameters: {
|
|
169
|
+
type: 'object',
|
|
170
|
+
properties: {
|
|
171
|
+
ref: {
|
|
172
|
+
type: 'string',
|
|
173
|
+
description: 'Git ref to check (branch name, commit SHA, or PR head ref)',
|
|
174
|
+
},
|
|
175
|
+
timeoutSeconds: {
|
|
176
|
+
type: 'number',
|
|
177
|
+
description: 'Maximum seconds to wait for checks to complete (default: 120)',
|
|
178
|
+
},
|
|
179
|
+
intervalSeconds: {
|
|
180
|
+
type: 'number',
|
|
181
|
+
description: 'Seconds between poll attempts (default: 15)',
|
|
182
|
+
},
|
|
183
|
+
},
|
|
184
|
+
required: ['ref'],
|
|
185
|
+
},
|
|
186
|
+
execute: async (params, ctx) => {
|
|
187
|
+
const { ref, timeoutSeconds, intervalSeconds } = params;
|
|
188
|
+
const [owner, repo] = ctx.project.source.repo.replace('github:', '').split('/');
|
|
189
|
+
return getPrChecks({
|
|
190
|
+
owner: owner,
|
|
191
|
+
repo: repo,
|
|
192
|
+
ref,
|
|
193
|
+
timeoutSeconds,
|
|
194
|
+
intervalSeconds,
|
|
195
|
+
});
|
|
196
|
+
},
|
|
197
|
+
},
|
|
165
198
|
'create-pr': {
|
|
166
199
|
name: 'create-pr',
|
|
167
200
|
description: 'Create a GitHub pull request. The branch must be pushed to the remote first using git-push; creation will fail if the branch does not exist on the remote.',
|
|
package/dist/tools/tool-groups.js
CHANGED
|
@@ -12,7 +12,7 @@
|
|
|
12
12
|
*/
|
|
13
13
|
const CODE_READ = ['read-file', 'list-directory', 'search-codebase'];
|
|
14
14
|
const CODE_WRITE = ['write-file'];
|
|
15
|
-
const GIT_READ = ['git-diff', 'git-log'];
|
|
15
|
+
const GIT_READ = ['git-diff', 'git-log', 'get-pr-checks'];
|
|
16
16
|
const GIT_WRITE = ['git-branch', 'git-commit', 'git-push', 'create-pr'];
|
|
17
17
|
const GIT_ALL = [...GIT_READ, ...GIT_WRITE];
|
|
18
18
|
export const TOOL_GROUPS = {
|
|
package/dist/tools/tool-groups.test.js
CHANGED
|
@@ -13,7 +13,7 @@ describe('tool-groups', () => {
|
|
|
13
13
|
expect(TOOL_GROUPS['code']).toEqual(expect.arrayContaining(['read-file', 'write-file', 'list-directory', 'search-codebase']));
|
|
14
14
|
});
|
|
15
15
|
it('defines git-read group with read-only git tools', () => {
|
|
16
|
-
expect(TOOL_GROUPS['git-read']).toEqual(expect.arrayContaining(['git-diff', 'git-log']));
|
|
16
|
+
expect(TOOL_GROUPS['git-read']).toEqual(expect.arrayContaining(['git-diff', 'git-log', 'get-pr-checks']));
|
|
17
17
|
expect(TOOL_GROUPS['git-read']).not.toContain('git-branch');
|
|
18
18
|
expect(TOOL_GROUPS['git-read']).not.toContain('git-commit');
|
|
19
19
|
});
|
|
@@ -29,6 +29,7 @@ describe('tool-groups', () => {
|
|
|
29
29
|
'git-push',
|
|
30
30
|
'git-diff',
|
|
31
31
|
'git-log',
|
|
32
|
+
'get-pr-checks',
|
|
32
33
|
'create-pr',
|
|
33
34
|
]));
|
|
34
35
|
});
|
|
@@ -54,7 +55,7 @@ describe('tool-groups', () => {
|
|
|
54
55
|
});
|
|
55
56
|
it('expands git-read to read-only git tools', () => {
|
|
56
57
|
const result = expandToolNames(new Set(['git-read']));
|
|
57
|
-
expect(result).toEqual(new Set(['git-diff', 'git-log']));
|
|
58
|
+
expect(result).toEqual(new Set(['git-diff', 'git-log', 'get-pr-checks']));
|
|
58
59
|
});
|
|
59
60
|
it('expands git-write to mutating git tools', () => {
|
|
60
61
|
const result = expandToolNames(new Set(['git-write']));
|
|
@@ -70,6 +71,7 @@ describe('tool-groups', () => {
|
|
|
70
71
|
expect(result).toContain('read-file');
|
|
71
72
|
expect(result).toContain('git-branch');
|
|
72
73
|
expect(result).toContain('git-diff');
|
|
74
|
+
expect(result).toContain('get-pr-checks');
|
|
73
75
|
expect(result).toContain('run-command');
|
|
74
76
|
});
|
|
75
77
|
it('handles mix of group names and individual names', () => {
|
|
@@ -97,6 +99,7 @@ describe('tool-groups', () => {
|
|
|
97
99
|
'git-push',
|
|
98
100
|
'git-diff',
|
|
99
101
|
'git-log',
|
|
102
|
+
'get-pr-checks',
|
|
100
103
|
'create-pr',
|
|
101
104
|
'run-command',
|
|
102
105
|
]));
|
|
@@ -112,6 +115,7 @@ describe('tool-groups', () => {
|
|
|
112
115
|
const denied = expandToolNames(['write-code', 'git-write']);
|
|
113
116
|
expect(permitted).toContain('read-file');
|
|
114
117
|
expect(permitted).toContain('git-diff');
|
|
118
|
+
expect(permitted).toContain('get-pr-checks');
|
|
115
119
|
expect(permitted).toContain('run-command');
|
|
116
120
|
expect(permitted).not.toContain('write-file');
|
|
117
121
|
expect(permitted).not.toContain('git-branch');
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@helmiq/crew",
|
|
3
|
-
"version": "0.1.0",
|
|
3
|
+
"version": "0.2.1",
|
|
4
4
|
"description": "Artifact-centric autonomous delivery runtime",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"license": "MIT",
|
|
@@ -19,7 +19,11 @@
|
|
|
19
19
|
"engines": {
|
|
20
20
|
"node": ">=20.9.0"
|
|
21
21
|
},
|
|
22
|
+
"bin": {
|
|
23
|
+
"crew": "./bin/crew.cjs"
|
|
24
|
+
},
|
|
22
25
|
"files": [
|
|
26
|
+
"bin",
|
|
23
27
|
"dist",
|
|
24
28
|
"defaults"
|
|
25
29
|
],
|
|
@@ -35,9 +39,9 @@
|
|
|
35
39
|
"simple-git": "^3.33.0",
|
|
36
40
|
"yaml": "^2.8.3",
|
|
37
41
|
"zod": "^4.3.6",
|
|
38
|
-
"@helmiq/crew-code": "0.1.
|
|
39
|
-
"@helmiq/crew-git": "0.1.
|
|
40
|
-
"@helmiq/crew-shell": "0.1.
|
|
42
|
+
"@helmiq/crew-code": "0.1.1",
|
|
43
|
+
"@helmiq/crew-git": "0.1.1",
|
|
44
|
+
"@helmiq/crew-shell": "0.1.1"
|
|
41
45
|
},
|
|
42
46
|
"devDependencies": {
|
|
43
47
|
"@types/node": "^25.5.0",
|