teamcast 1.1.0 → 1.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -0
- package/dist/registry/instruction-fragments.js +11 -5
- package/dist/renderers/claude/docs.js +49 -9
- package/dist/renderers/claude/skill-md.js +2 -2
- package/dist/utils/prompts.js +1 -1
- package/package.json +1 -1
- package/templates/presets/feature-team.yaml +5 -11
- package/templates/presets/red-blue.yaml +127 -0
package/README.md
CHANGED
|
@@ -117,6 +117,7 @@ teamcast clean --yes # skip clean confirmation
|
|
|
117
117
|
| `solo-dev` | developer | single full-stack agent handles end-to-end: plan, implement, test, verify |
|
|
118
118
|
| `research-and-build` | orchestrator, researcher, planner, developer | research-first: orchestrator routes to researcher for external info, planner integrates findings, developer implements |
|
|
119
119
|
| `secure-dev` | orchestrator, planner, developer, security-auditor, reviewer | mandatory security pipeline: planner includes threat model, developer follows OWASP, security-auditor gates every change, reviewer checks quality |
|
|
120
|
+
| `red-blue` | orchestrator, red-agent, blue-agent, judge | adversarial hardening: red attacks with failing tests, blue fixes without weakening them, judge decides SHIP or next round |
|
|
120
121
|
|
|
121
122
|
The built-in preset files live in `templates/presets/` and are valid TeamCast YAML. Use them as a reference when creating custom presets, or copy one as a starting point:
|
|
122
123
|
|
|
@@ -46,12 +46,18 @@ const INSTRUCTION_FRAGMENTS = {
|
|
|
46
46
|
].join('\n')),
|
|
47
47
|
'solo-dev-style': block('style', 'Follow existing code style. Keep changes minimal and focused.'),
|
|
48
48
|
'feature-orchestrator-workflow': block('workflow', [
|
|
49
|
-
'
|
|
50
|
-
'-
|
|
51
|
-
'-
|
|
52
|
-
'-
|
|
49
|
+
'Classify every incoming task before acting:',
|
|
50
|
+
'- META (git operations, read file, explain code, answer a question) -> handle directly',
|
|
51
|
+
'- MICRO (typo, rename, 1-2 line fix) -> handle directly',
|
|
52
|
+
'- SMALL (bug fix, isolated change, single module, <50 lines) -> delegate to developer only',
|
|
53
|
+
'- MEDIUM (new feature, refactor touching multiple files) -> planner -> developer -> reviewer',
|
|
54
|
+
'- LARGE (complex feature, cross-cutting concern, new subsystem) -> planner -> developer -> reviewer with detailed handoff context',
|
|
55
|
+
'- CRITICAL (security-sensitive change, breaking API, data migration, auth/permissions) -> Do NOT handle autonomously. Summarize scope and risks, then return control to the user for supervised coordination.',
|
|
56
|
+
].join('\n')),
|
|
57
|
+
'feature-orchestrator-output': block('delegation', [
|
|
58
|
+
'When handling directly: be concise, do not explain your triage decision.',
|
|
59
|
+
'When delegating: state the goal, relevant files, and expected output format.',
|
|
53
60
|
].join('\n')),
|
|
54
|
-
'feature-orchestrator-output': block('delegation', 'Never write code or modify files yourself. Your output is always a delegation or a final summary.'),
|
|
55
61
|
'feature-planner-workflow': block('workflow', 'Always read the relevant files before making conclusions. Search for existing patterns and utilities that can be reused.'),
|
|
56
62
|
'feature-planner-read-only': block('safety', 'Your output is always a plan - never code changes.'),
|
|
57
63
|
'feature-reviewer-checklist': block('workflow', [
|
|
@@ -1,4 +1,52 @@
|
|
|
1
1
|
import { mapPoliciesToClaudePermissions } from './policy-mapper.js';
|
|
2
|
+
function findDirectSpecialist(handoffs, agents) {
|
|
3
|
+
const writer = handoffs.find((id) => {
|
|
4
|
+
const agent = agents[id];
|
|
5
|
+
return agent?.runtime.tools?.some((t) => ['Write', 'Edit', 'MultiEdit'].includes(t));
|
|
6
|
+
});
|
|
7
|
+
return writer ?? handoffs[handoffs.length - 1];
|
|
8
|
+
}
|
|
9
|
+
function renderWorkflowSection(team, entryPoints) {
|
|
10
|
+
const lines = [];
|
|
11
|
+
// Use first entry point (typically the only orchestrator)
|
|
12
|
+
const [orchestratorId, orchestrator] = entryPoints[0];
|
|
13
|
+
const handoffs = orchestrator.metadata?.handoffs ?? [];
|
|
14
|
+
const specialist = findDirectSpecialist(handoffs, team.agents);
|
|
15
|
+
lines.push('## Workflow');
|
|
16
|
+
lines.push('');
|
|
17
|
+
lines.push('Classify every task by complexity before choosing a mode:');
|
|
18
|
+
lines.push('');
|
|
19
|
+
lines.push('| Level | Examples | Mode |');
|
|
20
|
+
lines.push('|-------|----------|------|');
|
|
21
|
+
lines.push('| META | explain code, git operations, answer question | Handle directly |');
|
|
22
|
+
lines.push('| MICRO | typo, rename, 1-2 line fix | Handle directly |');
|
|
23
|
+
lines.push(`| SMALL | bug fix, single module, <50 lines | Delegate to **${specialist}** |`);
|
|
24
|
+
lines.push(`| MEDIUM | new feature, refactor, 2-5 files | Delegate to **${orchestratorId}** |`);
|
|
25
|
+
lines.push('| LARGE | new subsystem, cross-cutting concern, 5+ files | Supervised coordination |');
|
|
26
|
+
lines.push('| CRITICAL | security change, breaking API, data migration | Supervised + user confirmation at each step |');
|
|
27
|
+
lines.push('');
|
|
28
|
+
// If there are additional entry points, show their chains too
|
|
29
|
+
if (entryPoints.length > 1) {
|
|
30
|
+
for (const [agentId, agent] of entryPoints) {
|
|
31
|
+
const agentChain = agent.metadata?.handoffs ? [agentId, ...agent.metadata.handoffs].join(' -> ') : agentId;
|
|
32
|
+
lines.push(`Pipeline **${agentId}**: \`${agentChain}\``);
|
|
33
|
+
}
|
|
34
|
+
lines.push('');
|
|
35
|
+
}
|
|
36
|
+
lines.push('### Supervised mode (LARGE / CRITICAL)');
|
|
37
|
+
lines.push('');
|
|
38
|
+
lines.push(`Do NOT delegate to **${orchestratorId}**. Personally coordinate the chain:`);
|
|
39
|
+
handoffs.forEach((agentId, index) => {
|
|
40
|
+
if (index < handoffs.length - 1) {
|
|
41
|
+
lines.push(`${index + 1}. Delegate to **${agentId}** — present result to user`);
|
|
42
|
+
}
|
|
43
|
+
else {
|
|
44
|
+
lines.push(`${index + 1}. Delegate to **${agentId}** — present result, decide next step`);
|
|
45
|
+
}
|
|
46
|
+
});
|
|
47
|
+
lines.push('');
|
|
48
|
+
return lines;
|
|
49
|
+
}
|
|
2
50
|
export function renderClaudeMd(team) {
|
|
3
51
|
const lines = [];
|
|
4
52
|
lines.push(`# ${team.project.name}`);
|
|
@@ -21,15 +69,7 @@ export function renderClaudeMd(team) {
|
|
|
21
69
|
return agent.runtime.tools?.includes('Agent') && (agent.metadata?.handoffs?.length ?? 0) > 0;
|
|
22
70
|
});
|
|
23
71
|
if (entryPoints.length > 0) {
|
|
24
|
-
lines.push(
|
|
25
|
-
lines.push('');
|
|
26
|
-
for (const [agentId, agent] of entryPoints) {
|
|
27
|
-
const chain = agent.metadata?.handoffs ? [agentId, ...agent.metadata.handoffs].join(' -> ') : agentId;
|
|
28
|
-
lines.push(`For complex tasks, start with **${agentId}**: \`${chain}\``);
|
|
29
|
-
}
|
|
30
|
-
lines.push('');
|
|
31
|
-
lines.push('For simple single-file changes, work directly without delegation.');
|
|
32
|
-
lines.push('');
|
|
72
|
+
lines.push(...renderWorkflowSection(team, entryPoints));
|
|
33
73
|
}
|
|
34
74
|
if (team.policies) {
|
|
35
75
|
lines.push('## Security Boundaries');
|
|
@@ -2,7 +2,7 @@ import { defaultRegistry } from '../../registry/index.js';
|
|
|
2
2
|
// --- Frontmatter ---
|
|
3
3
|
function buildFrontmatter(skill) {
|
|
4
4
|
const lines = ['---'];
|
|
5
|
-
lines.push(`name: ${skill.
|
|
5
|
+
lines.push(`name: ${skill.id}`);
|
|
6
6
|
lines.push(`description: ${skill.description}`);
|
|
7
7
|
if (skill.allowed_tools?.length) {
|
|
8
8
|
lines.push(`allowed-tools:`);
|
|
@@ -20,7 +20,7 @@ function generateSkillStub(skillName) {
|
|
|
20
20
|
.map((word) => word.charAt(0).toUpperCase() + word.slice(1))
|
|
21
21
|
.join(' ');
|
|
22
22
|
return `---
|
|
23
|
-
name: ${
|
|
23
|
+
name: ${skillName}
|
|
24
24
|
description: <!-- describe when this skill triggers -->
|
|
25
25
|
---
|
|
26
26
|
|
package/dist/utils/prompts.js
CHANGED
package/package.json
CHANGED
|
@@ -27,15 +27,14 @@ claude:
|
|
|
27
27
|
- planner
|
|
28
28
|
- developer
|
|
29
29
|
- reviewer
|
|
30
|
-
description:
|
|
31
|
-
|
|
32
|
-
directly.
|
|
30
|
+
description: Tech lead. Handles simple tasks directly, delegates complex
|
|
31
|
+
work to the right specialist. Triages by task size before acting.
|
|
33
32
|
model: opus
|
|
34
33
|
capability_traits:
|
|
35
34
|
- base-read
|
|
35
|
+
- file-authoring
|
|
36
|
+
- command-execution
|
|
36
37
|
- delegation
|
|
37
|
-
- no-file-edits
|
|
38
|
-
- no-commands
|
|
39
38
|
- no-web
|
|
40
39
|
skills:
|
|
41
40
|
- triage
|
|
@@ -44,12 +43,7 @@ claude:
|
|
|
44
43
|
instruction_blocks:
|
|
45
44
|
- kind: behavior
|
|
46
45
|
content: |
|
|
47
|
-
You are the
|
|
48
|
-
- Bug report or regression -> delegate to planner for root-cause analysis, then developer to fix
|
|
49
|
-
- New feature or enhancement -> delegate to planner for design, then developer to implement, then reviewer to sign off
|
|
50
|
-
- Refactor or cleanup -> delegate directly to developer, then reviewer
|
|
51
|
-
Never write or edit code yourself. Your output is always a delegation message or a final summary.
|
|
52
|
-
When delegating, state the goal, the relevant files, and the expected output format.
|
|
46
|
+
You are the tech lead. Handle simple tasks directly. Delegate complex work to the right specialist.
|
|
53
47
|
instruction_fragments:
|
|
54
48
|
- feature-orchestrator-workflow
|
|
55
49
|
- feature-orchestrator-output
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
version: "2"
|
|
2
|
+
preset_meta:
|
|
3
|
+
tags:
|
|
4
|
+
- security
|
|
5
|
+
- testing
|
|
6
|
+
- adversarial
|
|
7
|
+
min_version: "2"
|
|
8
|
+
project:
|
|
9
|
+
name: placeholder
|
|
10
|
+
preset: red-blue
|
|
11
|
+
description: Adversarial hardening team. Red agent attacks with failing tests, blue agent defends with fixes, judge decides when to ship.
|
|
12
|
+
claude:
|
|
13
|
+
policies:
|
|
14
|
+
fragments:
|
|
15
|
+
- allow-git-read
|
|
16
|
+
- allow-git-write
|
|
17
|
+
- ask-git-push
|
|
18
|
+
- deny-destructive-shell
|
|
19
|
+
- deny-network-downloads
|
|
20
|
+
- deny-dynamic-exec
|
|
21
|
+
- deny-env-files
|
|
22
|
+
- sandbox-default
|
|
23
|
+
agents:
|
|
24
|
+
orchestrator:
|
|
25
|
+
forge:
|
|
26
|
+
handoffs:
|
|
27
|
+
- red-agent
|
|
28
|
+
- blue-agent
|
|
29
|
+
- judge
|
|
30
|
+
description: Manages adversarial rounds. Routes between red, blue, and judge.
|
|
31
|
+
Ships when judge approves or max rounds reached.
|
|
32
|
+
model: opus
|
|
33
|
+
capability_traits:
|
|
34
|
+
- base-read
|
|
35
|
+
- delegation
|
|
36
|
+
- no-file-edits
|
|
37
|
+
- no-commands
|
|
38
|
+
- no-web
|
|
39
|
+
max_turns: 30
|
|
40
|
+
instruction_blocks:
|
|
41
|
+
- kind: behavior
|
|
42
|
+
content: |
|
|
43
|
+
You are the round manager. Run adversarial rounds between red-agent and blue-agent.
|
|
44
|
+
Track the current round number (start at 1, maximum 3).
|
|
45
|
+
Round flow: red-agent -> blue-agent -> judge.
|
|
46
|
+
If judge returns SHIP or round >= 3: deliver final report and stop.
|
|
47
|
+
If judge returns ROUND N+1: increment round, pass the judge hint to red-agent.
|
|
48
|
+
Never write or modify code yourself.
|
|
49
|
+
- kind: delegation
|
|
50
|
+
content: |
|
|
51
|
+
When delegating to red-agent: include the target scope and any judge hint from the previous round.
|
|
52
|
+
When delegating to blue-agent: include red's attack report and the list of failing test files.
|
|
53
|
+
When delegating to judge: include both red's attack report and blue's fix report.
|
|
54
|
+
red-agent:
|
|
55
|
+
description: Attacker. Finds weaknesses and writes failing tests. Never
|
|
56
|
+
modifies production code.
|
|
57
|
+
model: sonnet
|
|
58
|
+
capability_traits:
|
|
59
|
+
- base-read
|
|
60
|
+
- file-authoring
|
|
61
|
+
- command-execution
|
|
62
|
+
- no-web
|
|
63
|
+
skills:
|
|
64
|
+
- security-check
|
|
65
|
+
- test-first
|
|
66
|
+
instruction_blocks:
|
|
67
|
+
- kind: behavior
|
|
68
|
+
content: |
|
|
69
|
+
You are the attacker. Your goal is to break the code through tests.
|
|
70
|
+
Read the target code carefully. Find: edge cases, invalid inputs, null paths,
|
|
71
|
+
boundary conditions, type coercion issues, missing error handling, race conditions.
|
|
72
|
+
Write tests that expose these weaknesses. Run them — confirm they FAIL before reporting.
|
|
73
|
+
If the judge gave you a hint for this round, focus your attack on that angle.
|
|
74
|
+
- kind: safety
|
|
75
|
+
content: |
|
|
76
|
+
Write only test files. Never edit, create, or delete production source files.
|
|
77
|
+
Each test must have a clear name describing what weakness it exposes.
|
|
78
|
+
Only include tests that actually fail in your report.
|
|
79
|
+
blue-agent:
|
|
80
|
+
description: Defender. Makes red's failing tests pass without deleting or
|
|
81
|
+
weakening them.
|
|
82
|
+
model: sonnet
|
|
83
|
+
capability_traits:
|
|
84
|
+
- base-read
|
|
85
|
+
- file-authoring
|
|
86
|
+
- command-execution
|
|
87
|
+
- no-web
|
|
88
|
+
skills:
|
|
89
|
+
- clean-code
|
|
90
|
+
- secure-coding
|
|
91
|
+
instruction_blocks:
|
|
92
|
+
- kind: behavior
|
|
93
|
+
content: |
|
|
94
|
+
You are the defender. Make every failing test from red-agent pass.
|
|
95
|
+
Fix the root cause — do not delete, skip, or weaken any test.
|
|
96
|
+
Run the full test suite after your fixes. All tests must be green before reporting.
|
|
97
|
+
Keep changes minimal and focused. Do not refactor beyond what is needed to pass the tests.
|
|
98
|
+
- kind: safety
|
|
99
|
+
content: |
|
|
100
|
+
Never delete, skip (.skip), or modify red's test files.
|
|
101
|
+
If a test appears wrong, flag it in your report — do not remove it.
|
|
102
|
+
judge:
|
|
103
|
+
description: Evaluates blue's fixes. Returns SHIP if solid, or ROUND N+1
|
|
104
|
+
with a new attack hint for red.
|
|
105
|
+
model: sonnet
|
|
106
|
+
capability_traits:
|
|
107
|
+
- base-read
|
|
108
|
+
- command-execution
|
|
109
|
+
- no-file-edits
|
|
110
|
+
- no-web
|
|
111
|
+
skills:
|
|
112
|
+
- code-review
|
|
113
|
+
- security-check
|
|
114
|
+
instruction_blocks:
|
|
115
|
+
- kind: behavior
|
|
116
|
+
content: |
|
|
117
|
+
You are the judge. Read red's attack report and blue's fix report.
|
|
118
|
+
Evaluate: did blue fix the root cause, or just suppress the symptom?
|
|
119
|
+
Look for new attack surfaces introduced by blue's changes.
|
|
120
|
+
Check for: try/catch that swallows errors, conditions that only handle
|
|
121
|
+
the tested input, hardcoded values that mask the real problem.
|
|
122
|
+
- kind: style
|
|
123
|
+
content: |
|
|
124
|
+
Return exactly one of:
|
|
125
|
+
SHIP — fixes are solid, no new surfaces, ready to merge.
|
|
126
|
+
ROUND N+1: <specific hint> — what angle red should try next.
|
|
127
|
+
Be concrete in hints (e.g. "try concurrent calls to X", "pass null for Y").
|