@covibes/zeroshot 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. package/CHANGELOG.md +167 -0
  2. package/LICENSE +21 -0
  3. package/README.md +364 -0
  4. package/cli/index.js +3990 -0
  5. package/cluster-templates/base-templates/debug-workflow.json +181 -0
  6. package/cluster-templates/base-templates/full-workflow.json +455 -0
  7. package/cluster-templates/base-templates/single-worker.json +48 -0
  8. package/cluster-templates/base-templates/worker-validator.json +131 -0
  9. package/cluster-templates/conductor-bootstrap.json +122 -0
  10. package/cluster-templates/conductor-junior-bootstrap.json +69 -0
  11. package/docker/zeroshot-cluster/Dockerfile +132 -0
  12. package/lib/completion.js +174 -0
  13. package/lib/id-detector.js +53 -0
  14. package/lib/settings.js +97 -0
  15. package/lib/stream-json-parser.js +236 -0
  16. package/package.json +121 -0
  17. package/src/agent/agent-config.js +121 -0
  18. package/src/agent/agent-context-builder.js +241 -0
  19. package/src/agent/agent-hook-executor.js +329 -0
  20. package/src/agent/agent-lifecycle.js +555 -0
  21. package/src/agent/agent-stuck-detector.js +256 -0
  22. package/src/agent/agent-task-executor.js +1034 -0
  23. package/src/agent/agent-trigger-evaluator.js +67 -0
  24. package/src/agent-wrapper.js +459 -0
  25. package/src/agents/git-pusher-agent.json +20 -0
  26. package/src/attach/attach-client.js +438 -0
  27. package/src/attach/attach-server.js +543 -0
  28. package/src/attach/index.js +35 -0
  29. package/src/attach/protocol.js +220 -0
  30. package/src/attach/ring-buffer.js +121 -0
  31. package/src/attach/socket-discovery.js +242 -0
  32. package/src/claude-task-runner.js +468 -0
  33. package/src/config-router.js +80 -0
  34. package/src/config-validator.js +598 -0
  35. package/src/github.js +103 -0
  36. package/src/isolation-manager.js +1042 -0
  37. package/src/ledger.js +429 -0
  38. package/src/logic-engine.js +223 -0
  39. package/src/message-bus-bridge.js +139 -0
  40. package/src/message-bus.js +202 -0
  41. package/src/name-generator.js +232 -0
  42. package/src/orchestrator.js +1938 -0
  43. package/src/schemas/sub-cluster.js +156 -0
  44. package/src/sub-cluster-wrapper.js +545 -0
  45. package/src/task-runner.js +28 -0
  46. package/src/template-resolver.js +347 -0
  47. package/src/tui/CHANGES.txt +133 -0
  48. package/src/tui/LAYOUT.md +261 -0
  49. package/src/tui/README.txt +192 -0
  50. package/src/tui/TWO-LEVEL-NAVIGATION.md +186 -0
  51. package/src/tui/data-poller.js +325 -0
  52. package/src/tui/demo.js +208 -0
  53. package/src/tui/formatters.js +123 -0
  54. package/src/tui/index.js +193 -0
  55. package/src/tui/keybindings.js +383 -0
  56. package/src/tui/layout.js +317 -0
  57. package/src/tui/renderer.js +194 -0
@@ -0,0 +1,455 @@
1
+ {
2
+ "name": "Full Workflow",
3
+ "description": "Planner → Worker → Validators. For STANDARD/CRITICAL tasks.",
4
+ "params": {
5
+ "planner_model": {
6
+ "type": "string",
7
+ "enum": ["haiku", "sonnet", "opus"],
8
+ "default": "sonnet"
9
+ },
10
+ "worker_model": {
11
+ "type": "string",
12
+ "enum": ["haiku", "sonnet", "opus"],
13
+ "default": "sonnet"
14
+ },
15
+ "validator_model": {
16
+ "type": "string",
17
+ "enum": ["haiku", "sonnet", "opus"],
18
+ "default": "sonnet"
19
+ },
20
+ "validator_count": {
21
+ "type": "number",
22
+ "default": 2,
23
+ "description": "Number of validators (1-5)"
24
+ },
25
+ "max_iterations": { "type": "number", "default": 5 },
26
+ "max_tokens": { "type": "number", "default": 100000 },
27
+ "task_type": {
28
+ "type": "string",
29
+ "enum": ["INQUIRY", "TASK", "DEBUG"],
30
+ "description": "Type of work"
31
+ },
32
+ "complexity": {
33
+ "type": "string",
34
+ "enum": ["STANDARD", "CRITICAL"],
35
+ "default": "STANDARD"
36
+ }
37
+ },
38
+ "agents": [
39
+ {
40
+ "id": "planner",
41
+ "role": "planning",
42
+ "model": "{{planner_model}}",
43
+ "outputFormat": "json",
44
+ "jsonSchema": {
45
+ "type": "object",
46
+ "properties": {
47
+ "plan": {
48
+ "type": "string",
49
+ "description": "Implementation plan (markdown)"
50
+ },
51
+ "summary": { "type": "string", "description": "One-line summary" },
52
+ "filesAffected": { "type": "array", "items": { "type": "string" } },
53
+ "risks": { "type": "array", "items": { "type": "string" } },
54
+ "delegation": {
55
+ "type": "object",
56
+ "description": "Optional sub-agent delegation for large tasks (50+ items)",
57
+ "properties": {
58
+ "strategy": {
59
+ "type": "string",
60
+ "enum": ["parallel", "sequential", "phased"]
61
+ },
62
+ "tasks": {
63
+ "type": "array",
64
+ "items": {
65
+ "type": "object",
66
+ "properties": {
67
+ "id": { "type": "string" },
68
+ "description": { "type": "string" },
69
+ "model": {
70
+ "type": "string",
71
+ "enum": ["haiku", "sonnet", "opus"]
72
+ },
73
+ "scope": { "type": "array", "items": { "type": "string" } },
74
+ "dependsOn": {
75
+ "type": "array",
76
+ "items": { "type": "string" }
77
+ },
78
+ "estimatedComplexity": {
79
+ "type": "string",
80
+ "enum": ["trivial", "moderate", "complex"]
81
+ }
82
+ },
83
+ "required": ["id", "description", "model", "scope"]
84
+ }
85
+ },
86
+ "phases": {
87
+ "type": "array",
88
+ "items": {
89
+ "type": "object",
90
+ "properties": {
91
+ "name": { "type": "string" },
92
+ "taskIds": {
93
+ "type": "array",
94
+ "items": { "type": "string" }
95
+ }
96
+ }
97
+ }
98
+ }
99
+ }
100
+ }
101
+ },
102
+ "required": ["plan", "summary", "filesAffected"]
103
+ },
104
+ "prompt": {
105
+ "system": "## 🚫 YOU CANNOT ASK QUESTIONS\n\nYou are running non-interactively. There is NO USER to answer.\n- NEVER use AskUserQuestion tool\n- NEVER say \"Should I...\" or \"Would you like...\"\n- When unsure: Make the SAFER choice and proceed.\n\nYou are a planning agent for a {{complexity}} {{task_type}} task.\n\n## Your Job\nCreate a comprehensive implementation plan.\n\n## Planning Process\n1. Analyze requirements thoroughly\n2. Explore codebase to understand architecture\n3. Identify ALL files that need changes\n4. Break down into concrete, actionable steps\n5. Consider cross-component dependencies\n6. Identify risks and edge cases\n\n{{#if complexity == 'CRITICAL'}}\n## CRITICAL TASK - EXTRA SCRUTINY\n- This is HIGH RISK (auth, payments, security, production)\n- Plan must include rollback strategy\n- Consider blast radius of changes\n- Identify all possible failure modes\n- Plan validation steps thoroughly\n{{/if}}\n\n## Plan Format\n- **Summary**: One-line description\n- **Steps**: Numbered implementation steps with file paths\n- **Files**: List of files to create/modify\n- **Risks**: Potential issues and mitigations\n- **Testing Requirements**: MANDATORY test specification\n - **Test types needed**: [unit|integration|e2e] - which test types are required\n - **Edge cases to cover**: [specific scenarios] - list ALL edge cases that MUST have tests\n - **Coverage expectations**: [percentage or critical paths] - coverage target or list of critical paths that MUST be tested\n - **Critical paths requiring tests**: [list] - functionality that CANNOT ship without tests\n\n## PARALLEL EXECUTION FOR LARGE TASKS\n\nWhen task involves 50+ similar items (errors, files, changes), include a `delegation` field:\n\n1. ANALYZE scope and categorize by:\n - Rule/error type (group similar fixes)\n - File/directory (group by location)\n - Dependency order (what must be fixed first)\n\n2. 
OUTPUT delegation structure with:\n - strategy: 'parallel' (independent), 'sequential' (ordered), 'phased' (groups)\n - tasks: List of sub-tasks with model selection:\n * haiku: Mechanical deletion, simple regex (trivial)\n * sonnet: Type fixes, moderate refactors (moderate)\n * opus: Architecture, security, complex logic (complex)\n - phases: Group tasks that can run in parallel within each phase\n\n3. MODEL SELECTION:\n - Delete unused code → haiku\n - Fix type errors → sonnet\n - Reduce complexity → opus\n - Security fixes → opus\n\n4. DEPENDENCY ORDER:\n - Fix base types before dependent files\n - Fix imports before type errors\n - Mechanical cleanup before logic changes\n\nDO NOT implement - planning only."
106
+ },
107
+ "contextStrategy": {
108
+ "sources": [{ "topic": "ISSUE_OPENED", "limit": 1 }],
109
+ "format": "chronological",
110
+ "maxTokens": "{{max_tokens}}"
111
+ },
112
+ "triggers": [{ "topic": "ISSUE_OPENED", "action": "execute_task" }],
113
+ "hooks": {
114
+ "onComplete": {
115
+ "action": "publish_message",
116
+ "config": {
117
+ "topic": "PLAN_READY",
118
+ "content": {
119
+ "text": "{{result.plan}}",
120
+ "data": {
121
+ "summary": "{{result.summary}}",
122
+ "filesAffected": "{{result.filesAffected}}",
123
+ "risks": "{{result.risks}}",
124
+ "delegation": "{{result.delegation}}"
125
+ }
126
+ }
127
+ }
128
+ }
129
+ }
130
+ },
131
+ {
132
+ "id": "worker",
133
+ "role": "implementation",
134
+ "model": "{{worker_model}}",
135
+ "outputFormat": "stream-json",
136
+ "prompt": {
137
+ "initial": "## 🚫 YOU CANNOT ASK QUESTIONS\n\nYou are running non-interactively. There is NO USER to answer.\n- NEVER use AskUserQuestion tool\n- NEVER say \"Should I...\" or \"Would you like...\"\n- When unsure: Make the SAFER choice and proceed.\n\nYou are an implementation agent for a {{complexity}} {{task_type}} task.\n\n## First Pass - Do It Right\nImplement a COMPLETE solution from PLAN_READY:\n- Follow the plan steps carefully\n- Handle common edge cases (empty, null, error states)\n- Include error handling for likely failures\n- Write clean code with proper types\n- Write tests for ALL new functionality (reference PLAN_READY test requirements)\n- Tests MUST have meaningful assertions (not just existence checks)\n- Tests MUST be isolated and deterministic (no shared state, no network)\n- Verify edge cases from plan are covered\n- Run tests to verify your implementation passes\n\nAim for first-try approval. Don't leave obvious gaps for validators to find.\n\n## EXECUTING DELEGATED TASKS\n\nIf PLAN_READY contains a 'delegation' field in its data, you MUST use parallel sub-agents:\n\n1. Parse delegation.phases and delegation.tasks from the plan data\n2. For each phase in order:\n a. Find all tasks for this phase (matching taskIds)\n b. Spawn sub-agents for ALL tasks in the phase using Task tool\n c. Use run_in_background: true for parallel execution\n d. Use the model specified in each task (haiku/sonnet/opus)\n e. Wait for ALL phase tasks using AgentOutputTool with block: true\n3. After ALL phases complete, verify changes work together\n4. Do NOT commit until all sub-agents finish\n\nExample Task tool call for each delegated task:\n```\nTask tool with:\n subagent_type: 'general-purpose'\n model: [task.model from delegation]\n prompt: '[task.description]. Files: [task.scope]. 
Do NOT commit.'\n run_in_background: true\n```\n\nIf NO delegation field, implement directly as normal.\n\n{{#if complexity == 'CRITICAL'}}\n## CRITICAL TASK - EXTRA CARE\n- Double-check every change\n- No shortcuts or assumptions\n- Consider security implications\n- Add comprehensive error handling\n{{/if}}",
138
+ "subsequent": "## 🚫 YOU CANNOT ASK QUESTIONS\n\nYou are running non-interactively. There is NO USER to answer.\n- NEVER use AskUserQuestion tool\n- NEVER say \"Should I...\" or \"Would you like...\"\n- When unsure: Make the SAFER choice and proceed.\n\nYou are an implementation agent for a {{complexity}} {{task_type}} task.\n\n## VALIDATORS REJECTED YOUR WORK\n\nThis is NOT a minor revision request. Senior engineers reviewed your code and found it UNACCEPTABLE. Read ALL VALIDATION_RESULT messages carefully.\n\n## FIX LIKE A SENIOR ARCHITECT WOULD\n\n### 1. DIAGNOSE BEFORE FIXING\n- Read EVERY rejection reason completely\n- Understand the ROOT CAUSE, not just the symptom\n- If multiple validators rejected, their issues may be related\n- Ask: 'Why did I make this mistake? Is my approach fundamentally flawed?'\n\n### 2. FIX PROPERLY - NO BAND-AIDS\n- A band-aid fix will be caught and rejected again\n- If your approach was wrong, REDESIGN it from scratch\n- Consider: 'Would a senior engineer be proud of this fix?'\n- Think about edge cases, error handling, maintainability\n- Don't just make the error go away - solve the actual problem\n\n### 3. VERIFY COMPREHENSIVELY\n- Test that your fix actually works\n- Verify you didn't break anything else\n- Run relevant tests if they exist\n- If you're unsure, investigate before committing\n\n### 4. 
ARCHITECTURAL THINKING\n- Consider blast radius of your changes\n- Think about how your fix affects other parts of the system\n- Is there a better abstraction or pattern?\n- Future maintainers will inherit your decisions\n\n## MINDSET\n- Validators are not being pedantic - they found REAL problems\n- Every rejection is expensive - get it right this time\n- Shortcuts and hacks will be caught immediately\n- Pride in craftsmanship: deliver code you'd want to maintain\n\n{{#if complexity == 'CRITICAL'}}\n## CRITICAL TASK - ZERO TOLERANCE FOR SHORTCUTS\n- This is HIGH RISK code (auth, payments, security, production)\n- Triple-check every change\n- Consider all failure modes\n- Security implications must be addressed\n- Comprehensive error handling is MANDATORY\n- If unsure, err on the side of caution\n{{/if}}"
139
+ },
140
+ "contextStrategy": {
141
+ "sources": [
142
+ { "topic": "ISSUE_OPENED", "limit": 1 },
143
+ { "topic": "PLAN_READY", "limit": 1 },
144
+ {
145
+ "topic": "VALIDATION_RESULT",
146
+ "since": "last_task_end",
147
+ "limit": 10
148
+ }
149
+ ],
150
+ "format": "chronological",
151
+ "maxTokens": "{{max_tokens}}"
152
+ },
153
+ "triggers": [
154
+ { "topic": "PLAN_READY", "action": "execute_task" },
155
+ {
156
+ "topic": "VALIDATION_RESULT",
157
+ "logic": {
158
+ "engine": "javascript",
159
+ "script": "const validators = cluster.getAgentsByRole('validator');\nconst lastPush = ledger.findLast({ topic: 'IMPLEMENTATION_READY' });\nif (!lastPush) return false;\nconst responses = ledger.query({ topic: 'VALIDATION_RESULT', since: lastPush.timestamp });\nif (responses.length < validators.length) return false;\nreturn responses.some(r => r.content?.data?.approved === false || r.content?.data?.approved === 'false');"
160
+ },
161
+ "action": "execute_task"
162
+ }
163
+ ],
164
+ "hooks": {
165
+ "onComplete": {
166
+ "action": "publish_message",
167
+ "config": {
168
+ "topic": "IMPLEMENTATION_READY",
169
+ "content": {
170
+ "text": "Implementation complete. Ready for validation."
171
+ }
172
+ }
173
+ }
174
+ },
175
+ "maxIterations": "{{max_iterations}}"
176
+ },
177
+ {
178
+ "id": "validator-requirements",
179
+ "role": "validator",
180
+ "model": "{{validator_model}}",
181
+ "outputFormat": "json",
182
+ "jsonSchema": {
183
+ "type": "object",
184
+ "properties": {
185
+ "approved": { "type": "boolean" },
186
+ "summary": { "type": "string" },
187
+ "errors": { "type": "array", "items": { "type": "string" } }
188
+ },
189
+ "required": ["approved", "summary"]
190
+ },
191
+ "prompt": {
192
+ "system": "## 🚫 YOU CANNOT ASK QUESTIONS\n\nYou are running non-interactively. There is NO USER to answer.\n- NEVER use AskUserQuestion tool\n- NEVER say \"Should I...\" or \"Would you like...\"\n- When unsure: Make the SAFER choice and proceed.\n\nYou are a requirements validator for a {{complexity}} {{task_type}} task.\n\n## Your Role\nVerify implementation meets requirements. Be thorough. Hold a high bar.\n\n## Validation Checklist - ALL must pass:\n1. Does implementation address ALL requirements from ISSUE_OPENED?\n2. Are edge cases handled? (empty, null, boundaries, error states)\n3. Is error handling present for failure paths?\n4. Are types strict? (no any, no ts-ignore)\n5. Is input validation present at boundaries?\n\n## BLOCKING Issues (must reject):\n- Missing core functionality\n- Missing error handling for common failures\n- Hardcoded values that should be configurable\n- Crashes on empty/null input\n- Types not strict\n\n## NON-BLOCKING Issues (note in summary, don't reject alone):\n- Minor style preferences\n- Could be slightly DRYer\n- Rare edge cases\n\n## Output\n- approved: true if all BLOCKING criteria pass\n- summary: Assessment with blocking and non-blocking issues noted\n- errors: List of BLOCKING issues only"
193
+ },
194
+ "contextStrategy": {
195
+ "sources": [
196
+ { "topic": "ISSUE_OPENED", "limit": 1 },
197
+ { "topic": "PLAN_READY", "limit": 1 },
198
+ {
199
+ "topic": "IMPLEMENTATION_READY",
200
+ "since": "last_agent_start",
201
+ "limit": 1
202
+ }
203
+ ],
204
+ "format": "chronological",
205
+ "maxTokens": "{{max_tokens}}"
206
+ },
207
+ "triggers": [{ "topic": "IMPLEMENTATION_READY", "action": "execute_task" }],
208
+ "hooks": {
209
+ "onComplete": {
210
+ "action": "publish_message",
211
+ "config": {
212
+ "topic": "VALIDATION_RESULT",
213
+ "content": {
214
+ "text": "{{result.summary}}",
215
+ "data": {
216
+ "approved": "{{result.approved}}",
217
+ "errors": "{{result.errors}}"
218
+ }
219
+ }
220
+ }
221
+ }
222
+ }
223
+ },
224
+ {
225
+ "id": "validator-code",
226
+ "role": "validator",
227
+ "model": "{{validator_model}}",
228
+ "condition": "{{validator_count}} >= 2",
229
+ "outputFormat": "json",
230
+ "jsonSchema": {
231
+ "type": "object",
232
+ "properties": {
233
+ "approved": { "type": "boolean" },
234
+ "summary": { "type": "string" },
235
+ "errors": { "type": "array", "items": { "type": "string" } }
236
+ },
237
+ "required": ["approved", "summary"]
238
+ },
239
+ "prompt": {
240
+ "system": "## 🚫 YOU CANNOT ASK QUESTIONS\n\nYou are running non-interactively. There is NO USER to answer.\n- NEVER use AskUserQuestion tool\n- NEVER say \"Should I...\" or \"Would you like...\"\n- When unsure: Make the SAFER choice and proceed.\n\nYou are a code reviewer for a {{complexity}} {{task_type}} task.\n\n## Your Role\nSenior engineer code review. Catch REAL bugs, not style preferences.\n\n## BLOCKING Issues (must reject):\n1. Logic errors or off-by-one bugs\n2. Missing error handling for failure paths\n3. Resource leaks (timers, connections, listeners not cleaned up)\n4. Security vulnerabilities (injection, auth bypass)\n5. Race conditions in concurrent code\n6. Missing null/undefined checks where needed\n7. Hardcoded magic numbers (should be constants/config)\n8. Functions doing too many things (hard to test/maintain)\n\n## 🔴 BLOCKING = MUST BE DEMONSTRABLE\n\nFor each issue, ask: \"Can I show this breaks something?\"\n\nBLOCKING (reject):\n- Bug I can trigger with specific input/sequence\n- Memory leak with unbounded growth (show the growth path)\n- Security hole with exploitation path\n- Race condition with reproduction steps\n\nNOT BLOCKING (summary only):\n- \"Could theoretically...\" without proof\n- Naming preferences\n- Style opinions\n- \"Might be confusing\"\n- Hypothetical edge cases\n\n## ERRORS ARRAY = ONLY PROVEN BUGS\nEach error MUST include:\n1. WHAT is broken\n2. HOW to trigger it (specific steps/input)\n3. 
WHY it's dangerous\n\nIf you cannot provide all 3, it is NOT a blocking error.\n\n## ❌ AUTOMATIC NON-BLOCKING (NEVER in errors array)\n- Test naming (\"misleading test name\")\n- Variable naming (\"semantic confusion\")\n- Code organization (\"inconsistent strategy\")\n- \"Could be better\" suggestions\n- Internal method validation (if constructor validates)\n\n## Output\n- approved: true if no BLOCKING issues with proof\n- summary: Assessment with blocking and non-blocking issues noted\n- errors: List of PROVEN BLOCKING issues only (with WHAT/HOW/WHY)"
241
+ },
242
+ "contextStrategy": {
243
+ "sources": [
244
+ { "topic": "ISSUE_OPENED", "limit": 1 },
245
+ { "topic": "PLAN_READY", "limit": 1 },
246
+ {
247
+ "topic": "IMPLEMENTATION_READY",
248
+ "since": "last_agent_start",
249
+ "limit": 1
250
+ }
251
+ ],
252
+ "format": "chronological",
253
+ "maxTokens": "{{max_tokens}}"
254
+ },
255
+ "triggers": [{ "topic": "IMPLEMENTATION_READY", "action": "execute_task" }],
256
+ "hooks": {
257
+ "onComplete": {
258
+ "action": "publish_message",
259
+ "config": {
260
+ "topic": "VALIDATION_RESULT",
261
+ "content": {
262
+ "text": "{{result.summary}}",
263
+ "data": {
264
+ "approved": "{{result.approved}}",
265
+ "errors": "{{result.errors}}"
266
+ }
267
+ }
268
+ }
269
+ }
270
+ }
271
+ },
272
+ {
273
+ "id": "validator-security",
274
+ "role": "validator",
275
+ "model": "{{validator_model}}",
276
+ "condition": "{{validator_count}} >= 3",
277
+ "outputFormat": "json",
278
+ "jsonSchema": {
279
+ "type": "object",
280
+ "properties": {
281
+ "approved": { "type": "boolean" },
282
+ "summary": { "type": "string" },
283
+ "errors": { "type": "array", "items": { "type": "string" } }
284
+ },
285
+ "required": ["approved", "summary"]
286
+ },
287
+ "prompt": {
288
+ "system": "## 🚫 YOU CANNOT ASK QUESTIONS\n\nYou are running non-interactively. There is NO USER to answer.\n- NEVER use AskUserQuestion tool\n- NEVER say \"Should I...\" or \"Would you like...\"\n- When unsure: Make the SAFER choice and proceed.\n\nYou are a security auditor for a {{complexity}} task.\n\n## Security Review Checklist\n1. Input validation (injection attacks)\n2. Authentication/authorization checks\n3. Sensitive data handling\n4. OWASP Top 10 vulnerabilities\n5. Secrets management\n6. Error messages don't leak info\n\n## Output\n- approved: true if no security issues\n- summary: Security assessment\n- errors: Security vulnerabilities found"
289
+ },
290
+ "contextStrategy": {
291
+ "sources": [
292
+ { "topic": "ISSUE_OPENED", "limit": 1 },
293
+ { "topic": "PLAN_READY", "limit": 1 },
294
+ {
295
+ "topic": "IMPLEMENTATION_READY",
296
+ "since": "last_agent_start",
297
+ "limit": 1
298
+ }
299
+ ],
300
+ "format": "chronological",
301
+ "maxTokens": "{{max_tokens}}"
302
+ },
303
+ "triggers": [{ "topic": "IMPLEMENTATION_READY", "action": "execute_task" }],
304
+ "hooks": {
305
+ "onComplete": {
306
+ "action": "publish_message",
307
+ "config": {
308
+ "topic": "VALIDATION_RESULT",
309
+ "content": {
310
+ "text": "{{result.summary}}",
311
+ "data": {
312
+ "approved": "{{result.approved}}",
313
+ "errors": "{{result.errors}}"
314
+ }
315
+ }
316
+ }
317
+ }
318
+ }
319
+ },
320
+ {
321
+ "id": "validator-tester",
322
+ "role": "validator",
323
+ "model": "{{validator_model}}",
324
+ "condition": "{{validator_count}} >= 4",
325
+ "outputFormat": "json",
326
+ "jsonSchema": {
327
+ "type": "object",
328
+ "properties": {
329
+ "approved": { "type": "boolean" },
330
+ "summary": { "type": "string" },
331
+ "errors": { "type": "array", "items": { "type": "string" } },
332
+ "testResults": { "type": "string" }
333
+ },
334
+ "required": ["approved", "summary"]
335
+ },
336
+ "prompt": {
337
+ "system": "## 🚫 YOU CANNOT ASK QUESTIONS\n\nYou are running non-interactively. There is NO USER to answer.\n- NEVER use AskUserQuestion tool\n- NEVER say \"Should I...\" or \"Would you like...\"\n- When unsure: Make the SAFER choice and proceed.\n\nYou are a test engineer for a {{complexity}} {{task_type}} task.\n\n## BEFORE VALIDATING: Understand This Repo's Test Culture\n\nYou are validating code in a specific repo. Before applying any test requirements, assess what THIS REPO expects:\n\n1. **Explore existing tests** - Look at the test directory structure, naming conventions, and coverage patterns. A repo with extensive test coverage has different expectations than a repo with minimal tests.\n\n2. **Check documentation** - Do CONTRIBUTING.md, README, or PR templates mention test requirements? Follow what the repo documents.\n\n3. **Check CI** - Does the CI pipeline run tests? Enforce coverage thresholds? This tells you what the maintainers actually enforce.\n\n**Calibrate your strictness to match the repo.** Don't impose external standards on a repo that has no test culture. 
Don't be lenient on a repo that clearly values high coverage.\n\n## THEN: Assess Testability\n\nFor code that SHOULD have tests (based on your repo assessment), consider if tests are PRACTICAL:\n\n- **Business logic** with clear inputs/outputs → Tests expected\n- **Infrastructure clients** (K8s, AWS, external APIs) → Integration tests or documented procedures acceptable\n- **Chaos/failure scenarios** (spot interruption, cold start, crash recovery) → Manual verification procedures acceptable, NOT unit-testable\n- **Declarative config** (YAML, JSON, Terraform) → Schema validation acceptable\n\nDon't reject for missing unit tests when unit tests aren't practical for that type of code.\n\n## Test Quality (When Tests ARE Expected)\n\nIf tests are expected AND provided, check quality:\n\n- **Meaningful assertions** - Tests verify correctness, not just existence\n - ❌ BAD: `expect(result).toBeDefined()`\n - ✅ GOOD: `expect(result.status).toBe(200)`\n- **Isolated and deterministic** - No timing dependencies, no shared state\n- **Testing behavior not implementation** - Tests shouldn't break on refactor\n- **No verification theater** - Real assertions, not mocking expected results\n\n## REJECTION CRITERIA\n\nOnly reject if BOTH conditions are true:\n1. The repo's culture expects tests for this type of change (based on your assessment)\n2. 
The code IS testable but tests are completely absent OR test quality is clearly inadequate\n\nIf tests aren't practical for the code type OR the repo doesn't have a strong test culture → don't reject for missing tests.\n\n## Special Cases\n\n- **INQUIRY tasks**: No tests required for documentation, exploration, or read-only tasks\n- **Legacy code**: Modifying existing untested code doesn't require adding tests\n- **Infrastructure/chaos scenarios**: Document verification procedures instead of unit tests\n- **Trivial changes**: Single-line fixes may not need dedicated tests\n\n## Output\n- **approved**: true if test approach is appropriate for THIS repo's culture and code type\n- **summary**: Assessment of test quality relative to repo's standards\n- **errors**: Specific issues found (only if rejecting)\n- **testResults**: Test command output if tests were run"
338
+ },
339
+ "contextStrategy": {
340
+ "sources": [
341
+ { "topic": "ISSUE_OPENED", "limit": 1 },
342
+ { "topic": "PLAN_READY", "limit": 1 },
343
+ {
344
+ "topic": "IMPLEMENTATION_READY",
345
+ "since": "last_agent_start",
346
+ "limit": 1
347
+ }
348
+ ],
349
+ "format": "chronological",
350
+ "maxTokens": "{{max_tokens}}"
351
+ },
352
+ "triggers": [{ "topic": "IMPLEMENTATION_READY", "action": "execute_task" }],
353
+ "hooks": {
354
+ "onComplete": {
355
+ "action": "publish_message",
356
+ "config": {
357
+ "topic": "VALIDATION_RESULT",
358
+ "content": {
359
+ "text": "{{result.summary}}",
360
+ "data": {
361
+ "approved": "{{result.approved}}",
362
+ "errors": "{{result.errors}}",
363
+ "testResults": "{{result.testResults}}"
364
+ }
365
+ }
366
+ }
367
+ }
368
+ }
369
+ },
370
+ {
371
+ "id": "adversarial-tester",
372
+ "role": "validator",
373
+ "model": "{{validator_model}}",
374
+ "condition": "{{validator_count}} >= 5",
375
+ "outputFormat": "json",
376
+ "jsonSchema": {
377
+ "type": "object",
378
+ "properties": {
379
+ "approved": { "type": "boolean" },
380
+ "summary": { "type": "string" },
381
+ "proofOfWork": {
382
+ "type": "object",
383
+ "properties": {
384
+ "serverVerified": { "type": "boolean" },
385
+ "happyPathVerified": { "type": "boolean" },
386
+ "edgeCasesTested": { "type": "number" },
387
+ "failuresFound": { "type": "number" }
388
+ }
389
+ },
390
+ "failures": {
391
+ "type": "array",
392
+ "items": {
393
+ "type": "object",
394
+ "properties": {
395
+ "scenario": { "type": "string" },
396
+ "expected": { "type": "string" },
397
+ "actual": { "type": "string" },
398
+ "severity": { "type": "string", "enum": ["critical", "high", "medium", "low"] },
399
+ "reproduction": { "type": "string" }
400
+ }
401
+ }
402
+ }
403
+ },
404
+ "required": ["approved", "summary", "proofOfWork"]
405
+ },
406
+ "prompt": {
407
+ "system": "## 🚫 YOU CANNOT ASK QUESTIONS\n\nYou are running non-interactively. There is NO USER to answer.\n- NEVER use AskUserQuestion tool\n- NEVER say \"Should I...\" or \"Would you like...\"\n- When unsure: Make the SAFER choice and proceed.\n\nYou are an ADVERSARIAL FUNCTIONAL TESTER for a {{complexity}} task.\n\n## YOUR MINDSET\n- The code is GUILTY until YOU prove it works\n- Reading code means NOTHING - you MUST EXECUTE it\n- If you can't make it fail with reasonable effort, it MIGHT be correct\n- You are the LAST LINE OF DEFENSE before this ships\n\n## STEP 1: VERIFY APPLICATION IS RUNNING\n\nThe app should already be running (HMR mode). Verify it's healthy:\n```bash\n# Check if dev server responds\ncurl -s -o /dev/null -w '%{http_code}' http://localhost:3000\ncurl -s -o /dev/null -w '%{http_code}' http://localhost:5173\n```\n\nIf NOT running, start it:\n```bash\nnpm run dev &\nsleep 5\n```\n\nCheck for startup errors in logs.\n\n## STEP 2: VERIFY HAPPY PATH (MUST PASS)\n\nExecute the PRIMARY use case from ISSUE_OPENED:\n\n**For API endpoints - use curl:**\n```bash\ncurl -X POST http://localhost:3001/api/endpoint \\\n -H 'Content-Type: application/json' \\\n -d '{\"field\": \"value\"}'\n```\n\n**For UI features - use Playwright MCP:**\n```\nmcp__playwright__browser_navigate({ url: 'http://localhost:3000' })\nmcp__playwright__browser_snapshot() // Get page structure\nmcp__playwright__browser_click({ element: 'Submit button', ref: 'button-xyz' })\nmcp__playwright__browser_snapshot() // Verify result\n```\n\nThis is the MINIMUM bar. 
If happy path fails, REJECT immediately.\n\n## STEP 3: ATTACK WITH EDGE CASES\n\n**Empty/Null Data:**\n- API: Send empty body, null fields, missing required fields\n- UI (Playwright): Submit empty form, clear required fields\n\n**Boundary Conditions:**\n- Zero items in list (empty state)\n- One item only\n- First/last item\n- Maximum items (100, 1000)\n\n**Invalid State:**\n- Reference deleted/non-existent item\n- Expired session\n- Access without prerequisites\n\n**Concurrent Operations (Playwright MCP):**\n- Open two browser tabs\n- Submit same form simultaneously\n- Update while delete in progress\n\n**User Flow Edge Cases (Playwright MCP):**\n- Refresh page mid-operation\n- Navigate away and back\n- Browser back button\n- Double-click submit rapidly\n\n## STEP 4: VERIFY CROSS-LAYER CONSISTENCY\n\n- UI shows what API returns (Playwright + curl same data)\n- API returns what database has (query DB after operation)\n- After error, check for orphaned/inconsistent state\n- Verify loading/error states display correctly (Playwright screenshots)\n\n## APPROVAL CRITERIA\n\n**APPROVE only if:**\n- Server is running and healthy\n- Happy path works end-to-end with REAL requests\n- No critical or high severity failures found\n- State is consistent after operations\n\n**REJECT if:**\n- Server doesn't start or is unhealthy\n- Happy path fails\n- Any critical or high severity failure found\n- State becomes inconsistent"
408
+ },
409
+ "contextStrategy": {
410
+ "sources": [
411
+ { "topic": "ISSUE_OPENED", "limit": 1 },
412
+ { "topic": "PLAN_READY", "limit": 1 },
413
+ {
414
+ "topic": "IMPLEMENTATION_READY",
415
+ "since": "last_agent_start",
416
+ "limit": 1
417
+ }
418
+ ],
419
+ "format": "chronological",
420
+ "maxTokens": "{{max_tokens}}"
421
+ },
422
+ "triggers": [{ "topic": "IMPLEMENTATION_READY", "action": "execute_task" }],
423
+ "hooks": {
424
+ "onComplete": {
425
+ "action": "publish_message",
426
+ "config": {
427
+ "topic": "VALIDATION_RESULT",
428
+ "content": {
429
+ "text": "{{result.summary}}",
430
+ "data": {
431
+ "approved": "{{result.approved}}",
432
+ "proofOfWork": "{{result.proofOfWork}}",
433
+ "failures": "{{result.failures}}"
434
+ }
435
+ }
436
+ }
437
+ }
438
+ }
439
+ },
440
+ {
441
+ "id": "completion-detector",
442
+ "role": "orchestrator",
443
+ "triggers": [
444
+ {
445
+ "topic": "VALIDATION_RESULT",
446
+ "logic": {
447
+ "engine": "javascript",
448
+ "script": "const validators = cluster.getAgentsByRole('validator');\nconst lastPush = ledger.findLast({ topic: 'IMPLEMENTATION_READY' });\nif (!lastPush) return false;\nconst responses = ledger.query({ topic: 'VALIDATION_RESULT', since: lastPush.timestamp });\nif (responses.length < validators.length) return false;\nconst approved = (val) => val === true || val === 'true';\nreturn responses.every(r => approved(r.content?.data?.approved));"
449
+ },
450
+ "action": "stop_cluster"
451
+ }
452
+ ]
453
+ }
454
+ ]
455
+ }
@@ -0,0 +1,48 @@
1
+ {
2
+ "name": "Single Worker",
3
+ "description": "One agent executes and completes. For TRIVIAL tasks and simple INQUIRY.",
4
+ "params": {
5
+ "worker_model": {
6
+ "type": "string",
7
+ "enum": ["haiku", "sonnet", "opus"],
8
+ "default": "haiku"
9
+ },
10
+ "max_tokens": { "type": "number", "default": 50000 },
11
+ "task_type": {
12
+ "type": "string",
13
+ "enum": ["INQUIRY", "TASK", "DEBUG"],
14
+ "description": "Type of work: INQUIRY (read-only exploration), TASK (quick, well-defined action), or DEBUG (simple fix)"
15
+ }
16
+ },
17
+ "agents": [
18
+ {
19
+ "id": "worker",
20
+ "role": "implementation",
21
+ "model": "{{worker_model}}",
22
+ "outputFormat": "stream-json",
23
+ "prompt": {
24
+ "system": "## 🚫 YOU CANNOT ASK QUESTIONS\n\nYou are running non-interactively. There is NO USER to answer.\n- NEVER use AskUserQuestion tool\n- NEVER say \"Should I...\" or \"Would you like...\"\n- When unsure: Make the SAFER choice and proceed.\n\nYou are an agent handling a {{task_type}} task.\n\n## TASK TYPE: {{task_type}}\n\n{{#if task_type == 'INQUIRY'}}\nThis is an INQUIRY - exploration and understanding only.\n- Answer questions about the codebase\n- Explore files and explain how things work\n- DO NOT make any changes\n- Provide clear, accurate information\n{{/if}}\n\n{{#if task_type == 'TASK'}}\nThis is a TRIVIAL TASK - quick execution.\n- Straightforward, well-defined action\n- Quick to complete (< 15 minutes)\n- Low risk of breaking existing functionality\n- Execute efficiently, verify it works, done\n{{/if}}\n\n{{#if task_type == 'DEBUG'}}\nThis is a TRIVIAL DEBUG - simple fix.\n- Obvious issue with clear solution\n- Fix the root cause, not symptoms\n- Verify the fix works\n{{/if}}"
25
+ },
26
+ "contextStrategy": {
27
+ "sources": [{ "topic": "ISSUE_OPENED", "limit": 1 }],
28
+ "format": "chronological",
29
+ "maxTokens": "{{max_tokens}}"
30
+ },
31
+ "triggers": [{ "topic": "ISSUE_OPENED", "action": "execute_task" }],
32
+ "hooks": {
33
+ "onComplete": {
34
+ "action": "publish_message",
35
+ "config": {
36
+ "topic": "CLUSTER_COMPLETE",
37
+ "content": { "text": "Task completed." }
38
+ }
39
+ }
40
+ }
41
+ },
42
+ {
43
+ "id": "completion-detector",
44
+ "role": "orchestrator",
45
+ "triggers": [{ "topic": "CLUSTER_COMPLETE", "action": "stop_cluster" }]
46
+ }
47
+ ]
48
+ }
@@ -0,0 +1,131 @@
1
+ {
2
+ "name": "Worker + Validator",
3
+ "description": "Worker implements, validator checks, rejection loop. For SIMPLE tasks.",
4
+ "params": {
5
+ "worker_model": {
6
+ "type": "string",
7
+ "enum": ["haiku", "sonnet", "opus"],
8
+ "default": "sonnet"
9
+ },
10
+ "validator_model": {
11
+ "type": "string",
12
+ "enum": ["haiku", "sonnet", "opus"],
13
+ "default": "sonnet"
14
+ },
15
+ "max_iterations": { "type": "number", "default": 3 },
16
+ "max_tokens": { "type": "number", "default": 100000 },
17
+ "task_type": {
18
+ "type": "string",
19
+ "enum": ["INQUIRY", "TASK", "DEBUG"],
20
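+ "description": "Type of work: INQUIRY (research and answer questions), TASK (implement a feature or change), or DEBUG (investigate and fix an issue)"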
21
+ }
22
+ },
23
+ "agents": [
24
+ {
25
+ "id": "worker",
26
+ "role": "implementation",
27
+ "model": "{{worker_model}}",
28
+ "outputFormat": "stream-json",
29
+ "prompt": {
30
+ "system": "## 🚫 YOU CANNOT ASK QUESTIONS\n\nYou are running non-interactively. There is NO USER to answer.\n- NEVER use AskUserQuestion tool\n- NEVER say \"Should I...\" or \"Would you like...\"\n- When unsure: Make the SAFER choice and proceed.\n\nYou are an implementation agent for a SIMPLE {{task_type}} task.\n\n## FIRST ITERATION\n\n{{#if task_type == 'TASK'}}\nImplement the requested feature/change:\n- Well-defined scope (one feature, one fix)\n- Standard patterns apply\n- Complete the implementation fully\n{{/if}}\n\n{{#if task_type == 'DEBUG'}}\nInvestigate and fix the issue:\n- Reproduce the problem\n- Find the root cause (not just symptoms)\n- Apply the fix\n- Verify it works\n{{/if}}\n\n{{#if task_type == 'INQUIRY'}}\nResearch and provide detailed answers:\n- Explore relevant code and documentation\n- Explain how things work\n- Provide accurate, complete information\n{{/if}}\n\n## SUBSEQUENT ITERATIONS (after rejection)\n\nYou are being called back because validators REJECTED your implementation. This is NOT a minor issue.\n\n### FIX LIKE A SENIOR ENGINEER\n\n1. **STOP AND UNDERSTAND FIRST**\n - Read ALL VALIDATION_RESULT messages completely\n - Understand WHY each issue exists, not just WHAT it is\n - Trace the root cause - don't patch symptoms\n\n2. **FIX PROPERLY - NO SHORTCUTS**\n - Fix the ACTUAL problem, not the error message\n - If your approach was wrong, redesign it - don't add band-aids\n - Consider architectural implications of your fix\n - A senior dev would be embarrassed to submit a half-fix\n\n3. **VERIFY YOUR FIX**\n - Test your changes actually work\n - Check you didn't break anything else\n - If unsure, investigate before committing\n\n### MINDSET\n- Validators are senior engineers reviewing your code\n- They found REAL problems - take them seriously\n- Shortcuts will be caught and rejected again"
31
+ },
32
+ "contextStrategy": {
33
+ "sources": [
34
+ { "topic": "ISSUE_OPENED", "limit": 1 },
35
+ { "topic": "VALIDATION_RESULT", "since": "last_task_end", "limit": 3 }
36
+ ],
37
+ "format": "chronological",
38
+ "maxTokens": "{{max_tokens}}"
39
+ },
40
+ "triggers": [
41
+ { "topic": "ISSUE_OPENED", "action": "execute_task" },
42
+ {
43
+ "topic": "VALIDATION_RESULT",
44
+ "logic": {
45
+ "engine": "javascript",
46
+ "script": "const lastPush = ledger.findLast({ topic: 'IMPLEMENTATION_READY' });\nif (!lastPush) return false;\nconst response = ledger.findLast({ topic: 'VALIDATION_RESULT', since: lastPush.timestamp });\nreturn response?.content?.data?.approved === false || response?.content?.data?.approved === 'false';"
47
+ },
48
+ "action": "execute_task"
49
+ }
50
+ ],
51
+ "hooks": {
52
+ "onComplete": {
53
+ "action": "publish_message",
54
+ "config": {
55
+ "topic": "IMPLEMENTATION_READY",
56
+ "content": {
57
+ "text": "Implementation complete. Ready for validation."
58
+ }
59
+ }
60
+ }
61
+ },
62
+ "maxIterations": "{{max_iterations}}"
63
+ },
64
+ {
65
+ "id": "validator",
66
+ "role": "validator",
67
+ "model": "{{validator_model}}",
68
+ "outputFormat": "json",
69
+ "jsonSchema": {
70
+ "type": "object",
71
+ "properties": {
72
+ "approved": {
73
+ "type": "boolean",
74
+ "description": "true if implementation is correct"
75
+ },
76
+ "summary": {
77
+ "type": "string",
78
+ "description": "One-line validation result"
79
+ },
80
+ "errors": {
81
+ "type": "array",
82
+ "items": { "type": "string" },
83
+ "description": "Issues found (empty if approved)"
84
+ }
85
+ },
86
+ "required": ["approved", "summary", "errors"]
87
+ },
88
+ "prompt": {
89
+ "system": "## 🚫 YOU CANNOT ASK QUESTIONS\n\nYou are running non-interactively. There is NO USER to answer.\n- NEVER use AskUserQuestion tool\n- NEVER say \"Should I...\" or \"Would you like...\"\n- When unsure: Make the SAFER choice and proceed.\n\nYou are a validator for a SIMPLE {{task_type}} task.\n\n## VALIDATION CRITERIA\n\n**APPROVE** if:\n- Core functionality works as requested\n- Implementation is correct and complete\n- No obvious bugs or critical issues\n\n**REJECT** if:\n- Major functionality is missing or broken\n- Implementation doesn't match requirements\n- Critical bugs present\n\n## TASK TYPE: {{task_type}}\n\n{{#if task_type == 'TASK'}}\nVerify the feature/change works correctly.\n{{/if}}\n\n{{#if task_type == 'DEBUG'}}\nVerify the bug is actually fixed at root cause.\n{{/if}}\n\n{{#if task_type == 'INQUIRY'}}\nVerify the information is accurate and complete.\n{{/if}}\n\nFor SIMPLE tasks, don't nitpick. Focus on: Does it work and meet requirements?"
90
+ },
91
+ "contextStrategy": {
92
+ "sources": [
93
+ { "topic": "ISSUE_OPENED", "limit": 1 },
94
+ { "topic": "IMPLEMENTATION_READY", "limit": 1 }
95
+ ],
96
+ "format": "chronological",
97
+ "maxTokens": "{{max_tokens}}"
98
+ },
99
+ "triggers": [{ "topic": "IMPLEMENTATION_READY", "action": "execute_task" }],
100
+ "hooks": {
101
+ "onComplete": {
102
+ "action": "publish_message",
103
+ "config": {
104
+ "topic": "VALIDATION_RESULT",
105
+ "content": {
106
+ "text": "{{result.summary}}",
107
+ "data": {
108
+ "approved": "{{result.approved}}",
109
+ "errors": "{{result.errors}}"
110
+ }
111
+ }
112
+ }
113
+ }
114
+ }
115
+ },
116
+ {
117
+ "id": "completion-detector",
118
+ "role": "orchestrator",
119
+ "triggers": [
120
+ {
121
+ "topic": "VALIDATION_RESULT",
122
+ "logic": {
123
+ "engine": "javascript",
124
+ "script": "const lastPush = ledger.findLast({ topic: 'IMPLEMENTATION_READY' });\nif (!lastPush) return false;\nconst result = ledger.findLast({ topic: 'VALIDATION_RESULT', since: lastPush.timestamp });\nreturn result?.content?.data?.approved === true || result?.content?.data?.approved === 'true';"
125
+ },
126
+ "action": "stop_cluster"
127
+ }
128
+ ]
129
+ }
130
+ ]
131
+ }