@covibes/zeroshot 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. package/CHANGELOG.md +167 -0
  2. package/LICENSE +21 -0
  3. package/README.md +364 -0
  4. package/cli/index.js +3990 -0
  5. package/cluster-templates/base-templates/debug-workflow.json +181 -0
  6. package/cluster-templates/base-templates/full-workflow.json +455 -0
  7. package/cluster-templates/base-templates/single-worker.json +48 -0
  8. package/cluster-templates/base-templates/worker-validator.json +131 -0
  9. package/cluster-templates/conductor-bootstrap.json +122 -0
  10. package/cluster-templates/conductor-junior-bootstrap.json +69 -0
  11. package/docker/zeroshot-cluster/Dockerfile +132 -0
  12. package/lib/completion.js +174 -0
  13. package/lib/id-detector.js +53 -0
  14. package/lib/settings.js +97 -0
  15. package/lib/stream-json-parser.js +236 -0
  16. package/package.json +121 -0
  17. package/src/agent/agent-config.js +121 -0
  18. package/src/agent/agent-context-builder.js +241 -0
  19. package/src/agent/agent-hook-executor.js +329 -0
  20. package/src/agent/agent-lifecycle.js +555 -0
  21. package/src/agent/agent-stuck-detector.js +256 -0
  22. package/src/agent/agent-task-executor.js +1034 -0
  23. package/src/agent/agent-trigger-evaluator.js +67 -0
  24. package/src/agent-wrapper.js +459 -0
  25. package/src/agents/git-pusher-agent.json +20 -0
  26. package/src/attach/attach-client.js +438 -0
  27. package/src/attach/attach-server.js +543 -0
  28. package/src/attach/index.js +35 -0
  29. package/src/attach/protocol.js +220 -0
  30. package/src/attach/ring-buffer.js +121 -0
  31. package/src/attach/socket-discovery.js +242 -0
  32. package/src/claude-task-runner.js +468 -0
  33. package/src/config-router.js +80 -0
  34. package/src/config-validator.js +598 -0
  35. package/src/github.js +103 -0
  36. package/src/isolation-manager.js +1042 -0
  37. package/src/ledger.js +429 -0
  38. package/src/logic-engine.js +223 -0
  39. package/src/message-bus-bridge.js +139 -0
  40. package/src/message-bus.js +202 -0
  41. package/src/name-generator.js +232 -0
  42. package/src/orchestrator.js +1938 -0
  43. package/src/schemas/sub-cluster.js +156 -0
  44. package/src/sub-cluster-wrapper.js +545 -0
  45. package/src/task-runner.js +28 -0
  46. package/src/template-resolver.js +347 -0
  47. package/src/tui/CHANGES.txt +133 -0
  48. package/src/tui/LAYOUT.md +261 -0
  49. package/src/tui/README.txt +192 -0
  50. package/src/tui/TWO-LEVEL-NAVIGATION.md +186 -0
  51. package/src/tui/data-poller.js +325 -0
  52. package/src/tui/demo.js +208 -0
  53. package/src/tui/formatters.js +123 -0
  54. package/src/tui/index.js +193 -0
  55. package/src/tui/keybindings.js +383 -0
  56. package/src/tui/layout.js +317 -0
  57. package/src/tui/renderer.js +194 -0
package/cluster-templates/base-templates/debug-workflow.json
@@ -0,0 +1,181 @@
+{
+  "name": "Debug Workflow",
+  "description": "Investigator → Fixer → Tester. For DEBUG tasks at SIMPLE+ complexity.",
+  "params": {
+    "investigator_model": {
+      "type": "string",
+      "enum": ["haiku", "sonnet", "opus"],
+      "default": "sonnet"
+    },
+    "fixer_model": {
+      "type": "string",
+      "enum": ["haiku", "sonnet", "opus"],
+      "default": "sonnet"
+    },
+    "tester_model": {
+      "type": "string",
+      "enum": ["haiku", "sonnet", "opus"],
+      "default": "sonnet"
+    },
+    "max_iterations": { "type": "number", "default": 10 },
+    "max_tokens": { "type": "number", "default": 100000 }
+  },
+  "agents": [
+    {
+      "id": "investigator",
+      "role": "planning",
+      "model": "{{investigator_model}}",
+      "outputFormat": "json",
+      "jsonSchema": {
+        "type": "object",
+        "properties": {
+          "successCriteria": {
+            "type": "string",
+            "description": "Measurable criteria that means user's request is FULLY satisfied"
+          },
+          "failureInventory": {
+            "type": "array",
+            "items": { "type": "string" },
+            "description": "Complete list of all failures/errors found"
+          },
+          "rootCauses": {
+            "type": "array",
+            "items": { "type": "string" },
+            "description": "All independent root causes identified"
+          },
+          "evidence": { "type": "array", "items": { "type": "string" } },
+          "fixPlan": { "type": "string" },
+          "affectedFiles": { "type": "array", "items": { "type": "string" } }
+        },
+        "required": ["successCriteria", "failureInventory", "rootCauses", "fixPlan"]
+      },
+      "prompt": {
+        "system": "## 🚫 YOU CANNOT ASK QUESTIONS\n\nYou are running non-interactively. There is NO USER to answer.\n- NEVER use AskUserQuestion tool\n- NEVER say \"Should I...\" or \"Would you like...\"\n- When unsure: Make the SAFER choice and proceed.\n\nYou are a debugging investigator.\n\n## CRITICAL: DEFINE SUCCESS FIRST\n\nBefore investigating, define what SUCCESS looks like from the USER's perspective:\n- User says 'fix failing tests' → success = ALL tests pass (0 failures)\n- User says 'fix the build' → success = build completes with exit 0\n- User says 'fix deployment' → success = deployment succeeds\n\nThis becomes your successCriteria. The task is NOT DONE until successCriteria is met.\n\n## Investigation Process\n\n1. **ENUMERATE ALL FAILURES FIRST**\n - Run the failing command/tests\n - List EVERY failure, error, and issue (not just the first one)\n - This is your failureInventory\n\n2. **Analyze for ROOT CAUSES (may be multiple)**\n - Group failures by likely cause\n - There may be 1 root cause or 5 - find them ALL\n - Don't stop at the first one you find\n\n3. **Gather evidence for each root cause**\n - Stack traces, logs, error messages\n - Prove each hypothesis\n\n4. **Plan fixes for ALL root causes**\n - The fix plan must address EVERY root cause\n - When complete, successCriteria must be achievable\n\n## Output\n- successCriteria: Measurable condition (e.g., '0 test failures', 'build exits 0')\n- failureInventory: COMPLETE list of all failures found\n- rootCauses: ALL independent root causes (array, may be 1 or many)\n- evidence: Proof for each root cause\n- fixPlan: How to fix ALL root causes\n- affectedFiles: All files that need changes\n\n## CRITICAL\n- Do NOT narrow scope - enumerate EVERYTHING broken\n- Do NOT stop at first root cause - there may be more\n- successCriteria comes from USER INTENT, not from what you find"
+      },
+      "contextStrategy": {
+        "sources": [{ "topic": "ISSUE_OPENED", "limit": 1 }],
+        "format": "chronological",
+        "maxTokens": "{{max_tokens}}"
+      },
+      "triggers": [{ "topic": "ISSUE_OPENED", "action": "execute_task" }],
+      "hooks": {
+        "onComplete": {
+          "action": "publish_message",
+          "config": {
+            "topic": "INVESTIGATION_COMPLETE",
+            "content": {
+              "text": "{{result.fixPlan}}",
+              "data": {
+                "successCriteria": "{{result.successCriteria}}",
+                "failureInventory": "{{result.failureInventory}}",
+                "rootCauses": "{{result.rootCauses}}",
+                "evidence": "{{result.evidence}}",
+                "affectedFiles": "{{result.affectedFiles}}"
+              }
+            }
+          }
+        }
+      }
+    },
+    {
+      "id": "fixer",
+      "role": "implementation",
+      "model": "{{fixer_model}}",
+      "outputFormat": "stream-json",
+      "prompt": {
+        "system": "## 🚫 YOU CANNOT ASK QUESTIONS\n\nYou are running non-interactively. There is NO USER to answer.\n- NEVER use AskUserQuestion tool\n- NEVER say \"Should I...\" or \"Would you like...\"\n- When unsure: Make the SAFER choice and proceed.\n\nYou are a bug fixer. Apply the fix from the investigator.\n\n## Your Job\nFix the root cause identified in INVESTIGATION_COMPLETE.\n\n## Fix Guidelines\n- Fix the ROOT CAUSE, not just the symptom\n- Make minimal changes (don't refactor unrelated code)\n- Add comments explaining WHY if fix is non-obvious\n- Consider if same bug exists elsewhere\n\n## After Fixing\n- Run the failing tests to verify fix works\n- Run related tests for regressions\n- Add test case that would catch this bug if it recurs\n\n## 🚀 LARGE TASKS - USE SUB-AGENTS\n\nIf task affects >10 files OR >50 errors, DO NOT fix manually. Use the Task tool to spawn parallel sub-agents:\n\n1. **Analyze scope first** - Count files/errors, group by directory or error type\n2. **Spawn sub-agents** - One per group, run in parallel\n3. **Choose model wisely:**\n - **haiku**: Mechanical fixes (unused vars, missing imports, simple type annotations)\n - **sonnet**: Complex fixes (refactoring, logic changes, architectural decisions)\n4. **Aggregate results** - Wait for all sub-agents, verify combined fix\n\nExample Task tool usage:\n```\nTask(prompt=\"Fix all @typescript-eslint/no-unused-vars errors in client/src/components/features/agents/. Prefix intentionally unused params with underscore, remove genuinely unused variables.\", model=\"haiku\")\n```\n\nDO NOT waste iterations doing manual work that sub-agents can parallelize.\n\n## 🔴 FORBIDDEN - DO NOT FUCKING DO THESE\n\nThese are SHORTCUTS that HIDE problems instead of FIXING them:\n\n- ❌ NEVER disable or suppress errors/warnings (config changes, disable comments, ignore directives)\n- ❌ NEVER change test expectations to match broken behavior\n- ❌ NEVER use unsafe type casts or `any` to silence type errors\n- ❌ NEVER add TODO/FIXME instead of actually fixing\n- ❌ NEVER work around the problem - FIX THE ACTUAL CODE\n\nIF THE PROBLEM STILL EXISTS BUT IS HIDDEN, YOU HAVE NOT FIXED IT.\n\n## On Rejection - READ THE FUCKING FEEDBACK\n\nWhen tester rejects:\n1. STOP. READ what they wrote. UNDERSTAND the issue.\n2. If same problem persists → your fix is WRONG, try DIFFERENT approach\n3. If new problems appeared → your fix BROKE something, REVERT and rethink\n4. Do NOT blindly retry the same approach\n5. If you are STUCK, say so. Do not waste iterations doing nothing.\n\nRepeating failed approaches = wasted time and money. LEARN from rejection."
+      },
+      "contextStrategy": {
+        "sources": [
+          { "topic": "ISSUE_OPENED", "limit": 1 },
+          { "topic": "INVESTIGATION_COMPLETE", "limit": 1 },
+          { "topic": "VALIDATION_RESULT", "since": "last_task_end", "limit": 5 }
+        ],
+        "format": "chronological",
+        "maxTokens": "{{max_tokens}}"
+      },
+      "triggers": [
+        { "topic": "INVESTIGATION_COMPLETE", "action": "execute_task" },
+        {
+          "topic": "VALIDATION_RESULT",
+          "logic": {
+            "engine": "javascript",
+            "script": "const lastResult = ledger.findLast({ topic: 'VALIDATION_RESULT' });\nreturn lastResult?.content?.data?.approved === false || lastResult?.content?.data?.approved === 'false';"
+          },
+          "action": "execute_task"
+        }
+      ],
+      "hooks": {
+        "onComplete": {
+          "action": "publish_message",
+          "config": {
+            "topic": "FIX_APPLIED",
+            "content": {
+              "text": "Bug fix applied. Ready for test verification."
+            }
+          }
+        }
+      },
+      "maxIterations": "{{max_iterations}}"
+    },
+    {
+      "id": "tester",
+      "role": "validator",
+      "model": "{{tester_model}}",
+      "outputFormat": "json",
+      "jsonSchema": {
+        "type": "object",
+        "properties": {
+          "approved": { "type": "boolean" },
+          "summary": { "type": "string" },
+          "errors": { "type": "array", "items": { "type": "string" } },
+          "testResults": { "type": "string" }
+        },
+        "required": ["approved", "summary"]
+      },
+      "prompt": {
+        "system": "## 🚫 YOU CANNOT ASK QUESTIONS\n\nYou are running non-interactively. There is NO USER to answer.\n- NEVER use AskUserQuestion tool\n- NEVER say \"Should I...\" or \"Would you like...\"\n- When unsure: Make the SAFER choice and proceed.\n\nYou are a bug fix tester. Verify the fix FULLY satisfies the user's request.\n\n## CRITICAL: SUCCESS CRITERIA IS THE ONLY GATE\n\nThe investigator defined successCriteria (e.g., '0 test failures', 'build exits 0').\nThis is the ONLY condition that matters for approval.\n\n**THE RULE IS SIMPLE:**\n- Run the command that verifies successCriteria\n- If it PASSES (exit 0) → APPROVE (proceed to test quality check)\n- If it FAILS (exit non-0) → REJECT (don't check test quality)\n\nDo NOT rationalize. Do NOT make exceptions. Do NOT distinguish between 'related' and 'unrelated' failures.\nIf successCriteria says 'all tests pass' and ANY test fails → REJECT. Period.\n\n## FORBIDDEN RATIONALIZATIONS\n- ❌ 'The original bug is fixed but a new unrelated bug appeared' → REJECT (tests still fail)\n- ❌ 'This is a test environment issue' → REJECT (tests still fail)\n- ❌ 'The failure is not in failureInventory' → REJECT (successCriteria not met)\n- ❌ 'Progress was made' → REJECT (not done until successCriteria met)\n\n## Verification Process\n1. Read successCriteria from INVESTIGATION_COMPLETE\n2. Run the EXACT command (e.g., 'npm run test:e2e:safe')\n3. Check exit code: 0 = APPROVE, non-0 = REJECT\n4. If APPROVED: Check if new tests were added as part of fix\n5. If new tests added: Verify test quality (see Test Quality Check below)\n\n## Test Quality Check (Only if new tests added)\n\nIf the fix includes new or modified tests, verify quality:\n\n**REJECT if ANY of these test antipatterns found:**\n1. **Verification theater** - Tests with no real assertions (just `expect(result).toBeDefined()`)\n2. **Mocking expected results** - Mock returns exact value being asserted\n3. **Timing dependencies** - Tests use arbitrary timeouts (setTimeout without promises)\n4. **Missing isolation** - Tests share state, make real network/DB calls\n\n**How to verify:**\n- Read new/modified test files (use Read tool)\n- Check assertions are meaningful (verify values, not just existence)\n- Search for timing dependencies (`setTimeout`, `sleep`)\n- Search for shared state (module-level variables modified in tests)\n- Search for real external calls (`fetch`, `axios`, `prisma` without mocks)\n\nIf test quality issues found → REJECT with specific criterion violated.\n\n## Output\n- approved: true ONLY if successCriteria command exits 0 AND (no new tests OR new tests pass quality check)\n- summary: 'SUCCESS CRITERIA MET' or 'SUCCESS CRITERIA NOT MET: [reason]' (include test quality issues if found)\n- errors: ALL failures from the command output PLUS test quality issues if found\n- testResults: Full command output with exit code"
+      },
+      "contextStrategy": {
+        "sources": [
+          { "topic": "ISSUE_OPENED", "limit": 1 },
+          { "topic": "INVESTIGATION_COMPLETE", "limit": 1 },
+          { "topic": "FIX_APPLIED", "since": "last_agent_start", "limit": 1 }
+        ],
+        "format": "chronological",
+        "maxTokens": "{{max_tokens}}"
+      },
+      "triggers": [{ "topic": "FIX_APPLIED", "action": "execute_task" }],
+      "hooks": {
+        "onComplete": {
+          "action": "publish_message",
+          "config": {
+            "topic": "VALIDATION_RESULT",
+            "content": {
+              "text": "{{result.summary}}",
+              "data": {
+                "approved": "{{result.approved}}",
+                "errors": "{{result.errors}}",
+                "testResults": "{{result.testResults}}"
+              }
+            }
+          }
+        }
+      }
+    },
+    {
+      "id": "completion-detector",
+      "role": "orchestrator",
+      "triggers": [
+        {
+          "topic": "VALIDATION_RESULT",
+          "logic": {
+            "engine": "javascript",
+            "script": "const lastResult = ledger.findLast({ topic: 'VALIDATION_RESULT' });\nreturn lastResult?.content?.data?.approved === true || lastResult?.content?.data?.approved === 'true';"
+          },
+          "action": "stop_cluster"
+        }
+      ]
+    }
+  ]
+}
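Read as a whole, the template wires a three-stage loop over a shared message bus: ISSUE_OPENED triggers the investigator, whose onComplete hook publishes INVESTIGATION_COMPLETE; that triggers the fixer, which publishes FIX_APPLIED; the tester consumes that and publishes VALIDATION_RESULT, which either re-triggers the fixer (approved is false) or lets the completion-detector stop the cluster (approved is true). A toy simulation of that topic chain, taking only the wiring from the template; `publish`, `dispatch`, and the agent stubs are illustrative names, not the package's API:

```js
// Toy dispatcher for the debug-workflow topic chain. Agent execution is
// stubbed; only the topic-to-agent wiring comes from the template.
const ledger = [];

const agentStubs = {
  investigator: () => publish("INVESTIGATION_COMPLETE", {}),
  fixer: () => publish("FIX_APPLIED", {}),
  // First run rejects, second approves, to exercise the retry loop.
  tester: (() => {
    let runs = 0;
    return () => publish("VALIDATION_RESULT", { approved: ++runs > 1 });
  })(),
};

function findLast(topic) {
  return [...ledger].reverse().find((m) => m.topic === topic);
}

function publish(topic, data) {
  ledger.push({ topic, content: { data } });
  console.log("->", topic, data);
  dispatch(topic);
}

function dispatch(topic) {
  if (topic === "ISSUE_OPENED") agentStubs.investigator();
  else if (topic === "INVESTIGATION_COMPLETE") agentStubs.fixer();
  else if (topic === "FIX_APPLIED") agentStubs.tester();
  else if (topic === "VALIDATION_RESULT") {
    const ok = findLast("VALIDATION_RESULT")?.content?.data?.approved;
    if (ok === true || ok === "true") console.log("completion-detector: stop_cluster");
    else agentStubs.fixer(); // rejected: fixer re-executes with the feedback in context
  }
}

publish("ISSUE_OPENED", {}); // kick off: investigator -> fixer -> tester -> ...
```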
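The `{{...}}` placeholders appear in two positions: template params such as `{{fixer_model}}` and `{{max_tokens}}`, resolved when the template is instantiated, and `{{result.*}}` paths, resolved from an agent's JSON output when its onComplete hook fires. How the package actually performs the substitution is not shown in this file; a minimal sketch of one plausible resolver, assuming a simple dotted-path lookup:

```js
// Hypothetical placeholder resolver: replaces "{{a.b.c}}" with the value at
// that path in `scope`. The real template-resolver.js may behave differently,
// e.g. keeping arrays like failureInventory structured instead of stringified.
function interpolate(value, scope) {
  return value.replace(/\{\{([\w.]+)\}\}/g, (match, path) => {
    const resolved = path.split(".").reduce((obj, key) => obj?.[key], scope);
    return resolved === undefined ? match : String(resolved);
  });
}

// Example: resolving the investigator's onComplete hook against its output.
const result = {
  fixPlan: "Patch the null check in session handling",
  successCriteria: "0 test failures",
};
console.log(interpolate("{{result.fixPlan}}", { result }));
// -> "Patch the null check in session handling"
console.log(interpolate("{{result.successCriteria}}", { result }));
// -> "0 test failures"
```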
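The `logic` triggers embed JavaScript that runs against a `ledger` object exposing at least `findLast({ topic })`. Notably, both scripts check the boolean and the string form of `approved`, presumably because `{{result.approved}}` can arrive stringified through hook interpolation. The actual execution environment in logic-engine.js is not visible here; a sketch of one way such a script could be evaluated, assuming the script body is wrapped in a function that receives the ledger:

```js
// Hypothetical evaluator for a trigger's logic script. Assumes the script is
// a plain JavaScript function body whose return value decides whether the
// trigger fires; the real engine may sandbox this differently.
function evaluateTrigger(script, messages) {
  const ledger = {
    findLast: ({ topic }) =>
      [...messages].reverse().find((m) => m.topic === topic),
  };
  return new Function("ledger", script)(ledger);
}

// The completion-detector's script, verbatim from the template:
const script =
  "const lastResult = ledger.findLast({ topic: 'VALIDATION_RESULT' });\n" +
  "return lastResult?.content?.data?.approved === true || lastResult?.content?.data?.approved === 'true';";

const messages = [
  { topic: "VALIDATION_RESULT", content: { data: { approved: true } } },
];
console.log(evaluateTrigger(script, messages)); // -> true
```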