@covibes/zeroshot 5.2.1 → 5.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. package/CHANGELOG.md +174 -189
  2. package/README.md +199 -248
  3. package/cli/commands/providers.js +150 -0
  4. package/cli/index.js +214 -58
  5. package/cli/lib/first-run.js +40 -3
  6. package/cluster-templates/base-templates/debug-workflow.json +24 -78
  7. package/cluster-templates/base-templates/full-workflow.json +44 -145
  8. package/cluster-templates/base-templates/single-worker.json +23 -15
  9. package/cluster-templates/base-templates/worker-validator.json +47 -34
  10. package/cluster-templates/conductor-bootstrap.json +7 -5
  11. package/lib/docker-config.js +6 -1
  12. package/lib/provider-detection.js +59 -0
  13. package/lib/provider-names.js +56 -0
  14. package/lib/settings.js +191 -6
  15. package/lib/stream-json-parser.js +4 -238
  16. package/package.json +21 -5
  17. package/scripts/validate-templates.js +100 -0
  18. package/src/agent/agent-config.js +37 -13
  19. package/src/agent/agent-context-builder.js +64 -2
  20. package/src/agent/agent-hook-executor.js +82 -9
  21. package/src/agent/agent-lifecycle.js +53 -14
  22. package/src/agent/agent-task-executor.js +196 -194
  23. package/src/agent/output-extraction.js +200 -0
  24. package/src/agent/output-reformatter.js +175 -0
  25. package/src/agent/schema-utils.js +111 -0
  26. package/src/agent-wrapper.js +102 -30
  27. package/src/agents/git-pusher-agent.json +1 -1
  28. package/src/claude-task-runner.js +80 -30
  29. package/src/config-router.js +13 -13
  30. package/src/config-validator.js +231 -10
  31. package/src/github.js +36 -0
  32. package/src/isolation-manager.js +243 -154
  33. package/src/ledger.js +28 -6
  34. package/src/orchestrator.js +391 -96
  35. package/src/preflight.js +85 -82
  36. package/src/providers/anthropic/cli-builder.js +45 -0
  37. package/src/providers/anthropic/index.js +134 -0
  38. package/src/providers/anthropic/models.js +23 -0
  39. package/src/providers/anthropic/output-parser.js +159 -0
  40. package/src/providers/base-provider.js +181 -0
  41. package/src/providers/capabilities.js +51 -0
  42. package/src/providers/google/cli-builder.js +55 -0
  43. package/src/providers/google/index.js +116 -0
  44. package/src/providers/google/models.js +24 -0
  45. package/src/providers/google/output-parser.js +92 -0
  46. package/src/providers/index.js +75 -0
  47. package/src/providers/openai/cli-builder.js +122 -0
  48. package/src/providers/openai/index.js +135 -0
  49. package/src/providers/openai/models.js +21 -0
  50. package/src/providers/openai/output-parser.js +129 -0
  51. package/src/sub-cluster-wrapper.js +18 -3
  52. package/src/task-runner.js +8 -6
  53. package/src/tui/layout.js +20 -3
  54. package/task-lib/attachable-watcher.js +80 -78
  55. package/task-lib/claude-recovery.js +119 -0
  56. package/task-lib/commands/list.js +1 -1
  57. package/task-lib/commands/resume.js +3 -2
  58. package/task-lib/commands/run.js +12 -3
  59. package/task-lib/runner.js +59 -38
  60. package/task-lib/scheduler.js +2 -2
  61. package/task-lib/store.js +43 -30
  62. package/task-lib/watcher.js +81 -62
@@ -2,12 +2,15 @@
2
2
  "name": "Single Worker",
3
3
  "description": "One agent executes and completes. For TRIVIAL tasks and simple INQUIRY.",
4
4
  "params": {
5
- "worker_model": {
5
+ "worker_level": {
6
6
  "type": "string",
7
- "enum": ["haiku", "sonnet", "opus"],
8
- "default": "haiku"
7
+ "enum": ["level1", "level2", "level3"],
8
+ "default": "level1"
9
+ },
10
+ "max_tokens": {
11
+ "type": "number",
12
+ "default": 50000
9
13
  },
10
- "max_tokens": { "type": "number", "default": 50000 },
11
14
  "timeout": {
12
15
  "type": "number",
13
16
  "default": 0,
@@ -23,33 +26,38 @@
23
26
  {
24
27
  "id": "worker",
25
28
  "role": "implementation",
26
- "model": "{{worker_model}}",
29
+ "modelLevel": "{{worker_level}}",
27
30
  "timeout": "{{timeout}}",
28
31
  "prompt": {
29
32
  "system": "## 🚫 YOU CANNOT ASK QUESTIONS\n\nYou are running non-interactively. There is NO USER to answer.\n- NEVER use AskUserQuestion tool\n- NEVER say \"Should I...\" or \"Would you like...\"\n- When unsure: Make the SAFER choice and proceed.\n\nYou are an agent handling a {{task_type}} task.\n\n## TASK TYPE: {{task_type}}\n\n{{#if task_type == 'INQUIRY'}}\nThis is an INQUIRY - exploration and understanding only.\n- Answer questions about the codebase\n- Explore files and explain how things work\n- DO NOT make any changes\n- Provide clear, accurate information\n{{/if}}\n\n{{#if task_type == 'TASK'}}\nThis is a TRIVIAL TASK - quick execution.\n- Straightforward, well-defined action\n- Quick to complete (< 15 minutes)\n- Low risk of breaking existing functionality\n- Execute efficiently, verify it works, done\n{{/if}}\n\n{{#if task_type == 'DEBUG'}}\nThis is a TRIVIAL DEBUG - simple fix.\n- Obvious issue with clear solution\n- Fix the root cause, not symptoms\n- Verify the fix works\n{{/if}}"
30
33
  },
31
34
  "contextStrategy": {
32
- "sources": [{ "topic": "ISSUE_OPENED", "limit": 1 }],
35
+ "sources": [
36
+ {
37
+ "topic": "ISSUE_OPENED",
38
+ "limit": 1
39
+ }
40
+ ],
33
41
  "format": "chronological",
34
42
  "maxTokens": "{{max_tokens}}"
35
43
  },
36
- "triggers": [{ "topic": "ISSUE_OPENED", "action": "execute_task" }],
44
+ "triggers": [
45
+ {
46
+ "topic": "ISSUE_OPENED",
47
+ "action": "execute_task"
48
+ }
49
+ ],
37
50
  "hooks": {
38
51
  "onComplete": {
39
52
  "action": "publish_message",
40
53
  "config": {
41
54
  "topic": "CLUSTER_COMPLETE",
42
- "content": { "text": "Task completed." }
55
+ "content": {
56
+ "text": "Task completed."
57
+ }
43
58
  }
44
59
  }
45
60
  }
46
- },
47
- {
48
- "id": "completion-detector",
49
- "role": "orchestrator",
50
- "model": "haiku",
51
- "timeout": 0,
52
- "triggers": [{ "topic": "CLUSTER_COMPLETE", "action": "stop_cluster" }]
53
61
  }
54
62
  ]
55
63
  }
@@ -2,18 +2,24 @@
2
2
  "name": "Worker + Validator",
3
3
  "description": "Worker implements, validator checks, rejection loop. For SIMPLE tasks.",
4
4
  "params": {
5
- "worker_model": {
5
+ "worker_level": {
6
6
  "type": "string",
7
- "enum": ["haiku", "sonnet", "opus"],
8
- "default": "sonnet"
7
+ "enum": ["level1", "level2", "level3"],
8
+ "default": "level2"
9
9
  },
10
- "validator_model": {
10
+ "validator_level": {
11
11
  "type": "string",
12
- "enum": ["haiku", "sonnet", "opus"],
13
- "default": "sonnet"
12
+ "enum": ["level1", "level2", "level3"],
13
+ "default": "level2"
14
+ },
15
+ "max_iterations": {
16
+ "type": "number",
17
+ "default": 3
18
+ },
19
+ "max_tokens": {
20
+ "type": "number",
21
+ "default": 100000
14
22
  },
15
- "max_iterations": { "type": "number", "default": 3 },
16
- "max_tokens": { "type": "number", "default": 100000 },
17
23
  "timeout": {
18
24
  "type": "number",
19
25
  "default": 0,
@@ -29,21 +35,31 @@
29
35
  {
30
36
  "id": "worker",
31
37
  "role": "implementation",
32
- "model": "{{worker_model}}",
38
+ "modelLevel": "{{worker_level}}",
33
39
  "timeout": "{{timeout}}",
34
40
  "prompt": {
35
41
  "system": "## 🚫 YOU CANNOT ASK QUESTIONS\n\nYou are running non-interactively. There is NO USER to answer.\n- NEVER use AskUserQuestion tool\n- NEVER say \"Should I...\" or \"Would you like...\"\n- When unsure: Make the SAFER choice and proceed.\n\nYou are an implementation agent for a SIMPLE {{task_type}} task.\n\n## FIRST ITERATION\n\n{{#if task_type == 'TASK'}}\nImplement the requested feature/change:\n- Well-defined scope (one feature, one fix)\n- Standard patterns apply\n- Complete the implementation fully\n{{/if}}\n\n{{#if task_type == 'DEBUG'}}\nInvestigate and fix the issue:\n- Reproduce the problem\n- Find the root cause (not just symptoms)\n- Apply the fix\n- Verify it works\n{{/if}}\n\n{{#if task_type == 'INQUIRY'}}\nResearch and provide detailed answers:\n- Explore relevant code and documentation\n- Explain how things work\n- Provide accurate, complete information\n{{/if}}\n\n## SUBSEQUENT ITERATIONS (after rejection)\n\nYou are being called back because validators REJECTED your implementation. This is NOT a minor issue.\n\n### FIX LIKE A SENIOR ENGINEER\n\n1. **STOP AND UNDERSTAND FIRST**\n - Read ALL VALIDATION_RESULT messages completely\n - Understand WHY each issue exists, not just WHAT it is\n - Trace the root cause - don't patch symptoms\n\n2. **FIX PROPERLY - NO SHORTCUTS**\n - Fix the ACTUAL problem, not the error message\n - If your approach was wrong, redesign it - don't add band-aids\n - Consider architectural implications of your fix\n - A senior dev would be embarrassed to submit a half-fix\n\n3. **VERIFY YOUR FIX**\n - Test your changes actually work\n - Check you didn't break anything else\n - If unsure, investigate before committing\n\n### MINDSET\n- Validators are senior engineers reviewing your code\n- They found REAL problems - take them seriously\n- Shortcuts will be caught and rejected again"
36
42
  },
37
43
  "contextStrategy": {
38
44
  "sources": [
39
- { "topic": "ISSUE_OPENED", "limit": 1 },
40
- { "topic": "VALIDATION_RESULT", "since": "last_task_end", "limit": 3 }
45
+ {
46
+ "topic": "ISSUE_OPENED",
47
+ "limit": 1
48
+ },
49
+ {
50
+ "topic": "VALIDATION_RESULT",
51
+ "since": "last_task_end",
52
+ "limit": 3
53
+ }
41
54
  ],
42
55
  "format": "chronological",
43
56
  "maxTokens": "{{max_tokens}}"
44
57
  },
45
58
  "triggers": [
46
- { "topic": "ISSUE_OPENED", "action": "execute_task" },
59
+ {
60
+ "topic": "ISSUE_OPENED",
61
+ "action": "execute_task"
62
+ },
47
63
  {
48
64
  "topic": "VALIDATION_RESULT",
49
65
  "logic": {
@@ -69,7 +85,7 @@
69
85
  {
70
86
  "id": "validator",
71
87
  "role": "validator",
72
- "model": "{{validator_model}}",
88
+ "modelLevel": "{{validator_level}}",
73
89
  "timeout": "{{timeout}}",
74
90
  "outputFormat": "json",
75
91
  "jsonSchema": {
@@ -85,24 +101,37 @@
85
101
  },
86
102
  "errors": {
87
103
  "type": "array",
88
- "items": { "type": "string" },
104
+ "items": {
105
+ "type": "string"
106
+ },
89
107
  "description": "Issues found (empty if approved)"
90
108
  }
91
109
  },
92
110
  "required": ["approved", "summary", "errors"]
93
111
  },
94
112
  "prompt": {
95
- "system": "## 🚫 YOU CANNOT ASK QUESTIONS\n\nYou are running non-interactively. There is NO USER to answer.\n- NEVER use AskUserQuestion tool\n- NEVER say \"Should I...\" or \"Would you like...\"\n- When unsure: Make the SAFER choice and proceed.\n\nYou are a validator for a SIMPLE {{task_type}} task.\n\n## 🔴 VERIFICATION PROTOCOL (REQUIRED - PREVENTS FALSE CLAIMS)\n\nBefore making ANY claim about missing functionality or code issues:\n\n1. **SEARCH FIRST** - Use Glob to find ALL relevant files\n2. **READ THE CODE** - Use Read to inspect actual implementation\n3. **GREP FOR PATTERNS** - Use Grep to search for specific code (function names, endpoints, etc.)\n\n**NEVER claim something doesn't exist without FIRST searching for it.**\n\nThe worker may have implemented features in different files than originally planned. If you claim '/api/metrics endpoint is missing' without searching, you may miss that it exists in 'server/routes/health.ts' instead of 'server/routes/api.ts'.\n\n### Example Verification Flow:\n```\n1. Claim: 'Missing error handling for network failures'\n2. BEFORE claiming → Grep for 'catch', 'error', 'try' in relevant files\n3. BEFORE claiming → Read the actual implementation\n4. ONLY IF NOT FOUND → Add to errors array\n```\n\n## VALIDATION CRITERIA\n\n**APPROVE** if:\n- Core functionality works as requested\n- Implementation is correct and complete\n- No obvious bugs or critical issues\n\n**REJECT** if:\n- Major functionality is missing or broken (VERIFIED by searching)\n- Implementation doesn't match requirements (VERIFIED by reading code)\n- Critical bugs present (VERIFIED by inspection)\n\n## TASK TYPE: {{task_type}}\n\n{{#if task_type == 'TASK'}}\nVerify the feature/change works correctly.\n{{/if}}\n\n{{#if task_type == 'DEBUG'}}\nVerify the bug is actually fixed at root cause.\n{{/if}}\n\n{{#if task_type == 'INQUIRY'}}\nVerify the information is accurate and complete.\n{{/if}}\n\nFor SIMPLE tasks, don't nitpick. Focus on: Does it work and meet requirements?"
113
+ "system": "## 🚫 YOU CANNOT ASK QUESTIONS\n\nYou are running non-interactively. There is NO USER to answer.\n- NEVER use AskUserQuestion tool\n- NEVER say \"Should I...\" or \"Would you like...\"\n- When unsure: Make the SAFER choice and proceed.\n\nYou are a validator for a SIMPLE {{task_type}} task.\n\n## 🔴 VERIFICATION PROTOCOL (REQUIRED - PREVENTS FALSE CLAIMS)\n\nBefore making ANY claim about missing functionality or code issues:\n\n1. **SEARCH FIRST** - Use Glob to find ALL relevant files\n2. **READ THE CODE** - Use Read to inspect actual implementation\n3. **GREP FOR PATTERNS** - Use Grep to search for specific code (function names, endpoints, etc.)\n\n**NEVER claim something doesn't exist without FIRST searching for it.**\n\nThe worker may have implemented features in different files than originally planned. If you claim '/api/metrics endpoint is missing' without searching, you may miss that it exists in 'server/routes/health.ts' instead of 'server/routes/api.ts'.\n\n### Example Verification Flow:\n```\n1. Claim: 'Missing error handling for network failures'\n2. BEFORE claiming → Grep for 'catch', 'error', 'try' in relevant files\n3. BEFORE claiming → Read the actual implementation\n4. ONLY IF NOT FOUND → Add to errors array\n```\n\n## VALIDATION CRITERIA\n\n**APPROVE** if:\n- Core functionality works as requested\n- Implementation is correct and complete\n- No obvious bugs or critical issues\n\n**REJECT** if:\n- Major functionality is missing or broken (VERIFIED by searching)\n- Implementation doesn't match requirements (VERIFIED by reading code)\n- Critical bugs present (VERIFIED by inspection)\n\n## TASK TYPE: {{task_type}}\n\n{{#if task_type == 'TASK'}}\nVerify the feature/change works correctly.\n{{/if}}\n\n{{#if task_type == 'DEBUG'}}\nVerify the bug is actually fixed at root cause.\n{{/if}}\n\n{{#if task_type == 'INQUIRY'}}\nVerify the information is accurate and complete.\n{{/if}}\n\nFor SIMPLE tasks, don't nitpick. Focus on: Does it work and meet requirements?\n\n## 🔴 DEBUGGING METHODOLOGY CHECK\n\nBefore approving, verify the worker didn't take shortcuts:\n\n### Ad Hoc Fix Detection\n- Did worker fix ONE instance? → Grep for similar patterns. If N > 1 exists, REJECT.\n- Example: Fixed null check in `auth.ts:42`? → `grep -r \"similar pattern\" .` - are there others?\n\n### Root Cause vs Symptom\n- Did worker add a workaround? → Find the ACTUAL bug. If workaround hides real issue, REJECT.\n- Example: Added `|| []` fallback? → WHY is it undefined? Fix THAT.\n\n### Lazy Debugging Red Flags (INSTANT REJECT)\n- Worker suggests \"restart the service\" → REJECT (hides the bug)\n- Worker suggests \"clear the cache\" → REJECT (hides the bug)\n- Worker says \"works on my machine\" → REJECT (not a fix)\n- Worker blames the test → REJECT unless they PROVE test is wrong with evidence"
96
114
  },
97
115
  "contextStrategy": {
98
116
  "sources": [
99
- { "topic": "ISSUE_OPENED", "limit": 1 },
100
- { "topic": "IMPLEMENTATION_READY", "limit": 1 }
117
+ {
118
+ "topic": "ISSUE_OPENED",
119
+ "limit": 1
120
+ },
121
+ {
122
+ "topic": "IMPLEMENTATION_READY",
123
+ "limit": 1
124
+ }
101
125
  ],
102
126
  "format": "chronological",
103
127
  "maxTokens": "{{max_tokens}}"
104
128
  },
105
- "triggers": [{ "topic": "IMPLEMENTATION_READY", "action": "execute_task" }],
129
+ "triggers": [
130
+ {
131
+ "topic": "IMPLEMENTATION_READY",
132
+ "action": "execute_task"
133
+ }
134
+ ],
106
135
  "hooks": {
107
136
  "onComplete": {
108
137
  "action": "publish_message",
@@ -118,22 +147,6 @@
118
147
  }
119
148
  }
120
149
  }
121
- },
122
- {
123
- "id": "completion-detector",
124
- "role": "orchestrator",
125
- "model": "haiku",
126
- "timeout": 0,
127
- "triggers": [
128
- {
129
- "topic": "VALIDATION_RESULT",
130
- "logic": {
131
- "engine": "javascript",
132
- "script": "const lastPush = ledger.findLast({ topic: 'IMPLEMENTATION_READY' });\nif (!lastPush) return false;\nconst result = ledger.findLast({ topic: 'VALIDATION_RESULT', since: lastPush.timestamp });\nreturn result?.content?.data?.approved === true || result?.content?.data?.approved === 'true';"
133
- },
134
- "action": "stop_cluster"
135
- }
136
- ]
137
150
  }
138
151
  ]
139
152
  }
@@ -1,11 +1,12 @@
1
1
  {
2
2
  "name": "Two-Tier Conductor (Complexity × TaskType)",
3
- "description": "Cost-optimized conductor: Haiku junior for 2D classification (complexity + taskType), Sonnet senior for UNCERTAIN tasks. Routes to appropriate cluster config via helpers.getConfig().",
3
+ "description": "Cost-optimized conductor: level1 junior for 2D classification (complexity + taskType), level2 senior for UNCERTAIN tasks. Routes to appropriate cluster config via helpers.getConfig().",
4
4
  "agents": [
5
5
  {
6
6
  "id": "junior-conductor",
7
7
  "role": "conductor",
8
- "model": "haiku",
8
+ "modelLevel": "level1",
9
+ "useDirectApi": true,
9
10
  "outputFormat": "json",
10
11
  "jsonSchema": {
11
12
  "type": "object",
@@ -28,7 +29,7 @@
28
29
  "required": ["complexity", "taskType", "reasoning"]
29
30
  },
30
31
  "prompt": {
31
- "system": "You are the JUNIOR CONDUCTOR (Haiku) - fast, cost-efficient task classification.\n\n## Your Job\nClassify tasks on TWO dimensions: COMPLEXITY and TASK TYPE.\n\n## COMPLEXITY (how hard/risky)\n\n**TRIVIAL** - One command, one file, mechanical\n**SIMPLE** - One concern, straightforward\n**STANDARD** - Multi-file, needs planning\n**CRITICAL** - High risk (auth, payments, security, production)\n**UNCERTAIN** - Escalate to senior conductor\n\n## TASK TYPE (what kind of action)\n\n**INQUIRY** - Questions, exploration, understanding\n- \"How does X work?\", \"What files handle Y?\", \"Explain Z\"\n- NO changes made, just gathering information\n\n**TASK** - Implement something new\n- \"Add feature X\", \"Create Y\", \"Implement Z\"\n- Building new functionality\n\n**DEBUG** - Fix something broken\n- \"Why is X failing?\", \"Fix bug in Y\", \"Debug Z\"\n- Investigating and fixing existing issues\n\n## Output Format\n\n```json\n{\n \"complexity\": \"TRIVIAL|SIMPLE|STANDARD|CRITICAL|UNCERTAIN\",\n \"taskType\": \"INQUIRY|TASK|DEBUG\",\n \"reasoning\": \"Brief explanation\"\n}\n```\n\n## Examples\n\n\"How does auth work?\" → SIMPLE, INQUIRY\n\"Add rate limiting\" → STANDARD, TASK\n\"Fix null pointer bug\"SIMPLE, DEBUG\n\"Why is the API slow?\" → STANDARD, DEBUG\n\"Deploy to AWS\" → CRITICAL, TASK\n\"terraform plan\" → SIMPLE, TASK\n\"What files handle routing?\" TRIVIAL, INQUIRY\n\n## Critical Rules\n\n1. ALWAYS output both dimensions\n2. INQUIRY = read-only, TASK = create new, DEBUG = fix broken\n3. complexity=UNCERTAIN only when truly ambiguous\n\nTask: {{ISSUE_OPENED.content.text}}"
32
+ "system": "You are the JUNIOR CONDUCTOR - fast task classification.\n\n## Your Job\nClassify the task on TWO dimensions.\n\n## COMPLEXITY (pick ONE)\n- TRIVIAL - One command, one file, mechanical\n- SIMPLE - One concern, straightforward\n- STANDARD - Multi-file, needs planning\n- CRITICAL - High risk (auth, payments, security, production)\n- UNCERTAIN - Escalate to senior conductor\n\n## TASK TYPE (pick ONE)\n- INQUIRY - Questions, exploration, read-only\n- TASK - Implement something new\n- DEBUG - Fix something broken\n\n## Examples\n\nTask: \"How does auth work?\"\n```json\n{\"complexity\": \"SIMPLE\", \"taskType\": \"INQUIRY\", \"reasoning\": \"Asking about existing code\"}\n```\n\nTask: \"Add rate limiting\"\n```json\n{\"complexity\": \"STANDARD\", \"taskType\": \"TASK\", \"reasoning\": \"New feature, multiple files\"}\n```\n\nTask: \"Fix null pointer bug\"\n```json\n{\"complexity\": \"SIMPLE\", \"taskType\": \"DEBUG\", \"reasoning\": \"Fixing broken code\"}\n```\n\n## Critical Rules\n1. Output ONLY valid JSON - no other text\n2. complexity must be EXACTLY one of: TRIVIAL, SIMPLE, STANDARD, CRITICAL, UNCERTAIN\n3. taskType must be EXACTLY one of: INQUIRY, TASK, DEBUG\n\nTask: {{ISSUE_OPENED.content.text}}"
32
33
  },
33
34
  "contextStrategy": {
34
35
  "sources": [{ "topic": "ISSUE_OPENED", "limit": 1 }],
@@ -58,7 +59,8 @@
58
59
  {
59
60
  "id": "senior-conductor",
60
61
  "role": "conductor",
61
- "model": "sonnet",
62
+ "modelLevel": "level2",
63
+ "useDirectApi": true,
62
64
  "outputFormat": "json",
63
65
  "jsonSchema": {
64
66
  "type": "object",
@@ -81,7 +83,7 @@
81
83
  "required": ["complexity", "taskType", "reasoning"]
82
84
  },
83
85
  "prompt": {
84
- "system": "You are the SENIOR CONDUCTOR - expert task analyzer for ambiguous tasks.\n\nThe junior conductor was uncertain. Analyze deeply and make a definitive classification.\n\n## COMPLEXITY\n\n**TRIVIAL** - One command/file, mechanical, no risk\n**SIMPLE** - One concern, straightforward\n**STANDARD** - Multi-file, needs planning\n**CRITICAL** - High risk (auth/payments/security/production)\n\n## TASK TYPE\n\n**INQUIRY** - Questions, exploration (read-only)\n**TASK** - Implement something new (create)\n**DEBUG** - Fix something broken (investigate + fix)\n\n## Decision Rules\n\n1. YOU MUST DECIDE - no UNCERTAIN output\n2. When in doubt about complexity, go ONE LEVEL HIGHER\n3. INQUIRY vs TASK vs DEBUG based on intent, not keywords\n4. Consider: Is user asking a question? Building new? Fixing broken?\n\nJunior classified as UNCERTAIN. Original task and reasoning follow."
86
+ "system": "You are the SENIOR CONDUCTOR - expert task analyzer.\n\nThe junior conductor was uncertain. Make a definitive classification.\n\n## COMPLEXITY (pick ONE - no UNCERTAIN allowed)\n- TRIVIAL - One command/file, mechanical\n- SIMPLE - One concern, straightforward\n- STANDARD - Multi-file, needs planning\n- CRITICAL - High risk (auth/payments/security/production)\n\n## TASK TYPE (pick ONE)\n- INQUIRY - Questions, exploration (read-only)\n- TASK - Implement something new\n- DEBUG - Fix something broken\n\n## Example Output\n```json\n{\"complexity\": \"STANDARD\", \"taskType\": \"TASK\", \"reasoning\": \"Multiple files involved, new feature\"}\n```\n\n## Rules\n1. Output ONLY valid JSON - no other text\n2. YOU MUST DECIDE - pick exactly one value for each field\n3. When in doubt, go ONE LEVEL HIGHER for complexity\n\nJunior was uncertain. Original task follows."
85
87
  },
86
88
  "contextStrategy": {
87
89
  "sources": [
@@ -18,6 +18,9 @@ const MOUNT_PRESETS = {
18
18
  kube: { host: '~/.kube', container: '$HOME/.kube', readonly: true },
19
19
  terraform: { host: '~/.terraform.d', container: '$HOME/.terraform.d', readonly: false },
20
20
  gcloud: { host: '~/.config/gcloud', container: '$HOME/.config/gcloud', readonly: true },
21
+ claude: { host: '~/.claude', container: '$HOME/.claude', readonly: true },
22
+ codex: { host: '~/.config/codex', container: '$HOME/.config/codex', readonly: true },
23
+ gemini: { host: '~/.config/gemini', container: '$HOME/.config/gemini', readonly: true },
21
24
  };
22
25
 
23
26
  /**
@@ -76,7 +79,9 @@ function resolveMounts(config, options = {}) {
76
79
  };
77
80
  }
78
81
 
79
- throw new Error(`Invalid mount config: ${JSON.stringify(item)}. Use preset name or {host, container, readonly?}`);
82
+ throw new Error(
83
+ `Invalid mount config: ${JSON.stringify(item)}. Use preset name or {host, container, readonly?}`
84
+ );
80
85
  });
81
86
  }
82
87
 
@@ -0,0 +1,59 @@
1
+ const { execSync, spawnSync } = require('child_process');
2
+ const fs = require('fs');
3
+ const path = require('path');
4
+
5
/**
 * Check whether a command can be found on this machine.
 *
 * A value containing the platform path separator is treated as a file path
 * and checked on disk; any other value is resolved through the shell's
 * `command -v` lookup.
 *
 * @param {string} command - Command name or path to an executable.
 * @returns {boolean} True when the path exists or the shell resolves the name;
 *   false for an empty value or a failed lookup.
 */
function commandExists(command) {
  if (!command) return false;
  if (command.includes(path.sep)) {
    return fs.existsSync(command);
  }

  // Pass the name as a positional argument ("$1") instead of interpolating it
  // into the script text, so shell metacharacters in `command` (`;`, `|`,
  // `$(...)`, ...) are never interpreted as shell syntax.
  const lookup = spawnSync('sh', ['-c', 'command -v "$1"', 'sh', command], {
    stdio: 'pipe',
  });

  // When the shell itself cannot be spawned, `status` is null and the
  // command is reported as missing.
  return lookup.status === 0;
}
17
+
18
/**
 * Resolve a command to the location reported by the shell.
 *
 * A value containing the platform path separator is treated as a file path
 * and returned unchanged when it exists on disk; any other value is resolved
 * through the shell's `command -v` lookup.
 *
 * @param {string} command - Command name or path to an executable.
 * @returns {string|null} The existing path, the trimmed `command -v` output,
 *   or null when the value is empty or cannot be resolved.
 */
function getCommandPath(command) {
  if (!command) return null;
  if (command.includes(path.sep)) {
    return fs.existsSync(command) ? command : null;
  }

  // Pass the name as a positional argument ("$1") instead of interpolating it
  // into the script text, so shell metacharacters in `command` can neither run
  // extra commands nor forge the reported path.
  const lookup = spawnSync('sh', ['-c', 'command -v "$1"', 'sh', command], {
    encoding: 'utf8',
    stdio: 'pipe',
  });

  // A non-zero exit (or a shell that could not be spawned) means "not found".
  if (lookup.status !== 0) return null;
  return (lookup.stdout || '').trim() || null;
}
30
+
31
/**
 * Capture the help text printed by a command.
 *
 * Runs the command with `--help` and, if that prints nothing, with `-h`.
 * stdout and stderr are concatenated (stdout first) and trimmed.
 *
 * @param {string} command - Command name or path to run.
 * @param {string[]} [args=[]] - Arguments placed before the help flag.
 * @returns {string} The first non-empty output, or '' when the command is not
 *   available or both attempts print nothing.
 */
function getHelpOutput(command, args = []) {
  if (!commandExists(command)) {
    return '';
  }

  for (const flag of ['--help', '-h']) {
    const { stdout, stderr } = spawnSync(command, [...args, flag], { encoding: 'utf8' });
    const text = `${stdout || ''}${stderr || ''}`.trim();
    if (text) {
      return text;
    }
  }

  return '';
}
46
+
47
/**
 * Capture what a command prints when invoked with `--version`.
 *
 * stdout and stderr are concatenated (stdout first) and trimmed.
 *
 * @param {string} command - Command name or path to run.
 * @param {string[]} [args=[]] - Arguments placed before `--version`.
 * @returns {string} The trimmed output, or '' when the command is not available.
 */
function getVersionOutput(command, args = []) {
  if (!commandExists(command)) {
    return '';
  }

  const argv = [...args];
  argv.push('--version');

  const { stdout, stderr } = spawnSync(command, argv, { encoding: 'utf8' });
  const combined = `${stdout || ''}${stderr || ''}`;
  return combined.trim();
}
53
+
54
+ module.exports = {
55
+ commandExists,
56
+ getCommandPath,
57
+ getHelpOutput,
58
+ getVersionOutput,
59
+ };
@@ -0,0 +1,56 @@
1
// Maps every accepted provider name to the canonical name listed in
// VALID_PROVIDERS. Canonical names map to themselves.
const PROVIDER_ALIASES = {
  anthropic: 'claude',
  openai: 'codex',
  google: 'gemini',
  claude: 'claude',
  codex: 'codex',
  gemini: 'gemini',
};

// Canonical provider names.
const VALID_PROVIDERS = ['claude', 'codex', 'gemini'];

/**
 * Translate a provider name or alias to its canonical name.
 *
 * The lookup is case-insensitive. Values that are empty, not strings, or not
 * present in PROVIDER_ALIASES are returned unchanged.
 *
 * @param {*} name - Provider name or alias.
 * @returns {*} The canonical name for a known alias, otherwise `name` as given.
 */
function normalizeProviderName(name) {
  if (!name || typeof name !== 'string') return name;
  const key = name.toLowerCase();
  // Own-property check: a plain `PROVIDER_ALIASES[key]` lookup also finds
  // members inherited from Object.prototype, so "constructor" or "__proto__"
  // would come back as a function/object instead of the original string.
  return Object.prototype.hasOwnProperty.call(PROVIDER_ALIASES, key)
    ? PROVIDER_ALIASES[key]
    : name;
}

/**
 * Re-key a per-provider settings object by canonical provider name.
 *
 * Entries whose key resolves to a name in VALID_PROVIDERS are shallow-merged
 * under that name; all other entries are copied through under their original
 * key. Keys that normalizeProviderName changes are processed first, so when a
 * changed key and an unchanged key resolve to the same provider, the unchanged
 * key's values win on conflict.
 *
 * @param {*} providerSettings - Settings object keyed by provider name or alias.
 * @returns {*} A new normalized object, or the input unchanged when it is
 *   falsy, not an object, or an array.
 */
function normalizeProviderSettings(providerSettings) {
  if (
    !providerSettings ||
    typeof providerSettings !== 'object' ||
    Array.isArray(providerSettings)
  ) {
    return providerSettings;
  }

  const normalized = {};
  // Object.entries returns a fresh array, so sorting it leaves the input
  // untouched. Returning 0 for equal ranks keeps the original key order
  // within each group.
  const aliasFirst = Object.entries(providerSettings).sort(([left], [right]) => {
    const leftIsCanonical = normalizeProviderName(left) === left;
    const rightIsCanonical = normalizeProviderName(right) === right;
    if (leftIsCanonical === rightIsCanonical) return 0;
    return leftIsCanonical ? 1 : -1;
  });

  for (const [key, value] of aliasFirst) {
    const canonical = normalizeProviderName(key);
    if (!VALID_PROVIDERS.includes(canonical)) {
      // Unrecognized key: pass the entry through as-is.
      normalized[key] = value;
      continue;
    }
    // Later entries overwrite earlier ones field by field.
    normalized[canonical] = {
      ...(normalized[canonical] || {}),
      ...(value || {}),
    };
  }

  return normalized;
}
50
+
51
+ module.exports = {
52
+ PROVIDER_ALIASES,
53
+ VALID_PROVIDERS,
54
+ normalizeProviderName,
55
+ normalizeProviderSettings,
56
+ };